In [1]:
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen as uReq
from urllib.request import HTTPError 

In [2]:
def read_one_page(url):
    """
    Creates the timetable for one course.

    Downloads the page at ``url``, picks the last ``<table>`` element of the
    document and collects the text of every ``<td>`` cell, one list per
    ``<tr>`` row. The first ``<tr>`` is discarded.

    Args:
        url (string): the url for the course.

    Return:
        a dataframe with the course info: one row per table row after the
        first one, with positional integer column labels (0, 1, 2, ...).

    Raises:
        IndexError: if the page contains no ``<table>`` element.
        Any error raised by ``urlopen`` (the caller in this notebook relies
        on ``HTTPError`` propagating for pages that do not exist).
    """

    # Use the response as a context manager so the connection is closed as
    # soon as the page is parsed; this function is called once per course,
    # and unclosed responses would otherwise pile up during a full scrape.
    with uReq(url) as page_html:
        soup = BeautifulSoup(page_html, 'html.parser')

    tables = soup.find_all("table")
    tabulka = tables[-1]  # only the last table on the page is read

    # One list of cell texts per table row; rows without <td> cells
    # yield an empty list.
    records = [
        [soup_cell.text for soup_cell in soup_row.find_all('td')]
        for soup_row in tabulka.find_all('tr')
    ]

    # Skip the first row (presumably the header row -- TODO confirm).
    return pd.DataFrame(data=records[1:])

In [3]:

def create_full_timetable(year=20):
    """
    Builds the full timetable by downloading the numbered course pages.

    If year=20, it returns the full timetable for the academic year 2020/2021.
    Possible values: [17, 18, 19, 20]

    Pages ``1.htm``, ``2.htm``, ... below the year-specific base url are
    requested in order until one request raises ``HTTPError``; that error is
    treated as "no more pages" and ends the download.

    Args:
        year (int): two-digit start year of the academic year.

    Return:
        a dataframe with the columns 'Code', 'Title', 'Type', 'Group', 'Day',
        'Room', 'Start', 'Finish', 'MT Wks', 'LT Wks' and 'ST Wks', restricted
        to rows whose 'Code' is a string longer than three characters.
        Row labels are kept as returned for each page (they are not reset),
        so labels repeat across courses. If no page could be fetched, an
        empty dataframe with these columns is returned.
    """

    column_names = {
        0: 'Code', 1: 'Title', 2: 'Type', 3: 'Group', 4: 'Day',
        5: 'Room', 6: 'Start', 7: 'Finish', 8: 'MT Wks',
        9: 'LT Wks', 10: 'ST Wks',
    }

    url_base = (
        'http://www.lse.ac.uk/school/timetables/lecture_seminar_timetable/'
        'lecture_and_seminar_timetable__20{}_{}/'.format(year, year + 1)
    )

    # Collect the per-page frames and concatenate once at the end:
    # DataFrame.append no longer exists in pandas 2.x, and growing a frame
    # inside the loop copies all previous rows on every iteration.
    pages = []
    i = 0
    while True:
        i = i + 1
        url = url_base + str(i) + '.htm'
        try:
            data = read_one_page(url)
        except HTTPError:
            # Any HTTP error (not only 404) is taken as the end of the pages.
            break
        if not data.empty:
            pages.append(data)

    if not pages:
        # Nothing was downloaded: return an empty, correctly labelled frame
        # instead of failing on the missing 'Code' column.
        return pd.DataFrame(columns=list(column_names.values()))

    timetable = pd.concat(pages).rename(columns=column_names)

    # Keep only rows with a real course code; the isinstance check makes
    # missing cells (None/NaN) count as invalid instead of raising.
    valid_course_codes = timetable['Code'].apply(
        lambda x: isinstance(x, str) and len(x) > 3
    )
    return timetable[valid_course_codes]

In [4]:
# Scrape the full timetable for the academic year 2019/20; every course page
# is downloaded with its own HTTP request.
# it takes ~7 mins
df = create_full_timetable(19)

### Examples of reading the timetable

In [5]:
# Show the timetable rows that belong to a hand-picked list of course codes.

selected_course = [
    'MA423', 'MA424', 'MA427', 'MA428',
    'ST449', 'MA498', 'MA426',
]
mask0 = df['Code'].isin(selected_course)  # True where the row's code is in the list

selection = df.loc[mask0]
display(selection)

Unnamed: 0,Code,Title,Type,Group,Day,Room,Start,Finish,MT Wks,LT Wks,ST Wks
0,MA423,MA423 Fundamentals of Operations Research,LEC,1,Tue,NAB.1.04,13:00,15:00,1234578910.0,,
1,MA423,MA423 Fundamentals of Operations Research,LEC,1,Tue,NAB.LG.01,13:00,15:00,6.0,,
2,MA423,MA423 Fundamentals of Operations Research,LEC,1,Tue,NAB.1.04,13:00,16:00,,,1.0
3,MA423,MA423 Fundamentals of Operations Research,SEM,1,Wed,32L.G.15,15:00,16:30,234567891011.0,,
4,MA423,MA423 Fundamentals of Operations Research,SEM,2,Wed,32L.G.17,12:30,14:00,234567891011.0,,
0,MA424,MA424 Modelling in Operations Research,LEC,1,Mon,CBG.1.03,11:00,13:00,12345678910.0,,
1,MA424,MA424 Modelling in Operations Research,SEM,1,Thu,STC.S018,09:30,11:00,2345678910.0,,
2,MA424,MA424 Modelling in Operations Research,SEM,2,Fri,FAW.4.03,14:00,15:30,2345678910.0,,
3,MA424,MA424 Modelling in Operations Research,WOR,1,Tue,FAW.4.03,09:00,11:00,2345678910.0,,
4,MA424,MA424 Modelling in Operations Research,WOR,2,Wed,STC.S018,09:00,11:00,2345678910.0,,


In [6]:
# display timetable for all 4th year courses

year = 4    # other digits that occur in the codes shown in this notebook: 1 and 5

# The year digit is the third character of the course code (the '4' in 'MA423').
# The vectorised .str accessor replaces the per-row lambda; a code that is
# too short or missing simply compares as False instead of raising.
mask1 = df['Code'].str[2] == str(year)
selection = df[mask1]
display(selection)

Unnamed: 0,Code,Title,Type,Group,Day,Room,Start,Finish,MT Wks,LT Wks,ST Wks
0,AC411,"AC411 Accounting, Strategy and Control",SEM,1,Wed,PAR.2.03,08:30,10:00,11,,
1,AC411,"AC411 Accounting, Strategy and Control",SEM,1,Mon,PAR.2.03,08:30,10:00,23456,,
2,AC411,"AC411 Accounting, Strategy and Control",SEM,1,Wed,PAR.2.03,08:30,10:00,2345678,,
3,AC411,"AC411 Accounting, Strategy and Control",SEM,1,Mon,PAR.2.03,08:30,10:00,78,,
4,AC411,"AC411 Accounting, Strategy and Control",SEM,1,Mon,PAR.2.03,08:30,10:00,91011,,
...,...,...,...,...,...,...,...,...,...,...,...
2,ST452,ST452 Probability and Mathematical Statistics I,SEM,1,Wed,32L.G.06,14:00,15:00,11,,
3,ST452,ST452 Probability and Mathematical Statistics I,SEM,1,Thu,32L.G.02,14:00,15:00,10,,
4,ST452,ST452 Probability and Mathematical Statistics I,SEM,1,Wed,32L.G.06,14:00,15:00,23456789,,
0,ST453,ST453 Probability and Mathematical Statistics II,LEC,1,Fri,NAB.1.19,12:00,14:00,,123457891011,


In [7]:
# display timetable for all Philosophy courses

subject = 'PH'   # other options are 'MA', 'ST', 'GV', 'LL' etc.

# The subject is the two-letter prefix of the course code. Comparing the
# vectorised slice directly is the same test as `x[:2] in [subject]`, without
# a Python-level lambda per row.
mask2 = df['Code'].str[:2] == subject
selection = df[mask2]
display(selection)

Unnamed: 0,Code,Title,Type,Group,Day,Room,Start,Finish,MT Wks,LT Wks,ST Wks
0,PH101,PH101 Logic,LEC,1,Mon,CLM.G.02,12:00,13:00,123457891011,123457891011,
1,PH101,PH101 Logic,LEC,1,Mon,OT,09:00,10:00,234910,234910,
0,PH103,PH103 The Big Questions: An Introduction to Ph...,LEC,1,Tue,CBG.B1.02,13:00,14:00,1011,123457891011,
1,PH103,PH103 The Big Questions: An Introduction to Ph...,LEC,1,Tue,CBG.B1.02,13:00,14:00,12,,
2,PH103,PH103 The Big Questions: An Introduction to Ph...,LEC,1,Tue,CBG.B1.02,13:00,14:00,34,,
...,...,...,...,...,...,...,...,...,...,...,...
1,PH502,PH502 Reasoning and Logic,LEC,1,Mon,OT,09:00,10:00,234910,234910,
0,PH551,PH551 Research Seminar in the Philosophy of Na...,SEM,1,Mon,LAK.2.06,14:00,15:30,1234567891011,12345678910,12345678
0,PH555,PH555 Research Seminar in the Philosophy of Ec...,SEM,1,Wed,LAK.2.06,16:00,18:00,123457891011,,
1,PH555,PH555 Research Seminar in the Philosophy of Ec...,SEM,1,Wed,LAK.2.06,16:00,18:00,,123457891011,


In [8]:
# display timetable for all Philosophy 4th year courses
# NOTE: this cell reuses `subject` and `mask1` defined in the two example
# cells above, so those cells must be run first.

mask2 = df['Code'].str[:2] == subject   # rows whose code starts with the subject prefix
selection = df[mask1 & mask2]           # both conditions must hold
display(selection)

Unnamed: 0,Code,Title,Type,Group,Day,Room,Start,Finish,MT Wks,LT Wks,ST Wks
0,PH400,PH400 Philosophy of Science,LEC,1,Mon,NAB.LG.03,12:00,13:00,123457891011,,
1,PH400,PH400 Philosophy of Science,LEC,1,Mon,NAB.2.04,12:00,13:00,,123457891011,
2,PH400,PH400 Philosophy of Science,SEM,1,Tue,PAN.3.02,16:00,17:30,123457891011,,
3,PH400,PH400 Philosophy of Science,SEM,1,Tue,PAN.3.02,16:00,17:30,,123457891011,
4,PH400,PH400 Philosophy of Science,SEM,1,Fri,32L.G.09,09:00,10:30,,11,
...,...,...,...,...,...,...,...,...,...,...,...
2,PH458,PH458 Evidence and Policy,SEM,1,Wed,CBG.1.10,17:00,18:30,5,,
3,PH458,PH458 Evidence and Policy,SEM,2,Thu,PAN.1.04,15:00,16:30,12357891011,,
4,PH458,PH458 Evidence and Policy,SEM,2,Wed,32L.G.20,10:30,12:00,5,,
5,PH458,PH458 Evidence and Policy,SEM,3,Thu,PAN.1.04,13:30,15:00,12357891011,,
