In [68]:
import pandas as pd
import numpy as np
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import ssl
import math

In [69]:
def get_soup(year, gender, pg_no, print_url=False):
    
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=MAS&num_results=1000&page=" + str(pg_no) + 
           "&pid=list&pidp=start&search%5Bsex%5D=" + gender)
    if print_url:
        print(url)

    request_site = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    webpage = urlopen(request_site).read()

    soup = BeautifulSoup(webpage, 'lxml')
    
    return soup
    

In [71]:
def create_df(year):
    
    df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
    errors = pd.DataFrame(columns=["Page", "li item", "Error"])

    for gender in ["M", "W"]:

        soup = get_soup(year, gender, "1", print_url=True)

        # list_length = soup.find("li", class_="list-group-item").text.split(" ")[0]
        list_length = int(soup.find("li", class_="list-group-item").text.split(" ")[0])
        n_pages = math.trunc(list_length/1000 + 1)

        for n in list(range(1,n_pages + 1)):

            soup = get_soup(year, gender, str(n))
            datas = soup.find_all("li")
            print(f'Reading page {n} of {n_pages} in {gender}')

            for i, data in enumerate(datas):
        #         print(data.prettify())
                try:
                    place_overall = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[0].text
                    place_gender = data.find('div', class_="list-field type-place place-primary numeric").text
                    place_category = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[1].text
                    name = data.find('h4', class_="list-field type-fullname").text
                    club = data.find('div', class_="list-field type-field hidden-xs").text[4:]
                    runner_no = data.find('div', class_="list-field type-field").text[13:]
                    category = data.find('div', class_='list-field type-age_class').text[8:]
                    event = data.find('div', class_="list-field type-event_name").text[5:]
                    half_time = data.find('div', class_="split list-field type-time hidden-xs").text[5:]
                    finish_time = data.find('div', class_="split list-field type-time").text[6:]
                    df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                            runner_no, gender, category, event, half_time, finish_time]
                except Exception as e:
                    errors.loc[len(errors.index)] = [n, i, e]
                    continue
    
    return df

In [72]:
def create_df_old(year):
    
    df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                            "Gender", "Category", "Event", "Half Time", "Finish Time"])
    errors = pd.DataFrame(columns=["Page", "tr item", "Error"])

    for gender in ["M", "W"]:

        soup = get_soup(year, gender, "1", print_url=True)

        # list_length = soup.find("li", class_="list-group-item").text.split(" ")[0]
        list_length = int(soup.find("div", class_="list-info-text").text.split(" ")[0])
        n_pages = math.trunc(list_length/1000 + 1)

        for n in list(range(1,n_pages + 1)):

            soup = get_soup(year, gender, n)
            datas = soup.find_all("tr")

            print(f'Reading page {n} of {n_pages} in {gender}')

            for i, data in enumerate(datas):
    #             print(data.prettify())
                try:
                    place_overall = data.find_all('td')[0].text
                    place_gender = data.find_all('td')[1].text
                    place_category = data.find_all('td')[2].text
                    name = data.find_all('td')[3].text[1:-1]
                    club = data.find_all('td')[4].text
                    runner_no = data.find_all('td')[5].text
                    category = data.find_all('td')[6].text
                    event = "Mass"
                    half_time = data.find_all('td')[7].text
                    finish_time = data.find_all('td')[8].text
                    df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                            runner_no, gender, category, event, half_time, finish_time]
    #             except IndexError:
    #                 continue
                except Exception as e:
                    errors.loc[len(errors.index)] = [n, i, e]
                    continue
    return df

In [70]:
def numeric_places(df):
    df["Overall Place"] = pd.to_numeric(df["Overall Place"], errors='coerce')
    df["Gender Place"] = pd.to_numeric(df["Gender Place"], errors='coerce')
    df["Category Place"] = pd.to_numeric(df["Category Place"], errors='coerce')
    # df[["Overall Place", "Gender Place", "Category Place"]] = df[["Overall Place", "Gender Place", 
    # "Category Place"]].astype(int)
    return df

In [73]:
# 2021 and 2019 mass Results
for year in ["2021", "2019"]:

    # choose not to authenticate security certificate
    # https://clay-atlas.com/us/blog/2021/09/26/python-en-urllib-error-ssl-certificate/

    ssl._create_default_https_context = ssl._create_unverified_context

    print()
    print(year)

    df = create_df(year)
                    
    # df[["Overall Place", "Gender Place", "Category Place"]] = df[["Overall Place", "Gender Place",
    #                                                               "Category Place"]].astype(int)
    df = numeric_places(df)

    df = df.sort_values("Overall Place")
    df.to_csv("London_" + year + "_mass_results.csv", index=False)


2021

https://results.london-marathon.co.uk/2021/?event=MAS&num_results=1000&page=1&pid=list&pidp=start&search%5Bsex%5D=M
Reading page 1 of 22 in M
Reading page 2 of 22 in M
Reading page 3 of 22 in M
Reading page 4 of 22 in M
Reading page 5 of 22 in M
Reading page 6 of 22 in M
Reading page 7 of 22 in M
Reading page 8 of 22 in M
Reading page 9 of 22 in M
Reading page 10 of 22 in M
Reading page 11 of 22 in M
Reading page 12 of 22 in M
Reading page 13 of 22 in M
Reading page 14 of 22 in M
Reading page 15 of 22 in M
Reading page 16 of 22 in M
Reading page 17 of 22 in M
Reading page 18 of 22 in M
Reading page 19 of 22 in M
Reading page 20 of 22 in M
Reading page 21 of 22 in M
Reading page 22 of 22 in M

https://results.london-marathon.co.uk/2021/?event=MAS&num_results=1000&page=1&pid=list&pidp=start&search%5Bsex%5D=W
Reading page 1 of 15 in W
Reading page 2 of 15 in W
Reading page 3 of 15 in W
Reading page 4 of 15 in W
Reading page 5 of 15 in W
Reading page 6 of 15 in W
Reading page 7 of 1

In [74]:
# 2014 to 2018 mass results

for year in ["2018", "2017", "2016", "2015", "2014"]:

    # choose not to authenticate security certificate
    # https://clay-atlas.com/us/blog/2021/09/26/python-en-urllib-error-ssl-certificate/

    ssl._create_default_https_context = ssl._create_unverified_context

    print()
    print(year)
    df = create_df_old(year)

    df = numeric_places(df)

    df = df.sort_values("Overall Place")
    df.to_csv("London_" + year + "_mass_results.csv", index=False)
    print("end")


2018

https://results.london-marathon.co.uk/2018/?event=MAS&num_results=1000&page=1&pid=list&pidp=start&search%5Bsex%5D=M
Reading page 1 of 24 in M
Reading page 2 of 24 in M
Reading page 3 of 24 in M
Reading page 4 of 24 in M
Reading page 5 of 24 in M
Reading page 6 of 24 in M
Reading page 7 of 24 in M
Reading page 8 of 24 in M
Reading page 9 of 24 in M
Reading page 10 of 24 in M
Reading page 11 of 24 in M
Reading page 12 of 24 in M
Reading page 13 of 24 in M
Reading page 14 of 24 in M
Reading page 15 of 24 in M
Reading page 16 of 24 in M
Reading page 17 of 24 in M
Reading page 18 of 24 in M
Reading page 19 of 24 in M
Reading page 20 of 24 in M
Reading page 21 of 24 in M
Reading page 22 of 24 in M
Reading page 23 of 24 in M
Reading page 24 of 24 in M

https://results.london-marathon.co.uk/2018/?event=MAS&num_results=1000&page=1&pid=list&pidp=start&search%5Bsex%5D=W
Reading page 1 of 17 in W
Reading page 2 of 17 in W
Reading page 3 of 17 in W
Reading page 4 of 17 in W
Reading page 5 of

In [75]:
# Checking 2021 Mass Results

df = pd.read_csv("London_2021_mass_results.csv")

overall_places = list(df["Overall Place"])

missing_places = []

for i in [*range(len(df.index))][1:]:
    if i not in overall_places:
        previous_place = i - 1
        preprevious_place = i - 2
        count1 = overall_places.count(previous_place)
        count2 = overall_places.count(preprevious_place)
        if count1 < 2 and count2 < 3:
            print(f'{i} missing')
            print(f'{count1} finishers in {previous_place}')
            print(f'{count2} finishers in {preprevious_place}')
            print()

2263 missing
1 finishers in 2262
1 finishers in 2261

35798 missing
1 finishers in 35797
1 finishers in 35796



In [76]:
i = 35798

df[(df["Overall Place"] > i - 5) & 
   (df["Overall Place"] < i + 6)].sort_values("Overall Place")

Unnamed: 0,Overall Place,Gender Place,Category Place,Name,Club,Runner Number,Gender,Category,Event,Half Time,Finish Time
35792,35794,14359,6396,"Pilbeam, Leanne (GBR)",–,56693,W,18-39,Mass,3:42:38,08:08:04
35793,35795,14360,2534,"MACQUAID, SARAH",–,57847,W,40-44,Mass,4:01:18,08:08:07
35794,35796,14361,6397,"Marling, Michala (GBR)",–,30256,W,18-39,Mass,3:43:36,08:08:42
35795,35797,14362,6398,"Meadows, Hayley (GBR)",–,29257,W,18-39,Mass,3:43:35,08:08:42
35796,35799,21436,9397,"Flynn, Jay (GBR)",–,44253,M,18-39,Mass,3:16:17,08:08:52
35797,35800,14364,2244,"Caldeira, Michelle (GBR)",–,36650,W,45-49,Mass,3:36:13,08:08:56
35798,35801,14365,6399,"Tredgold, Ashden-Rose (GBR)",–,33940,W,18-39,Mass,3:40:03,08:09:02
35799,35802,14366,2535,"Billows, Lindsey (GBR)",–,3357,W,40-44,Mass,3:54:10,08:09:52
35800,35803,14367,2245,"Scales, Samantha (GBR)",–,38936,W,45-49,Mass,3:35:57,08:09:53


In [77]:
for year in ["2018", "2017", "2016", "2015", "2014"]:

    print()
    print(year)
    df = pd.read_csv("London_" + year + "_mass_results.csv")
    print(len(df[df["Gender"] == "W"]))


2018
16418

2017
15464

2016
15029

2015
14378

2014
13272


In [78]:
df = pd.read_csv("London_2017_mass_results.csv")
# df[df["Name"].str.contains("Kevin")]
# df[df["Runner Number"] == 43227]
# df[df["Club"].str.contains("Kent")].head(20)
df[df["Gender"] == "W"]

Unnamed: 0,Overall Place,Gender Place,Category Place,Name,Club,Runner Number,Gender,Category,Event,Half Time,Finish Time
200,201,1,1.0,"Boniface, Anna (GBR)",Reading...,666,W,18-39,Mass,01:17:21,02:37:07
286,287,2,2.0,"Clements, Amy (GBR)",Kent AC,823,W,18-39,Mass,01:18:31,02:39:11
297,298,3,3.0,"Davis, Julia (GBR)",Winchester...,715,W,18-39,Mass,01:17:21,02:39:27
338,339,4,4.0,"Edwards, Rosie (GBR)",Rotherha...,843,W,18-39,Mass,01:18:51,02:40:49
362,363,5,5.0,"Gyurko, Fanni (HUN)",Central AC,640,W,18-39,Mass,01:19:09,02:41:20
...,...,...,...,...,...,...,...,...,...,...,...
39277,39279,15460,1964.0,"Thatcher, Sydney (FRA)",,61995,W,45-49,Mass,03:38:53,08:44:47
39278,39280,15461,8774.0,"Feasey, Georgina Elizabeth (GBR)",,8331,W,18-39,Mass,,08:50:19
39279,39281,15462,8775.0,"Fisher, Kimberley (USA)",,55435,W,18-39,Mass,,08:52:00
39280,39282,15463,8776.0,"Hewetson, Anna (GBR)",,36065,W,18-39,Mass,03:44:26,08:53:13


In [79]:
# 2021 Elite Results

year = "2021"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:

    url = ("https://results.london-marathon.co.uk/" + year +
           "/?event=ELIT&pid=list&pidp=start&search%5Bsex%5D=" + gender)
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("li")

    print()
    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas):
#         print(data.prettify())
        try:
            place_overall = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[0].text
            place_gender = data.find('div', class_="list-field type-place place-primary numeric").text
            place_category = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[1].text
            name = data.find('h4', class_="list-field type-fullname").text
            club = data.find('div', class_="list-field type-field hidden-xs").text[4:]
            runner_no = data.find('div', class_="list-field type-field").text[13:]
            category = data.find('div', class_='list-field type-age_class').text[8:]
            event = data.find('div', class_="list-field type-event_name").text[5:]
            half_time = data.find('div', class_="split list-field type-time hidden-xs").text[5:]
            finish_time = data.find('div', class_="split list-field type-time").text[6:]
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except Exception as e:
            errors.loc[len(errors.index)] = [i, e]
            continue

# df[["Overall Place", "Gender Place", "Category Place"]] = df[["Overall Place", "Gender Place",
#                                                               "Category Place"]].astype(int)
df = numeric_places(df)

df = df.sort_values("Overall Place")
df.to_csv("London_2021_elite_results.csv", index=False)

HTTPError: HTTP Error 403: Forbidden

In [None]:
# 2020 Elite Results

year = "2020"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=LMR" + gender + 
           "&pid=list&pidp=results_nav&search%5Bsex%5D=" + gender)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("li")

    print()
    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas):
#         print(data.prettify())
        try:
            place_overall = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[0].text
            place_gender = data.find('div', class_="list-field type-place place-primary numeric").text
            place_category = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[1].text
            name = data.find('h4', class_="list-field type-eval").text
            club = data.find('div', class_="list-field type-field hidden-xs").text[4:]
            runner_no = data.find('div', class_="list-field type-field").text[14:]
            category = data.find('div', class_='list-field type-age_class').text[8:]
            event = "Elite " + gender
            half_time = ""
            finish_time = data.find('div', class_="list-field type-time").text[6:]
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
#             errors.loc[len(errors.index)] = [i, e]
#             continue
            
df.to_csv("London_2020_elite_results.csv", index=False)

In [None]:
# 2019 Elite Results

year = "2019"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    print()
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=ELIT&num_results=100&pid=list&pidp=start&search%5Bsex%5D=" + gender)    
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("li")

    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas):
#         print(data.prettify())
        try:
            place_overall = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[0].text
            place_gender = data.find('div', class_="list-field type-place place-primary numeric").text
            place_category = data.find_all('div', class_="list-field type-place place-secondary hidden-xs numeric")[1].text
            name = data.find('h4', class_="list-field type-fullname").text
            club = data.find('div', class_="list-field type-field hidden-xs").text[4:]
            runner_no = data.find('div', class_="list-field type-field").text[14:]
            category = data.find('div', class_='list-field type-age_class').text[8:]
            event = "Elite " + gender
            half_time = data.find('div', class_="split list-field type-time hidden-xs").text[4:]
            finish_time = data.find('div', class_="split list-field type-time").text[6:]
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
#             errors.loc[len(errors.index)] = [i, e]
#             continue
            
df.to_csv("London_" + year + "_elite_results.csv", index=False)

In [None]:
# 2018 Elite Results

year = "2018"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    print()
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=ELIT&num_results=100&pid=list&pidp=start&search%5Bsex%5D=" + gender)    
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("tr")
    print(len(datas))

    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas[1:]):
#         print(data.prettify())
        try:
            place_overall = data.find_all('td')[0].text
            place_gender = data.find_all('td')[1].text
            place_category = data.find_all('td')[2].text
            name = data.find_all('td')[3].text[1:-1]
            club = data.find_all('td')[4].text
            runner_no = data.find_all('td')[5].text
            category = data.find_all('td')[6].text
            event = "Elite"
            half_time = data.find_all('td')[7].text
            finish_time = data.find_all('td')[8].text
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
# #             errors.loc[len(errors.index)] = [i, e]
#             print(i, e)
#             continue
# df[["Overall Place", "Gender Place", "Category Place"]] = df[["Overall Place", "Gender Place",
#                                                               "Category Place"]].astype(int) 
df = numeric_places(df)

df = df.sort_values("Overall Place")            
df.to_csv("London_" + year + "_elite_results.csv", index=False)

In [None]:
# 2017 Elite Results

year = "2017"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    print()
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=ELIT&num_results=100&pid=list&pidp=start&search%5Bsex%5D=" + gender)    
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("tr")
    print(len(datas))

    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas[1:]):
#         print(data.prettify())
        try:
            place_overall = data.find_all('td')[0].text
            place_gender = data.find_all('td')[1].text
            place_category = data.find_all('td')[2].text
            name = data.find_all('td')[3].text[1:-1]
            club = data.find_all('td')[4].text
            runner_no = data.find_all('td')[5].text
            category = data.find_all('td')[6].text
            event = "Elite"
            half_time = data.find_all('td')[7].text
            finish_time = data.find_all('td')[8].text
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
# #             errors.loc[len(errors.index)] = [i, e]
#             print(i, e)
#             continue
# df[["Overall Place", "Gender Place", "Category Place"]] = df[["Overall Place", "Gender Place",
#                                                               "Category Place"]].astype(int)
df = numeric_places(df)
                                                    
df = df.sort_values("Overall Place")            
df.to_csv("London_" + year + "_elite_results.csv", index=False)

In [None]:
# 2016 Elite Results

year = "2016"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    print()
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=ELIT&num_results=100&pid=list&pidp=start&search%5Bsex%5D=" + gender)    
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("tr")
    print(len(datas))

    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas[1:]):
#         print(data.prettify())
        try:
            place_overall = data.find_all('td')[0].text
            place_gender = data.find_all('td')[1].text
            place_category = data.find_all('td')[2].text
            name = data.find_all('td')[3].text[1:-1]
            club = data.find_all('td')[4].text
            runner_no = data.find_all('td')[5].text
            category = data.find_all('td')[6].text
            event = "Elite"
            half_time = data.find_all('td')[7].text
            finish_time = data.find_all('td')[8].text
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
# #             errors.loc[len(errors.index)] = [i, e]
#             print(i, e)
#             continue
# df[["Overall Place", "Gender Place", "Category Place"]] = df[["Overall Place", "Gender Place",
#                                                               "Category Place"]].astype(int)
df = numeric_places(df)

df = df.sort_values("Overall Place")            
df.to_csv("London_" + year + "_elite_results.csv", index=False)

print("end")

In [None]:
df = pd.read_csv("London_2016_elite_results.csv")
df

In [None]:
# 2015 Elite Results

year = "2015"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    print()
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=ELIT&num_results=100&pid=list&pidp=start&search%5Bsex%5D=" + gender)    
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("tr")
    print(len(datas))

    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas[1:]):
#         print(data.prettify())
        try:
            place_overall = data.find_all('td')[0].text
            place_gender = data.find_all('td')[1].text
            place_category = data.find_all('td')[2].text
            name = data.find_all('td')[3].text[1:-1]
            club = data.find_all('td')[4].text
            runner_no = data.find_all('td')[5].text
            category = data.find_all('td')[6].text
            event = "Elite"
            half_time = data.find_all('td')[7].text
            finish_time = data.find_all('td')[8].text
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
# #             errors.loc[len(errors.index)] = [i, e]
#             print(i, e)
#             continue

df = numeric_places(df)

df = df.sort_values("Overall Place")            
df.to_csv("London_" + year + "_elite_results.csv", index=False)

print("end")

In [None]:
df = pd.read_csv("London_2015_elite_results.csv")
df

In [None]:
# 2014 Elite Results

year = "2014"

df = pd.DataFrame(columns=["Overall Place", "Gender Place", "Category Place", "Name", "Club", "Runner Number",
                           "Gender", "Category", "Event", "Half Time", "Finish Time"])
errors = pd.DataFrame(columns=["li item", "Error"])

for gender in ["M", "W"]:
    
    print()
    url = ("https://results.london-marathon.co.uk/" + year + 
           "/?event=ELIT&num_results=100&pid=list&pidp=start&search%5Bsex%5D=" + gender)    
    print(url)
    
    html = urlopen(url)
    soup = BeautifulSoup(html, 'lxml')
    datas = soup.find_all("tr")
    print(len(datas))

    print(f'Reading {gender} in elites')

    for i, data in enumerate(datas[1:]):
#         print(data.prettify())
        try:
            place_overall = data.find_all('td')[0].text
            place_gender = data.find_all('td')[1].text
            place_category = data.find_all('td')[2].text
            name = data.find_all('td')[3].text[1:]
            club = data.find_all('td')[5].text
            runner_no = data.find_all('td')[6].text
            category = data.find_all('td')[7].text
            event = "Mass"
            half_time = data.find_all('td')[8].text
            finish_time = data.find_all('td')[9].text
            df.loc[len(df.index)] = [place_overall, place_gender, place_category, name, club,
                                     runner_no, gender, category, event, half_time, finish_time]
        except IndexError:
            continue
#         except Exception as e:
# #             errors.loc[len(errors.index)] = [i, e]
#             print(i, e)
#             continue

df = numeric_places(df)
                                                  
df = df.sort_values("Overall Place")            
df.to_csv("London_" + year + "_elite_results.csv", index=False)

print("end")

In [None]:
df = pd.read_csv("London_2014_elite_results.csv")
df