# Brighton Marathon Results Scraper

This notebook scrapes the Brighton Marathon results from the official website, including split times for each runner.

In [9]:
from src.Scraper import Scraper
from itertools import product

import pandas as pd
import logging

# Configure the logger
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

In [10]:
#MARATHON_URL = "https://brighton.r.mikatiming.com/2025/"
#RUNNING_DATA_PATH = "data/running_data_2025.csv"
#INDIVIDUAL_DATA_PATH = "data/individual_running_data_2025.csv"

MARATHON_URL = "https://brighton.r.mikatiming.com/2024/"
RUNNING_DATA_PATH = "data/running_data_2024.csv"
INDIVIDUAL_DATA_PATH = "data/individual_running_data_2024.csv"


In [11]:
def get_page_url(page: int, age: int, sex: str):
 return f"https://brighton.r.mikatiming.com/2025/?page={page}&event=BRMA&pid=search&search%5Bage_class%5D={age}&search%5Bsex%5D={sex}&search%5Bnation%5D=%25&search_sort=name"

In [12]:
scraper = Scraper()

2025-04-18 10:53:17 - INFO - Get LATEST chromedriver version for google-chrome
2025-04-18 10:53:17 - INFO - Get LATEST chromedriver version for google-chrome
2025-04-18 10:53:17 - INFO - Driver [C:\Users\norri\.wdm\drivers\chromedriver\win64\135.0.7049.95\chromedriver-win32/chromedriver.exe] found in cache


In [13]:
search_params = {
    'age' : [18, 40, 45, 50, 55, 60, 65, 70, 75],
    'sex' : ['M', 'W', 'D']  
    }

# Get Main Data and hyperlinks

In [14]:
def scrape_all_pages(params: dict) -> pd.DataFrame:

    params_running_data = pd.DataFrame()

    page_number = 1
    last_data_frame_size = 1

    while(last_data_frame_size > 0 ):
        page_data = scraper.scrape_runner_data(get_page_url(page_number, params['age'], params['sex']))
        params_running_data = pd.concat([params_running_data, page_data], ignore_index=True)

        page_number += 1
        last_data_frame_size = page_data.shape[0]

        if page_number % 10 == 0:
            logging.info(f'page {page_number} scraped. Results size: {params_running_data.shape[0]}')

    params_running_data['sex'] = params['sex']
    params_running_data['age'] = params['age']

    return params_running_data

In [15]:

running_data = pd.DataFrame()

keys = search_params.keys()
for values in product(*search_params.values()):
    combo = dict(zip(keys, values))
    
    logging.info(f'scraping pages with params: {combo}')

    running_data = pd.concat([running_data, scrape_all_pages(combo)])


2025-04-18 10:53:19 - INFO - scraping pages with params: {'age': 18, 'sex': 'M'}
2025-04-18 10:53:37 - INFO - page 10 scraped. Results size: 225
2025-04-18 10:53:46 - INFO - page 20 scraped. Results size: 475
2025-04-18 10:53:54 - INFO - page 30 scraped. Results size: 725
2025-04-18 10:54:01 - INFO - page 40 scraped. Results size: 975
2025-04-18 10:54:10 - INFO - page 50 scraped. Results size: 1225
2025-04-18 10:54:18 - INFO - page 60 scraped. Results size: 1475
2025-04-18 10:54:28 - INFO - page 70 scraped. Results size: 1725
2025-04-18 10:54:37 - INFO - page 80 scraped. Results size: 1975
2025-04-18 10:54:46 - INFO - page 90 scraped. Results size: 2225
2025-04-18 10:54:56 - INFO - page 100 scraped. Results size: 2475
2025-04-18 10:55:05 - INFO - page 110 scraped. Results size: 2725
2025-04-18 10:55:14 - INFO - page 120 scraped. Results size: 2975
2025-04-18 10:55:26 - INFO - page 130 scraped. Results size: 3225
2025-04-18 10:55:36 - INFO - page 140 scraped. Results size: 3475
2025-04-



2025-04-18 10:58:37 - INFO - page 10 scraped. Results size: 225
2025-04-18 10:58:45 - INFO - page 20 scraped. Results size: 475
2025-04-18 10:58:53 - INFO - page 30 scraped. Results size: 725
2025-04-18 10:59:01 - INFO - page 40 scraped. Results size: 975
2025-04-18 10:59:10 - INFO - page 50 scraped. Results size: 1225
2025-04-18 10:59:21 - INFO - page 60 scraped. Results size: 1475
2025-04-18 10:59:31 - INFO - page 70 scraped. Results size: 1725
2025-04-18 10:59:40 - INFO - page 80 scraped. Results size: 1975
2025-04-18 10:59:48 - INFO - page 90 scraped. Results size: 2225
2025-04-18 10:59:56 - INFO - page 100 scraped. Results size: 2475
2025-04-18 11:00:06 - INFO - page 110 scraped. Results size: 2725
2025-04-18 11:00:23 - INFO - page 120 scraped. Results size: 2975
2025-04-18 11:00:31 - INFO - page 130 scraped. Results size: 3225
2025-04-18 11:00:40 - INFO - page 140 scraped. Results size: 3475
2025-04-18 11:00:47 - INFO - page 150 scraped. Results size: 3725
2025-04-18 11:00:56 - I



2025-04-18 11:01:27 - INFO - scraping pages with params: {'age': 40, 'sex': 'M'}




2025-04-18 11:01:34 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:01:42 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:01:50 - INFO - page 30 scraped. Results size: 725
2025-04-18 11:02:00 - INFO - page 40 scraped. Results size: 975
2025-04-18 11:02:11 - INFO - page 50 scraped. Results size: 1225
2025-04-18 11:02:22 - INFO - page 60 scraped. Results size: 1451
2025-04-18 11:02:23 - INFO - scraping pages with params: {'age': 40, 'sex': 'W'}




2025-04-18 11:02:30 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:02:39 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:02:49 - INFO - page 30 scraped. Results size: 725
2025-04-18 11:02:51 - INFO - scraping pages with params: {'age': 40, 'sex': 'D'}




2025-04-18 11:02:52 - INFO - scraping pages with params: {'age': 45, 'sex': 'M'}




2025-04-18 11:02:59 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:03:08 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:03:17 - INFO - page 30 scraped. Results size: 725
2025-04-18 11:03:27 - INFO - page 40 scraped. Results size: 975
2025-04-18 11:03:37 - INFO - page 50 scraped. Results size: 1204
2025-04-18 11:03:38 - INFO - scraping pages with params: {'age': 45, 'sex': 'W'}




2025-04-18 11:03:44 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:03:53 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:04:04 - INFO - scraping pages with params: {'age': 45, 'sex': 'D'}




2025-04-18 11:04:04 - INFO - scraping pages with params: {'age': 50, 'sex': 'M'}




2025-04-18 11:04:11 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:04:21 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:04:29 - INFO - page 30 scraped. Results size: 725
2025-04-18 11:04:41 - INFO - page 40 scraped. Results size: 975
2025-04-18 11:04:43 - INFO - scraping pages with params: {'age': 50, 'sex': 'W'}




2025-04-18 11:04:51 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:05:01 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:05:05 - INFO - scraping pages with params: {'age': 50, 'sex': 'D'}




2025-04-18 11:05:06 - INFO - scraping pages with params: {'age': 55, 'sex': 'M'}




2025-04-18 11:05:14 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:05:23 - INFO - page 20 scraped. Results size: 475
2025-04-18 11:05:30 - INFO - scraping pages with params: {'age': 55, 'sex': 'W'}




2025-04-18 11:05:38 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:05:44 - INFO - scraping pages with params: {'age': 55, 'sex': 'D'}




2025-04-18 11:05:44 - INFO - scraping pages with params: {'age': 60, 'sex': 'M'}




2025-04-18 11:05:55 - INFO - page 10 scraped. Results size: 225
2025-04-18 11:06:01 - INFO - scraping pages with params: {'age': 60, 'sex': 'W'}




2025-04-18 11:06:07 - INFO - scraping pages with params: {'age': 60, 'sex': 'D'}




2025-04-18 11:06:08 - INFO - scraping pages with params: {'age': 65, 'sex': 'M'}




2025-04-18 11:06:14 - INFO - scraping pages with params: {'age': 65, 'sex': 'W'}




2025-04-18 11:06:17 - INFO - scraping pages with params: {'age': 65, 'sex': 'D'}




2025-04-18 11:06:17 - INFO - scraping pages with params: {'age': 70, 'sex': 'M'}




2025-04-18 11:06:20 - INFO - scraping pages with params: {'age': 70, 'sex': 'W'}




2025-04-18 11:06:21 - INFO - scraping pages with params: {'age': 70, 'sex': 'D'}




2025-04-18 11:06:22 - INFO - scraping pages with params: {'age': 75, 'sex': 'M'}




2025-04-18 11:06:23 - INFO - scraping pages with params: {'age': 75, 'sex': 'W'}




2025-04-18 11:06:24 - INFO - scraping pages with params: {'age': 75, 'sex': 'D'}




In [16]:
running_data.to_csv(RUNNING_DATA_PATH)

# Get Individual Times

In [17]:
running_data.head()

Unnamed: 0,bib_number,name,finish_time,hyper_link,sex,age
0,15554,Matt Aaronson,03:51:13,?content=detail&fpid=search&pid=search&idp=9TG...,M,18
1,35638,Younnis Abbad-Andaloussi,–,?content=detail&fpid=search&pid=search&idp=9TG...,M,18
2,16565,Aaron Abbott,–,?content=detail&fpid=search&pid=search&idp=9TG...,M,18
3,32897,Adam Abbott,05:18:58,?content=detail&fpid=search&pid=search&idp=9TG...,M,18
4,20095,Alexander Abbott,03:21:27,?content=detail&fpid=search&pid=search&idp=9TG...,M,18


In [18]:
running_data = pd.read_csv(RUNNING_DATA_PATH)

In [19]:
individual_running_data = pd.DataFrame()

for index, row in running_data.iterrows():
    ind_data = scraper.scrape_individual_data(MARATHON_URL + row['hyper_link'])
    ind_data['bib_number'] = row['bib_number']

    individual_running_data = pd.concat([individual_running_data, ind_data])

    if index % 100 == 0:
        logging.info(f'index {index} scraped. Results size: {individual_running_data.shape[0]}')
    

2025-04-18 11:06:25 - INFO - index 0 scraped. Results size: 0
2025-04-18 11:06:55 - INFO - index 100 scraped. Results size: 0
2025-04-18 11:07:25 - INFO - index 200 scraped. Results size: 0
2025-04-18 11:07:55 - INFO - index 300 scraped. Results size: 0
2025-04-18 11:08:26 - INFO - index 400 scraped. Results size: 0
2025-04-18 11:08:57 - INFO - index 500 scraped. Results size: 0
2025-04-18 11:09:29 - INFO - index 600 scraped. Results size: 0
2025-04-18 11:10:01 - INFO - index 700 scraped. Results size: 0
2025-04-18 11:10:34 - INFO - index 800 scraped. Results size: 0
2025-04-18 11:11:09 - INFO - index 900 scraped. Results size: 0
2025-04-18 11:11:44 - INFO - index 1000 scraped. Results size: 0
2025-04-18 11:12:19 - INFO - index 1100 scraped. Results size: 0
2025-04-18 11:12:52 - INFO - index 1200 scraped. Results size: 0
2025-04-18 11:13:30 - INFO - index 1300 scraped. Results size: 0
2025-04-18 11:14:08 - INFO - index 1400 scraped. Results size: 0


KeyboardInterrupt: 

In [8]:
individual_running_data.to_csv(INDIVIDUAL_DATA_PATH)