# Brighton Marathon Results Scraper

This notebook scrapes the Brighton Marathon results from the official website, including split times for each runner.

In [1]:
from src.Scraper import Scraper

import pandas as pd
import logging

# Configure the logger
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

In [11]:
#MARATHON_URL = "https://brighton.r.mikatiming.com/2025/"
#RUNNING_DATA_PATH = "data/running_data_2025.csv"
#INDIVIDUAL_DATA_PATH = "data/individual_running_data_2025.csv"

MARATHON_URL = "https://brighton.r.mikatiming.com/2024/"
RUNNING_DATA_PATH = "data/running_data_2024.csv"
INDIVIDUAL_DATA_PATH = "data/individual_running_data_2024.csv"


In [12]:
def get_page_url(page: int):
    return MARATHON_URL + f"?page={page}&event=BRMA&pid=list"

In [13]:
scraper = Scraper()

2025-04-13 15:26:36 - INFO - Get LATEST chromedriver version for google-chrome
2025-04-13 15:26:36 - INFO - Get LATEST chromedriver version for google-chrome
2025-04-13 15:26:37 - INFO - Driver [C:\Users\norri\.wdm\drivers\chromedriver\win64\135.0.7049.84\chromedriver-win32/chromedriver.exe] found in cache


# Get Main Data and hyperlinks

In [14]:
running_data = pd.DataFrame()

page_number = 1
last_data_frame_size = 1

while(last_data_frame_size > 0 ):
    page_data = scraper.scrape_runner_data(get_page_url(page_number))
    running_data = pd.concat([running_data, page_data], ignore_index=True)

    page_number += 1
    last_data_frame_size = page_data.shape[0]

    if page_number % 10 == 0:
        logging.info(f'page {page_number} scraped. Results size: {running_data.shape[0]}')



2025-04-13 15:26:47 - INFO - page 10 scraped. Results size: 225
2025-04-13 15:26:54 - INFO - page 20 scraped. Results size: 475
2025-04-13 15:27:02 - INFO - page 30 scraped. Results size: 725
2025-04-13 15:27:09 - INFO - page 40 scraped. Results size: 975
2025-04-13 15:27:18 - INFO - page 50 scraped. Results size: 1225
2025-04-13 15:27:27 - INFO - page 60 scraped. Results size: 1475
2025-04-13 15:27:36 - INFO - page 70 scraped. Results size: 1725
2025-04-13 15:27:45 - INFO - page 80 scraped. Results size: 1975
2025-04-13 15:27:54 - INFO - page 90 scraped. Results size: 2225
2025-04-13 15:28:04 - INFO - page 100 scraped. Results size: 2475
2025-04-13 15:28:15 - INFO - page 110 scraped. Results size: 2725
2025-04-13 15:28:24 - INFO - page 120 scraped. Results size: 2975
2025-04-13 15:28:32 - INFO - page 130 scraped. Results size: 3225
2025-04-13 15:28:43 - INFO - page 140 scraped. Results size: 3475
2025-04-13 15:28:52 - INFO - page 150 scraped. Results size: 3725
2025-04-13 15:29:01 - I



In [15]:
running_data.to_csv(RUNNING_DATA_PATH)

# Get Individual Times

In [16]:
running_data = pd.read_csv(RUNNING_DATA_PATH)

In [21]:
individual_running_data = pd.DataFrame()

for index, row in running_data.iterrows():
    ind_data = scraper.scrape_individual_data(MARATHON_URL + row['hyper_link'])
    ind_data['bib_number'] = row['bib_number']

    individual_running_data = pd.concat([individual_running_data, ind_data])

    if index % 100 == 0:
        logging.info(f'index {index} scraped. Results size: {individual_running_data.shape[0]}')
    


2025-04-13 15:35:50 - INFO - index 0 scraped. Results size: 10
2025-04-13 15:36:22 - INFO - index 100 scraped. Results size: 1010
2025-04-13 15:36:52 - INFO - index 200 scraped. Results size: 2010
2025-04-13 15:37:24 - INFO - index 300 scraped. Results size: 3010
2025-04-13 15:37:55 - INFO - index 400 scraped. Results size: 4010
2025-04-13 15:38:26 - INFO - index 500 scraped. Results size: 5010
2025-04-13 15:38:57 - INFO - index 600 scraped. Results size: 6010
2025-04-13 15:39:31 - INFO - index 700 scraped. Results size: 7010
2025-04-13 15:40:06 - INFO - index 800 scraped. Results size: 8010
2025-04-13 15:40:40 - INFO - index 900 scraped. Results size: 9010
2025-04-13 15:41:16 - INFO - index 1000 scraped. Results size: 10010
2025-04-13 15:41:51 - INFO - index 1100 scraped. Results size: 11010
2025-04-13 15:42:29 - INFO - index 1200 scraped. Results size: 12010
2025-04-13 15:43:06 - INFO - index 1300 scraped. Results size: 13010
2025-04-13 15:43:43 - INFO - index 1400 scraped. Results s

In [22]:
individual_running_data.to_csv(INDIVIDUAL_DATA_PATH)