# Brighton Marathon Results Scraper

This notebook scrapes the Brighton Marathon results from the official website, including split times for each runner.

In [3]:
from src.Scraper import Scraper
from itertools import product

import pandas as pd
import logging

# Configure the root logger so the progress messages emitted by the scraping
# loops below (logging.info) are shown, each prefixed with a timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

In [4]:
# Event year to scrape. The three constants below are derived from it, so
# switching to another edition (e.g. 2024) is a one-line change here instead of
# swapping between commented-out copies of the configuration.
YEAR = 2025

# Root of the results site for the selected year (keeps its trailing slash).
MARATHON_URL = f"https://brighton.r.mikatiming.com/{YEAR}/"
# Output CSV for the listing data collected from the search-result pages.
RUNNING_DATA_PATH = f"data/running_data_{YEAR}.csv"
# Output CSV for the data collected from each runner's individual page.
INDIVIDUAL_DATA_PATH = f"data/individual_running_data_{YEAR}.csv"


In [4]:
def get_page_url(page: int, age: int, sex: str, base_url: str = "") -> str:
    """Build the search-results URL for one page of one age-class / sex filter.

    Args:
        page: Results page number, sent as the ``page`` query parameter.
        age: Age-class value, sent as ``search[age_class]``.
        sex: Sex code, sent as ``search[sex]``.
        base_url: Root of the results site, including the trailing slash.
            When left empty, the notebook-level ``MARATHON_URL`` is used so
            the listing pages come from the same site as the individual
            pages (which are also built from ``MARATHON_URL``).

    Returns:
        The full URL with a percent-encoded query string.
    """
    # Function-scope import keeps this cell runnable on its own.
    from urllib.parse import urlencode

    query = urlencode({
        'page': page,
        'event': 'BRMA',
        'pid': 'search',
        'search[age_class]': age,
        'search[sex]': sex,
        # Encoded as %25; presumably a wildcard meaning "any nation" - TODO confirm.
        'search[nation]': '%',
        'search_sort': 'name',
    })
    return f"{base_url or MARATHON_URL}?{query}"

In [None]:
# Single Scraper instance (project class imported from src.Scraper) shared by
# the listing and individual-page scraping cells below.
scraper = Scraper()

In [11]:
# Search filters to sweep: every (age, sex) pair is scraped separately.
# 'age' values are sent as the search[age_class] query parameter and 'sex'
# values as search[sex]; the exact meaning of each code is defined by the
# results site (presumably age-class lower bounds and sex categories - TODO confirm).
search_params = dict(
    age=[18, 40, 45, 50, 55, 60, 65, 70, 75],
    sex=['M', 'W', 'D'],
)

# Get Main Data and hyperlinks

In [12]:
def scrape_all_pages(params: dict) -> pd.DataFrame:
    """Scrape every results page for one age-class / sex combination.

    Pages are requested in order starting at page 1 until the scraper returns
    a page with no rows, which is treated as the end of the listing.

    Args:
        params: Mapping with the keys ``'age'`` and ``'sex'``. Both values are
            forwarded to ``get_page_url`` and recorded on the result.

    Returns:
        All scraped rows in one frame with a fresh 0..n-1 index, plus constant
        ``sex`` and ``age`` columns holding the search filters. If the very
        first page is empty, a copy of that empty frame (with the two extra
        columns) is returned.
    """
    pages = []
    total_rows = 0
    page_number = 1

    while True:
        page_data = scraper.scrape_runner_data(
            get_page_url(page_number, params['age'], params['sex'])
        )
        if page_data.shape[0] == 0:
            # An empty page marks the end of the listing for these filters.
            break

        pages.append(page_data)
        total_rows += page_data.shape[0]

        # Log before advancing so the message names the page actually scraped.
        if page_number % 10 == 0:
            logging.info(f'page {page_number} scraped. Results size: {total_rows}')

        page_number += 1

    # Concatenate once: growing a frame inside the loop re-copies every
    # previously collected row on each iteration.
    if pages:
        params_running_data = pd.concat(pages, ignore_index=True)
    else:
        # Nothing found; keep whatever columns the empty page carried, and copy
        # so the scraper's own object is not mutated below.
        params_running_data = page_data.copy()

    params_running_data['sex'] = params['sex']
    params_running_data['age'] = params['age']

    return params_running_data

In [None]:

# Scrape every combination of the search filters and stack the results.
keys = search_params.keys()
combo_frames = []

for values in product(*search_params.values()):
    combo = dict(zip(keys, values))

    logging.info(f'scraping pages with params: {combo}')

    combo_frames.append(scrape_all_pages(combo))

# Concatenate once instead of growing the frame per combination, and renumber
# the rows so index labels are unique across combinations (each per-combination
# frame starts again at 0). The guard covers an empty filter list, for which
# pd.concat would raise on an empty sequence.
running_data = (
    pd.concat(combo_frames, ignore_index=True) if combo_frames else pd.DataFrame()
)


In [19]:
# Persist the listing data so the individual-times section can reload it from
# disk instead of re-scraping. With pandas' defaults the DataFrame index is
# written as an unnamed first column.
running_data.to_csv(RUNNING_DATA_PATH)

# Get Individual Times

In [None]:
# Preview the first rows of the listing data.
# NOTE(review): this cell sits before the CSV reload in the next cell, so on a
# fresh kernel it only works if the scraping cells above have been run.
running_data.head()

In [6]:
# Reload the saved listing data so this section can run without repeating the
# page scrape. This rebinds running_data to the contents of the CSV file.
running_data = pd.read_csv(RUNNING_DATA_PATH)

In [None]:
# Visit each runner's individual page and collect the frames it yields.
individual_frames = []
scraped_rows = 0

# `index` is the loop position rather than the DataFrame label, so the progress
# log fires every 100 runners even when running_data's labels are not 0..n-1.
for index, (_, row) in enumerate(running_data.iterrows()):
    ind_data = scraper.scrape_individual_data(MARATHON_URL + row['hyper_link'])
    # Tag the rows with the runner's bib number so they can be linked back to
    # the listing data.
    ind_data['bib_number'] = row['bib_number']

    individual_frames.append(ind_data)
    scraped_rows += ind_data.shape[0]

    if index % 100 == 0:
        logging.info(f'index {index} scraped. Results size: {scraped_rows}')

# Concatenate once: calling pd.concat inside the loop re-copies all previously
# collected rows for every runner. Each frame's own index labels are kept, as
# before. The guard covers an empty running_data, for which pd.concat would
# raise on an empty sequence.
individual_running_data = (
    pd.concat(individual_frames) if individual_frames else pd.DataFrame()
)
    

In [8]:
# Persist the data scraped from the individual pages. With pandas' defaults the
# DataFrame index is written as an unnamed first column.
individual_running_data.to_csv(INDIVIDUAL_DATA_PATH)