# 🛠️ Scraping template for race results (pagination + CSV saving)

# In [2]:
import random
import time
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

# ---------------------- CONFIGURATION ----------------------

# In [3]:
# Listing URL without the page number; the page index is appended per request.
BASE_URL = "https://example.com/race-results?page="  # Placeholder, change for real project

# First and last page to fetch (both inclusive).
START_PAGE, END_PAGE = 1, 5

# (min, max) seconds to wait between requests.
DELAY_RANGE = (1, 3)

# Output CSV filenames
RESULTS_CSV = "race_results.csv"
RACE_INFO_CSV = "race_info.csv"

# ---------------------- DATA STORAGE ----------------------

# In [4]:
# Accumulators filled by the cells below: one dict per scraped result row,
# and one dict per race description.
race_results, race_info = [], []

# ---------------------- SCRAPING FUNCTION ----------------------

# In [5]:
def scrape_page(url, timeout=10):
    """Fetch ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled server.

    Returns:
        A ``BeautifulSoup`` document built with ``html.parser``, or ``None``
        when the request fails or the response status is not 200.
    """
    try:
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report network errors the same way as a bad status so that one
        # unreachable page does not abort the whole run.
        print(f"Failed to fetch {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch {url}")
        return None
    return BeautifulSoup(response.text, 'html.parser')

# ---------------------- MAIN LOOP ----------------------

# In [None]:
# Visit every page from START_PAGE to END_PAGE (inclusive) and append one
# dict per result row to race_results.
for page in range(START_PAGE, END_PAGE + 1):
    print(f"Scraping page {page}...")
    url = f"{BASE_URL}{page}"
    soup = scrape_page(url)

    if soup is not None:
        # Find table rows (adapt to real HTML structure)
        table_rows = soup.find_all('tr')

        for row in table_rows[1:]:  # Skip header
            cols = row.find_all('td')
            if len(cols) <= 5:
                # Fewer than the six mandatory columns: not a result row.
                continue
            race_results.append({
                'Name': cols[0].text.strip(),
                'Age': cols[1].text.strip(),
                'Gender': cols[2].text.strip(),
                'Finish Time': cols[3].text.strip(),
                'Overall Place': cols[4].text.strip(),
                'Division': cols[5].text.strip(),
                # The seventh column is optional.
                'City/State/Country': cols[6].text.strip() if len(cols) > 6 else ''
            })

    # Delay to not overload server. This also runs after a failed page, so a
    # struggling server is not hit again immediately; no wait is needed once
    # the last page has been processed.
    if page < END_PAGE:
        time.sleep(random.uniform(*DELAY_RANGE))

# Example static race info (can also be parsed)
# Hard-coded sample metadata describing the race; a real project could parse
# these values from the site instead.
race_info.extend([
    {
        "Race Name": "Sample Race",
        "Date": "2024-01-01",
        "Location": "New York, USA",
        "Distance": "10K",
    },
])

# ---------------------- SAVE RESULTS ----------------------

# In [None]:
# Write both datasets to CSV files under data/. The directory is created
# first because to_csv does not create missing parent directories.
output_dir = Path('data')
output_dir.mkdir(parents=True, exist_ok=True)

pd.DataFrame(race_results).to_csv(output_dir / RESULTS_CSV, index=False)
pd.DataFrame(race_info).to_csv(output_dir / RACE_INFO_CSV, index=False)

print("✅ Scraping complete! Data saved in 'data/' folder.")