In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


def fetch_initial_data(years=None, timeout=30):
    """Fetch the season and meeting filter options from the results page.

    Args:
        years: Optional collection of season labels (strings such as
            ``"2023"``). When given and non-empty, only seasons whose label
            appears in it are returned, in the order listed in ``years``.
            ``None`` or an empty collection means "no filtering".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(seasons, meetings)`` tuple of dicts, each mapping the displayed
        text of an ``<option>`` to its ``value`` attribute. Options whose
        value is empty or missing are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://www.gt-world-challenge-europe.com/results?filter_season_id=0&filter_meeting_id=0&filter_race_id="
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into two empty dicts.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    def options_by_label(selector):
        # ``.get`` tolerates <option> tags that carry no value attribute,
        # where indexing with ['value'] would raise KeyError.
        return {
            option.text.strip(): option['value']
            for option in soup.select(selector)
            if option.get('value')
        }

    seasons = options_by_label('#filter_season_id option')

    # Filter seasons by the specified years, if provided
    if years:
        seasons = {year: seasons[year] for year in years if year in seasons}

    meetings = options_by_label('#filter_meeting_id option')

    return seasons, meetings


def parse_years(input_years):
    """Normalise a year selection into a list of year strings.

    Args:
        input_years: One of
            * a list or tuple of years (ints or strings),
            * a range string such as ``"2020-2023"`` or ``"2023-2020"``
              (both ends inclusive, order of the result follows the input),
            * a single year (int or string),
            * a falsy value (``None``, ``0``, ``""``) meaning "no filter".

    Returns:
        A list of year strings, or ``None`` when no year was specified.

    Raises:
        ValueError: If a range string does not consist of exactly two
            integers separated by ``-``.
    """
    if isinstance(input_years, (list, tuple)):
        return [str(year) for year in input_years]
    if isinstance(input_years, str) and '-' in input_years:
        start_year, end_year = map(int, input_years.split('-'))
        # Walk in whichever direction the caller wrote the range; a fixed
        # step of -1 would silently produce an empty list for "2020-2023".
        step = 1 if start_year <= end_year else -1
        return [str(year) for year in range(start_year, end_year + step, step)]
    if input_years:
        return [str(input_years)]
    return None


def get_race_data(input_years=None, timeout=30):
    """Collect the available races for every season/meeting combination.

    Args:
        input_years: Year selection in any form accepted by ``parse_years``.
            ``None`` (or another falsy value) selects every season offered
            by the site.
        timeout: Seconds to wait for each results-page request.

    Returns:
        A list of dicts, one per season/meeting combination, with the keys
        ``"season"``, ``"season_id"``, ``"meeting"``, ``"meeting_id"`` and
        ``"races"``. ``"races"`` is a list of ``{"race_id", "name"}`` dicts
        and is empty when the page lists no races for that combination.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    base_url = "https://www.gt-world-challenge-europe.com/results"
    years = parse_years(input_years)
    seasons, meetings = fetch_initial_data(years)
    results = []

    # One session for the whole crawl so the connection to the host is
    # reused instead of being re-established for every combination.
    with requests.Session() as session:
        for season_name, season_id in seasons.items():
            for meeting_name, meeting_id in meetings.items():
                # Fetch race options for each season and meeting combination
                url = f"{base_url}?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id="
                response = session.get(url, timeout=timeout)
                # The status is intentionally not checked here: a page
                # without a race selector simply yields an empty race list,
                # so one bad combination does not abort the whole crawl.
                soup = BeautifulSoup(response.text, 'html.parser')

                # ``.get`` skips options with an empty *or* missing value.
                races = [
                    {"race_id": option['value'], "name": option.text.strip()}
                    for option in soup.select('#filter_race_id option')
                    if option.get('value')
                ]

                # Append season, meeting, and races information to the results
                results.append({
                    "season": season_name,
                    "season_id": season_id,
                    "meeting": meeting_name,
                    "meeting_id": meeting_id,
                    "races": races,
                })

    return results


def load_race_data(input_years=None):
    """Load race data, optionally restricted to the given years.

    The year selection is first normalised with ``parse_years`` and the
    result is then handed to ``get_race_data``, whose return value is
    passed straight back to the caller.
    """
    return get_race_data(parse_years(input_years))


def get_race_results(filter_season, filter_meeting, filter_race, race_data, timeout=30):
    """Fetch the result table of one race as a DataFrame.

    Args:
        filter_season: Season label to look up (compared against the
            ``"season"`` entries of ``race_data``).
        filter_meeting: Meeting label to look up.
        filter_race: Race name to look up within the matching meeting.
        race_data: Preloaded list of season/meeting dicts as returned by
            ``get_race_data``.
        timeout: Seconds to wait for the results-page request.

    Returns:
        A DataFrame with the columns ``Pos, Car #, Class, Drivers, Team,
        Car, Time, Laps, Gap``. Rows with fewer cells are padded with
        ``None``; cells beyond the ninth are dropped. When the page contains
        no result rows, an empty DataFrame without columns is returned.

    Raises:
        ValueError: If the season, meeting or race is not present in
            ``race_data``.
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Find corresponding IDs for season, meeting, and race
    season_id, meeting_id, race_id = None, None, None
    for entry in race_data:
        if entry['season'] != filter_season:
            continue
        season_id = entry['season_id']
        if entry['meeting'] != filter_meeting:
            continue
        meeting_id = entry['meeting_id']
        race_id = next(
            (race['race_id'] for race in entry['races'] if race['name'] == filter_race),
            None,
        )
        # The season/meeting pair is settled; no need to look further.
        break

    if not season_id or not meeting_id or not race_id:
        raise ValueError("One of the parameters was not found. Please check the accuracy of the entered data.")

    # Form and execute the request with the found IDs to fetch race data
    final_url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id={race_id}"
    response = requests.get(final_url, timeout=timeout)
    # An error page must not be mistaken for "this race has no results".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    df_columns = ['Pos', 'Car #', 'Class', 'Drivers', 'Team', 'Car', 'Time', 'Laps', 'Gap']
    width = len(df_columns)

    # Parse the race data
    records = []
    for row in soup.select('.table__body tr'):
        cells = [col.text.strip() for col in row.find_all('td')]
        if not cells:  # Skip rows without any <td> cell
            continue
        # Normalise every row to exactly ``width`` cells: pad short rows and
        # cut long ones, otherwise DataFrame construction raises on a row
        # that is wider than the column list.
        records.append((cells + [None] * width)[:width])

    return pd.DataFrame(records, columns=df_columns if records else [])


def summarize_race_data(race_data):
    """Flatten the nested race listing into one DataFrame row per race.

    Args:
        race_data: List of dicts, each providing ``'season'``, ``'meeting'``
            and a ``'races'`` list whose items provide ``'name'``.

    Returns:
        A DataFrame with the columns ``Season``, ``Meeting`` and
        ``Race Name``; it has no rows and no columns when there are no races.
    """
    records = []
    for entry in race_data:
        # Read both labels once per meeting, then emit one record per race.
        season_label, meeting_label = entry['season'], entry['meeting']
        records.extend(
            {
                'Season': season_label,
                'Meeting': meeting_label,
                'Race Name': race['name'],
            }
            for race in entry['races']
        )

    return pd.DataFrame(records)


In [2]:
# Load the season/meeting/race listing for 2023. This goes over the network:
# one request for the filter options plus one per season/meeting combination.
my_race_data = load_race_data(2023)

In [9]:
# Flatten the loaded listing into one row per race (Season, Meeting, Race Name).
race_summary_df = summarize_race_data(my_race_data)

# Trailing expression: the notebook displays the first five rows.
race_summary_df.head(5)

Unnamed: 0,Season,Meeting,Race Name
0,2023,Monza,Main Race after 2.30 hours
1,2023,Monza,Main Race after 1.30 hour
2,2023,Monza,Main Race after 30 mins
3,2023,Monza,Main Race
4,2023,Monza,Combined Qualifying


In [7]:
# Fetch one result table; the three labels must match entries in
# my_race_data exactly, otherwise get_race_results raises ValueError.
df = get_race_results("2023", "Barcelona", "Main Race", my_race_data)

# Trailing expression: the notebook displays the first rows of the table.
df.head()

Unnamed: 0,Pos,Car #,Class,Drivers,Team,Car,Time,Laps,Gap
0,1,51,Pro Cup,"Alessio Rovera, Robert Shwartzman, Nicklas Nie...",AF Corse - Francorchamps Motors,Ferrari 296 GT3,1:41.540,88,
1,2,71,Pro Cup,"Antonio Fuoco, Daniel Serra, Davide Rigon",AF Corse - Francorchamps Motors,Ferrari 296 GT3,1:41.665,88,0.422
2,3,96,Pro Cup,"Thomas Preining, Laurin Heinrich, Dennis Olsen",Rutronik Racing,Porsche 911 GT3 R (992),1:41.562,88,2.328
3,4,777,Pro Cup,"Maro Engel, Luca Stolz, Fabian Schiller",Mercedes-AMG Team AlManar,Mercedes-AMG GT3 EVO,1:41.467,88,2.77
4,5,88,Pro Cup,"Raffaele Marciello, Jules Gounon, Timur Bogusl...",AKKODIS ASP Team,Mercedes-AMG GT3 EVO,1:42.162,88,4.413
