In [49]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup



def parse_years(input_years):
    """Normalise a year selection into a list of year strings.

    Args:
        input_years: One of
            * ``None`` - no selection;
            * an ``int`` such as ``2023``;
            * a ``str`` holding one year (``"2023"``) or an inclusive range
              (``"2021-2023"``, bounds accepted in either order);
            * a ``list`` or ``tuple`` of years (ints or strings).

    Returns:
        A list of year strings, or ``None`` when nothing usable was supplied
        (``None``, a blank string, or an unsupported type).

    Raises:
        ValueError: If a range string is not exactly two integers joined by '-'.
    """
    if isinstance(input_years, (list, tuple)):
        return [str(year) for year in input_years]
    if isinstance(input_years, int):
        return [str(input_years)]  # A single year becomes a one-element list
    if isinstance(input_years, str):
        text = input_years.strip()
        if not text:
            return None
        if '-' in text:
            start_year, end_year = map(int, text.split('-'))
            # Accept the bounds in either order so "2023-2021" is not silently empty.
            low, high = sorted((start_year, end_year))
            return [str(year) for year in range(low, high + 1)]  # Including the end year
        # A lone year given as text, e.g. "2023". Returning None here would make
        # callers that treat None as "no filter" process every season.
        return [text]
    return None



def get_seasons(timeout=30):
    """Fetch the seasons listed in the results page's ``#filter_season_id`` options.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        dict mapping each option's stripped text to its ``value`` attribute.
        Options whose value is missing or empty are left out.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    url = "https://www.gt-world-challenge-europe.com/results?filter_season_id=0&filter_meeting_id=0&filter_race_id="
    # Without a timeout, requests may wait indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parse an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    seasons = {}
    for option in soup.select('#filter_season_id option'):
        # .get() tolerates <option> tags that carry no value attribute at all.
        value = option.get('value')
        if value:
            seasons[option.text.strip()] = value

    return seasons



def get_meetings(season_id, timeout=30):
    """Fetch the meetings listed for one season in ``#filter_meeting_id``.

    Args:
        season_id: Season identifier, inserted into the ``filter_season_id``
            query parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        dict mapping each option's stripped text to its ``value`` attribute.
        Options whose value is missing or empty are left out.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id=0&filter_race_id="
    # Without a timeout, requests may wait indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parse an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    meetings = {}
    for option in soup.select('#filter_meeting_id option'):
        # .get() tolerates <option> tags that carry no value attribute at all.
        value = option.get('value')
        if value:
            meetings[option.text.strip()] = value

    return meetings



def get_races(season_id, meeting_id, timeout=30):
    """Fetch the sessions listed for one meeting in ``#filter_race_id``.

    Args:
        season_id: Season identifier for the ``filter_season_id`` parameter.
        meeting_id: Meeting identifier for the ``filter_meeting_id`` parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        list of ``{'name': <stripped option text>, 'race_id': <option value>}``
        dicts in page order. Options whose value is missing or empty are
        left out.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id="
    # Without a timeout, requests may wait indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parse an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    races = []
    for option in soup.select('#filter_race_id option'):
        # Entries with an empty or absent value (such as the dropdown's label
        # entry) are not selectable races; .get() also avoids a KeyError when
        # the attribute is missing entirely.
        value = option.get('value')
        if value:
            races.append({'name': option.text.strip(), 'race_id': value})

    return races



def load_race_data(input_years=None):
    """Build the season -> meeting -> races index used by the other helpers.

    Args:
        input_years: Optional year selection, passed to ``parse_years``.
            When it yields nothing (``None`` or an empty list), every season
            returned by ``get_seasons`` is loaded.

    Returns:
        list of dicts, one per meeting, with the keys ``season``,
        ``season_id``, ``meeting``, ``meeting_id`` and ``races`` (the list
        returned by ``get_races`` for that meeting).
    """
    season_index = get_seasons()
    wanted_years = parse_years(input_years)

    records = []
    for season_label, season_key in season_index.items():
        # Skip seasons outside the requested selection, if one was given.
        if wanted_years and season_label not in wanted_years:
            continue

        meeting_index = get_meetings(season_key)
        for meeting_label, meeting_key in meeting_index.items():
            record = {
                "season": season_label,
                "season_id": season_key,
                "meeting": meeting_label,
                "meeting_id": meeting_key,
                "races": get_races(season_key, meeting_key),
            }
            records.append(record)

    return records



def get_race_results(season, meeting, race_name, race_data, timeout=30):
    """Download one session's results table as a DataFrame.

    The season, meeting and race names are resolved to IDs through
    ``race_data``. Only the first entry matching ``season`` and ``meeting`` is
    consulted, and within it the first race whose name equals ``race_name``.

    Args:
        season: Season label as stored under ``'season'`` in ``race_data``.
        meeting: Meeting label as stored under ``'meeting'``.
        race_name: Session name as stored under ``race['name']``.
        race_data: Iterable of dicts with the keys ``season``, ``season_id``,
            ``meeting``, ``meeting_id`` and ``races``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        pandas.DataFrame with one row per table row that has ``<td>`` cells,
        and columns named after the table's ``<th>`` cells. An empty DataFrame
        is returned when the page has no ``table`` with class
        ``"table session"``.

    Raises:
        ValueError: If the season/meeting/race combination is not present in
            ``race_data``. A header/row width mismatch rejected by pandas is
            propagated unchanged.
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    # Searching for the season ID, meeting ID, and race ID based on the provided names
    season_id, meeting_id, race_id = None, None, None
    for entry in race_data:
        if entry['season'] == season and entry['meeting'] == meeting:
            season_id = entry['season_id']
            meeting_id = entry['meeting_id']
            race_id = next(
                (race['race_id'] for race in entry['races'] if race['name'] == race_name),
                None,
            )
            break

    if not season_id or not meeting_id or not race_id:
        raise ValueError("Cannot find specified race information.")

    # Formulating and executing the request
    final_url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id={race_id}"
    # Without a timeout, requests may wait indefinitely on an unresponsive server.
    response = requests.get(final_url, timeout=timeout)
    # Fail loudly rather than report an HTTP error page as "no results".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Searching for the results table
    results_table = soup.find('table', {'class': 'table session'})
    if results_table is None:
        print(f"No results found for {race_name} in {meeting}, {season}.")
        return pd.DataFrame()

    # The markup may omit <tbody>/<thead>; fall back to the table element
    # itself instead of failing with AttributeError on None.
    body = results_table.find('tbody')
    if body is None:
        body = results_table
    head = results_table.find('thead')
    if head is None:
        head = results_table

    # Extracting data from the table; rows without <td> cells (header or
    # spacer rows) carry no result data and are skipped.
    rows = []
    for row in body.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if cells:
            rows.append(cells)

    # Column names come from the header cells; with none present pandas
    # assigns its default integer labels.
    headers = [th.text.strip() for th in head.find_all('th')]

    return pd.DataFrame(rows, columns=headers or None)



def all_race_data(race_data):
    """Flatten the nested race index into one row per race.

    Args:
        race_data: Iterable of dicts with the keys ``season``, ``meeting`` and
            ``races`` (each race a dict with a ``name`` key). ``None`` is
            treated like an empty index.

    Returns:
        pandas.DataFrame with the columns ``Season``, ``Meeting`` and
        ``Race Name``. The columns are present even when there are no rows,
        so column lookups on an empty summary do not raise ``KeyError``.
    """
    columns = ['Season', 'Meeting', 'Race Name']

    # One record per (meeting entry, race) pair, in the order they appear.
    summary_rows = [
        {'Season': entry['season'], 'Meeting': entry['meeting'], 'Race Name': race['name']}
        for entry in (race_data or [])
        for race in entry['races']
    ]

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(summary_rows, columns=columns)



def save_race_data_to_csv(years=None, meetings=None, race_type=None, race_data=None, save_path='.', create_folders=False):
    """Download the selected sessions and write each one to a CSV file.

    Files are written to ``<save_path>/data_csv/<season>/<meeting>/`` with
    spaces in meeting and race names replaced by underscores.

    Args:
        years: Optional year filter: an int, a single-year string, a
            ``"start-end"`` range string, or a list/tuple/set of years.
        meetings: Optional meeting name or list of meeting names to keep.
        race_type: Optional session name or list of names to keep; compared
            case-insensitively against the full race name.
        race_data: Index with one dict per meeting, using the keys ``season``,
            ``meeting`` and ``races``.
        save_path: Directory under which ``data_csv`` is created.
        create_folders: When true, sessions whose name contains "qualifying"
            go into a ``Qualifications`` subfolder and those containing
            "race" into ``Races``; everything else stays in the meeting folder.

    Returns:
        None. Progress and per-session failures are reported with ``print``;
        a failure on one session does not stop the remaining ones.
    """
    if not race_data:
        print("No data to process.")
        return

    # Normalise the year filter to a list of strings, because seasons are
    # stored as strings: an int would break the membership test and a list of
    # ints would match nothing.
    if isinstance(years, (list, tuple, set)):
        years = [str(year) for year in years]
    elif isinstance(years, (int, str)):
        year_text = str(years).strip()
        if not year_text:
            years = None  # blank string: no year filter
        else:
            parsed_years = parse_years(years)
            # If parse_years does not recognise the value (returns None),
            # treat it as one literal year rather than dropping the filter.
            years = [year_text] if parsed_years is None else parsed_years
    if isinstance(meetings, str):
        meetings = [meetings]
    if race_type and isinstance(race_type, str):
        race_type = [race_type]
    # Lower-case once here instead of on every comparison inside the loop.
    wanted_types = {name.lower() for name in race_type} if race_type else None

    data_dir = os.path.join(save_path, 'data_csv')
    os.makedirs(data_dir, exist_ok=True)

    # Paths already claimed in this call, to detect sessions that share a name.
    used_paths = set()

    for data in race_data:
        if years and data['season'] not in years:
            continue
        if meetings and data['meeting'] not in meetings:
            continue

        meeting_dir = os.path.join(data_dir, data['season'], data['meeting'].replace(" ", "_"))
        os.makedirs(meeting_dir, exist_ok=True)

        for race in data['races']:
            race_label = race['name']
            lowered = race_label.lower()

            # Skip the race if it does not match the specified type
            if wanted_types is not None and lowered not in wanted_types:
                continue

            # Determine the save path based on the create_folders flag and race name
            folder_path = meeting_dir
            if create_folders:
                if "qualifying" in lowered:
                    folder_path = os.path.join(meeting_dir, "Qualifications")
                elif "race" in lowered:  # any race name containing "race"
                    folder_path = os.path.join(meeting_dir, "Races")
                os.makedirs(folder_path, exist_ok=True)

            file_stem = race_label.replace(' ', '_')
            file_path = os.path.join(folder_path, f"{file_stem}.csv")
            # A meeting can list several sessions under the same name. Give
            # the repeats an ID suffix so they do not overwrite the first file.
            # normcase makes the comparison case-insensitive on Windows.
            if os.path.normcase(file_path) in used_paths:
                file_path = os.path.join(folder_path, f"{file_stem}_{race['race_id']}.csv")
            used_paths.add(os.path.normcase(file_path))

            try:
                # get_race_results resolves names to the first matching race,
                # so hand it a one-race view of this meeting to make sure THIS
                # race's ID is fetched even when its name is duplicated.
                single_race_view = [{**data, 'races': [race]}]
                df = get_race_results(data['season'], data['meeting'], race_label, single_race_view)
                if not df.empty:
                    df.to_csv(file_path, index=False)
                    print(f"Saved: {file_path}")
                else:
                    print(f"No results found for {race['name']} in {data['meeting']}, {data['season']}.")
            except Exception as e:
                # Deliberate best effort: report the failure and continue.
                print(f"Error saving {race['name']} in {data['meeting']}, {data['season']}: {e}")


In [12]:
# Build the season/meeting/race index for the 2023 season only.
# This issues HTTP requests: one for the season list, one for the season's
# meetings, and one per meeting for its races.
my_race_data = load_race_data(2023)

In [19]:
# Flatten the nested index into one row per race (Season, Meeting, Race Name).
race_summary_df = all_race_data(my_race_data)

# Preview the first five races.
race_summary_df.head(5)

Unnamed: 0,Season,Meeting,Race Name
0,2023,Monza,Main Race after 2.30 hours
1,2023,Monza,Main Race after 1.30 hour
2,2023,Monza,Main Race after 30 mins
3,2023,Monza,Main Race
4,2023,Monza,Combined Qualifying


In [40]:
# Distinct meeting names, in the order they first appear in the summary.
meetings = race_summary_df['Meeting'].drop_duplicates().reset_index(drop=True)
# Session names listed for the 'Brands Hatch' meeting.
results = race_summary_df[race_summary_df['Meeting'] == 'Brands Hatch']['Race Name'].reset_index(drop=True)
print(meetings.head(15))
print(results.head(15))

0                          Monza
1                   Brands Hatch
2     Circuit Paul Ricard 1000Km
3    CrowdStrike 24 Hours of Spa
4                         Misano
5                    Nürburgring
6                     Hockenheim
7                       Valencia
8                      Barcelona
9                      Zandvoort
Name: Meeting, dtype: object
0            Race 2
1            Race 1
2          Pit walk
3      Qualifying 2
4      Qualifying 1
5    Pre Qualifying
6     Free practice
Name: Race Name, dtype: object


In [41]:
# Download one session's results table; the names must match the index exactly.
df = get_race_results("2023", "Brands Hatch", "Race 1", my_race_data)

# Preview the first rows of the results.
df.head()

Unnamed: 0,Pos,Car #,Class,Drivers,Team,Car,Time,Laps,Gap
0,1,88,Pro Cup,"Raffaele Marciello, Timur Boguslavskiy",AKKODIS ASP Team,Mercedes-AMG GT3,1:23.642,42,
1,2,40,Pro Cup,"Mattia Drudi, Ricardo Feller",Tresor Orange 1,Audi R8 LMS evo II GT3,1:23.765,42,0.616
2,3,32,Pro Cup,"Charles Weerts, Dries Vanthoor",Team WRT,BMW M4 GT3,1:23.726,42,3.683
3,4,11,Pro Cup,"Lucas Legeret, Christopher Haase",ComToYou Racing,Audi R8 LMS evo II GT3,1:23.778,42,27.432
4,5,14,Pro Cup,"Konsta Lappalainen, Giacomo Altoe",Emil Frey Racing,Ferrari 296 GT3,1:24.066,42,28.251


In [54]:
# Write every 2023 session to ./data_csv/<season>/<meeting>/, sorting
# qualifying sessions and races into their own subfolders.
save_race_data_to_csv(years="2023", race_data=my_race_data, create_folders=True)

Saved: .\data_csv\2023\Monza\Races\Main_Race_after_2.30_hours.csv
Saved: .\data_csv\2023\Monza\Races\Main_Race_after_1.30_hour.csv
Saved: .\data_csv\2023\Monza\Races\Main_Race_after_30_mins.csv
Saved: .\data_csv\2023\Monza\Races\Main_Race.csv
Saved: .\data_csv\2023\Monza\Qualifications\Combined_Qualifying.csv
No results found for Qualifying 3 in Monza, 2023.
Saved: .\data_csv\2023\Monza\Qualifications\Qualifying_2.csv
Saved: .\data_csv\2023\Monza\Qualifications\Qualifying_1.csv
No results found for Pit walk in Monza, 2023.
No results found for Fanatec Esports GT Pro Series in Monza, 2023.
No results found for Fanatec Esports GT Pro Series in Monza, 2023.
Saved: .\data_csv\2023\Monza\Qualifications\Pre_Qualifying.csv
Saved: .\data_csv\2023\Monza\Free_practice.csv
Saved: .\data_csv\2023\Monza\Bronze_Test.csv
Saved: .\data_csv\2023\Brands_Hatch\Races\Race_2.csv
Saved: .\data_csv\2023\Brands_Hatch\Races\Race_1.csv
No results found for Pit walk in Brands Hatch, 2023.
Saved: .\data_csv\2023\