In [1]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup


def parse_years(input_years):
    """Normalise a year selection into a list of year strings.

    Accepted forms:
      * an ``int`` such as ``2023``                  -> ``['2023']``
      * a string holding one year, ``'2023'``        -> ``['2023']``
      * a string holding an inclusive range,
        ``'2021-2023'`` (bounds in either order)     -> ``['2021', '2022', '2023']``
      * a list, tuple or ``range`` of years          -> every item converted with ``str``

    Returns:
        A list of strings, or ``None`` when the input is ``None``, an empty
        string, a non-numeric string without a dash, or an unsupported type.
        Callers treat ``None`` as "no year filter".

    Raises:
        ValueError: if a string containing ``-`` does not split into exactly
            two integers.
    """
    if isinstance(input_years, (list, tuple, range)):
        return [str(year) for year in input_years]

    if isinstance(input_years, str):
        text = input_years.strip()
        if '-' in text:
            start_year, end_year = map(int, text.split('-'))
            # Accept the bounds in either order; the range is inclusive.
            first, last = sorted((start_year, end_year))
            return [str(year) for year in range(first, last + 1)]
        if text.isdigit():
            # A single year written as a string, e.g. "2023".
            return [text]
        return None

    if isinstance(input_years, int):
        return [str(input_years)]  # A single year becomes a one-element list

    return None


def get_seasons_and_meetings(years=None, timeout=30):
    """Download the results page and read the season and meeting selectors.

    Args:
        years: optional iterable of season labels to keep (compared as
            strings against the option text). A single string or int is
            treated as one label. ``None`` or an empty value keeps every season.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block forever.

    Returns:
        ``(seasons, meetings)``: two dicts mapping the stripped option text to
        the option's ``value`` attribute, taken from the ``<option>`` elements
        under ``#filter_season_id`` and ``#filter_meeting_id``. Options whose
        ``value`` is missing or empty are left out.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    url = "https://www.gt-world-challenge-europe.com/results?filter_season_id=0&filter_meeting_id=0&filter_race_id="
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it into empty dicts.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    def _read_options(selector):
        # .get() tolerates <option> tags that carry no value attribute at all.
        return {
            option.text.strip(): option['value']
            for option in soup.select(selector)
            if option.get('value')
        }

    # Extract available seasons
    seasons = _read_options('#filter_season_id option')

    # Filter seasons by specified years, if provided
    if years:
        if isinstance(years, (str, int)):
            years = [years]  # a bare string would otherwise be iterated per character
        wanted = [str(year) for year in years]
        seasons = {year: seasons[year] for year in wanted if year in seasons}

    # Extract available meetings
    meetings = _read_options('#filter_meeting_id option')

    return seasons, meetings


def get_races(input_years=None, timeout=30):
    """Collect the race options offered for every season/meeting combination.

    Args:
        input_years: year selection in any form ``parse_years`` accepts;
            ``None`` means all seasons.
        timeout: seconds passed to each ``requests.get`` call so that one
            stalled request cannot hang the whole crawl.

    Returns:
        A list with one dict per (season, meeting) pair, holding the keys
        ``season``, ``season_id``, ``meeting``, ``meeting_id`` and ``races``.
        ``races`` is a list of ``{"race_id": ..., "name": ...}`` dicts read
        from the ``#filter_race_id`` options. A pair is recorded even when its
        ``races`` list is empty.
    """
    base_url = "https://www.gt-world-challenge-europe.com/results"
    years = parse_years(input_years)
    seasons, meetings = get_seasons_and_meetings(years)
    results = []

    for season_name, season_id in seasons.items():
        for meeting_name, meeting_id in meetings.items():
            # Fetch race options for each season and meeting combination
            url = f"{base_url}?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id="
            response = requests.get(url, timeout=timeout)
            soup = BeautifulSoup(response.text, 'html.parser')

            # .get() skips options with an empty value and also options that
            # have no value attribute, which option['value'] would crash on.
            races = [
                {"race_id": option['value'], "name": option.text.strip()}
                for option in soup.select('#filter_race_id option')
                if option.get('value')
            ]

            # Append season, meeting, and races information to the results
            results.append({
                "season": season_name,
                "season_id": season_id,
                "meeting": meeting_name,
                "meeting_id": meeting_id,
                "races": races
            })

    return results


def get_race_results(season, meeting, race_name, race_data, timeout=30):
    """Download the results table of one race as a DataFrame.

    Args:
        season: season label, matched against ``entry['season']``.
        meeting: meeting label, matched against ``entry['meeting']``.
        race_name: race label, matched against ``race['name']``.
        race_data: listing in the shape produced by ``get_races``.
        timeout: seconds passed to ``requests.get``.

    Returns:
        A DataFrame built from the first ``<table class="table">`` on the
        page: header texts become the columns and each row's ``<td>`` texts
        become a record. An empty DataFrame (no columns) is returned, and a
        message printed, when the page has no such table.

    Raises:
        ValueError: if the season/meeting/race combination is not present in
            ``race_data``.
    """
    # Only the first entry matching season and meeting is consulted.
    entry = next(
        (item for item in race_data
         if item['season'] == season and item['meeting'] == meeting),
        None,
    )
    season_id = meeting_id = race_id = None
    if entry is not None:
        season_id = entry['season_id']
        meeting_id = entry['meeting_id']
        race_id = next(
            (race['race_id'] for race in entry['races'] if race['name'] == race_name),
            None,
        )

    if not season_id or not meeting_id or not race_id:
        raise ValueError(f"Cannot find data for season: {season}, meeting: {meeting}, race: {race_name}")

    # Construct URL and request the site
    final_url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id={race_id}"
    response = requests.get(final_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Check for the presence of a table on the page
    table = soup.find('table', {'class': 'table'})
    if not table:
        print(f"No data table found for race: {race_name} in season: {season}, meeting: {meeting}. Skipping this race.")
        return pd.DataFrame()  # Return an empty DataFrame for missing data

    # html.parser does not invent <thead>/<tbody>; fall back to the table
    # itself when either wrapper is absent instead of failing on None.
    head = table.find('thead') or table
    body = table.find('tbody') or table
    headers = [cell.text.strip() for cell in head.find_all('th')]

    data_rows = []
    for row in body.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if cells:  # rows without <td> cells (e.g. a header row) carry no data
            data_rows.append(cells)

    # Without any header text let pandas number the columns itself.
    return pd.DataFrame(data_rows, columns=headers or None)


def load_race_data(input_years=None):
    """Return the season/meeting/race listing, optionally limited to some years.

    ``input_years`` is first normalised by ``parse_years``; the normalised
    value is then handed to ``get_races``, whose result is returned unchanged.
    """
    return get_races(parse_years(input_years))


def all_race_data(race_data):
    """Flatten the nested race listing into one row per race.

    Args:
        race_data: iterable of dicts with the keys ``season``, ``meeting`` and
            ``races`` (a list of dicts that each have a ``name``). ``None`` is
            treated like an empty listing.

    Returns:
        A DataFrame with the columns ``Season``, ``Meeting`` and ``Race Name``.
        The columns are present even when there are no rows, so later column
        look-ups keep working on an empty result.
    """
    columns = ['Season', 'Meeting', 'Race Name']

    # Entries whose 'races' list is empty contribute no rows.
    summary_rows = [
        {'Season': entry['season'], 'Meeting': entry['meeting'], 'Race Name': race['name']}
        for entry in (race_data or [])
        for race in entry['races']
    ]

    return pd.DataFrame(summary_rows, columns=columns)


def save_race_data_to_csv(years=None, meetings=None, race_type=None, race_data=None, save_path='.', create_folders=False):
    """Download the selected races and write each results table to a CSV file.

    Files are written to ``<save_path>/data_csv/<season>/<meeting>/`` with
    spaces in the meeting and race names replaced by underscores.

    Args:
        years: seasons to keep: a single year (int or string), a range string
            such as ``'2021-2023'``, or an iterable of years. ``None`` keeps all.
        meetings: one meeting name or a collection of names; ``None`` keeps all.
        race_type: one race name or a collection of names, compared
            case-insensitively against the full race name; ``None`` keeps all.
        race_data: listing in the shape produced by ``get_races``.
        save_path: directory in which the ``data_csv`` folder is created.
        create_folders: when true, races whose name contains "qualifying" go
            into a ``Qualifications`` sub-folder and those containing
            "main race" into ``Races``.

    Returns:
        None. Progress is reported with ``print``.
    """
    if not race_data:
        print("No data to process.")
        return

    # Normalise the year filter to a list of strings so that a single year
    # given as "2023" or 2023 really filters instead of being ignored/crashing.
    if isinstance(years, str):
        text = years.strip()
        years = (parse_years(text) or [text]) if text else None
    elif isinstance(years, int):
        years = [str(years)]
    elif years:
        years = [str(year) for year in years]

    if isinstance(meetings, str):
        meetings = [meetings]
    if race_type and isinstance(race_type, str):
        race_type = [race_type]
    wanted_types = {name.lower() for name in race_type} if race_type else None

    data_dir = os.path.join(save_path, 'data_csv')
    os.makedirs(data_dir, exist_ok=True)

    for data in race_data:
        if years and data['season'] not in years:
            continue
        if meetings and data['meeting'] not in meetings:
            continue

        meeting_dir = os.path.join(data_dir, data['season'], data['meeting'].replace(" ", "_"))

        for race in data['races']:
            race_name_lower = race['name'].lower()
            if wanted_types and race_name_lower not in wanted_types:
                continue  # Skip the race if it does not match the specified type

            # Determine the save path based on the create_folders flag
            folder_path = meeting_dir
            if create_folders:
                if "qualifying" in race_name_lower:
                    folder_path = os.path.join(meeting_dir, "Qualifications")
                elif "main race" in race_name_lower:
                    folder_path = os.path.join(meeting_dir, "Races")

            df = get_race_results(data['season'], data['meeting'], race['name'], race_data)
            if len(df.columns) == 0:
                # get_race_results found no table and has already printed that
                # the race is skipped; do not write a placeholder file.
                continue

            # Create directories only once there is something to write, so
            # filtered-out meetings do not leave empty folders behind.
            os.makedirs(folder_path, exist_ok=True)
            file_path = os.path.join(folder_path, f"{race['name'].replace(' ', '_')}.csv")
            df.to_csv(file_path, index=False)
            print(f"Saved: {file_path}")


In [2]:
# Fetch the season/meeting/race listing restricted to the 2023 season.
my_race_data = load_race_data(input_years=2023)

In [7]:
# Flatten the nested listing into one row per race and preview the first five rows.
race_summary_df = all_race_data(race_data=my_race_data)

race_summary_df.head()

Unnamed: 0,Season,Meeting,Race Name
0,2023,Monza,Main Race after 2.30 hours
1,2023,Monza,Main Race after 1.30 hour
2,2023,Monza,Main Race after 30 mins
3,2023,Monza,Main Race
4,2023,Monza,Combined Qualifying


In [8]:
# Race names listed for the Monza meeting, re-indexed from zero.
results = (
    race_summary_df
    .loc[race_summary_df['Meeting'] == 'Monza', 'Race Name']
    .reset_index(drop=True)
)
results.head(15)

0        Main Race after 2.30 hours
1         Main Race after 1.30 hour
2           Main Race after 30 mins
3                         Main Race
4               Combined Qualifying
5                      Qualifying 3
6                      Qualifying 2
7                      Qualifying 1
8                          Pit walk
9     Fanatec Esports GT Pro Series
10                   Pre Qualifying
11                    Free practice
12                      Bronze Test
Name: Race Name, dtype: object

In [None]:
# Download the results table of a single race and preview its first rows.
df = get_race_results(
    season="2023",
    meeting="Monza",
    race_name="Fanatec Esports GT Pro Series",
    race_data=my_race_data,
)

df.head()

In [22]:
# Resolve the Desktop folder from the current user's home directory so the
# cell is not tied to one machine. The year is passed as a list of strings,
# the form the season filter compares against directly.
save_race_data_to_csv(
    years=["2023"],
    meetings="Monza",
    race_type="Qualifying 1",
    race_data=my_race_data,
    create_folders=True,
    save_path=os.path.join(os.path.expanduser("~"), "Desktop"),
)

Saved: C:\Users\ireev\Desktop\data_csv\2023\Monza\Qualifications\Qualifying_1.csv
