In [21]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup


def parse_years(input_years):
    """Normalise a year selection into a list of year strings.

    Accepted forms:
      * an ``int`` such as ``2023``                        -> ``["2023"]``
      * a single-year string such as ``"2023"``            -> ``["2023"]``
      * an inclusive range string such as ``"2021-2023"``  -> ``["2021", "2022", "2023"]``
      * a list, tuple, set or ``range`` of years           -> every item passed through ``str``

    Args:
        input_years: the selection in any of the forms above, or ``None``.

    Returns:
        list[str] | None: the year strings, or ``None`` when the input is
        ``None``, a blank string, or an unsupported type.

    Raises:
        ValueError: if a range string does not consist of exactly two integers.
    """
    if isinstance(input_years, (list, tuple, set, range)):
        return [str(year) for year in input_years]

    if isinstance(input_years, str):
        text = input_years.strip()
        if not text:
            return None
        if '-' in text:
            start_year, end_year = map(int, text.split('-'))
            # Accept "2023-2021" as well; otherwise the result would be an empty list.
            if start_year > end_year:
                start_year, end_year = end_year, start_year
            return [str(year) for year in range(start_year, end_year + 1)]  # end year is inclusive
        # A plain single-year string used to fall through and return None.
        return [text]

    if isinstance(input_years, int):
        return [str(input_years)]

    return None


def get_seasons(timeout=30):
    """Fetch the entries of the season dropdown on the results page.

    Args:
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        dict: stripped option text -> option ``value`` attribute, in page order.

    Raises:
        requests.RequestException: on timeout, connection failure, or an HTTP
            error status.
    """
    url = "https://www.gt-world-challenge-europe.com/results?filter_season_id=0&filter_meeting_id=0&filter_race_id="
    # Without a timeout a stalled connection would block the kernel indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty mapping.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    seasons = {}
    for option in soup.select('#filter_season_id option'):
        value = option.get('value')  # .get() tolerates an <option> without a value attribute
        # Skip entries that are not real seasons: no/empty value, or "0", which the
        # URL above uses for "no season selected" (presumably the dropdown caption).
        if value in (None, '', '0'):
            continue
        seasons[option.text.strip()] = value

    return seasons



def get_meetings(season_id, timeout=30):
    """Fetch the entries of the meeting dropdown for one season.

    Args:
        season_id: season identifier placed in the ``filter_season_id`` query parameter.
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        dict: stripped option text -> option ``value`` attribute, in page order.

    Raises:
        requests.RequestException: on timeout, connection failure, or an HTTP
            error status.
    """
    url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id=0&filter_race_id="
    # Without a timeout a stalled connection would block the kernel indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty mapping.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    meetings = {}
    for option in soup.select('#filter_meeting_id option'):
        value = option.get('value')  # .get() tolerates an <option> without a value attribute
        # Skip entries that are not real meetings: no/empty value, or "0", which the
        # URL above uses for "no meeting selected". The dropdown caption presumably
        # carries that sentinel (TODO confirm against the page markup); filtering only
        # on emptiness let it through as a meeting with no races.
        if value in (None, '', '0'):
            continue
        meetings[option.text.strip()] = value

    return meetings



def get_races(season_id, meeting_id, timeout=30):
    """Fetch the entries of the race/session dropdown for one meeting.

    Args:
        season_id: season identifier placed in the ``filter_season_id`` query parameter.
        meeting_id: meeting identifier placed in the ``filter_meeting_id`` query parameter.
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        list[dict]: one ``{'name': <stripped option text>, 'race_id': <option value>}``
        per selectable entry, in page order.

    Raises:
        requests.RequestException: on timeout, connection failure, or an HTTP
            error status.
    """
    url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id="
    # Without a timeout a stalled connection would block the kernel indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect every option offered by the race dropdown on the page.
    races = []
    for option in soup.select('#filter_race_id option'):
        value = option.get('value')  # .get() tolerates an <option> without a value attribute
        # Skip the caption entry, which is a label rather than an actual race. It is
        # recognised by its missing/empty value; "0" is skipped as well because the
        # sibling filters use it as their "nothing selected" sentinel.
        if value in (None, '', '0'):
            continue
        races.append({'name': option.text.strip(), 'race_id': value})

    return races



def load_race_data(input_years=None):
    """Build the season -> meeting -> races index used by the other helpers.

    Args:
        input_years: year selection handed to ``parse_years``; when that yields a
            falsy result every season returned by ``get_seasons`` is loaded.

    Returns:
        list[dict]: one record per meeting with the keys ``season``,
        ``season_id``, ``meeting``, ``meeting_id`` and ``races``.
    """
    season_ids = get_seasons()
    wanted_years = parse_years(input_years)

    collected = []
    for season_label, season_id in season_ids.items():
        # A falsy selection means "no filter"; otherwise keep only the requested seasons.
        if wanted_years and season_label not in wanted_years:
            continue
        for meeting_label, meeting_id in get_meetings(season_id).items():
            meeting_races = get_races(season_id, meeting_id)
            collected.append({
                "season": season_label,
                "season_id": season_id,
                "meeting": meeting_label,
                "meeting_id": meeting_id,
                "races": meeting_races,
            })

    return collected



def get_race_results(season, meeting, race_name, race_data, timeout=30):
    """Download the results table of one session as a DataFrame.

    Args:
        season: season label to look up in ``race_data`` (e.g. ``"2023"``).
        meeting: meeting label to look up in ``race_data`` (e.g. ``"Monza"``).
        race_name: session name, matched exactly against the ``name`` of the
            meeting's races.
        race_data: list of records with the keys ``season``, ``season_id``,
            ``meeting``, ``meeting_id`` and ``races`` (as built by ``load_race_data``).
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        pandas.DataFrame: one row per table row; columns come from the table
        header. An empty DataFrame is returned (and a message printed) when the
        page has no results table.

    Raises:
        ValueError: if the season/meeting/race combination is not in ``race_data``.
        requests.RequestException: on timeout, connection failure, or an HTTP
            error status.
    """
    # Resolve the season, meeting and race IDs from the names supplied.
    season_id = meeting_id = race_id = None
    for entry in race_data:
        if entry['season'] == season and entry['meeting'] == meeting:
            season_id = entry['season_id']
            meeting_id = entry['meeting_id']
            race_id = next(
                (race['race_id'] for race in entry['races'] if race['name'] == race_name),
                None,
            )
            break  # only the first matching season/meeting record is considered

    if not season_id or not meeting_id or not race_id:
        raise ValueError("Cannot find specified race information.")

    # Build and execute the request.
    final_url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id={race_id}"
    response = requests.get(final_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the results table.
    results_table = soup.find('table', {'class': 'table session'})
    if results_table is None:
        print(f"No results found for {race_name} in {meeting}, {season}.")
        return pd.DataFrame()

    # Column names come from the table header; tolerate a table without <thead>.
    header_section = results_table.find('thead')
    headers = []
    if header_section is not None:
        headers = [th.text.strip() for th in header_section.find_all('th')]

    # Rows come from <tbody>; fall back to the whole table when it is absent
    # instead of failing with AttributeError on None.
    body_section = results_table.find('tbody')
    if body_section is None:
        body_section = results_table

    table_rows = []
    for row in body_section.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells (e.g. header rows) carry no result data.
            continue
        table_rows.append([cell.text.strip() for cell in cells])

    # With no header available let pandas number the columns.
    return pd.DataFrame(table_rows, columns=headers or None)














def all_race_data(race_data):
    """Flatten the season/meeting/races index into a one-row-per-race table.

    Args:
        race_data: list of records with at least the keys ``season``,
            ``meeting`` and ``races`` (each race being a dict with a ``name``).

    Returns:
        pandas.DataFrame: columns ``Season``, ``Meeting`` and ``Race Name``,
        in the order the races appear in ``race_data``.
    """
    rows = []
    for entry in race_data:
        season_label = entry['season']
        meeting_label = entry['meeting']
        # One output row per race of this meeting.
        rows.extend(
            {'Season': season_label, 'Meeting': meeting_label, 'Race Name': race['name']}
            for race in entry['races']
        )

    return pd.DataFrame(rows)


def save_race_data_to_csv(years=None, meetings=None, race_type=None, race_data=None, save_path='.', create_folders=False):
    """Download the selected sessions and write each one to its own CSV file.

    Files are written to ``<save_path>/data_csv/<season>/<meeting>/`` with spaces
    in meeting and race names replaced by underscores.

    Args:
        years: optional season filter; anything ``parse_years`` accepts (int,
            list, ``"2021-2023"``) or a single-year string such as ``"2023"``.
        meetings: optional meeting name or collection of meeting names.
        race_type: optional session name or collection of names, compared
            case-insensitively against the full race name.
        race_data: index built by ``load_race_data``; nothing happens if it is empty.
        save_path: directory under which ``data_csv`` is created.
        create_folders: when true, sessions whose name contains "qualifying" go
            into a ``Qualifications`` subfolder and those containing "main race"
            into ``Races``; other sessions stay in the meeting folder.

    Returns:
        None
    """
    if not race_data:
        print("No data to process.")
        return

    if years is not None:
        parsed_years = parse_years(years)
        if parsed_years is not None:
            # Also covers ints and lists of ints, which previously either raised
            # in the ``in`` test below or never matched the string season labels.
            years = parsed_years
        elif isinstance(years, str):
            # A single-year string must still filter; previously a None result
            # from parse_years silently disabled the year filter.
            years = [years.strip()]
    if isinstance(meetings, str):
        meetings = [meetings]
    if race_type and isinstance(race_type, str):
        race_type = [race_type]
    # Lower-case the wanted names once instead of once per race.
    wanted_types = {name.lower() for name in race_type} if race_type else None

    data_dir = os.path.join(save_path, 'data_csv')
    os.makedirs(data_dir, exist_ok=True)

    for entry in race_data:
        if years and entry['season'] not in years:
            continue
        if meetings and entry['meeting'] not in meetings:
            continue

        meeting_dir = os.path.join(data_dir, entry['season'], entry['meeting'].replace(" ", "_"))
        os.makedirs(meeting_dir, exist_ok=True)

        for race in entry['races']:
            race_name_lower = race['name'].lower()
            # Skip the race if it does not match one of the requested types.
            if wanted_types and race_name_lower not in wanted_types:
                continue

            # Determine the destination folder based on the create_folders flag.
            folder_path = meeting_dir
            if create_folders:
                if "qualifying" in race_name_lower:
                    folder_path = os.path.join(meeting_dir, "Qualifications")
                elif "main race" in race_name_lower:
                    folder_path = os.path.join(meeting_dir, "Races")

            file_path = os.path.join(folder_path, f"{race['name'].replace(' ', '_')}.csv")
            df = get_race_results(entry['season'], entry['meeting'], race['name'], race_data)
            if df.empty:
                # Do not write an empty file and report it as saved.
                print(f"Skipped (no results): {file_path}")
                continue

            os.makedirs(folder_path, exist_ok=True)
            df.to_csv(file_path, index=False)
            print(f"Saved: {file_path}")


In [12]:
# Build the season/meeting/races index for the 2023 season only.
# load_race_data goes through get_seasons, get_meetings and get_races, each of
# which performs an HTTP request, so this cell needs network access.
my_race_data = load_race_data(2023)

In [15]:
# Walk the 2023 season by hand and print the races offered for every meeting.
seasons = get_seasons()
season_id = seasons['2023']  # Look up the ID of the 2023 season
meetings = get_meetings(season_id)

# One get_races request per meeting; prints the meeting label followed by its race list.
for meeting_name, meeting_id in meetings.items():
    races = get_races(season_id, meeting_id)
    print(meeting_name, races)



Meeting []
Monza [{'name': 'Main Race after 2.30 hours', 'race_id': '1356'}, {'name': 'Main Race after 1.30 hour', 'race_id': '1355'}, {'name': 'Main Race after 30 mins', 'race_id': '1357'}, {'name': 'Main Race', 'race_id': '1329'}, {'name': 'Combined Qualifying', 'race_id': '1354'}, {'name': 'Qualifying 3', 'race_id': '1353'}, {'name': 'Qualifying 2', 'race_id': '1352'}, {'name': 'Qualifying 1', 'race_id': '1328'}, {'name': 'Pit walk', 'race_id': '1351'}, {'name': 'Fanatec Esports GT Pro Series', 'race_id': '1350'}, {'name': 'Pre Qualifying', 'race_id': '1327'}, {'name': 'Free practice', 'race_id': '1326'}, {'name': 'Bronze Test', 'race_id': '1325'}]
Brands Hatch [{'name': 'Race 2', 'race_id': '1324'}, {'name': 'Race 1', 'race_id': '1323'}, {'name': 'Pit walk', 'race_id': '1358'}, {'name': 'Qualifying 2', 'race_id': '1359'}, {'name': 'Qualifying 1', 'race_id': '1322'}, {'name': 'Pre Qualifying', 'race_id': '1321'}, {'name': 'Free practice', 'race_id': '1320'}]
Circuit Paul Ricard 1000

In [19]:
# Flatten the loaded index into a table with one row per race.
race_summary_df = all_race_data(my_race_data)

# Preview the first five rows.
race_summary_df.head(5)

Unnamed: 0,Season,Meeting,Race Name
0,2023,Monza,Main Race after 2.30 hours
1,2023,Monza,Main Race after 1.30 hour
2,2023,Monza,Main Race after 30 mins
3,2023,Monza,Main Race
4,2023,Monza,Combined Qualifying


In [20]:
# Race names available for the Monza meeting, re-indexed from 0.
results = race_summary_df[race_summary_df['Meeting'] == 'Monza']['Race Name'].reset_index(drop=True)
# Show at most the first 15 names.
results.head(15)

0         Main Race after 1.30 hour
1                         Main Race
2               Combined Qualifying
3                      Qualifying 3
4                      Qualifying 2
5                      Qualifying 1
6                    Pre Qualifying
7                     Free practice
8                       Bronze Test
9      Official Paid Test Session 2
10    Official Paid Test Sessions 1
Name: Race Name, dtype: object

In [27]:
# Download the results table of a single session; the names must match the
# entries in my_race_data exactly, otherwise get_race_results raises ValueError.
df = get_race_results("2023", "Monza", "Main Race after 1.30 hour", my_race_data)

# Preview the first rows of the results.
df.head()

Unnamed: 0,Pos,Car #,Class,Drivers,Team,Car,Time,Laps,Gap
0,1,98,Pro Cup,"Philipp Eng, Marco Wittmann, Nick Yelloly",Rowe Racing,BMW M4 GT3,1:47.837,45,
1,2,32,Pro Cup,"Dries Vanthoor, Sheldon van der Linde, Charles...",Team WRT,BMW M4 GT3,1:47.873,45,1.258
2,3,998,Pro Cup,"Daniel Harper, Neil Verhagen, Max Hesse",Rowe Racing,BMW M4 GT3,1:47.934,45,6.633
3,4,6,Pro Cup,"Sandy Mitchell, Marco Mapelli, Franck Perera",K-Pax Racing,Lamborghini Huracan GT3 EVO 2,1:47.702,45,9.525
4,5,63,Pro Cup,"Mirko Bortolotti, Andrea Caldarelli, Jordan Pe...",Iron Lynx,Lamborghini Huracan GT3 EVO 2,1:47.982,45,14.553


In [22]:
# Export "Qualifying 1" of the 2023 Monza meeting to CSV.
# The target is the current user's Desktop, resolved from the home directory
# rather than a hard-coded, machine-specific absolute path.
save_race_data_to_csv(
    years="2023",
    meetings="Monza",
    race_type="Qualifying 1",
    race_data=my_race_data,
    create_folders=True,
    save_path=os.path.join(os.path.expanduser("~"), "Desktop"),
)

Saved: C:\Users\ireev\Desktop\data_csv\2023\Monza\Qualifications\Qualifying_1.csv
