In [1]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup


def parse_years(input_years):
    # Processes input parameters for years, supporting specific years, lists, and ranges.
    if isinstance(input_years, list):
        return [str(year) for year in input_years]
    elif isinstance(input_years, str) and '-' in input_years:
        start_year, end_year = map(int, input_years.split('-'))
        return [str(year) for year in range(start_year, end_year + 1)]  # Including the end year
    elif isinstance(input_years, int):
        return [str(input_years)]  # Convert a single year into a list of strings
    return None


def get_seasons():
    url = "https://www.gt-world-challenge-europe.com/results?filter_season_id=0&filter_meeting_id=0&filter_race_id="
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    seasons_options = soup.select('#filter_season_id option')
    seasons = {option.text.strip(): option['value'] for option in seasons_options if option['value']}
    
    return seasons



def get_meetings(season_id):
    url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id=0&filter_race_id="
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    meetings_options = soup.select('#filter_meeting_id option')
    meetings = {option.text.strip(): option['value'] for option in meetings_options if option['value']}
    
    return meetings



def get_races(season_id, meeting_id):
    url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id="
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Извлечение всех доступных опций гонок из выпадающего списка на странице
    races_options = soup.select('#filter_race_id option')
    
    # Пропускаем первый элемент, так как это просто подпись, а не фактическая гонка
    races = [{'name': option.text.strip(), 'race_id': option['value']} for option in races_options if option['value']]
    
    return races



def get_race_results(season, meeting, race_name, race_data_df):
    # Находим строку в DataFrame с нужными season, meeting, и race_name
    race_row = race_data_df[(race_data_df['Season'] == str(season)) & 
                            (race_data_df['Meeting'] == meeting) & 
                            (race_data_df['Race Name'] == race_name)]
    
    if race_row.empty:
        print(f"Cannot find data for season: {season}, meeting: {meeting}, race: {race_name}")
        return pd.DataFrame()  # Возвращаем пустой DataFrame, если данные не найдены

    # Берём ID заезда
    race_id = race_row['Race ID'].values[0]

    # Строим URL и делаем запрос
    final_url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season}&filter_meeting_id={meeting}&filter_race_id={race_id}"
    response = requests.get(final_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Проверяем наличие таблицы на странице
    table = soup.find('table', {'class': 'table'})
    if table:
        headers = [header.text.strip() for header in table.find('thead').find_all('th')]
        data_rows = [[col.text.strip() for col in row.find_all('td')] for row in table.find('tbody').find_all('tr')]
        return pd.DataFrame(data_rows, columns=headers)
    else:
        print(f"No data table found for race: {race_name} in season: {season}, meeting: {meeting}.")
        return pd.DataFrame()  # Возвращаем пустой DataFrame, если данных нет



def load_race_data(input_years=None):
    all_data = []
    seasons = get_seasons()

    years = parse_years(input_years)
    filtered_seasons = {year: id for year, id in seasons.items() if not years or year in years}

    for season, season_id in filtered_seasons.items():
        meetings = get_meetings(season_id)
        for meeting, meeting_id in meetings.items():
            races = get_races(season_id, meeting_id)
            all_data.append({
                "season": season,
                "season_id": season_id,
                "meeting": meeting,
                "meeting_id": meeting_id,
                "races": races
            })

    return all_data









def all_race_data(race_data):
    # Preparing an empty list to store information about each race
    summary_data = []

    # We go through all the downloaded data to collect information
    for data in race_data:
        season = data['season']
        meeting = data['meeting']
        for race in data['races']:
            race_name = race['name']
            # Adding race information to the list
            summary_data.append({
                'Season': season,
                'Meeting': meeting,
                'Race Name': race_name
            })

    # Creating a DataFrame from the collected information
    summary_df = pd.DataFrame(summary_data)

    return summary_df


def save_race_data_to_csv(years=None, meetings=None, race_type=None, race_data=None, save_path='.', create_folders=False):
    if not race_data:
        print("No data to process.")
        return

    if isinstance(years, str):
        years = parse_years(years)
    if isinstance(meetings, str):
        meetings = [meetings]
    if race_type and isinstance(race_type, str):
        race_type = [race_type.lower()]

    data_dir = os.path.join(save_path, 'data_csv')
    os.makedirs(data_dir, exist_ok=True)
    
    for data in race_data:
        if years and data['season'] not in years:
            continue
        if meetings and data['meeting'] not in meetings:
            continue
        
        meeting_dir = os.path.join(data_dir, data['season'], data['meeting'].replace(" ", "_"))
        os.makedirs(meeting_dir, exist_ok=True)
        
        for race in data['races']:
            if race_type:
                # Check for a match with the specified race type
                if not any(r.lower() == race['name'].lower() for r in race_type):
                    continue  # Skip the race if it does not match the specified type
            
            # Determine the save path based on the create_folders flag
            folder_path = meeting_dir
            if create_folders:
                if "qualifying" in race['name'].lower():
                    folder_path = os.path.join(meeting_dir, "Qualifications")
                elif "main race" in race['name'].lower():
                    folder_path = os.path.join(meeting_dir, "Races")
                os.makedirs(folder_path, exist_ok=True)
            
            file_path = os.path.join(folder_path, f"{race['name'].replace(' ', '_')}.csv")
            df = get_race_results(data['season'], data['meeting'], race['name'], race_data)
            df.to_csv(file_path, index=False)
            print(f"Saved: {file_path}")


In [2]:
load_race_data(2023)

[{'season': '2023',
  'season_id': '23',
  'meeting': 'Meeting',
  'meeting_id': '0',
  'races': []},
 {'season': '2023',
  'season_id': '23',
  'meeting': 'Monza',
  'meeting_id': '211',
  'races': [{'name': 'Main Race after 2.30 hours', 'race_id': '1356'},
   {'name': 'Main Race after 1.30 hour', 'race_id': '1355'},
   {'name': 'Main Race after 30 mins', 'race_id': '1357'},
   {'name': 'Main Race', 'race_id': '1329'},
   {'name': 'Combined Qualifying', 'race_id': '1354'},
   {'name': 'Qualifying 3', 'race_id': '1353'},
   {'name': 'Qualifying 2', 'race_id': '1352'},
   {'name': 'Qualifying 1', 'race_id': '1328'},
   {'name': 'Pit walk', 'race_id': '1351'},
   {'name': 'Fanatec Esports GT Pro Series', 'race_id': '1350'},
   {'name': 'Pre Qualifying', 'race_id': '1327'},
   {'name': 'Free practice', 'race_id': '1326'},
   {'name': 'Bronze Test', 'race_id': '1325'}]},
 {'season': '2023',
  'season_id': '23',
  'meeting': 'Brands Hatch',
  'meeting_id': '197',
  'races': [{'name': 'Race 

In [10]:
seasons = get_seasons()
meetings = get_meetings(seasons)
get_races(seasons, meetings)


TypeError: get_races() takes from 0 to 1 positional arguments but 2 were given

In [15]:
seasons = get_seasons()
season_id = seasons['2023']  # Получаем ID для сезона 2023
meetings = get_meetings(season_id)

for meeting_name, meeting_id in meetings.items():
    races = get_races(season_id, meeting_id)
    print(meeting_name, races)



Meeting []
Monza [{'name': 'Main Race after 2.30 hours', 'race_id': '1356'}, {'name': 'Main Race after 1.30 hour', 'race_id': '1355'}, {'name': 'Main Race after 30 mins', 'race_id': '1357'}, {'name': 'Main Race', 'race_id': '1329'}, {'name': 'Combined Qualifying', 'race_id': '1354'}, {'name': 'Qualifying 3', 'race_id': '1353'}, {'name': 'Qualifying 2', 'race_id': '1352'}, {'name': 'Qualifying 1', 'race_id': '1328'}, {'name': 'Pit walk', 'race_id': '1351'}, {'name': 'Fanatec Esports GT Pro Series', 'race_id': '1350'}, {'name': 'Pre Qualifying', 'race_id': '1327'}, {'name': 'Free practice', 'race_id': '1326'}, {'name': 'Bronze Test', 'race_id': '1325'}]
Brands Hatch [{'name': 'Race 2', 'race_id': '1324'}, {'name': 'Race 1', 'race_id': '1323'}, {'name': 'Pit walk', 'race_id': '1358'}, {'name': 'Qualifying 2', 'race_id': '1359'}, {'name': 'Qualifying 1', 'race_id': '1322'}, {'name': 'Pre Qualifying', 'race_id': '1321'}, {'name': 'Free practice', 'race_id': '1320'}]
Circuit Paul Ricard 1000

In [7]:
race_summary_df = all_race_data(my_race_data)

race_summary_df.head(5)

Unnamed: 0,Season,Meeting,Race Name
0,2023,Monza,Main Race after 2.30 hours
1,2023,Monza,Main Race after 1.30 hour
2,2023,Monza,Main Race after 30 mins
3,2023,Monza,Main Race
4,2023,Monza,Combined Qualifying


In [8]:
results = race_summary_df[race_summary_df['Meeting'] == 'Monza']['Race Name'].reset_index(drop=True)
results.head(15)

0        Main Race after 2.30 hours
1         Main Race after 1.30 hour
2           Main Race after 30 mins
3                         Main Race
4               Combined Qualifying
5                      Qualifying 3
6                      Qualifying 2
7                      Qualifying 1
8                          Pit walk
9     Fanatec Esports GT Pro Series
10                   Pre Qualifying
11                    Free practice
12                      Bronze Test
Name: Race Name, dtype: object

In [4]:
df = get_race_results("2023", "Monza", "Fanatec Esports GT Pro Series", my_race_data)

df.head()

ValueError: Cannot find data for season: 2023, meeting: Monza, race: Fanatec Esports GT Pro Series

In [22]:
save_race_data_to_csv(years="2023", meetings="Monza", race_type="Qualifying 1", race_data=my_race_data, create_folders=True, save_path="C:\\Users\\ireev\\Desktop\\")

Saved: C:\Users\ireev\Desktop\data_csv\2023\Monza\Qualifications\Qualifying_1.csv
