In [13]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup


def parse_years(input_years):
    """Normalize a year specification into a list of year strings.

    Accepted forms:
      * ``None`` -> ``None`` (meaning "no year filter").
      * an ``int`` such as ``2023`` -> ``["2023"]``.
      * a ``str`` holding a single year, e.g. ``"2023"`` -> ``["2023"]``.
      * a ``str`` holding an inclusive range, e.g. ``"2021-2023"`` ->
        ``["2021", "2022", "2023"]``.
      * a list, tuple or range of years -> every item converted with ``str``.

    Any other input (including a string that is neither a number nor a
    range) returns ``None``.

    Raises:
        ValueError: if a string containing ``-`` cannot be split into exactly
            two integers.
    """
    if input_years is None:
        return None

    if isinstance(input_years, (list, tuple, range)):
        return [str(year) for year in input_years]

    if isinstance(input_years, str):
        text = input_years.strip()
        if '-' in text:
            start_year, end_year = map(int, text.split('-'))
            # The end year is part of the range.
            return [str(year) for year in range(start_year, end_year + 1)]
        if text.isdigit():
            # A single year passed as text used to fall through to ``None``,
            # which callers interpret as "all years".
            return [text]
        return None

    if isinstance(input_years, int):
        return [str(input_years)]

    return None



def get_seasons_and_meetings(years=None, timeout=30):
    """Fetch the season and meeting filter options from the results page.

    Args:
        years: Optional iterable of years (strings or ints). When given and
            non-empty, only seasons whose label equals one of these years are
            kept. Meetings are never filtered here.
        timeout: Timeout in seconds handed to ``requests.get`` so that a
            stalled connection cannot block the notebook forever.

    Returns:
        A ``(seasons, meetings)`` tuple of dicts, each mapping the stripped
        option label to the option's ``value`` attribute. Options whose
        ``value`` is missing or empty are skipped.

    Raises:
        requests.HTTPError: if the page answers with an HTTP error status.
    """
    url = "https://www.gt-world-challenge-europe.com/results?filter_season_id=0&filter_meeting_id=0&filter_race_id="
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into two empty dicts.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    def option_map(selector):
        # ``.get`` tolerates <option> tags that carry no ``value`` attribute.
        return {
            option.text.strip(): option.get('value')
            for option in soup.select(selector)
            if option.get('value')
        }

    seasons = option_map('#filter_season_id option')

    # Filter seasons by the requested years, if any were provided.
    if years:
        # Labels are strings, so integer years are compared by their text form.
        wanted = [str(year) for year in years]
        seasons = {year: seasons[year] for year in wanted if year in seasons}

    meetings = option_map('#filter_meeting_id option')

    return seasons, meetings


def get_races(input_years=None, timeout=30):
    """Collect the race options for every season/meeting combination.

    Args:
        input_years: Year specification understood by ``parse_years``; the
            parsed result is forwarded to ``get_seasons_and_meetings``.
        timeout: Timeout in seconds handed to each ``requests.get`` call so a
            single stalled request cannot hang the whole crawl.

    Returns:
        A list with one dict per season/meeting pair, holding the keys
        ``season``, ``season_id``, ``meeting``, ``meeting_id`` and ``races``.
        ``races`` is a list of ``{"race_id": ..., "name": ...}`` dicts and is
        empty when the page offers no race option with a value.
    """
    base_url = "https://www.gt-world-challenge-europe.com/results"
    seasons, meetings = get_seasons_and_meetings(parse_years(input_years))
    results = []

    for season_name, season_id in seasons.items():
        for meeting_name, meeting_id in meetings.items():
            # One request per season/meeting pair to read its race dropdown.
            url = f"{base_url}?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id="
            # The HTTP status is intentionally not checked here: as before, a
            # page without usable race options simply yields an empty list.
            response = requests.get(url, timeout=timeout)
            soup = BeautifulSoup(response.text, 'html.parser')

            # ``.get`` skips <option> tags without a ``value`` attribute
            # instead of raising KeyError; empty values are excluded as well.
            races = [
                {"race_id": option.get('value'), "name": option.text.strip()}
                for option in soup.select('#filter_race_id option')
                if option.get('value')
            ]

            results.append({
                "season": season_name,
                "season_id": season_id,
                "meeting": meeting_name,
                "meeting_id": meeting_id,
                "races": races
            })

    return results


def get_race_results(filter_season, filter_meeting, filter_race, race_data, timeout=30):
    """Download the result table of one race and return it as a DataFrame.

    Args:
        filter_season: Season label to look up in ``race_data``.
        filter_meeting: Meeting label to look up in ``race_data``.
        filter_race: Race name to look up within the matching meeting.
        race_data: List of dicts with the keys ``season``, ``season_id``,
            ``meeting``, ``meeting_id`` and ``races`` (as built by
            ``get_races``).
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        A DataFrame with the columns ``Pos, Car #, Class, Drivers, Team, Car,
        Time, Laps, Gap``. Rows with fewer cells are padded with ``None``;
        cells beyond the ninth are dropped. When the page contains no result
        rows, an empty DataFrame without columns is returned.

    Raises:
        ValueError: if the season, meeting or race is not found in
            ``race_data``.
        requests.HTTPError: if the results page answers with an error status.
    """
    # Resolve the human-readable labels to the site's numeric filter IDs.
    season_id, meeting_id, race_id = None, None, None
    for entry in race_data:
        if entry['season'] != filter_season:
            continue
        season_id = entry['season_id']
        if entry['meeting'] != filter_meeting:
            continue
        meeting_id = entry['meeting_id']
        race_id = next(
            (race['race_id'] for race in entry['races'] if race['name'] == filter_race),
            None,
        )
        # The first matching season/meeting entry decides; stop searching.
        break

    if not season_id or not meeting_id or not race_id:
        raise ValueError("One of the parameters was not found. Please check the accuracy of the entered data.")

    final_url = f"https://www.gt-world-challenge-europe.com/results?filter_season_id={season_id}&filter_meeting_id={meeting_id}&filter_race_id={race_id}"
    response = requests.get(final_url, timeout=timeout)
    # Surface HTTP failures rather than returning a misleading empty table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    df_columns = ['Pos', 'Car #', 'Class', 'Drivers', 'Team', 'Car', 'Time', 'Laps', 'Gap']
    width = len(df_columns)

    table_rows = []
    for row in soup.select('.table__body tr'):
        cells = [col.text.strip() for col in row.find_all('td')]
        if not cells:  # Rows without any <td> carry no data.
            continue
        # Force every row to exactly ``width`` cells: a longer row would make
        # the DataFrame constructor raise, a shorter one is padded with None.
        cells = cells[:width]
        cells.extend([None] * (width - len(cells)))
        table_rows.append(cells)

    return pd.DataFrame(table_rows, columns=df_columns if table_rows else [])



def load_race_data(input_years=None):
    """Return the race metadata for the requested years.

    ``input_years`` is first normalized with ``parse_years``; the normalized
    value is then handed to ``get_races``, whose result is returned as is.
    """
    return get_races(parse_years(input_years))


def all_info(race_data):
    """Flatten race metadata into a DataFrame with one row per race.

    Each entry of ``race_data`` contributes one row for every item of its
    ``races`` list. The resulting columns are ``Season``, ``Meeting`` and
    ``Race Name``.
    """
    records = []
    for entry in race_data:
        season_label, meeting_label = entry['season'], entry['meeting']
        records.extend(
            {
                'Season': season_label,
                'Meeting': meeting_label,
                'Race Name': race['name'],
            }
            for race in entry['races']
        )
    return pd.DataFrame(records)



def save_race_data_to_csv(years=None, meetings=None, race_type=None, race_data=None, save_path='.', create_folders=False):
    """Download race results and write each race to its own CSV file.

    Files are written below ``<save_path>/data_csv/<season>/<meeting>/``,
    with spaces in meeting and race names replaced by underscores.

    Args:
        years: Year specification understood by ``parse_years``; entries of
            other seasons are skipped. Falsy means no year filter.
        meetings: Meeting name or list of meeting names to keep. Falsy means
            no meeting filter.
        race_type: Race name or list of race names to keep. A race is saved
            only when its name equals one of them, ignoring case. Falsy means
            every race is saved.
        race_data: Metadata as returned by ``load_race_data``. When ``None``
            it is loaded with ``load_race_data(years)``.
        save_path: Directory in which the ``data_csv`` tree is created.
        create_folders: When true, races whose name contains "main race" go
            into a ``Races`` sub-folder and those containing "qualifying"
            into ``Qualifications``; all others stay in the meeting folder.
    """
    # Normalize the input parameters.
    years = parse_years(years) if years else None
    if isinstance(meetings, str):
        meetings = [meetings]
    if isinstance(race_type, str):
        race_type = [race_type]
    wanted_races = {rt.lower() for rt in race_type} if race_type else None

    # The default of None used to crash while iterating; load the data instead.
    if race_data is None:
        race_data = load_race_data(years)

    # Create the root directory for the exported data.
    data_dir = os.path.join(save_path, 'data_csv')
    os.makedirs(data_dir, exist_ok=True)

    for data in race_data:
        # Filter by year.
        if years and data['season'] not in years:
            continue

        # Filter by meeting.
        if meetings and data['meeting'] not in meetings:
            continue

        meeting_dir = os.path.join(data_dir, data['season'], data['meeting'].replace(" ", "_"))
        os.makedirs(meeting_dir, exist_ok=True)

        for race in data['races']:
            race_name_lower = race['name'].lower()

            # Apply the race filter before touching the file system, so that
            # no sub-folder is created for a race that is skipped anyway.
            if wanted_races is not None and race_name_lower not in wanted_races:
                continue

            folder_path = meeting_dir  # Default: directly inside the meeting folder.
            if create_folders:
                if 'main race' in race_name_lower:
                    folder_path = os.path.join(meeting_dir, "Races")
                elif 'qualifying' in race_name_lower:
                    folder_path = os.path.join(meeting_dir, "Qualifications")
                os.makedirs(folder_path, exist_ok=True)

            # Download the race results and store them as CSV.
            file_path = os.path.join(folder_path, f"{race['name'].replace(' ', '_')}.csv")
            df = get_race_results(data['season'], data['meeting'], race['name'], race_data)
            df.to_csv(file_path, index=False)
            print(f"Сохранено: {file_path}")

In [10]:
# Load the season/meeting/race metadata for 2023. This calls the scraping
# helpers defined above, so it issues HTTP requests and may take a while.
my_race_data = load_race_data(2023)

In [14]:
# Flatten the loaded metadata into a table with one row per race.
race_summary_df = all_info(my_race_data)

# Preview the first five rows.
race_summary_df.head(5)

Unnamed: 0,Season,Meeting,Race Name
0,2023,Monza,Main Race after 2.30 hours
1,2023,Monza,Main Race after 1.30 hour
2,2023,Monza,Main Race after 30 mins
3,2023,Monza,Main Race
4,2023,Monza,Combined Qualifying


In [15]:
# Names of all races listed for the Monza meeting, re-indexed from zero.
results = (
    race_summary_df
    .loc[race_summary_df['Meeting'] == 'Monza', 'Race Name']
    .reset_index(drop=True)
)
results.head(15)

0        Main Race after 2.30 hours
1         Main Race after 1.30 hour
2           Main Race after 30 mins
3                         Main Race
4               Combined Qualifying
5                      Qualifying 3
6                      Qualifying 2
7                      Qualifying 1
8                          Pit walk
9     Fanatec Esports GT Pro Series
10                   Pre Qualifying
11                    Free practice
12                      Bronze Test
Name: Race Name, dtype: object

In [17]:
# Fetch the result table of a single session, selected by season, meeting and
# race name; the names are resolved to IDs through the preloaded metadata.
df = get_race_results("2023", "Barcelona", "Qualifying 3", my_race_data)

# Preview the first rows of the result table.
df.head()

Unnamed: 0,Pos,Car #,Class,Drivers,Team,Car,Time,Laps,Gap
0,1,51,Pro Cup,"Alessio Rovera, Robert Shwartzman, Nicklas Nie...",AF Corse - Francorchamps Motors,Ferrari 296 GT3,1:39.103,,
1,2,87,Pro Cup,"Lorenzo Ferrari, Thomas Drouet, Maximilian Götz",Mercedes-AMG Team Akkodis ASP,Mercedes-AMG GT3 EVO,1:39.428,,0.325
2,3,998,Pro Cup,"Daniel Harper, Neil Verhagen, Max Hesse",Rowe Racing,BMW M4 GT3,1:39.480,,0.377
3,4,78,Pro-AM Cup,"Adam Balon, Rob Collard, Dennis Lind",Barwell Motorsport,Lamborghini Huracan GT3 EVO 2,1:39.501,,0.398
4,5,54,Pro Cup,"Ayhancan Güven, Sven Müller, Christian Engelhart",Dinamic GT,Porsche 911 GT3 R (992),1:39.585,,0.482
