### Formula 1 Machine Learning Model

** Please be sure to run `pip install fastf1` before running**


In [1]:
import fastf1
import pandas as pd
from IPython.display import display
from datetime import datetime, timezone

years = [2024, 2025]
required_sessions = ['Session4DateUtc', 'Session5DateUtc']  # Qualifying & Race
cache_dir = 'cache'

fastf1.Cache.enable_cache(cache_dir)
fastf1.set_log_level('DEBUG')

now = datetime.now(timezone.utc)

# final dataframe
all_combined = []

for year in years:
    print(f"\n=== Processing season {year} ===")
    try:
        schedule = fastf1.get_event_schedule(year, include_testing=True)
    except Exception as e:
        print(f"Could not fetch schedule for {year}: {e}")
        continue

    #Only events where both Quali & Race have completed
    completed = []
    for _, evt in schedule.iterrows():
        ok = True
        for key in required_sessions:
            if key not in evt or pd.isna(evt[key]):
                ok = False
                break
            sess_time = evt[key]
            if sess_time.tzinfo is None:
                sess_time = sess_time.replace(tzinfo=timezone.utc)
            if sess_time >= now:
                ok = False
                break
        if ok:
            completed.append(evt)

    if not completed:
        print(f"No fully completed events found for {year}.")
        continue

    df_events = pd.DataFrame(completed)

    # Pull Qualifying & Race results for each completed event
    for _, evt in df_events.iterrows():
        name = evt['EventName']
        print(f"\n— {year} {name} —")
        try:
            # Qualifying
            q = fastf1.get_session(year, name, 'Q')
            q.load()
            q_res = q.results[['Abbreviation', 'Position', 'Q1', 'Q2', 'Q3']].copy()
            q_res.rename(columns={
                'Position': 'QualiPosition',
                'Q1':       'QualiQ1',
                'Q2':       'QualiQ2',
                'Q3':       'QualiQ3'
            }, inplace=True)
        except Exception as e:
            print(f"Qualifying failed for {name}: {e}")
            continue

        try:
            # Race
            r = fastf1.get_session(year, name, 'R')
            r.load()

            # Skip if no race results
            if r.results.empty or 'Abbreviation' not in r.results.columns:
                print(f"No race results for {name}, skipping.")
                continue

            # Compute each driver's fastest lap (laps are named by 'Driver', not 'Abbreviation')
            laps = r.laps
            if laps.empty or 'Driver' not in laps.columns:
                fastest = pd.DataFrame(columns=['Abbreviation', 'FastestLapTime'])
            else:
                fastest = (
                    laps
                    .groupby('Driver')['LapTime']
                    .min()
                    .reset_index()
                    .rename(columns={
                        'Driver':       'Abbreviation',
                        'LapTime':      'FastestLapTime'
                    })
                )

            # Compile race results
            r_res = r.results[['Abbreviation', 'Position', 'GridPosition', 'Time', 'Status', 'Points']].copy()
            r_res.rename(columns={
                'Position': 'RacePosition',
                'Time':     'RaceTime'
            }, inplace=True)
        except Exception as e:
            print(f"Race failed for {name}: {e}")
            continue

        # Merge Quali + Race + FastestLap
        merged = pd.merge(q_res, r_res, on='Abbreviation', how='inner')
        merged = pd.merge(merged, fastest, on='Abbreviation', how='left')
        merged['Year']      = year
        merged['EventName'] = name
        all_combined.append(merged)

# Final merge
if all_combined:
    final_df = pd.concat(all_combined, ignore_index=True)
    display(
        final_df[[
            'Year', 'EventName', 'Abbreviation',
            'QualiPosition', 'QualiQ1', 'QualiQ2', 'QualiQ3',
            'RacePosition', 'GridPosition', 'RaceTime',
            'FastestLapTime', 'Status', 'Points'
        ]]
        .sort_values(['Year', 'EventName', 'QualiPosition'])
    )
else:
    print("No data could be pulled for any of the specified seasons.")


core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...



=== Processing season 2024 ===

— 2024 Bahrain Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '63', '55', '11', '14', '4', '81', '44', '27', '22', '18', '23', '3', '20', '77', '24', '2', '31', '10']
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       


— 2024 Saudi Arabian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '14', '81', '4', '63', '44', '22', '18', '38', '23', '20', '3', '27', '77', '31', '10', '2', '24']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req 


— 2024 Australian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['1', '55', '11', '4', '16', '81', '63', '22', '18', '14', '44', '23', '77', '20', '31', '27', '10', '3', '24']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
structure      DEBUG 	Failed to parse timestamp '-1:57:37.891' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processi


— 2024 Japanese Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '4', '55', '14', '81', '44', '16', '63', '22', '3', '27', '77', '23', '31', '18', '10', '20', '2', '24']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      


— 2024 Chinese Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '4', '81', '16', '55', '63', '27', '77', '18', '3', '31', '23', '10', '24', '20', '44', '22', '2']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       


— 2024 Miami Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '55', '11', '4', '81', '63', '44', '27', '22', '18', '10', '31', '23', '14', '77', '2', '3', '20', '24']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req         


— 2024 Emilia Romagna Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '4', '16', '55', '63', '22', '44', '3', '27', '11', '31', '18', '23', '10', '77', '24', '20', '14', '2']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req


— 2024 Monaco Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '31', '3', '18', '27', '14', '2', '20', '11', '77', '24']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req        


— 2024 Canadian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '1', '4', '81', '3', '14', '44', '22', '18', '23', '16', '55', '2', '20', '10', '11', '77', '31', '27', '24']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      


— 2024 Spanish Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '44', '63', '16', '55', '10', '11', '31', '81', '14', '77', '27', '18', '24', '20', '22', '3', '23', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       


— 2024 Austrian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '55', '44', '16', '81', '11', '27', '31', '3', '20', '10', '22', '14', '23', '18', '77', '2', '24']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
structure      DEBUG 	Failed to parse timestamp '-1:52:13.256' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Proce


— 2024 British Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '4', '1', '81', '27', '55', '18', '23', '14', '16', '2', '22', '24', '3', '77', '20', '31', '11', '10']
core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       


— 2024 Hungarian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '1', '55', '44', '16', '14', '18', '3', '22', '27', '77', '23', '2', '20', '11', '63', '24', '31', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req     


— 2024 Belgian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '44', '4', '81', '63', '55', '14', '31', '23', '10', '3', '77', '18', '27', '20', '22', '2', '24']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
structure      DEBUG 	Failed to parse timestamp '-1:59:59.474' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Proces


— 2024 Dutch Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '11', '16', '14', '18', '10', '55', '23', '44', '22', '27', '20', '3', '31', '77', '24', '2']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req         


— 2024 Italian Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '63', '16', '55', '44', '1', '11', '23', '27', '14', '3', '20', '10', '31', '22', '18', '43', '77', '24']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      


— 2024 Azerbaijan Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '11', '63', '1', '44', '14', '43', '23', '50', '22', '27', '18', '3', '10', '4', '77', '24', '31']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
structure      DEBUG 	Failed to parse timestamp '-1:55:43.191' in Ergastresponse.
structure      DEBUG 	Failed to parse timestamp '-1:55:43.761' in Ergastresponse.
structure      DEBUG 	Failed to parse timestamp '-1:50:23.073' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for t


— 2024 Singapore Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '44', '63', '81', '27', '14', '22', '16', '55', '23', '43', '11', '20', '31', '3', '18', '10', '77', '24']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
structure      DEBUG 	Failed to parse timestamp '-1:55:59.335' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Pro


— 2024 United States Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '55', '16', '81', '63', '10', '14', '20', '11', '22', '27', '31', '18', '30', '23', '43', '77', '44', '24']
core           INFO 	Loading data for United States Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
re


— 2024 Mexico City Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '1', '4', '16', '63', '44', '20', '10', '23', '27', '22', '30', '14', '18', '77', '43', '81', '11', '31', '24']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req 


— 2024 São Paulo Grand Prix —


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '63', '22', '31', '30', '16', '23', '81', '14', '18', '77', '1', '11', '55', '10', '44', '50', '43', '27', '24']
core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 


— 2024 Las Vegas Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for timing_app_data. Loading data...
_api           INFO 	Fetching timing app data...
req            INFO 	Data has been written to


— 2024 Qatar Grand Prix —


core           INFO 	Loading data for Qatar Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	D


— 2024 Abu Dhabi Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for timing_app_data. Loading data...
_api           INFO 	Fetching timing app data...
req            INFO 	Data has been written to


=== Processing season 2025 ===

— 2025 Australian Grand Prix —


core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing


— 2025 Chinese Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for timing_app_data. Loading data...
_api           INFO 	Fetching timing app data...
req            INFO 	Data has been written to


— 2025 Japanese Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for timing_app_data. Loading data...
_api           INFO 	Fetching timing app data...
req            INFO 	Data has been written to


— 2025 Bahrain Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            I


— 2025 Saudi Arabian Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for timing_app_data. Loading data...
_api           INFO 	Fet


— 2025 Miami Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for timing_app_data. Loading data...
_api           INFO 	Fetching timing app data...
req            INFO 	Data has been written to


— 2025 Emilia Romagna Grand Prix —


req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
structure      DEBUG 	Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached da

KeyboardInterrupt: 

In [None]:
final_df

In [None]:
df = final_df.copy()

#ADD NEW FEATURES
#grid position difference
df['GridPosDiff'] = df['GridPosition'] - df['RacePosition']

# MadeQuali2 / MadeQuali3
df['MadeQuali2'] = df['QualiQ2'].notna().astype(int)
df['MadeQuali3'] = df['QualiQ3'].notna().astype(int)

#TotalRaceTime
def compute_total_time(group):
    # winner's race time
    winner_time = group.loc[group['RacePosition'] == 1, 'RaceTime'].iloc[0]
    # calculate TotalRaceTime
    group['TotalRaceTime'] = group.apply(
        lambda r: r['RaceTime'] if r['RacePosition'] == 1
                  else winner_time + r['RaceTime'],
        axis=1
    )
    return group

df = (
    df
    .groupby(['Year', 'EventName'], group_keys=False)
    .apply(compute_total_time)
)

#one hot encoding for categorical features
df = pd.get_dummies(
    df,
    columns=['Status', 'EventName', 'Abbreviation', 'MadeQuali2', 'MadeQuali3'],
    prefix=['Status', 'Event', 'Driver', 'MadeQ2', 'MadeQ3']
)


In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df

In [None]:
#drop unnecessary/misleading columns
df = df.drop(columns=['RaceTime'])

In [None]:
print(df.columns.tolist())

In [None]:
#check if there are any missing values
missing_counts = df.isnull().sum()
print("columns with missing features")
print(missing_counts[missing_counts > 0])

In [None]:
#add penalty to drivers who did not make it to Q2 or Q3
def _add_penalty(x):
    if isinstance(x, pd.Timedelta):
        return x + pd.Timedelta(seconds=1)
    else:
        return x + 1
def penalize_missing(group):
    for col in ['QualiQ1','QualiQ2','QualiQ3','FastestLapTime','TotalRaceTime']:
        if col not in group:
            continue
        # find max
        max_val = group[col].max(skipna=True)
        # add penalty
        penalty = _add_penalty(max_val)
        group[col] = group[col].fillna(penalty)
    return group

event_cols = [c for c in df.columns if c.startswith('Event_')]
df['EventName'] = (
    df[event_cols]
    .idxmax(axis=1)                     
    .str.replace('Event_', '', 1)      
)

df = df.groupby(['Year','EventName'], group_keys=False).apply(penalize_missing)
df = df.drop(columns=['EventName'])

In [None]:
#should be no more missing values!
missing_counts = df.isnull().sum()
print("columns with missing features")
print(missing_counts[missing_counts > 0])

In [None]:
df

In [None]:
from sklearn.model_selection import GroupShuffleSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

#Drop columns that give away the race winner
leaky = [
    'RaceTime',
    'FastestLapTime',
    'TotalRaceTime',
    'GridPosDiff'
]
df = df.drop(columns=[c for c in leaky if c in df.columns])

# Convert timedelta columns to seconds
td_cols = df.select_dtypes(include=['timedelta64[ns]']).columns
for col in td_cols:
    df[col] = df[col].dt.total_seconds().astype(float)

# Get Abbreviation and EventName back from one hot encoding to calculate accuracy later
driver_cols = [c for c in df.columns if c.startswith('Driver_')]
df['Abbreviation'] = df[driver_cols].idxmax(axis=1).str.replace('Driver_', '', regex=False)
event_cols  = [c for c in df.columns if c.startswith('Event_')]
df['EventName']    = df[event_cols].idxmax(axis=1).str.replace('Event_',  '', regex=False)

# Build pre-race feature set: keep Quali and grid only
X = df.drop(columns=[
    'RacePosition',  # target
    'Points',        # also post-race
    'Abbreviation',
    'EventName'
])
#predict RacePosition
y = df['RacePosition']

# Group by Race
race_id = df['Year'].astype(str) + '_' + df['EventName']

# Create train/test split
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(gss.split(X, y, groups=race_id))
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Use random forest
model = RandomForestRegressor(n_estimators=120)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Test MAE (position):", mean_absolute_error(y_test, y_pred))

# Get predicted podium
test_df = df.iloc[test_idx].copy()
test_df['PredPos']   = y_pred
test_df['ActualPos'] = test_df['RacePosition']
test_df['RaceID']    = race_id.iloc[test_idx]
pred_top3 = (
    test_df
    .groupby('RaceID')[['Abbreviation', 'PredPos']]
    .apply(lambda g: g.sort_values('PredPos')['Abbreviation'].tolist()[:3])
)

# Get actual podium
true_top3 = (
    test_df[test_df['ActualPos'] <= 3]
    .groupby('RaceID')[['Abbreviation', 'ActualPos']]
    .apply(lambda g: g.sort_values('ActualPos')['Abbreviation'].tolist())
)

# Get 1st, 2nd, 3rd position accuracies (as well as overall top 3)
race_ids = pred_top3.index
n = len(race_ids)
acc1 = sum(pred_top3[r][0] == true_top3[r][0] for r in race_ids) / n
acc2 = sum(pred_top3[r][1] == true_top3[r][1] for r in race_ids) / n
acc3 = sum(pred_top3[r][2] == true_top3[r][2] for r in race_ids) / n
set_acc = sum(set(pred_top3[r]) == set(true_top3[r]) for r in race_ids) / n

print(f"1st place accuracy : {acc1:.3f}")
print(f"2nd place accuracy : {acc2:.3f}")
print(f"3rd place accuracy : {acc3:.3f}")
print(f"Top-3 set accuracy : {set_acc:.3f}")

print("Race predictions vs actual podium:")
for race in pred_top3.index:
    print(f"\nRace: {race}")
    print(f"  Predicted: {pred_top3[race]}")
    print(f"  Actual   : {true_top3[race]}")


In [None]:
#calculate accuracy on 10 different splits
n_splits = 10
gss = GroupShuffleSplit(n_splits=n_splits, test_size=0.2, random_state=42)

maes = []
acc1s = []
acc2s = []
acc3s = []
set_accs = []

for fold, (train_idx, test_idx) in enumerate(gss.split(X, y, groups=race_id), 1):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # MAE for this fold
    maes.append(mean_absolute_error(y_test, y_pred))
    tmp = df.iloc[test_idx].copy()
    tmp['PredPos'] = y_pred
    tmp['ActualPos'] = tmp['RacePosition']
    tmp['RaceID'] = race_id.iloc[test_idx]

    # get pred & true podium lists
    pred_pods = (
        tmp
        .groupby('RaceID')[['Abbreviation','PredPos']]
        .apply(lambda g: g.sort_values('PredPos')['Abbreviation'].tolist()[:3])
    )
    
    true_pods = (
        tmp[tmp['ActualPos'] <= 3]
        .groupby('RaceID')[['Abbreviation','ActualPos']]
        .apply(lambda g: g.sort_values('ActualPos')['Abbreviation'].tolist())
    )
    
    n = len(pred_pods)
    acc1 = sum(pred_pods[r][0] == true_pods[r][0] for r in pred_pods.index) / n
    acc2 = sum(pred_pods[r][1] == true_pods[r][1] for r in pred_pods.index) / n
    acc3 = sum(pred_pods[r][2] == true_pods[r][2] for r in pred_pods.index) / n
    set_acc = sum(set(pred_pods[r]) == set(true_pods[r]) for r in pred_pods.index) / n

    acc1s.append(acc1)
    acc2s.append(acc2)
    acc3s.append(acc3)
    set_accs.append(set_acc)

    print(f"Fold {fold}: MAE={maes[-1]:.3f}, acc1={acc1:.3f}, acc2={acc2:.3f}, acc3={acc3:.3f}, set={set_acc:.3f}")

# finally, print averages
print("\nOverall (mean ± std):")
print(f" MAE       : {np.mean(maes):.3f} ± {np.std(maes):.3f}")
print(f" 1st place : {np.mean(acc1s):.3f} ± {np.std(acc1s):.3f}")
print(f" 2nd place : {np.mean(acc2s):.3f} ± {np.std(acc2s):.3f}")
print(f" 3rd place : {np.mean(acc3s):.3f} ± {np.std(acc3s):.3f}")
print(f" Top-3 set : {np.mean(set_accs):.3f} ± {np.std(set_accs):.3f}")


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler

#create train/test split
gss = GroupShuffleSplit(n_splits=1, test_size=0.2)
train_idx, test_idx = next(gss.split(X, y, groups=race_id))
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

#scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# Create neural net
model = models.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),  # reduces overfitting
    layers.Dense(64, activation='relu'),
    layers.Dense(1)  # output finishing position
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# train
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=10,
    batch_size=32,
    verbose=2
)


mse, mae = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test MAE (position): {mae:.3f}")

y_pred = model.predict(X_test_scaled).flatten()
test_df = df.iloc[test_idx].copy()
test_df['PredPos']   = y_pred
test_df['ActualPos'] = test_df['RacePosition']
test_df['RaceID']    = race_id.iloc[test_idx]


pred_top3 = (
    test_df
    .groupby('RaceID')[['Abbreviation', 'PredPos']]
    .apply(lambda g: g.sort_values('PredPos')['Abbreviation'].tolist()[:3])
)

true_top3 = (
    test_df[test_df['ActualPos'] <= 3]
    .groupby('RaceID')[['Abbreviation', 'ActualPos']]
    .apply(lambda g: g.sort_values('ActualPos')['Abbreviation'].tolist())
)

# print accuracy
race_ids = pred_top3.index
n = len(race_ids)
acc1 = sum(pred_top3[r][0] == true_top3[r][0] for r in race_ids) / n
acc2 = sum(pred_top3[r][1] == true_top3[r][1] for r in race_ids) / n
acc3 = sum(pred_top3[r][2] == true_top3[r][2] for r in race_ids) / n
set_acc = sum(set(pred_top3[r]) == set(true_top3[r]) for r in race_ids) / n

print(f"1st place accuracy : {acc1:.3f}")
print(f"2nd place accuracy : {acc2:.3f}")
print(f"3rd place accuracy : {acc3:.3f}")
print(f"Top-3 set accuracy : {set_acc:.3f}")

# predictions v. actual
print("\nRace predictions vs actual podium:")
for race in race_ids:
    print(f"\nRace: {race}")
    print(f"  Predicted: {pred_top3[race]}")
    print(f"  Actual   : {true_top3[race]}")


In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Train Loss (MSE)')
plt.plot(history.history['val_loss'], label='Validation Loss (MSE)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training vs Validation Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

# Create train/test split
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(gss.split(X, y, groups=race_id))
X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# Train XGBoost model
model = XGBRegressor(n_estimators=150, max_depth=5, learning_rate=0.05, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = model.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
print(f"Test MAE (position): {mae:.3f}")

# Build test DataFrame with predictions
test_df = df.iloc[test_idx].copy()
test_df['PredPos']   = y_pred
test_df['ActualPos'] = test_df['RacePosition']
test_df['RaceID']    = race_id.iloc[test_idx]

# Predicted podiums
pred_top3 = (
    test_df
    .groupby('RaceID')[['Abbreviation', 'PredPos']]
    .apply(lambda g: g.sort_values('PredPos')['Abbreviation'].tolist()[:3])
)

# Actual podiums
true_top3 = (
    test_df[test_df['ActualPos'] <= 3]
    .groupby('RaceID')[['Abbreviation', 'ActualPos']]
    .apply(lambda g: g.sort_values('ActualPos')['Abbreviation'].tolist())
)

# Accuracy metrics
race_ids = pred_top3.index
n = len(race_ids)
acc1 = sum(pred_top3[r][0] == true_top3[r][0] for r in race_ids) / n
acc2 = sum(pred_top3[r][1] == true_top3[r][1] for r in race_ids) / n
acc3 = sum(pred_top3[r][2] == true_top3[r][2] for r in race_ids) / n
set_acc = sum(set(pred_top3[r]) == set(true_top3[r]) for r in race_ids) / n

print(f"1st place accuracy : {acc1:.3f}")
print(f"2nd place accuracy : {acc2:.3f}")
print(f"3rd place accuracy : {acc3:.3f}")
print(f"Top-3 set accuracy : {set_acc:.3f}")

# Sample predictions
print("\nRace predictions vs actual podium:")
for race in race_ids:
    print(f"\nRace: {race}")
    print(f"  Predicted: {pred_top3[race]}")
    print(f"  Actual   : {true_top3[race]}")


In [None]:
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Train LightGBM model
model = lgb.LGBMRegressor(
    num_leaves=15,
    learning_rate=0.05,
    n_estimators=150,
    random_state=42
)
model.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = model.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
print(f"Test MAE (position): {mae:.3f}")

# Reuse test_df structure
test_df = df.iloc[test_idx].copy()
test_df['PredPos']   = y_pred
test_df['ActualPos'] = test_df['RacePosition']
test_df['RaceID']    = race_id.iloc[test_idx]

# Predicted podiums
pred_top3 = (
    test_df
    .groupby('RaceID')[['Abbreviation', 'PredPos']]
    .apply(lambda g: g.sort_values('PredPos')['Abbreviation'].tolist()[:3])
)

# Actual podiums
true_top3 = (
    test_df[test_df['ActualPos'] <= 3]
    .groupby('RaceID')[['Abbreviation', 'ActualPos']]
    .apply(lambda g: g.sort_values('ActualPos')['Abbreviation'].tolist())
)

# Accuracy metrics
test_df = df.iloc[test_idx].copy()
test_df['PredPos']   = y_pred
test_df['ActualPos'] = test_df['RacePosition']
test_df['RaceID']    = race_id.iloc[test_idx]

# Podium predictions
pred_top3 = (
    test_df
    .groupby('RaceID')[['Abbreviation', 'PredPos']]
    .apply(lambda g: g.sort_values('PredPos')['Abbreviation'].tolist()[:3])
)

# Actual podiums
true_top3 = (
    test_df[test_df['ActualPos'] <= 3]
    .groupby('RaceID')[['Abbreviation', 'ActualPos']]
    .apply(lambda g: g.sort_values('ActualPos')['Abbreviation'].tolist())
)

# Accuracy metrics (your exact code)
race_ids = pred_top3.index
n = len(race_ids)
acc1 = sum(pred_top3[r][0] == true_top3[r][0] for r in race_ids) / n
acc2 = sum(pred_top3[r][1] == true_top3[r][1] for r in race_ids) / n
acc3 = sum(pred_top3[r][2] == true_top3[r][2] for r in race_ids) / n
set_acc = sum(set(pred_top3[r]) == set(true_top3[r]) for r in race_ids) / n

print(f"1st place accuracy : {acc1:.3f}")
print(f"2nd place accuracy : {acc2:.3f}")
print(f"3rd place accuracy : {acc3:.3f}")
print(f"Top-3 set accuracy : {set_acc:.3f}")

# Sample predictions
print("\nRace predictions vs actual podium:")
for race in race_ids:
    print(f"\nRace: {race}")
    print(f"  Predicted: {pred_top3[race]}")
    print(f"  Actual   : {true_top3[race]}")
