In [24]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import fastf1
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [25]:
cache_folder = 'f1_data_cache'
if not os.path.exists(cache_folder):
    os.makedirs(cache_folder)
fastf1.Cache.enable_cache(cache_folder)

In [None]:
driver_numbers = [
    'PIA',  # Piastri
    'RUS',  # Russell
    'LEC',  # Leclerc
    'ANT',  # Antonelli
    'GAS',  # Gasly
    'NOR',   # Norris
    'VER',   # Verstappen
    'SAI',  # Sainz
    'HAM',  # Hamilton
    'TSU',  # Tsunoda
    'HAD',   # Hadjar
    'ALO',  # Alonso
    'OCO',  # Ocon
    'ALB',  # Albon
    'HUL',  # Hulkenberg
    'LAW',  # Lawson
    'STR',  # Stroll
    'BEA',  # Bearman
    'DOO', # Doohan
    'BOR' # Bortoleto
]
races = [
    ('Australia Prix'),
    ('China Prix'),
    ('Japan Prix'),
    ('Bahrain Prix'),
    ('Saudi Arabia Prix')
]

In [None]:
X_list = []
Y_list = []
for race_name in races:
    try:
        race = fastf1.get_session(2025, race_name, 'R')
        race.load()
        qualifying = fastf1.get_session(2025, race_name, 'Q')
        qualifying.load()

        race_results = race.results[['Abbreviation', 'Position']]
        driver_laps = race.laps[['Driver', 'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']]
        merged_race_data = pd.merge(
            race_results,
            driver_laps.rename(columns={'Driver': 'Abbreviation'}),
            on='Abbreviation')
        qualifying_results = qualifying.results[['Abbreviation', 'Position']]

        merged = pd.merge(
            qualifying_results.rename(columns={'Position': 'QualifyingPosition'}),
            race_results.rename(columns={'Position': 'RaceResult'}),
            on='Abbreviation'
        )
        merged = merged.dropna()

        X_list.append(merged[['QualifyingPosition']])
        Y_list.append(merged['RaceResult'])

    except Exception as e:
        print(f"Error in {race_name}: {e}")

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
core           INFO 	Loading data for Australian Grand Prix 

In [75]:

quali = fastf1.get_session(2025, 'Saudi Arabia Prix', 'Q')
quali.load()
quali_laps = quali.laps[['Driver', 'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'IsAccurate']]
quali_laps.dropna(inplace=True)

quali_laps = quali_laps[quali_laps['LapTime'].notna() & ~quali_laps['IsAccurate']]
fastest_laps = quali_laps.groupby('Driver').min()['LapTime']
#fastest_laps_sorted = fastest_laps.sort_values(by='LapTime')
print(fastest_laps)

core           INFO 	Loading data for Saudi Arabian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '63', '16', '12', '55', '44', '22', '10', '4', '23', '30', '14', '6', '87', '18', '7', '27', '31', '5']


Driver
ALB   0 days 00:01:48.614000
ALO   0 days 00:01:43.610000
ANT   0 days 00:01:49.802000
BEA   0 days 00:01:43.191000
BOR   0 days 00:01:50.078000
DOO   0 days 00:02:00.380000
GAS   0 days 00:01:34.742000
HAD   0 days 00:01:55.827000
HAM   0 days 00:01:48.562000
HUL   0 days 00:01:43.612000
LAW   0 days 00:01:38.928000
LEC   0 days 00:01:41.591000
NOR   0 days 00:01:57.345000
OCO   0 days 00:01:45.433000
PIA   0 days 00:01:58.456000
RUS   0 days 00:01:49.241000
SAI   0 days 00:01:47.836000
STR   0 days 00:01:51.645000
TSU   0 days 00:01:55.708000
VER   0 days 00:01:38.462000
Name: LapTime, dtype: timedelta64[ns]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quali_laps.dropna(inplace=True)


In [53]:
race = fastf1.get_session(2025, 'Saudi Arabia Prix', 'R')
race.load()
driver_laps = race.laps[['Driver','Position', 'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'LapNumber', 'LapTime']]
race_results = race.results[['Abbreviation', 'Position']]
merged_race_data = pd.merge(
            race_results,
            driver_laps.rename(columns={'Driver': 'Abbreviation', 'Position': 'FinalPosition'}),
            on='Abbreviation')
merged_race_data.dropna(inplace=True)
print(merged_race_data[merged_race_data['Abbreviation'] == 'RUS'].head())

core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '1', '16', '4', '63', '12', '44', '55', '23', '6', '14', '30', '87', '31', '27', '18', '7', '5', '22', '10']


    Abbreviation  Position  FinalPosition                LapTime  \
203          RUS       5.0            3.0 0 days 00:01:35.170000   
204          RUS       5.0            3.0 0 days 00:01:33.996000   
205          RUS       5.0            3.0 0 days 00:01:34.073000   
206          RUS       5.0            3.0 0 days 00:01:34.038000   
207          RUS       5.0            3.0 0 days 00:01:33.857000   

               Sector1Time            Sector2Time            Sector3Time  \
203 0 days 00:00:35.754000 0 days 00:00:29.477000 0 days 00:00:29.939000   
204 0 days 00:00:34.781000 0 days 00:00:29.406000 0 days 00:00:29.809000   
205 0 days 00:00:34.730000 0 days 00:00:29.526000 0 days 00:00:29.817000   
206 0 days 00:00:34.794000 0 days 00:00:29.572000 0 days 00:00:29.672000   
207 0 days 00:00:34.672000 0 days 00:00:29.477000 0 days 00:00:29.708000   

     LapNumber                LapTime  
203        4.0 0 days 00:01:35.170000  
204        5.0 0 days 00:01:33.996000  
205        6.0

In [29]:
X = pd.concat(X_list, ignore_index=True)
y = pd.concat(Y_list, ignore_index=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = GradientBoostingRegressor()
model.fit(X_train, y_train)

In [30]:
predict_quali = fastf1.get_session(2025, 'Miami Prix', 'Q')
predict_quali.load()

predict_quali_results = predict_quali.results[['Abbreviation', 'Position']]
X_predict = predict_quali_results.rename(columns={'Position': 'QualifyingPosition'})

predictions = model.predict(X_predict[['QualifyingPosition']])

prediction_df = pd.DataFrame({
    'Driver': X_predict['Abbreviation'],
    'PredictedRacePosition': predictions
}).sort_values('PredictedRacePosition')

y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(prediction_df)
print(f"Mae error: {mae:.2f} positions")

core           INFO 	Loading data for Miami Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '12', '81', '63', '55', '23', '16', '31', '22', '6', '44', '5', '7', '30', '27', '14', '10', '18', '87']


   Driver  PredictedRacePosition
1     VER               1.020999
4     NOR               3.413729
12    ANT               3.510340
81    PIA               5.506373
23    ALB               8.053883
55    SAI               8.477121
22    TSU               9.678917
63    RUS              10.615616
16    LEC              12.154773
31    OCO              12.154773
30    LAW              12.650990
14    ALO              12.826837
6     HAD              12.963709
5     BOR              13.544922
44    HAM              13.910327
7     DOO              14.414715
18    STR              15.038916
27    HUL              15.166826
10    GAS              15.194878
87    BEA              15.472176
Mae error: 3.45 positions
