In [2]:
import os
import fastf1 as ff1
import fastf1.plotting
import matplotlib.pyplot as plt
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error


warnings.filterwarnings('ignore')

# Create cache directory if it doesn't exist
cache_dir = 'cache'
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

ff1.Cache.enable_cache(cache_dir)

In [3]:
schedule = ff1.get_event_schedule(2025)
schedule[schedule['Country'] == 'Australia']

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
1,1,Australia,Melbourne,FORMULA 1 LOUIS VUITTON AUSTRALIAN GRAND PRIX ...,2025-03-16,Australian Grand Prix,conventional,Practice 1,2025-03-14 12:30:00+11:00,2025-03-14 01:30:00,...,Practice 3,2025-03-15 12:30:00+11:00,2025-03-15 01:30:00,Qualifying,2025-03-15 16:00:00+11:00,2025-03-15 05:00:00,Race,2025-03-16 15:00:00+11:00,2025-03-16 04:00:00,True


In [19]:
session = ff1.get_session(2025, 'Las Vegas Grand Prix', 'Q')
session.load()

laps = session.laps
laps

req            INFO 	Using cached data for season_schedule
core           INFO 	Loading data for Las Vegas Grand Prix - Qualifying [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
req            INFO 	No cached data found for car_data. Loading data...
_api           INFO 	Fetching car data...
req            INFO 	No cached data found for position_data. Loading data...
_api           INFO 	Fet

DataNotLoadedError: The data you are trying to access has not been loaded yet. See `Session.load`

In [6]:
def get_event_data(year, event_name):
    '''
    Get qualifying and race data for current year.

    Arguments:
    year -- int, the year of the event
    event_name -- str, the name of the event

    Returns:
    pd.DataFrame -- DataFrame with qualifying times and race lap times from current year
    '''

    # Get free practice data 
    fps = ['FP1', 'FP2', 'FP3']
    fp_results = {}
    for fp in fps:
        try:
            session_fp = ff1.get_session(year, event_name, fp)
            session_fp.load()

            drivers = session_fp.laps['Driver'].unique()
            for driver in drivers:
                if driver not in fp_results:
                    fp_results[driver] = {
                        'driver': driver,
                        'team': session_fp.laps.loc[session_fp.laps['Driver'] == driver, 'Team'].iloc[0],
                    }
                driver_laps = session_fp.laps.pick_driver(driver)

                if not driver_laps.empty:
                    sector_times = driver_laps[['Sector1Time', 'Sector2Time', 'Sector3Time']].mean().dropna()             
                    total_times = sector_times.sum().total_seconds() if not sector_times.isna().any() else None

                    fp_results[driver] = {
                        **fp_results[driver],
                        f'{fp}_total_time': total_times
                    }
        except Exception as e:
            print(f"Could not load {fp} for {event_name} {year}: {e}")
            continue
    fp_df = pd.DataFrame.from_dict(fp_results, orient='index')

    # Get current year qualifying data
    try:
        session_q = ff1.get_session(year, event_name, 'Q')
        session_q.load()
        
        qualifying_laps = session_q.laps[['Driver', 'LapTime']].copy()
        qualifying_laps = qualifying_laps.dropna(subset=['LapTime'])
        
        # Get best qualifying lap per driver
        qualifying_best = qualifying_laps.groupby('Driver')['LapTime'].min().reset_index()
        qualifying_best['Qualifying_Time'] = qualifying_best['LapTime'].dt.total_seconds()
        qualifying_best = qualifying_best[['Driver', 'Qualifying_Time']]
        
    except Exception as e:
        print(f"Could not load qualifying for {event_name} {year}: {e}")
        return pd.DataFrame()
    
    # Get current year race data
    try:
        session_r = ff1.get_session(year, event_name, 'R')
        session_r.load()
        
        race_laps = session_r.laps[['Driver', 'LapTime']].copy()
        race_laps = race_laps.dropna(subset=['LapTime'])
        race_laps['Race_Time'] = race_laps['LapTime'].dt.total_seconds()
        race_laps = race_laps[['Driver', 'Race_Time']]

        # Add weather condition
        weather = session_r.weather_data[['AirTemp', 'TrackTemp', 'Humidity', 'WindSpeed', 'Rainfall', 'Pressure']]
        if weather is not None and not weather.empty:
            for col in weather.columns:
                race_laps[f'Weather_{col}'] = weather[col].mean()
        
    except Exception as e:
        print(f"Could not load race for {event_name} {year}: {e}")
        return pd.DataFrame()
    
    # Merge qualifying and race data
    merged_df = qualifying_best.merge(race_laps, on='Driver', how='inner')
    merged_df = merged_df.merge(fp_df, left_on='Driver', right_on='driver', how='left')
    merged_df.drop(columns=['driver'], inplace=True)
    merged_df['Year'] = year
    merged_df['EventName'] = event_name
    
    return merged_df

In [7]:
australian_gp_2025_data = get_event_data(2025, 'Australian Grand Prix')
australian_gp_2025_data

core           INFO 	Loading data for Australian Grand Prix - Practice 1 [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '5', '6', '7', '10', '12', '14', '16', '18', '22', '23', '27', '30', '31', '44', '55', '63', '81', '87']
core           INFO 	Loading data for Australian Grand Prix - Practice 2 [v3.6.1]
req            INFO 	Usin

Unnamed: 0,Driver,Qualifying_Time,Race_Time,Weather_AirTemp,Weather_TrackTemp,Weather_Humidity,Weather_WindSpeed,Weather_Rainfall,Weather_Pressure,team,FP1_total_time,FP2_total_time,FP3_total_time,Year,EventName
0,ALB,75.737,132.195,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Williams,107.037677,100.042091,105.053031,2025,Australian Grand Prix
1,ALB,75.737,142.084,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Williams,107.037677,100.042091,105.053031,2025,Australian Grand Prix
2,ALB,75.737,139.612,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Williams,107.037677,100.042091,105.053031,2025,Australian Grand Prix
3,ALB,75.737,129.065,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Williams,107.037677,100.042091,105.053031,2025,Australian Grand Prix
4,ALB,75.737,98.281,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Williams,107.037677,100.042091,105.053031,2025,Australian Grand Prix
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
800,VER,75.481,90.859,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Red Bull Racing,104.157015,100.398777,109.207585,2025,Australian Grand Prix
801,VER,75.481,89.100,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Red Bull Racing,104.157015,100.398777,109.207585,2025,Australian Grand Prix
802,VER,75.481,88.538,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Red Bull Racing,104.157015,100.398777,109.207585,2025,Australian Grand Prix
803,VER,75.481,87.921,15.707865,18.942135,78.421348,3.475281,0.325843,1009.901685,Red Bull Racing,104.157015,100.398777,109.207585,2025,Australian Grand Prix


In [14]:
# Gradient Boosting Regressor model to predict race
X = australian_gp_2025_data[['Qualifying_Time']]
y = australian_gp_2025_data['Race_Time']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 16.139764543693552


In [15]:
results_df = pd.DataFrame({
    'Driver': australian_gp_2025_data.loc[X_test.index, 'Driver'],
    'Actual': y_test,
    'Predicted': y_pred
})

In [18]:
results_df


Unnamed: 0,Driver,Actual,Predicted
192,GAS,93.124,103.627999
718,TSU,91.793,105.685314
168,BOR,132.146,100.531078
522,OCO,102.035,103.082319
536,OCO,93.152,103.082319
...,...,...,...
659,STR,93.631,105.950216
356,LAW,92.883,102.474583
525,OCO,88.505,103.082319
749,TSU,91.891,105.685314
