INSTALL FASTF1 API

In [1]:
pip install fastf1

Collecting fastf1
  Downloading fastf1-3.6.0-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-25.1.1-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading fastf1-3.6.0-py3-none-any.whl (148 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [29]:
import os
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

# Directory handed to fastf1.Cache.enable_cache so downloaded session data is kept on disk.
cache_dir = "/content/f1_cache"

# exist_ok=True makes the cell safely re-runnable and avoids the check-then-create race
# of testing os.path.exists() before calling makedirs().
os.makedirs(cache_dir, exist_ok=True)

In [30]:
# Turn on FastF1's on-disk cache at the directory prepared above, then confirm the path.
fastf1.Cache.enable_cache(cache_dir)
print(f"Cache enabled at: {cache_dir}")

Cache enabled at: /content/f1_cache


LOAD THE 2024 DATA OF THE AUSTRALIAN GRAND PRIX

In [65]:
# Race session ("R") of round 3 of the 2024 season; the load log reports this round
# as the Australian Grand Prix.
session_2024 = fastf1.get_session(year=2024, gp=3, identifier="R")

# Fetch the session data (served from the cache directory when already downloaded).
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.0]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '-1:57:37.891' in Ergastresponse.
req            INFO 	No cached data found for session_status_

RETRIEVE THE 2024 LAP TIMES

In [66]:
# Per-lap table for the 2024 race: keep driver code and lap time, discard laps without
# a recorded time, and add the lap time expressed as float seconds.
laps_2024 = (
    session_2024.laps[["Driver", "LapTime"]]
    .dropna(subset=["LapTime"])
    .assign(**{"LapTime (s)": lambda timed: timed["LapTime"].dt.total_seconds()})
)
laps_2024.head()

Unnamed: 0,Driver,LapTime,LapTime (s)
0,VER,0 days 00:01:27.458000,87.458
1,VER,0 days 00:01:24.099000,84.099
2,VER,0 days 00:01:23.115000,83.115
4,GAS,0 days 00:01:37.304000,97.304
5,GAS,0 days 00:01:24.649000,84.649


BUILD A DATAFRAME OF THE DRIVERS AND THEIR 2025 QUALIFYING TIMES

In [67]:
# 2025 qualifying results, one (driver, time-in-seconds) record per row.
# NOTE(review): "George Russel" and "Yuki Tsunado" are misspelled, but these exact strings
# are the lookup keys of the name -> code mapping used later, so change both places together.
# NOTE(review): Sainz's 76.662 is slower than the two entries listed after him — confirm
# the value against the official classification.
_quali_rows = [
    ("Lando Norris", 75.096),
    ("Oscar Piastri", 75.103),
    ("Max Verstappen", 75.481),
    ("George Russel", 75.546),
    ("Yuki Tsunado", 75.670),
    ("Alexander Albon", 75.737),
    ("Charles Leclerc", 75.753),
    ("Lewis Hamilton", 75.973),
    ("Pierre Gasly", 75.980),
    ("Carlos Sainz", 76.662),
    ("Lance Stroll", 76.4),
    ("Fernando Alonso", 76.51),
]
qualifying_2025 = pd.DataFrame(_quali_rows, columns=["Driver", "QualifyingTime (s)"])
qualifying_2025.head(12)

Unnamed: 0,Driver,QualifyingTime (s)
0,Lando Norris,75.096
1,Oscar Piastri,75.103
2,Max Verstappen,75.481
3,George Russel,75.546
4,Yuki Tsunado,75.67
5,Alexander Albon,75.737
6,Charles Leclerc,75.753
7,Lewis Hamilton,75.973
8,Pierre Gasly,75.98
9,Carlos Sainz,76.662


MAP THE DRIVER NAMES TO THEIR FASTF1 3-LETTER CODES

In [68]:
# Map full driver names to the 3-letter codes found in the FastF1 "Driver" column.
# The qualifying table spells two names as "George Russel" and "Yuki Tsunado"; those keys
# are kept, and the correct spellings are listed as well so the lookup works either way.
drive_mapping = {
    "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER",
    "George Russel": "RUS", "George Russell": "RUS",
    "Yuki Tsunado": "TSU", "Yuki Tsunoda": "TSU",
    "Alexander Albon": "ALB", "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM",
    "Pierre Gasly": "GAS", "Carlos Sainz": "SAI", "Lance Stroll": "STR",
    "Fernando Alonso": "ALO",
}

qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(drive_mapping)

# Series.map yields NaN for any name missing from the dict, and the later merge + dropna
# would then discard that driver without an error. Fail loudly here instead.
unmapped_drivers = qualifying_2025.loc[qualifying_2025["DriverCode"].isna(), "Driver"].tolist()
if unmapped_drivers:
    raise ValueError(f"No FastF1 driver code for: {unmapped_drivers}")

MERGE 2025 DATA WITH 2024 RACE DATA

In [69]:
# Attach every 2024 race lap to the matching 2025 qualifying row (joined on driver code).
# Rows with any missing value are dropped, which removes qualifiers without 2024 laps.
# "LapTime" is then converted from timedelta to float seconds, so it carries the same
# values as "LapTime (s)".
merged_data = (
    qualifying_2025
    .merge(laps_2024, how="left", left_on="DriverCode", right_on="Driver")
    .dropna()
    .assign(LapTime=lambda joined: joined["LapTime"].dt.total_seconds())
)
merged_data.head()

Unnamed: 0,Driver_x,QualifyingTime (s),DriverCode,Driver_y,LapTime,LapTime (s)
0,Lando Norris,75.096,NOR,NOR,89.784,89.784
1,Lando Norris,75.096,NOR,NOR,83.183,83.183
2,Lando Norris,75.096,NOR,NOR,82.656,82.656
3,Lando Norris,75.096,NOR,NOR,82.609,82.609
4,Lando Norris,75.096,NOR,NOR,82.685,82.685


In [70]:
# Feature matrix: the qualifying time only, kept two-dimensional (one-column DataFrame).
x = merged_data.loc[:, ["QualifyingTime (s)"]]
# Target: the 2024 race lap time in seconds.
y = merged_data.loc[:, "LapTime"]

In [71]:
# Stop before training if the merge left no rows at all.
if len(x) == 0:
    raise ValueError("No data available for training.")

In [72]:
# Peek at the first five target values (lap times in seconds).
print(y.iloc[:5])

0    89.784
1    83.183
2    82.656
3    82.609
4    82.685
Name: LapTime, dtype: float64


In [73]:
# Show the column dtypes of the merged frame (sanity check that LapTime is numeric).
column_types = merged_data.dtypes
print(column_types)

Driver_x               object
QualifyingTime (s)    float64
DriverCode             object
Driver_y               object
LapTime               float64
LapTime (s)           float64
dtype: object


TRAIN THE MODEL AND PREDICT USING THE 2025 QUALIFYING TIMES

In [74]:
# Hold out 20% of the laps for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(x, y, test_size=0.2, random_state=39)
x_train, x_test, y_train, y_test = split_parts

# Gradient-boosted regressor: 100 trees, learning rate 0.1, same seed as the split.
gbr_settings = {"n_estimators": 100, "learning_rate": 0.1, "random_state": 39}
model = GradientBoostingRegressor(**gbr_settings)
model.fit(x_train, y_train)

In [75]:
# Shapes of the train/test partitions, in the order: x_train, y_train, x_test, y_test.
print(*(part.shape for part in (x_train, y_train, x_test, y_test)))

(475, 1) (475,) (119, 1) (119,)


In [76]:
# Feed each driver's 2025 qualifying time to the fitted model and store the predicted
# lap time (seconds) next to it.
quali_features = qualifying_2025[["QualifyingTime (s)"]]
predicted_lap_times = model.predict(quali_features)
qualifying_2025["PredictedRaceTime"] = predicted_lap_times
qualifying_2025.head()

Unnamed: 0,Driver,QualifyingTime (s),DriverCode,PredictedRaceTime
0,Lando Norris,75.096,NOR,82.711596
1,Oscar Piastri,75.103,PIA,84.33784
2,Max Verstappen,75.481,VER,85.229186
3,George Russel,75.546,RUS,83.88412
4,Yuki Tsunado,75.67,TSU,84.421664


SORT THE DATA AND PRINT THE FINAL RESULT

In [77]:
# Order drivers from fastest to slowest predicted lap time and number them 1..N.
qualifying_2025 = (
    qualifying_2025
    .sort_values(by="PredictedRaceTime")
    .assign(Position=lambda ranked: range(1, len(ranked) + 1))
)
qualifying_2025.head()

Unnamed: 0,Driver,QualifyingTime (s),DriverCode,PredictedRaceTime,Position
0,Lando Norris,75.096,NOR,82.711596,1
6,Charles Leclerc,75.753,LEC,83.080455,2
9,Carlos Sainz,76.662,SAI,83.617395,3
3,George Russel,75.546,RUS,83.88412,4
1,Oscar Piastri,75.103,PIA,84.33784,5


In [78]:
# Final table: drivers in predicted finishing order with their predicted lap time.
banner = "\n Predicted 2025 Australian GP Winner \n"
print(banner)
print(qualifying_2025.loc[:, ["Driver", "PredictedRaceTime"]])


 Predicted 2025 Australian GP Winner 

             Driver  PredictedRaceTime
0      Lando Norris          82.711596
6   Charles Leclerc          83.080455
9      Carlos Sainz          83.617395
3     George Russel          83.884120
1     Oscar Piastri          84.337840
4      Yuki Tsunado          84.421664
5   Alexander Albon          84.639193
10     Lance Stroll          84.701603
11  Fernando Alonso          85.109374
2    Max Verstappen          85.229186
8      Pierre Gasly          85.545296
7    Lewis Hamilton          86.038567


In [79]:
# Mean absolute error of the model on the held-out laps, reported in seconds.
y_pred = model.predict(x_test)
mae_seconds = mean_absolute_error(y_test, y_pred)
print(f"\n Model Error(MAE):{mae_seconds:.2f} seconds")


 Model Error(MAE):3.37 seconds
