In [2]:
# Import necessary libraries
!pip install fastf1
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from matplotlib import pyplot as plt

Collecting fastf1
  Downloading fastf1-3.5.2-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl.metadata (3.1 kB)
Downloading fastf1-3.5.2-py3-none-any.whl (150 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Set the desired cache directory

import os
cache_dir = "f1_cache"

# Create the cache directory if it doesn't exist.
# exist_ok=True makes this a single call that succeeds whether or not the
# directory is already present, so there is no gap between "check" and
# "create" in which another process could create it and make makedirs fail.
os.makedirs(cache_dir, exist_ok=True)

# Point FastF1's on-disk cache at that directory.
fastf1.Cache.enable_cache(cache_dir)

In [4]:
# Loading FastF1 2024 Australian GP race session data
# ('R' is the race session identifier; load() fetches the session data
# so that session_2024.laps can be read afterwards.)
session_2024 = fastf1.get_session(
    year=2024,
    gp='Australian Grand Prix',
    identifier='R',
)
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.2]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.5.2]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '-1:57:37.891' in Ergastresponse.
req            INFO 	No cached data found for session_status_

In [5]:
# Extract Lap Times
# Keep only the driver code and lap time columns, discard laps that have no
# recorded lap time, then add the lap time expressed in seconds as a float
# column alongside the original timedelta column.
laps_2024 = (
    session_2024.laps
    .loc[:, ['Driver', 'LapTime']]
    .dropna(subset=['LapTime'])
)
laps_2024 = laps_2024.assign(
    **{'LapTime (s)': laps_2024['LapTime'].dt.total_seconds()}
)

In [6]:
# 2025 Qualifying Data
# ('Q' is the qualifying session identifier.)
# NOTE(review): none of the cells visible here read session_2025 -- the
# qualifying times used further down are typed in by hand. Confirm whether
# this download is still needed.
session_2025 = fastf1.get_session(
    year=2025,
    gp='Australian Grand Prix',
    identifier='Q',
)
session_2025.load()

core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.2]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Qualifying [v3.5.2]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_

In [7]:
# Import pandas library
import pandas as pd

# 2025 qualifying data, entered by hand as one (driver, time in seconds)
# record per row, in the order they should appear in the DataFrame.
qualifying_records = [
    ("Lando Norris", 75.096),
    ("Oscar Piastri", 75.180),
    ("Max Verstappen", 75.481),
    ("George Russell", 75.546),
    ("Yuki Tsunoda", 75.670),
    ("Alexander Albon", 75.737),
    ("Charles Leclerc", 75.755),
    ("Lewis Hamilton", 75.973),
    ("Pierre Gasly", 75.980),
    ("Carlos Sainz", 76.062),
]

# Create a DataFrame for 2025 Qualifying Data
qualifying_2025 = pd.DataFrame.from_records(
    qualifying_records,
    columns=["Driver", "QualifyingTime (s)"],
)

# Display the DataFrame
print(qualifying_2025)

            Driver  QualifyingTime (s)
0     Lando Norris              75.096
1    Oscar Piastri              75.180
2   Max Verstappen              75.481
3   George Russell              75.546
4     Yuki Tsunoda              75.670
5  Alexander Albon              75.737
6  Charles Leclerc              75.755
7   Lewis Hamilton              75.973
8     Pierre Gasly              75.980
9     Carlos Sainz              76.062


In [8]:
# Dictionary mapping driver names to 3-letter codes
# (the codes are what gets joined against the "Driver" column of laps_2024).
driver_mapping = {
    "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER",
    "George Russell": "RUS", "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB",
    "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM", "Pierre Gasly": "GAS",
    "Carlos Sainz": "SAI", "Lance Stroll": "STR", "Fernando Alonso": "ALO",
    "Esteban Ocon": "OCO"}

# Map full driver names to their 3-letter codes
qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

# Series.map leaves NaN for any name that is not a key of driver_mapping, and
# the inner merge below would then drop that driver from the training data
# without any warning. Fail loudly instead so the mapping gets fixed.
unmapped_drivers = qualifying_2025.loc[
    qualifying_2025["DriverCode"].isna(), "Driver"
].tolist()
if unmapped_drivers:
    raise ValueError(
        f"No 3-letter code in driver_mapping for: {unmapped_drivers}"
    )

# Attach every 2024 lap of a driver to that driver's 2025 qualifying row.
# Inner join: drivers without any 2024 laps contribute no rows. Both frames
# have a "Driver" column, so the result carries them as Driver_x / Driver_y.
merged_data = qualifying_2025.merge(
    laps_2024,
    how="inner",
    left_on="DriverCode",
    right_on="Driver"
)

# Extracting QualifyingTime from merged_data as feature (X)
X = merged_data["QualifyingTime (s)"]

# Extracting LapTime from merged_data as target (y)
y = merged_data["LapTime (s)"]

In [23]:
# Guard: refuse to train when preprocessing left no rows at all.
if len(X) == 0:
    raise ValueError("Dataset is empty after preprocessing. Check data sources!")

# Hold out 20% of the rows for testing. The single feature is reshaped into a
# one-column 2-D array, which is the layout the estimator is fed below.
X_train, X_test, y_train, y_test = train_test_split(
    X.to_numpy().reshape(-1, 1),
    y,
    test_size=0.2,
    random_state=3,
)

# Fit the gradient boosting regressor (fit() returns the estimator itself).
model = GradientBoostingRegressor(
    n_estimators=200,
    learning_rate=0.1,
    random_state=3,
).fit(X_train, y_train)

# Predict from each driver's qualifying time. The model's target is the
# per-lap "LapTime (s)" column, so the values stored under
# "PredictedRaceTime (s)" are lap-time predictions.
qualifying_features = qualifying_2025["QualifyingTime (s)"].to_numpy().reshape(-1, 1)
predicted_lap_times = model.predict(qualifying_features)

# Store the predictions next to the qualifying data.
qualifying_2025["PredictedRaceTime (s)"] = predicted_lap_times

# Order drivers from lowest to highest predicted time.
qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime (s)")

# Show the ranking.
print(qualifying_2025.loc[:, ["Driver", "PredictedRaceTime (s)"]])

            Driver  PredictedRaceTime (s)
0     Lando Norris              82.542343
9     Carlos Sainz              83.371898
6  Charles Leclerc              83.438032
2   Max Verstappen              83.607030
3   George Russell              83.837653
1    Oscar Piastri              84.013792
5  Alexander Albon              84.654497
4     Yuki Tsunoda              84.689247
8     Pierre Gasly              85.083022
7   Lewis Hamilton              85.647466


In [10]:
# Score the fitted model on the held-out test split.
# NOTE(review): this rebinds predicted_lap_times, the same name the training
# cell uses for the per-driver qualifying predictions -- after this cell it
# holds the test-set predictions instead.
predicted_lap_times = model.predict(X_test)

# Mean absolute error between the held-out lap times and the predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=predicted_lap_times)
print(f"Mean Absolute Error (MAE): {mae}")

Mean Absolute Error (MAE): 2.6751092800163234
