In [1]:
pip install fastf1 pandas numpy scikit-learn

Collecting fastf1
  Downloading fastf1-3.6.0-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-25.1.1-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading fastf1-3.6.0-py3-none-any.whl (148 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [2]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [3]:
import os
os.makedirs("f1_cache", exist_ok=True)   # This creates the directory if it does not exist
fastf1.Cache.enable_cache("f1_cache")    # Now enable the cache
# speed up repeated access to race data
fastf1.Cache.enable_cache("f1_cache")

In [4]:
session_2024 = fastf1.get_session(2024, "China", "R")
session_2024.load()

core           INFO 	Loading data for Chinese Grand Prix - Race [v3.6.0]
INFO:fastf1.fastf1.core:Loading data for Chinese Grand Prix - Race [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_d

In [5]:
# extract lap and sector time
laps_2024 = session_2024.laps[["Driver", "LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]].copy()
print("📊 Raw lap data:")
print(laps_2024.head())

📊 Raw lap data:
  Driver                LapTime            Sector1Time            Sector2Time  \
0    VER 0 days 00:01:41.528000                    NaT 0 days 00:00:29.650000   
1    VER 0 days 00:01:40.103000 0 days 00:00:26.651000 0 days 00:00:30.091000   
2    VER 0 days 00:01:40.494000 0 days 00:00:27.011000 0 days 00:00:30.190000   
3    VER 0 days 00:01:40.573000 0 days 00:00:26.923000 0 days 00:00:30.187000   
4    VER 0 days 00:01:40.919000 0 days 00:00:27.106000 0 days 00:00:30.299000   

             Sector3Time  
0 0 days 00:00:43.295000  
1 0 days 00:00:43.361000  
2 0 days 00:00:43.293000  
3 0 days 00:00:43.463000  
4 0 days 00:00:43.514000  


In [6]:
laps_2024.dropna(inplace=True)

In [7]:
# time to sec
for col in ["LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]:
    laps_2024[f"{col} (s)"] = laps_2024[col].dt.total_seconds()

print("\n📏 Lap and sector times in seconds (sample):")
print(laps_2024[["Driver", "LapTime (s)", "Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]].head())


📏 Lap and sector times in seconds (sample):
  Driver  LapTime (s)  Sector1Time (s)  Sector2Time (s)  Sector3Time (s)
1    VER      100.103           26.651           30.091           43.361
2    VER      100.494           27.011           30.190           43.293
3    VER      100.573           26.923           30.187           43.463
4    VER      100.919           27.106           30.299           43.514
5    VER      101.147           27.114           30.431           43.602


In [8]:
# avg sec time per driver
sector_times_2024 = laps_2024.groupby("Driver")[["Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]].mean().reset_index()
print("\n📊 Average sector times per driver:")
print(sector_times_2024.head())


📊 Average sector times per driver:
  Driver  Sector1Time (s)  Sector2Time (s)  Sector3Time (s)
0    ALB        29.195855        32.869764        47.183400
1    ALO        28.987283        31.951038        46.439189
2    BOT        28.427278        31.032222        44.450000
3    GAS        29.548364        32.672455        47.085909
4    HAM        29.151218        32.691655        47.230545


In [9]:
# 2025 Chinese GP Qualifying Times
qualifying_2025 = pd.DataFrame({
    "Driver": ["Oscar Piastri", "George Russell", "Lando Norris", "Max Verstappen", "Lewis Hamilton",
               "Charles Leclerc", "Isack Hadjar", "Andrea Kimi Antonelli", "Yuki Tsunoda", "Alexander Albon",
               "Esteban Ocon", "Nico Hülkenberg", "Fernando Alonso", "Lance Stroll", "Carlos Sainz Jr.",
               "Pierre Gasly", "Oliver Bearman", "Jack Doohan", "Gabriel Bortoleto", "Liam Lawson"],
    "QualifyingTime (s)": [90.641, 90.723, 90.793, 90.817, 90.927,
                           91.021, 91.079, 91.103, 91.638, 91.706,
                           91.625, 91.632, 91.688, 91.773, 91.840,
                           91.992, 92.018, 92.092, 92.141, 92.174]
})

print("\n📋 2025 Qualifying Times (Chinese GP):")
print(qualifying_2025)


📋 2025 Qualifying Times (Chinese GP):
                   Driver  QualifyingTime (s)
0           Oscar Piastri              90.641
1          George Russell              90.723
2            Lando Norris              90.793
3          Max Verstappen              90.817
4          Lewis Hamilton              90.927
5         Charles Leclerc              91.021
6            Isack Hadjar              91.079
7   Andrea Kimi Antonelli              91.103
8            Yuki Tsunoda              91.638
9         Alexander Albon              91.706
10           Esteban Ocon              91.625
11        Nico Hülkenberg              91.632
12        Fernando Alonso              91.688
13           Lance Stroll              91.773
14       Carlos Sainz Jr.              91.840
15           Pierre Gasly              91.992
16         Oliver Bearman              92.018
17            Jack Doohan              92.092
18      Gabriel Bortoleto              92.141
19            Liam Lawson              92

In [10]:
driver_mapping = {
    "Oscar Piastri": "PIA", "George Russell": "RUS", "Lando Norris": "NOR", "Max Verstappen": "VER",
    "Lewis Hamilton": "HAM", "Charles Leclerc": "LEC", "Isack Hadjar": "HAD", "Andrea Kimi Antonelli": "ANT",
    "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Esteban Ocon": "OCO", "Nico Hülkenberg": "HUL",
    "Fernando Alonso": "ALO", "Lance Stroll": "STR", "Carlos Sainz Jr.": "SAI", "Pierre Gasly": "GAS",
    "Oliver Bearman": "BEA", "Jack Doohan": "DOO", "Gabriel Bortoleto": "BOR", "Liam Lawson": "LAW"
}

qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)
print("\nDriver mapping applied:")
print(qualifying_2025.head())


Driver mapping applied:
           Driver  QualifyingTime (s) DriverCode
0   Oscar Piastri              90.641        PIA
1  George Russell              90.723        RUS
2    Lando Norris              90.793        NOR
3  Max Verstappen              90.817        VER
4  Lewis Hamilton              90.927        HAM


In [11]:
merged_data = qualifying_2025.merge(sector_times_2024, left_on="DriverCode", right_on="Driver", how="left")
print("\n🔗 Merged 2025 Quali + 2024 Sector data:")
print(merged_data.head())


🔗 Merged 2025 Quali + 2024 Sector data:
         Driver_x  QualifyingTime (s) DriverCode Driver_y  Sector1Time (s)  \
0   Oscar Piastri              90.641        PIA      PIA        28.937296   
1  George Russell              90.723        RUS      RUS        28.795722   
2    Lando Norris              90.793        NOR      NOR        28.553593   
3  Max Verstappen              90.817        VER      VER        28.198173   
4  Lewis Hamilton              90.927        HAM      HAM        29.151218   

   Sector2Time (s)  Sector3Time (s)  
0        32.519426        46.923907  
1        32.411685        46.822019  
2        32.451481        46.493556  
3        31.471942        45.636635  
4        32.691655        47.230545  


In [12]:
# Features: Use all sector times and qualifying time
X = merged_data[["QualifyingTime (s)", "Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]].fillna(0)

# Target: Average lap time per driver from 2024 Chinese GP
y = laps_2024.groupby("Driver")["LapTime (s)"].mean().reset_index()["LapTime (s)"]

print("\nFeatures (X):")
print(X.head())

print("\nTarget (y):")
print(y.head())


Features (X):
   QualifyingTime (s)  Sector1Time (s)  Sector2Time (s)  Sector3Time (s)
0              90.641        28.937296        32.519426        46.923907
1              90.723        28.795722        32.411685        46.822019
2              90.793        28.553593        32.451481        46.493556
3              90.817        28.198173        31.471942        45.636635
4              90.927        29.151218        32.691655        47.230545

Target (y):
0    109.249018
1    107.377509
2    103.909500
3    109.306727
4    109.073418
Name: LapTime (s), dtype: float64


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=38)

model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, random_state=38)
model.fit(X_train, y_train)

print("\n Model trained successfully.")


 Model trained successfully.


In [14]:
predicted_race_times = model.predict(X)
qualifying_2025["PredictedRaceTime (s)"] = predicted_race_times

# Sort by predicted race time
qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime (s)")

In [15]:
print("\n🏁 Predicted 2025 Chinese GP Results 🏁")
print(qualifying_2025[["Driver", "PredictedRaceTime (s)"]])


🏁 Predicted 2025 Chinese GP Results 🏁
                   Driver  PredictedRaceTime (s)
2            Lando Norris             103.911242
16         Oliver Bearman             105.768026
10           Esteban Ocon             105.937552
6            Isack Hadjar             106.014323
8            Yuki Tsunoda             107.250668
1          George Russell             107.378067
5         Charles Leclerc             107.433970
14       Carlos Sainz Jr.             107.987549
13           Lance Stroll             108.029351
11        Nico Hülkenberg             108.380663
7   Andrea Kimi Antonelli             108.485977
18      Gabriel Bortoleto             108.539843
19            Liam Lawson             108.539843
4          Lewis Hamilton             109.072694
9         Alexander Albon             109.209534
0           Oscar Piastri             109.248207
3          Max Verstappen             109.305052
15           Pierre Gasly             109.554230
17            Jack Doohan     

In [16]:
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)

print(f"\n📏 Model Mean Absolute Error (MAE): {mae:.2f} seconds")


📏 Model Mean Absolute Error (MAE): 1.35 seconds
