In [19]:
import fastf1
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer

In [5]:
# Get the F1 calendar for 2024
events = fastf1.get_event_schedule(2024)
print(events[['RoundNumber', 'EventName', 'EventDate']])

    RoundNumber                  EventName  EventDate
0             0         Pre-Season Testing 2024-02-23
1             1         Bahrain Grand Prix 2024-03-02
2             2   Saudi Arabian Grand Prix 2024-03-09
3             3      Australian Grand Prix 2024-03-24
4             4        Japanese Grand Prix 2024-04-07
5             5         Chinese Grand Prix 2024-04-21
6             6           Miami Grand Prix 2024-05-05
7             7  Emilia Romagna Grand Prix 2024-05-19
8             8          Monaco Grand Prix 2024-05-26
9             9        Canadian Grand Prix 2024-06-09
10           10         Spanish Grand Prix 2024-06-23
11           11        Austrian Grand Prix 2024-06-30
12           12         British Grand Prix 2024-07-07
13           13       Hungarian Grand Prix 2024-07-21
14           14         Belgian Grand Prix 2024-07-28
15           15           Dutch Grand Prix 2024-08-25
16           16         Italian Grand Prix 2024-09-01
17           17      Azerbai

In [6]:
# Load 2024 Las Vegas Race Data 
session = fastf1.get_session(2024, 22, "R")
session.load()

core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No 

In [7]:
# Extract lap data
laps =session.laps[["Driver", "LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]].dropna()

In [8]:
# Convert to seconds 
for col in ["LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]:
    laps[col] = laps[col].dt.total_seconds()

In [9]:
# Compute average sector times per driver
sector_times = (
    laps.groupby("Driver")[["Sector1Time", "Sector2Time", "Sector3Time"]].mean().reset_index()
)
sector_times["TotalSectorTime"] = (
    sector_times["Sector1Time"] + sector_times["Sector2Time"] + sector_times["Sector3Time"]
)

In [10]:
# 2025 Qualifying Data 
qualifying = pd.DataFrame({
    "Driver":["VER","NOR","PIA","RUS","SAI","ALB","LEC","OCO","HAM","STR","GAS","ALO","HUL"],
    "QualifyingTime":[74.669,73.954,74.129,75.100,75.362,75.213,74.063,74.942,
                       74.382,76.563,75.994,74.924,75.596]
})

In [11]:
# Clean air 
clean_air = {
    "VER":93.5,"HAM":94.1,"LEC":93.7,"NOR":93.8,"ALO":95.2,
    "PIA":93.4,"RUS":93.9,"SAI":94.6,"STR":95.4,"HUL":95.5,"OCO":95.7
}

In [12]:
qualifying['CleanAirPace'] = qualifying['Driver'].map(clean_air)

In [14]:
# Team Performace 
team_points = {
    "McLaren":279,"Mercedes":147,"Red Bull":131,"Williams":51,"Ferrari":114,
    "Haas":20,"Aston Martin":14,"Kick Sauber":6,"Racing Bulls":10,"Alpine":7
}
max_points = max(team_points.values())
team_score = {t:p/max_points for t,p in team_points.items()}

driver_team = {
    "VER":"Red Bull","NOR":"McLaren","PIA":"McLaren","LEC":"Ferrari","RUS":"Mercedes",
    "HAM":"Mercedes","GAS":"Alpine","ALO":"Aston Martin","SAI":"Ferrari","HUL":"Kick Sauber",
    "OCO":"Alpine","STR":"Aston Martin","ALB":"Williams"
}

qualifying["Team"] = qualifying["Driver"].map(driver_team)
qualifying["TeamScore"] = qualifying["Team"].map(team_score)


In [15]:
# Add Weather Forecast 
qualifying["RainProb"] = 0.0
qualifying["Temp"] = 16

In [16]:
# Average Position Change 
pos_change = {
    "VER": -0.8,"NOR":1.1,"PIA":0.3,"RUS":0.6,"SAI":-0.4,"ALB":0.7,
    "LEC":-1.3,"OCO":-0.1,"HAM":0.2,"STR":1.0,"GAS":-0.3,"ALO":-0.5,"HUL":0.0
}
qualifying["PosChange"] = qualifying["Driver"].map(pos_change)

In [17]:
# Merge with 2024 Sector Data 
data = qualifying.merge(sector_times[["Driver","TotalSectorTime"]], on="Driver", how="left")


In [18]:
# Target = 2024 race pace (driver avg lap time)
y = laps.groupby("Driver")["LapTime"].mean().reindex(data["Driver"])

# Features
X = data[[
    "QualifyingTime","RainProb","Temp","TeamScore","CleanAirPace",
    "PosChange","TotalSectorTime"
]]

In [20]:
# Train Model 
imputer = SimpleImputer(strategy="median")
X_imp = imputer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_imp, y, test_size=0.3, random_state=42)

model = GradientBoostingRegressor(
    n_estimators=120,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
model.fit(X_train, y_train)

# Predict
data["PredictedRacePace"] = model.predict(X_imp)

In [21]:
# Final predicted order
results = data.sort_values("PredictedRacePace").reset_index(drop=True)
print("\nüèÅ Predicted 2025 Las Vegas GP Finishing Order\n")
print(results[["Driver","PredictedRacePace"]])


üèÅ Predicted 2025 Las Vegas GP Finishing Order

   Driver  PredictedRacePace
0     RUS          98.422496
1     HAM          98.442820
2     VER          98.626658
3     SAI          98.633943
4     LEC          98.701431
5     NOR          99.235961
6     PIA          99.365327
7     STR          99.493167
8     HUL          99.532693
9     ALO          99.532889
10    OCO         100.375581
11    ALB         100.786413
12    GAS         102.260206


In [22]:
# MAE
y_pred = model.predict(X_test)
print(f"\nModel MAE: {mean_absolute_error(y_test, y_pred):.3f} sec")


Model MAE: 0.157 sec


In [23]:
# Podium
pod = results.loc[:2, "Driver"]
print("\nüèÜ Podium Prediction:")
print(f"1st: {pod.iloc[0]}")
print(f"2nd: {pod.iloc[1]}")
print(f"3rd: {pod.iloc[2]}")


üèÜ Podium Prediction:
1st: RUS
2nd: HAM
3rd: VER
