<a href="https://colab.research.google.com/github/Suvana-Rohanlal/Race_predictor/blob/main/race_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [137]:
!pip install fastf1 pandas numpy scikit-learn



In [138]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [136]:
# Optional: persist FastF1's download cache in a project-local directory.
# NOTE(review): the directory presumably has to exist before enabling - confirm.
# fastf1.Cache.enable_cache("f1_cache")

# Load the 2024 Australian GP race session.
# The event is selected by name rather than by round number so the code cannot
# silently drift to another race if the round index is edited or differs
# between seasons.
session_2024 = fastf1.get_session(2024, "Australian Grand Prix", "R")
# print(session_2024.event)  # uncomment to confirm which event was resolved

# Only lap timing is used by the visible cells; skip the telemetry and weather
# downloads to keep a full "Restart & Run All" fast.
session_2024.load(telemetry=False, weather=False)

core           INFO 	Loading data for Dutch Grand Prix - Race [v3.6.0]
INFO:fastf1.fastf1.core:Loading data for Dutch Grand Prix - Race [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data.

In [139]:
# Per-lap table for the loaded session: driver code, raw lap time, and the lap
# time expressed in seconds. Laps without a recorded LapTime are discarded
# before the conversion.
laps_2024 = (
    session_2024.laps[["Driver", "LapTime"]]
    .dropna(subset=["LapTime"])
    .assign(**{"LapTime (s)": lambda laps: laps["LapTime"].dt.total_seconds()})
)


In [140]:
# Full driver name -> three-letter code used in the "Driver" column of the lap data.
driver_mapping = {
    "Lando Norris": "NOR",
    "Oscar Piastri": "PIA",
    "Max Verstappen": "VER",
    "George Russell": "RUS",
    "Yuki Tsunoda": "TSU",
    "Alexander Albon": "ALB",
    "Charles Leclerc": "LEC",
    "Lewis Hamilton": "HAM",
    "Pierre Gasly": "GAS",
    "Carlos Sainz": "SAI",
    "Lance Stroll": "STR",
    "Fernando Alonso": "ALO",
}

# Inverse lookup (code -> full name) and the list of drivers to keep.
driver_mapping_rev = {code: name for name, code in driver_mapping.items()}
drivers_2025 = list(driver_mapping)

# Mean lap time per driver code over the loaded session.
driver_avg_times = (
    laps_2024.groupby("Driver")["LapTime (s)"]
    .mean()
    .reset_index()
    .rename(columns={"LapTime (s)": "AvgRacePace_2024"})
)

# Swap the codes for full names; codes missing from the mapping become NaN
# and are removed by the membership filter just below.
driver_avg_times["Driver"] = driver_avg_times["Driver"].map(driver_mapping_rev)

keep_rows = driver_avg_times["Driver"].isin(drivers_2025)
avg_racepace_2024 = driver_avg_times.loc[keep_rows].reset_index(drop=True)

# One row per driver: full name, three-letter code, average pace in seconds.
racepace_2024 = (
    avg_racepace_2024
    .assign(DriverCode=avg_racepace_2024["Driver"].map(driver_mapping))
    .rename(columns={"AvgRacePace_2024": "AvgRacePace_2024 (s)"})
    [["Driver", "DriverCode", "AvgRacePace_2024 (s)"]]
)

# Attach every individual lap to its driver's average. Both frames have a
# "Driver" column, so pandas suffixes them as Driver_x / Driver_y.
merged_data = pd.merge(
    racepace_2024,
    laps_2024,
    how="left",
    left_on="DriverCode",
    right_on="Driver",
)
print(merged_data)

            Driver_x DriverCode  AvgRacePace_2024 (s) Driver_y  \
0    Alexander Albon        ALB             77.270845      ALB   
1    Alexander Albon        ALB             77.270845      ALB   
2    Alexander Albon        ALB             77.270845      ALB   
3    Alexander Albon        ALB             77.270845      ALB   
4    Alexander Albon        ALB             77.270845      ALB   
..               ...        ...                   ...      ...   
854   Max Verstappen        VER             75.950208      VER   
855   Max Verstappen        VER             75.950208      VER   
856   Max Verstappen        VER             75.950208      VER   
857   Max Verstappen        VER             75.950208      VER   
858   Max Verstappen        VER             75.950208      VER   

                   LapTime  LapTime (s)  
0   0 days 00:01:28.535000       88.535  
1   0 days 00:01:19.114000       79.114  
2   0 days 00:01:18.072000       78.072  
3   0 days 00:01:17.747000       77.747

In [141]:
# # 2025 Qualifying Data (disabled alternative: qualifying time as the model feature instead of the 2024 average race pace)
# qualifying_2025 = pd.DataFrame({
#     "Driver": ["Lando Norris", "Oscar Piastri", "Max Verstappen", "George Russell", "Yuki Tsunoda",
#                "Alexander Albon", "Charles Leclerc", "Lewis Hamilton", "Pierre Gasly", "Carlos Sainz", "Fernando Alonso", "Lance Stroll"],
#     "QualifyingTime (s)": [75.096, 75.180, 75.481, 75.546, 75.670,
#                            75.737, 75.755, 75.973, 75.980, 76.062, 76.4, 76.5]
# })

# # Map full names to FastF1 3-letter codes
# driver_mapping = {
#     "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER", "George Russell": "RUS",
#     "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM",
#     "Pierre Gasly": "GAS", "Carlos Sainz": "SAI", "Lance Stroll": "STR", "Fernando Alonso": "ALO"
# }

# qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

# # Merge 2025 Qualifying Data with 2024 Race Data
# merged_data = qualifying_2025.merge(laps_2024, left_on="DriverCode", right_on="Driver")
# print(merged_data)

In [142]:

# Single-feature design matrix: each lap row carries its driver's average pace;
# the target is that lap's own time in seconds.
X = merged_data.loc[:, ["AvgRacePace_2024 (s)"]]
y = merged_data.loc[:, "LapTime (s)"]

# Disabled variant: qualifying time as the only feature.
# X = merged_data[["QualifyingTime (s)"]]
# y = merged_data["LapTime (s)"]

# Fail fast if the preprocessing steps left no rows to train on.
if len(X) == 0:
    raise ValueError("Dataset is empty after preprocessing. Check data sources!")

In [143]:
# Hold out 20% of the lap rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=39, test_size=0.2)

# Gradient-boosted regressor fitted on the training portion only.
model = GradientBoostingRegressor(
    random_state=39,
    learning_rate=0.1,
    n_estimators=100,
)
model.fit(X_train, y_train)


In [144]:
# Score every driver once, feeding the model the same single feature column it
# was trained on, and store the result next to the driver's average pace.
predicted_lap_times = model.predict(racepace_2024.loc[:, ["AvgRacePace_2024 (s)"]])
racepace_2024["PredictedRaceTime (s)"] = predicted_lap_times

# Disabled variant: predict from 2025 qualifying times.
# predicted_lap_times = model.predict(qualifying_2025[["QualifyingTime (s)"]])
# qualifying_2025["PredictedRaceTime (s)"] = predicted_lap_times

In [145]:
# Order the drivers from fastest to slowest predicted lap time.
racepace_2024 = racepace_2024.sort_values("PredictedRaceTime (s)")

# Show the ranking.
print("\n🏁 Predicted 2025 Australian GP Winner 🏁\n")
print(racepace_2024.loc[:, ["Driver", "PredictedRaceTime (s)"]])

# Mean absolute error of the model on the held-out laps.
y_pred = model.predict(X_test)
holdout_mae = mean_absolute_error(y_test, y_pred)
print(f"\n🔍 Model Error (MAE): {holdout_mae:.2f} seconds")

# Disabled qualifying-time variant of this cell:
# qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime (s)")
#
# # Print final predictions
# print("\n🏁 Predicted 2025 Chinese GP Winner 🏁\n")
# print(qualifying_2025[["Driver", "PredictedRaceTime (s)"]])
#
# # Evaluate Model
# y_pred = model.predict(X_test)
# print(f"\n🔍 Model Error (MAE): {mean_absolute_error(y_test, y_pred):.2f} seconds")


🏁 Predicted 2025 Australian GP Winner 🏁

             Driver  PredictedRaceTime (s)
5      Lando Norris              75.656202
6     Oscar Piastri              75.879980
4   Charles Leclerc              75.906464
11   Max Verstappen              75.973506
8      Carlos Sainz              76.101556
3    Lewis Hamilton              76.328466
7    George Russell              76.403737
9      Lance Stroll              76.804741
2      Pierre Gasly              76.979657
1   Fernando Alonso              77.032162
0   Alexander Albon              77.409167
10     Yuki Tsunoda              77.741819

🔍 Model Error (MAE): 1.27 seconds
