In [2]:
# Install required package
!pip install fastf1

# Imports
import os
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

# Define cache directory
cache_dir = "/content/f1_cache"
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)
fastf1.Cache.enable_cache(cache_dir)
print("Cache enabled at:", cache_dir)

# Load FastF1 2024 Chinese GP race session
session_2024 = fastf1.get_session(2024, 'China', 'R')
session_2024.load()

# Extract lap times
laps_2024 = session_2024.laps[["Driver", "LapTime"]].copy()
laps_2024.dropna(subset=["LapTime"], inplace=True)
laps_2024["LapTime (s)"] = laps_2024["LapTime"].dt.total_seconds()

# 2025 Qualifying Data (Update with actual qualifying results)
qualifying_2025 = pd.DataFrame({
    "Driver": [
        "Oscar Piastri", "George Russell", "Lando Norris", "Max Verstappen", "Lewis Hamilton",
        "Charles Leclerc", "Isack Hadjar", "Andrea Kimi Antonelli", "Yuki Tsunoda",
        "Alexander Albon", "Esteban Ocon", "Nico Hulkenberg"
    ],
    "QualifyingTime (s)": [
        90.641, 90.723, 90.793, 90.817, 90.927, 91.021, 91.079, 91.103,
        91.638, 91.706, 91.625, 91.632
    ]
})

# Map driver names to FastF1 codes (ensure all codes are correct)
driver_mapping = {
    "Oscar Piastri": "PIA", "George Russell": "RUS", "Lando Norris": "NOR",
    "Max Verstappen": "VER", "Lewis Hamilton": "HAM", "Charles Leclerc": "LEC",
    "Isack Hadjar": "HAD", "Andrea Kimi Antonelli": "ANT", "Yuki Tsunoda": "TSU",
    "Alexander Albon": "ALB", "Esteban Ocon": "OCO", "Nico Hulkenberg": "HUL"
}

qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

# Merge 2025 qualifying data with 2024 race lap times
merged_data = qualifying_2025.merge(laps_2024, left_on="DriverCode", right_on="Driver", how="left")
merged_data.dropna(inplace=True)
merged_data["LapTime"] = merged_data["LapTime"].dt.total_seconds()

# Feature selection
X = merged_data[["QualifyingTime (s)"]]
y = merged_data["LapTime"]
if X.shape[0] == 0:
    raise ValueError("No data available for training.")

# Train Gradient Boosting Model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Predict 2025 race times
predicted_times = model.predict(qualifying_2025[["QualifyingTime (s)"]])
qualifying_2025["PredictedRaceTime"] = predicted_times

# Rank drivers
qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime")
qualifying_2025["Position"] = range(1, len(qualifying_2025) + 1)

# Print final predictions
print("\n Predicted 2025 Chinese GP Results \n")
print(qualifying_2025[["Position", "Driver", "PredictedRaceTime"]])

# Evaluate model
y_pred = model.predict(X_test)
print(f"\n Model Error (MAE): {mean_absolute_error(y_test, y_pred):.2f} seconds")




core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

Cache enabled at: /content/f1_cache


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']



 Predicted 2025 Chinese GP Results 

    Position                 Driver  PredictedRaceTime
1          1         George Russell         104.734827
3          2         Max Verstappen         105.139185
6          3           Isack Hadjar         105.172271
5          4        Charles Leclerc         105.172271
7          5  Andrea Kimi Antonelli         105.172271
2          6           Lando Norris         106.026881
11         7        Nico Hulkenberg         108.295564
9          8        Alexander Albon         109.106818
0          9          Oscar Piastri         109.328879
4         10         Lewis Hamilton         109.604882
10        11           Esteban Ocon         110.003381
8         12           Yuki Tsunoda         111.362576

 Model Error (MAE): 10.72 seconds
