In [13]:
!pip install fastf1 tabulate xgboost



In [14]:
import os
import fastf1
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from tabulate import tabulate

In [15]:
# On-disk cache for FastF1 so repeated runs reuse previously downloaded
# session data instead of fetching it again.
cache_dir = "/content/saudi_f1_cache"
# exist_ok=True creates the directory when it is missing and is a no-op when
# it already exists, so no separate existence check is needed.
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [16]:
# Full 2025 grid: one (full name, three-letter code, car number) record per
# driver. Car numbers are kept as strings.
grid_2025_full = pd.DataFrame(
    [
        ("Lando Norris", "NOR", '4'),
        ("Oscar Piastri", "PIA", '81'),
        ("Max Verstappen", "VER", '1'),
        ("Lewis Hamilton", "HAM", '44'),
        ("Charles Leclerc", "LEC", '16'),
        ("Alex Albon", "ALB", '23'),
        ("Carlos Sainz", "SAI", '55'),
        ("George Russell", "RUS", '63'),
        ("Fernando Alonso", "ALO", '14'),
        ("Lance Stroll", "STR", '18'),
        ("Pierre Gasly", "GAS", '10'),
        ("Yuki Tsunoda", "TSU", '22'),
        ("Esteban Ocon", "OCO", '31'),
        ("Nico Hulkenberg", "HUL", '27'),
        ("Kimi Antonelli", "ANT", '12'),
        ("Isack Hadjar", "HAD", '6'),
        ("Ollie Bearman", "BEA", '87'),
        ("Liam Lawson", "LAW", '30'),
        ("Jack Doohan", "DOO", '7'),
        ("Gabriel Bortoleto", "BOR", '5'),
    ],
    columns=["Driver", "DriverCode", "DriverNumber"],
).astype({"DriverNumber": str})

# Working table that the following cells extend with feature columns; it is a
# copy, so the base grid above is left unchanged.
data_2025_full = grid_2025_full.copy()

In [17]:
# Load the 2024 Saudi GP race results and attach each driver's finishing rank
# to the working table as "Saudi2024Rank".
session = fastf1.get_session(2024, 'Saudi', 'R')
session.load()
results = (
    session.results[["Abbreviation", "Position"]]
    .rename(columns={"Abbreviation": "DriverCode"})
)
# errors='coerce' turns unparseable positions into NaN. The rank is left as a
# float so those rows stay NaN instead of raising on an integer cast; the
# merged column is float anyway whenever a 2025 driver has no 2024 result.
results["Saudi2024Rank"] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")
# Drop the column left by a previous run of this cell first, so re-running it
# does not create suffixed duplicates (Saudi2024Rank_x / Saudi2024Rank_y).
data_2025_full = (
    data_2025_full
    .drop(columns=["Saudi2024Rank"], errors="ignore")
    .merge(results[["DriverCode", "Saudi2024Rank"]], on="DriverCode", how="left")
)

core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
c

In [18]:
# 2025 Saudi Arabia Qualifying Results, listed in qualifying order: the first
# driver gets rank 1 and the last gets rank 20.
quali_2025_saudi = pd.DataFrame({
    "Driver": [
        "Max Verstappen", "Oscar Piastri", "George Russell", "Charles Leclerc",
        "Kimi Antonelli", "Carlos Sainz", "Lewis Hamilton", "Yuki Tsunoda",
        "Pierre Gasly", "Lando Norris", "Alex Albon", "Liam Lawson",
        "Fernando Alonso", "Isack Hadjar", "Ollie Bearman", "Lance Stroll",
        "Jack Doohan", "Nico Hulkenberg", "Esteban Ocon", "Gabriel Bortoleto"
    ],
    "Saudi2025QualiRank": list(range(1, 21))
})

# Drop the column left by a previous run of this cell first, so re-running it
# does not create suffixed duplicates (Saudi2025QualiRank_x / _y).
data_2025_full = (
    data_2025_full
    .drop(columns=["Saudi2025QualiRank"], errors="ignore")
    .merge(
        quali_2025_saudi[["Driver", "Saudi2025QualiRank"]],
        on="Driver", how="left"
    )
)

In [19]:
# 2025 Season Races used for the season-form feature
# (Australia, China, Japan, Bahrain)
races_2025 = ["Australia", "China", "Japanese", "Bahrain"]
# One "<race>Rank" column per race, derived from the list above so the average
# below always covers exactly the races that were loaded.
season_rank_cols = [f"{race}Rank" for race in races_2025]
# Drop columns left by a previous run of this cell so the merges below do not
# create suffixed duplicates on re-run.
data_2025_full = data_2025_full.drop(columns=season_rank_cols, errors="ignore")
for race in races_2025:
    rank_col = f"{race}Rank"
    session = fastf1.get_session(2025, race, 'R')
    session.load()
    results = (
        session.results[["Abbreviation", "Position"]]
        .rename(columns={"Abbreviation": "DriverCode"})
    )
    # errors='coerce' turns unparseable positions into NaN; keeping the rank as
    # a float lets those rows stay NaN instead of raising on an integer cast.
    results[rank_col] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")
    data_2025_full = data_2025_full.merge(results[["DriverCode", rank_col]], on="DriverCode", how="left")
# Row-wise mean across the race columns; DataFrame.mean skips NaN by default,
# so a driver missing from one race is averaged over the races they do have.
data_2025_full["Season2025AvgRank"] = data_2025_full[season_rank_cols].mean(axis=1)

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core   

In [20]:
# Relative weight of each rank feature. The same weights drive both the target
# score and the feature scaling, so they are defined once here.
feature_weights = {
    "Saudi2024Rank": 1.0,
    "Saudi2025QualiRank": 3.0,
    "Season2025AvgRank": 2.0,
}

# Features
features = list(feature_weights)
# A missing rank is treated as last place (20). The filled frame is used for
# both the target and the inputs so a NaN can never leak into y_train.
ranks_filled = data_2025_full[features].fillna(20).astype(float)

# Scores: each rank is turned into "places ahead of 20th", then combined as a
# weighted average (weights sum to 6.0).
data_2025_full["Score_2024"] = 20 - ranks_filled["Saudi2024Rank"]
y_train = (
    feature_weights["Saudi2024Rank"] * data_2025_full["Score_2024"] +
    feature_weights["Saudi2025QualiRank"] * (20 - ranks_filled["Saudi2025QualiRank"]) +
    feature_weights["Season2025AvgRank"] * (20 - ranks_filled["Season2025AvgRank"])
) / sum(feature_weights.values())

# Scale each feature column by its weight. The prediction cell applies the
# same multipliers, so the two must stay in sync.
# NOTE(review): tree splits are normally unaffected by rescaling a feature, so
# these multipliers may have no effect on the fitted model - confirm.
X_train = ranks_filled.copy()
for feature_name, weight in feature_weights.items():
    X_train[feature_name] *= weight

In [21]:
# Gradient-boosted regressor fitted on the weighted rank features; the
# hyperparameters are gathered in one place for easier tuning.
regressor_params = {
    "n_estimators": 100,
    "max_depth": 4,
    "learning_rate": 0.05,
    "random_state": 42,
    "objective": 'reg:squarederror',
}
model = XGBRegressor(**regressor_params)
model.fit(X_train, y_train)

In [22]:
# Build the prediction matrix with the same fill value, dtype and per-feature
# multipliers that were used for X_train.
X_pred = data_2025_full[features].fillna(20).astype(float)
X_pred["Saudi2024Rank"] = X_pred["Saudi2024Rank"] * 1.0
X_pred["Saudi2025QualiRank"] = X_pred["Saudi2025QualiRank"] * 3.0
X_pred["Season2025AvgRank"] = X_pred["Season2025AvgRank"] * 2.0
scores = model.predict(X_pred)

# Standardise the scores before the logistic squash. When every score is equal
# the spread is 0; use a zero exponent (everyone gets 0.5) instead of dividing
# by zero and filling the column with NaN.
score_spread = scores.std()
centered_scores = scores - scores.mean()
if score_spread == 0:
    exponent = np.zeros_like(centered_scores)
else:
    exponent = -3 * centered_scores / score_spread
# Logistic curve rescaled into the range 0.1-0.9. These are per-driver scores,
# not a normalised distribution: they do not sum to 1 across the grid.
data_2025_full["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(exponent))

# idxmax returns the first row holding the maximum, so exactly one driver is
# flagged even when several share the top value.
winner_idx = data_2025_full["WinProbability"].idxmax()
data_2025_full["PredictedWinner"] = (data_2025_full.index == winner_idx).astype(int)
winner = data_2025_full.loc[winner_idx, "Driver"]

In [24]:
# Output: feature importances of the fitted model, then the per-driver table
# sorted from highest to lowest WinProbability, then the predicted winner.
importance_table = pd.DataFrame(
    list(zip(features, model.feature_importances_)),
    columns=["Feature", "Importance"],
)
print("\n✨ Post-Quali Feature Importances (2024 + 2025 Season) ✨")
print(tabulate(importance_table, headers="keys", tablefmt="psql", showindex=False))

# The race being predicted is the Saudi Arabian GP (the features are the 2024
# Saudi result and 2025 Saudi qualifying); Bahrain is only one of the input
# races, so the headline names Saudi Arabia.
print("\n🏆 Post-Quali Predicted 2025 Saudi Arabian GP Winner Odds (2024 + 2025 Season) 🏆\n")
odds_columns = ["Driver", "DriverNumber", "Saudi2024Rank", "Saudi2025QualiRank", "Season2025AvgRank", "WinProbability"]
odds_table = data_2025_full[odds_columns].sort_values("WinProbability", ascending=False)
print(tabulate(odds_table,
               headers=["Driver", "No.", "2024 Rank", "2025 Quali", "2025 Avg Rank", "Win Prob."],
               tablefmt="fancy_grid",
               showindex=False,
               # one format per column, in the order of odds_columns
               floatfmt=("", "", ".0f", ".0f", ".1f", ".3f")))
print(f"\n🎯 Predicted Winner (Post-Quali): {winner} 🎯")


✨ Post-Quali Feature Importances (2024 + 2025 Season) ✨
+--------------------+--------------+
| Feature            |   Importance |
|--------------------+--------------|
| Saudi2024Rank      |    0.0352327 |
| Saudi2025QualiRank |    0.0542512 |
| Season2025AvgRank  |    0.910516  |
+--------------------+--------------+

🏆 Post-Quali Predicted 2025 Bahrain GP Winner Odds (2024 + 2025 Season) 🏆

╒═══════════════════╤═══════╤═════════════╤══════════════╤═════════════════╤═════════════╕
│ Driver            │   No. │   2024 Rank │   2025 Quali │   2025 Avg Rank │   Win Prob. │
╞═══════════════════╪═══════╪═════════════╪══════════════╪═════════════════╪═════════════╡
│ Max Verstappen    │     1 │           1 │            1 │             3.2 │       0.897 │
├───────────────────┼───────┼─────────────┼──────────────┼─────────────────┼─────────────┤
│ Oscar Piastri     │    81 │           4 │            2 │             3.5 │       0.895 │
├───────────────────┼───────┼─────────────┼────────────