In [21]:
# Install the essentials
!pip install fastf1 tabulate xgboost



In [22]:
import os
import fastf1
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from tabulate import tabulate

In [23]:
# Directory handed to FastF1 as its on-disk cache location.
cache_dir = "/content/bahrain_f1_cache"
# exist_ok=True creates the directory only when it is missing, without a separate
# existence check that could race with another process creating it.
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [24]:
# Full 2025 grid: one (driver name, three-letter code, car number) row per driver.
# Car numbers are kept as strings.
grid_2025_full = pd.DataFrame(
    [
        ("Lando Norris", "NOR", "4"),
        ("Oscar Piastri", "PIA", "81"),
        ("Max Verstappen", "VER", "1"),
        ("Lewis Hamilton", "HAM", "44"),
        ("Charles Leclerc", "LEC", "16"),
        ("Alex Albon", "ALB", "23"),
        ("Carlos Sainz", "SAI", "55"),
        ("George Russell", "RUS", "63"),
        ("Fernando Alonso", "ALO", "14"),
        ("Lance Stroll", "STR", "18"),
        ("Pierre Gasly", "GAS", "10"),
        ("Yuki Tsunoda", "TSU", "22"),
        ("Esteban Ocon", "OCO", "31"),
        ("Nico Hulkenberg", "HUL", "27"),
        ("Kimi Antonelli", "ANT", "12"),
        ("Isack Hadjar", "HAD", "6"),
        ("Ollie Bearman", "BEA", "87"),
        ("Liam Lawson", "LAW", "30"),
        ("Jack Doohan", "DOO", "7"),
        ("Gabriel Bortoleto", "BOR", "5"),
    ],
    columns=["Driver", "DriverCode", "DriverNumber"],
).astype({"DriverNumber": str})

# Working table: starts as a copy of the grid and is extended by later cells.
data_2025_full = grid_2025_full.copy()

In [25]:
# Load 2024 Bahrain GP data
session = fastf1.get_session(2024, 'Bahrain', 'R')
session.load()

# Keep only driver code and finishing position, renamed to match the grid table's key.
results = (
    session.results[["Abbreviation", "Position"]]
    .rename(columns={"Abbreviation": "DriverCode"})
)
# Rank the numeric positions; method="first" breaks ties by row order.
# The rank is left as float so a position that fails to parse stays NaN instead of
# raising on an integer cast. After the left merge the column is float anyway
# whenever a 2025 driver has no row in this race.
results["Bahrain2024Rank"] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")

# Drop any column left by an earlier run of this cell so that re-running it does
# not produce suffixed duplicates (Bahrain2024Rank_x / Bahrain2024Rank_y).
data_2025_full = (
    data_2025_full
    .drop(columns=["Bahrain2024Rank"], errors="ignore")
    .merge(results[["DriverCode", "Bahrain2024Rank"]], on="DriverCode", how="left")
)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

In [26]:
# 2025 Bahrain Starting Grid (April 12, 2025, post-penalties)
# Drivers are listed from the first grid slot to the last; the rank is the list position.
quali_2025 = pd.DataFrame({
    "Driver": ["Oscar Piastri", "Charles Leclerc", "George Russell", "Pierre Gasly",
               "Kimi Antonelli", "Lando Norris", "Max Verstappen", "Carlos Sainz",
               "Lewis Hamilton", "Yuki Tsunoda", "Jack Doohan", "Isack Hadjar",
               "Fernando Alonso", "Esteban Ocon", "Alex Albon", "Nico Hulkenberg",
               "Liam Lawson", "Gabriel Bortoleto", "Lance Stroll", "Ollie Bearman"],
    "Bahrain2025QualiRank": list(range(1, 21))
})

# Drop any column left by an earlier run of this cell so that re-running it does
# not produce suffixed duplicates (Bahrain2025QualiRank_x / Bahrain2025QualiRank_y).
data_2025_full = (
    data_2025_full
    .drop(columns=["Bahrain2025QualiRank"], errors="ignore")
    .merge(quali_2025[["Driver", "Bahrain2025QualiRank"]], on="Driver", how="left")
)

# The merge key is a hand-typed name: a spelling mismatch would silently leave NaN
# here, so fail loudly instead.
unmatched_drivers = data_2025_full.loc[data_2025_full["Bahrain2025QualiRank"].isna(), "Driver"].tolist()
assert not unmatched_drivers, f"No 2025 Bahrain grid slot found for: {unmatched_drivers}"

In [27]:
# 2025 Season Races (Australia, China, Japan)
# Each entry is the event name passed to fastf1.get_session and also the prefix
# of the rank column it produces.
races_2025 = ["Australia", "China", "Japanese"]
rank_columns = [f"{race}Rank" for race in races_2025]

# Drop rank columns left by an earlier run of this cell so that re-running it does
# not produce suffixed duplicates (e.g. AustraliaRank_x / AustraliaRank_y).
data_2025_full = data_2025_full.drop(columns=rank_columns, errors="ignore")

for race, rank_column in zip(races_2025, rank_columns):
    session = fastf1.get_session(2025, race, 'R')
    session.load()
    results = (
        session.results[["Abbreviation", "Position"]]
        .rename(columns={"Abbreviation": "DriverCode"})
    )
    # Rank stays float so a position that fails to parse remains NaN instead of
    # raising on an integer cast; method="first" breaks ties by row order.
    results[rank_column] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")
    data_2025_full = data_2025_full.merge(results[["DriverCode", rank_column]], on="DriverCode", how="left")

# Per-driver mean over the race rank columns (the row-wise mean skips NaN by default).
data_2025_full["Season2025AvgRank"] = data_2025_full[rank_columns].mean(axis=1)

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core   

In [28]:
# Features and the multiplier applied to each one (also used as target weights).
features = ["Bahrain2024Rank", "Bahrain2025QualiRank", "Season2025AvgRank"]
feature_weights = {"Bahrain2024Rank": 1.0, "Bahrain2025QualiRank": 3.0, "Season2025AvgRank": 2.0}

# A missing rank is replaced by 20 once, and the same filled values feed both the
# target and the features, so a missing value can never become a NaN label.
ranks_filled = data_2025_full[features].fillna(20)

# Scores
data_2025_full["Score_2024"] = 20 - ranks_filled["Bahrain2024Rank"]

# Target: weighted mean of (20 - rank) over the three features.
# NOTE(review): y_train is computed from the same columns as X_train, so the model
# is fit to this formula rather than to observed race results.
y_train = (
    sum(weight * (20 - ranks_filled[column]) for column, weight in feature_weights.items())
    / sum(feature_weights.values())
)

# Feature matrix: filled ranks, each column scaled by its weight.
# NOTE(review): tree-based models are generally insensitive to rescaling a single
# feature by a positive constant, so this scaling likely does not change the fit — confirm.
X_train = ranks_filled.assign(
    **{column: ranks_filled[column] * weight for column, weight in feature_weights.items()}
)

In [29]:
# Regressor
model = XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.05, random_state=42, objective='reg:squarederror')
model.fit(X_train, y_train)

# Predict
# Rebuild the inputs from the working table: missing ranks become 20 and each
# column is multiplied by its weight.
X_pred = data_2025_full[features].fillna(20)
for column, weight in (("Bahrain2024Rank", 1.0),
                       ("Bahrain2025QualiRank", 3.0),
                       ("Season2025AvgRank", 2.0)):
    X_pred[column] = X_pred[column] * weight
scores = model.predict(X_pred)

# Map the standardized scores through a logistic curve into the range (0.1, 0.9).
# These values are not normalized across drivers, so they do not sum to 1.
centered_scores = scores - scores.mean()
score_spread = scores.std()
if score_spread > 0:
    logistic_exponent = -3 * centered_scores / score_spread
else:
    # All predictions are identical: avoid 0/0 and give every driver the midpoint (0.5).
    logistic_exponent = np.zeros_like(centered_scores)
data_2025_full["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(logistic_exponent))

# Flag exactly one winner: idxmax returns the first row holding the maximum, so a
# tie at the top no longer marks several drivers as the predicted winner.
winner_index = data_2025_full["WinProbability"].idxmax()
data_2025_full["PredictedWinner"] = (data_2025_full.index == winner_index).astype(int)
winner = data_2025_full.loc[winner_index, "Driver"]

In [30]:
# Output
# Table 1: one row per model input with the importance reported by the fitted model.
importance_table = pd.DataFrame(
    [(feature_name, importance) for feature_name, importance in zip(features, model.feature_importances_)],
    columns=["Feature", "Importance"],
)
print("\n✨ Post-Quali Feature Importances (2024 + 2025 Season) ✨")
print(tabulate(importance_table, headers="keys", tablefmt="psql", showindex=False))

# Table 2: every driver with the input ranks and the computed score, highest first.
display_columns = ["Driver", "DriverNumber", "Bahrain2024Rank", "Bahrain2025QualiRank", "Season2025AvgRank", "WinProbability"]
odds_table = data_2025_full[display_columns].sort_values("WinProbability", ascending=False)
print("\n🏆 Post-Quali Predicted 2025 Bahrain GP Winner Odds (2024 + 2025 Season) 🏆\n")
print(tabulate(
    odds_table,
    headers=["Driver", "No.", "2024 Rank", "2025 Quali", "2025 Avg Rank", "Win Prob."],
    tablefmt="fancy_grid",
    showindex=False,
    # One format per column, in display order; the empty strings leave the first two columns unformatted.
    floatfmt=("", "", ".0f", ".0f", ".1f", ".3f"),
))

print(f"\n🎯 Predicted Winner (Post-Quali): {winner} 🎯")


✨ Post-Quali Feature Importances (2024 + 2025 Season) ✨
+----------------------+--------------+
| Feature              |   Importance |
|----------------------+--------------|
| Bahrain2024Rank      |    0.0184917 |
| Bahrain2025QualiRank |    0.848923  |
| Season2025AvgRank    |    0.132585  |
+----------------------+--------------+

🏆 Post-Quali Predicted 2025 Bahrain GP Winner Odds (2024 + 2025 Season) 🏆

╒═══════════════════╤═══════╤═════════════╤══════════════╤═════════════════╤═════════════╕
│ Driver            │   No. │   2024 Rank │   2025 Quali │   2025 Avg Rank │   Win Prob. │
╞═══════════════════╪═══════╪═════════════╪══════════════╪═════════════════╪═════════════╡
│ Oscar Piastri     │    81 │           8 │            1 │             4.3 │       0.893 │
├───────────────────┼───────┼─────────────┼──────────────┼─────────────────┼─────────────┤
│ George Russell    │    63 │           5 │            3 │             3.7 │       0.893 │
├───────────────────┼───────┼────────────