In [1]:
!pip install fastf1 tabulate xgboost

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [2]:
import os
import fastf1
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

In [3]:
# Cache setup: FastF1 writes downloaded session data here so later runs can reuse it.
cache_dir = "/content/monaco_f1_cache"
# exist_ok=True creates the directory when it is missing and is a no-op otherwise,
# avoiding the separate existence check.
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [4]:
# 2025 grid: one (full name, three-letter code, car number) record per driver.
# Car numbers are kept as strings.
_grid_records = [
    ("Lando Norris", "NOR", "4"),
    ("Oscar Piastri", "PIA", "81"),
    ("Max Verstappen", "VER", "1"),
    ("Lewis Hamilton", "HAM", "44"),
    ("Charles Leclerc", "LEC", "16"),
    ("Alex Albon", "ALB", "23"),
    ("Carlos Sainz", "SAI", "55"),
    ("George Russell", "RUS", "63"),
    ("Fernando Alonso", "ALO", "14"),
    ("Lance Stroll", "STR", "18"),
    ("Pierre Gasly", "GAS", "10"),
    ("Yuki Tsunoda", "TSU", "22"),
    ("Esteban Ocon", "OCO", "31"),
    ("Nico Hulkenberg", "HUL", "27"),
    ("Kimi Antonelli", "ANT", "12"),
    ("Isack Hadjar", "HAD", "6"),
    ("Ollie Bearman", "BEA", "87"),
    ("Liam Lawson", "LAW", "30"),
    ("Franco Colapinto", "COL", "43"),
    ("Gabriel Bortoleto", "BOR", "5"),
]

grid_2025_full = pd.DataFrame(
    _grid_records,
    columns=["Driver", "DriverCode", "DriverNumber"],
).astype({"DriverNumber": str})

# Working copy that later cells extend with feature columns; the grid itself stays untouched.
data_2025_full = grid_2025_full.copy()

In [5]:
# 2024 Monaco Grand Prix: attach last year's finishing rank per driver as "Monaco2024Rank".
# Drivers on the 2025 grid with no 2024 Monaco result get NaN (left merge).
try:
    session = fastf1.get_session(2024, 'Monaco', 'R')
    session.load()
    results = session.results[["Abbreviation", "Position"]].rename(
        columns={"Abbreviation": "DriverCode"}
    )
    # The rank is kept as float: a missing or unparseable Position is coerced to NaN,
    # and casting NaN to int would raise.
    results["Monaco2024Rank"] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")
    # Drop any copy left by a previous run of this cell; merging twice would otherwise
    # create Monaco2024Rank_x / Monaco2024Rank_y and lose the plain column name.
    data_2025_full = (
        data_2025_full
        .drop(columns=["Monaco2024Rank"], errors="ignore")
        .merge(results[["DriverCode", "Monaco2024Rank"]], on="DriverCode", how="left")
    )
except Exception as e:
    # Best effort: report the failure and continue without 2024 data.
    print(f"Failed to load 2024 Monaco GP data: {e}")
    # Keep the column present (all missing) so later cells that select it by name
    # do not fail with a KeyError.
    if "Monaco2024Rank" not in data_2025_full.columns:
        data_2025_full["Monaco2024Rank"] = np.nan

core           INFO 	Loading data for Monaco Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Monaco Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_dat

In [6]:
# 2025 Monaco Grand Prix Qualifying: drivers are listed in order, and the rank
# (1..20) is assigned from each driver's position in the list.
quali_2025_monaco = pd.DataFrame({
    "Driver": [
        "Lando Norris", "Charles Leclerc", "Oscar Piastri","Max Verstappen",
        "Isack Hadjar", "Fernando Alonso", "Lewis Hamilton", "Esteban Ocon",
        "Liam Lawson", "Alex Albon", "Carlos Sainz", "Yuki Tsunoda",
        "Nico Hulkenberg", "George Russell", "Kimi Antonelli", "Gabriel Bortoleto",
        "Pierre Gasly", "Franco Colapinto", "Lance Stroll", "Ollie Bearman"
    ],
    "Monaco2025QualiRank": list(range(1, 21))
})

# Merge Monaco Quali data into the full 2025 dataset (joined on the full driver name).
# Any "Monaco2025QualiRank" left by a previous run of this cell is dropped first, so
# re-running does not produce _x/_y suffixed duplicates.
data_2025_full = (
    data_2025_full
    .drop(columns=["Monaco2025QualiRank"], errors="ignore")
    .merge(
        quali_2025_monaco[["Driver", "Monaco2025QualiRank"]],
        on="Driver", how="left"
    )
)

In [7]:
# 2025 races whose results feed the season-form feature; each name is passed to
# fastf1.get_session to look up the event.
races_2025 = ["Australia", "China", "Japanese", "Bahrain", "Saudi", "Miami", "Imola"]

for race in races_2025:
    rank_col = f"{race}Rank"
    session = fastf1.get_session(2025, race, 'R')
    session.load()
    results = session.results[["Abbreviation", "Position"]].rename(
        columns={"Abbreviation": "DriverCode"}
    )
    # The rank is kept as float: a missing or unparseable Position is coerced to NaN,
    # and casting NaN to int would raise.
    results[rank_col] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")

    # Replace any column left by a previous run so the cell can be re-executed safely.
    data_2025_full = (
        data_2025_full
        .drop(columns=[rank_col], errors="ignore")
        .merge(results[["DriverCode", rank_col]], on="DriverCode", how="left")
    )

# Compute average rank across all listed races; mean() skips NaN, so a driver who
# missed some races is averaged over the races they did take part in.
rank_columns = [f"{race}Rank" for race in races_2025]
data_2025_full["Season2025AvgRank"] = data_2025_full[rank_columns].mean(axis=1)


core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_st

In [8]:
# Features and target
features = ["Monaco2024Rank", "Monaco2025QualiRank", "Season2025AvgRank"]

# Missing ranks are replaced with 20 before the features are used.
_filled_features = data_2025_full[features].fillna(20).astype(float)


def _rank_gap(column):
    """Return 20 minus the given rank column, with missing ranks treated as 20."""
    return 20 - data_2025_full[column].fillna(20)


# The target is a weighted blend of the three rank gaps (weights 1 : 2 : 1, divided by 4).
# NOTE(review): the target is computed from the same columns used as features.
y_train = (
    1.0 * _rank_gap("Monaco2024Rank")
    + 2.0 * _rank_gap("Monaco2025QualiRank")
    + 1.0 * _rank_gap("Season2025AvgRank")
) / 4

# Scale features: the scaler is fitted on the filled frame, and the same frame is
# transformed again to obtain the prediction inputs.
scaler = StandardScaler()
X_train = scaler.fit_transform(_filled_features)
X_pred = scaler.transform(_filled_features)

In [9]:
# Model: fit a gradient-boosted regressor on the scaled features and score every driver.
model = XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.05, random_state=42, objective='reg:squarederror', alpha=0.5, reg_lambda=0.5)
model.fit(X_train, y_train)
# Predict
scores = model.predict(X_pred)

# Standardise the scores and squash them through a logistic curve into (0.1, 0.9).
# These are per-driver scores, not probabilities that sum to 1 across the grid.
score_spread = scores.std()
if score_spread > 0:
    exponent = -3 * (scores - scores.mean()) / score_spread
else:
    # All scores identical (or undefined spread): dividing by zero would yield NaN,
    # so every driver gets the neutral midpoint instead.
    exponent = np.zeros_like(scores)
data_2025_full["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(exponent))

# idxmax returns the first row holding the maximum, so exactly one driver is flagged
# even when several share the top value.
winner_idx = data_2025_full["WinProbability"].idxmax()
data_2025_full["PredictedWinner"] = (data_2025_full.index == winner_idx).astype(int)
winner = data_2025_full.loc[winner_idx, "Driver"]

In [10]:
# Output: feature importances, then the per-driver table sorted by score, then the winner.
print("\n✨ Feature Importances (2024 + Qualifying + Season Avg:) ✨")
importance_table = pd.DataFrame({"Feature": features, "Importance": model.feature_importances_})
print(tabulate(importance_table, headers="keys", tablefmt="psql", showindex=False))

# The title names Monaco: every feature in this notebook is built for the Monaco GP.
print("\n🏆 Predicted 2025 Monaco GP Winner Odds (Post-Quali) 🏆\n")
odds_table = data_2025_full[
    ["Driver", "DriverNumber", "Monaco2024Rank", "Monaco2025QualiRank", "Season2025AvgRank", "WinProbability"]
].sort_values("WinProbability", ascending=False)
print(tabulate(odds_table,
               headers=["Driver", "No.", "2024 Monaco Rank", "2025 Quali", "2025 Avg Rank", "Win Prob."],
               tablefmt="fancy_grid",
               showindex=False,
               # One format per column: ranks without decimals, average to 1 dp, score to 3 dp.
               floatfmt=("", "", ".0f", ".0f", ".1f", ".3f")))
# No sprint results are used by this notebook, so the summary only mentions qualifying.
print(f"\n🎯 Predicted Winner (Post-Quali): {winner} 🎯")


✨ Feature Importances (2024 + Qualifying + Season Avg:) ✨
+---------------------+--------------+
| Feature             |   Importance |
|---------------------+--------------|
| Monaco2024Rank      |     0.547973 |
| Monaco2025QualiRank |     0.31976  |
| Season2025AvgRank   |     0.132267 |
+---------------------+--------------+

🏆 Predicted 2025 Miami GP Winner Odds (Post-Quali) 🏆

╒═══════════════════╤═══════╤════════════════════╤══════════════╤═════════════════╤═════════════╕
│ Driver            │   No. │   2024 Monaco Rank │   2025 Quali │   2025 Avg Rank │   Win Prob. │
╞═══════════════════╪═══════╪════════════════════╪══════════════╪═════════════════╪═════════════╡
│ Lando Norris      │     4 │                  4 │            1 │             2.3 │       0.894 │
├───────────────────┼───────┼────────────────────┼──────────────┼─────────────────┼─────────────┤
│ Oscar Piastri     │    81 │                  2 │            3 │             2.7 │       0.894 │
├───────────────────┼────