In [None]:
# Install the third-party packages imported below (fastf1, tabulate, xgboost).
# NOTE(review): versions are not pinned, so a fresh install may pull releases
# that differ from the ones this notebook was last run with.
!pip install fastf1 tabulate xgboost

In [5]:
import os
import fastf1
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

In [6]:
# Cache setup
# Directory handed to FastF1's cache so session data fetched by later cells
# can be reused between runs instead of being downloaded again.
cache_dir = "/content/miami_f1_cache"
# exist_ok=True makes this a no-op when the directory is already present,
# replacing the separate exists() check and its check-then-create race.
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [7]:
# 2025 grid
# One (full name, three-letter code, car number) record per driver. Car
# numbers are kept as strings.
_grid_records = [
    ("Lando Norris", "NOR", "4"),
    ("Oscar Piastri", "PIA", "81"),
    ("Max Verstappen", "VER", "1"),
    ("Lewis Hamilton", "HAM", "44"),
    ("Charles Leclerc", "LEC", "16"),
    ("Alex Albon", "ALB", "23"),
    ("Carlos Sainz", "SAI", "55"),
    ("George Russell", "RUS", "63"),
    ("Fernando Alonso", "ALO", "14"),
    ("Lance Stroll", "STR", "18"),
    ("Pierre Gasly", "GAS", "10"),
    ("Yuki Tsunoda", "TSU", "22"),
    ("Esteban Ocon", "OCO", "31"),
    ("Nico Hulkenberg", "HUL", "27"),
    ("Kimi Antonelli", "ANT", "12"),
    ("Isack Hadjar", "HAD", "6"),
    ("Ollie Bearman", "BEA", "87"),
    ("Liam Lawson", "LAW", "30"),
    ("Jack Doohan", "DOO", "7"),
    ("Gabriel Bortoleto", "BOR", "5"),
]
grid_2025_full = pd.DataFrame(
    _grid_records, columns=["Driver", "DriverCode", "DriverNumber"]
).astype({"DriverNumber": str})

# Working table that later cells extend with rank columns; the grid itself
# is left untouched.
data_2025_full = grid_2025_full.copy()

In [None]:
# Load 2024 Miami GP data
# Adds a "Miami2024Rank" column to data_2025_full: each driver's rank by
# finishing position in the 2024 Miami race, matched on the driver code.
# Drivers without a 2024 result end up with NaN.

# Remove a column left by an earlier execution of this cell; otherwise the
# merge below would emit Miami2024Rank_x / Miami2024Rank_y on a re-run.
data_2025_full = data_2025_full.drop(columns=["Miami2024Rank"], errors="ignore")
try:
    session = fastf1.get_session(2024, 'Miami', 'R')
    session.load()
    results = session.results[["Abbreviation", "Position"]].rename(
        columns={"Abbreviation": "DriverCode"}
    )
    # The rank stays float: a non-numeric or missing Position is coerced to
    # NaN, which an integer cast cannot represent (it would raise and abort
    # the whole load).
    results["Miami2024Rank"] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")
    data_2025_full = data_2025_full.merge(results[["DriverCode", "Miami2024Rank"]], on="DriverCode", how="left")
except Exception as e:
    print(f"Failed to load 2024 Miami GP data: {e}")
    # Later cells select this column unconditionally. Provide it as all-NaN
    # so their fillna() default applies instead of a KeyError.
    data_2025_full["Miami2024Rank"] = np.nan

In [9]:
# 2025 Miami Sprint Qualifying
# Drivers are listed in sprint-qualifying order; the rank is the 1-based
# position in this list.
sprint_quali_order = [
    "Kimi Antonelli", "Oscar Piastri", "Lando Norris", "Max Verstappen",
    "George Russell", "Charles Leclerc", "Lewis Hamilton", "Alex Albon",
    "Isack Hadjar", "Fernando Alonso", "Yuki Tsunoda", "Lance Stroll",
    "Pierre Gasly", "Esteban Ocon", "Nico Hulkenberg", "Liam Lawson",
    "Jack Doohan", "Ollie Bearman", "Gabriel Bortoleto", "Carlos Sainz",
]
sprint_quali_2025_miami = pd.DataFrame({
    "Driver": sprint_quali_order,
    "Miami2025SprintQualiRank": list(range(1, len(sprint_quali_order) + 1)),
})
# Drop the column first so re-running this cell replaces it rather than
# producing Miami2025SprintQualiRank_x / _y duplicates from merge().
data_2025_full = (
    data_2025_full
    .drop(columns=["Miami2025SprintQualiRank"], errors="ignore")
    .merge(
        sprint_quali_2025_miami[["Driver", "Miami2025SprintQualiRank"]],
        on="Driver", how="left",
    )
)

In [10]:
# 2025 Miami Sprint Race
# Drivers are listed in sprint-race order; the rank is the 1-based position
# in this list.
sprint_race_order = [
    "Lando Norris", "Oscar Piastri", "Lewis Hamilton", "George Russell",
    "Lance Stroll", "Yuki Tsunoda", "Kimi Antonelli", "Pierre Gasly",
    "Alex Albon", "Liam Lawson", "Ollie Bearman", "Jack Doohan",
    "Gabriel Bortoleto", "Nico Hulkenberg", "Esteban Ocon", "Fernando Alonso",
    "Max Verstappen", "Charles Leclerc", "Isack Hadjar", "Carlos Sainz",
]
sprint_2025_miami = pd.DataFrame({
    "Driver": sprint_race_order,
    "Miami2025SprintRank": list(range(1, len(sprint_race_order) + 1)),
})
# Drop the column first so re-running this cell replaces it rather than
# producing Miami2025SprintRank_x / _y duplicates from merge().
data_2025_full = (
    data_2025_full
    .drop(columns=["Miami2025SprintRank"], errors="ignore")
    .merge(
        sprint_2025_miami[["Driver", "Miami2025SprintRank"]],
        on="Driver", how="left",
    )
)

In [11]:
# 2025 Miami Grand Prix Qualifying
# Drivers are listed in qualifying order; the rank is the 1-based position
# in this list.
quali_order = [
    "Max Verstappen", "Lando Norris", "Kimi Antonelli", "Oscar Piastri",
    "George Russell", "Carlos Sainz", "Alex Albon", "Charles Leclerc",
    "Esteban Ocon", "Yuki Tsunoda", "Isack Hadjar", "Lewis Hamilton",
    "Gabriel Bortoleto", "Jack Doohan", "Liam Lawson", "Fernando Alonso",
    "Lance Stroll", "Pierre Gasly", "Nico Hulkenberg", "Ollie Bearman",
]
quali_2025_miami = pd.DataFrame({
    "Driver": quali_order,
    "Miami2025QualiRank": list(range(1, len(quali_order) + 1)),
})
# Drop the column first so re-running this cell replaces it rather than
# producing Miami2025QualiRank_x / _y duplicates from merge().
data_2025_full = (
    data_2025_full
    .drop(columns=["Miami2025QualiRank"], errors="ignore")
    .merge(
        quali_2025_miami[["Driver", "Miami2025QualiRank"]],
        on="Driver", how="left",
    )
)

In [None]:
# 2025 Season Races
# For each listed race, add a "<race>Rank" column (rank by finishing
# position, matched on driver code), then average the available race ranks
# per driver into "Season2025AvgRank".
# NOTE(review): Bahrain appears to have preceded Miami on the 2025 calendar
# as well but is not listed here; confirm whether that omission is intended.
races_2025 = ["Australia", "China", "Japan", "Saudi"]
loaded_rank_columns = []  # rank columns that were actually merged in
for race in races_2025:
    rank_column = f"{race}Rank"
    # Remove a column left by an earlier execution so a re-run does not make
    # merge() emit <race>Rank_x / <race>Rank_y duplicates.
    data_2025_full = data_2025_full.drop(columns=[rank_column], errors="ignore")
    try:
        session = fastf1.get_session(2025, race, 'R')
        session.load()
        results = session.results[["Abbreviation", "Position"]].rename(
            columns={"Abbreviation": "DriverCode"}
        )
        # Rank stays float so a missing/non-numeric Position becomes NaN
        # instead of making an integer cast raise and skip the whole race.
        results[rank_column] = pd.to_numeric(results["Position"], errors='coerce').rank(method="first")
        data_2025_full = data_2025_full.merge(results[["DriverCode", rank_column]], on="DriverCode", how="left")
        loaded_rank_columns.append(rank_column)
    except Exception as e:
        print(f"Skipping {race} 2025 (data unavailable): {e}")

# Average only over races that loaded. Selecting every "<race>Rank" name
# would raise KeyError as soon as one race had been skipped above.
if loaded_rank_columns:
    data_2025_full["Season2025AvgRank"] = data_2025_full[loaded_rank_columns].mean(axis=1, skipna=True)
else:
    # No race data at all: leave the average missing so the downstream
    # fillna() default is used.
    data_2025_full["Season2025AvgRank"] = np.nan

In [37]:
# Features and target
features = [
    "Miami2024Rank",
    "Miami2025SprintQualiRank",
    "Miami2025SprintRank",
    "Miami2025QualiRank",
    "Season2025AvgRank",
]
# Missing ranks are imputed with 20 once; both the target and the model
# inputs are derived from this filled frame.
features_filled = data_2025_full[features].fillna(20).astype(float)

# Target: weighted sum of (20 - rank) over the features, divided by 12.8.
# The target is therefore a fixed function of the same columns the model is
# given as inputs.
target_weights = {
    "Miami2024Rank": 1.0,
    "Miami2025SprintQualiRank": 0.3,
    "Miami2025SprintRank": 1.5,
    "Miami2025QualiRank": 7.0,
    "Season2025AvgRank": 3.0,
}
y_train = sum(
    weight * (20 - features_filled[column])
    for column, weight in target_weights.items()
) / 12.8

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(features_filled)
# The prediction matrix is built from the same rows the scaler was fit on.
X_pred = scaler.transform(features_filled)

In [38]:
# Model
model = XGBRegressor(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.05,
    random_state=42,
    objective='reg:squarederror',
    alpha=0.5,
    reg_lambda=0.5,
)
model.fit(X_train, y_train)

# Predict
scores = model.predict(X_pred)
score_spread = scores.std()
if score_spread > 0:
    # Standardise the scores and apply a logistic curve with slope 3.
    logits = -3 * (scores - scores.mean()) / score_spread
else:
    # Every prediction is identical: standardising would be 0/0 and yield
    # NaN for all drivers, after which no row equals the max and the winner
    # lookup below would fail. Use a neutral logit instead.
    logits = np.zeros_like(scores)
# The logistic output is mapped into the 0.1-0.9 band. These values are
# per-driver scores and are not normalised to sum to 1 across the grid.
data_2025_full["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(logits))

# Flag every driver sharing the top value; on a tie the first such row in
# table order is reported as the winner.
top_probability = data_2025_full["WinProbability"].max()
data_2025_full["PredictedWinner"] = (data_2025_full["WinProbability"] == top_probability).astype(int)
winner = data_2025_full.loc[data_2025_full["PredictedWinner"] == 1, "Driver"].iloc[0]

In [39]:
# Output
# 1) Feature importances reported by the fitted model, one row per feature.
print("\n✨ Feature Importances (2024 Miami + 2025 Season, Miami Sprint) ✨")
importance_table = pd.DataFrame({"Feature": features, "Importance": model.feature_importances_})
print(tabulate(importance_table, headers="keys", tablefmt="psql", showindex=False))

# 2) Per-driver table, highest WinProbability first.
print("\n🏆 Predicted 2025 Miami GP Winner Odds (Post-Quali, Sprint Included) 🏆\n")
odds_columns = [
    "Driver", "DriverNumber", "Miami2024Rank", "Miami2025SprintQualiRank",
    "Miami2025SprintRank", "Miami2025QualiRank", "Season2025AvgRank", "WinProbability",
]
odds_headers = [
    "Driver", "No.", "2024 Miami Rank", "2025 Sprint Quali",
    "2025 Sprint Rank", "2025 Quali", "2025 Avg Rank", "Win Prob.",
]
# One float format per column, in the same order as odds_columns.
odds_float_formats = ("", "", ".0f", ".0f", ".0f", ".0f", ".1f", ".3f")
odds_table = data_2025_full[odds_columns].sort_values("WinProbability", ascending=False)
print(tabulate(
    odds_table,
    headers=odds_headers,
    tablefmt="fancy_grid",
    showindex=False,
    floatfmt=odds_float_formats,
))

# 3) Headline result.
print(f"\n🎯 Predicted Winner (Post-Quali, Sprint Included): {winner} 🎯")


✨ Feature Importances (2024 Miami + 2025 Season, Miami Sprint) ✨
+--------------------------+--------------+
| Feature                  |   Importance |
|--------------------------+--------------|
| Miami2024Rank            |   0.0121761  |
| Miami2025SprintQualiRank |   0.286622   |
| Miami2025SprintRank      |   0.00611829 |
| Miami2025QualiRank       |   0.522516   |
| Season2025AvgRank        |   0.172567   |
+--------------------------+--------------+

🏆 Predicted 2025 Miami GP Winner Odds (Post-Quali, Sprint Included) 🏆

╒═══════════════════╤═══════╤═══════════════════╤═════════════════════╤════════════════════╤══════════════╤═════════════════╤═════════════╕
│ Driver            │   No. │   2024 Miami Rank │   2025 Sprint Quali │   2025 Sprint Rank │   2025 Quali │   2025 Avg Rank │   Win Prob. │
╞═══════════════════╪═══════╪═══════════════════╪═════════════════════╪════════════════════╪══════════════╪═════════════════╪═════════════╡
│ Lando Norris      │     4 │                 