In [1]:
# Install the essentials
!pip install fastf1 tabulate xgboost pandas numpy

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.0-py3-none-any.whl.metadata (4.9 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [5]:
import os
import fastf1
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from tabulate import tabulate

In [3]:
# Cache setup—Suzuka only
# exist_ok=True creates the directory when it is missing and is a no-op when it
# already exists, so no separate existence check is needed.
cache_dir = "/content/japan_f1_cache"
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

# 2025 grid—veterans only
# One (full name, three-letter code, car number) record per driver; the car
# number is kept as a string.
_veteran_rows = [
    ("Lando Norris", "NOR", "4"),
    ("Oscar Piastri", "PIA", "81"),
    ("Max Verstappen", "VER", "1"),
    ("Lewis Hamilton", "HAM", "44"),
    ("Charles Leclerc", "LEC", "16"),
    ("Alex Albon", "ALB", "23"),
    ("Carlos Sainz", "SAI", "55"),
    ("George Russell", "RUS", "63"),
    ("Fernando Alonso", "ALO", "14"),
    ("Lance Stroll", "STR", "18"),
    ("Pierre Gasly", "GAS", "10"),
    ("Yuki Tsunoda", "TSU", "22"),
    ("Esteban Ocon", "OCO", "31"),
    ("Nico Hulkenberg", "HUL", "27"),
]
grid_2025_veterans = pd.DataFrame(
    _veteran_rows, columns=["Driver", "DriverCode", "DriverNumber"]
).astype({"DriverNumber": str})

# Load past Japanese GP data (2022, 2023, 2024)
# For each season, attach the driver's finishing rank and a wet-race flag to a
# fresh copy of the veterans grid, matching rows on the car number.
data_2025 = grid_2025_veterans.copy()
for year in [2022, 2023, 2024]:
    session = fastf1.get_session(year, 'Japanese', 'R')
    session.load()
    results = session.results[["DriverNumber", "Position"]].copy()  # copy so the new columns never touch session.results
    positions = pd.to_numeric(results["Position"], errors='coerce')
    # Coerced or missing positions are NaN. rank() would keep them as NaN and
    # the int cast would then raise, so they are ranked last instead.
    results[f"Japan{year}Rank"] = positions.rank(method="first", na_option="bottom").astype(int)
    results[f"Weather_{year}"] = 1 if year == 2022 else 0  # 2022 wet, 2023-24 dry
    # Left join keeps every grid row; drivers absent from that race get NaN.
    data_2025 = data_2025.merge(
        results[["DriverNumber", f"Japan{year}Rank", f"Weather_{year}"]],
        on="DriverNumber",
        how="left",
    )

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.r

In [6]:
# Inverted rank scores: a lower (better) finishing rank gives a higher score.
# A driver with no result for a season is treated as rank 20, i.e. score 0.
for season in (2022, 2023, 2024):
    data_2025[f"Score_{season}"] = 20 - data_2025[f"Japan{season}Rank"].fillna(20)

# Training target: weighted mean of the three season scores
# (weights 1.0 / 1.2 / 1.5, divided by their sum 3.7).
y_train = (
    data_2025["Score_2022"]
    + 1.2 * data_2025["Score_2023"]
    + 1.5 * data_2025["Score_2024"]
) / 3.7

# Weather diff—wet vs. dry performance
# DryAvgRank is the plain average of the 2023 and 2024 ranks (NaN if either is
# missing); WetDiff is the 2022 rank minus that average, scaled by 3.
dry_rank_total = data_2025["Japan2023Rank"] + data_2025["Japan2024Rank"]
data_2025["DryAvgRank"] = dry_rank_total / 2
wet_minus_dry = data_2025["Japan2022Rank"] - data_2025["DryAvgRank"]
data_2025["WetDiff"] = wet_minus_dry * 3  # Triple the effect

# Features: the three season ranks plus the wet-vs-dry delta.
features = ["Japan2022Rank", "Japan2023Rank", "Japan2024Rank", "WetDiff"]

# A missing rank counts as last place (20). A missing WetDiff means there is no
# wet/dry comparison for that driver, so it is filled with a neutral 0 instead
# of the rank sentinel.
feature_fill = {"Japan2022Rank": 20, "Japan2023Rank": 20, "Japan2024Rank": 20, "WetDiff": 0}
X_train = data_2025[features].fillna(feature_fill)
X_train["Japan2024Rank"] = X_train["Japan2024Rank"] * 1.5
X_train["Japan2023Rank"] = X_train["Japan2023Rank"] * 1.2
# WetDiff is already multiplied by 3 where it is computed, so it is not scaled
# a second time here.

# Regressor
model = XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.05, random_state=42, objective='reg:squarederror')
model.fit(X_train, y_train)

# Predict and squash
# Predictions are made for the same rows the model was fitted on, so the
# prepared feature frame is reused instead of being rebuilt.
X_pred = X_train.copy()
scores = model.predict(X_pred)

# Standardise the scores before the logistic squash. If every score is equal
# the spread is 0; use a zero logit then so each driver gets the midpoint
# instead of NaN.
score_spread = scores.std()
if score_spread > 0:
    logits = -2 * (scores - scores.mean()) / score_spread
else:
    logits = np.zeros_like(scores)
# Values fall between 0.1 and 0.9 and are not normalised to sum to 1.
data_2025["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(logits))
data_2025["PredictedWinner"] = (data_2025["WinProbability"] == data_2025["WinProbability"].max()).astype(int)
# idxmax returns the first row holding the maximum, so ties resolve to the
# earliest driver in the table.
winner = data_2025.loc[data_2025["WinProbability"].idxmax(), "Driver"]

# Output: feature-importance table, per-driver table sorted by WinProbability
# (highest first), then the predicted winner.
print("\n✨ Pre-Quali Feature Importances ✨")
importance_table = pd.DataFrame(
    list(zip(features, model.feature_importances_)),
    columns=["Feature", "Importance"],
)
print(tabulate(importance_table, headers="keys", tablefmt="psql", showindex=False))

print("\n🏆 Pre-Quali Predicted 2025 Japanese GP Winner Odds (Veterans Only) 🏆\n")
odds_columns = ["Driver", "DriverNumber", "Japan2022Rank", "Japan2023Rank", "Japan2024Rank", "WinProbability"]
odds_table = data_2025[odds_columns].sort_values("WinProbability", ascending=False)
print(
    tabulate(
        odds_table,
        headers=["Driver", "No.", "2022 Rank", "2023 Rank", "2024 Rank", "Win Prob."],
        tablefmt="fancy_grid",
        showindex=False,
        floatfmt=(".0f", "", ".0f", ".0f", ".0f", ".3f"),
    )
)
print(f"\n🎯 Predicted Winner (Pre-Quali): {winner} 🎯")


✨ Pre-Quali Feature Importances ✨
+---------------+--------------+
| Feature       |   Importance |
|---------------+--------------|
| Japan2022Rank |    0.0478655 |
| Japan2023Rank |    0.901817  |
| Japan2024Rank |    0.0503172 |
| WetDiff       |    0         |
+---------------+--------------+

🏆 Pre-Quali Predicted 2025 Japanese GP Winner Odds (Veterans Only) 🏆

╒═════════════════╤═══════╤═════════════╤═════════════╤═════════════╤═════════════╕
│ Driver          │   No. │   2022 Rank │   2023 Rank │   2024 Rank │   Win Prob. │
╞═════════════════╪═══════╪═════════════╪═════════════╪═════════════╪═════════════╡
│ Max Verstappen  │     1 │           1 │           1 │           1 │       0.877 │
├─────────────────┼───────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Charles Leclerc │    16 │           3 │           4 │           4 │       0.840 │
├─────────────────┼───────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Lando Norris    │     4 │          10 │ 

In [16]:
# Full 2025 grid: one (full name, three-letter code, car number) record per
# driver; the car number is kept as a string.
_full_grid_rows = [
    ("Lando Norris", "NOR", "4"),
    ("Oscar Piastri", "PIA", "81"),
    ("Max Verstappen", "VER", "1"),
    ("Lewis Hamilton", "HAM", "44"),
    ("Charles Leclerc", "LEC", "16"),
    ("Alex Albon", "ALB", "23"),
    ("Carlos Sainz", "SAI", "55"),
    ("George Russell", "RUS", "63"),
    ("Fernando Alonso", "ALO", "14"),
    ("Lance Stroll", "STR", "18"),
    ("Pierre Gasly", "GAS", "10"),
    ("Yuki Tsunoda", "TSU", "22"),
    ("Esteban Ocon", "OCO", "31"),
    ("Nico Hulkenberg", "HUL", "27"),
    ("Kimi Antonelli", "ANT", "12"),
    ("Isack Hadjar", "HAD", "6"),
    ("Ollie Bearman", "BEA", "87"),
    ("Liam Lawson", "LAW", "30"),
    ("Jack Doohan", "DOO", "7"),
    ("Gabriel Bortoleto", "BOR", "5"),
]
grid_2025_full = pd.DataFrame(
    _full_grid_rows, columns=["Driver", "DriverCode", "DriverNumber"]
).astype({"DriverNumber": str})

# Initialize data
# Start from a fresh copy of the grid so re-running this section does not
# accumulate columns from an earlier run.
data_2025_full = grid_2025_full.copy()

# Load 2024 Japanese GP data
session = fastf1.get_session(2024, 'Japanese', 'R')
session.load()
# rename() returns a new frame, so session.results itself is never modified.
results = session.results[["Abbreviation", "Position"]].rename(columns={"Abbreviation": "DriverCode"})
# Coerced or missing positions are NaN. rank() would keep them as NaN and the
# int cast would then raise, so they are ranked last instead.
results["Japan2024Rank"] = (
    pd.to_numeric(results["Position"], errors='coerce')
    .rank(method="first", na_option="bottom")
    .astype(int)
)
# Left join on the driver code keeps every grid row; drivers who did not take
# part in the 2024 race get NaN.
data_2025_full = data_2025_full.merge(results[["DriverCode", "Japan2024Rank"]], on="DriverCode", how="left")

# 2025 Qualifying (April 5, 2025)
# Drivers are listed in qualifying order; the rank is the 1-based position in
# this list.
_quali_order = [
    "Max Verstappen", "Lando Norris", "Oscar Piastri", "Charles Leclerc",
    "George Russell", "Kimi Antonelli", "Isack Hadjar", "Lewis Hamilton",
    "Alex Albon", "Ollie Bearman", "Pierre Gasly", "Carlos Sainz",
    "Fernando Alonso", "Liam Lawson", "Yuki Tsunoda", "Nico Hulkenberg",
    "Gabriel Bortoleto", "Esteban Ocon", "Jack Doohan", "Lance Stroll",
]
quali_2025 = pd.DataFrame({
    "Driver": _quali_order,
    "Japan2025QualiRank": list(range(1, len(_quali_order) + 1)),
})
# Attach each driver's qualifying rank by full name; the left join keeps every
# row already in the working table.
data_2025_full = pd.merge(
    data_2025_full,
    quali_2025[["Driver", "Japan2025QualiRank"]],
    on="Driver",
    how="left",
)

# 2025 Season Races (actual Australia and China results)
# Each race contributes one "<race>Rank" column, joined on the driver code.
races_2025 = ["Australia", "China"]
for race in races_2025:
    session = fastf1.get_session(2025, race, 'R')
    session.load()
    # rename() returns a new frame, so session.results itself is never modified.
    results = session.results[["Abbreviation", "Position"]].rename(columns={"Abbreviation": "DriverCode"})
    # Coerced or missing positions are NaN. rank() would keep them as NaN and
    # the int cast would then raise, so they are ranked last instead.
    results[f"{race}Rank"] = (
        pd.to_numeric(results["Position"], errors='coerce')
        .rank(method="first", na_option="bottom")
        .astype(int)
    )
    data_2025_full = data_2025_full.merge(results[["DriverCode", f"{race}Rank"]], on="DriverCode", how="left")

# Average over the columns created above, derived from races_2025 so the list
# of races only has to be edited in one place. mean() skips NaN by default, so
# a driver with a single race result keeps that rank as the average.
season_rank_columns = [f"{race}Rank" for race in races_2025]
data_2025_full["Season2025AvgRank"] = data_2025_full[season_rank_columns].mean(axis=1)

# Scores
# Every rank is inverted to "20 - rank" so that a better rank gives a higher
# score. A missing rank is treated as 20 (score 0) for all three inputs,
# matching the fillna(20) applied to the feature matrix; otherwise one missing
# value would put NaN into the training target.
data_2025_full["Score_2024"] = 20 - data_2025_full["Japan2024Rank"].fillna(20)
quali_score = 20 - data_2025_full["Japan2025QualiRank"].fillna(20)
season_score = 20 - data_2025_full["Season2025AvgRank"].fillna(20)
# Weighted mean with weights 1.0 / 2.0 / 2.0 (sum 5.0).
y_train = (1.0 * data_2025_full["Score_2024"] +
           2.0 * quali_score +
           2.0 * season_score) / 5.0

# Features: last year's race rank, this year's qualifying rank, and the
# average rank over the 2025 races so far. A missing value counts as rank 20.
features = ["Japan2024Rank", "Japan2025QualiRank", "Season2025AvgRank"]
X_train = data_2025_full[features].fillna(20)
X_train["Japan2024Rank"] = X_train["Japan2024Rank"] * 1.0
X_train["Japan2025QualiRank"] = X_train["Japan2025QualiRank"] * 2.0
X_train["Season2025AvgRank"] = X_train["Season2025AvgRank"] * 2.0

# Regressor
model = XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.05, random_state=42, objective='reg:squarederror')
model.fit(X_train, y_train)

# Predict
# Predictions are made for the same rows the model was fitted on, so the
# prepared feature frame is reused instead of being rebuilt.
X_pred = X_train.copy()
scores = model.predict(X_pred)

# Standardise the scores before the logistic squash. If every score is equal
# the spread is 0; use a zero logit then so each driver gets the midpoint
# instead of NaN.
score_spread = scores.std()
if score_spread > 0:
    logits = -2 * (scores - scores.mean()) / score_spread
else:
    logits = np.zeros_like(scores)
# Values fall between 0.1 and 0.9 and are not normalised to sum to 1.
data_2025_full["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(logits))
data_2025_full["PredictedWinner"] = (data_2025_full["WinProbability"] == data_2025_full["WinProbability"].max()).astype(int)
# idxmax returns the first row holding the maximum, so ties resolve to the
# earliest driver in the table.
winner = data_2025_full.loc[data_2025_full["WinProbability"].idxmax(), "Driver"]

# Output: feature-importance table, per-driver table sorted by WinProbability
# (highest first), then the predicted winner.
print("\n✨ Post-Quali Feature Importances (2024 + 2025 Season) ✨")
importance_table = pd.DataFrame(
    list(zip(features, model.feature_importances_)),
    columns=["Feature", "Importance"],
)
print(tabulate(importance_table, headers="keys", tablefmt="psql", showindex=False))

print("\n🏆 Post-Quali Predicted 2025 Japanese GP Winner Odds (2024 + 2025 Season) 🏆\n")
odds_columns = ["Driver", "DriverNumber", "Japan2024Rank", "Japan2025QualiRank", "Season2025AvgRank", "WinProbability"]
odds_table = data_2025_full[odds_columns].sort_values("WinProbability", ascending=False)
print(
    tabulate(
        odds_table,
        headers=["Driver", "No.", "2024 Rank", "2025 Quali", "2025 Avg Rank", "Win Prob."],
        tablefmt="fancy_grid",
        showindex=False,
        floatfmt=("", "", ".0f", ".0f", ".1f", ".3f"),
    )
)
print(f"\n🎯 Predicted Winner (Post-Quali): {winner} 🎯")

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core       


✨ Post-Quali Feature Importances (2024 + 2025 Season) ✨
+--------------------+--------------+
| Feature            |   Importance |
|--------------------+--------------|
| Japan2024Rank      |    0.0391251 |
| Japan2025QualiRank |    0.866247  |
| Season2025AvgRank  |    0.0946282 |
+--------------------+--------------+

🏆 Post-Quali Predicted 2025 Japanese GP Winner Odds (2024 + 2025 Season) 🏆

╒═══════════════════╤═══════╤═════════════╤══════════════╤═════════════════╤═════════════╕
│ Driver            │   No. │   2024 Rank │   2025 Quali │   2025 Avg Rank │   Win Prob. │
╞═══════════════════╪═══════╪═════════════╪══════════════╪═════════════════╪═════════════╡
│ Max Verstappen    │     1 │           1 │            1 │             3.0 │       0.885 │
├───────────────────┼───────┼─────────────┼──────────────┼─────────────────┼─────────────┤
│ Lando Norris      │     4 │           5 │            2 │             1.5 │       0.884 │
├───────────────────┼───────┼─────────────┼───────────