In [3]:
# Install the essentials
!pip install fastf1 tabulate xgboost pandas numpy



In [4]:
import os

import fastf1
import numpy as np
import pandas as pd
from tabulate import tabulate
from xgboost import XGBClassifier, XGBRegressor

In [15]:
# Cache setup—Suzuka only
cache_dir = "/content/japan_f1_cache"
# exist_ok=True makes this a no-op when the directory is already there,
# so the cell can be re-run safely.
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

# 2025 grid—veterans only
grid_2025_veterans = pd.DataFrame({
    "Driver": ["Lando Norris", "Oscar Piastri", "Max Verstappen", "Lewis Hamilton",
               "Charles Leclerc", "Alex Albon", "Carlos Sainz", "George Russell",
               "Fernando Alonso", "Lance Stroll", "Pierre Gasly", "Yuki Tsunoda",
               "Esteban Ocon", "Nico Hulkenberg"],
    "DriverCode": ["NOR", "PIA", "VER", "HAM", "LEC", "ALB", "SAI", "RUS",
                   "ALO", "STR", "GAS", "TSU", "OCO", "HUL"],
    "DriverNumber": ['4', '81', '1', '44', '16', '23', '55', '63',
                     '14', '18', '10', '22', '31', '27']
}).astype({"DriverNumber": str})

# Load past Japanese GP data (2022, 2023, 2024)
# Each pass adds two columns to data_2025: Japan<year>Rank and Weather_<year>.
# Drivers absent from a given race keep NaN in that year's columns (left join).
data_2025 = grid_2025_veterans.copy()
for year in [2022, 2023, 2024]:
    session = fastf1.get_session(year, 'Japanese', 'R')
    session.load()
    rank_col, weather_col = f"Japan{year}Rank", f"Weather_{year}"
    # na_option="bottom" ranks unparseable/missing positions last instead of
    # leaving NaN, which would make the int cast raise.
    finish_rank = (
        pd.to_numeric(session.results["Position"], errors='coerce')
        .rank(method="first", na_option="bottom")
        .astype(int)
    )
    # Join on the three-letter driver code rather than the car number:
    # car numbers can change between seasons, the abbreviation does not.
    results = pd.DataFrame({
        "DriverCode": session.results["Abbreviation"],
        rank_col: finish_rank,
        weather_col: 1 if year == 2022 else 0,  # 2022 wet, 2023-24 dry
    })
    data_2025 = data_2025.merge(results, on="DriverCode", how="left")

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core       

In [17]:
# Define inverted rank scores—lower rank = higher score
# A driver with no result for a year is treated as finishing 20th (score 0).
for year in (2022, 2023, 2024):
    data_2025[f"Score_{year}"] = 20 - data_2025[f"Japan{year}Rank"].fillna(20)  # NaN = last place
# Weighted average score—less 2023 dominance (weights 1, 1.2, 1.5 sum to 3.7)
y_train = (data_2025["Score_2022"] + 1.2 * data_2025["Score_2023"] + 1.5 * data_2025["Score_2024"]) / 3.7

# Weather diff—wet vs. dry performance
data_2025["DryAvgRank"] = (data_2025["Japan2023Rank"] + data_2025["Japan2024Rank"]) / 2
data_2025["WetDiff"] = (data_2025["Japan2022Rank"] - data_2025["DryAvgRank"]) * 3  # Triple the effect

# Features
features = ["Japan2022Rank", "Japan2023Rank", "Japan2024Rank", "WetDiff"]
# Missing ranks become 20 (last place). Missing WetDiff becomes 0 (no known
# wet/dry difference): 20 is a rank sentinel and is not meaningful for a difference.
X_train = data_2025[features].fillna(
    {"Japan2022Rank": 20, "Japan2023Rank": 20, "Japan2024Rank": 20, "WetDiff": 0}
)
# NOTE: tree splits are threshold-based, so rescaling a column by a positive
# constant is not expected to change the fitted model; kept for parity.
X_train["Japan2024Rank"] = X_train["Japan2024Rank"] * 1.5
X_train["Japan2023Rank"] = X_train["Japan2023Rank"] * 1.2
# WetDiff already carries its x3 factor from the definition above; it is not
# multiplied again here, so the effect stays "tripled" rather than x9.

# Regressor
model = XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.05, random_state=42, objective='reg:squarederror')
model.fit(X_train, y_train)

# Predict and squash
# Prediction uses exactly the rows and transforms the model was trained on.
X_pred = X_train.copy()
scores = model.predict(X_pred)
# Standardise the scores; if every prediction is identical the spread is 0,
# so fall back to z = 0 instead of dividing by zero.
spread = scores.std()
z_scores = (scores - scores.mean()) / spread if spread > 0 else np.zeros_like(scores)
# Logistic squash into (0.1, 0.9). This is a per-driver score, not a calibrated
# probability: the values do not sum to 1 across the grid.
data_2025["WinProbability"] = 0.1 + 0.8 / (1 + np.exp(-2 * z_scores))
data_2025["PredictedWinner"] = (data_2025["WinProbability"] == data_2025["WinProbability"].max()).astype(int)
# On ties, idxmax returns the first row holding the maximum.
winner = data_2025.loc[data_2025["WinProbability"].idxmax(), "Driver"]

# Output
print("\n✨ Pre-Quali Feature Importances ✨")
print(tabulate(pd.DataFrame(list(zip(features, model.feature_importances_)), columns=["Feature", "Importance"]),
               headers="keys", tablefmt="psql", showindex=False))
print("\n🏆 Pre-Quali Predicted 2025 Japanese GP Winner Odds (Veterans Only) 🏆\n")
print(tabulate(data_2025[["Driver", "DriverNumber", "Japan2022Rank", "Japan2023Rank", "Japan2024Rank", "WinProbability"]].sort_values("WinProbability", ascending=False),
               headers=["Driver", "No.", "2022 Rank", "2023 Rank", "2024 Rank", "Win Prob."], tablefmt="fancy_grid", showindex=False,
               floatfmt=(".0f", "", ".0f", ".0f", ".0f", ".3f")))
print(f"\n🎯 Predicted Winner (Pre-Quali): {winner} 🎯")


✨ Pre-Quali Feature Importances ✨
+---------------+--------------+
| Feature       |   Importance |
|---------------+--------------|
| Japan2022Rank |    0.0478655 |
| Japan2023Rank |    0.901817  |
| Japan2024Rank |    0.0503172 |
| WetDiff       |    0         |
+---------------+--------------+

🏆 Pre-Quali Predicted 2025 Japanese GP Winner Odds (Veterans Only) 🏆

╒═════════════════╤═══════╤═════════════╤═════════════╤═════════════╤═════════════╕
│ Driver          │   No. │   2022 Rank │   2023 Rank │   2024 Rank │   Win Prob. │
╞═════════════════╪═══════╪═════════════╪═════════════╪═════════════╪═════════════╡
│ Max Verstappen  │     1 │           1 │           1 │           1 │       0.877 │
├─────────────────┼───────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Charles Leclerc │    16 │           3 │           4 │           4 │       0.840 │
├─────────────────┼───────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Lando Norris    │     4 │          10 │ 

In [11]:
# --- Post-Quali Setup  ---
# Full 2025 grid—no experience
# DriverNumber holds the 2025 car numbers (ANT=12, DOO=7, BEA=87).
grid_2025_full = pd.DataFrame({
    "Driver": ["Lando Norris", "Oscar Piastri", "Max Verstappen", "Liam Lawson",
               "Lewis Hamilton", "Charles Leclerc", "Alex Albon", "Carlos Sainz",
               "George Russell", "Kimi Antonelli", "Fernando Alonso", "Lance Stroll",
               "Pierre Gasly", "Jack Doohan", "Yuki Tsunoda", "Isack Hadjar",
               "Oliver Bearman", "Esteban Ocon", "Nico Hulkenberg", "Gabriel Bortoleto"],
    "DriverCode": ["NOR", "PIA", "VER", "LAW", "HAM", "LEC", "ALB", "SAI", "RUS", "ANT",
                   "ALO", "STR", "GAS", "DOO", "TSU", "HAD", "BEA", "OCO", "HUL", "BOR"],
    "DriverNumber": ['4', '81', '1', '30', '44', '16', '23', '55', '63', '12',
                     '14', '18', '10', '7', '22', '6', '87', '31', '27', '5']
}).astype({"DriverNumber": str})

# Load past data for full grid
# Each pass adds Japan<year>Rank and Weather_<year>; drivers who did not start
# that race keep NaN in those columns (left join).
data_full_2025 = grid_2025_full.copy()
for year in [2022, 2023, 2024]:
    session = fastf1.get_session(year, 'Japanese', 'R')
    session.load()
    rank_col, weather_col = f"Japan{year}Rank", f"Weather_{year}"
    # na_option="bottom" ranks unparseable/missing positions last instead of
    # leaving NaN, which would make the int cast raise.
    finish_rank = (
        pd.to_numeric(session.results["Position"], errors='coerce')
        .rank(method="first", na_option="bottom")
        .astype(int)
    )
    # Join on the three-letter driver code rather than the car number: a driver
    # whose number changed between seasons would otherwise lose their history.
    results = pd.DataFrame({
        "DriverCode": session.results["Abbreviation"],
        rank_col: finish_rank,
        weather_col: 1 if year == 2022 else 0,  # 2022 wet, 2023-24 dry
    })
    data_full_2025 = data_full_2025.merge(results, on="DriverCode", how="left")

# Japan 2025 Quali
# session_quali_2025 = fastf1.get_session(2025, 'Japanese', 'Q')
# session_quali_2025.load()
# quali_2025 = session_quali_2025.laps[["DriverNumber", "LapTime"]].groupby("DriverNumber").agg({"LapTime": "min"}).reset_index()
# quali_2025["QualiTime (s)"] = quali_2025["LapTime"].dt.total_seconds()
# quali_2025["QualiRank"] = quali_2025["QualiTime (s)"].rank(method="first").astype(int)
# quali_2025["QualiDelta"] = quali_2025["QualiTime (s)"] - quali_2025["QualiTime (s)"].min()
# quali_2025["Pole"] = (quali_2025["QualiRank"] == 1).astype(int)
# data_full_2025 = data_full_2025.merge(quali_2025[["DriverNumber", "QualiTime (s)", "QualiRank", "QualiDelta", "Pole"]], on="DriverNumber", how="left")

# Post-quali features—no experience
#features_full = ["Japan2022Rank", "Japan2023Rank", "Japan2024Rank", "Weather_2022", "Weather_2023", "Weather_2024",
#                 "QualiRank", "QualiDelta", "Pole"]
# X_train_full = data_full_2025[features_full].fillna(data_full_2025[features_full].median())
# X_train_full["Japan2024Rank"] = X_train_full["Japan2024Rank"] * 2
# X_train_full["Japan2023Rank"] = X_train_full["Japan2023Rank"] * 1.5
# X_train_full["QualiRank"] = X_train_full["QualiRank"] * 2
# X_train_full["QualiDelta"] = X_train_full["QualiDelta"] * 20
# X_train_full["Pole"] = X_train_full["Pole"] * 10

# Re-train and predict
# y_train_full = data_full_2025[["Japan2022Winner", "Japan2023Winner", "Japan2024Winner"]].max(axis=1)
# model.fit(X_train_full, y_train_full)
# X_pred_full = data_full_2025[features_full].fillna(data_full_2025[features_full].median())
# X_pred_full["Japan2024Rank"] = X_pred_full["Japan2024Rank"] * 2
# X_pred_full["Japan2023Rank"] = X_pred_full["Japan2023Rank"] * 1.5
# X_pred_full["QualiRank"] = X_pred_full["QualiRank"] * 2
# X_pred_full["QualiDelta"] = X_pred_full["QualiDelta"] * 20
# X_pred_full["Pole"] = X_pred_full["Pole"] * 10
# data_full_2025["WinProbability"] = model.predict_proba(X_pred_full)[:, 1]
# data_full_2025["PredictedWinner"] = (data_full_2025["WinProbability"] == data_full_2025["WinProbability"].max()).astype(int)
# winner_full = data_full_2025[data_full_2025["PredictedWinner"] == 1]["Driver"].values[0]

# Post-quali output
# print("\n✨ Post-Quali Feature Importances ✨")
# print(tabulate(pd.DataFrame(list(zip(features_full, model.feature_importances_)), columns=["Feature", "Importance"]),
#                headers="keys", tablefmt="psql", showindex=False))
# print("\n🏆 Post-Quali Predicted 2025 Japanese GP Winner Odds (Full Grid) 🏆\n")
# print(tabulate(data_full_2025[["Driver", "DriverNumber", "Japan2024Rank", "QualiTime (s)", "QualiRank", "WinProbability"]].sort_values("WinProbability", ascending=False),
#                headers=["Driver", "No.", "2024 Rank", "Quali Time (s)", "Quali Rank", "Win Prob."], tablefmt="fancy_grid", showindex=False,
#                floatfmt=(".0f", "", ".0f", ".3f", ".0f", ".3f")))
# print(f"\n🎯 Predicted Winner (Post-Quali): {winner_full} 🎯")

# Save pre-quali results
#data_2025.to_csv("japan_2025_pre_quali_pred_no_exp.csv", index=False)

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core       