In [110]:
# ===============================================================
#                F1 Bahrain Tyre Strategy Model
#    Option 1: Strategy Classifier (M-H-H, S-M-H, etc)
#    Option 2: Compound + Stint Length Regression
# ===============================================================

import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split

In [111]:
df = pd.read_csv("data/bahrain_2019_2023_race_laps.csv")   # <-- replace with your real file
df["stint_lap"] = df.groupby(["race_id", "driver", "compound"]).cumcount()

# Keep only pre-race-available features
df = df[[
    "race_id", "driver", "season",
    "lap_time", "compound", "stint_lap",
    "track_temp", "air_temp", "wind_speed", "wind_dir"
]]


In [112]:
driver_means = df.groupby("driver")["lap_time"].mean()
global_mean = df["lap_time"].mean()
driver_offsets = driver_means - global_mean

def get_driver_offset(d):
    return float(driver_offsets.get(d, 0.0))

In [113]:
from sklearn.ensemble import RandomForestRegressor

lap_features = ["compound", "stint_lap", "track_temp", "air_temp",
                "wind_speed", "wind_dir"]
lap_target = "lap_time"

pre_lap = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), ["compound"]),
    ("num", "passthrough", ["stint_lap", "track_temp", "air_temp",
                            "wind_speed", "wind_dir"])
])

laptime_model = Pipeline([
    ("pre", pre_lap),
    ("rf", RandomForestRegressor(n_estimators=150, max_depth=12))
])

laptime_model.fit(df[lap_features], df[lap_target])

In [114]:
from functools import lru_cache

@lru_cache(None)
def fast_predict(comp, stint_lap, t_temp, a_temp, w_speed, w_dir):
    row_df = pd.DataFrame([{
        "compound": comp,
        "stint_lap": stint_lap,
        "track_temp": t_temp,
        "air_temp": a_temp,
        "wind_speed": w_speed,
        "wind_dir": w_dir
    }])
    return float(laptime_model.predict(row_df)[0])


In [115]:
def simulate_strategy(compounds, stints, weather, driver_offset):
    total = 0.0
    t_temp = weather["track_temp"]
    a_temp = weather["air_temp"]
    w_speed = weather["wind_speed"]
    w_dir = weather["wind_dir"]

    for comp, stint_len in zip(compounds, stints):
        for lap_age in range(stint_len):
            base = fast_predict(comp, lap_age, t_temp, a_temp, w_speed, w_dir)
            total += (base + driver_offset)

        total += 21.5  # pit loss

    return total


In [116]:
def brute_force_best_strategy(race_driver_df):
    """Return best strategy & stint lengths using limited Bahrain choices"""
    weather = {
        "track_temp": race_driver_df["track_temp"].mean(),
        "air_temp": race_driver_df["air_temp"].mean(),
        "wind_speed": race_driver_df["wind_speed"].mean(),
        "wind_dir": race_driver_df["wind_dir"].mean(),
    }

    compounds_options = [
    # Most common
    ["M","H","H"],
    ["M","M","H"],
    ["S","M","H"],

    # Reverse variants
    ["H","M","H"],
    ["H","H","M"],

    # Soft-heavy (rare but sometimes seen)
    ["S","S","M"],
    ["S","S","H"],
    ["S","H","S"],

    # 2-stop variants
    ["S","M"],
    ["S","H"],
    ["H","S"],

    # Experimental but possible
    ["H","H"],
]


    L = len(race_driver_df["lap_time"])
    laps = 57  # Bahrain fixed

    best_time = 1e12
    best_strategy = None
    best_stints = None

    for comp_seq in compounds_options:
        k = len(comp_seq)
        # simple stint range
        for a in range(12, 22):
            if k == 2:
                b = laps - a
                stints = [a, b]
                if min(stints) < 8: continue
                t = simulate_strategy(comp_seq, stints, weather,
                                      get_driver_offset(race_driver_df["driver"].iloc[0]))
                if t < best_time:
                    best_time, best_strategy, best_stints = t, comp_seq, stints
            else:
                for b in range(15, 25):
                    c = laps - (a + b)
                    if c < 10 or c>30: continue
                    stints = [a, b, c]
                    t = simulate_strategy(comp_seq, stints, weather,
                                          get_driver_offset(race_driver_df["driver"].iloc[0]))
                    if t < best_time:
                        best_time, best_strategy, best_stints = t, comp_seq, stints

    return "-".join(best_strategy), best_stints

In [117]:
# ---------------------------------------------------------------
# BUILD TRAINING DATASET (race/driver-level)
# ---------------------------------------------------------------
rows = []

train_df = df[df["season"] < 2023]

for (race_id, driver), g in train_df.groupby(["race_id", "driver"]):
    strat, stints = brute_force_best_strategy(g)

    rows.append({
        "driver": driver,
        "season": g["season"].iloc[0],
        "track_temp": g["track_temp"].mean(),
        "air_temp": g["air_temp"].mean(),
        "wind_speed": g["wind_speed"].mean(),
        "wind_dir": g["wind_dir"].mean(),
        "driver_offset": get_driver_offset(driver),
        "strategy": strat,
        "stint1": stints[0] if len(stints)>0 else None,
        "stint2": stints[1] if len(stints)>1 else None,
        "stint3": stints[2] if len(stints)>2 else None
    })

train_strat = pd.DataFrame(rows)

In [118]:
# ---------------------------------------------------------------
# OPTION 1: STRATEGY CLASSIFIER
# ---------------------------------------------------------------
X = train_strat[[
    "driver", "season", "track_temp", "air_temp",
    "wind_speed", "wind_dir", "driver_offset"
]]
y = train_strat["strategy"] 

cat = ["driver"]
num = ["season", "track_temp", "air_temp", "wind_speed", "wind_dir", "driver_offset"]

pre_strat = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat),
    ("num", "passthrough", num)
])

strategy_model = Pipeline([
    ("pre", pre_strat),
    ("clf", RandomForestClassifier(n_estimators=300, max_depth=14))
])

strategy_model.fit(X, y)

In [119]:
# ---------------------------------------------------------------
# OPTION 2: STINT REGRESSION
# ---------------------------------------------------------------
reg_models = {}

for idx in ["1","2","3"]:
    target = f"stint{idx}"
    if train_strat[target].isna().all():
        continue

    y_reg = train_strat[target]
    X_reg = train_strat[[
        "driver", "season", "track_temp", "air_temp",
        "wind_speed", "wind_dir", "driver_offset", "strategy"
    ]]

    cat_r = ["driver","strategy"]
    num_r = ["season","track_temp","air_temp","wind_speed","wind_dir","driver_offset"]

    pre_r = ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_r),
        ("num", "passthrough", num_r)
    ])

    model = Pipeline([
        ("pre", pre_r),
        ("rf", RandomForestRegressor(n_estimators=200, max_depth=12))
    ])

    model.fit(X_reg, y_reg)
    reg_models[idx] = model


In [120]:
# ---------------------------------------------------------------
# PREDICTION FUNCTION (FOR 2023 OR FUTURE)
# ---------------------------------------------------------------
def predict_strategy(driver, track_temp, air_temp, wind_speed, wind_dir):
    """Returns recommended tyre sequence and estimated stint lengths."""

    base = pd.DataFrame([{
        "driver": driver,
        "season": 2023,
        "track_temp": track_temp,
        "air_temp": air_temp,
        "wind_speed": wind_speed,
        "wind_dir": wind_dir,
        "driver_offset": get_driver_offset(driver)
    }])

    # Option 1: strategy class
    predicted_strategy = strategy_model.predict(base)[0]

    # Option 2: stint lengths
    stint_preds = []
    for idx, m in reg_models.items():
        Xs = base.copy()
        Xs["strategy"] = predicted_strategy
        stint_preds.append(int(m.predict(Xs)[0]))

    return predicted_strategy, stint_preds


In [121]:
# ---------------------------------------------------------------
# EXAMPLE USAGE
# ---------------------------------------------------------------
rec_strat, rec_stints = predict_strategy(
    driver="VER",
    track_temp=32,
    air_temp=27,
    wind_speed=4.0,
    wind_dir=210
)

print("Recommended Strategy:", rec_strat)
print("Predicted Stints:", rec_stints)

Recommended Strategy: S-M
Predicted Stints: [21, 36]


In [122]:
df_2023 = df[df["season"] == 2023].copy()


In [None]:
def get_actual_strategy_and_stints(g):
    """
    g is laps for ONE driver ONE race
    Returns:
      strategy_str: e.g. "SOFT-HARD"
      stints: [18, 39]
    """

    # Detect where compound changes → stint boundaries
    change_laps = g[g["compound"] != g["compound"].shift()].index.tolist()
    change_laps.append(g.index[-1] + 1)

    compounds = []
    stints = []

    for i in range(len(change_laps) - 1):
        start = change_laps[i]
        end = change_laps[i+1]
        stint_len = end - start
        comp = g.loc[start, "compound"]

        compounds.append(comp)
        stints.append(stint_len)

    return "-".join(compounds), stints


In [None]:
eval_rows = []

df_2023 = df[df["season"] == 2023]

for driver in df_2023["driver"].unique():
    g = df_2023[df_2023["driver"] == driver].reset_index()

    actual_strat, actual_stints = get_actual_strategy_and_stints(g)

    eval_rows.append({
        "driver": driver,
        "actual_strategy": actual_strat,
        "actual_stints": actual_stints,
        "track_temp": g["track_temp"].mean(),
        "air_temp": g["air_temp"].mean(),
        "wind_speed": g["wind_speed"].mean(),
        "wind_dir": g["wind_dir"].mean(),
        "driver_offset": get_driver_offset(driver)
    })

eval_2023 = pd.DataFrame(eval_rows)
eval_2023


Unnamed: 0,driver,actual_strategy,track_temp,air_temp,wind_speed,wind_dir,driver_offset
0,VER,SOFT-HARD,29.870175,26.735088,0.482456,167.701754,-1.246603
1,GAS,SOFT-HARD-SOFT,29.849123,26.721053,0.475439,179.684211,-0.582379
2,PER,SOFT-HARD,29.870175,26.736842,0.507018,173.280702,-0.822552
3,ALO,SOFT-HARD,29.861404,26.733333,0.505263,177.649123,0.222388
4,LEC,SOFT-HARD,30.248718,26.905128,0.628205,193.871795,-1.175304
5,STR,SOFT-HARD,29.859649,26.733333,0.510526,181.017544,0.224687
6,SAR,SOFT-HARD-SOFT,29.869643,26.728571,0.464286,163.571429,-0.634781
7,MAG,HARD-SOFT,29.8625,26.728571,0.469643,171.517857,1.261543
8,DEV,SOFT-HARD,29.864286,26.730357,0.458929,172.75,-0.25287
9,TSU,SOFT-HARD-SOFT,29.847368,26.722807,0.470175,171.45614,-0.06892


In [None]:
def predict_full(row):
    base = pd.DataFrame([{
        "driver": row["driver"],
        "season": 2023,
        "track_temp": row["track_temp"],
        "air_temp": row["air_temp"],
        "wind_speed": row["wind_speed"],
        "wind_dir": row["wind_dir"],
        "driver_offset": row["driver_offset"],
    }])

    # Strategy class prediction
    pred_strat = strategy_model.predict(base)[0]

    # Stint length predictions
    pred_stints = []
    for idx, model in reg_models.items():
        temp = base.copy()
        temp["strategy"] = pred_strat
        pred_stints.append(int(model.predict(temp)[0]))

    return pred_strat, pred_stints

preds = eval_2023.apply(predict_full, axis=1)
eval_2023["pred_strategy"] = preds.apply(lambda x: x[0])
eval_2023["pred_stints"]   = preds.apply(lambda x: x[1])

eval_2023


In [126]:
accuracy = (eval_2023["pred_strategy"] == eval_2023["actual_strategy"]).mean()
print("2023 Strategy Accuracy:", accuracy)


2023 Strategy Accuracy: 0.0


In [127]:
eval_2023[[
    "driver",
    "actual_strategy",
    "pred_strategy",
    "pred_stints"
]]


Unnamed: 0,driver,actual_strategy,pred_strategy,pred_stints
0,VER,SOFT-HARD,S-M,"[21, 36]"
1,GAS,SOFT-HARD-SOFT,S-M,"[21, 36]"
2,PER,SOFT-HARD,S-M,"[21, 36]"
3,ALO,SOFT-HARD,S-M,"[21, 36]"
4,LEC,SOFT-HARD,S-M,"[21, 36]"
5,STR,SOFT-HARD,S-M,"[21, 36]"
6,SAR,SOFT-HARD-SOFT,S-M,"[21, 36]"
7,MAG,HARD-SOFT,S-M,"[21, 36]"
8,DEV,SOFT-HARD,S-M,"[21, 36]"
9,TSU,SOFT-HARD-SOFT,S-M,"[21, 36]"
