# F1 Pit Stop Strategy – Medium Dataset Builder (Full 2024 Season Races) + Light Testing on the Dataset

End-to-end data pipeline to build a **lap-level dataset** for Machine Learning modeling.  
Sources used:
- **FastF1** → telemetry, lap times, tyre compounds, stints, weather, events  
- **Ergast API** → grid positions, race results, pit stop durations  
- **Meteostat** → historical weather enrichment (optional)  
- Output: `data/f1_laps_gold.parquet` (final “gold” dataset)  


# Import & Setup

In [1]:
# --- Imports & setup ---
import os, json, math, datetime as dt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import requests

import fastf1
from fastf1 import get_session

from meteostat import Point, Hourly

# Cache for FastF1 (speeds up repeated runs)
os.makedirs("cache_fastf1", exist_ok=True)
fastf1.Cache.enable_cache("cache_fastf1")

# Data layers
os.makedirs("data/bronze", exist_ok=True)
os.makedirs("data/silver", exist_ok=True)
os.makedirs("data/gold", exist_ok=True)


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
sns.set_style('whitegrid')
plt.style.use('fivethirtyeight')

print("Setup OK ")

Setup OK 


# TARGET_RACES and CIRCUIT_COORDS (medium sample for Phase 2, all races of 2024 season, except sprints)

In [2]:
# ---- F1 2024 full calendar ----
TARGET_RACES = [
    (2024, "Bahrain"),
    (2024, "Saudi Arabia"),
    (2024, "Australia"),
    (2024, "Japan"),
    (2024, "China"),
    (2024, "Miami"),
    (2024, "Emilia Romagna"),
    (2024, "Monaco"),
    (2024, "Canada"),
    (2024, "Spain"),
    (2024, "Austria"),
    (2024, "Great Britain"),
    (2024, "Hungary"),
    (2024, "Belgium"),
    (2024, "Netherlands"),
    (2024, "Italy"),
    (2024, "Azerbaijan"),
    (2024, "Singapore"),
    (2024, "United States"),
    (2024, "Mexico"),
    (2024, "Brazil"),
    (2024, "Las Vegas"),
    (2024, "Qatar"),
    (2024, "Abu Dhabi")
]

# ---- Coordinates (approx) for Meteostat enrichment ----
CIRCUIT_COORDS = {
    "Bahrain": (26.0325, 50.5106),
    "Saudi Arabia": (21.6319, 39.1044),
    "Australia": (-37.8497, 144.9680),
    "Japan": (34.8431, 136.5410),
    "China": (31.3389, 121.2197),
    "Miami": (25.9580, -80.2389),
    "Emilia Romagna": (44.3439, 11.7167),
    "Monaco": (43.7347, 7.4206),
    "Canada": (45.5000, -73.5228),
    "Spain": (41.5689, 2.2572),
    "Austria": (47.2197, 14.7647),
    "Great Britain": (52.0733, -1.0141),
    "Hungary": (47.5789, 19.2486),
    "Belgium": (50.4372, 5.9714),
    "Netherlands": (52.3889, 4.5400),
    "Italy": (45.6156, 9.2811),
    "Azerbaijan": (40.3725, 49.8533),
    "Singapore": (1.2914, 103.8644),
    "United States": (30.1328, -97.6411),
    "Mexico": (19.4042, -99.0907),
    "Brazil": (-23.7010, -46.6970),
    "Las Vegas": (36.1147, -115.1728),
    "Qatar": (25.4889, 51.4542),
    "Abu Dhabi": (24.4672, 54.6031)
}


# FastF1 extractor (`load_fastf1_laps`)

In [3]:
# --- FastF1 lap extractor (fresh session per GP) ---
def load_fastf1_laps(year: int, gp: str) -> pd.DataFrame:
    """Extract lap-level data from FastF1 (fresh session each time)."""
    import os, shutil
    import pandas as pd

    # clearing only the cache for this GP to avoid mixing sessions
    cache_dir = os.path.join("cache_fastf1", f"{year}_{gp}")
    shutil.rmtree(cache_dir, ignore_errors=True)

    ses = get_session(year, gp, "R")
    ses.load()   # plain load works on 3.6.1
    laps = ses.laps.copy()

    df = pd.DataFrame()
    df["race_id"] = f"{year}_{gp.lower()}"
    df["year"] = year
    df["circuit_name"] = gp
    df["driver_code"] = laps["Driver"].astype(str)
    df["team"] = laps["Team"].astype(str)
    df["lap_number"] = laps["LapNumber"].astype(int)
    df["position"] = laps["Position"].astype("Int64")

    # Lap & sector times
    for src, dst in [
        ("LapTime","lap_time_s"),
        ("Sector1Time","sector1_s"),
        ("Sector2Time","sector2_s"),
        ("Sector3Time","sector3_s"),
    ]:
        df[dst] = laps[src].dt.total_seconds()

    # Tyres / stints
    df["compound"] = laps["Compound"].astype("category")
    df["tyre_age_laps"] = laps["TyreLife"].astype(float)
    df["stint_id"] = laps["Stint"].astype("Int64")

    # Pit flags
    df["pit_in_flag"]  = laps["PitInTime"].notna().astype(int)
    df["pit_out_flag"] = laps["PitOutTime"].notna().astype(int)

    # Weather (from timing feed)
    for src, dst in [
        ("AirTemp","air_temp_c"),
        ("TrackTemp","track_temp_c"),
        ("WindSpeed","wind_speed_ms"),
        ("WindDirection","wind_direction_deg"),
        ("Humidity","humidity"),
    ]:
        if src in laps.columns:
            df[dst] = laps[src].astype(float)

    # Safety Car / VSC approx (from TrackStatus)
    if "TrackStatus" in laps.columns:
        status = laps["TrackStatus"].astype(str).fillna("")
        df["safety_car_flag"] = status.str.contains(r"(4|5)").astype(int)
        df["virtual_sc_flag"] = status.str.contains("6").astype(int)
    else:
        df["safety_car_flag"] = 0
        df["virtual_sc_flag"] = 0

    # Fallback stint_id: increase when compound changes
    if df["stint_id"].isna().any():
        df = df.sort_values(["driver_code","lap_number"])
        df["stint_id"] = (
            df.groupby("driver_code")["compound"]
              .apply(lambda s: (s != s.shift()).cumsum())
        )

    # Fallback tyre_age: count laps within stint
    if df["tyre_age_laps"].isna().any():
        df["tyre_age_laps"] = (
            df.groupby(["driver_code","stint_id"]).cumcount() + 1
        )

    return df


# NEW fallback helper (add_fastf1_grid_and_pitdur) — replaces Ergast when offline

In [4]:
def add_fastf1_grid_and_pitdur(laps_df: pd.DataFrame, year: int, gp: str) -> pd.DataFrame:
    """Derive grid/finish and pit-stop duration using FastF1 only (robust)."""
    ses = get_session(year, gp, "R")
    ses.load()

    # --- Grid / finish (robust driver column detection) ---
    res = ses.results.copy()
    possible_cols = ["Driver", "Abbreviation", "BroadcastName", "FullName", "DriverId"]
    driver_col = next((c for c in possible_cols if c in res.columns), None)
    if driver_col is None:
        raise ValueError(f"No driver column in session.results. Got: {list(res.columns)}")

    keep = [c for c in [driver_col, "GridPosition", "Position"] if c in res.columns]
    res = res[keep].copy().rename(columns={
        driver_col: "driver_code",
        "GridPosition": "grid_position",
        "Position": "finish_position"
    })
    laps_df = laps_df.merge(res, on="driver_code", how="left")

    # --- Pit durations from FastF1: pair PitIn(t) with PitOut(t+1), assign to outlap ---
    pitdur = compute_pit_durations_from_fastf1(ses)
    if not pitdur.empty:
        laps_df = laps_df.merge(pitdur, on=["driver_code","lap_number"], how="left")
    else:
        laps_df["pit_stop_duration_s"] = np.nan

    return laps_df




In [5]:
def compute_pit_durations_from_fastf1(ses) -> pd.DataFrame:
    """
    Building a table of pit-stop durations using FastF1 by pairing:
      - PitInTime from lap t
      - PitOutTime from lap t+1 (same driver)
    Duration is assigned to the OUTLAP (lap t+1).
    """
    laps_raw = ses.laps[["Driver","LapNumber","PitInTime","PitOutTime"]].copy()
    laps_raw = laps_raw.sort_values(["Driver","LapNumber"]).reset_index(drop=True)

    rows = []
    for drv, g in laps_raw.groupby("Driver"):
        g = g.reset_index(drop=True)
        for i in range(len(g)-1):
            pin  = g.loc[i,   "PitInTime"]
            pout = g.loc[i+1, "PitOutTime"]
            if pd.notna(pin) and pd.notna(pout):
                dur = (pout - pin).total_seconds()
                outlap = int(g.loc[i+1, "LapNumber"])
                rows.append({
                    "driver_code": drv,
                    "lap_number": outlap,          # assign duration to OUTLAP
                    "pit_stop_duration_s": dur
                })
    return pd.DataFrame(rows)


# Meteostat Enrichment + feature engineering

In [6]:
# --- Meteostat enrichment (optional lightweight context) ---
def meteostat_enrich(df: pd.DataFrame) -> pd.DataFrame:
    """Adding rough annual medians from Meteostat (contextual weather)."""
    gp = df["circuit_name"].iloc[0]
    year = int(df["year"].iloc[0])
    coords = CIRCUIT_COORDS.get(gp)
    if not coords:
        return df

    point = Point(coords[0], coords[1])
    start = dt.datetime(year, 1, 1)
    end   = dt.datetime(year, 12, 31, 23)

    try:
        met = Hourly(point, start, end).fetch()
        df["meteo_temp_c_annual_med"] = float(met["temp"].median()) if "temp" in met else np.nan
        df["meteo_wind_ms_annual_med"] = float(met["wspd"].median()) if "wspd" in met else np.nan
    except Exception:
        df["meteo_temp_c_annual_med"] = np.nan
        df["meteo_wind_ms_annual_med"] = np.nan
    return df


# --- Feature engineering: lags, degradation, pit-loss estimate ---
def add_lag_features(df: pd.DataFrame, cols=("lap_time_s",), lags=(1,2,3)) -> pd.DataFrame:
    """Add driver-wise lagged features of lap times."""
    df = df.sort_values(["race_id","driver_code","lap_number"]).copy()
    for c in cols:
        for L in lags:
            df[f"{c}_lag{L}"] = df.groupby(["race_id","driver_code"])[c].shift(L)
    return df


def add_degradation(df: pd.DataFrame) -> pd.DataFrame:
    """Estimate tyre degradation slope (sec/lap) within each stint."""
    df = df.sort_values(["race_id","driver_code","stint_id","lap_number"]).copy()
    df["degradation_s_per_lap"] = np.nan
    for (_, _, _), g in df.groupby(["race_id","driver_code","stint_id"]):
        y = g["lap_time_s"].values
        if np.isfinite(y).sum() >= 4:
            x = np.arange(len(y))
            var = x.var()
            slope = float(np.cov(x, y, bias=True)[0,1] / (var if var>0 else 1))
            df.loc[g.index, "degradation_s_per_lap"] = slope
    return df


def estimate_pit_loss(df: pd.DataFrame) -> pd.DataFrame:
    """Approximate pit loss per race (out-lap minus median of prior laps)."""
    df = df.sort_values(["race_id","driver_code","lap_number"]).copy()
    df["pit_loss_est_s"] = np.nan
    for (_, _), g in df.groupby(["race_id","driver_code"]):
        pit_idx = g.index[g["pit_in_flag"]==1].tolist()
        for ix in pit_idx:
            nxt = g.index.get_loc(ix) + 1
            if nxt < len(g):
                out_i = g.index[nxt]
                prev = g.loc[:ix].tail(4).head(3)["lap_time_s"]
                if len(prev) >= 2 and pd.notna(g.loc[out_i, "lap_time_s"]):
                    df.loc[out_i, "pit_loss_est_s"] = float(g.loc[out_i, "lap_time_s"] - prev.median())
    pitloss = (df.groupby("race_id")["pit_loss_est_s"]
                 .median().rename("pit_loss_time_s").reset_index())
    return df.merge(pitloss, on="race_id", how="left")


# Build loop (extract → enrich → features → export)

In [7]:
import pyarrow
# --- Build dataset for the selected races ---
all_dfs = []

for year, gp in TARGET_RACES:
    print(f"==> Building: {year} / {gp}")

    # Extracting lap-level data from FastF1
    laps = load_fastf1_laps(year, gp)

    # Adding grid/finish + pit durations using FastF1 only (fallback for Ergast)
    laps = add_fastf1_grid_and_pitdur(laps, year, gp)

    # Ensuring metadata columns exist and are filled (important fix!)
    laps["year"] = year
    laps["circuit_name"] = gp

    # Meteostat enrichment 
    laps = meteostat_enrich(laps)

    # Feature engineering
    laps = add_lag_features(laps, cols=("lap_time_s",), lags=(1,2,3))
    laps = add_degradation(laps)
    laps = estimate_pit_loss(laps)

    # Save per-race “silver” dataset
    out_path = f"data/silver/{year}_{gp.lower()}_laps.parquet"
    laps.to_parquet(out_path, index=False)
    all_dfs.append(laps)

# Concatenate all races -> “gold” dataset
df_gold = pd.concat(all_dfs, ignore_index=True)
df_gold.to_parquet("data/gold/f1_laps_gold.parquet", index=False)
df_gold["race_id"] = df_gold["year"].astype(str) + "_" + df_gold["circuit_name"].str.lower()
df_gold.head(10)


==> Building: 2024 / Bahrain


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Bahrain Grand Prix - Rac

==> Building: 2024 / Saudi Arabia


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_statu

==> Building: 2024 / Australia


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req         

==> Building: 2024 / Japan


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      

==> Building: 2024 / China


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       

==> Building: 2024 / Miami


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req         

==> Building: 2024 / Emilia Romagna


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req

==> Building: 2024 / Monaco


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req        

==> Building: 2024 / Canada


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '44', '81', '14', '18', '3', '10', '31', '27', '20', '77', '22', '24', '55', '23', '11', '16', '2']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      

==> Building: 2024 / Spain


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data

==> Building: 2024 / Austria


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_dat

==> Building: 2024 / Great Britain


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      

==> Building: 2024 / Hungary


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req     

==> Building: 2024 / Belgium


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '81', '16', '1', '4', '55', '11', '14', '31', '3', '18', '23', '10', '20', '77', '22', '2', '27', '24', '63']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       

==> Building: 2024 / Netherlands


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '81', '55', '11', '63', '44', '10', '14', '27', '3', '18', '23', '31', '2', '22', '20', '77', '24']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req         

==> Building: 2024 / Italy


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '55', '44', '1', '63', '11', '23', '20', '14', '43', '3', '31', '10', '77', '27', '24', '18', '22']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req      

==> Building: 2024 / Azerbaijan


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '16', '63', '4', '1', '14', '23', '43', '44', '50', '27', '10', '3', '24', '31', '77', '11', '55', '18', '22']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_

==> Building: 2024 / Singapore


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req    

==> Building: 2024 / United States


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '4', '81', '63', '11', '27', '30', '43', '20', '10', '14', '22', '18', '23', '77', '31', '24', '44']
core           INFO 	Loading data for United States Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_sta

==> Building: 2024 / Mexico


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '16', '44', '63', '1', '20', '81', '27', '10', '18', '43', '31', '77', '24', '30', '11', '14', '23', '22']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req 

==> Building: 2024 / Brazil


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '31', '10', '63', '16', '4', '22', '81', '30', '44', '11', '50', '77', '14', '24', '55', '43', '23', '18', '27']
core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req   

==> Building: 2024 / Las Vegas


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '55', '16', '1', '4', '81', '27', '22', '11', '14', '20', '24', '43', '18', '30', '31', '77', '23', '10']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req   

==> Building: 2024 / Qatar


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req       

==> Building: 2024 / Abu Dhabi


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s
0,2024_bahrain,2024,Bahrain,ALB,Williams,1,11,103.888,,44.007,24.378,SOFT,1.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
1,2024_bahrain,2024,Bahrain,ALB,Williams,2,11,98.826,31.419,43.106,24.301,SOFT,2.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
2,2024_bahrain,2024,Bahrain,ALB,Williams,3,11,98.507,31.002,43.081,24.424,SOFT,3.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
3,2024_bahrain,2024,Bahrain,ALB,Williams,4,11,98.422,31.023,43.007,24.392,SOFT,4.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
4,2024_bahrain,2024,Bahrain,ALB,Williams,5,11,98.509,31.049,43.027,24.433,SOFT,5.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
5,2024_bahrain,2024,Bahrain,ALB,Williams,6,11,98.575,31.034,43.141,24.4,SOFT,6.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
6,2024_bahrain,2024,Bahrain,ALB,Williams,7,11,98.971,31.498,43.186,24.287,SOFT,7.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
7,2024_bahrain,2024,Bahrain,ALB,Williams,8,11,98.66,31.493,42.899,24.268,SOFT,8.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
8,2024_bahrain,2024,Bahrain,ALB,Williams,9,11,98.652,31.458,42.976,24.218,SOFT,9.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,
9,2024_bahrain,2024,Bahrain,ALB,Williams,10,11,98.952,31.624,42.955,24.373,SOFT,10.0,1,0,0,0,0,13.0,15.0,,28.9,14.8,,,,,,


In [8]:
df_gold.shape

(27050, 29)

In [9]:
df_gold = df_gold.sort_values(["race_id","driver_code","lap_number"]).copy()
for L in (1,2,3):
    df_gold[f"lap_time_s_lag{L}"] = (
        df_gold.groupby(["race_id","driver_code"])["lap_time_s"].shift(L)
    )

In [10]:
df_gold.tail(20)

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s
21764,2024_united states,2024,United States,ZHO,Kick Sauber,36,19,126.093,50.655,41.812,33.626,HARD,1.0,3,0,1,0,0,18.0,19.0,24.911,23.3,9.4,101.783,101.387,101.13,,,
21765,2024_united states,2024,United States,ZHO,Kick Sauber,37,19,100.161,27.284,39.92,32.957,HARD,2.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,126.093,101.783,101.387,,,
21766,2024_united states,2024,United States,ZHO,Kick Sauber,38,19,100.148,27.053,39.945,33.15,HARD,3.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.161,126.093,101.783,,,
21767,2024_united states,2024,United States,ZHO,Kick Sauber,39,19,101.88,27.073,41.338,33.469,HARD,4.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.148,100.161,126.093,,,
21768,2024_united states,2024,United States,ZHO,Kick Sauber,40,19,100.131,26.932,40.16,33.039,HARD,5.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,101.88,100.148,100.161,,,
21769,2024_united states,2024,United States,ZHO,Kick Sauber,41,19,100.342,26.983,40.289,33.07,HARD,6.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.131,101.88,100.148,,,
21770,2024_united states,2024,United States,ZHO,Kick Sauber,42,19,100.168,27.009,40.017,33.142,HARD,7.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.342,100.131,101.88,,,
21771,2024_united states,2024,United States,ZHO,Kick Sauber,43,19,100.241,27.182,40.096,32.963,HARD,8.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.168,100.342,100.131,,,
21772,2024_united states,2024,United States,ZHO,Kick Sauber,44,19,100.033,27.01,40.007,33.016,HARD,9.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.241,100.168,100.342,,,
21773,2024_united states,2024,United States,ZHO,Kick Sauber,45,19,99.658,26.957,39.874,32.827,HARD,10.0,3,0,0,0,0,18.0,19.0,,23.3,9.4,100.033,100.241,100.168,,,


In [11]:
df_gold["delta_lap_vs_prev"] = df_gold["lap_time_s"] - df_gold["lap_time_s_lag1"]
df_gold["avg_last_3_laps"] = df_gold[["lap_time_s_lag1","lap_time_s_lag2","lap_time_s_lag3"]].mean(axis=1)

In [12]:
df_gold.head(10)

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps
26015,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,,,,,
26016,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,,,,19.001,101.19
26017,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,,,,-24.415,110.6905
26018,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,,,,-5.716,105.719
26019,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,,,,0.309,102.009
26020,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,6,14,90.815,18.319,38.131,34.365,MEDIUM,6.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.369,90.06,95.776,,,,0.446,92.068333
26021,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,7,14,90.317,18.202,38.655,33.46,MEDIUM,7.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.815,90.369,90.06,,,,-0.498,90.414667
26022,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,8,14,90.705,18.155,38.72,33.83,MEDIUM,8.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.317,90.815,90.369,,,,0.388,90.500333
26023,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,9,14,90.469,18.043,38.698,33.728,MEDIUM,9.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.705,90.317,90.815,,,,-0.236,90.612333
26024,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,10,14,90.748,18.169,38.859,33.72,MEDIUM,10.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.469,90.705,90.317,,,,0.279,90.497


In [13]:
df_gold["pit_in_next2"] = ((df_gold.groupby(["race_id","driver_code"])["pit_in_flag"].shift(-1).fillna(0) + 
                       df_gold.groupby(["race_id","driver_code"])["pit_in_flag"].shift(-2).fillna(0)) > 0).astype(int)

In [14]:
df_gold.head(15)

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps,pit_in_next2
26015,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,,,,,,0
26016,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,,,,19.001,101.19,0
26017,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,,,,-24.415,110.6905,0
26018,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,,,,-5.716,105.719,0
26019,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,,,,0.309,102.009,0
26020,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,6,14,90.815,18.319,38.131,34.365,MEDIUM,6.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.369,90.06,95.776,,,,0.446,92.068333,0
26021,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,7,14,90.317,18.202,38.655,33.46,MEDIUM,7.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.815,90.369,90.06,,,,-0.498,90.414667,0
26022,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,8,14,90.705,18.155,38.72,33.83,MEDIUM,8.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.317,90.815,90.369,,,,0.388,90.500333,0
26023,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,9,14,90.469,18.043,38.698,33.728,MEDIUM,9.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.705,90.317,90.815,,,,-0.236,90.612333,0
26024,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,10,14,90.748,18.169,38.859,33.72,MEDIUM,10.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.469,90.705,90.317,,,,0.279,90.497,1


In [15]:
print(df_gold["degradation_s_per_lap"].unique())
print(df_gold["pit_loss_est_s"].unique())
print(df_gold["pit_loss_time_s"].unique())

[nan]
[nan]
[nan]


In [16]:
df_gold.columns

Index(['race_id', 'year', 'circuit_name', 'driver_code', 'team', 'lap_number',
       'position', 'lap_time_s', 'sector1_s', 'sector2_s', 'sector3_s',
       'compound', 'tyre_age_laps', 'stint_id', 'pit_in_flag', 'pit_out_flag',
       'safety_car_flag', 'virtual_sc_flag', 'grid_position',
       'finish_position', 'pit_stop_duration_s', 'meteo_temp_c_annual_med',
       'meteo_wind_ms_annual_med', 'lap_time_s_lag1', 'lap_time_s_lag2',
       'lap_time_s_lag3', 'degradation_s_per_lap', 'pit_loss_est_s',
       'pit_loss_time_s', 'delta_lap_vs_prev', 'avg_last_3_laps',
       'pit_in_next2'],
      dtype='object')

In [17]:
df_gold.head()

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps,pit_in_next2
26015,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,,,,,,0
26016,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,,,,19.001,101.19,0
26017,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,,,,-24.415,110.6905,0
26018,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,,,,-5.716,105.719,0
26019,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,,,,0.309,102.009,0


In [18]:
df_gold.to_csv("data/gold/2024_season.csv", index=False)

In [19]:
df_gold = df_gold.sort_values(["driver_code","race_id","lap_number"]).reset_index(drop=True)

In [20]:
df_gold.head()

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps,pit_in_next2
0,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,,,,,,0
1,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,,,,19.001,101.19,0
2,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,,,,-24.415,110.6905,0
3,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,,,,-5.716,105.719,0
4,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,,,,0.309,102.009,0


# Rolling Pace + Index in Stint

In [21]:
# rolling average/std pe 5 ture per driver+race
df_gold["rolling_avg_5_laps"] = (
    df_gold.groupby(["driver_code","race_id"])["lap_time_s"]
      .transform(lambda s: s.rolling(5, min_periods=1).mean())
)

df_gold["rolling_std_5_laps"] = (
    df_gold.groupby(["driver_code","race_id"])["lap_time_s"]
      .transform(lambda s: s.rolling(5, min_periods=2).std())
)

# indexul turului în cadrul stintului
df_gold["stint_lap_index"] = df_gold.groupby(["driver_code","race_id","stint_id"]).cumcount() + 1

# sanity check
df_gold[["rolling_avg_5_laps","rolling_std_5_laps","stint_lap_index"]].head()


Unnamed: 0,rolling_avg_5_laps,rolling_std_5_laps,stint_lap_index
0,101.19,,1
1,110.6905,13.435736,2
2,105.719,12.822127,3
3,101.80425,13.07309,4
4,99.5172,12.423054,5


# Robust Degradation + imputing (No-NaN policy)

In [22]:
# OLS slope on the last n points (needs at least 3)
def slope_last_n(series, n=5):
    y = series.dropna().to_numpy()
    if y.size < 3:
        return np.nan
    # keep only the tail if we have more than n points
    y = y[-n:] if y.size >= n else y
    # simple 0..k index for regression x
    x = np.arange(y.size, dtype=float)

    # classic least-squares slope (no intercept handling needed here)
    xm, ym = x.mean(), y.mean()
    num = np.sum((x - xm) * (y - ym))
    den = np.sum((x - xm) ** 2)
    return num / den if den else np.nan


# 2.1: OLS trend over the last 5 laps within a stint
df_gold["degradation_trend_ols_5"] = (
    df_gold.groupby(["driver_code", "race_id", "stint_id"])["lap_time_s"]
          .transform(lambda s: slope_last_n(s, n=5))
)


# 2.2: winsorize delta_lap_vs_prev to reduce the impact of outliers
def winsorize(g):
    g = g.copy()
    if g.notna().sum() < 5:
        return g
    low, high = g.quantile(0.05), g.quantile(0.95)
    return g.clip(lower=low, upper=high)


# precompute winsorized deltas at driver–race level
delta_w = (
    df_gold.groupby(["driver_code", "race_id"])["delta_lap_vs_prev"]
          .transform(winsorize)
)


# 2.3: chained imputation for degradation_s_per_lap
df_gold["degradation_imputed_flag"] = 0

mask_nan_deg = df_gold["degradation_s_per_lap"].isna()

# fallback 1: use OLS(5) trend when available
sel = mask_nan_deg & df_gold["degradation_trend_ols_5"].notna()
df_gold.loc[sel, "degradation_s_per_lap"] = df_gold.loc[sel, "degradation_trend_ols_5"]

# recompute mask and try the next source
mask_nan_deg = df_gold["degradation_s_per_lap"].isna()

# fallback 2: use winsorized delta vs previous lap
sel = mask_nan_deg & delta_w.notna()
df_gold.loc[sel, "degradation_s_per_lap"] = delta_w[sel]

# fallback 3: fill remaining gaps with 0 and mark them
mask_nan_deg = df_gold["degradation_s_per_lap"].isna()
df_gold.loc[mask_nan_deg, "degradation_s_per_lap"] = 0.0
df_gold.loc[mask_nan_deg, "degradation_imputed_flag"] = 1

# quick sanity check: all three columns should have no NaNs now except allowed cases
df_gold[["degradation_s_per_lap", "degradation_trend_ols_5", "degradation_imputed_flag"]].isna().sum()

degradation_s_per_lap         0
degradation_trend_ols_5     142
degradation_imputed_flag      0
dtype: int64

# Pace vs. field + traffic (battle proxy)

In [23]:
from pandas.api.types import is_numeric_dtype

# --- Prereqs for the "battle proxy" features ---

# 0) ensure a stable sort so all lag/rolling ops reference the correct order
df_gold = df_gold.sort_values(["driver_code", "race_id", "lap_number"]).reset_index(drop=True)

# coerce position to numeric in case it was read as object (e.g., strings)
if not is_numeric_dtype(df_gold["position"]):
    df_gold["position"] = pd.to_numeric(df_gold["position"], errors="coerce")


# 2) compute rolling std over the last 5 laps if the column is missing
#    min_periods=2 avoids NaN chains at the start of a stint/race
if "rolling_std_5_laps" not in df_gold.columns:
    df_gold["rolling_std_5_laps"] = (
        df_gold.groupby(["driver_code", "race_id"])["lap_time_s"]
              .transform(lambda s: s.rolling(5, min_periods=2).std())
    )

# 3) create a 3-lap lag of position and the net change over those 3 laps
#    these capture short-term race dynamics for each driver within a race
if "position_lag3" not in df_gold.columns:
    df_gold["position_lag3"] = (
        df_gold.groupby(["driver_code", "race_id"])["position"].shift(3)
    )

if "position_change_last_3" not in df_gold.columns:
    df_gold["position_change_last_3"] = df_gold["position"] - df_gold["position_lag3"]


In [24]:
# adaptive threshold (90th percentile) for each race based on lap-time variability
# if all values are NaN for a given race, default to 0.0 to avoid propagation of NaNs
battle_thresh = (
    df_gold.groupby("race_id")["rolling_std_5_laps"]
          .transform(lambda s: s.dropna().quantile(0.90) if s.notna().any() else 0.0)
)

# condition 1: driver changed position (±1 or more) over the last 3 laps
# condition 2: high lap-time volatility (above race-specific threshold)
# fill missing values with False to keep boolean logic stable before casting to int
cond_pos = df_gold["position_change_last_3"].abs().ge(1).fillna(False)
cond_vol = df_gold["rolling_std_5_laps"].gt(battle_thresh).fillna(False)

# flag laps where either condition is met → driver likely engaged in a battle
df_gold["in_battle_flag"] = (cond_pos | cond_vol).astype("int8")

# apply a small time penalty when in battle (to model clean air disadvantage)
# this value (0.20s) can be tuned based on validation or domain insights
df_gold["clean_air_penalty_s"] = df_gold["in_battle_flag"] * 0.20


In [25]:
df_gold["in_battle_flag"].value_counts(dropna=False)
df_gold[["position_change_last_3","rolling_std_5_laps","in_battle_flag"]].head(10)

Unnamed: 0,position_change_last_3,rolling_std_5_laps,in_battle_flag
0,,,0
1,,13.435736,1
2,,12.822127,1
3,0.0,13.07309,1
4,0.0,12.423054,1
5,-1.0,12.929916,1
6,-1.0,2.423857,1
7,-1.0,0.306002,1
8,0.0,0.216065,0
9,0.0,0.209655,0


# Pit multipliers + fallbacks (no-NaN guarantee)

In [26]:
# 1) multipliers based on track conditions (to model reduced pit time during cautions)
df_gold["pit_state_multiplier"] = 1.0
df_gold.loc[df_gold["virtual_sc_flag"] == 1, "pit_state_multiplier"] = 0.70
df_gold.loc[df_gold["safety_car_flag"] == 1, "pit_state_multiplier"] = 0.55

# 2) compute median pit stop duration per race + team (fallback to race median if team data missing)
team_med = df_gold.groupby(["race_id", "team"])["pit_stop_duration_s"].transform("median")
race_med = df_gold.groupby("race_id")["pit_stop_duration_s"].transform("median")
df_gold["pit_stop_duration_fallback_s"] = team_med.fillna(race_med)

# 3) estimate pit loss time using fallback duration multiplied by condition multipliers
# fill missing pit_loss_time_s values with these estimates
df_gold["pit_loss_time_fallback_s"] = (
    df_gold["pit_stop_duration_fallback_s"] * df_gold["pit_state_multiplier"]
)
mask_time = df_gold["pit_loss_time_s"].isna()
df_gold.loc[mask_time, "pit_loss_time_s"] = df_gold.loc[mask_time, "pit_loss_time_fallback_s"]

# flag rows where pit_loss_time_s was imputed
df_gold["pit_loss_time_imputed_flag"] = 0
df_gold.loc[mask_time, "pit_loss_time_imputed_flag"] = 1

# 4) for cases with invalid or missing outlap estimates, copy pit_loss_time_s into pit_loss_est_s
# then flag these imputations
mask_est = df_gold["pit_loss_est_s"].isna()
df_gold.loc[mask_est, "pit_loss_est_s"] = df_gold.loc[mask_est, "pit_loss_time_s"]

df_gold["pit_loss_est_imputed_flag"] = 0
df_gold.loc[mask_est, "pit_loss_est_imputed_flag"] = 1


# Risk Score

In [27]:
# normalize rolling_std_5_laps within each race to capture local variation
# returns z-scores; if std is 0 or NaN, return zeros to avoid division errors
def zscore_by_race(s):
    m, sd = s.mean(), s.std()
    return (s - m) / sd if sd not in (0, np.nan) else (s * 0)

# composite risk indicator:
# combines variability (z-score), battle involvement, and imputation flags
# clipping ensures extreme volatility doesn’t dominate the score
risk = (
    df_gold.groupby("race_id")["rolling_std_5_laps"].transform(zscore_by_race).clip(-1, 3).fillna(0)
    + 0.5 * df_gold["in_battle_flag"]
    + 0.5 * df_gold["pit_loss_time_imputed_flag"]
    + 0.5 * df_gold["degradation_imputed_flag"]
)

# final normalization: simple sigmoid to rescale into 0–1 range
df_gold["risk_score"] = 1 / (1 + np.exp(-risk))


# Race Context (quality of life)

In [28]:
df_gold["race_laps_total"] = df_gold.groupby("race_id")["lap_number"].transform("max")
df_gold["lap_progress_pct"] = df_gold["lap_number"] / df_gold["race_laps_total"]

# Sanity Check - QA

In [29]:
qa = {
    "NaN_degradation_s_per_lap": int(df_gold["degradation_s_per_lap"].isna().sum()),
    "NaN_pit_loss_est_s":        int(df_gold["pit_loss_est_s"].isna().sum()),
    "NaN_pit_loss_time_s":       int(df_gold["pit_loss_time_s"].isna().sum()),
    "risk_score_mean":           round(df_gold["risk_score"].mean(), 3),
}
qa


{'NaN_degradation_s_per_lap': 0,
 'NaN_pit_loss_est_s': 0,
 'NaN_pit_loss_time_s': 0,
 'risk_score_mean': np.float64(0.622)}

In [30]:
df_gold.head()

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps,pit_in_next2,rolling_avg_5_laps,rolling_std_5_laps,stint_lap_index,degradation_trend_ols_5,degradation_imputed_flag,position_lag3,position_change_last_3,in_battle_flag,clean_air_penalty_s,pit_state_multiplier,pit_stop_duration_fallback_s,pit_loss_time_fallback_s,pit_loss_time_imputed_flag,pit_loss_est_imputed_flag,risk_score,race_laps_total,lap_progress_pct
0,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,0.3281,23.503,23.503,,,0,101.19,,1,0.3281,0,,,0,0.0,1.0,23.503,23.503,1,1,0.622459,58,0.017241
1,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,0.3281,16.4521,16.4521,19.001,101.19,0,110.6905,13.435736,2,0.3281,0,,,1,0.2,0.7,23.503,16.4521,1,1,0.967803,58,0.034483
2,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,0.3281,16.4521,16.4521,-24.415,110.6905,0,105.719,12.822127,3,0.3281,0,,,1,0.2,0.7,23.503,16.4521,1,1,0.963263,58,0.051724
3,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,0.3281,23.503,23.503,-5.716,105.719,0,101.80425,13.07309,4,0.3281,0,15.0,0.0,1,0.2,1.0,23.503,23.503,1,1,0.96519,58,0.068966
4,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,0.3281,23.503,23.503,0.309,102.009,0,99.5172,12.423054,5,0.3281,0,15.0,0.0,1,0.2,1.0,23.503,23.503,1,1,0.959986,58,0.086207


In [31]:
df_gold.shape

(27050, 49)

In [32]:
df_gold.to_csv("data/gold/2024_season.csv", index=False)

# QA for the dataset

In [33]:
df_gold.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27050 entries, 0 to 27049
Data columns (total 49 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   race_id                       27050 non-null  object 
 1   year                          27050 non-null  int64  
 2   circuit_name                  27050 non-null  object 
 3   driver_code                   27050 non-null  object 
 4   team                          27050 non-null  object 
 5   lap_number                    27050 non-null  int64  
 6   position                      27023 non-null  Int64  
 7   lap_time_s                    26826 non-null  float64
 8   sector1_s                     26510 non-null  float64
 9   sector2_s                     27019 non-null  float64
 10  sector3_s                     26998 non-null  float64
 11  compound                      27050 non-null  object 
 12  tyre_age_laps                 27050 non-null  float64
 13  s

In [34]:
df_gold.isna().sum().sort_values(ascending=False).head(10)

pit_stop_duration_s       26226
lap_time_s_lag3            1577
position_change_last_3     1421
position_lag3              1407
lap_time_s_lag2            1116
delta_lap_vs_prev           809
lap_time_s_lag1             654
sector1_s                   540
rolling_std_5_laps          515
avg_last_3_laps             480
dtype: int64

In [35]:
df_gold.describe().T.tail(10)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
in_battle_flag,27050.0,0.398484,0.489595,0.0,0.0,0.0,1.0,1.0
clean_air_penalty_s,27050.0,0.079697,0.097919,0.0,0.0,0.0,0.2,0.2
pit_state_multiplier,27050.0,0.979665,0.091049,0.55,1.0,1.0,1.0,1.0
pit_stop_duration_fallback_s,27050.0,144.155202,457.233298,17.366,21.743,23.016,25.9905,2381.591
pit_loss_time_fallback_s,27050.0,140.876216,450.849765,11.341,21.561,22.8075,25.613,2381.591
pit_loss_time_imputed_flag,27050.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
pit_loss_est_imputed_flag,27050.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
risk_score,27050.0,0.622359,0.165098,0.410811,0.494937,0.569732,0.682438,0.989013
race_laps_total,27050.0,62.147098,8.659197,44.0,56.0,62.0,70.0,78.0
lap_progress_pct,27050.0,0.498032,0.287123,0.012821,0.246377,0.492958,0.746032,1.0


In [36]:
df_gold.columns

Index(['race_id', 'year', 'circuit_name', 'driver_code', 'team', 'lap_number',
       'position', 'lap_time_s', 'sector1_s', 'sector2_s', 'sector3_s',
       'compound', 'tyre_age_laps', 'stint_id', 'pit_in_flag', 'pit_out_flag',
       'safety_car_flag', 'virtual_sc_flag', 'grid_position',
       'finish_position', 'pit_stop_duration_s', 'meteo_temp_c_annual_med',
       'meteo_wind_ms_annual_med', 'lap_time_s_lag1', 'lap_time_s_lag2',
       'lap_time_s_lag3', 'degradation_s_per_lap', 'pit_loss_est_s',
       'pit_loss_time_s', 'delta_lap_vs_prev', 'avg_last_3_laps',
       'pit_in_next2', 'rolling_avg_5_laps', 'rolling_std_5_laps',
       'stint_lap_index', 'degradation_trend_ols_5',
       'degradation_imputed_flag', 'position_lag3', 'position_change_last_3',
       'in_battle_flag', 'clean_air_penalty_s', 'pit_state_multiplier',
       'pit_stop_duration_fallback_s', 'pit_loss_time_fallback_s',
       'pit_loss_time_imputed_flag', 'pit_loss_est_imputed_flag', 'risk_score',
       

In [37]:
df_gold["expected_gain_vs_stayout_s"] = (
    df_gold["pit_state_multiplier"] * df_gold["pit_loss_time_s"] -
    df_gold["degradation_s_per_lap"] * 10
)

In [38]:
df_gold["expected_gain_vs_stayout_s"]

0          20.222000
1           8.235470
2           8.235470
3          20.222000
4          20.222000
5          20.222000
6          20.222000
7          20.222000
8          20.222000
9          20.222000
10         20.222000
11         20.222000
12         23.831000
13         23.831000
14         23.831000
15         23.831000
16         23.831000
17         23.831000
18         23.831000
19         23.831000
20         23.831000
21         23.831000
22         23.831000
23         23.831000
24         23.831000
25         23.831000
26         23.831000
27         23.831000
28         23.831000
29         23.831000
30         23.831000
31         23.831000
32         23.831000
33         23.831000
34         23.831000
35         23.831000
36         23.831000
37         23.831000
38         23.831000
39         23.831000
40         23.831000
41         23.831000
42         23.831000
43         23.831000
44         23.831000
45         23.831000
46         23.831000
47         23

# The final DataPoints for the dataset, in the future I might increase the number of rows. I possibly might go from 1 full season to 3 or 5 full seasons.

In [61]:
df_gold.head(10)

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps,pit_in_next2,rolling_avg_5_laps,rolling_std_5_laps,stint_lap_index,degradation_trend_ols_5,degradation_imputed_flag,position_lag3,position_change_last_3,in_battle_flag,clean_air_penalty_s,pit_state_multiplier,pit_stop_duration_fallback_s,pit_loss_time_fallback_s,pit_loss_time_imputed_flag,pit_loss_est_imputed_flag,risk_score,race_laps_total,lap_progress_pct,expected_gain_vs_stayout_s
0,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,0.3281,23.503,23.503,,,0,101.19,,1,0.3281,0,,,0,0.0,1.0,23.503,23.503,1,1,0.622459,58,0.017241,20.222
1,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,0.3281,16.4521,16.4521,19.001,101.19,0,110.6905,13.435736,2,0.3281,0,,,1,0.2,0.7,23.503,16.4521,1,1,0.967803,58,0.034483,8.23547
2,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,0.3281,16.4521,16.4521,-24.415,110.6905,0,105.719,12.822127,3,0.3281,0,,,1,0.2,0.7,23.503,16.4521,1,1,0.963263,58,0.051724,8.23547
3,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,0.3281,23.503,23.503,-5.716,105.719,0,101.80425,13.07309,4,0.3281,0,15.0,0.0,1,0.2,1.0,23.503,23.503,1,1,0.96519,58,0.068966,20.222
4,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,0.3281,23.503,23.503,0.309,102.009,0,99.5172,12.423054,5,0.3281,0,15.0,0.0,1,0.2,1.0,23.503,23.503,1,1,0.959986,58,0.086207,20.222
5,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,6,14,90.815,18.319,38.131,34.365,MEDIUM,6.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.369,90.06,95.776,0.3281,23.503,23.503,0.446,92.068333,0,97.4422,12.929916,6,0.3281,0,15.0,-1.0,1,0.2,1.0,23.503,23.503,1,1,0.964103,58,0.103448,20.222
6,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,7,14,90.317,18.202,38.655,33.46,MEDIUM,7.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.815,90.369,90.06,0.3281,23.503,23.503,-0.498,90.414667,0,91.4674,2.423857,7,0.3281,0,15.0,-1.0,1,0.2,1.0,23.503,23.503,1,1,0.721422,58,0.12069,20.222
7,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,8,14,90.705,18.155,38.72,33.83,MEDIUM,8.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.317,90.815,90.369,0.3281,23.503,23.503,0.388,90.500333,0,90.4532,0.306002,8,0.3281,0,15.0,-1.0,1,0.2,1.0,23.503,23.503,1,1,0.617752,58,0.137931,20.222
8,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,9,14,90.469,18.043,38.698,33.728,MEDIUM,9.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.705,90.317,90.815,0.3281,23.503,23.503,-0.236,90.612333,0,90.535,0.216065,9,0.3281,0,14.0,0.0,0,0.0,1.0,23.503,23.503,1,1,0.49,58,0.155172,20.222
9,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,10,14,90.748,18.169,38.859,33.72,MEDIUM,10.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.469,90.705,90.317,0.3281,23.503,23.503,0.279,90.497,1,90.6108,0.209655,10,0.3281,0,14.0,0.0,0,0.0,1.0,23.503,23.503,1,1,0.489643,58,0.172414,20.222


# For the final output we will use 3 models, decision, plan and explain

## Training the Decision Model

### Feature Set + Target

In [39]:
feat_cols = [
    # pace & trend
    "lap_time_s","lap_time_s_lag1","lap_time_s_lag2","lap_time_s_lag3",
    "avg_last_3_laps","delta_lap_vs_prev","rolling_avg_5_laps","rolling_std_5_laps",
    # tyres
    "compound","tyre_age_laps","stint_lap_index","degradation_s_per_lap","degradation_trend_ols_5",
    # context
    "position","grid_position","lap_number","race_laps_total","lap_progress_pct","in_battle_flag",
    # safety & meteo
    "safety_car_flag","virtual_sc_flag","meteo_temp_c_annual_med","meteo_wind_ms_annual_med",
    # pit cost / strategy
    "pit_loss_est_s","pit_loss_time_s","pit_state_multiplier","clean_air_penalty_s",
    # misc risk
    "risk_score",
    # IDs pentru encoding contextual
    "circuit_name","team","driver_code"
]
target_col = "pit_in_next2"
group_col  = "race_id"

### Pipeline train/test (GroupKFold pe curse) + Calibration

In [40]:
import numpy as np, pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GroupKFold
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report

# work on a copy to avoid side effects
df = df_gold.copy()

# basic split: features, target, and grouping key
X = df[feat_cols]
y = df[target_col].astype(int)
groups = df[group_col]

# separate numeric vs categorical features
num_cols = X.select_dtypes(include=[np.number]).columns.tolist()
cat_cols = list(set(feat_cols) - set(num_cols))  # e.g. ["compound","circuit_name","team","driver_code"]

# preprocessing:
# - median impute numerics
# - most_frequent impute categoricals + one-hot encode
pre = ColumnTransformer(
    transformers=[
        ("num", SimpleImputer(strategy="median"), num_cols),
        ("cat", Pipeline([
            ("imp", SimpleImputer(strategy="most_frequent")),
            ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
        ]), cat_cols),
    ],
    remainder="drop",
    verbose_feature_names_out=False
)

# tree-based baseline that handles large, sparse one-hot outputs efficiently
base_clf = HistGradientBoostingClassifier(
    max_depth=None, learning_rate=0.08, max_iter=300, l2_regularization=0.0,
    min_samples_leaf=40, random_state=42
)

# full pipeline: preprocessing -> classifier
pipe = Pipeline([("pre", pre), ("clf", base_clf)])

# grouped CV to prevent leakage across related samples (e.g., same race/session)
gkf = GroupKFold(n_splits=5)
auc_scores, ap_scores = [], []

for fold, (tr, te) in enumerate(gkf.split(X, y, groups), 1):
    # fit preprocessing + model on train indices
    pipe_fit = pipe.fit(X.iloc[tr], y.iloc[tr])

    # probability calibration on the trained model (simple approach on full train)
    calib = CalibratedClassifierCV(pipe_fit["clf"], method="sigmoid", cv="prefit")
    calib.fit(pipe_fit["pre"].transform(X.iloc[tr]), y.iloc[tr])

    # transform test fold once with the fitted preprocessor
    Xte_t = pipe_fit["pre"].transform(X.iloc[te])

    # calibrated probabilities for metrics
    p = calib.predict_proba(Xte_t)[:, 1]
    auc = roc_auc_score(y.iloc[te], p)
    ap  = average_precision_score(y.iloc[te], p)
    auc_scores.append(auc); ap_scores.append(ap)
    print(f"Fold {fold}  AUC={auc:.3f}  AP={ap:.3f}")

# quick summary of CV performance
print(f"\nCV AUC mean={np.mean(auc_scores):.3f} ±{np.std(auc_scores):.3f} | AP mean={np.mean(ap_scores):.3f}")

# final fit on all data, then calibrate on full set (MVP approach)
pipe.fit(X, y)
calib_final = CalibratedClassifierCV(pipe["clf"], method="sigmoid", cv="prefit").fit(
    pipe["pre"].transform(X), y
)




Fold 1  AUC=0.856  AP=0.217




Fold 2  AUC=0.897  AP=0.382




Fold 3  AUC=0.907  AP=0.499




Fold 4  AUC=0.846  AP=0.261




Fold 5  AUC=0.766  AP=0.139

CV AUC mean=0.855 ±0.050 | AP mean=0.300




### Light heuristic for recommended_compound_next

In [41]:
def recommend_compound(row):
    laps_left = row["race_laps_total"] - row["lap_number"]
    t = row["meteo_temp_c_annual_med"]

    # if Safety Car (SC) or Virtual Safety Car (VSC) is active,
    # prefer a harder compound to ensure durability and stable pace on restart
    if row["safety_car_flag"] == 1 or row["virtual_sc_flag"] == 1:
        return "H" if laps_left > 20 else "M"

    # high ambient temperature favors harder compounds (H or M)
    if t >= 28:
        return "H" if laps_left > 18 else "M"

    # medium temperature range: balance between durability and grip
    if 20 <= t < 28:
        if laps_left > 22:
            return "H"
        if laps_left > 12:
            return "M"
        return "S"

    # low temperatures: softer compounds perform better and warm up faster
    if laps_left > 20:
        return "M"
    return "S"


### Builder for JSON Decision

In [42]:
def decision_snapshot(row, p1, p2):
    expected_gain = float(row["expected_gain_vs_stayout_s"])

    # pit_now = 1 if model predicts high pit probability (p2 > 0.65)
    # and expected gain from pitting is positive
    pit_now = int((p2 > 0.65) and (expected_gain > 0))

    # recommended next compound determined from conditions in recommend_compound()
    comp_next = recommend_compound(row)

    return {
        "decision": {
            # binary flag: whether the model suggests pitting on this lap
            "pit_recommend_now": pit_now,

            # model probabilities for pitting within next 1–2 laps
            "pit_probability_next_1_lap": round(float(p1), 2),
            "pit_probability_next_2_laps": round(float(p2), 2),

            # suggested compound for next stint
            "recommended_compound_next": comp_next,

            # estimated length of next stint (min 8, max 30 laps)
            "target_stint_length_laps": int(
                max(8, min(30, (row["race_laps_total"] - row["lap_number"]) // 2))
            ),

            # will be filled later during the planning stage
            "expected_total_race_time_s": None,

            # expected time gain vs staying out
            "expected_gain_vs_stayout_s": round(expected_gain, 1),

            # rough confidence interval for gain estimate
            "confidence_band_s": [round(expected_gain - 2.3, 1),
                                  round(expected_gain + 2.3, 1)]
        }
    }

### Inference on any row (lap snapshot)

In [43]:
def predict_decision_for_idx(idx):
    # pull a single row by integer index
    row = df.iloc[idx]

    # transform the row with the fitted preprocessor (expects 2D input)
    Xt = pipe["pre"].transform(row[feat_cols].to_frame().T)

    # calibrated probability of pitting (next ~2 laps proxy)
    p = calib_final.predict_proba(Xt)[:, 1][0]

    # derive a slightly more conservative 1-lap probability
    # (simple downscale to avoid over-assertive calls on immediate pit)
    p1 = max(0, min(1, p / 1.15))
    p2 = p

    # assemble the human-readable decision snapshot
    return decision_snapshot(row, p1, p2)


# example usage:
snap = predict_decision_for_idx(1234)
snap


{'decision': {'pit_recommend_now': 0,
  'pit_probability_next_1_lap': 0.0,
  'pit_probability_next_2_laps': 0.0,
  'recommended_compound_next': 'H',
  'target_stint_length_laps': 20,
  'expected_total_race_time_s': None,
  'expected_gain_vs_stayout_s': -41.9,
  'confidence_band_s': [-44.2, -39.6]}}

## Veryfing the Decision Model

### Is the model learning anything?

In [44]:
# distribution of target + quick probs
print("positive rate (pit_in_next2=1):", df["pit_in_next2"].mean())
Xt_all = pipe["pre"].transform(df[feat_cols])
p_all = calib_final.predict_proba(Xt_all)[:,1]
print("p_all mean:", p_all.mean(), "p_all>0.5:", (p_all>0.5).mean())

positive rate (pit_in_next2=1): 0.05815157116451017
p_all mean: 0.05815161520855594 p_all>0.5: 0.05707948243992606


### Does it spike near real pit stops?

In [45]:
# take laps that truly pit in next 2 laps
ix_pos = df.index[df["pit_in_next2"]==1][:20]
for i in ix_pos[:5]:
    print(i, df.loc[i, ["race_id","lap_number","driver_code","pit_in_next2"]].to_dict(),
          float(calib_final.predict_proba(pipe["pre"].transform(df.loc[[i],feat_cols]))[:,1]))


9 {'race_id': '2024_abu dhabi', 'lap_number': 10, 'driver_code': 'ALB', 'pit_in_next2': 1} 0.3052316756036588
10 {'race_id': '2024_abu dhabi', 'lap_number': 11, 'driver_code': 'ALB', 'pit_in_next2': 1} 0.693656712593192
60 {'race_id': '2024_australia', 'lap_number': 4, 'driver_code': 'ALB', 'pit_in_next2': 1} 0.8833441503532686
61 {'race_id': '2024_australia', 'lap_number': 5, 'driver_code': 'ALB', 'pit_in_next2': 1} 0.993326127770352
81 {'race_id': '2024_australia', 'lap_number': 25, 'driver_code': 'ALB', 'pit_in_next2': 1} 0.7772573874729529




### Top-scored snapshots – spot check

In [46]:
top_ix = np.argsort(-p_all)[:10]
df.loc[top_ix, ["race_id","lap_number","driver_code","compound","tyre_age_laps","pit_in_next2"]]

Unnamed: 0,race_id,lap_number,driver_code,compound,tyre_age_laps,pit_in_next2
5689,2024_qatar,35,GAS,MEDIUM,35.0,1
15390,2024_qatar,35,PER,MEDIUM,35.0,1
12822,2024_qatar,35,NOR,MEDIUM,35.0,1
2304,2024_qatar,35,ALO,MEDIUM,35.0,1
26797,2024_qatar,35,ZHO,MEDIUM,35.0,1
25428,2024_qatar,35,VER,MEDIUM,35.0,1
26798,2024_qatar,36,ZHO,HARD,1.0,1
10120,2024_qatar,35,LEC,MEDIUM,36.0,1
24023,2024_qatar,35,TSU,MEDIUM,35.0,1
9312,2024_brazil,31,LEC,INTERMEDIATE,10.0,1


### Confusion @ threshold τ (e.g., 0.65)

In [47]:
from sklearn.metrics import confusion_matrix, classification_report
tau = 0.65
pred = (p_all >= tau).astype(int)
print(confusion_matrix(df["pit_in_next2"], pred))
print(classification_report(df["pit_in_next2"], pred, digits=3))

[[25438    39]
 [  139  1434]]
              precision    recall  f1-score   support

           0      0.995     0.998     0.997     25477
           1      0.974     0.912     0.942      1573

    accuracy                          0.993     27050
   macro avg      0.984     0.955     0.969     27050
weighted avg      0.993     0.993     0.993     27050



### Calibration sanity (bins)

In [48]:
bins = pd.qcut(p_all, 10, duplicates="drop")
cal = df.groupby(bins)["pit_in_next2"].mean()
print(pd.DataFrame({"bin_mid": [i.mid for i in cal.index], "emp_rate": cal.values}))

        bin_mid  emp_rate
0 -4.999999e-04  0.000000
1  1.485000e-09  0.000000
2  9.600000e-09  0.000370
3  4.725000e-08  0.000000
4  2.365000e-07  0.000000
5  1.612500e-06  0.000000
6  1.471500e-05  0.000370
7  2.003000e-04  0.002218
8  8.287000e-03  0.007394
9  5.081000e-01  0.571165




In [49]:
taus = np.linspace(0.4, 0.8, 21)
gain = []
for t in taus:
    decision = (p_all >= t).astype(int)
    # proxy utility: take action only when gain>0
    gain.append((decision * df["expected_gain_vs_stayout_s"]).mean())
best_tau = float(taus[int(np.argmax(gain))])


In [50]:
def decisions_for_driver_race(driver, race):
    rows = df[(df.driver_code==driver) & (df.race_id==race)].index
    out = []
    for i in rows:
        row = df.loc[i]
        Xt = pipe["pre"].transform(row[feat_cols].to_frame().T)
        p2 = float(calib_final.predict_proba(Xt)[:,1])
        p1 = max(0.0, min(1.0, p2/1.15))
        out.append(decision_snapshot(row, p1, p2))
    return out


In [51]:
def est_lap_pace(compound, age, base_pace, deg_per_lap):
    # base pace comes from rolling_avg_5_laps adjusted by compound deltas
    comp_delta = {"H": +0.6, "M": +0.2, "S": 0.0, "INTER": +6.0, "WET": +12.0}
    return base_pace + comp_delta.get(compound, 0.2) + max(age,0)*deg_per_lap

def simulate_plan(row, stops):
    laps_left = int(row["race_laps_total"] - row["lap_number"])
    pit_cost = float(row["pit_state_multiplier"] * row["pit_loss_time_s"])
    base = float(row["rolling_avg_5_laps"] or row["avg_last_3_laps"] or row["lap_time_s"])
    deg  = float(row["degradation_s_per_lap"])

    # naive splitter of remaining laps into 'stops+1' stints
    stint_lengths = []
    if stops==1: stint_lengths = [max(8, laps_left//2), laps_left - max(8, laps_left//2)]
    if stops==2: stint_lengths = [max(8, laps_left//3)]*2 + [laps_left - 2*max(8, laps_left//3)]
    if stops==3: stint_lengths = [max(7, laps_left//4)]*3 + [laps_left - 3*max(7, laps_left//4)]

    comp_seq = []
    rem = []
    t_total = 0.0
    cur_age = int(row["tyre_age_laps"])
    cur_comp = row["compound"]

    for k, L in enumerate(stint_lengths):
        # choose compound via heuristic for now
        nxt = recommend_compound(row) if k>0 else cur_comp
        comp_seq.append(nxt)
        # integrate per-lap pace
        stint_time = sum(est_lap_pace(nxt, a if k>0 else (cur_age+a), base, deg) for a in range(L))
        t_total += stint_time
        if k < len(stint_lengths)-1:
            t_total += pit_cost  # add pit between stints
        rem.append({"from_lap": int(row["lap_number"] + sum(stint_lengths[:k])),
                    "to_lap":   int(row["lap_number"] + sum(stint_lengths[:k+1])),
                    "compound": nxt,
                    "planned_pit_lap": int(row["lap_number"] + sum(stint_lengths[:k+1]))})

    return t_total, rem

def build_plan(row):
    candidates = []
    for stops in [1,2,3]:
        t, stints = simulate_plan(row, stops)
        candidates.append((t, stops, stints))
    t_best, nstops, best_stints = min(candidates, key=lambda x: x[0])
    plan = {
        "planned_number_of_stops": int(nstops),
        "stints": best_stints,
        "scenario_branches": {
            "if_SC_in_[{} , {}]".format(row["lap_number"], row["lap_number"]+4): {"pit_now": True, "compound": "H"},
            "if_rain_prob_gt_0.4": {"pit_for": "Inter"}
        }
    }
    return plan, t_best


In [52]:
def decision_plus_plan(idx):
    row = df.loc[idx]
    Xt = pipe["pre"].transform(row[feat_cols].to_frame().T)
    p2 = float(calib_final.predict_proba(Xt)[:,1]); p1 = max(0,min(1,p2/1.15))
    dec = decision_snapshot(row, p1, p2)

    plan, t_best = build_plan(row)
    dec["decision"]["expected_total_race_time_s"] = round(t_best, 1)
    return {"decision": dec["decision"], "plan": plan}


In [53]:
idx = df.index[df["pit_in_next2"] == 1][0]  

In [54]:
res = decision_plus_plan(idx)

import json
print(json.dumps(res, indent=2))


{
  "decision": {
    "pit_recommend_now": 0,
    "pit_probability_next_1_lap": 0.27,
    "pit_probability_next_2_laps": 0.31,
    "recommended_compound_next": "H",
    "target_stint_length_laps": 24,
    "expected_total_race_time_s": 4569.8,
    "expected_gain_vs_stayout_s": 20.2,
    "confidence_band_s": [
      17.9,
      22.5
    ]
  },
  "plan": {
    "planned_number_of_stops": 3,
    "stints": [
      {
        "from_lap": 10,
        "to_lap": 22,
        "compound": "MEDIUM",
        "planned_pit_lap": 22
      },
      {
        "from_lap": 22,
        "to_lap": 34,
        "compound": "H",
        "planned_pit_lap": 34
      },
      {
        "from_lap": 34,
        "to_lap": 46,
        "compound": "H",
        "planned_pit_lap": 46
      },
      {
        "from_lap": 46,
        "to_lap": 58,
        "compound": "H",
        "planned_pit_lap": 58
      }
    ],
    "scenario_branches": {
      "if_SC_in_[10 , 14]": {
        "pit_now": true,
        "compound": "H"
     



In [55]:
for lap in [10, 15, 20, 25]:
    idx = df[(df.driver_code=="VER") & (df.race_id=="2024_qatar") & (df.lap_number==lap)].index[0]
    res = decision_plus_plan(idx)
    print(f"Lap {lap}: {res['decision']['pit_probability_next_2_laps']:.2f}")


Lap 10: 0.00
Lap 15: 0.00
Lap 20: 0.00
Lap 25: 0.00




In [56]:
outputs = [decision_plus_plan(i) for i in df[(df.driver_code=="VER") & (df.race_id=="2024_qatar")].index]
import json
with open("decision_output_qatar.json", "w") as f:
    json.dump(outputs, f, indent=2)




In [57]:
df_gold.head(20)

Unnamed: 0,race_id,year,circuit_name,driver_code,team,lap_number,position,lap_time_s,sector1_s,sector2_s,sector3_s,compound,tyre_age_laps,stint_id,pit_in_flag,pit_out_flag,safety_car_flag,virtual_sc_flag,grid_position,finish_position,pit_stop_duration_s,meteo_temp_c_annual_med,meteo_wind_ms_annual_med,lap_time_s_lag1,lap_time_s_lag2,lap_time_s_lag3,degradation_s_per_lap,pit_loss_est_s,pit_loss_time_s,delta_lap_vs_prev,avg_last_3_laps,pit_in_next2,rolling_avg_5_laps,rolling_std_5_laps,stint_lap_index,degradation_trend_ols_5,degradation_imputed_flag,position_lag3,position_change_last_3,in_battle_flag,clean_air_penalty_s,pit_state_multiplier,pit_stop_duration_fallback_s,pit_loss_time_fallback_s,pit_loss_time_imputed_flag,pit_loss_est_imputed_flag,risk_score,race_laps_total,lap_progress_pct,expected_gain_vs_stayout_s
0,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,1,15,101.19,,40.443,35.86,MEDIUM,1.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,,,,0.3281,23.503,23.503,,,0,101.19,,1,0.3281,0,,,0,0.0,1.0,23.503,23.503,1,1,0.622459,58,0.017241,20.222
1,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,2,15,120.191,18.394,56.603,45.194,MEDIUM,2.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,101.19,,,0.3281,16.4521,16.4521,19.001,101.19,0,110.6905,13.435736,2,0.3281,0,,,1,0.2,0.7,23.503,16.4521,1,1,0.967803,58,0.034483,8.23547
2,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,3,15,95.776,19.929,42.24,33.607,MEDIUM,3.0,1,0,0,0,1,18.0,11.0,,30.0,13.0,120.191,101.19,,0.3281,16.4521,16.4521,-24.415,110.6905,0,105.719,12.822127,3,0.3281,0,,,1,0.2,0.7,23.503,16.4521,1,1,0.963263,58,0.051724,8.23547
3,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,4,15,90.06,18.099,37.915,34.046,MEDIUM,4.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,95.776,120.191,101.19,0.3281,23.503,23.503,-5.716,105.719,0,101.80425,13.07309,4,0.3281,0,15.0,0.0,1,0.2,1.0,23.503,23.503,1,1,0.96519,58,0.068966,20.222
4,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,5,15,90.369,18.232,37.968,34.169,MEDIUM,5.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.06,95.776,120.191,0.3281,23.503,23.503,0.309,102.009,0,99.5172,12.423054,5,0.3281,0,15.0,0.0,1,0.2,1.0,23.503,23.503,1,1,0.959986,58,0.086207,20.222
5,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,6,14,90.815,18.319,38.131,34.365,MEDIUM,6.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.369,90.06,95.776,0.3281,23.503,23.503,0.446,92.068333,0,97.4422,12.929916,6,0.3281,0,15.0,-1.0,1,0.2,1.0,23.503,23.503,1,1,0.964103,58,0.103448,20.222
6,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,7,14,90.317,18.202,38.655,33.46,MEDIUM,7.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.815,90.369,90.06,0.3281,23.503,23.503,-0.498,90.414667,0,91.4674,2.423857,7,0.3281,0,15.0,-1.0,1,0.2,1.0,23.503,23.503,1,1,0.721422,58,0.12069,20.222
7,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,8,14,90.705,18.155,38.72,33.83,MEDIUM,8.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.317,90.815,90.369,0.3281,23.503,23.503,0.388,90.500333,0,90.4532,0.306002,8,0.3281,0,15.0,-1.0,1,0.2,1.0,23.503,23.503,1,1,0.617752,58,0.137931,20.222
8,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,9,14,90.469,18.043,38.698,33.728,MEDIUM,9.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.705,90.317,90.815,0.3281,23.503,23.503,-0.236,90.612333,0,90.535,0.216065,9,0.3281,0,14.0,0.0,0,0.0,1.0,23.503,23.503,1,1,0.49,58,0.155172,20.222
9,2024_abu dhabi,2024,Abu Dhabi,ALB,Williams,10,14,90.748,18.169,38.859,33.72,MEDIUM,10.0,1,0,0,0,0,18.0,11.0,,30.0,13.0,90.469,90.705,90.317,0.3281,23.503,23.503,0.279,90.497,1,90.6108,0.209655,10,0.3281,0,14.0,0.0,0,0.0,1.0,23.503,23.503,1,1,0.489643,58,0.172414,20.222
