# F1 Strategy Project — Full 10-Model Notebook (Exported)

This notebook contains the expanded 10-model scaffolds and a runnable Bi-LSTM training section.

**How to use**:
1. Place your `features_all.parquet` (generated from FastF1/OpenF1) into the `data/` folder.
2. Open this notebook in VS Code / Colab and run cells sequentially.
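3. The **Bi-LSTM** training cell will create a timestamped `models/bilstm_<YYYYMMDD-HHMMSS>` folder containing the best saved model and `metrics.json`; TensorBoard logs are written to a `logs/bilstm_<YYYYMMDD-HHMMSS>` folder.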


In [None]:
# Setup cell
# Install packages if needed (uncomment)
!pip install fastf1 xgboost lightgbm optuna shap imbalanced-learn scikit-learn tensorflow pandas matplotlib seaborn joblib

from pathlib import Path
DATA_DIR = Path('data')
MODEL_DIR = Path('models')
LOG_DIR = Path('logs')
DATA_DIR.mkdir(exist_ok=True)
MODEL_DIR.mkdir(exist_ok=True)
LOG_DIR.mkdir(exist_ok=True)
print('DATA_DIR=', DATA_DIR, 'MODEL_DIR=', MODEL_DIR, 'LOG_DIR=', LOG_DIR)


Collecting tensorflow
  Using cached tensorflow-2.20.0-cp311-cp311-win_amd64.whl.metadata (4.6 kB)
Using cached tensorflow-2.20.0-cp311-cp311-win_amd64.whl (331.8 MB)
Installing collected packages: tensorflow
Successfully installed tensorflow-2.20.0
DATA_DIR= data MODEL_DIR= models LOG_DIR= logs



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: C:\Users\HP\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [1]:
# Check features file
# Sanity check: confirm the engineered feature table is present and preview it.
from pathlib import Path

feat = Path('data/features_all.parquet')
if feat.exists():
    import pandas as pd

    df = pd.read_parquet(feat)
    print('Loaded features_all.parquet with shape', df.shape)
    # Rich preview of the first rows (notebook display hook).
    display(df.head())
else:
    # Nothing to load yet; tell the reader which step has to run first.
    print('features_all.parquet not found in data/. Please run ingestion and feature-building first.')


Loaded features_all.parquet with shape (3758, 12)


Unnamed: 0,Driver,Team,LapNumber,Stint,Compound,lap_sec,tyre_age,lap_delta,rolling_mean_3,pit_next,Season,GrandPrix
0,VER,Red Bull Racing,1.0,1.0,SOFT,97.284,1,,97.284,0,2024,Bahrain
1,VER,Red Bull Racing,2.0,1.0,SOFT,96.296,2,-0.988,96.79,0,2024,Bahrain
2,VER,Red Bull Racing,3.0,1.0,SOFT,96.753,3,0.457,96.777667,0,2024,Bahrain
3,VER,Red Bull Racing,4.0,1.0,SOFT,96.647,4,-0.106,96.565333,0,2024,Bahrain
4,VER,Red Bull Racing,5.0,1.0,SOFT,97.173,5,0.526,96.857667,0,2024,Bahrain


In [2]:
# Bi-LSTM training cell (run this to train)
import json
import os
import time

import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input, Layer
from tensorflow.keras.models import Model
# NOTE(review): nothing in the visible part of this cell uses torch (the model
# below is built with Keras). Confirm whether later cells need these imports.
import torch
import torch.nn as nn
import torch.optim as optim


# Run configuration for the Bi-LSTM experiment.
SEQ_LEN = 40      # timesteps per input window
BATCH_SIZE = 64
EPOCHS = 30       # maximum number of training epochs

# Take the timestamp once so the model folder and the log folder of a run
# always carry the same suffix (two separate strftime calls can straddle a
# second boundary and produce mismatched names).
RUN_STAMP = time.strftime('%Y%m%d-%H%M%S')
OUTDIR = 'models/bilstm_' + RUN_STAMP
LOGDIR = 'logs/bilstm_' + RUN_STAMP
for run_dir in (OUTDIR, LOGDIR):
    os.makedirs(run_dir, exist_ok=True)

# Load the engineered feature table; fail fast if the file or the target is missing.
feat_path = 'data/features_all.parquet'
if not os.path.exists(feat_path):
    raise FileNotFoundError('Put features_all.parquet into data/ and re-run.')

df = pd.read_parquet(feat_path)
if 'pit_next' not in df.columns:
    raise KeyError('pit_next target missing in features.')

# Keep only the candidate inputs that are actually present in the table.
candidate_features = ('lap_sec', 'tyre_age', 'lap_delta', 'rolling_mean_3')
features = [name for name in candidate_features if name in df.columns]

# Discard laps with a missing input or a missing label.
required_columns = [*features, 'pit_next']
df = df.dropna(subset=required_columns)

def build_sequences(frame, feature_cols, seq_len, target_col='pit_next'):
    """Build one fixed-length, left-padded window per lap of every stint.

    Laps are grouped per stint and ordered by ``LapNumber``. For each lap
    except the last one of a stint, the window holds that lap and up to
    ``seq_len - 1`` preceding laps of the same stint; shorter histories are
    padded with zero rows at the front. The label is the ``target_col`` value
    of the lap that follows the window.

    NOTE(review): the label is read from the lap *after* the window's last
    lap (index ``last + 1``). Confirm this offset is intended given how
    ``pit_next`` was engineered.

    Parameters:
        frame: lap-level DataFrame with ``Driver``, ``Stint``, ``LapNumber``,
            the feature columns and ``target_col``.
        feature_cols: names of the input columns, in model input order.
        seq_len: number of timesteps per window.
        target_col: name of the label column.

    Returns:
        Three parallel lists: windows (arrays of shape
        ``(seq_len, len(feature_cols))``), labels, and boolean arrays of
        length ``seq_len`` that are True on real (non-padding) timesteps.

    Raises:
        KeyError: if ``Driver``, ``Stint``, ``LapNumber`` or a requested
            column is missing from ``frame``.
    """
    # Driver + Stint alone would pool laps of the same driver and stint number
    # from different races into one sequence whenever the frame holds more
    # than one race, so the race identifiers are part of the key when present.
    race_cols = [c for c in ('Season', 'GrandPrix') if c in frame.columns]
    windows, labels, real_steps = [], [], []
    for _, stint_laps in frame.groupby(race_cols + ['Driver', 'Stint']):
        stint_laps = stint_laps.sort_values('LapNumber')
        values = stint_laps[feature_cols].values
        stint_targets = stint_laps[target_col].values
        for last in range(len(values) - 1):
            history = values[max(0, last - seq_len + 1):last + 1]
            n_pad = seq_len - history.shape[0]
            if n_pad > 0:
                history = np.vstack([np.zeros((n_pad, history.shape[1])), history])
            windows.append(history)
            labels.append(stint_targets[last + 1])
            # True for the trailing real laps, False for the leading padding.
            real_steps.append(np.arange(seq_len) >= n_pad)
    return windows, labels, real_steps


seqs, targets, real_steps = build_sequences(df, features, SEQ_LEN)
if not seqs:
    raise ValueError('No training sequences could be built: no stint has at least 2 usable laps.')

# Float dtype so the in-place standardisation below can never truncate values.
X = np.array(seqs, dtype=float); y = np.array(targets).astype(int)
real_mask = np.array(real_steps)

# NOTE(review): windows from the same stint overlap heavily, so this random
# split places near-duplicate windows on both sides and the test metrics are
# likely optimistic. Consider splitting by race or stint instead.
X_train, X_test, y_train, y_test, real_train, real_test = train_test_split(
    X, y, real_mask, test_size=0.2, stratify=y, random_state=42)

# Scale per feature
# Statistics come from the real training timesteps only; padding is left at 0,
# which equals the training mean after standardisation. Fitting on the padded
# zeros would drag the mean towards 0 and inflate the variance.
n_feats = X.shape[2]
for feat_idx in range(n_feats):
    scaler = StandardScaler()
    scaler.fit(X_train[real_train, feat_idx].reshape(-1, 1))
    X_train[real_train, feat_idx] = scaler.transform(X_train[real_train, feat_idx].reshape(-1, 1)).ravel()
    X_test[real_test, feat_idx] = scaler.transform(X_test[real_test, feat_idx].reshape(-1, 1)).ravel()

class Attention(Layer):
    """Additive attention pooling over the time axis of a sequence of hidden states.

    The last timestep acts as the query. Every timestep is scored against it,
    the scores are softmax-normalised along the time axis, and the hidden
    states are summed with those weights into a single context vector.

    Parameters:
        units: width of the two projection layers used for scoring.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, units, **kwargs):
        # Forward base-layer kwargs so the layer can be rebuilt from its config.
        super(Attention, self).__init__(**kwargs)
        self.units = units
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, hidden_states):
        """Return the attention-weighted sum of ``hidden_states`` over axis 1.

        ``hidden_states`` is indexed as a rank-3 tensor; axis 1 is treated as
        time (presumably (batch, time, features) — confirm against the caller).
        """
        query = hidden_states[:, -1, :]
        # Re-insert the time axis so the query broadcasts against every timestep.
        query_with_time = tf.expand_dims(query, 1)
        score = tf.nn.tanh(self.W1(hidden_states) + self.W2(query_with_time))
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        return tf.reduce_sum(attention_weights * hidden_states, axis=1)

    def get_config(self):
        """Return the layer config, including ``units``, for model serialisation."""
        config = super(Attention, self).get_config()
        config.update({'units': self.units})
        return config

# Architecture: Bi-LSTM encoder -> attention pooling -> dense head -> sigmoid output.
inp = Input(shape=(SEQ_LEN, n_feats))

# Keep every timestep's hidden state so the attention layer can weigh them.
encoded = Bidirectional(LSTM(128, return_sequences=True))(inp)
context = Attention(64)(encoded)

hidden = Dense(64, activation='relu')(context)
hidden = Dropout(0.2)(hidden)
out = Dense(1, activation='sigmoid')(hidden)

model = Model(inputs=inp, outputs=out)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')],
)
model.summary()

# Callbacks: all model selection is driven by validation AUC (higher is better).
best_model_path = os.path.join(OUTDIR, 'bilstm_best.h5')
es = EarlyStopping(monitor='val_auc', mode='max', patience=6, restore_best_weights=True)
ckpt = ModelCheckpoint(best_model_path, monitor='val_auc', mode='max', save_best_only=True)
tb = TensorBoard(log_dir=LOGDIR)

# Fit with a 10% validation split taken from the training data.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es, ckpt, tb],
)

# Score the held-out set: probabilities for AUC, 0.5-thresholded labels for P/R/F1.
y_prob = model.predict(X_test).ravel()
y_pred = (y_prob > 0.5).astype(int)

auc = roc_auc_score(y_test, y_prob)
precision, recall, f1 = precision_recall_fscore_support(y_test, y_pred, average='binary')[:3]

# Cast to plain floats so the values are JSON-serialisable.
metric_names = ('auc', 'precision', 'recall', 'f1')
metric_values = (auc, precision, recall, f1)
metrics = {name: float(value) for name, value in zip(metric_names, metric_values)}

metrics_path = os.path.join(OUTDIR, 'metrics.json')
with open(metrics_path, 'w') as fh:
    json.dump(metrics, fh, indent=2)
print('Saved model and metrics to', OUTDIR)


OSError: [WinError 1114] A dynamic link library (DLL) initialization routine failed. Error loading "C:\Users\HP\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\lib\c10.dll" or one of its dependencies.