Import Libs

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np
import pandas as pd

from joblib import dump, load


from datetime import datetime

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

In [35]:
cotaData = pd.read_csv(r"C:\Users\lordw\OneDrive\Documents\ML\MLprojects\F1hackTx\backend\data\mercedes_cota_2022_2024.csv")
cotaData.columns

Index(['timestamp', 'season', 'driver', 'lap_number', 'position',
       'interval_gap', 'flag_status', 'incident_message', 'lap_time',
       'push_signal', 'tyre_compound', 'stint_lap_count', 'tyre_wear_pct',
       'tyre_temp_C', 'engine_power_pct', 'throttle_pct', 'speed_kph',
       'drs_status', 'weather_condition', 'rainfall_mm', 'air_temperature_C',
       'fuel_load_kg'],
      dtype='object')

In [36]:
cotaData['timestamp'] = pd.to_datetime(cotaData['timestamp'])
cotaData = cotaData.sort_values('timestamp').reset_index(drop=True)

In [37]:
# --- Step 0: Ensure binary target ---
# Convert rainfall_mm to 0/1
cotaData['rain_binary'] = (cotaData['rainfall_mm'] > 0).astype(int)

# --- Parameters ---
window_size = 10      # number of laps in your window
lag = 10              # how many laps ahead to predict
rain_col = 'rain_binary'
predictor_cols = ['rainfall_mm', 'air_temperature_C']

features = []
targets = []

# --- Build windowed features and targets ---
for i in range(len(cotaData) - window_size - lag + 1):
    # Current window
    window_start = i
    window_end = i + window_size
    window = cotaData.iloc[window_start:window_end]
    
    # Flatten features across the window
    X_window = window[predictor_cols].values.flatten()
    
    # Target: rain 10 laps ahead
    target_idx = window_end + lag - 1
    y_window = cotaData.iloc[target_idx][rain_col]
    
    features.append(X_window)
    targets.append(y_window)

X = np.array(features)
y = np.array(targets)

# --- Train/test split (time series aware) ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# --- Fit Logistic Regression ---
model = LogisticRegression(max_iter=500)
model.fit(X_train, y_train)

# --- Predictions ---
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 score:", f1_score(y_test, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
# --- Optional: class distribution check ---
print("Train class distribution:", np.bincount(y_train))
print("Test class distribution:", np.bincount(y_test))

Accuracy: 1.0
F1 score: 0.0
Confusion matrix:
 [[64]]
Train class distribution: [226  27]
Test class distribution: [64]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
# Save the model
dump(model, 'logistic_model.joblib')