In [13]:
# %%
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, TyreRaceData, Team, DriverTeamSession, TeamCircuitStats

from sqlalchemy.orm import joinedload
import pandas as pd

# Initialize the database session
engine, db_session = init_db()

# Query lap data with joins to get relevant features
laps_query = (
    db_session.query(
        Lap.lap_num,
        Lap.position,
        Lap.session_id,
        Lap.driver_id,
        Lap.tyre,
        Lap.pit,
        Lap.lap_time,
        Lap.rainfall,
        TyreRaceData.a,
        TyreRaceData.b,
        TyreRaceData.c,
        Session.wet.label("session_wet")
    )
    .join(Session, Lap.session_id == Session.session_id)
    .join(TyreRaceData, (Lap.session_id == TyreRaceData.race_id) & (Lap.driver_id == TyreRaceData.driver_id))
    .filter(Session.session_type == "Race")
)

# Convert query results to a DataFrame
data = pd.read_sql(laps_query.statement, laps_query.session.bind)

(data)

Unnamed: 0,lap_num,position,session_id,driver_id,tyre,pit,lap_time,rainfall,a,b,c,session_wet
0,1,17,5,1,2,False,105.008,False,-0.001907,0.155377,0.171162,False
1,1,17,5,1,2,False,105.008,False,0.003444,-0.163703,3.102967,False
2,2,16,5,1,2,False,91.585,False,-0.001907,0.155377,0.171162,False
3,2,16,5,1,2,False,91.585,False,0.003444,-0.163703,3.102967,False
4,3,16,5,1,2,False,91.406,False,-0.001907,0.155377,0.171162,False
...,...,...,...,...,...,...,...,...,...,...,...,...
203983,54,10,608,47,3,False,87.731,False,-0.001961,0.124354,0.323352,False
203984,55,10,608,47,3,False,87.781,False,-0.001961,0.124354,0.323352,False
203985,56,10,608,47,3,False,87.816,False,-0.001961,0.124354,0.323352,False
203986,57,10,608,47,3,False,88.554,False,-0.001961,0.124354,0.323352,False


In [14]:
# Add cumulative tyre wear feature
data['tyre_wear'] = data.groupby(['session_id', 'driver_id', 'tyre'])['lap_time'].cumsum()

# Add position change feature (example: difference from previous lap)
data['position_change'] = data.groupby(['session_id', 'driver_id'])['position'].diff().fillna(0)

# Encode tyre types as categorical variables
data['tyre'] = data['tyre'].map({0: 'Soft', 1: 'Medium', 2: 'Hard'})


# Identify pit stops
data['next_tyre'] = data.groupby(['session_id', 'driver_id'])['tyre'].shift(-1)
data['pit_stop_lap'] = data['lap_num'].where(data['pit']).ffill()

# Drop rows where labels are NaN
data = data.dropna(subset=['next_tyre', 'pit_stop_lap'])

In [15]:
from sklearn.model_selection import train_test_split

# Features and labels
X = data[['lap_num', 'tyre_wear', 'position_change', 'rainfall', 'session_wet', 'a', 'b', 'c']]
y_tyre = data['next_tyre']
y_pit_lap = data['pit_stop_lap']

# Train-test split
X_train, X_test, y_tyre_train, y_tyre_test, y_pit_lap_train, y_pit_lap_test = train_test_split(
    X, y_tyre, y_pit_lap, test_size=0.2, random_state=42
)

In [16]:
from sklearn.preprocessing import LabelEncoder

# Encode tyre types
tyre_encoder = LabelEncoder()
y_tyre_train_encoded = tyre_encoder.fit_transform(y_tyre_train)
y_tyre_test_encoded = tyre_encoder.transform(y_tyre_test)

In [17]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Combine targets into a single DataFrame
y_train = pd.DataFrame({'tyre': y_tyre_train_encoded, 'pit_lap': y_pit_lap_train})
y_test = pd.DataFrame({'tyre': y_tyre_test_encoded, 'pit_lap': y_pit_lap_test})

# Train multi-output model
model = MultiOutputClassifier(RandomForestClassifier(random_state=42))
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)


In [21]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    precision_score,
    recall_score,
    f1_score
)

# Evaluate Tyre Prediction
accuracy_tyre = accuracy_score(y_test['tyre'], y_pred[:, 0])
conf_matrix_tyre = confusion_matrix(y_test['tyre'], y_pred[:, 0])
report_tyre = classification_report(y_test['tyre'], y_pred[:, 0], target_names=tyre_encoder.classes_)

# Evaluate Pit Lap Prediction
accuracy_pit_lap = accuracy_score(y_test['pit_lap'], y_pred[:, 1])
conf_matrix_pit_lap = confusion_matrix(y_test['pit_lap'], y_pred[:, 1])

# Print Results
print("Tyre Prediction:")
print(f"Accuracy: {accuracy_tyre}")
print("Confusion Matrix:")
print(conf_matrix_tyre)
print("Classification Report:")
print(report_tyre)

print("\nPit Lap Prediction:")
print(f"Accuracy: {accuracy_pit_lap}")
print("Confusion Matrix:")
print(conf_matrix_pit_lap)

Tyre Prediction:
Accuracy: 0.9574402670414617
Confusion Matrix:
[[15175    98]
 [  871  6624]]
Classification Report:
              precision    recall  f1-score   support

        Hard       0.95      0.99      0.97     15273
      Medium       0.99      0.88      0.93      7495

    accuracy                           0.96     22768
   macro avg       0.97      0.94      0.95     22768
weighted avg       0.96      0.96      0.96     22768


Pit Lap Prediction:
Accuracy: 0.9647312016865777
Confusion Matrix:
[[198   0   0 ...   0   0   0]
 [  0 332   0 ...   0   0   0]
 [  0   3 146 ...   0   0   0]
 ...
 [  0   0   0 ...  39   0   0]
 [  0   0   0 ...   0  12   0]
 [  0   0   0 ...   0   0  19]]


In [26]:
# %%
from sqlalchemy.orm import joinedload

# Step 1: Query Qualifying Positions for Race 2 of 2024
quali_query = (
    db_session.query(
        SessionResult.position.label("quali_position"),
        Driver.driver_name,
        Driver.driver_id,
        Session.session_id
    )
    .join(Driver, SessionResult.driver_id == Driver.driver_id)
    .join(Session, SessionResult.session_id == Session.session_id)
    .join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
    .filter(RacingWeekend.year == 2024, RacingWeekend.round == 2, Session.session_type == "Qualifying")
)

quali_data = pd.read_sql(quali_query.statement, quali_query.session.bind)

# Get HAM's qualifying position and driver ID
ham_quali = quali_data[quali_data['driver_name'] == "Lewis Hamilton"]
ham_driver_id = ham_quali['driver_id'].values[0]
ham_quali_position = ham_quali['quali_position'].values[0]

print(f"HAM Qualifying Position: {ham_quali_position}")

# Step 2: Query Lap Data for Race 2 of 2024
race_lap_query = (
    db_session.query(
        Lap.lap_num,
        Lap.position,
        Lap.session_id,
        Lap.driver_id,
        Lap.tyre,
        Lap.pit,
        Lap.lap_time,
        Lap.rainfall,
        TyreRaceData.a,
        TyreRaceData.b,
        TyreRaceData.c,
        Session.wet.label("session_wet")
    )
    .join(Session, Lap.session_id == Session.session_id)
    .join(TyreRaceData, (Lap.session_id == TyreRaceData.race_id) & (Lap.driver_id == TyreRaceData.driver_id))
    .join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
    .filter(RacingWeekend.year == 2024, RacingWeekend.round == 6, Session.session_type == "Race")
)

race_lap_data = pd.read_sql(race_lap_query.statement, race_lap_query.session.bind)

# Filter for HAM's data
ham_lap_data = race_lap_data[race_lap_data['driver_id'] == ham_driver_id]

# Add cumulative tyre wear feature
ham_lap_data['tyre_wear'] = ham_lap_data.groupby(['session_id', 'driver_id', 'tyre'])['lap_time'].cumsum()

# Add position change feature
ham_lap_data['position_change'] = ham_lap_data.groupby(['session_id', 'driver_id'])['position'].diff().fillna(0)

# Encode tyre types as categorical variables
ham_lap_data['tyre'] = ham_lap_data['tyre'].map({0: 'Soft', 1: 'Medium', 2: 'Hard'})

# Prepare input features for prediction
ham_input = ham_lap_data[['lap_num', 'tyre_wear', 'position_change', 'rainfall', 'session_wet', 'a', 'b', 'c']]

# Step 3: Predict Pit Stops and Tyre Choices for HAM
ham_predictions = model.predict(ham_input)

# Decode tyre predictions
predicted_tyres = tyre_encoder.inverse_transform(ham_predictions[:, 0].astype(int))  # Ensure integer type
predicted_pit_laps = ham_predictions[:, 1]

# Combine predictions with lap data
ham_lap_data['predicted_tyre'] = predicted_tyres
ham_lap_data['predicted_pit_lap'] = predicted_pit_laps

# Display predictions
print("\nHAM Predictions:")
# Ensure all rows are displayed (no truncation)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000)
(ham_lap_data[['lap_num', 'predicted_tyre', 'predicted_pit_lap']])

HAM Qualifying Position: 8

HAM Predictions:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ham_lap_data['tyre_wear'] = ham_lap_data.groupby(['session_id', 'driver_id', 'tyre'])['lap_time'].cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ham_lap_data['position_change'] = ham_lap_data.groupby(['session_id', 'driver_id'])['position'].diff().fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-v

Unnamed: 0,lap_num,predicted_tyre,predicted_pit_lap
1191,1,Hard,3.0
1192,2,Hard,35.0
1193,3,Hard,35.0
1194,4,Hard,35.0
1195,5,Hard,35.0
1196,6,Hard,35.0
1197,7,Hard,35.0
1198,8,Hard,35.0
1199,9,Hard,35.0
1200,10,Hard,35.0
