In [1]:
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker
import sys
import os
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../")))
from DB.models import init_db, Circuit, Season, RacingWeekend, Driver, Session, SessionResult, Lap, TyreRaceData, Team, DriverTeamSession, TeamCircuitStats

# Initialize the database session
engine, db_session = init_db()


query = (
    db_session.query(
        RacingWeekend.year,
        Circuit.circuit_name,
        Session.session_type,
        Lap.driver_id,
        Lap.lap_num,
        Lap.tyre.label("current_tyre"),
        Lap.tyre_laps,
        Lap.lap_time,
        Lap.position,
        Lap.rainfall,
        Lap.pit.label("pit_stop"),
        TeamCircuitStats.pit_time.label("avg_pit_time"),
        TeamCircuitStats.quali_to_race_percent_diff
    )
    .join(Session, Lap.session_id == Session.session_id)
    .join(RacingWeekend, Session.weekend_id == RacingWeekend.racing_weekend_id)
    .join(Circuit, RacingWeekend.circut_id == Circuit.circuit_id)
    .join(TeamCircuitStats, Circuit.circuit_id == TeamCircuitStats.circuit_id)
    .filter(Session.session_type == "Race")  # Focus on race sessions
)

data = query.all()

In [2]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Convert query results to a DataFrame
df = pd.DataFrame(data)

# Group by race and driver
grouped = df.groupby(['year', 'circuit_name', 'driver_id'])

# Encode categorical variables
# Encode categorical variables
encoder = OneHotEncoder()  # Remove sparse=False
encoded_features = encoder.fit_transform(df[['circuit_name', 'current_tyre']]).toarray()  # Convert to dense array
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out(['circuit_name', 'current_tyre']))

# Normalize numerical features
scaler = StandardScaler()
numerical_features = df[['lap_num', 'tyre_laps', 'lap_time', 'position', 'avg_pit_time', 'quali_to_race_percent_diff']]
scaled_features = scaler.fit_transform(numerical_features)

# Combine features
X = pd.concat([pd.DataFrame(scaled_features, columns=numerical_features.columns), encoded_df], axis=1)

# Labels
y_starting_tyre = grouped['current_tyre'].first()  # Starting tyre
y_pit_decisions = grouped.apply(lambda x: list(zip(x['pit_stop'], x['current_tyre'])))  # Pit stop and tyre choices

  y_pit_decisions = grouped.apply(lambda x: list(zip(x['pit_stop'], x['current_tyre'])))  # Pit stop and tyre choices


In [3]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding

# Prepare input sequences
max_laps = df['lap_num'].max()
sequences = []
labels = []

for _, group in grouped:
    # Extract features for the current group
    seq = group[['lap_num', 'tyre_laps', 'lap_time', 'position', 'avg_pit_time', 'quali_to_race_percent_diff']].values
    
    # Truncate sequences longer than max_laps
    if len(seq) > max_laps:
        seq = seq[:max_laps]
    
    # Pad sequences shorter than max_laps
    pad_width = max(0, max_laps - len(seq))  # Ensure non-negative padding
    seq = np.pad(seq, ((0, pad_width), (0, 0)), mode='constant')  # Pad to fixed length
    sequences.append(seq)
    
    # Prepare labels for the current group
    label = list(zip(group['pit_stop'], group['current_tyre']))
    
    # Truncate labels longer than max_laps
    if len(label) > max_laps:
        label = label[:max_laps]
    
    # Pad labels shorter than max_laps
    label = [(0, 0)] * (max_laps - len(label)) + label  # Pad labels
    labels.append(label)

# Convert lists to numpy arrays
X_seq = np.array(sequences)
y_seq = np.array(labels)



2025-01-30 18:15:23.961522: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738260923.985832  406834 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738260923.993750  406834 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-30 18:15:24.039346: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Build LSTM model
model = Sequential([
    LSTM(64, input_shape=(max_laps, X_seq.shape[2]), return_sequences=True),  # Return sequences
    Dense(32, activation='relu'),
    Dense(2, activation='softmax')  # Now outputs a sequence
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_seq, y_seq, epochs=10, batch_size=32)

model.save('model.h5')  # Saves the model in HDF5 format

I0000 00:00:1738260934.048902  406834 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1767 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6
  super().__init__(**kwargs)
W0000 00:00:1738260934.785048  407121 gpu_backend_lib.cc:579] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  ./cuda_sdk_lib
  ipykernel_launcher.runfiles/cuda_nvcc
  ipykern/cuda_nvcc
  
  /usr/local/cuda
  /home/ben/Individual_Project/env/lib/python3.10/site-packages/tensorflow/python/platform/../../../nvidia/cuda_nvcc
  /home/ben/Individual_Project/env/lib/python3.10/site-packages/tensorflow/python/platform/../../../../nvidia/cuda_nvcc
  /home/ben/Individual_Project/env/lib/python3.10/site-packages/tensorflow/python/platform/../../cuda
  .
You 

UnknownError: {{function_node __wrapped__Sign_device_/job:localhost/replica:0/task:0/device:GPU:0}} JIT compilation failed. [Op:Sign] name: 

In [43]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Flatten the true labels and predictions
y_true_flat = y_seq.reshape(-1, y_seq.shape[-1])  # Flatten true labels
y_pred_flat = model.predict(X_seq).reshape(-1, y_seq.shape[-1])  # Flatten predictions

# Convert probabilities to class labels (argmax)
y_true_classes = np.argmax(y_true_flat, axis=1)
y_pred_classes = np.argmax(y_pred_flat, axis=1)

In [44]:
from sklearn.metrics import confusion_matrix

# Compute the confusion matrix
cm = confusion_matrix(y_true_classes, y_pred_classes)

# Extract TP, FP, FN, TN
# Assuming binary classification (class 0 and class 1)
TP = cm[1, 1]  # True Positives: Correctly predicted positive class
FP = cm[0, 1]  # False Positives: Incorrectly predicted positive class
FN = cm[1, 0]  # False Negatives: Incorrectly predicted negative class
TN = cm[0, 0]  # True Negatives: Correctly predicted negative class

# Print the results
print("Confusion Matrix:")
print(cm)
print("\nTrue Positives (TP):", TP)
print("False Positives (FP):", FP)
print("False Negatives (FN):", FN)
print("True Negatives (TN):", TN)

# Optionally, calculate additional metrics
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
accuracy = (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

print("\nPrecision:", precision)
print("Recall:", recall)
print("Accuracy:", accuracy)
print("F1 Score:", f1_score)

# Test on new race

In [1]:
# Query the race details
year = 2023
round_number = 8
driver_id = 5

# Get the racing weekend ID
racing_weekend = (
    db_session.query(RacingWeekend)
    .filter_by(year=year, round=round_number)
    .first()
)

if not racing_weekend:
    raise ValueError("Racing weekend not found!")

weekend_id = racing_weekend.racing_weekend_id
circuit_id = racing_weekend.circut_id

# Get the circuit name
circuit_name = db_session.query(Circuit.circuit_name).filter_by(circuit_id=circuit_id).scalar()

# Get the team circuit stats for the driver's team
team_stats = (
    db_session.query(TeamCircuitStats)
    .join(DriverTeamSession, DriverTeamSession.team_id == TeamCircuitStats.team_id)
    .filter(
        DriverTeamSession.driver_id == driver_id,
        TeamCircuitStats.circuit_id == circuit_id
    )
    .first()
)

if not team_stats:
    raise ValueError("Team circuit stats not found!")

avg_pit_time = team_stats.pit_time
quali_to_race_percent_diff = team_stats.quali_to_race_percent_diff

# Get lap data for the race session
race_session = (
    db_session.query(Session)
    .filter_by(weekend_id=weekend_id, session_type="Race")
    .first()
)

if not race_session:
    raise ValueError("Race session not found!")

session_id = race_session.session_id

# Query lap data for the driver
lap_data = (
    db_session.query(Lap)
    .filter_by(session_id=session_id, driver_id=driver_id)
    .order_by(Lap.lap_num.asc())
    .all()
)

NameError: name 'db_session' is not defined

In [46]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Extract features from lap data
data = []
for lap in lap_data:
    data.append({
        "lap_num": lap.lap_num,
        "tyre_laps": lap.tyre_laps,
        "lap_time": lap.lap_time,
        "position": lap.position,
        "pit_stop": lap.pit,
        "current_tyre": lap.tyre,
        "rainfall": lap.rainfall,
        "avg_pit_time": avg_pit_time,
        "quali_to_race_percent_diff": quali_to_race_percent_diff,
        "circuit_name": circuit_name,
    })

df = pd.DataFrame(data)

# Encode categorical variables
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(df[['circuit_name', 'current_tyre']]).toarray()
encoded_df = pd.DataFrame(encoded_features, columns=encoder.get_feature_names_out(['circuit_name', 'current_tyre']))

# Normalize numerical features
scaler = StandardScaler()
numerical_features = df[['lap_num', 'tyre_laps', 'lap_time', 'position', 'avg_pit_time', 'quali_to_race_percent_diff']]
scaled_features = scaler.fit_transform(numerical_features)

# Combine features
X = pd.concat([pd.DataFrame(scaled_features, columns=numerical_features.columns), encoded_df], axis=1)

# Pad or truncate the sequence to match max_laps
max_laps = 70  # Example value; adjust based on training data
pad_width = max(0, max_laps - len(X))
X_seq = np.pad(X.values, ((0, pad_width), (0, 0)), mode='constant')
X_seq = np.expand_dims(X_seq, axis=0)  # Add batch dimension

In [47]:
from tensorflow.keras.models import load_model
import numpy as np

# Load the trained model
model = load_model('model.h5')

# Select only the first 6 features (or the features used during training)
X_seq = X_seq[:, :, :6]

# Define the expected number of timesteps
max_laps = 87

# Adjust the number of timesteps
if X_seq.shape[1] < max_laps:
    # Pad if fewer timesteps
    pad_width = max_laps - X_seq.shape[1]
    X_seq_final = np.pad(X_seq, ((0, 0), (0, pad_width), (0, 0)), mode='constant')
else:
    # Truncate if more timesteps
    X_seq_final = X_seq[:, :max_laps, :]

# Make predictions
predictions = model.predict(X_seq_final)

# Interpret predictions
predicted_pit_stops = predictions[0, :, 0]  # First output: pit stop probabilities
predicted_tyres = predictions[0, :, 1]      # Second output: tyre choice probabilities

# Convert probabilities to binary decisions
pit_stop_decisions = (predicted_pit_stops > 0.5).astype(int)
tyre_choices = np.argmax(predicted_tyres, axis=1)  # Assuming softmax output

# Print results
for lap_num, pit_stop, tyre in zip(range(1, max_laps + 1), pit_stop_decisions, tyre_choices):
    if lap_num <= len(lap_data):  # Only consider actual laps
        print(f"Lap {lap_num}:")
        if pit_stop:
            print("  Pit stop predicted.")
            print(f"  Recommended tyre: {tyre}")
        else:
            print("  No pit stop predicted.")

2025-01-30 14:50:36.660118: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-01-30 14:50:36.662101: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-01-30 14:50:36.663470: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2025-01-30 14:50:37.020780: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2025-01-30 14:50:37.022687: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2025-01-30 14:50:37.025319: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

AxisError: axis 1 is out of bounds for array of dimension 1

In [None]:
for lap_num, pit_stop, tyre in zip(range(1, max_laps + 1), pit_stop_decisions, tyre_choices):
    if lap_num <= len(lap_data):  # Only consider actual laps
        print(f"Lap {lap_num}:")
        if pit_stop:
            print("  Pit stop predicted.")
            print(f"  Recommended tyre: {tyre}")
        else:
            print("  No pit stop predicted.")