In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Flatten
from tensorflow.keras.layers import LSTM
from keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from keras.callbacks import History
from sklearn.metrics import confusion_matrix
from keras.layers import LSTM, Dropout, SimpleRNN
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, regularizers
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
# Load the dataset from CSV
df = pd.read_csv('f1dataset1.csv', encoding='utf-8')

# HANDLING NANs: missing values in these columns are replaced with 0.0
for nan_col in ('laptime', 'race_progress', 'tyreageprogress'):
    df[nan_col] = np.where(df[nan_col].isna(), 0.0, df[nan_col])

# Shuffle every row; the fixed random_state keeps the ordering reproducible
shuffled_data = df.sample(frac=1, random_state=42)

In [None]:
from tensorflow.keras.layers import Input, Dense, Dropout, Reshape, SimpleRNN, Flatten, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers

# Step 1 - Draw a reproducible 10% subset of the shuffled dataset
feature_cols = ['race_progress', 'tyreageprogress', 'is_leader', 'relativecompound', 'racetrackcat', 'fcystatus',
                'remaining_pit_stops', 'pursuer_tyre_change', 'close_ahead']
subset_data = shuffled_data[feature_cols + ['pitstop']].sample(frac=0.1, random_state=42).copy()

# Step 1.2 - Separate input features (X) and target variable (y)
X = subset_data[feature_cols].copy()
y = subset_data['pitstop'].copy()

# Step 1.3 - Separate categorical and numerical features
cat_features = ['is_leader', 'relativecompound', 'racetrackcat', 'fcystatus', 'remaining_pit_stops',
                'pursuer_tyre_change', 'close_ahead']
num_features = ['race_progress', 'tyreageprogress']

# Step 2 - Decide which rows are train/validation BEFORE fitting any transformer.
# Fitting the scaler/encoder on all rows would leak validation statistics into
# the features the model is trained on.
train_rows, val_rows = train_test_split(np.arange(len(X)), test_size=0.1, random_state=42)

# Fit the scaler on training rows only, then scale every row with it
scaler = StandardScaler()
scaler.fit(X.iloc[train_rows][num_features])
X[num_features] = scaler.transform(X[num_features])

# Fit the encoder on training rows only; categories that occur only in the
# validation rows are encoded as all-zeros because handle_unknown='ignore'
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(X.iloc[train_rows][cat_features])
X_encoded = encoder.transform(X[cat_features])

# Combine preprocessed categorical and numerical features (categorical columns first)
X_processed = np.concatenate((X_encoded, X[num_features]), axis=1)

# Encode the categorical labels into integer values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Inverse-frequency class weights: n_samples / (n_classes * class_count)
class_frequencies = np.bincount(y_encoded)
total_samples = np.sum(class_frequencies)
class_weights = total_samples / (len(class_frequencies) * class_frequencies)
class_weights_dict = dict(enumerate(class_weights))

# Materialise the split chosen above
X_train, X_val = X_processed[train_rows], X_processed[val_rows]
y_train, y_val = y_encoded[train_rows], y_encoded[val_rows]

# 3-D view of the features with a trailing axis of size 1: (samples, features, 1)
X_processed_reshaped = np.reshape(X_processed, (X_processed.shape[0], X_processed.shape[1], 1))

n_features = X_processed.shape[1]


def _l1_l2():
    """Return a fresh L1/L2 kernel regularizer (l1=0.01, l2=0.001)."""
    return regularizers.l1_l2(l1=0.01, l2=0.001)


# Feed-forward branch: three regularized 64-unit ReLU layers (dropout after the
# first two) followed by a single sigmoid unit
ffnn_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(n_features,),
                          kernel_regularizer=_l1_l2()),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=_l1_l2()),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=_l1_l2()),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Recurrent branch: the feature vector is consumed as a sequence of
# n_features steps with one value per step
rnn_model = tf.keras.models.Sequential([
    tf.keras.layers.SimpleRNN(64, input_shape=(n_features, 1), kernel_regularizer=_l1_l2()),
    tf.keras.layers.Dense(32, activation='relu', kernel_regularizer=_l1_l2()),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Wire both branches to one shared 2-D input; the Reshape layer adds the
# trailing axis the recurrent branch expects
combined_model_input = Input(shape=(n_features,))
ffnn_output = ffnn_model(combined_model_input)
rnn_input = Reshape((n_features, 1))(combined_model_input)
rnn_output = rnn_model(rnn_input)

# Merge the two branch outputs and map them to the final probability
merged_branches = concatenate([ffnn_output, rnn_output])
combined_output = Dense(1, activation='sigmoid', kernel_regularizer=_l1_l2())(merged_branches)

# Assemble and compile the combined model
combined_model = Model(inputs=combined_model_input, outputs=combined_output)
combined_model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])

from imblearn.under_sampling import RandomUnderSampler

# Stop training once val_loss has not improved for 3 consecutive epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Balance the training classes by randomly under-sampling the majority class
under_sampler = RandomUnderSampler(random_state=42)
X_train_resampled, y_train_resampled = under_sampler.fit_resample(X_train, y_train)

# RandomUnderSampler returns the kept rows grouped by class, and Keras'
# `validation_split` takes the LAST fraction of the data before shuffling.
# Using validation_split here would therefore monitor val_loss on a single
# class. Carve out a stratified, shuffled hold-out explicitly instead.
X_fit, X_holdout, y_fit, y_holdout = train_test_split(
    X_train_resampled, y_train_resampled,
    test_size=0.1, stratify=y_train_resampled, random_state=42)

# NOTE(review): class_weights_dict was computed on the imbalanced labels but is
# applied to data that is already balanced by under-sampling - confirm that
# this double correction towards the minority class is intended.
# `fit` returns its own History object, so no explicit History callback is needed.
history = combined_model.fit(X_fit, y_fit,
                             validation_data=(X_holdout, y_holdout),
                             class_weight=class_weights_dict,
                             batch_size=256,
                             epochs=10,
                             callbacks=[early_stopping])

In [None]:
from sklearn.metrics import classification_report

# Score the validation set, threshold the probabilities at 0.5 and flatten
# the result to a 1-D vector of 0/1 labels
y_pred = (combined_model.predict(X_val) > 0.5).astype(int).ravel()

# Make sure the true labels are 1-D as well
y_val = y_val.ravel()

# Per-class precision / recall / F1; undefined ratios are reported as 1
report = classification_report(y_val, y_pred, zero_division=1)
print(report)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Evaluate the three summary metrics on the validation labels/predictions,
# in the order precision, recall, F1
p, r, f1 = (metric(y_val, y_pred) for metric in (precision_score, recall_score, f1_score))

print("Precision:", p)
print("Recall", r)
print("F1 Score:", f1)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve

# A precision-recall curve is traced by sweeping the decision threshold, so it
# must be computed from the predicted probabilities. Passing already
# thresholded 0/1 labels collapses the curve to a single operating point.
y_scores = combined_model.predict(X_val).ravel()
precision, recall, _ = precision_recall_curve(y_val, y_scores, pos_label=1)

# Plot precision-recall curve
plt.plot(recall, precision, color='b', label='Precision-Recall Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()

In [None]:
# Rebuild the feature matrix from the already transformed features
# (X_encoded and the scaled numeric columns of X)
X_processed = np.concatenate((X_encoded, X[num_features]), axis=1)

# combined_model takes the 2-D (samples, features) matrix directly: its own
# Reshape layer adds the extra axis for the recurrent branch, so the input must
# not be reshaped here. Round the probabilities to 0/1 and flatten to 1-D.
# NOTE: these rows include the samples the model was trained on, so this
# matrix is not a held-out evaluation.
y_pred = np.round(combined_model.predict(X_processed)).astype(int).ravel()

# Calculate the confusion matrix (rows: true labels, columns: predictions)
conf = confusion_matrix(y_encoded, y_pred)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()


In [None]:
# Per-epoch loss values recorded by Keras during training
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# No prediction is needed to draw the loss curves. The former extra
# `predict` call was unused and overwrote the global `y_pred` with raw
# probabilities, so it has been dropped.

# Plot the training and validation loss
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()

## Hyperparameter tuning (Optuna)

In [None]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from tensorflow.keras.callbacks import EarlyStopping, History
from tensorflow.keras.layers import Input, Dense, Dropout, Reshape, SimpleRNN, Flatten, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
import numpy as np

# Step 1 - Draw a reproducible 10% subset of the shuffled dataset
feature_cols = ['race_progress', 'tyreageprogress', 'is_leader', 'relativecompound', 'racetrackcat', 'fcystatus',
                'remaining_pit_stops', 'pursuer_tyre_change', 'close_ahead']
subset_data = shuffled_data[feature_cols + ['pitstop']].sample(frac=0.1, random_state=42).copy()

# Step 1.2 - Separate input features (X) and target variable (y)
X = subset_data[feature_cols].copy()
y = subset_data['pitstop'].copy()

# Step 1.3 - Separate categorical and numerical features
cat_features = ['is_leader', 'relativecompound', 'racetrackcat', 'fcystatus', 'remaining_pit_stops',
                'pursuer_tyre_change', 'close_ahead']
num_features = ['race_progress', 'tyreageprogress']

# Step 2 - Decide which rows are train/validation BEFORE fitting any transformer,
# so that no validation statistics leak into the training features
train_rows, val_rows = train_test_split(np.arange(len(X)), test_size=0.1, random_state=42)

# Fit the scaler on training rows only, then scale every row with it
scaler = StandardScaler()
scaler.fit(X.iloc[train_rows][num_features])
X[num_features] = scaler.transform(X[num_features])

# `sparse_output` is the current name of the dense-output switch (the old
# `sparse` keyword is no longer accepted by recent scikit-learn releases).
# Categories seen only in validation rows encode to all-zeros (handle_unknown='ignore').
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(X.iloc[train_rows][cat_features])
X_encoded = encoder.transform(X[cat_features])

# Combine preprocessed categorical and numerical features (categorical columns first)
X_processed = np.concatenate((X_encoded, X[num_features]), axis=1)

# Encode the categorical labels into integer values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Inverse-frequency class weights: n_samples / (n_classes * class_count)
class_frequencies = np.bincount(y_encoded)
total_samples = np.sum(class_frequencies)
class_weights = total_samples / (len(class_frequencies) * class_frequencies)
class_weights_dict = dict(enumerate(class_weights))

# Materialise the split chosen above
X_train, X_val = X_processed[train_rows], X_processed[val_rows]
y_train, y_val = y_encoded[train_rows], y_encoded[val_rows]

# 3-D view of the features with a trailing axis of size 1: (samples, features, 1)
X_processed_reshaped = np.reshape(X_processed, (X_processed.shape[0], X_processed.shape[1], 1))

# Define the objective function for Optuna
def objective(trial):
    """Train one FFNN+RNN hybrid for an Optuna trial and return its validation F1.

    The trial samples the L1 and L2 regularization strengths (log-uniform in
    [0.001, 0.1]); the same pair is applied to every regularized layer.

    Reads the notebook globals ``X_processed``, ``X_train``, ``y_train``,
    ``X_val``, ``y_val`` and ``class_weights_dict``.

    Args:
        trial: the ``optuna`` trial used to sample ``l1`` and ``l2``.

    Returns:
        F1 score of the thresholded predictions on ``X_val`` / ``y_val``.
    """
    # Local imports keep the function usable even if the cells that import
    # these names at notebook level have not been run
    from imblearn.under_sampling import RandomUnderSampler
    from sklearn.metrics import f1_score

    l1 = trial.suggest_float('l1', 0.001, 0.1, log=True)
    l2 = trial.suggest_float('l2', 0.001, 0.1, log=True)

    n_features = X_processed.shape[1]

    def make_regularizer():
        # Fresh regularizer instance per layer, all with this trial's strengths
        return regularizers.l1_l2(l1=l1, l2=l2)

    # Build the FFNN model with L1 and L2 regularization
    ffnn_model = tf.keras.models.Sequential()
    ffnn_model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(n_features,),
                                         kernel_regularizer=make_regularizer()))
    ffnn_model.add(tf.keras.layers.Dropout(0.2))
    ffnn_model.add(tf.keras.layers.Dense(64, activation='relu',
                                         kernel_regularizer=make_regularizer()))
    ffnn_model.add(tf.keras.layers.Dropout(0.2))
    ffnn_model.add(tf.keras.layers.Dense(64, activation='relu',
                                         kernel_regularizer=make_regularizer()))
    ffnn_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Build the RNN model (features are consumed as n_features steps of size 1)
    rnn_model = tf.keras.models.Sequential()
    rnn_model.add(tf.keras.layers.SimpleRNN(64, input_shape=(n_features, 1),
                                            kernel_regularizer=make_regularizer()))
    rnn_model.add(tf.keras.layers.Dense(32, activation='relu',
                                        kernel_regularizer=make_regularizer()))
    rnn_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Combine the FFNN and RNN models on one shared 2-D input
    combined_model_input = Input(shape=(n_features,))
    ffnn_output = ffnn_model(combined_model_input)
    rnn_input = Reshape((n_features, 1))(combined_model_input)
    rnn_output = rnn_model(rnn_input)
    combined_output = concatenate([ffnn_output, rnn_output])
    combined_output = Dense(1, activation='sigmoid',
                            kernel_regularizer=make_regularizer())(combined_output)

    combined_model = Model(inputs=combined_model_input, outputs=combined_output)
    combined_model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])

    # Stop once val_loss has not improved for 3 consecutive epochs
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)

    # Balance the training classes by under-sampling the majority class
    under_sampler = RandomUnderSampler(random_state=42)
    X_train_resampled, y_train_resampled = under_sampler.fit_resample(X_train, y_train)

    # RandomUnderSampler returns the kept rows grouped by class, and Keras'
    # `validation_split` takes the LAST fraction of the data before shuffling,
    # which would yield a single-class validation set for early stopping.
    # Use an explicit stratified, shuffled hold-out instead.
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X_train_resampled, y_train_resampled,
        test_size=0.1, stratify=y_train_resampled, random_state=42)

    # NOTE(review): class_weights_dict comes from the imbalanced labels but is
    # applied to already balanced data - confirm this is intended.
    combined_model.fit(X_fit, y_fit,
                       validation_data=(X_holdout, y_holdout),
                       class_weight=class_weights_dict,
                       batch_size=256,
                       epochs=10,
                       callbacks=[early_stopping])

    # Round the validation probabilities to 0/1 labels and flatten to 1-D
    y_val_pred_binary = np.round(combined_model.predict(X_val)).astype(int).ravel()

    # The F1 score on the validation set is the objective value for Optuna
    f1 = f1_score(y_val, y_val_pred_binary)
    return f1

# Search for the regularization strengths that maximise the objective
# (validation F1 score) over 100 trials
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# Report the hyperparameters and the F1 score of the best trial
best_trial = study.best_trial
print('Best hyperparameters:', best_trial.params)
print('Best F1 score:', best_trial.value)
