In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras import models, layers, regularizers
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, regularizers
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
# Load the raw dataset from the CSV file in the working directory.
df = pd.read_csv(
    r'f1dataset2.csv',
    encoding='utf-8',
)

In [None]:
# Randomly permute every row of the frame (frac=1 keeps all of them);
# the fixed random_state makes the permutation reproducible.
shuffled_data = df.sample(
    frac=1,
    random_state=42,
)

In [None]:
# Draw a reproducible 10% sample of the shuffled rows, keeping only the model
# inputs and the target column.
subset_data = shuffled_data[['race_progress', 'remaining_pit_stops', 'relativecompound', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']].sample(frac=0.1, random_state=42)

# Separate input features (X) and target variable (y).
# X is an independent copy and is left unmodified below: the scaled values go
# into X_processed instead of being written back into a slice of subset_data
# (the in-place column assignment raised SettingWithCopyWarning).
X = subset_data[['race_progress', 'remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']].copy()
y = subset_data['relativecompound']

# Separate categorical and numerical features
cat_features = ['remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']
num_features = ['race_progress']

# Encode the categorical labels into integer values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Decide train/validation membership BEFORE fitting any preprocessing.
# The split is drawn from the sample count and random_state only, so
# splitting row positions selects the same rows as splitting the processed
# arrays would.
train_idx, val_idx = train_test_split(np.arange(len(X)), test_size=0.1, random_state=42)

# Fit the scaler and the one-hot encoder on the training rows only, so that
# validation statistics/categories do not leak into the preprocessing.
# Categories seen only in validation rows are encoded as all zeros
# (handle_unknown='ignore').
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx][num_features])

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(X.iloc[train_idx][cat_features])

# Apply the fitted transforms to every row and combine the preprocessed
# categorical and numerical features (same column order as before:
# one-hot columns first, scaled numeric column last).
X_encoded = encoder.transform(X[cat_features])
X_scaled = scaler.transform(X[num_features])
X_processed = np.concatenate((X_encoded, X_scaled), axis=1)

# Materialise the training and validation sets from the chosen row positions
X_train, X_val = X_processed[train_idx], X_processed[val_idx]
y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

In [None]:
import optuna
from sklearn.metrics import accuracy_score

def objective(trial):
    """Train one candidate network and return its validation accuracy.

    Samples the hidden-layer width, learning rate, optimizer, L2 strength and
    batch size from ``trial``, fits a single-hidden-layer softmax classifier
    on the notebook-level ``X_train``/``y_train`` for 50 epochs and scores its
    arg-max predictions on ``X_val``/``y_val``.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Accuracy of the predicted class labels on the validation split.

    Raises:
        ValueError: If the sampled optimizer name is not supported.
    """
    # Release Keras global state left behind by the previous trial so memory
    # does not keep growing over the course of the study.
    tf.keras.backend.clear_session()

    # Define the hyperparameters to tune
    units = trial.suggest_int('units', 16, 256)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'rmsprop', 'nadam'])
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    # and matches how learning_rate is sampled above.
    l2_reg = trial.suggest_float('l2_reg', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])

    # Size the network from the data instead of hard-coding the output width.
    n_features = X_train.shape[1]
    n_classes = len(label_encoder.classes_)

    # Define the model
    model = models.Sequential()
    model.add(layers.Dense(units, activation='relu', input_shape=(n_features,),
                           kernel_regularizer=regularizers.l2(l2_reg)))
    model.add(layers.Dense(n_classes, activation='softmax'))

    # Define the optimizer
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'rmsprop': tf.keras.optimizers.RMSprop,
        'nadam': tf.keras.optimizers.Nadam,
    }
    if optimizer_name not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer_name}")
    optimizer = optimizer_classes[optimizer_name](learning_rate=learning_rate)

    # Compile the model
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    # Train the model (the returned History object is not needed here)
    model.fit(X_train, y_train, validation_data=(X_val, y_val),
              batch_size=batch_size, epochs=50, verbose=0)

    # Evaluate the model on the validation set; verbose=0 keeps the study
    # output free of per-trial progress bars, consistent with fit() above.
    y_val_proba = model.predict(X_val, verbose=0)
    y_val_labels = np.argmax(y_val_proba, axis=1)
    accuracy = accuracy_score(y_val, y_val_labels)

    # Return the accuracy as the performance metric
    return accuracy


In [None]:
# Define the study name and storage location
study_name = "NN2"
storage = "sqlite:///NN2.db"

# load_if_exists=True makes this cell re-runnable: without it Optuna raises
# DuplicatedStudyError as soon as a study with this name is already stored in
# the SQLite file. Note that a re-run resumes the stored study and appends
# another batch of trials to it.
study = optuna.create_study(
    direction='maximize',
    study_name=study_name,
    storage=storage,
    load_if_exists=True,
)
study.optimize(objective, n_trials=50)

# Get the best hyperparameters and their performance
best_params = study.best_params
best_accuracy = study.best_value

print("Best Hyperparameters:")
for key, value in best_params.items():
    print(f"{key}: {value}")

print(f"Best Accuracy: {best_accuracy:.2f}")


In [None]:
# from optuna.visualization import plot_optimization_history
# # Load the saved study
# study_name = "tc_tuning_2"
# storage = "sqlite:///tc_tuning2.db"
# study = optuna.load_study(study_name=study_name, storage=storage)

# # Plot the optimization history
# plot_optimization_history(study)

## Train the final model using the tuned hyperparameters

In [None]:
# Draw a reproducible 10% sample of the shuffled rows, keeping only the model
# inputs and the target column.
subset_data = shuffled_data[['race_progress', 'remaining_pit_stops', 'relativecompound', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']].sample(frac=0.1, random_state=42)

# Separate input features (X) and target variable (y).
# X is an independent copy and is left unmodified below: the scaled values go
# into X_processed instead of being written back into a slice of subset_data
# (the in-place column assignment raised SettingWithCopyWarning).
X = subset_data[['race_progress', 'remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']].copy()
y = subset_data['relativecompound']

# Separate categorical and numerical features
cat_features = ['remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']
num_features = ['race_progress']

# Encode the categorical labels into integer values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Decide train/validation membership BEFORE fitting any preprocessing.
# The split is drawn from the sample count and random_state only, so
# splitting row positions selects the same rows as splitting the processed
# arrays would.
train_idx, val_idx = train_test_split(np.arange(len(X)), test_size=0.1, random_state=42)

# Fit the scaler and the one-hot encoder on the training rows only, so that
# validation statistics/categories do not leak into the preprocessing.
# Categories seen only in validation rows are encoded as all zeros
# (handle_unknown='ignore').
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx][num_features])

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(X.iloc[train_idx][cat_features])

# Apply the fitted transforms to every row and combine the preprocessed
# categorical and numerical features (one-hot columns first, scaled numeric
# column last).
X_encoded = encoder.transform(X[cat_features])
X_scaled = scaler.transform(X[num_features])
X_processed = np.concatenate((X_encoded, X_scaled), axis=1)

# Materialise the training and validation sets from the chosen row positions
X_train, X_val = X_processed[train_idx], X_processed[val_idx]
y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

# Size the network from the data instead of hard-coding the output width.
n_features = X_train.shape[1]
n_classes = len(label_encoder.classes_)

# Build the final network: one regularised hidden layer, dropout, softmax head.
model = models.Sequential()
model.add(layers.Dense(31, activation='relu', input_shape=(n_features,), kernel_regularizer=regularizers.l2(0.001)))
# NOTE(review): a dropout rate is not among the hyperparameters sampled by the
# objective() defined in this notebook - presumably this value comes from a
# different tuning run; confirm its origin.
model.add(layers.Dropout(0.2553398099761799))
model.add(layers.Dense(n_classes, activation='softmax'))

# Compile the model with the specified learning rate
optimizer = tf.keras.optimizers.Nadam(learning_rate=0.0075533912129734)
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

# Train while tracking validation metrics each epoch; `history` feeds the
# accuracy plots in the following cells.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=32, epochs=150)

In [None]:
# Pull the per-epoch training and validation accuracy recorded during fit().
accuracy, val_accuracy = (history.history[key] for key in ('accuracy', 'val_accuracy'))

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, 1 + len(accuracy))

# Draw both curves on the same axes: training in blue, validation in red.
for curve, fmt, label in (
    (accuracy, 'b', 'Training Accuracy'),
    (val_accuracy, 'r', 'Validation Accuracy'),
):
    plt.plot(epochs, curve, fmt, label=label)

plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Evaluate the model on the validation set. evaluate() returns the loss
# followed by the compiled 'accuracy' metric.
# The loss is bound to a real name rather than `_`: assigning to `_` in an
# IPython/Jupyter kernel stops it from tracking the last cell output there.
val_loss, accuracy = model.evaluate(X_val, y_val)

print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Re-read the accuracy curves from the training history (the name `accuracy`
# was rebound to a single float by the evaluation cell).
training_log = history.history
accuracy = training_log['accuracy']
val_accuracy = training_log['val_accuracy']

# One x-axis tick per epoch, starting at 1.
epochs = range(1, 1 + len(accuracy))

# Plot training (blue) and validation (red) accuracy on shared axes.
for series, line_style, legend_label in zip(
    (accuracy, val_accuracy),
    ('b', 'r'),
    ('Training Accuracy', 'Validation Accuracy'),
):
    plt.plot(epochs, series, line_style, label=legend_label)

plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()