In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers, regularizers
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [None]:
# Load the raw dataset. The path is relative, so it is resolved against the
# kernel's current working directory.
dataset_path = r'f1dataset2.csv'
df = pd.read_csv(dataset_path, encoding='utf-8')

# Reorder every row at random (frac=1 keeps all of them); the fixed
# random_state makes the resulting order repeatable between runs.
shuffled_data = df.sample(random_state=42, frac=1)

In [None]:
# Build, preprocess and train a small dense classifier that predicts
# 'relativecompound' from race-state features.
#
# Preprocessing (scaling and one-hot encoding) is fitted on the training split
# only and then applied to the validation split, so no validation statistics
# leak into training.

# Generate a reproducible 10% subset restricted to the modelling columns
subset_data = shuffled_data[['race_progress', 'remaining_pit_stops', 'relativecompound', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']].sample(frac=0.1, random_state=42)

# Separate input features (X) and target variable (y)
X = subset_data[['race_progress', 'remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']]
y = subset_data['relativecompound']

# Separate categorical and numerical features
cat_features = ['remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']
num_features = ['race_progress']

# Encode the target labels as integers (required by SparseCategoricalCrossentropy).
# The mapping is kept on label_encoder.classes_ for later inspection.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split BEFORE fitting any feature transformer; X is left unmodified.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y_encoded, test_size=0.1, random_state=42)

# Fit the transformers on the training rows only.
# handle_unknown='ignore' encodes categories that only occur in the
# validation rows as all-zero vectors instead of raising.
scaler = StandardScaler()
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

# Column layout: one-hot categorical columns first, scaled numeric columns last.
X_train = np.concatenate(
    (encoder.fit_transform(X_train_raw[cat_features]), scaler.fit_transform(X_train_raw[num_features])),
    axis=1,
)
X_val = np.concatenate(
    (encoder.transform(X_val_raw[cat_features]), scaler.transform(X_val_raw[num_features])),
    axis=1,
)

# Size the output layer from the labels actually present rather than a hard-coded 3.
num_classes = len(label_encoder.classes_)

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable, matching the seeded sampling/splitting above.
tf.keras.utils.set_random_seed(42)

# Define the model: one L2-regularised hidden layer and a softmax output
model = models.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='nadam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

# Train; history.history holds the per-epoch loss/accuracy for both splits
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=256, epochs=100)

In [None]:
# Score the trained model on the validation split. evaluate() returns the loss
# followed by the compiled metrics; index 1 is the single 'accuracy' metric.
accuracy = model.evaluate(X_val, y_val)[1]

print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Report weighted precision, recall and F1 on the validation split, next to
# the accuracy computed by model.evaluate().
from sklearn.metrics import f1_score, precision_score, recall_score

# Predict here instead of relying on a variable created by a later cell, so
# the notebook runs top to bottom on a fresh kernel.
y_val_pred_prob = model.predict(X_val)

# Most probable class index for each validation row
y_val_pred_labels = np.argmax(y_val_pred_prob, axis=1)

# Calculate precision, recall, and F1 score (weighted by class support)
precision = precision_score(y_val, y_val_pred_labels, average='weighted')
recall = recall_score(y_val, y_val_pred_labels, average='weighted')
f1 = f1_score(y_val, y_val_pred_labels, average='weighted')

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


In [None]:
# Plot per-epoch training vs. validation accuracy recorded by model.fit().
#
# The training curve is stored as train_accuracy (not accuracy) so the scalar
# accuracy returned by model.evaluate() is not overwritten with a list, which
# would break re-running the metric-printing cells.
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Epochs are numbered from 1 on the x axis
epochs = range(1, len(train_accuracy) + 1)

fig, ax = plt.subplots()
ax.plot(epochs, train_accuracy, 'b', label='Training Accuracy')
ax.plot(epochs, val_accuracy, 'r', label='Validation Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Class probabilities for every validation row, reduced to the index of the
# most probable class per row.
y_val_pred_prob = model.predict(X_val)
y_val_pred = y_val_pred_prob.argmax(axis=1)

# Tabulate true labels against predicted labels
# (per scikit-learn: rows are true classes, columns are predicted classes).
cm = confusion_matrix(y_val, y_val_pred)

# Emit the heading and the matrix on consecutive lines
print("Confusion Matrix:", cm, sep="\n")


In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Draw the validation confusion matrix as an annotated heatmap.
#
# NOTE(review): the original note here said hard = 0, med = 1, soft = 2.
# Rather than relying on that, the tick labels below are taken from
# label_encoder.classes_, so the plot shows the actual class names.

# Make predictions on the validation set
y_val_pred_prob = model.predict(X_val)
y_val_pred = np.argmax(y_val_pred_prob, axis=1)

# Fix the class order explicitly so the matrix always has one row/column per
# encoded class, even if a class is missing from y_val or from the predictions.
labels = np.arange(len(label_encoder.classes_))
cm = confusion_matrix(y_val, y_val_pred, labels=labels)

# Plot the confusion matrix using seaborn, labelling ticks with the original
# class values in the same order as the encoded ids.
class_names = list(label_encoder.classes_)
sns.heatmap(cm, annot=True, cmap='Blues', fmt='d', xticklabels=class_names, yticklabels=class_names)

# Add labels and title to the plot
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')

# Display the plot
plt.show()
