In [8]:
#IMPORTS

import math
import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
import sklearn
from sklearn.metrics import f1_score
import seaborn as sns
import tensorflow as tf
import sklearn.metrics as metrics
from sklearn import tree
from sklearn.svm import SVC
from tensorflow import keras
from keras.models import Sequential
from keras.callbacks import History
from keras.layers import Dense
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree, export_text
from sklearn.pipeline import Pipeline
from sklearn import decomposition, datasets, linear_model
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, mean_squared_error, precision_recall_curve

In [2]:
# df
df = pd.read_csv(r"f1dataset2.csv", encoding='utf-8')

# shuffle data
df.sample(frac=1, random_state=42)  # Set random_state for reproducibility

Unnamed: 0,race_id,lapno,position,driver_id,laptime,racetime,gap,interval,compound,tireage,...,availablecompounds,nolaps,nolapsplanned,tracklength,result_position,race_progress,remaining_pit_stops,relativecompound,fulfilled_second_compound,number_of_available_compounds
5137,10,39,8,2,81.409,3359.370,53.270,4.213,A4,4,...,"A3,A4,I,W",67,67,4574.0,6.0,0.591911,3,soft,True,2
12971,24,22,3,1,91.539,2053.765,7.592,0.829,A2,9,...,"A1,A2,I,W",66,66,4655.0,2.0,0.337230,3,medium,True,2
15602,30,17,9,11,116.247,2018.568,27.143,0.870,A3,6,...,"A2,A3,I,W",43,44,7004.0,7.0,0.397662,3,soft,True,2
32042,64,61,3,2,85.713,5402.691,68.730,64.496,A3,23,...,"A1,A2,A3,I,W",66,66,4655.0,3.0,0.926337,3,soft,True,3
61152,121,46,6,12,101.104,4798.425,67.134,9.043,A6,8,...,"A4,A6,A7,I,W",55,55,5554.0,5.0,0.840344,3,soft,True,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54343,108,50,4,27,106.204,4819.198,32.964,11.191,A3,30,...,"A3,A4,A6,I,W",53,53,5842.0,4.0,0.943811,3,hard,True,3
38158,77,29,9,12,81.954,2432.185,71.501,0.779,A3,28,...,"A3,A4,A5,I,W",71,71,4304.0,4.0,0.415287,3,soft,True,3
860,2,51,6,10,106.140,5576.819,91.814,44.688,A1,12,...,"A1,A2,I,W",56,56,5543.0,6.0,0.912786,3,hard,True,2
15795,30,34,6,9,115.180,4026.454,32.002,0.946,A2,13,...,"A2,A3,I,W",43,44,7004.0,6.0,0.793285,3,medium,True,2


In [3]:
label_encode = LabelEncoder()
df['relativecompound'] = label_encode.fit_transform(df['relativecompound']) #hard = 0 , med = 1 , soft = 2

In [None]:
# Separate categorical and numerical features
cat_features = ['remaining_pit_stops', 'location', 'fulfilled_second_compound', 'number_of_available_compounds']
num_features = ['race_progress']

# Splitting the data into input features (X) and pit stop labels (y)
x_cat = df[cat_features]
x_num = df[num_features]
X = pd.concat([x_cat, x_num], axis=1)
y = df['relativecompound']

# One-hot encoding categorical features
encoder = OneHotEncoder()
X_encoded = encoder.fit_transform(x_cat).toarray()

# Combining encoded categorical features with numerical features
X = np.concatenate((X_encoded, x_num), axis=1)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## SVM

In [6]:
# Train the SVM classifier
svm = SVC(kernel='linear')
svm.fit(X_train_scaled, y_train)

# Make predictions
y_pred = svm.predict(X_test_scaled)

In [9]:
f1_svm = f1_score(y_test, y_pred, average='weighted')
print("F1 score:", f1_svm*100)

F1 score: 65.14339306490166


## DT

In [10]:
# Train the decision tree classifier
tree = DecisionTreeClassifier()
tree.fit(X_train_scaled, y_train)

# Make predictions
y_pred = tree.predict(X_test_scaled)

In [11]:
f1_dt = f1_score(y_test, y_pred, average='weighted')
print("F1 score:", f1_dt*100)

F1 score: 77.3643342910298


## LR

In [12]:
# Train the logistic regression model with increased max_iter
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)

# Make predictions
y_pred = lr.predict(X_test_scaled)

In [13]:
f1_lr = f1_score(y_test, y_pred, average='weighted')
print("F1 score:", f1_lr * 100)

F1 score: 68.66986117320333


## NN

In [24]:
# Create an instance of LabelEncoder
label_encoder = LabelEncoder()

# Fit label encoder on y_train and transform y_train and y_test
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Determine the number of classes (unique labels)
num_classes = len(label_encoder.classes_)

# Define the neural network architecture
model = Sequential()
model.add(Dense(16, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Sigmoid activation for binary classification

# Compile the model with binary cross-entropy loss
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model using the encoded labels
model.fit(X_train_scaled, y_train_encoded, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test_encoded))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24e9a2a5520>

In [26]:
# Make predictions
y_pred_prob = model.predict(X_test_scaled)
y_pred_labels = np.argmax(y_pred_prob, axis=1)  

# Calculate the F1 score
f1_nn = f1_score(y_test_encoded, y_pred_labels, average='weighted')
print("F1 score:", f1_nn * 100)

F1 score: 1.6287791849598503
