In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense
from sklearn.metrics import confusion_matrix
import seaborn as sns
from keras import Input
from sklearn.metrics import precision_recall_fscore_support
from tensorflow.keras.models import Model
from sklearn.model_selection import KFold
import os
import pickle

In [25]:
# Relative directory that contains the labelled driving recordings.
file_path = '../../Data/Verified/BaseLineLabeledWithoutSelfLabeledValues/'
# Name of the CSV file inside `file_path` that gets loaded into the dataframe.
file_name = 'RecordedDrivingData.csv'
# Number of folds used for the K-fold cross-validation split.
k = 5
# Relative output directory for the saved model architecture, weights and history.
model_path_directory = '../../AIModels/BaselineModel/100Epochs/'

In [26]:
# Load the labelled recordings and parse the 'Timestamp' column into datetimes.
recorded_driving_dataframe = (
    pd.read_csv(file_path + file_name)
    .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
)

# `full_data` is a second name for the very same dataframe object (no copy).
full_data = recorded_driving_dataframe

# The two acceleration channels are the only model inputs.
feature_columns = ['Lateral acceleration', 'Longitudinal acceleration']
primary_data = recorded_driving_dataframe[feature_columns]

# Labels are selected on the feature frame's index so rows stay aligned.
data_labels = recorded_driving_dataframe.loc[primary_data.index, 'Label']

In [27]:
# Encode the values of the 'Label' column as integer class ids; the fitted
# encoder is kept so the ids can be mapped back to label names later on.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(data_labels)

# Convert the feature frame to a NumPy array for positional fold indexing.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# (when `primary_data` is already an array) no longer raises AttributeError.
primary_data = np.asarray(primary_data)

In [28]:
def build_baseline_model(num_features, num_classes):
    """Build and compile the baseline dense classifier.

    Args:
        num_features: Number of input features per sample.
        num_classes: Number of target classes (width of the softmax output).

    Returns:
        A compiled Keras ``Model`` named "BaselineComfortLevel" with two
        16-unit ReLU layers followed by a softmax output layer.
    """
    input_layer = Input(shape=(num_features,))
    dense_1 = Dense(16, activation='relu')(input_layer)
    dense_2 = Dense(16, activation='relu')(dense_1)
    output_layer = Dense(num_classes, activation='softmax')(dense_2)

    network = Model(inputs=input_layer, outputs=output_layer, name="BaselineComfortLevel")
    network.compile(optimizer='adam',
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
    return network


def per_class_specificity(confusion_m):
    """Compute the one-vs-rest specificity TN / (TN + FP) of every class.

    Args:
        confusion_m: Square confusion matrix with actual classes on the rows
            and predicted classes on the columns.

    Returns:
        1-D float array with one specificity value per class. A class whose
        denominator TN + FP is zero (every sample belongs to that class) gets
        0.0 instead of NaN so that averaging across folds stays finite.
    """
    confusion_m = np.asarray(confusion_m)
    true_positives = np.diag(confusion_m)
    false_positives = confusion_m.sum(axis=0) - true_positives
    false_negatives = confusion_m.sum(axis=1) - true_positives
    true_negatives = confusion_m.sum() - true_positives - false_positives - false_negatives
    negatives = true_negatives + false_positives
    return np.divide(true_negatives, negatives,
                     out=np.zeros(len(confusion_m), dtype=float),
                     where=negatives > 0)


k_fold = KFold(n_splits=k, shuffle=True, random_state=1)

# Full list of encoded class ids. Passing it to the metric functions pins the
# shape and ordering of every per-fold result, even when a fold happens to
# contain no sample (and no prediction) of some class; otherwise the per-fold
# arrays could differ in length and break the cross-fold aggregation.
class_ids = np.arange(len(label_encoder.classes_))
num_features = primary_data.shape[1]

accuracy_list = []
precision_list = []
recall_list = []
f1_list = []
specificity_list = []
confusion_matrix_list = []

for step, (indexes_of_train_data, indexes_of_test_data) in enumerate(k_fold.split(primary_data), start=1):
    print(f'Computing Fold number {step}')

    features_train, features_test = primary_data[indexes_of_train_data], primary_data[indexes_of_test_data]
    labels_train, labels_test = encoded_labels[indexes_of_train_data], encoded_labels[indexes_of_test_data]

    # A freshly initialised network per fold, so no weights carry over.
    # After the loop `model` refers to the network of the last fold.
    model = build_baseline_model(num_features, len(class_ids))

    # The held-out fold is passed as validation data for per-epoch monitoring
    # only; no callback is registered that would act on it.
    model.fit(features_train, labels_train, epochs=100, batch_size=32,
              validation_data=(features_test, labels_test))

    tested_loss, tested_accuracy = model.evaluate(features_test, labels_test, verbose=0)
    accuracy_list.append(tested_accuracy)

    test_predictions = model.predict(features_test, verbose=0)
    test_labels_predictions = np.argmax(test_predictions, axis=1)

    # average=None returns one value per class id in `class_ids`.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels_test, test_labels_predictions,
        labels=class_ids, average=None, zero_division=0)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

    confusion_m = confusion_matrix(labels_test, test_labels_predictions, labels=class_ids)
    confusion_matrix_list.append(confusion_m)

    specificity = per_class_specificity(confusion_m)
    specificity_list.append(specificity)

In [29]:
# Fold-averaged scores: accuracy is a single scalar, the remaining metrics are
# averaged element-wise across folds (axis 0), giving one value per class.
average_accuracy = np.asarray(accuracy_list).mean()
average_precision = np.asarray(precision_list).mean(axis=0)
average_recall = np.asarray(recall_list).mean(axis=0)
average_f1 = np.asarray(f1_list).mean(axis=0)
average_specificity = np.asarray(specificity_list).mean(axis=0)

# Display order of the classes in the plots and tables.
plotting_order = ['Excellent', 'Acceptable', 'So and So', 'Uncomfortable']

# Sum the per-fold confusion matrices, label rows/columns with the class
# names and rearrange both axes into the display order.
confusion_matrix_sum = np.asarray(confusion_matrix_list).sum(axis=0)
class_names = label_encoder.classes_
confusion_matrix_dataframe = pd.DataFrame(
    confusion_matrix_sum,
    index=class_names,
    columns=class_names,
).loc[plotting_order, plotting_order]

plt.figure(figsize=(10, 7))
heatmap_axes = sns.heatmap(confusion_matrix_dataframe, annot=True, fmt='d', cmap='Blues')
heatmap_axes.set_ylabel('Actual')
heatmap_axes.set_xlabel('Predicted')
heatmap_axes.set_title('Aggregated Confusion Matrix')
plt.show()

In [30]:
# Class names in encoded-id order, derived from the fitted encoder instead of
# a hard-coded [0, 1, 2, 3] so the table follows the actual number of classes.
labels = label_encoder.inverse_transform(np.arange(len(label_encoder.classes_)))

# Per-class, fold-averaged scores as percentages rounded to two decimals.
scores_dataframe = pd.DataFrame({
    'Label': labels,
    'Precision (%)': (average_precision * 100).round(2),
    'Recall (%)': (average_recall * 100).round(2),
    'F1-Score (%)': (average_f1 * 100).round(2),
    'Specificity (%)': (average_specificity * 100).round(2)
})

# An ordered categorical makes sort_values follow `plotting_order` rather
# than alphabetical order.
scores_dataframe['Label'] = pd.Categorical(scores_dataframe['Label'], categories=plotting_order, ordered=True)
scores_dataframe = scores_dataframe.sort_values('Label')

# Render the score table as a figure with the axes themselves hidden.
fig, ax = plt.subplots()
ax.axis('tight')
ax.axis('off')
ax.table(cellText=scores_dataframe.values, colLabels=scores_dataframe.columns, cellLoc='center', loc='center', bbox=[0, 0, 1, 1])
ax.set_title('Classification Report', pad=20)
plt.show()

In [31]:
# Print the layer/parameter summary of `model`. The cross-validation loop
# rebinds `model` on every fold, so this is the network of the last fold.
model.summary()

In [32]:
# Persist the network currently bound to `model`: architecture as JSON,
# weights as an HDF5 file and the training-history object as a pickle.
model_json_config = model.to_json()

os.makedirs(model_path_directory, exist_ok=True)

architecture_path = model_path_directory + 'DrivingComfortabilityPredictingModel.json'
weights_path = model_path_directory + 'DrivingComfortabilityPredictingModel.weights.h5'
history_path = model_path_directory + 'DrivingComfortabilityPredictingModelHistory.pkl'

with open(architecture_path, 'w') as json_file:
    json_file.write(model_json_config)

model.save_weights(weights_path)

# NOTE(review): `model.history` is the History callback object, not the plain
# `model.history.history` dict of per-epoch values. Presumably the pickle then
# also captures whatever the callback references - confirm this is intended
# before changing the format, since downstream loaders may rely on it.
with open(history_path, 'wb') as history_file:
    pickle.dump(model.history, history_file)