In [None]:
import pandas as pd
import numpy as np

# Read the raw records, then reshape them to one row per interval and one
# column per PID; repeated (interval, PID) readings are averaged
data = pd.read_csv('classified_driving_data.csv')
data_wide = pd.pivot_table(
    data,
    values='VALUE',
    index='interval',
    columns='PID',
    aggfunc='mean',
)

Preprocessing data

In [None]:
from tensorflow import keras
from keras import Sequential

# Merge the classification column back to the df.
# `interval` has to be a regular column for the merge and for the drop below.
# The guard and the defensive drop keep this cell safe to re-run after
# `data_wide` has already been merged once.
classification = data[['interval', 'classification']].drop_duplicates()
features_wide = data_wide if 'interval' in data_wide.columns else data_wide.reset_index()
features_wide = features_wide.drop(columns=['classification'], errors='ignore')
data_wide = pd.merge(features_wide, classification, on='interval')

# Interval is not needed for training, and can be dropped here
X = data_wide.drop(columns=['classification', 'interval'])

# Encode the labels as integers and fail loudly on anything outside the mapping:
# an unmapped or missing label would otherwise become NaN and break the one-hot
# encoding with an unrelated-looking error.
class_to_index = {'Normal': 0, 'Slow': 1, 'Aggressive': 2}
label_indices = data_wide['classification'].map(class_to_index)
unmapped = label_indices.isna()
if unmapped.any():
    unknown_labels = sorted(data_wide.loc[unmapped, 'classification'].astype(str).unique())
    raise ValueError(f"Unmapped or missing classification labels: {unknown_labels}")

# Pin the one-hot width to the number of known classes so `y` always has three
# columns, even when the highest-numbered class does not occur in this dataset
y = keras.utils.to_categorical(label_indices.astype(int), num_classes=len(class_to_index))

# Handle any missing values by filling with the mean of that column.
# NOTE(review): the means are computed over every row, so rows that are later
# held out for testing also influence the imputed values.
X = X.fillna(X.mean())

Preparing data for model training

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 40% of the rows for testing; the fixed seed keeps the split repeatable
split = train_test_split(X, y, test_size=0.4, random_state=42)
X_train, X_test, y_train, y_test = split

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Collapse the one-hot label rows back to integer class ids for scikit-learn
y_train_classes = y_train.argmax(axis=1)
y_test_classes = y_test.argmax(axis=1)

# Fit on the training partition, then predict the held-out rows
logreg_direct = LogisticRegression().fit(X_train, y_train_classes)
y_pred_logreg_direct = logreg_direct.predict(X_test)

Neural Network Model

In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable, matching the fixed random_state used for the split
keras.utils.set_random_seed(42)

# Feed-forward classifier: three ReLU hidden layers (64 -> 32 -> 16) followed by
# a 3-unit softmax output.
# The input width is declared with an explicit Input object, because passing
# `input_dim` to the first Dense layer is deprecated in recent Keras releases.
# Everything is built from the single `keras` namespace so the model class and
# its layers always come from the same Keras implementation.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(3, activation='softmax'),
])

# Categorical cross-entropy pairs with the one-hot encoded targets in y_train
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training the model; 20% of the training rows are set aside for validation
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Predictions: take the most probable class per row, and collapse the one-hot
# test labels the same way so the two can be compared directly
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

Model Results

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Logistic regression: overall accuracy, then the per-class breakdown
logreg_accuracy = accuracy_score(y_test_classes, y_pred_logreg_direct)
print("Logistic Regression Accuracy:", logreg_accuracy)
logreg_report = classification_report(y_test_classes, y_pred_logreg_direct)
print("Logistic Regression Classification Report:\n", logreg_report)

# Neural network: the same two metrics on the same held-out labels
nn_accuracy = accuracy_score(y_test_classes, y_pred_classes)
print("Neural Network Accuracy:", nn_accuracy)
nn_report = classification_report(y_test_classes, y_pred_classes)
print("Classification Report:\n", nn_report)

# Summing the one-hot training labels column-wise gives a count per class;
# the largest count identifies the most common label in the training split
class_counts = y_train.sum(axis=0)
majority_class_index = np.argmax(class_counts)
class_mapping = {0: 'normal', 1: 'slow', 2: 'aggressive'}
majority_label = class_mapping[majority_class_index]
print("Based on the training data, the sample is generally classified as", majority_label, "driving.")