In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Read the dataset
data = pd.read_csv('sensor_raw2.csv')
window_size = 14

In [6]:
def train_model(data):
    X = []
    y = []

    # Select data only for DriverID = 1
    driver_data = data[data['DriverID'] == 'Driver-1']

    for _, task_data in driver_data.groupby('Class'):
        for i in range(0, len(task_data) - window_size):
            window = task_data.iloc[i:i + window_size]
            feature_vector = [window['AccX'].mean(),
                              window['AccY'].mean(),
                              window['AccZ'].mean(),
                              window['GyroX'].mean(),
                              window['GyroY'].mean(),
                              window['GyroZ'].mean()]
            X.append(feature_vector)
            y.append(window['Class'].values[0])

    model = RandomForestClassifier()
    model.fit(X, y)

    # Calculate and print model accuracy
    X_test = []
    y_test = []
    for _, task_data in driver_data.groupby('Class'):
        for i in range(len(task_data) - window_size, len(task_data)):
            window = task_data.iloc[i:i + window_size]
            feature_vector = [window['AccX'].mean(),
                              window['AccY'].mean(),
                              window['AccZ'].mean(),
                              window['GyroX'].mean(),
                              window['GyroY'].mean(),
                              window['GyroZ'].mean()]
            X_test.append(feature_vector)
            y_test.append(window['Class'].values[0])

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nModel Accuracy on DriverID = 1 data: {accuracy}")

    return model

def predict_classes(model, data, driver_id):
    class_counts = {1: 0, 2: 0, 3: 0, 4: 0}

    # Select data for the given DriverID
    driver_data = data[data['DriverID'] == f'Driver-{driver_id}']

    for i in range(0, len(driver_data) - window_size):
        window = driver_data.iloc[i:i + window_size]
        feature_vector = [window['AccX'].mean(),
                          window['AccY'].mean(),
                          window['AccZ'].mean(),
                          window['GyroX'].mean(),
                          window['GyroY'].mean(),
                          window['GyroZ'].mean()]
        predicted_class = int(model.predict([feature_vector])[0])
        class_counts[predicted_class] += 1

    print(f"\nActual Class Counts at the End of the Journey for DriverID = {driver_id}:")
    for class_label, count in class_counts.items():
        print(f"Class {class_label}: {count} times")

# Train the model on data for DriverID = 1
trained_model = train_model(data)

# Predict classes at the end of the journey for DriverID = 2 and DriverID = 3
predict_classes(trained_model, data, driver_id=2)
predict_classes(trained_model, data, driver_id=3)


Model Accuracy on DriverID = 1 data: 0.7678571428571429

Actual Class Counts at the End of the Journey for DriverID = 2:
Class 1: 122 times
Class 2: 88 times
Class 3: 125 times
Class 4: 14 times

Actual Class Counts at the End of the Journey for DriverID = 3:
Class 1: 199 times
Class 2: 98 times
Class 3: 27 times
Class 4: 45 times
