In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [3]:
# data = pd.read_csv('https://drive.google.com/file/d/1cctWeqIBFhFpPxGGoDvu_rfwtVhQsXyr/view?usp=sharing') #Generated Dataset
# data = pd.read_csv('https://raw.githubusercontent.com/Ani-RudE/urop/main/RandomForest/sensor_raw.csv') #Original Dataset
data = pd.read_csv('sensor_raw2.csv') #Cleaning Dataset
window_size = 14

In [6]:
def extract_features(data):
    features = []
    labels = []
    driver_ratings = {}
    for driver_id, driver_data in data.groupby('DriverID'):
        X = []
        y = []
        for _, task_data in driver_data.groupby('Class'):
            for i in range(0, len(task_data) - window_size):
                window = task_data.iloc[i:i + window_size]
                feature_vector = []
                feature_vector.append(window['AccX'].mean())
                feature_vector.append(window['AccY'].mean())
                feature_vector.append(window['AccZ'].mean())
                feature_vector.append(window['GyroX'].mean())
                feature_vector.append(window['GyroY'].mean())
                feature_vector.append(window['GyroZ'].mean())
                X.append(feature_vector)
                y.append(window['Class'].values[0])
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = RandomForestClassifier()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(accuracy)
        driver_ratings[driver_id] = accuracy
    return driver_ratings

In [7]:
driver_ratings = extract_features(data)

for driver_id, accuracy in driver_ratings.items():
    if accuracy > 0.9:
        driver_rating = "Excellent"
    elif accuracy > 0.8:
        driver_rating = "Good"
    elif accuracy > 0.7:
        driver_rating = "Average"
    else:
        driver_rating = "Poor"

    print(f"{driver_id} Rating: {driver_rating}")

1.0
0.9838709677419355
0.8939393939393939
Driver-1 Rating: Excellent
Driver-2 Rating: Excellent
Driver-3 Rating: Good
