# Imports

In [64]:
import sys
sys.path.append('.')
sys.path.append('..')

import scipy
import numpy as np
from tqdm import tqdm
from scipy.special import softmax
from scipy.stats import skew, kurtosis
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from src.datasets.supervised_dataset import SupervisedDataset
from src.models.label_encoder import LabelEncoder

In [48]:
# Directory passed to SupervisedDataset as the data location.
data_dir = '../data/'

# Column names passed to SupervisedDataset as the feature set,
# grouped here by signal family (order is unchanged).
features = [
    # Raw accelerometer
    'accelerometerAccelerationX(G)',
    'accelerometerAccelerationY(G)',
    'accelerometerAccelerationZ(G)',
    # Attitude
    'motionYaw(rad)',
    'motionRoll(rad)',
    'motionPitch(rad)',
    # Rotation rate
    'motionRotationRateX(rad/s)',
    'motionRotationRateY(rad/s)',
    'motionRotationRateZ(rad/s)',
    # User acceleration
    'motionUserAccelerationX(G)',
    'motionUserAccelerationY(G)',
    'motionUserAccelerationZ(G)',
    # Orientation quaternion
    'motionQuaternionX(R)',
    'motionQuaternionY(R)',
    'motionQuaternionZ(R)',
    'motionQuaternionW(R)',
    # Gravity
    'motionGravityX(G)',
    'motionGravityY(G)',
    'motionGravityZ(G)',
]

# Compute Features

In [49]:
def compute_features(dataset):
    """Summarise every sequence in ``dataset`` as one row of column statistics.

    Each sequence is converted to a NumPy array once, and six statistics are
    taken along axis 0: min, max, standard deviation, mean, skewness and
    kurtosis. The six vectors are concatenated in that order into a single
    feature row.

    Args:
        dataset: Iterable of ``(sequence, label)`` pairs, where ``sequence``
            exposes ``.numpy()`` and ``label`` exposes ``.item()``.

    Returns:
        Tuple ``(X, y)``: ``X`` holds one feature row per sequence and ``y``
        is an array with the matching labels.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    rows = []
    labels = []

    for sequence, label in dataset:
        # Convert once; every statistic below reads the same array.
        values = sequence.numpy()
        stats = (
            np.min(values, axis=0),
            np.max(values, axis=0),
            np.std(values, axis=0),
            np.mean(values, axis=0),
            skew(values, axis=0),
            kurtosis(values, axis=0),
        )
        rows.append(np.concatenate(stats).reshape(1, -1))
        labels.append(label.item())

    if not rows:
        # np.concatenate would also raise ValueError here, but without
        # saying which input was empty.
        raise ValueError("compute_features received an empty dataset")

    return np.concatenate(rows), np.array(labels)

# Evaluate

In [50]:
def evaluate(y_true, y_pred, probs):
    """Score one set of predictions with macro F1 and two multi-class ROC AUCs.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, compared against ``y_true`` for the F1 score.
        probs: Per-class scores handed to ``roc_auc_score``.

    Returns:
        Tuple ``(f1, roc_auc_ovr, roc_auc_ovo)``: the macro-averaged F1 score
        followed by the ROC AUC computed with ``multi_class='ovr'`` and with
        ``multi_class='ovo'``.
    """
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    # Same call twice, differing only in the multi-class scheme.
    auc_ovr, auc_ovo = (
        roc_auc_score(y_true, probs, multi_class=scheme)
        for scheme in ('ovr', 'ovo')
    )
    return macro_f1, auc_ovr, auc_ovo

# KNN

In [51]:
# Number of neighbours consulted for each prediction.
k = 5
# A single estimator instance; the evaluation loop refits it on every fold.
knn = KNeighborsClassifier(n_neighbors=k)

# Random Forest

In [52]:
# Forest hyper-parameters: number of trees, depth cap, and the RNG seed
# handed to the estimator as random_state.
n_estimators, max_depth, seed = 141, 16, 42

random_forest = RandomForestClassifier(
    random_state=seed,
    max_depth=max_depth,
    n_estimators=n_estimators,
)

# SVM

In [53]:
# Standardise the features, then classify with an SVM. Because the scaler is
# part of the pipeline, it is fitted together with the SVM on whatever data is
# passed to svc.fit().
svc = make_pipeline(StandardScaler(), SVC(gamma='auto'))

# Leave-One-User-Out Cross-Validation (LOOCV)

In [59]:
import random

def user_splits(seed=42):
    """Build one (train, validation, test) user split per held-out user.

    Each of the ten users ``user51`` .. ``user60`` is held out once as the
    test user. The other nine are shuffled; the first five join ``user50``
    (which is always a training user) to form the training set, and the
    remaining four form the validation set.

    Args:
        seed: Seed for the private random generator that shuffles the users.
            The same seed always produces the same splits.

    Returns:
        List of ten ``(train_users, val_users, test_users)`` tuples, each
        element being a list of user-name strings (``test_users`` has exactly
        one entry).
    """
    # A private generator yields the same shuffle sequence as
    # random.seed(seed) followed by random.shuffle, but leaves the
    # module-level RNG state of the caller untouched.
    rng = random.Random(seed)

    # user50 is not listed because it is always part of the training set.
    users = ["user51", "user52", "user53", "user54", "user55", "user56", "user57", "user58", "user59", "user60"]
    splits = []

    # Hold out each user in turn as the test user.
    for test_user in users:
        remaining_users = [user for user in users if user != test_user]
        rng.shuffle(remaining_users)
        # user50 plus the first five shuffled users gives six training users;
        # the other four shuffled users are used for validation.
        train_users = ['user50'] + remaining_users[:5]
        val_users = remaining_users[5:9]
        splits.append((train_users, val_users, [test_user]))
    return splits

# Show the splits generated with seed 0, the same seed the evaluation loop passes.
print(user_splits(seed=0))

[(['user50', 'user59', 'user57', 'user53', 'user55', 'user56'], ['user54', 'user52', 'user60', 'user58'], ['user51']), (['user50', 'user54', 'user51', 'user55', 'user60', 'user58'], ['user53', 'user56', 'user57', 'user59'], ['user52']), (['user50', 'user54', 'user51', 'user55', 'user60', 'user58'], ['user57', 'user59', 'user56', 'user52'], ['user53']), (['user50', 'user59', 'user58', 'user60', 'user53', 'user51'], ['user56', 'user55', 'user57', 'user52'], ['user54']), (['user50', 'user52', 'user58', 'user57', 'user51', 'user53'], ['user56', 'user60', 'user59', 'user54'], ['user55']), (['user50', 'user53', 'user55', 'user60', 'user54', 'user51'], ['user59', 'user57', 'user58', 'user52'], ['user56']), (['user50', 'user53', 'user54', 'user51', 'user60', 'user58'], ['user55', 'user59', 'user52', 'user56'], ['user57']), (['user50', 'user56', 'user57', 'user59', 'user51', 'user54'], ['user55', 'user53', 'user60', 'user52'], ['user58']), (['user50', 'user54', 'user51', 'user58', 'user53', 'us

In [60]:
# Per-fold scores for each method; one value is appended per held-out user.
knn_results = {'f1': [], 'auc_ovr': [], 'auc_ovo': []}
random_forest_results = {'f1': [], 'auc_ovr': [], 'auc_ovo': []}
svm_results = {'f1': [], 'auc_ovr': [], 'auc_ovo': []}


def _proba_scores(model, X):
    """Return the class probabilities reported by the fitted model."""
    return model.predict_proba(X)


def _svm_scores(model, X):
    """Return softmax-normalised decision values, so each row sums to 1."""
    return softmax(model.decision_function(X), axis=1)


# (estimator, results store, function producing the per-class scores).
# The order matches the original cell: KNN, Random Forest, SVM.
_benchmarks = [
    (knn, knn_results, _proba_scores),
    (random_forest, random_forest_results, _proba_scores),
    (svc, svm_results, _svm_scores),
]

# The validation users of each split are not used by these baselines.
for train_users, _, test_users in tqdm(user_splits(seed=0)):
    # Load data; a fresh encoder per fold is shared by the train and test sets.
    label_encoder = LabelEncoder()
    train_data = SupervisedDataset(data_dir, train_users, features, label_encoder, normalize=False)
    test_data = SupervisedDataset(data_dir, test_users, features, label_encoder, normalize=False)

    X_train, y_train = compute_features(train_data)
    X_test, y_test = compute_features(test_data)

    # Refit every estimator on this fold and record its three scores.
    for model, results, score_fn in _benchmarks:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        probs = score_fn(model, X_test)
        f1, roc_auc_ovr, roc_auc_ovo = evaluate(y_test, y_pred, probs)
        results['f1'].append(f1)
        results['auc_ovr'].append(roc_auc_ovr)
        results['auc_ovo'].append(roc_auc_ovo)

100%|██████████| 10/10 [01:53<00:00, 11.36s/it]


# Results

In [62]:
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and a Student-t confidence interval around it.

    Args:
        data: Sequence of numeric observations.
        confidence: Two-sided confidence level of the interval.

    Returns:
        Tuple ``(mean, lower, upper)`` where ``lower`` and ``upper`` are the
        mean minus and plus the interval half-width.
    """
    samples = np.array(data) * 1.0
    center = np.mean(samples)
    std_err = scipy.stats.sem(samples)
    # Critical value of the t distribution with len(samples) - 1 degrees of freedom.
    t_crit = scipy.stats.t.ppf((1 + confidence) / 2., len(samples) - 1)
    half_width = std_err * t_crit
    return center, center - half_width, center + half_width

def print_results(method_name, results):
    """Print the mean and confidence interval of each metric for one method.

    One line is printed per metric, in the order F1, AUC (OVR), AUC (OVO),
    formatted as ``<method_name> <metric>: <mean> (<low>-<high>)`` with two
    decimals.

    Args:
        method_name: Label placed at the start of every printed line.
        results: Mapping with the keys ``'f1'``, ``'auc_ovr'`` and
            ``'auc_ovo'``, each holding a sequence of scores.
    """
    # (key in ``results``, label shown in the output). The last label is
    # spelled with the letter O (one-vs-one), not a zero.
    metrics = (
        ('f1', 'F1'),
        ('auc_ovr', 'AUC (OVR)'),
        ('auc_ovo', 'AUC (OVO)'),
    )
    for key, label in metrics:
        mean, low, high = mean_confidence_interval(results[key])
        print("{} {}: {:.2f} ({:.2f}-{:.2f})".format(method_name, label, mean, low, high))

### KNN

In [66]:
# Mean and 95% confidence interval (the default level) of each KNN metric across the folds.
print_results('KNN', knn_results)

KNN F1: 0.42 (0.37-0.48)
KNN AUC (OVR): 0.72 (0.68-0.77)
KNN AUC (OV0): 0.72 (0.68-0.77)


### Random Forest

In [67]:
# Mean and 95% confidence interval (the default level) of each Random Forest metric across the folds.
print_results('Random Forest', random_forest_results)

Random Forest F1: 0.52 (0.43-0.61)
Random Forest AUC (OVR): 0.85 (0.82-0.88)
Random Forest AUC (OV0): 0.85 (0.82-0.88)


### SVM

In [68]:
# Mean and 95% confidence interval (the default level) of each SVM metric across the folds.
print_results('SVM', svm_results)

SVM F1: 0.50 (0.42-0.57)
SVM AUC (OVR): 0.83 (0.80-0.87)
SVM AUC (OV0): 0.83 (0.80-0.87)
