# Imports

In [3]:
import sys
sys.path.append('.')
sys.path.append('..')

import os
import json
import scipy
import numpy as np
from tqdm import tqdm
from scipy.special import softmax
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import label_binarize
from sklearn.metrics import f1_score, roc_auc_score, roc_curve, auc
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from src.datasets.supervised_dataset import SupervisedDataset
from src.models.label_encoder import LabelEncoder

In [4]:
# Folder the recordings are loaded from (relative path, handed to SupervisedDataset).
data_dir = '../data/'

# Column names handed to SupervisedDataset; the order here is the order in
# which they are passed on.
features = [
    # accelerometer
    'accelerometerAccelerationX(G)',
    'accelerometerAccelerationY(G)',
    'accelerometerAccelerationZ(G)',
    # yaw / roll / pitch
    'motionYaw(rad)',
    'motionRoll(rad)',
    'motionPitch(rad)',
    # rotation rate
    'motionRotationRateX(rad/s)',
    'motionRotationRateY(rad/s)',
    'motionRotationRateZ(rad/s)',
    # user acceleration
    'motionUserAccelerationX(G)',
    'motionUserAccelerationY(G)',
    'motionUserAccelerationZ(G)',
    # quaternion
    'motionQuaternionX(R)',
    'motionQuaternionY(R)',
    'motionQuaternionZ(R)',
    'motionQuaternionW(R)',
    # gravity
    'motionGravityX(G)',
    'motionGravityY(G)',
    'motionGravityZ(G)',
]

# Compute Features

In [5]:
def compute_features(dataset):
    """Turn every sequence of ``dataset`` into one row of summary statistics.

    Each item of ``dataset`` must unpack as ``(sequence, label)``, where
    ``sequence.numpy()`` returns an array that is reduced along axis 0 and
    ``label.item()`` returns the scalar label.

    For every sequence six per-column statistics are computed and laid out
    block-wise in this order: all minima, all maxima, all standard
    deviations, all means, all skewness values, all kurtosis values.

    Returns:
        (X, y): ``X`` has one row per sequence and six values per input
        column; ``y`` is the array of labels in iteration order.
    """
    # Order matters: it defines the column layout of the feature matrix.
    reducers = (np.min, np.max, np.std, np.mean, skew, kurtosis)

    rows = []
    labels = []
    for sequence, label in dataset:
        values = sequence.numpy()
        rows.append(np.concatenate([reduce(values, axis=0) for reduce in reducers]))
        labels.append(label.item())

    # vstack promotes each 1-D row to shape (1, n_features) before stacking.
    return np.vstack(rows), np.array(labels)

# Evaluate

In [6]:
def evaluate(y_true, y_pred, probs, classes=None):
    """Compute macro F1, multi-class ROC AUC (OvR and OvO) and per-class AUC.

    Args:
        y_true: ground-truth labels, one per sample.
        y_pred: predicted labels, one per sample.
        probs: array of shape (n_samples, n_classes) with one score column
            per class; column ``i`` is scored against ``classes[i]``.
        classes: label value for each column of ``probs``. Defaults to
            ``0 .. probs.shape[1] - 1``, which matches the previously
            hard-coded ``[0, 1, 2, 3, 4]`` for five-class input.

    Returns:
        (f1, roc_auc_ovr, roc_auc_ovo, roc_auc) where ``roc_auc`` is a dict
        mapping column index to the one-vs-rest AUC of that column.
    """
    # Per-class curves must be built from the labels passed in, not from
    # whatever `y_test` happens to exist at notebook level.
    y_true = np.asarray(y_true)

    f1 = f1_score(y_true, y_pred, average='macro')
    roc_auc_ovr = roc_auc_score(y_true, probs, multi_class='ovr')
    roc_auc_ovo = roc_auc_score(y_true, probs, multi_class='ovo')

    if classes is None:
        classes = range(probs.shape[1])

    roc_auc = {}
    for i, cls in enumerate(classes):
        # One-vs-rest target for this class: 1 where the label matches, else 0.
        is_cls = (y_true == cls).astype(int)
        fpr, tpr, _ = roc_curve(is_cls, probs[:, i])
        roc_auc[i] = auc(fpr, tpr)
    return f1, roc_auc_ovr, roc_auc_ovo, roc_auc

# KNN

In [7]:
# k-nearest-neighbours baseline; `k` is the number of neighbours passed to the classifier.
k = 5
knn = KNeighborsClassifier(n_neighbors=k)

# Random Forest

In [8]:
# Random-forest baseline hyper-parameters.
# NOTE(review): the notebook does not show how 141 trees / depth 16 were
# chosen — presumably tuned elsewhere; confirm.
n_estimators = 141
max_depth = 16
# Passed as random_state so the forest is seeded identically on every run.
seed = 42

random_forest = RandomForestClassifier(n_estimators=n_estimators, 
                                    max_depth=max_depth, 
                                    random_state=seed)

# SVM

In [9]:
# SVM baseline: a pipeline that applies StandardScaler and then an SVC.
# The SVC is built without probability=True; the evaluation cell scores it
# through decision_function instead of predict_proba.
svc = make_pipeline(StandardScaler(), SVC(gamma='auto'))

# LOOCV

In [10]:
import random

def user_splits(seed=42):
    """Build one (train, val, test) user split per held-out user.

    Every user from ``user51`` to ``user60`` is the single test user exactly
    once. The nine others are shuffled; the first five join ``user50`` (which
    is always a training user) to form the training set, and the remaining
    four form the validation set.

    Note: this reseeds the global ``random`` module with ``seed``, so the
    returned splits are deterministic and the global RNG state is changed as
    a side effect.

    Returns:
        A list of ``(train_users, val_users, test_users)`` tuples of lists.
    """
    random.seed(seed)

    # user50 is deliberately absent: it is never held out.
    candidates = ["user51", "user52", "user53", "user54", "user55",
                  "user56", "user57", "user58", "user59", "user60"]

    folds = []
    for held_out in candidates:
        pool = candidates.copy()
        pool.remove(held_out)
        random.shuffle(pool)
        # 5 shuffled users + user50 -> 6 training users; the other 4 validate.
        training = ['user50'] + pool[:5]
        validation = pool[5:9]
        folds.append((training, validation, [held_out]))
    return folds

# Flip to True to (re)write the per-split user lists under ./loocv/.
# Kept outside the loop because it does not change between iterations.
save_split = False

for i, (train_users, val_users, test_users) in tqdm(enumerate(user_splits(seed=0))):
    print(f"Split {i+1}: Train: ", train_users, "Val: ", val_users, "Test: ", test_users)
    if not save_split:
        continue

    # makedirs creates the parent "loocv" folder when missing and does not
    # fail when the split folder already exists, so the cell can be re-run.
    split_dir = f"loocv/split_{i+1}"
    os.makedirs(split_dir, exist_ok=True)

    # One file per subset, one user name per line.
    for subset, subset_users in (("train", train_users),
                                 ("val", val_users),
                                 ("test", test_users)):
        with open(f"{split_dir}/users_{subset}.txt", "w+") as f:
            for user in subset_users:
                f.write(f"{user}\n")

10it [00:00, 14256.64it/s]

Split 1: Train:  ['user50', 'user59', 'user57', 'user53', 'user55', 'user56'] Val:  ['user54', 'user52', 'user60', 'user58'] Test:  ['user51']
Split 2: Train:  ['user50', 'user54', 'user51', 'user55', 'user60', 'user58'] Val:  ['user53', 'user56', 'user57', 'user59'] Test:  ['user52']
Split 3: Train:  ['user50', 'user54', 'user51', 'user55', 'user60', 'user58'] Val:  ['user57', 'user59', 'user56', 'user52'] Test:  ['user53']
Split 4: Train:  ['user50', 'user59', 'user58', 'user60', 'user53', 'user51'] Val:  ['user56', 'user55', 'user57', 'user52'] Test:  ['user54']
Split 5: Train:  ['user50', 'user52', 'user58', 'user57', 'user51', 'user53'] Val:  ['user56', 'user60', 'user59', 'user54'] Test:  ['user55']
Split 6: Train:  ['user50', 'user53', 'user55', 'user60', 'user54', 'user51'] Val:  ['user59', 'user57', 'user58', 'user52'] Test:  ['user56']
Split 7: Train:  ['user50', 'user53', 'user54', 'user51', 'user60', 'user58'] Val:  ['user55', 'user59', 'user52', 'user56'] Test:  ['user57']




In [11]:
# One list per metric; each list receives one entry per split.
knn_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}
random_forest_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}
svm_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}

# (model, result store, scoring function) in the order they are evaluated.
# KNN and the forest expose predict_proba; the SVM pipeline is scored with a
# softmax over its decision_function output instead.
baselines = (
    (knn, knn_results, lambda model, X: model.predict_proba(X)),
    (random_forest, random_forest_results, lambda model, X: model.predict_proba(X)),
    (svc, svm_results, lambda model, X: softmax(model.decision_function(X), axis=1)),
)

for train_users, _, test_users in tqdm(user_splits(seed=0)):
    # Load data (the validation users of the split are not used here):
    label_encoder = LabelEncoder()
    train_data = SupervisedDataset(data_dir, train_users, features, label_encoder, normalize=False)
    test_data = SupervisedDataset(data_dir, test_users, features, label_encoder, normalize=False)

    X_train, y_train = compute_features(train_data)
    X_test, y_test = compute_features(test_data)

    # Fit each baseline on the training users and score it on the held-out user.
    for model, store, score_fn in baselines:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        probs = score_fn(model, X_test)
        f1, roc_auc_ovr, roc_auc_ovo, roc_auc = evaluate(y_test, y_pred, probs)
        store['f1'].append(f1)
        store['auc_ovr'].append(roc_auc_ovr)
        store['auc_ovo'].append(roc_auc_ovo)
        store['auc'].append(roc_auc)

100%|██████████| 10/10 [01:45<00:00, 10.54s/it]


In [44]:
# One list per metric; each list receives one entry per split.
v0_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}
v1_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}
v2_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}
v3_results = {metric: [] for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc')}

# Transformer version tag (as used in the output folder name) -> result store.
transformer_results = {
    'v0': v0_results,
    'v1': v1_results,
    'v2': v2_results,
    'v3': v3_results,
}

for split in range(1, 11):
    for version, store in transformer_results.items():
        with open(f'../out/loocv/split_{split}/{version}_pretrained/val_loss/results_test.json') as f:
            results = json.load(f)
        # JSON object keys are strings; convert the per-class AUC keys to int.
        results['auc'] = {int(k): v for k, v in results['auc'].items()}
        for metric in ('f1', 'auc_ovr', 'auc_ovo', 'auc'):
            store[metric].append(results[metric])


# Results

In [45]:
def mean_confidence_interval(data, confidence=0.95):
    """Return the mean of ``data`` with a Student-t confidence interval.

    Args:
        data: sequence of numbers.
        confidence: two-sided confidence level (default 0.95).

    Returns:
        (mean, lower, upper), where the bounds are the mean minus / plus the
        standard error times the t quantile with ``len(data) - 1`` degrees
        of freedom.
    """
    samples = np.array(data) * 1.0  # force a float array
    dof = len(samples) - 1
    center = np.mean(samples)
    std_err = scipy.stats.sem(samples)
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2., dof)
    return center, center - half_width, center + half_width

def print_results(method_name, results, auc_class=1):
    """Print mean and 95% confidence interval of each metric for one method.

    Args:
        method_name: label printed at the start of every line.
        results: dict with keys ``'f1'``, ``'auc_ovr'``, ``'auc_ovo'`` (lists
            of floats, one per split) and ``'auc'`` (list of per-split dicts
            mapping class index to AUC).
        auc_class: class index whose per-class AUC is summarised in the last
            line (default 1, the index that was previously hard-coded).
    """
    summaries = (
        ("F1", results['f1']),
        ("AUC (OVR)", results['auc_ovr']),
        # Label is "OVO" (one-vs-one); it used to be misspelled with a zero.
        ("AUC (OVO)", results['auc_ovo']),
        ("AUC ({})".format(auc_class), [res[auc_class] for res in results['auc']]),
    )
    for label, values in summaries:
        mean, low, high = mean_confidence_interval(values)
        print("{} {}: {:.2f} ({:.2f}-{:.2f})".format(method_name, label, mean, low, high))

### KNN

In [46]:
# Mean and 95% CI of the KNN baseline over the splits.
print_results('KNN', knn_results)

KNN F1: 0.42 (0.37-0.48)
KNN AUC (OVR): 0.72 (0.68-0.77)
KNN AUC (OV0): 0.72 (0.68-0.77)
KNN AUC (1): 0.66 (0.62-0.70)


### Random Forest

In [47]:
# Mean and 95% CI of the random-forest baseline over the splits.
print_results('Random Forest', random_forest_results)

Random Forest F1: 0.52 (0.43-0.61)
Random Forest AUC (OVR): 0.85 (0.82-0.88)
Random Forest AUC (OV0): 0.85 (0.82-0.88)
Random Forest AUC (1): 0.81 (0.76-0.85)


### SVM

In [48]:
# Mean and 95% CI of the SVM baseline over the splits.
print_results('SVM', svm_results)

SVM F1: 0.50 (0.42-0.57)
SVM AUC (OVR): 0.83 (0.80-0.87)
SVM AUC (OV0): 0.83 (0.80-0.87)
SVM AUC (1): 0.77 (0.72-0.82)


### Transformer v0

In [35]:
# Transformer v0 summary. NOTE(review): every transformer variant is printed
# under the same 'Transformer' label, so only the heading tells the outputs apart.
print_results('Transformer', v0_results)

Transformer F1: 0.53 (0.46-0.60)
Transformer AUC (OVR): 0.83 (0.80-0.87)
Transformer AUC (OV0): 0.83 (0.80-0.87)
Transformer AUC (1): 0.81 (0.77-0.85)


In [25]:
# NOTE(review): same call as the previous cell, but recorded under a lower
# execution count (In [25] vs In [35]) and with different numbers — presumably
# a stale run against other result files; confirm which output is current.
print_results('Transformer', v0_results)

Transformer F1: 0.46 (0.36-0.56)
Transformer AUC (OVR): 0.81 (0.76-0.86)
Transformer AUC (OV0): 0.81 (0.76-0.86)
Transformer AUC (1): 0.77 (0.73-0.81)


### Transformer v1

In [36]:
# Transformer v1 summary (printed under the generic 'Transformer' label).
print_results('Transformer', v1_results)

Transformer F1: 0.51 (0.43-0.59)
Transformer AUC (OVR): 0.82 (0.78-0.87)
Transformer AUC (OV0): 0.82 (0.78-0.87)
Transformer AUC (1): 0.77 (0.72-0.81)


In [26]:
# NOTE(review): same call as the previous cell, but recorded under a lower
# execution count (In [26] vs In [36]) and with different numbers — presumably
# a stale run against other result files; confirm which output is current.
print_results('Transformer', v1_results)

Transformer F1: 0.54 (0.47-0.62)
Transformer AUC (OVR): 0.84 (0.80-0.88)
Transformer AUC (OV0): 0.84 (0.80-0.88)
Transformer AUC (1): 0.80 (0.74-0.85)


### Transformer v2

In [37]:
# Transformer v2 summary (printed under the generic 'Transformer' label).
print_results('Transformer', v2_results)

Transformer F1: 0.57 (0.51-0.63)
Transformer AUC (OVR): 0.84 (0.79-0.89)
Transformer AUC (OV0): 0.84 (0.79-0.89)
Transformer AUC (1): 0.81 (0.78-0.83)


In [27]:
# NOTE(review): same call as the previous cell, but recorded under a lower
# execution count (In [27] vs In [37]) and with different numbers — presumably
# a stale run against other result files; confirm which output is current.
print_results('Transformer', v2_results)

Transformer F1: 0.56 (0.49-0.63)
Transformer AUC (OVR): 0.85 (0.82-0.89)
Transformer AUC (OV0): 0.85 (0.82-0.89)
Transformer AUC (1): 0.84 (0.81-0.88)


### Transformer v3

In [38]:
# Transformer v3 summary (printed under the generic 'Transformer' label).
print_results('Transformer', v3_results)

Transformer F1: 0.55 (0.49-0.61)
Transformer AUC (OVR): 0.84 (0.78-0.89)
Transformer AUC (OV0): 0.84 (0.78-0.89)
Transformer AUC (1): 0.80 (0.76-0.83)


In [28]:
# NOTE(review): same call as the previous cell, but recorded under a lower
# execution count (In [28] vs In [38]) and with different numbers — presumably
# a stale run against other result files; confirm which output is current.
print_results('Transformer', v3_results)

Transformer F1: 0.49 (0.40-0.59)
Transformer AUC (OVR): 0.82 (0.75-0.88)
Transformer AUC (OV0): 0.82 (0.75-0.88)
Transformer AUC (1): 0.82 (0.78-0.86)


In [51]:
# Per-split values for one model: F1 first, then the AUC of class 0.
results = v2_results

for f1_value in results['f1']:
    print('{:.2f}'.format(f1_value))

# Separator between the two columns of numbers.
print("\n")

for per_class_auc in results['auc']:
    print('{:.2f}'.format(per_class_auc[0]))

0.43
0.65
0.50
0.63
0.52
0.40
0.58
0.59
0.69
0.60


0.56
0.79
0.83
0.84
0.68
0.57
0.85
0.85
0.85
0.87
