In [1]:
# Render matplotlib figures as interactive widgets (ipympl backend)
%matplotlib widget

In [2]:
import os
import random
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import lightgbm as lgb

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, f1_score, roc_curve
from sklearn.decomposition import PCA

from PfyMU.gait.train_classifier.core import load_datasets
from PfyMU.features import *

# Apply the 'ggplot' style sheet to every figure created in this notebook
plt.style.use('ggplot')

In [3]:
def pca_func(x, fs):
    """Project `x` onto its first three principal components.

    A fresh PCA model is fit on every call, so the returned projection is
    specific to the array that was passed in.

    Parameters
    ----------
    x : array-like
        Data handed directly to `PCA.fit_transform`.
    fs : float
        Not used by this function. Presumably present because the
        `signal_function` hook of `load_datasets` passes it - TODO confirm.

    Returns
    -------
    numpy.ndarray
        The 3-component PCA scores of `x`.
    """
    return PCA(n_components=3).fit_transform(x)
    
    
# Per-activity window step settings, passed to `load_datasets` as `window_step`.
# NOTE(review): the values mix small fractions (0.2 - 1.0) with large numbers
# (300, 900); their units/meaning are defined by `load_datasets` and are not
# visible in this notebook - confirm before changing them.
# The 'default' entry is presumably the fallback for activities that are not
# listed explicitly - TODO confirm against `load_datasets`.
steps = {
    'walking': 0.4,
    'walking-impaired': 0.2,
    'sitting': 900,
    'standing': 300,
    'stairs-ascending': 0.3,
    'stairs-descending': 0.3,
    'cycling-50W': 0.3,
    'cycling-100W': 0.3,
    'default': 1.0
}

In [4]:
# Root folder of the processed gait datasets.
# The location differs between machines, so it can be overridden without
# editing the notebook by setting the GAIT_DATASETS_PATH environment variable,
# e.g. to '/Users/adamol/Documents/Datasets/gait/processed'.
# When the variable is not set, the original hard-coded location is used.
# gait_sets_path = Path('/Users/adamol/Documents/Datasets/gait/processed')
gait_sets_path = Path(
    os.environ.get('GAIT_DATASETS_PATH', '/home/lukasadamowicz/Documents/Datasets/processed')
)

# One sub-folder per study
datasets = [
    gait_sets_path / 'bluesky2',
    gait_sets_path / 'daliac',
    gait_sets_path / 'ltmm',
    gait_sets_path / 'usc-had'
]

# Load all studies as 3.0 s windows at a common 50 Hz rate; `pca_func` is
# passed as the per-signal transform and `steps` as the per-activity window step.
X, Y, subjects, activities = load_datasets(
    datasets,
    goal_fs=50.0,
    acc_mag=False,
    window_length=3.0,
    window_step=steps,
    signal_function=pca_func
)

# make stair-climbing in the positive class
# (`Y` itself is left untouched; the alternative labels live in `Y_inc_str`)
mask = (activities == 'stairs-ascending') | (activities == 'stairs-descending')
Y_inc_str = Y.copy()
Y_inc_str[mask] = 1

In [5]:
# Report the balance between the positive (walking) and negative classes
n_windows = Y.size
n_walking = Y.sum()

print('Total samples (3.0s windows): ', n_windows)
print('Total walking samples: ', n_walking)
print('Total non-walking samples: ', n_windows - n_walking, '\n')
print(f'% walking samples: {n_walking / n_windows * 100:.2f}')

Total samples (3.0s windows):  43163
Total walking samples:  20518
Total non-walking samples:  22645 

% walking samples: 47.54


In [6]:
# Tabulate how many windows each activity contributes, smallest class first.
# The last printed column is the fraction (not percentage) of all windows.
unq_act, act_ct = np.unique(activities, return_counts=True)
N = act_ct.sum()
si = act_ct.argsort()
for idx in si:
    a, c = unq_act[idx], act_ct[idx]
    print(f'{a:25s}: {c:5d} / {N:5d}{c/N:8.2f}')

sit-to-stand             :    16 / 43163    0.00
standing-assisted        :   207 / 43163    0.00
jumping-rope             :   212 / 43163    0.00
jumping                  :   311 / 43163    0.01
vacuuming                :   376 / 43163    0.01
lying                    :   378 / 43163    0.01
elevator-descending      :   475 / 43163    0.01
elevator-ascending       :   491 / 43163    0.01
running                  :   541 / 43163    0.01
sweeping                 :   612 / 43163    0.01
running-treadmill        :   755 / 43163    0.02
washing-dishes           :   776 / 43163    0.02
walking-left             :   787 / 43163    0.02
walking-right            :   842 / 43163    0.02
sleeping                 :  1126 / 43163    0.03
stairs-descending        :  2477 / 43163    0.06
cycling-50W              :  2509 / 43163    0.06
cycling-100W             :  2515 / 43163    0.06
stairs-ascending         :  2763 / 43163    0.06
standing                 :  2953 / 43163    0.07
sitting             

In [8]:
sa_df = pd.DataFrame(data={'Subject': subjects, 'Activity': activities})
# helper column, presumably so that `count()` below has a value column to count
sa_df['col1'] = 1.0

# get the subjects for which holding them out actually makes sense: those with
# more than 3 distinct activities recorded (ie clearly more than just walking).
# `gbc` has one row per (Subject, Activity) pair, so the number of rows for a
# subject is the number of distinct activities that subject performed.
gbc = sa_df.groupby(['Subject', 'Activity'], as_index=False).count()
loso_subjects = [i for i in gbc.Subject.unique() if gbc.loc[gbc.Subject == i].shape[0] > 3]

random.seed(5)  # fix the generation so that its the same every time
random.shuffle(loso_subjects)

training_masks = []
validation_masks = []
testing_masks = []

# Every fold holds out a group of 4 subjects: the first 2 form the validation
# set, the last 2 the test set, and all remaining samples are used for training.
# Only complete groups of 4 are turned into folds. Indexing past the end of
# `loso_subjects` used to raise an IndexError whenever the number of eligible
# subjects was not a multiple of 4; left-over subjects now simply stay in the
# training set of every fold.
n_in_full_groups = len(loso_subjects) - len(loso_subjects) % 4

for i in range(0, n_in_full_groups, 4):
    held_out = loso_subjects[i:i + 4]

    v_m = sa_df.Subject.isin(held_out[:2]).values
    te_m = sa_df.Subject.isin(held_out[2:]).values
    tr_m = ~(v_m | te_m)  # everything that is neither validation nor test

    training_masks.append(tr_m)
    validation_masks.append(v_m)
    testing_masks.append(te_m)

In [9]:
FB = Bank(window_length=None, window_step=None)

# Features to register with the bank, in registration order
fb_features = [
    Mean(),
    MeanCrossRate(),
    # StdDev(),  # highly correlated with RMS
    Skewness(),
    Kurtosis(),
    Range(),
    IQR(),
    RMS(),
    LinearSlope(),
    SignalEntropy(),
    SPARC(),
    ComplexityInvariantDistance(normalize=True),
    JerkMetric(normalize=True),
    DimensionlessJerk(log=True, signal_type='acceleration'),

    Autocorrelation(lag=15, normalize=True),
    SampleEntropy(m=2, r=0.5),
    PermutationEntropy(order=3, delay=1, normalize=True),
    RangeCountPercentage(range_min=0.4, range_max=1.5),
    DominantFrequency(low_cutoff=1.0, high_cutoff=3.5),
    DominantFrequencyValue(low_cutoff=0.25, high_cutoff=5.0),
    PowerSpectralSum(low_cutoff=1.0, high_cutoff=3.5),
    SpectralFlatness(low_cutoff=0.0, high_cutoff=6.0),
    SpectralEntropy(low_cutoff=0.0, high_cutoff=5.0),
    DetailPowerRatio(wavelet='coif4', freq_band=[1.0, 3.0]),
]

# add features: the bank is extended through its `+` operator, one feature at a time
for fb_feature in fb_features:
    FB + fb_feature

In [10]:
# Run every feature in the bank over the windowed signals (sampled at 50 Hz);
# the three signal columns are labelled as the principal components PC1-PC3.
X_feat, feature_names = FB.compute(
    X,
    fs=50.0,
    windowed=True,
    columns=['PC1', 'PC2', 'PC3'],
)



In [11]:
# Feature table with one named column per computed feature
feats = pd.DataFrame(X_feat, columns=feature_names, dtype='float')

# Two label sets: stairs as negative class (`labels`) and stairs counted as
# positive class (`labels_istrs`)
labels, labels_istrs = Y, Y_inc_str

In [22]:
# Scratch check: `tmp` is created with shape (50, 1), i.e. it only has axes 0
# and 1, so asking for `shape[2]` raises the IndexError shown in this cell's output.
tmp = np.random.rand(50, 1)
tmp.shape[2]

IndexError: tuple index out of range

# Stairs as negative class

In [19]:
# Overall metrics, one entry per fold
acc, bacc, f1 = [], [], []
# ROC curve data (false positive rate, true positive rate, thresholds), one entry per fold
fpr, tpr, trsh = [], [], []

# The set of activity names does not change between folds, so compute it once
unq_activities = np.unique(activities)
# Per-activity F1 scores: one row per activity, one 'Fold N' column per fold
act_scores = pd.DataFrame(index=unq_activities)

for i, (trm, vm) in enumerate(zip(training_masks, validation_masks)):
    clf = lgb.LGBMClassifier(learning_rate=0.2, random_state=42)
    clf.fit(feats.loc[trm].values, labels[trm])

    # Slice the validation fold once and reuse it for every metric below
    X_val = feats.loc[vm].values
    y_val = labels[vm]
    act_val = activities[vm]

    y_pred = clf.predict(X_val)
    y_score = clf.predict_proba(X_val)

    acc.append(accuracy_score(y_val, y_pred, normalize=True))
    bacc.append(balanced_accuracy_score(y_val, y_pred))
    f1.append(f1_score(y_val, y_pred))

    # column 1 of `predict_proba` is the score of the positive class
    f_, t_, tr_ = roc_curve(y_val, y_score[:, 1])
    fpr.append(f_)
    tpr.append(t_)
    trsh.append(tr_)

    for act in unq_activities:
        in_act = act_val == act
        ytr_ = y_val[in_act].astype(bool)
        ypr_ = y_pred[in_act].astype(bool)

        if ytr_.size == 0:
            # activity not present in this validation fold
            act_scores.loc[act, f'Fold {i}'] = np.nan
        elif ytr_.all():
            # activity belongs to the positive class: score it directly
            act_scores.loc[act, f'Fold {i}'] = f1_score(ytr_, ypr_, pos_label=1)
        else:
            # activity has negative labels: invert truth and prediction so the
            # F1 score measures how well the negatives are recognised
            act_scores.loc[act, f'Fold {i}'] = f1_score(~ytr_, ~ypr_, pos_label=1)

    print(f'Accuracy: {acc[-1]*100:.2f}   Balanced Accuracy: {bacc[-1]*100:.2f}   F1: {f1[-1]*100:.2f}')

print('\n', '-'*50)
print(f'Mean Accuracy: {np.mean(acc)*100:.2f}  Mean Bal. Acc.: {np.mean(bacc)*100:.2f}  Mean F1: {np.mean(f1)*100:.2f}')

Accuracy: 97.03   Balanced Accuracy: 96.95   F1: 96.45
Accuracy: 97.26   Balanced Accuracy: 97.24   F1: 95.02
Accuracy: 96.55   Balanced Accuracy: 95.58   F1: 94.21
Accuracy: 97.41   Balanced Accuracy: 97.72   F1: 95.16
Accuracy: 91.67   Balanced Accuracy: 92.95   F1: 93.76
Accuracy: 98.95   Balanced Accuracy: 98.90   F1: 99.21
Accuracy: 94.85   Balanced Accuracy: 94.08   F1: 93.42
Accuracy: 94.90   Balanced Accuracy: 94.81   F1: 95.18
Accuracy: 95.12   Balanced Accuracy: 95.14   F1: 93.40
Accuracy: 96.93   Balanced Accuracy: 97.38   F1: 95.24
Accuracy: 89.45   Balanced Accuracy: 90.01   F1: 83.99

 --------------------------------------------------
Mean Accuracy: 95.46  Mean Bal. Acc.: 95.53  Mean F1: 94.10


In [17]:
# Left panel: ROC curve of every validation fold.
# Right panel: for every decision threshold, the Euclidean distance between the
# ROC point and the perfect-classifier corner (FPR = 0, TPR = 1).
f, (ax, ax1) = plt.subplots(ncols=2, figsize=(10, 5))

# Folds are numbered from 0 so that the legend matches the 'Fold N' columns of `act_scores`
for i, (fp, tp, tr) in enumerate(zip(fpr, tpr, trsh)):
    ax.plot(fp, tp, label=f'Fold {i}')
    ax1.plot(tr, np.sqrt(fp**2 + (1 - tp)**2))

ax.legend()
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')

# Limit the view to the probability range of the classifier scores
ax1.set_xlim(-0.05, 1.05)
ax1.set_xlabel('Decision Threshold')
ax1.set_ylabel('Distance to Perfect Classifier (FPR=0, TPR=1)')

f.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
# Per-activity F1 score for every validation fold; NaN marks activities that
# do not occur in that fold's validation set.
act_scores

Unnamed: 0,Fold 0,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,Fold 6,Fold 7,Fold 8,Fold 9,Fold 10
cycling-100W,1.0,0.909804,0.982659,1.0,,,,,0.95082,,0.976744
cycling-50W,1.0,0.986616,0.986767,1.0,,,,,1.0,,0.972973
elevator-ascending,,,,,,,0.989247,1.0,1.0,1.0,
elevator-descending,,,,,,,1.0,0.982659,1.0,1.0,
jumping,,,,,,,1.0,1.0,1.0,1.0,
jumping-rope,1.0,1.0,1.0,1.0,,,,,1.0,,1.0
lying,1.0,1.0,0.987013,1.0,,,,,1.0,,1.0
running,,,,,,,1.0,1.0,1.0,1.0,
running-treadmill,1.0,1.0,1.0,1.0,,,,,1.0,,1.0
sit-to-stand,,,,,,,1.0,,,,
