In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np

# Default size (width, height in inches) applied to every figure created below.
plt.rcParams['figure.figsize'] = (10.0, 4.0)

In [None]:
import sys

# Make the parent directory importable (the `flod` imports in the next cell
# presumably rely on it). Guarded so that re-running this cell does not keep
# appending duplicate entries to `sys.path`.
if '..' not in sys.path:
    sys.path.append('..')

In [None]:
from flod.features_extraction import load_features
from flod.dataset import download_dataset
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from flod.classifiers.bsvclassifier import BSVClassifier
from scipy.stats import uniform as sp_randFloat
from scipy.stats import randint as sp_randInt
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, make_scorer
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, RobustScaler, QuantileTransformer

In [None]:
# Folder passed to both `download_dataset` and `load_features.load_features` below.
CACHE_FOLDER = '../cache'
# Override the module-level labels path of `load_features` with a path relative
# to this notebook (presumably read when the features are loaded -- confirm).
load_features.LABELS_PATH = '../labels.json'
# Identifier passed as the last argument of `load_features.load_features`
# (presumably restricts the data to one subject -- confirm).
person_id = 'SA23'

Let's load the features with 90% overlap

In [None]:
# Resolve the dataset location, then load the features from it.
# NOTE(review): the positional arguments are presumably window size (100),
# "filtered" flag (False) and window overlap (0.9) -- confirm against
# `load_features.load_features`.
dataset_path = download_dataset(CACHE_FOLDER)
dataset = load_features.load_features(CACHE_FOLDER, dataset_path, 100, False, 0.9, person_id)

In [None]:
# Display the loaded features as the cell output.
dataset

Let's see whether the features, reduced to their first two principal components, show a clear visual separation between the classes.

In [None]:
# Feature columns used as model input, and the `is_fall` label column.
X = dataset[['c1', 'c2', 'c3', 'c4']]
y = dataset['is_fall']

# One colour per sample for the scatter plots: red when the label equals 1,
# green for every other value.
color = ['green' if label != 1 else 'red' for label in y]

In [None]:
# Scale every feature by its maximum absolute value, then project the scaled
# features onto two principal components.
# NOTE: `X` is overwritten with the 2-D projection, so the cell that selects
# the feature columns must be re-run before executing this cell again.
scaler = MaxAbsScaler()
X_full = scaler.fit_transform(X)

pca_2d = PCA(n_components=2)
X = pca_2d.fit_transform(X_full)

In [None]:
# 2-D scatter of the PCA projection. Explicit axes with labels and a title so
# the figure can be understood without reading the surrounding cells.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=color, alpha=.2)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_title('PCA (2 components), unfiltered features, 90% overlap - red: fall, green: no fall')
plt.show()

Maybe with three principal components the classes separate better.

In [None]:
# Importing Axes3D registers the '3d' projection on older matplotlib releases;
# newer releases register it automatically, so the import is kept only for
# backward compatibility.
from mpl_toolkits.mplot3d import Axes3D

# NOTE: overwrites `X` with the 3-D projection of the scaled features.
X = PCA(n_components=3).fit_transform(X_full)

fig = plt.figure()
# Create the 3-D axes through the figure. Instantiating `Axes3D(fig)` directly
# no longer attaches the axes to the figure in recent matplotlib releases,
# which results in an empty plot.
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, alpha=.1)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_zlabel('PC 3')
ax.set_title('PCA (3 components), unfiltered features, 90% overlap - red: fall, green: no fall')
plt.show()

Let's try with less overlap between the windows (30% instead of 90%) to reduce the noise.

In [None]:
# Reload the features, this time passing 0.3 instead of 0.9 as the fifth
# argument (the window overlap, according to the surrounding notes).
# NOTE(review): 100 and False are presumably the window size and the
# "filtered" flag -- confirm against `load_features.load_features`.
dataset_path = download_dataset(CACHE_FOLDER)
dataset = load_features.load_features(CACHE_FOLDER, dataset_path, 100, False, 0.3, person_id)

In [None]:
# Feature columns used as model input, and the `is_fall` label column.
X = dataset[['c1', 'c2', 'c3', 'c4']]
y = dataset['is_fall']

# One colour per sample for the scatter plots: red when the label equals 1,
# green for every other value.
color = ['green' if label != 1 else 'red' for label in y]

In [None]:
# Scale every feature by its maximum absolute value, then project the scaled
# features onto two principal components.
# NOTE: `X` is overwritten with the 2-D projection, so the cell that selects
# the feature columns must be re-run before executing this cell again.
scaler = MaxAbsScaler()
X_full = scaler.fit_transform(X)

pca_2d = PCA(n_components=2)
X = pca_2d.fit_transform(X_full)

In [None]:
# 2-D scatter of the PCA projection. Explicit axes with labels and a title so
# the figure can be understood without reading the surrounding cells.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=color, alpha=.2)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_title('PCA (2 components), unfiltered features, 30% overlap - red: fall, green: no fall')
plt.show()

In [None]:
# Importing Axes3D registers the '3d' projection on older matplotlib releases;
# newer releases register it automatically, so the import is kept only for
# backward compatibility.
from mpl_toolkits.mplot3d import Axes3D

# NOTE: overwrites `X` with the 3-D projection of the scaled features.
X = PCA(n_components=3).fit_transform(X_full)

fig = plt.figure()
# Create the 3-D axes through the figure. Instantiating `Axes3D(fig)` directly
# no longer attaches the axes to the figure in recent matplotlib releases,
# which results in an empty plot.
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, alpha=.1)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_zlabel('PC 3')
ax.set_title('PCA (3 components), unfiltered features, 30% overlap - red: fall, green: no fall')
plt.show()

Let's try with the filtered data (the `f_c*` feature columns), first with 90% overlap and then with 30%.

In [None]:
# Reload the features with the fourth argument set to True (the "filtered"
# variant, according to the surrounding notes) and 0.9 as the overlap.
# NOTE(review): 100 is presumably the window size -- confirm against
# `load_features.load_features`.
dataset_path = download_dataset(CACHE_FOLDER)
dataset = load_features.load_features(CACHE_FOLDER, dataset_path, 100, True, 0.9, person_id)

In [None]:
# Filtered feature columns (`f_` prefix) used as model input, and the
# `is_fall` label column.
X = dataset[['f_c1', 'f_c2', 'f_c3', 'f_c4']]
y = dataset['is_fall']

# One colour per sample for the scatter plots: red when the label equals 1,
# green for every other value.
color = ['green' if label != 1 else 'red' for label in y]

In [None]:
# Scale every feature by its maximum absolute value, then project the scaled
# features onto two principal components.
# NOTE: `X` is overwritten with the 2-D projection, so the cell that selects
# the feature columns must be re-run before executing this cell again.
scaler = MaxAbsScaler()
X_full = scaler.fit_transform(X)

pca_2d = PCA(n_components=2)
X = pca_2d.fit_transform(X_full)

In [None]:
# 2-D scatter of the PCA projection. Explicit axes with labels and a title so
# the figure can be understood without reading the surrounding cells.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=color, alpha=.2)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_title('PCA (2 components), filtered features, 90% overlap - red: fall, green: no fall')
plt.show()

In [None]:
# Importing Axes3D registers the '3d' projection on older matplotlib releases;
# newer releases register it automatically, so the import is kept only for
# backward compatibility.
from mpl_toolkits.mplot3d import Axes3D

# NOTE: overwrites `X` with the 3-D projection of the scaled features.
X = PCA(n_components=3).fit_transform(X_full)

fig = plt.figure()
# Create the 3-D axes through the figure. Instantiating `Axes3D(fig)` directly
# no longer attaches the axes to the figure in recent matplotlib releases,
# which results in an empty plot.
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, alpha=.1)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_zlabel('PC 3')
ax.set_title('PCA (3 components), filtered features, 90% overlap - red: fall, green: no fall')
plt.show()

In [None]:
# Filtered variant again (fourth argument True), now with 0.3 instead of 0.9
# as the overlap argument.
# NOTE(review): 100 is presumably the window size -- confirm against
# `load_features.load_features`.
dataset_path = download_dataset(CACHE_FOLDER)
dataset = load_features.load_features(CACHE_FOLDER, dataset_path, 100, True, 0.3, person_id)

In [None]:
# Filtered feature columns (`f_` prefix) used as model input, and the
# `is_fall` label column.
X = dataset[['f_c1', 'f_c2', 'f_c3', 'f_c4']]
y = dataset['is_fall']

# One colour per sample for the scatter plots: red when the label equals 1,
# green for every other value.
color = ['green' if label != 1 else 'red' for label in y]

In [None]:
# Scale every feature by its maximum absolute value, then project the scaled
# features onto two principal components.
# NOTE: `X` is overwritten with the 2-D projection, so the cell that selects
# the feature columns must be re-run before executing this cell again.
scaler = MaxAbsScaler()
X_full = scaler.fit_transform(X)

pca_2d = PCA(n_components=2)
X = pca_2d.fit_transform(X_full)

In [None]:
# 2-D scatter of the PCA projection. Explicit axes with labels and a title so
# the figure can be understood without reading the surrounding cells.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=color, alpha=.2)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_title('PCA (2 components), filtered features, 30% overlap - red: fall, green: no fall')
plt.show()

In [None]:
# Importing Axes3D registers the '3d' projection on older matplotlib releases;
# newer releases register it automatically, so the import is kept only for
# backward compatibility.
from mpl_toolkits.mplot3d import Axes3D

# NOTE: overwrites `X` with the 3-D projection of the scaled features.
X = PCA(n_components=3).fit_transform(X_full)

fig = plt.figure()
# Create the 3-D axes through the figure. Instantiating `Axes3D(fig)` directly
# no longer attaches the axes to the figure in recent matplotlib releases,
# which results in an empty plot.
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, alpha=.1)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
ax.set_zlabel('PC 3')
ax.set_title('PCA (3 components), filtered features, 30% overlap - red: fall, green: no fall')
plt.show()

# Fewer questions, more training

In [None]:
# Reload the data used for training: fourth argument True (the "filtered"
# variant, according to the notes above) and 0.3 as the overlap argument.
dataset_path = download_dataset(CACHE_FOLDER)
dataset = load_features.load_features(CACHE_FOLDER, dataset_path, 100, True, 0.3, person_id)

# Model input (filtered feature columns) and the `is_fall` target.
X = dataset[['f_c1', 'f_c2', 'f_c3', 'f_c4']]
y = dataset['is_fall']

# Per-sample plot colour: red when the label equals 1, green otherwise.
color = ['green' if label != 1 else 'red' for label in y]

In [None]:
from scipy.stats import randint, uniform
from sklearn.model_selection import cross_validate
from tqdm.notebook import tqdm
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold

# Fixed seed so that the shuffled CV splits and the sampled hyper-parameters
# are the same on every "Restart & Run All".
SEED = 42

# Scale -> reduce dimensionality -> classify. The scaler instance and the PCA
# size given here are only defaults; both are replaced by the random search.
pipe = Pipeline([
    ('scaler', MaxAbsScaler()),
    ('reduce_dim', PCA()),
    ('classifier', BSVClassifier())
])

params = {
    'scaler': [StandardScaler(), MaxAbsScaler(), RobustScaler(), QuantileTransformer()],
    # scipy's `randint(low, high)` excludes `high`: sample 1..n_features so
    # that PCA is never asked for 0 components and may also keep every feature.
    'reduce_dim__n_components': randint(1, X.shape[1] + 1),
    'classifier__n_iter': [10],
    # NOTE(review): both ranges below include 0 -- confirm that BSVClassifier
    # accepts 0 for `penalization` and `q`.
    'classifier__penalization': randint(0, 500),
    'classifier__q': randint(0, 500),
    # `uniform()` with default arguments samples from [0, 1).
    'classifier__c': uniform()
}

scoring = ['precision', 'recall', 'f1']
# Nested cross-validation: the inner folds drive the hyper-parameter search,
# the outer folds score the tuned estimator on data the search never saw.
outer_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=SEED)


# The search refits the best candidate (by f1) on each outer training split.
r_clf = RandomizedSearchCV(pipe, params, n_jobs=-1, cv=inner_cv, refit='f1', verbose=10, return_train_score=False, scoring=scoring, n_iter=10, random_state=SEED)
# `return_estimator=True` keeps the fitted search of every outer fold so its
# `best_params_` can be inspected afterwards.
scores = cross_validate(r_clf, X=X, y=y, cv=outer_cv, n_jobs=-1, scoring=scoring, verbose=10, return_estimator=True, return_train_score=True)

In [None]:
import pandas as pd

# One row per outer fold; the extra column holds the hyper-parameters selected
# by that fold's fitted search (`best_params_`).
cv_res = pd.DataFrame(scores).assign(
    estimator_params=lambda frame: [fitted.best_params_ for fitted in frame['estimator']]
)
cv_res

In [None]:
# Mean and spread of each test metric over the outer folds. All values are
# printed with four decimals (the f1 standard deviation previously used two).
# NOTE(review): `np.std` on a pandas Series presumably delegates to
# `Series.std`, whose default is the sample standard deviation (ddof=1) --
# confirm this is the intended estimator.
for position, metric in enumerate(('precision', 'recall', 'f1')):
    fold_scores = cv_res[f'test_{metric}']
    # Blank line between metric groups, but not before the first one.
    separator = '\n' if position else ''
    print(f'{separator}Average {metric}: {np.average(fold_scores):.4f}')
    print(f'Std {metric}: {np.std(fold_scores):.4f}')