For quicker iteration, I also define BSVClassifier in this Jupyter notebook.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np

# Default size (in inches) applied to every figure drawn in this notebook.
plt.rc('figure', figsize=(15.0, 5.0))

In [None]:
import sys

# Put the parent directory on the import path (presumably so the `flod`
# imports below resolve when the notebook runs from its own folder).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')

In [None]:
from flod.features_extraction import load_features
from flod.dataset import download_dataset
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from flod.classifiers.bsvclassifier import BSVClassifier
from scipy.stats import uniform as sp_randFloat
from scipy.stats import randint as sp_randInt
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, make_scorer
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, RobustScaler, QuantileTransformer

In [None]:
# Folder passed as the first argument to download_dataset() and
# load_features.load_features() below; relative to the working directory.
CACHE_FOLDER = '../cache'
# Set LABELS_PATH on the imported load_features object so it points at the
# labels file one directory up.
load_features.LABELS_PATH = '../labels.json'

In [None]:
# Obtain the dataset location (presumably a filesystem path — confirm against
# download_dataset) using the cache folder defined above.
dataset_path = download_dataset(CACHE_FOLDER)
# NOTE(review): the positional arguments 100, False and 0.8 are not explained
# in this notebook — check load_features.load_features' signature before
# changing them.
dataset = load_features.load_features(CACHE_FOLDER, dataset_path, 100, False, 0.8)

In [None]:
# Bare expression: the notebook renders the loaded dataset as the cell output.
dataset

In [None]:
# Feature columns used throughout the notebook.
X = dataset.loc[:, ['c1', 'c2', 'c3', 'c4']]
# Scaled copy of the features; this is the input to the PCA projections below.
X_full = MaxAbsScaler().fit_transform(X)
# Target column (compared against 1 when colouring the plots).
y = dataset['is_fall']

In [None]:
# One colour per sample: red where the label equals 1, green otherwise.
color = [
    'green' if label != 1 else 'red'
    for label in y
]

In [None]:
# Project the scaled features onto two principal components for the 2-D plot.
# NOTE: this rebinds X, which previously held the raw feature columns.
X = PCA(n_components=2).fit_transform(X_full)

In [None]:
# 2-D scatter of the first two columns of X, coloured per sample; the low
# alpha keeps overlapping points distinguishable.
plt.scatter(
    x=X[:, 0],
    y=X[:, 1],
    c=color,
    alpha=.2,
)
plt.show()

In [None]:
# Imported for its side effect only: on older matplotlib releases this
# registers the '3d' projection used below.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# Project the scaled features onto three principal components for the 3-D plot.
# NOTE: this rebinds X; later cells read this X.
X = PCA(n_components=3).fit_transform(X_full)

fig = plt.figure()
# Create the 3-D axes through the figure. Constructing Axes3D(fig) directly no
# longer attaches the axes to the figure on current matplotlib releases, which
# leaves the plot empty.
ax = fig.add_subplot(projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, alpha=.1)
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Split the untransformed feature columns rather than X. By this point X holds
# a PCA projection fitted on the whole dataset, so splitting it would let the
# held-out rows influence the preprocessing and would feed already scaled and
# reduced data into a pipeline that scales and reduces again. The split is
# stratified on y so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    dataset[['c1', 'c2', 'c3', 'c4']],
    y,
    test_size=0.33,
    shuffle=True,
    stratify=y,
)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold


# Three named stages: scaling, PCA dimensionality reduction, then the BSV
# classifier. The step names are the prefixes used in the search grid.
pipe = Pipeline(steps=[
    ('scaler', MaxAbsScaler()),
    ('reduce_dim', PCA()),
    ('classifier', BSVClassifier()),
])

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space, keyed by "<pipeline step>__<parameter>".
# The penalization and q candidates are sampled once from uniform
# distributions when this cell runs, so the grid differs between runs unless
# NumPy's global seed is fixed beforehand.
params = dict(
    scaler=[StandardScaler(), MaxAbsScaler(), RobustScaler(), QuantileTransformer()],
    reduce_dim__n_components=range(1, X.shape[1]),
    classifier__n_iter=[10],
    classifier__penalization=np.random.uniform(1, 100, 3),
    classifier__q=np.random.uniform(0.1, 100, 10),
)

# All three metrics are recorded; the best candidate is chosen and refitted
# according to F1.
scoring = ['precision', 'recall', 'f1']
cv = StratifiedKFold(n_splits=3, shuffle=True)
search = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring=scoring,
    cv=cv,
    refit='f1',
    return_train_score=True,
    n_jobs=-1,
    verbose=5,
)

In [None]:
# Fit the search object on the training split.
search.fit(X_train, y_train)

In [None]:
# Report the winning parameter combination and its cross-validated score,
# one per line.
print(
    f'Best params: {search.best_params_}',
    f'Best score: {search.best_score_}',
    sep='\n',
)

In [None]:
# Score the fitted search object on the held-out split (rendered as the cell output).
search.score(X_test, y_test)

In [None]:
import pandas as pd

# Tabulate the per-candidate cross-validation results and show them as the
# cell output.
cv_res = pd.DataFrame.from_dict(search.cv_results_)
cv_res

In [None]:
# Pull the classifier step out of the best pipeline found by the search.
clf = search.best_estimator_.named_steps['classifier']

In [None]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 summary of the search's predictions on the
# held-out split.
print(
    classification_report(
        y_true=y_test,
        y_pred=search.predict(X_test),
    )
)