For quicker iteration, BSVClassifier is also defined in this Jupyter notebook.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np

# Default figure size (width, height), in inches, for every matplotlib figure created in this notebook.
plt.rcParams['figure.figsize'] = (15.0, 5.0)

In [2]:
import sys

# Put the parent folder on the import path (presumably where the `flod`
# package imported below lives — confirm).
# Guarded so that re-running this cell does not pile up duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

In [3]:
from flod.features_extraction import load_features
from flod.dataset import download_dataset
from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from flod.classifiers.bsvclassifier import BSVClassifier
from scipy.stats import uniform as sp_randFloat
from scipy.stats import randint as sp_randInt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, make_scorer

In [4]:
# Folder handed to both download_dataset() and load_features() in the next cell.
CACHE_FOLDER = '../cache'
# Override the LABELS_PATH attribute of the load_features module with a relative path.
# NOTE(review): presumably read by load_features() when it loads the labels — confirm.
load_features.LABELS_PATH = '../labels.json'

In [5]:
# download_dataset() returns the dataset location; load_features() takes it
# together with the cache folder and returns the table used below
# (columns 'c1'..'c4' and 'is_fall' are read from it in the next cell).
# NOTE(review): presumably results are cached under CACHE_FOLDER — confirm.
dataset_path = download_dataset(CACHE_FOLDER)
dataset = load_features.load_features(CACHE_FOLDER, dataset_path)

39it [00:43,  1.12s/it]


In [6]:
# Fixed seed so that Restart & Run All always yields the same train/test
# split (the split is shuffled, hence random without a seed).
SPLIT_SEED = 42

X = np.array(dataset[['c1', 'c2', 'c3', 'c4']])  # Features
y = np.array(dataset['is_fall'])  # Labels

# Split dataset into training set and test set.
# Absolute sizes: 100 training samples and 200 test samples, stratified on
# the label so both subsets keep the class proportions of `y`.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=100,
    test_size=200,
    stratify=y,
    shuffle=True,
    random_state=SPLIT_SEED,
)


In [None]:
# Seed shared by the CV splitter and the random search so that the folds and
# the sampled hyper-parameter candidates are the same on every run.
SEARCH_SEED = 42

# Search space for the BSVClassifier hyper-parameters.
#   sp_randFloat(0, 1)  -> floats drawn uniformly from [0, 1]
#   sp_randInt(lo, hi)  -> integers drawn uniformly from [lo, hi)
# 'n_iter' here is a BSVClassifier parameter fixed to a single value; it is
# unrelated to the n_iter argument of RandomizedSearchCV below.
# NOTE(review): the range for 'q' includes 0 — confirm BSVClassifier accepts it.
parameters = {
    'c': sp_randFloat(0, 1),
    'q': sp_randInt(0, 200),
    'penalization': sp_randInt(0, 400),
    'n_iter': [10],
}

cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=SEARCH_SEED)
model = BSVClassifier()
# The CV probably needs to be stratified in some way
randm_src = RandomizedSearchCV(
    estimator=model,
    param_distributions=parameters,
    cv=cv,
    n_iter=10,  # number of parameter settings sampled by the search
    n_jobs=2,
    scoring=make_scorer(precision_score),
    random_state=SEARCH_SEED,
)
randm_src.fit(X_train, y_train)

print(" Results from Random Search " )
print("\n The best estimator across ALL searched params:\n", randm_src.best_estimator_)
print("\n The best score across ALL searched params:\n", randm_src.best_score_)
print("\n The best parameters across ALL searched params:\n", randm_src.best_params_)

In [None]:
# Number of training samples labelled as outliers (the labels are summed).
n_outliers = sum(y_train)
print(f'Outliers {n_outliers}')

# Build a fresh classifier from the best parameters found by the random
# search and fit it on the whole training set.
best_params = randm_src.best_params_
clf = BSVClassifier(**best_params)
clf.fit(X_train, y_train)

In [None]:
# Quick sanity figures on the fitted betas: how many, their sum, and how many are negative.
betas = clf.betas_
n_negative = sum(1 for beta in betas if beta < 0)

print(f'There are {len(betas)} betas')
print(f'The sum is {sum(betas)}')
print(f'There are {n_negative} negative betas')

In [None]:
# Positions, in training order, of the samples labelled as outliers (label == 1).
out_x = [idx for idx, label in enumerate(y_train) if label == 1]
# The outlier markers are drawn at height c so they sit on the 'c' line.
out_y = [clf.c] * len(out_x)

plt.plot(clf.betas_, '.', label='betas')

# Horizontal reference line at the classifier's c value.
plt.plot([clf.c] * len(clf.betas_), label='c', color='brown')
plt.scatter(out_x, out_y, label='outlier', color='red')
# Label the figure so it can be read on its own when skimming the notebook.
plt.xlabel('Training sample index')
plt.ylabel('beta')
plt.title('Betas of the fitted BSVClassifier and the c level')
plt.legend()
plt.show()

In [None]:
# Distribution of the fitted beta values.
plt.hist(clf.betas_)
# Label the figure so it can be read on its own when skimming the notebook.
plt.xlabel('beta')
plt.ylabel('Count')
plt.title('Histogram of the fitted betas')
plt.show()

The BSV paper suggests starting from q as computed below and then increasing it:

In [None]:
def max_squared_distance(points):
    """Return the largest squared Euclidean distance between any two rows.

    Parameters
    ----------
    points : array-like of shape (n_samples, n_features)
        The points to compare pairwise.

    Returns
    -------
    float
        max over all pairs (a, b) of ||a - b||**2; 0.0 when fewer than two
        points are given.
    """
    points = np.asarray(points, dtype=float)
    if len(points) < 2:
        return 0.0
    # Broadcasting builds every pairwise difference at once:
    # (n, 1, d) - (1, n, d) -> (n, n, d).
    deltas = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    return float((deltas ** 2).sum(axis=-1).max())


# Computed through a helper so that no loop variable overwrites the
# notebook-level `y` (the label array) or `x`.
max_distance = max_squared_distance(X_train)

print(max_distance)
if max_distance > 0:
    print(f'Q >= {1/max_distance}')
else:
    # All training points coincide: 1 / max_distance is undefined.
    print('Q lower bound is undefined: all training points coincide')

In [None]:
# Count the betas that are numerically indistinguishable from 0 (np.isclose default tolerances).
counter = sum(1 for beta in clf.betas_ if np.isclose(beta, 0))

print(f'Close to 0 are {counter}/{len(clf.betas_)}')

In [None]:
# Count the betas that are numerically indistinguishable from c (np.isclose default tolerances).
betas_at_c = [beta for beta in clf.betas_ if np.isclose(beta, clf.c)]
counter = len(betas_at_c)

print(f'Close to c are {counter}/{len(clf.betas_)}')

In [None]:
# Pair every beta with its training sample, then order the pairs by ascending beta.
sorted_values = list(zip(clf.betas_, X_train))
sorted_values.sort(key=lambda pair: pair[0])

In [None]:
# Take the (beta, sample) pair sitting in the middle of the beta-sorted list.
middle = len(sorted_values) // 2
bs, xs = sorted_values[middle]

In [None]:
# Radius stored at index clf.sv_i; drawn as a horizontal reference line.
sv_radius = clf.radiuses_[clf.sv_i]

plt.plot(clf.radiuses_, label='radiuses')
plt.plot([sv_radius] * len(clf.radiuses_), label='Best score')
# Outlier markers (positions computed in the betas plot cell) sit on the reference line.
out_y = [sv_radius] * len(out_x)
plt.scatter(out_x, out_y, label='outlier', color='red')
# Label the figure so it can be read on its own when skimming the notebook.
# NOTE(review): assumes radiuses_ is indexed like the training samples, as the
# overlay of out_x suggests — confirm.
plt.xlabel('Training sample index')
plt.ylabel('Radius')
plt.title('Radiuses of the fitted BSVClassifier')
plt.legend()
plt.show()

In [None]:
y_pred = clf.predict(X_test)
# labels=[0, 1] pins the matrix to 2x2 even when one class is missing from
# y_test / y_pred; without it the 4-way unpacking below would fail.
# Assumes labels are encoded as 0/1 with outlier == 1, consistent with the
# `== 1` outlier test and the default pos_label of precision_score used in
# this notebook — confirm.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

print(f'TP: outliers detected {tp}')
print(f'FP: false alarm {fp}')
print(f'FN: outlier not detected {fn}')
print(f'TN: daily activity not detected {tn}')

In [None]:
# Summary metrics on the held-out test set, computed first and reported on one line.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f'Precision {precision} Recall: {recall} F1: {f1}')