In [1]:
import numpy as np
from sklearn.pipeline import Pipeline
from python.utils import load_data, get_features_from 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer
from python.features_extraction import ExtractFeatures
from sklearn.model_selection import GroupKFold
import matplotlib.pyplot as plt

In [2]:
# Feature-calculator settings handed to ExtractFeatures in the preprocessing
# pipeline. Each key names a calculator; the value is either None (no extra
# arguments) or a list of keyword-argument dicts for that calculator.
# NOTE(review): the layout looks like a tsfresh-style `fc_parameters` mapping --
# confirm against the ExtractFeatures implementation.
fc_parameters = dict(
    abs_energy=None,
    maximum=None,
    absolute_sum_of_changes=None,
    fft_coefficient=[dict(coeff=20, attr="real")],
    fourier_entropy=[dict(bins=10)],
    linear_trend=[dict(attr="slope")],
    mean=None,
    number_peaks=[dict(n=4)],
    variance=None,
)

In [5]:
# Directory (relative to the notebook) that holds the data set.
DATA_PATH = "data"

# load_data yields four objects: the training matrix, the training labels,
# the test matrix and `subjects`, which is used further down as the grouping
# key for cross-validation. The sizes printed below this cell are presumably
# emitted by load_data itself -- confirm in python/utils.
dataset = load_data(DATA_PATH)
X_train, y_train, X_test, subjects = dataset

X_train size: (3500, 15872).
y_train size: (3500,).
X_test size: (3500, 15872).


In [148]:
# Read the stored feature matrices for both splits, training folder first and
# test folder second (same call order as before).
# NOTE(review): "LS"/"TS" presumably stand for learning set / test set and
# "not_scaled" for features saved before standardisation -- confirm.
X_train_fe, X_test_fe = [
    get_features_from(folder=split_dir)
    for split_dir in ("LS/not_scaled", "TS/not_scaled")
]

In [None]:
# Preprocessing chain, applied in this order:
#   1. replace the -999999.99 marker with the column mean
#      (presumably the data set's "missing reading" sentinel -- confirm),
#   2. compute the features selected in `fc_parameters`,
#   3. mean-impute any NaN left after feature extraction,
#   4. standardise every feature.
sentinel_imputer = SimpleImputer(missing_values=-999999.99, strategy="mean")
feature_extractor = ExtractFeatures(fc_parameters)
nan_imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
scaler = StandardScaler()

pre_process = Pipeline(
    steps=[
        ("si", sentinel_imputer),
        ("pp", feature_extractor),
        ("si2", nan_imputer),
        ("ss", scaler),
    ]
)

In [220]:

# Full model: the preprocessing chain followed by a k-nearest-neighbours
# classifier. The step name "knn_cls" is what the grid-search parameter keys
# ("knn_cls__...") refer to, so it must stay as is.
# The hyper-parameters set here (10 neighbours, distance weighting, p=1) are
# the same values shown in the `gs.best_params_` output of this notebook.
knn_clf = Pipeline(
    steps=[
        ("pp", pre_process),
        (
            "knn_cls",
            KNeighborsClassifier(p=1, weights="distance", n_neighbors=10),
        ),
    ]
)

In [221]:
# Cross-validated accuracy of the fixed-parameter pipeline, grouped by subject:
# GroupKFold gets as many splits as there are distinct subjects, and `groups`
# keeps every subject's samples together in one fold.
#
# The pipeline is fed the raw matrix `X_train`, not the pre-extracted
# `X_train_fe`: `knn_clf` begins with `pre_process`, which imputes the
# -999999.99 marker and then runs ExtractFeatures itself, and the grid-search
# cell fits the same pipeline on `X_train`. Passing `X_train_fe` would push
# already-extracted features through the extraction step a second time.
# NOTE(review): if the intent was to reuse the cached features instead, the
# pipeline (not this call) should drop its extraction step -- confirm.
cv_res = cross_validate(
    knn_clf,
    X_train,
    y_train,
    groups=subjects,
    cv=GroupKFold(len(np.unique(subjects))),
    return_train_score=True,
    scoring="accuracy",
)
# Mean held-out accuracy over all folds (displayed as the cell result).
cv_res["test_score"].mean()

In [None]:
# Hyper-parameter search over the KNN step of `knn_clf`. Keys use the
# "<step name>__<parameter>" form, so they address the "knn_cls" step.
param_grid = [
    {
        "knn_cls__n_neighbors": [1, 2, 3, 5, 10, 20],
        "knn_cls__weights": ["uniform", "distance"],
        "knn_cls__leaf_size": [30],
        "knn_cls__p": [1, 2],
    }
]

# Subject-grouped folds, one split per distinct subject; every candidate is
# fitted on the raw training matrix using all available cores. No `scoring`
# is passed, so GridSearchCV falls back to the estimator's own score method.
gs = GridSearchCV(
    knn_clf,
    param_grid,
    cv=GroupKFold(len(np.unique(subjects))),
    n_jobs=-1,
)
gs.fit(X_train, y_train, groups=subjects)

# Print the mean held-out score next to each tried parameter combination.
cvres = gs.cv_results_
for idx, params in enumerate(cvres["params"]):
    mean_score = cvres["mean_test_score"][idx]
    print(mean_score, params)

In [185]:
# Mean cross-validated score of the best parameter combination found by `gs`.
# NOTE(review): the execution counts suggest the value shown below predates the
# current grid-search cell -- re-run the notebook top to bottom to confirm it.
gs.best_score_

0.7472162139885468

In [187]:
# Parameter combination that achieved the best mean cross-validated score.
# The values shown below match the ones hard-coded in the `knn_clf` definition.
gs.best_params_

{'knn_cls__leaf_size': 30,
 'knn_cls__n_neighbors': 10,
 'knn_cls__p': 1,
 'knn_cls__weights': 'distance'}