# Run SVM (Support Vector Machine) algorithm on extracted IMU feature maps

## Do relevant imports

In [10]:
%load_ext autoreload
%autoreload 2
import os
import numpy as np
from pathlib import Path
from utils.extract_features import get_dataset
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, make_scorer
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from joblib import dump, load
from tqdm.auto import tqdm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Patient identifiers handed to get_dataset (1 through 14 inclusive).
PATIENTS = [*range(1, 15)]
# Label values that are kept; windows carrying any other label are dropped.
REL_LABELS = list(range(-5, 5))
# Sensor configurations for which feature maps are loaded and a model is trained.
SENSORS = ['wrists', 'ankles', 'no_chest', 'all']
# Number of parallel jobs used by the grid search.
NUM_WORKER = 4

## Load feature maps

In [3]:
# Load the feature maps and labels of every sensor configuration, keep only
# the windows whose label is in REL_LABELS, and binarise the non-negative
# labels. Negative labels are left unchanged at this stage.
dataset = {}
labels = {}
for sensors in SENSORS:
    # w_size / w_overlap are presumably the window length and overlap in
    # samples -- TODO confirm against utils.extract_features.get_dataset.
    features, annotations = get_dataset(
        PATIENTS,
        sensors=sensors,
        w_size=128,
        w_overlap=64,
        data_root='/datasets/GaitDetection',
    )
    dataset[sensors], labels[sensors] = features, annotations

    # Drop every window whose label is not one of the relevant labels.
    for i, patient_labels in enumerate(annotations):
        keep = np.isin(patient_labels, REL_LABELS)
        features[i] = features[i][keep]
        annotations[i] = patient_labels[keep]

    # Collapse labels 0..2 into class 0 and labels above 2 into class 1.
    # Both masks are taken before writing; they are disjoint, so the result
    # matches applying the two assignments one after the other.
    for patient_labels in annotations:
        low = (patient_labels >= 0) & (patient_labels <= 2)
        high = patient_labels > 2
        patient_labels[low] = 0.
        patient_labels[high] = 1.

## Generate training data and leave-one-patient-out splits

In [4]:
# Join the per-patient arrays of each sensor configuration into one feature
# array and one label array, keyed by sensor configuration.
data = {}
target = {}
for sensors, feature_maps in dataset.items():
    data[sensors] = np.concatenate(feature_maps)
    target[sensors] = np.concatenate(labels[sensors])

In [5]:
class CustomSplitter:
    """Wrap a splitter and drop negatively labelled samples from each training fold.

    The test indices produced by the wrapped splitter are passed through
    untouched; only the training indices are filtered.

    Parameters
    ----------
    splitter : object
        Must provide ``split()`` yielding ``(train_index, test_index)`` pairs
        of index arrays and ``get_n_splits()``; both are called without
        arguments.
    labels : sequence of array-like
        Label arrays that are concatenated into a single array; the indices
        yielded by ``splitter`` are looked up in that concatenated array.
    """

    def __init__(self, splitter, labels):
        self.splitter = splitter
        self.labels = np.concatenate(labels)

    def split(self, X=None, y=None, groups=None):
        """Yield ``(train_index, test_index)`` with training labels >= 0 only.

        ``X``, ``y`` and ``groups`` are accepted but not used.
        """
        for fit_idx, eval_idx in self.splitter.split():
            nonnegative = self.labels[fit_idx] >= 0
            yield fit_idx[nonnegative], eval_idx

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the number of splits reported by the wrapped splitter."""
        return self.splitter.get_n_splits()

In [6]:
def get_cv_splits(labels):
    """Build cross-validation splits in which each patient is one test fold.

    Parameters
    ----------
    labels : sequence of numpy.ndarray
        One label array per patient, in the same order in which the
        per-patient data is concatenated for training.

    Returns
    -------
    generator
        Yields ``(train_index, test_index)`` pairs. Training indices whose
        label is negative are removed by ``CustomSplitter``. The generator
        can be consumed only once.
    """
    # Sample j gets fold id i when it belongs to the i-th patient. The pieces
    # are built first and concatenated once, instead of growing the array
    # inside the loop (which re-copies it on every iteration).
    test_folds = np.concatenate([
        np.full(patient_labels.shape[0], i)
        for i, patient_labels in enumerate(labels)
    ])
    patient_folds = PredefinedSplit(test_folds)
    return CustomSplitter(patient_folds, labels).split()

## Configure CLFs

In [7]:
def make_trans_scorer(score_func):
    """Wrap ``score_func`` in a scorer that first maps negative labels to 0/1.

    Before ``score_func(y_true, y_pred)`` is evaluated, the true labels are
    rewritten as follows:

    * ``-4`` always counts as class 1;
    * ``-3`` (stand_to_walk) counts as whatever was predicted, so it is
      never scored as an error;
    * every other negative label counts as class 0.

    Parameters
    ----------
    score_func : callable
        Metric with signature ``score_func(y_true, y_pred)``.

    Returns
    -------
    The result of ``make_scorer`` applied to the relabelling wrapper.
    """
    def scorer(y_true, y_pred):
        # Work on a copy: the relabelling below must not leak into the
        # caller's label array.
        y_true = np.array(y_true, copy=True)
        y_pred = np.asarray(y_pred)
        y_true[y_true == -4] = 1
        # Set stand_to_walk to always be correct: it becomes 1 where 1 was
        # predicted, and falls through to 0 below everywhere else.
        y_true[(y_true == -3) & (y_pred == 1)] = 1
        y_true[y_true < 0] = 0
        return score_func(y_true, y_pred)
    return make_scorer(scorer)

In [15]:
def configure_svm_clf(cv_splits):
    """Return an unfitted grid search over ``C`` for a class-balanced RBF SVM.

    Parameters
    ----------
    cv_splits
        Cross-validation specification forwarded unchanged as the ``cv``
        argument of ``GridSearchCV``.

    Returns
    -------
    GridSearchCV
        Searches ``C`` in ``[0.1, 1]``, scored with the transition-aware
        balanced accuracy, using ``NUM_WORKER`` parallel jobs.
    """
    base_estimator = SVC(class_weight='balanced', kernel='rbf')
    param_grid = {'C': [0.1, 1]}
    # NOTE(review): refit=True also trains the best model on everything
    # passed to fit(); confirm this extra fit is wanted by the callers.
    return GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid,
        scoring=make_trans_scorer(balanced_accuracy_score),
        cv=cv_splits,
        n_jobs=NUM_WORKER,
        refit=True,
    )

## Define experiment

In [None]:
# For every sensor configuration: pick C by cross-validated grid search,
# retrain an SVM with the selected parameters on the samples whose label is
# non-negative, and store the model under models/gait/<sensors>.joblib.
clf = {}
base_path = os.path.join('models', 'gait')
Path(base_path).mkdir(parents=True, exist_ok=True)
for sensors in tqdm(dataset):
    # The search sees all samples; the splitter and the scorer deal with the
    # negatively labelled ones.
    search = configure_svm_clf(get_cv_splits(labels[sensors]))
    search.fit(data[sensors], target[sensors])

    # The final model is trained on non-negative labels only.
    labelled = target[sensors] >= 0
    final_model = SVC(kernel='rbf', class_weight='balanced', **search.best_params_)
    final_model.fit(data[sensors][labelled], target[sensors][labelled])

    clf[sensors] = final_model
    dump(final_model, os.path.join(base_path, f'{sensors}.joblib'))

  0%|          | 0/4 [00:00<?, ?it/s]