In [2]:
# !pip install --upgrade pip

TEST_MODE = 0 # Testing macro: a truthy value enables the debug "setup complete" message printed at the end of this cell

import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm
import warnings

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import kurtosis
from scipy.stats import skew
from scipy.stats import iqr
from scipy.stats import median_abs_deviation
from scipy.stats import mode
from scipy.signal import find_peaks
from scipy.stats import entropy

from pandas.testing import assert_frame_equal

%run ./Preprocessing_utility-functions.ipynb
%run ./SEED-CONSTANTS.ipynb


# Echo the seed in green (ANSI escape 32) so it is visible in the cell output.
# NOTE(review): SEED is presumably defined by SEED-CONSTANTS.ipynb, which is %run above — confirm.
print(f"\x1b[32mSEED: {SEED}\x1b[0m")
# Global utility functions are in a separate notebook
if TEST_MODE: print("setup complete")

[32mSEED: 567[0m
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
EER: 0.333, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.400, Threshold: 0.200 <-- Worse case
EER: 0.167, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.333, Threshold: 1.000 <-- Worse case
--------------------[32mUtility functions imported[0m--------------------
[32mSEED: 567[0m
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
--------------------[32mPreprocessing utility functions imported[0m--------------------
[32mSEED: 567[0m
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m


In [3]:
!python --version

Python 3.9.10


In [4]:
def MakeWACAXExpDic(X_exp1_train_dic, X_exp2_train_dic, fitted_scaler_classifier_exp2_train_dic, X_exp1_test_dic, X_exp2_test_dic, fitted_scaler_classifier_exp2_test_dic):
    """
    Build the WACA experiment dictionaries for the train and the test split.

    Each split goes through the same two steps: MakeWACAXExpDicOwner on the
    exp2 data (MinMaxScaler, no clipping), followed by MakeWACAXExpDicUnknown,
    which receives the exp1 data, the dictionary produced by the first step
    and the matching fitted scalers.

    Returns
    -------
    tuple
        (X_exp_train_dic, X_exp_test_dic)
    """

    def _build_split(unknown_source_dic, owner_source_dic, fitted_scalers):
        # Step 1: owner entries; step 2: add the unknown entries on top of them.
        split_dic = MakeWACAXExpDicOwner(owner_source_dic, scaler_clip=False, scaler_type="MinMaxScaler")
        return MakeWACAXExpDicUnknown(unknown_source_dic, split_dic, fitted_raw_scaler_dict=fitted_scalers)

    train_dic = _build_split(X_exp1_train_dic, X_exp2_train_dic, fitted_scaler_classifier_exp2_train_dic)
    test_dic = _build_split(X_exp1_test_dic, X_exp2_test_dic, fitted_scaler_classifier_exp2_test_dic)

    return train_dic, test_dic

In [9]:
import timeit
  

def extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, scaler_clip, verbose=1):
    """
    Run the two-step WACA feature extraction and report how long each step took.

    Parameters
    ----------
    X_exp1_dic
        Passed to MakeWACAXExpDicUnknown as the "unknown" data.
    X_exp2_dic
        Passed to MakeWACAXExpDicOwner as the "owner" data.
    fitted_scaler_classifier_exp2_dic
        Forwarded to MakeWACAXExpDicUnknown as ``fitted_raw_scaler_dict``.
    scaler_clip
        Forwarded to MakeWACAXExpDicOwner (the scaler type is fixed to
        "MinMaxScaler").
    verbose
        When truthy, print the per-step timings and a completion message.
        When falsy the function prints nothing.

    Returns
    -------
    The dictionary returned by MakeWACAXExpDicUnknown.
    """
    start = timeit.default_timer()
    X_exp_dic = MakeWACAXExpDicOwner(X_exp2_dic, scaler_clip=scaler_clip, scaler_type="MinMaxScaler")
    owner_seconds = timeit.default_timer() - start
    # Timing output is diagnostic, so it honours `verbose` like the final message does.
    if verbose: print('MakeWACAXExpDicOwner Time: ', owner_seconds)

    start = timeit.default_timer()
    X_exp_dic = MakeWACAXExpDicUnknown(X_exp1_dic, X_exp_dic, fitted_raw_scaler_dict=fitted_scaler_classifier_exp2_dic)
    unknown_seconds = timeit.default_timer() - start
    if verbose: print('MakeWACAXExpDicUnknown Time: ', unknown_seconds)

    if verbose: print("Done extracting features")
    return X_exp_dic

# Lookup table keyed by window size: every entry of WINDOW_SIZE_LST maps to the
# same extract_WACA_features function.
# NOTE(review): WINDOW_SIZE_LST is not defined in this notebook; presumably it comes from one of the %run notebooks — confirm.
EXTRACT_WACA_features_DICT={winsize: extract_WACA_features for winsize in WINDOW_SIZE_LST}

In [10]:
EXTRACT_WACA_features_DICT

{125: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 250: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 500: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 750: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 1000: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 1250: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 1500: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 1750: <function __main__.extract_WACA_features(X_exp1_dic, X_exp2_dic, fitted_scaler_classifier_exp2_dic, verbose=1)>,
 2000: <function __main__.extract_WACA_featu

In [6]:
def spectral_energy(x):
    '''
    Spectral energy of a 1-D signal according to Parseval's theorem:

        sum(|x[n]|^2) == (1/N) * sum(|X[k]|^2)

    where X is the full N-point DFT of x.

    The full FFT is used on purpose. np.fft.rfft returns only the
    non-negative-frequency half of the spectrum, so summing its squared
    magnitudes without re-weighting drops the mirrored bins and does not
    satisfy the identity above (e.g. [1, 2, 3, 4] would give 28 instead of 30).

    Parameters
    ----------
    x : 1-D array-like with a len()

    Returns
    -------
    float
        (1/len(x)) * sum(|FFT(x)|^2), equal to sum(|x|^2) up to rounding.
    '''
    spectrum = np.fft.fft(x)
    return (1/len(x)) * np.sum(np.abs(spectrum)**2)

def window_entropy(x, n_bins=13):
    '''
    Entropy (base 2) of the histogram of x.

    The values are binned into n_bins equal-width bins with np.histogram,
    the bin counts are turned into relative frequencies, and the entropy
    of those frequencies is returned.
    '''
    counts, _ = np.histogram(x, bins=n_bins)
    probabilities = counts / counts.sum()
    return entropy(probabilities, base=2, axis=0)

def average_absolute_difference_peaks(peaks, window_size):
    '''
    Mean absolute difference between all ordered pairs of distinct peak positions.

    peaks is indexed as a 1-D numpy array. With fewer than two peaks there
    is no pair to compare, and window_size is returned instead.
    '''
    peak_count = len(peaks)
    if peak_count <= 1:
        return window_size

    # Full pairwise matrix: the diagonal contributes zeros, so dividing by the
    # number of off-diagonal entries gives the mean over distinct pairs.
    pairwise_gaps = np.abs(peaks[:, None] - peaks[None, :])
    ordered_pairs = peak_count * (peak_count - 1)

    return pairwise_gaps.ravel().sum() / ordered_pairs

In [7]:
def signal_to_encoding(signal_df, freq=100):
    '''
    Encode one window of 6-axis sensor data as a vector of summary statistics.

    Parameters
    ----------
    signal_df
        Mapping indexed by the column names 'x_a', 'y_a', 'z_a', 'x_g',
        'y_g' and 'z_g', each yielding a 1-D signal.
        NOTE(review): presumably a pandas DataFrame with accelerometer (_a)
        and gyroscope (_g) axes — confirm against the caller.
    freq
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    (dic, vector)
        dic maps '<feature>_<axis>' (or '<feature>_<axis pair>' for cov and
        correlate) to its value. vector is a numpy array holding the same
        values in the same order. The order is feature-major: mean, median,
        var, aadp, ptp, mode, cov, mad, iqr, correlate, skew, kurtosis,
        spectral_energy, entropy; within each feature the axes run x_a, y_a,
        z_a, x_g, y_g, z_g (pairs: xy, yz, xz for _a, then for _g). That is
        14 features x 6 = 84 values.
    '''
    axes = ('x_a', 'y_a', 'z_a', 'x_g', 'y_g', 'z_g')
    # (key suffix, first column, second column) for the two-axis features.
    axis_pairs = (
        ('xy_a', 'x_a', 'y_a'),
        ('yz_a', 'y_a', 'z_a'),
        ('xz_a', 'x_a', 'z_a'),
        ('xy_g', 'x_g', 'y_g'),
        ('yz_g', 'y_g', 'z_g'),
        ('xz_g', 'x_g', 'z_g'),
    )

    signals = {axis: signal_df[axis] for axis in axes}
    # The x_a length is used as the window size for every axis.
    window_size = len(signals['x_a'])

    def _aadp(signal):
        # Average absolute difference between the positions of the local maxima.
        peaks, _ = find_peaks(signal, height=None)
        return average_absolute_difference_peaks(peaks=peaks, window_size=window_size)

    def _mode(signal):
        # scipy.stats.mode returns a length-1 array on some SciPy releases and a
        # scalar on others; atleast_1d makes the indexing work for both.
        return np.atleast_1d(mode(signal)[0])[0]

    def _cov(first, second):
        # Off-diagonal entry of the 2x2 covariance matrix of the two axes.
        return np.cov(first, second)[0][1]

    def _corr(first, second):
        # Off-diagonal entry of the 2x2 correlation-coefficient matrix.
        return np.corrcoef(first, second)[0][1]

    # (key prefix, function, takes a pair of axes?). The order here defines
    # both the dict insertion order and the layout of the output vector.
    feature_groups = (
        ('mean', np.mean, False),
        ('median', np.median, False),
        ('var', np.var, False),
        ('aadp', _aadp, False),
        ('ptp', np.ptp, False),
        ('mode', _mode, False),
        # NOTE(review): the original author noted that covariance seems to
        # require two axes according to the WACA patent.
        ('cov', _cov, True),
        ('mad', median_abs_deviation, False),  # median (not mean) absolute deviation
        ('iqr', iqr, False),
        ('correlate', _corr, True),
        ('skew', skew, False),
        ('kurtosis', kurtosis, False),
        ('spectral_energy', spectral_energy, False),
        ('entropy', window_entropy, False),
    )

    dic = {}
    for prefix, feature_fn, is_pairwise in feature_groups:
        if is_pairwise:
            for suffix, first_axis, second_axis in axis_pairs:
                dic[f'{prefix}_{suffix}'] = feature_fn(signals[first_axis], signals[second_axis])
        else:
            for axis in axes:
                dic[f'{prefix}_{axis}'] = feature_fn(signals[axis])

    # dicts preserve insertion order, so the values are already in vector order.
    vector = list(dic.values())

    return dic, np.array(vector)

# Split Dataset for Valid/Test  
In two splits: one used during hyperparameter optimization, and one used during testing.

The split is done along the subjects: All sessions of a single subject will either be in the validation split or in the testing split, never in both.

They did a 30 60 split.

# Reshaping Raw Features.
We have our own function of windows for this. Do this for both training and testing.

# Extracting time and frequency based features.
Again, we have a function for this. Do this for both training and testing.

# Hyperparameter Optimization 

I did not find any reasonable explanation of how to use cross-validation here, since we are dealing with anomaly detection.

I am using the experiment 1 data as train, and experiment 2 data as validation.

# Using SVM in a real-world Scenario with multiple genuine users and intruders
Source: https://datascience.stackexchange.com/questions/23623/what-is-the-best-way-to-classify-data-not-belonging-to-set-of-classes

Stage 1: 
    Use one-class SVM to assign those images that do not belong to the set of predefined classes as the 9-th class.

Stage 2:
    For those images that pass through your filter, let the multi-class SVM assign them to one of the 8 classes.

Loading data:

In [9]:
def MakeWACAXExpDicOwner(X_exp_reg_df_dict, scaler_clip, scaler_type="MinMaxScaler"):
    '''
    WACA-specific wrapper around MakeXExpDicOwner.

    Forwards the data and scaler settings unchanged and plugs in the WACA
    feature extractor (WACA_feature_extractor) and its window transformer
    (transform_user_WACA_windows).

    Parameters
    ----------
    X_exp_reg_df_dict
        Passed through as the first argument of MakeXExpDicOwner.
        NOTE(review): presumably a dict of per-user DataFrames — confirm.
    scaler_clip
        Forwarded as ``scaler_clip``.
    scaler_type
        Forwarded as ``scaler_type`` (default "MinMaxScaler").

    Returns
    -------
    X_exp_dic
        Whatever MakeXExpDicOwner returns. The original note on this
        function states that the dfLists are of the same size.
    '''
    # Forward the caller's scaler_type instead of a hard-coded "MinMaxScaler".
    return MakeXExpDicOwner(X_exp_reg_df_dict, scaler_clip=scaler_clip, scaler_type=scaler_type,
                            feature_extractor=WACA_feature_extractor, feature_extractor_transformer=transform_user_WACA_windows)
    

    
def MakeWACAXExpDicUnknown(X_exp_unknown_df_dict, X_exp_dic, fitted_raw_scaler_dict):
    '''
    WACA-specific wrapper around MakeXExpDicUnknown.

    The three arguments are handed over positionally and unchanged; the
    WACA feature extractor and its window transformer are supplied as the
    ``feature_extractor`` / ``feature_extractor_transformer`` keywords.

    Returns
    -------
    X_exp_dic
        Whatever MakeXExpDicUnknown returns. The original note on this
        function states that the dfLists are of the same size.
    '''
    waca_hooks = {
        "feature_extractor": WACA_feature_extractor,
        "feature_extractor_transformer": transform_user_WACA_windows,
    }
    updated_dic = MakeXExpDicUnknown(X_exp_unknown_df_dict, X_exp_dic, fitted_raw_scaler_dict, **waca_hooks)
    return updated_dic


def WACA_feature_extractor(X_exp):
    '''
    Encode every window in X_exp with signal_to_encoding.

    Only the vector part (index 1) of each signal_to_encoding result is
    kept; the vectors are stacked into one numpy array in input order.
    '''
    encoded_windows = [signal_to_encoding(window)[1] for window in X_exp]
    return np.array(encoded_windows)



def transform_user_WACA_windows(X_exp, fitted_scaler):
    '''
    Scale every window in X_exp with fitted_scaler.transform.

    A window whose shape has exactly one dimension is first reshaped into a
    single row (1, n). Each scaled result is flattened back to 1-D, and the
    flattened windows are stacked into one numpy array in input order.
    '''

    def _scale_flat(window):
        as_rows = window.reshape(1, -1) if len(window.shape) == 1 else window
        return fitted_scaler.transform(as_rows).reshape(-1)

    return np.array([_scale_flat(window) for window in X_exp])

In [3]:
# Banner (green text between dashes) printed once the definitions above have run.
# NOTE(review): presumably a load confirmation for notebooks that pull this one in via %run — confirm.
print(20*'-' + "\x1b[32mWACA utility functions imported\x1b[0m" + 20*'-')

--------------------[32mWACA utility functions imported[0m--------------------
