In [1]:
import joblib
joblib.cpu_count()

32

In [19]:
# !pip install --upgrade pip

from joblib import Parallel, delayed

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import dataclasses
from sklearn.svm import OneClassSVM
from dataclasses import asdict
from tqdm import tqdm
import warnings
import random
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold # Feature selector
import ast

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
import seaborn as sns


# Global utitlity functions are in separate notebook
%run ./Classification_utility-functions.ipynb
%run ./SEED-CONSTANTS.ipynb

np.random.seed(SEED)
print(f"Numpy Seed was set to: {SEED}")



SAVE_PATH_NAME="SCNN_trained_models"
CLASSIFIER_TYPE_LST=["OCSVM", "kNN", "LOF"]#["OCSVM", "kNN", "IF", "LOF"]
# WINDOW_SIZE_LST = [1750, 2000]#[125, 250, 500, 750, 1000, 1250, 1500, 1750, 2000]
OVERLAP=0.5

TRAINING_CONFIG_DICT_FILE_NAME="model_archi_performance_lr_dict.json"
TRAINING_CONFIG_DICT_FOLDER_PATH="siamese_cnn_results_final"
with open(f"{TRAINING_CONFIG_DICT_FOLDER_PATH}/{TRAINING_CONFIG_DICT_FILE_NAME}", 'r') as file:
    TRAINING_CONFIG_DICT=json.load(file)
    
    
print("Setup Complete")

[32mSEED: 567[0m
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
EER: 0.333, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.400, Threshold: 0.200 <-- Worse case
EER: 0.167, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.333, Threshold: 1.000 <-- Worse case
--------------------[32mUtility functions imported[0m--------------------
[32mSEED: 567[0m
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
--------------------[32mPreprocessing utility functions imported[0m--------------------
[32mSEED: 567[0m
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
--------------------[32mNeural Networks utility functions imported[0m------------------

In [3]:
import sklearn
sklearn.__dir__()
sklearn.__version__

'1.0.2'

In [4]:
@dataclasses.dataclass
class ExperimentParameters:
    """Contains all relevant parameters to run an experiment."""

    name: str  # Name of Parameter set. Used as identifier for charts etc.
    frequency: int
    max_subjects: int
    max_test_subjects: int
        
    user_ids: list
    num_sample_points_per_exp: int
    exp_begin_cutoff_idx: int
    exp_end_cutoff_idx: int
        
    
    seconds_per_subject_train: float
    seconds_per_subject_test: float
    window_size: int  # After resampling
    ocsvm_step_width: int  # After resampling
    scaler: str  # StandardScaler, MinMaxScaler, Normalizer, MaxAbsScaler, RobustScaler, PowerTransformer
    scaler_scope: str  # {"subject", "session"}
    scaler_global: bool  # fit transform scale on all data (True) or fit on training only (False)
    ocsvm_kernel: str # ocsvm kernel
    ocsvm_nu: float  # Best value found in random search, used for final model
    ocsvm_gamma: float  # Best value found in random search, used for final model
    feature_cols: list  # Columns used as features
    exclude_subjects: list  # Don't load data from those users
        
    # Calculated values
    def __post_init__(self):
        # HDF key of table:
        self.table_name = f"sensors_{self.frequency}hz"

        

# INSTANCES
# ===========================================================

# NAIVE_APPROACH
# -----------------------------------------------------------
NAIVE_MINMAX_OCSVM = ExperimentParameters(
    name="NAIVE-MINMAX_OCSVM",
    frequency=100,
    max_subjects=29,
    max_test_subjects=10,
    user_ids = [1, 2, 3, 4, 5, 6, 7, 8, 19, 21, 22, 26, 27, 28, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49],
    num_sample_points_per_exp=21000,
    exp_begin_cutoff_idx=500,
    exp_end_cutoff_idx=-500,
    seconds_per_subject_train=210,
    seconds_per_subject_test=210,    
    window_size=250,
    ocsvm_step_width=250,
    scaler="minmax",
    scaler_scope="subject",
    scaler_global=True,
    ocsvm_kernel="rbf",
    ocsvm_nu=None,
    ocsvm_gamma=None,
    feature_cols=[
        "x_a",
        "y_a",
        "z_a",
        "x_g",
        "y_g",
        "z_g",
    ],
    exclude_subjects=[],
)

# VALID_APPROACH
# -----------------------------------------------------------
VALID_MINMAX_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-MINMAX-OCSVM",
    scaler_global=False,
    ocsvm_nu=0.165,
    ocsvm_gamma=0.039,
)

# NAIVE_ROBUST_APPROACH
# -----------------------------------------------------------
NAIVE_ROBUST_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="NAIVE-ROBUST-OCSVM",
    scaler="robust",
    scaler_global=True,
    ocsvm_nu=0.153,
    ocsvm_gamma=0.091,  # below median, selected by chart
)

# ROBUST_APPROACH (VALID)
# -----------------------------------------------------------
VALID_ROBUST_OCSVM_125 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=125
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)


VALID_ROBUST_OCSVM_250 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=250
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_500 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=500
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_750 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=750
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_1000 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=1000
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_1250 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=1250
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_1500 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=1500
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_1750 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=1750
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

VALID_ROBUST_OCSVM_2000 = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    window_size=2000
#     ocsvm_nu=0.037,
#     ocsvm_gamma= 0.001,
)

# NORMALIZER_APPROACH (VALID)
# -----------------------------------------------------------
VALID_NORMALIZER_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-NORMALIZER-OCSVM",
    scaler="Normalizer",
    scaler_global=False,
    ocsvm_nu=0.074,
    ocsvm_gamma= 0.029,
)

In [5]:
P = VALID_ROBUST_OCSVM_2000
P.ocsvm_step_width = int(P.window_size * .5)


In [6]:
P.smoothing = "Butterworth"


preprocessing_method="Realworld-per_unknown_window"
for clf_type in CLASSIFIER_TYPE_LST:
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)
    print(rival_test_hyperparameters_df["cut_off_freq"])


0    49
1    49
2    49
3    49
4    49
5    49
6    49
7    49
8    49
Name: cut_off_freq, dtype: int64
0    33
1    33
2    33
3    33
4    33
5    33
6    33
7    33
8    33
Name: cut_off_freq, dtype: int64
0    48
1    48
2    48
3    48
4    48
5    48
6    48
7    48
8    48
Name: cut_off_freq, dtype: int64
0    33
1    33
2    33
3    33
4    33
5    33
6    33
7    33
8    33
Name: cut_off_freq, dtype: int64


In [7]:
utils_ppp(P)

Unnamed: 0,Value
name,VALID-ROBUST-OCSVM
frequency,100
max_subjects,29
max_test_subjects,10
user_ids,"[1, 2, 3, 4, 5, 6, 7, 8, 19, 21, 22, 26, 27, 2..."
num_sample_points_per_exp,21000
exp_begin_cutoff_idx,500
exp_end_cutoff_idx,-500
seconds_per_subject_train,210
seconds_per_subject_test,210


In [8]:
utils_eer_scorer

make_scorer(utils_eer, greater_is_better=False)

In [15]:
#include 47 later
# user_ids = [9]
df_exps_dict = load_data_frames(P.user_ids, P.exp_begin_cutoff_idx, P.exp_end_cutoff_idx, P.num_sample_points_per_exp)
raw_dfList_exp1, raw_dfList_exp2 = df_exps_dict['dfList_exp1'], df_exps_dict['dfList_exp2']

Loading exp1 data:
1) accel_count: 28388, gyro_count: 31997
2) accel_count: 26010, gyro_count: 28954
3) accel_count: 28227, gyro_count: 31814
4) accel_count: 24860, gyro_count: 26105
5) accel_count: 24270, gyro_count: 24347
6) accel_count: 25012, gyro_count: 25060
7) accel_count: 25301, gyro_count: 25382
8) accel_count: 21975, gyro_count: 21658
19) accel_count: 24110, gyro_count: 25050
21) accel_count: 24326, gyro_count: 23809
22) accel_count: 29123, gyro_count: 28724
26) accel_count: 23148, gyro_count: 24291
27) accel_count: 24299, gyro_count: 23589
28) accel_count: 23807, gyro_count: 24523
29) accel_count: 24030, gyro_count: 23457
35) accel_count: 24388, gyro_count: 23673
36) accel_count: 24228, gyro_count: 24208
37) accel_count: 31945, gyro_count: 31816
38) accel_count: 22135, gyro_count: 22327
39) accel_count: 23573, gyro_count: 23459
40) accel_count: 23057, gyro_count: 24296
41) accel_count: 24102, gyro_count: 23681
42) accel_count: 24074, gyro_count: 24328
43) accel_count: 22631,

In [16]:
randomized_data_idx = list(range(len(P.user_ids)))
random.Random(SEED).shuffle(randomized_data_idx)
split_idx = 2 * (len(randomized_data_idx)//3) + 1
train_set = randomized_data_idx[: split_idx]
test_set = randomized_data_idx[split_idx: ]
# train_set = randomized_data_idx
print(f"train_set: {train_set}\ntest_set: {test_set}")
# train_set = test_set
# test_set = train_set
print(f"train_set: {train_set}\ntest_set: {test_set}")

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


In [17]:
num_sample_points_per_exp_user_47 = 18000
df_exps_dict_user_47 = load_data_frames([47], P.exp_begin_cutoff_idx, P.exp_end_cutoff_idx, num_sample_points_per_exp_user_47)
dfList_exp1_user_47, dfList_exp2_user_47 = df_exps_dict_user_47['dfList_exp1'], df_exps_dict_user_47['dfList_exp2']

raw_dfList_exp1_user_47 = dfList_exp1_user_47
raw_dfList_exp2_user_47 = dfList_exp2_user_47

Loading exp1 data:
47) accel_count: 22777, gyro_count: 22226
Loading exp2 data:
47) accel_count: 17718, gyro_count: 18353


In [9]:
print(f"train_set: {train_set}")
# print(f"X_exp1_train_dic: {X_exp1_train_dic.keys()}")
# print(f"X_exp2_train_dic: {X_exp2_train_dic.keys()}")

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]


In [10]:
print(f"test_set: {test_set}")
# print(f"X_exp1_test_dic: {X_exp1_test_dic.keys()}")
# print(f"X_exp2_test_dic: {X_exp2_test_dic.keys()}")

test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


In [14]:
rival_test_hyperparameters_df

Unnamed: 0,window_size,step_width,Mean_EER,median_n_neighbors,median_contamination,cut_off_freq
0,125,62,0.213122,10,0.05005,33
1,250,125,0.165602,14,0.043934,33
2,500,250,0.145783,17,0.049031,33
3,750,375,0.134444,12,0.049031,33
4,1000,500,0.135637,12,0.055147,33
5,1250,625,0.139931,13,0.051069,33
6,1500,750,0.126749,11,0.044953,33
7,1750,875,0.132126,10,0.049031,33
8,2000,1000,0.133056,11,0.046992,33


In [74]:
TRAINING_CONFIG_DICT.keys()

dict_keys(['125', '250', '500', '750', '1000', '1250', '1500', '1750', '2000'])

In [75]:
TRAINING_CONFIG_DICT['125'].keys()

dict_keys(['SCNN_1_3_conv_1_dense_arg_dict_default', 'SCNN_3_123_conv_1_dense_arg_dict_default', 'SCNN_3_1_conv_1_dense_arg_dict_default', 'SCNN_1_2_conv_1_dense_arg_dict_default', 'SCNN_1_1_conv_1_dense_arg_dict_default', 'SCNN_1_2_conv_2_dense_arg_dict_default', 'SCNN_1_5_conv_1_dense_arg_dict_default', 'SCNN_4_1234_conv_1_dense_arg_dict_default'])

In [76]:
TRAINING_CONFIG_DICT['125']['SCNN_1_3_conv_1_dense_arg_dict_default'].keys()

dict_keys(['lr_epoch_log_dict', 'loss_record_dict', 'metric_record_dict', 'report_dict', 'ReduceLROnPlateau_args', 'arg_dict'])

In [77]:
TRAINING_CONFIG_DICT['125']['SCNN_1_3_conv_1_dense_arg_dict_default']["lr_epoch_log_dict"]

'{0: 0.001, 1: 0.001, 2: 0.001, 3: 0.001, 4: 0.001, 5: 0.001, 6: 0.001, 7: 0.001, 8: 0.001, 9: 0.001, 10: 0.001, 11: 0.001, 12: 0.001, 13: 0.0005, 14: 0.0005, 15: 0.00025, 16: 0.00025, 17: 0.000125, 18: 0.000125, 19: 0.000125, 20: 6.25e-05, 21: 6.25e-05, 22: 6.25e-05, 23: 3.125e-05, 24: 1.5625e-05, 25: 1.5625e-05, 26: 7.8125e-06, 27: 7.8125e-06, 28: 3.90625e-06, 29: 3.90625e-06}'

In [78]:
len(ast.literal_eval(TRAINING_CONFIG_DICT['125']['SCNN_1_3_conv_1_dense_arg_dict_default']["lr_epoch_log_dict"]))
os.path.isdir(f"final_experiments_results/{SAVE_PATH_NAME}/{model_classifier_type}/{exp_num}/{archi_name}/{winsize}")

NameError: name 'model_classifier_type' is not defined

In [6]:
def custom_save_model_exp_notebook(model,
                                   model_classifier_type, exp_num, archi_name, winsize, 
                                   arg_dict, loss_record_dict, metric_record_dict, other_dict=None):
    """EXP_PATH_NAME
    save the deep learning feature extractor model, along with dictionary of arguments as a json,
    best epoch found, a dictionary containing the accurcy and EER figures, and the window size, and
    a dicgionary of validation and training loss values over time that can be later plotted.
    inputs: 
    deep_feature_model: tf model
    arg_dict: serializable dictionary
    test_res_fig_dic: dict containing keys {"acc", "eer"}
    win_size: int
    loss_record_dict
    """
        
    base_path=f"final_experiments_results/{SAVE_PATH_NAME}/{model_classifier_type}/{exp_num}"
    path_to_archi_name=base_path+f"/{archi_name}"
    final_path=path_to_archi_name+f"/{winsize}"
    
    if not os.path.isdir(base_path):
        raise Exception(f"Base path does not exist: {base_path}")
        
    if not os.path.isdir(path_to_archi_name):
        os.mkdir(path_to_archi_name)

    if not os.path.isdir(final_path):
        os.mkdir(final_path)
        
    model.save(final_path+f"/{model_classifier_type}-{exp_num}-{archi_name}-{winsize}-model")
    deep_feature_model = extract_deep_feature_extactor(model)
    deep_feature_model.save(final_path+f"/{model_classifier_type}-{exp_num}-{archi_name}-{winsize}-deep_feature_extractor")
    
    
    with open(f"{final_path}/arg_dict.json", 'w') as file:
        arg_dict_json = json.dumps(arg_dict)
        file.write(arg_dict_json)
        
    with open(f"{final_path}/loss_record.json", 'w') as file:
        loss_record_json = json.dumps(loss_record_dict)
        file.write(loss_record_json)
        
    with open(f"{final_path}/metric_record.json", 'w') as file:
        metric_record_json = json.dumps(metric_record_dict)
        file.write(metric_record_json)
    
    if other_dict != None:
        with open(f"{final_path}/other_dict.json", 'w') as file:
            other_dict_json = json.dumps(other_dict)
            file.write(other_dict_json)

        
    fig_dict = utils_plot_validation_metric(metric_record_dict)
    for metric in fig_dict:
        fig = fig_dict[metric]
        fig.savefig(f'{final_path}/{metric}_epoch.svg', bbox_inches='tight')
    
    print(f"saved model at {final_path}")

In [7]:
# np.logspace(-4, -1, 50)

In [8]:
# init_experiment_params(exp_config=P)


In [10]:
def model_training_with_training_config_dict(dfList_dict, window_size_lst, train_set, exp_config, overlap, training_config_dict, save_info_dict):
    
    model_classifier_type = save_info_dict["model_classifier_type"]
    exp_num = save_info_dict["exp_num"]
    
    for window_size in tqdm(window_size_lst):
        
        np.random.seed(SEED)
        tf.random.set_seed(SEED)
        print(f"Seed was set to: {SEED}")

        if str(window_size) in training_config_dict:
            winsize_training_config_dict=training_config_dict[str(window_size)]
        else:
            raise Exception("Window size not in training_config_dict")
        
        exp_config.window_size=window_size
        exp_config.nn_step_width = int(exp_config.window_size * overlap)
        exp_config.scaler = 'RobustScaler'
        exp_config.model_variant = 'multi_head_fcn'

        dfList_exp1=dfList_dict["dfList_exp1"]
        dfList_exp2=dfList_dict["dfList_exp2"]
        dfList_exp1_user_47=dfList_dict["dfList_exp1_user_47"]
        dfList_exp2_user_47=dfList_dict["dfList_exp2_user_47"]

        # preparing train data
        X_train_exp1_dict, X_train_exp2_dict, fitted_scaler_train_exp2_dict=get_raw_windows(dfList_exp1, dfList_exp2, window_size, step_width=exp_config.nn_step_width, 
                                                                                            user_idx_set=train_set, scaler=exp_config.scaler, 
                                                                                            num_sample_points_per_exp=exp_config.num_sample_points_per_exp, 
                                                                                            EMA_per_win_span=exp_config.EMA_per_win_span, 
                                                                                            SMA_per_win_winsize=exp_config.SMA_per_win_winsize,
                                                                                            Butter_per_win_argdict=exp_config.Butter_per_win_argdict, 
                                                                                            verbose=0)

        X_train_exp1_dict_user_47, X_train_exp2_dict_user_47, fitted_scaler_train_exp2_dict_user_47=get_raw_windows_user_47(dfList_exp1_user_47, dfList_exp2_user_47, 
                                                                                                                            window_size, step_width=exp_config.nn_step_width, 
                                                                                                                            scaler=exp_config.scaler, 
                                                                                                                            num_sample_points_per_exp=exp_config.num_sample_points_per_exp, 
                                                                                                                            EMA_per_win_span=exp_config.EMA_per_win_span, 
                                                                                                                            SMA_per_win_winsize=exp_config.SMA_per_win_winsize,
                                                                                                                            Butter_per_win_argdict=exp_config.Butter_per_win_argdict, 
                                                                                                                            verbose=0)

        X_train_exp1_dict, X_train_exp2_dict, fitted_scaler_train_exp2_dict=append_user_47_to_data(X_train_exp1_dict, X_train_exp2_dict, fitted_scaler_train_exp2_dict, exp_config.user_ids, 
                                                                                                   X_train_exp1_dict_user_47, X_train_exp2_dict_user_47, fitted_scaler_train_exp2_dict_user_47, 
                                                                                                   verbose=0)


        # why dont i get equal neg and pos pairs???
        # num_pair_limit_train_2000 = 10348
        # num_pair_limit_valid_2000 = 5614
        # num_pair_limit_train_125 = 8*num_pair_limit_train_2000
        # num_pair_limit_valid_125 = 8*num_pair_limit_valid_2000
        num_samples=10348+5614#28000

        train_pairs_dict = prep_X_y_pair(X_train_exp2_dict, X_train_exp1_dict, list(X_train_exp2_dict.keys()), fitted_scaler_train_exp2_dict, num_pair_limit=num_samples)
        X_train, y_train, X_train_distro_dic = train_pairs_dict["X"], train_pairs_dict["y"], train_pairs_dict["X_dic"]


        # 2D Filter Model needs flat 4th dimension
        if exp_config.model_variant == "2d":
            X_train[0] = X_train[0].reshape((*X_train[0].shape, 1))
            X_train[1] = X_train[1].reshape((*X_train[1].shape, 1))

        print(
            f"Training samples:   {y_train.shape[0]}, shape: {X_train[0].shape},"
            + f" class balance: {np.unique(y_train, return_counts=True)}"
        )



        for archi_name in tqdm(winsize_training_config_dict):
            np.random.seed(SEED)
            tf.random.set_seed(SEED)
            print(archi_name)
            archi_dict=winsize_training_config_dict[archi_name]
            arg_dict=archi_dict["arg_dict"]
            optimal_lr_epoch_dict=ast.literal_eval(archi_dict["lr_epoch_log_dict"])

            create_model_func = get_create_model_func(exp_config.model_variant, exp_config.window_size, exp_config.feature_cols)

            print(arg_dict)
            loss_record_dict = {'loss': [], 'val_loss': []}
            metric_record_dict = {}
            model = create_model_func(arg_dict)

            # Train
            history = model.fit(
                x=X_train,
                y=y_train,
                batch_size=arg_dict["batch_size"],
                epochs=len(optimal_lr_epoch_dict), #depends on the len optimal_lr_epoch_dict
                verbose=1,
                shuffle=True,
                callbacks=[MetricsCallback((None, None, X_train, y_train), loss_record_dict=loss_record_dict, metric_record_dict=metric_record_dict, 
                                           epoch_evaluate_freq=10, save_plots=False, print_interm_epochs=False, early_stoping=False, 
                                           optimal_lr_epoch_dict=optimal_lr_epoch_dict,
                                           verbose=0)],
            )
            print(loss_record_dict)
            print("Training History:")
            loss_fig = utils_plot_training_loss(loss_record_dict)



            custom_save_model_exp_notebook(model=model,
                                           model_classifier_type=model_classifier_type, exp_num=exp_num, archi_name=archi_name, winsize=window_size, 
                                           arg_dict=arg_dict, loss_record_dict=loss_record_dict, metric_record_dict=metric_record_dict, 
                                           other_dict=None)
            del model
            del history
            K.clear_session()
            tf.compat.v1.reset_default_graph()

In [11]:
# train_config_dict={winsize_str: {"SCNN_1_5_conv_1_dense_arg_dict_default": TRAINING_CONFIG_DICT[winsize_str]["SCNN_1_5_conv_1_dense_arg_dict_default"]} 
#                    for winsize_str in TRAINING_CONFIG_DICT.keys()}

In [12]:
# train_config_dict['125'].keys()

In [13]:
# print(f"train_set: {train_set+[47]}")
# print(f"test_set: {test_set}")
# P.smoothing = "Butterworth"

# preprocessing_method="Naive"
# time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")




# init_experiment_params(exp_config=P)







# for clf_type in tqdm(["OCSVM"]):
    
#     rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
#     rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)
    
#     P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
#     print(f"cut_off_freq: {P.cut_off_freq}")

#     ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
#     ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

#     ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
#     ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)


#     dfList_dict={
#                 "dfList_exp1": ffted_dfList_exp1,
#                 "dfList_exp2": ffted_dfList_exp2,
#                 "dfList_exp1_user_47": ffted_dfList_exp1_user_47,
#                 "dfList_exp2_user_47": ffted_dfList_exp2_user_47
#     }
    
#     save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_1_1}
#     model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
#                                              training_config_dict=train_config_dict, save_info_dict=save_info_dict)

# 0. No Smoothing

### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = None

preprocessing_method=None
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")


init_experiment_params(exp_config=P)



for clf_type in tqdm(CLASSIFIER_TYPE_LST):


    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": raw_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": raw_dfList_exp2_user_47
    }
    
    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_0}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# 1. Butterworth frequency Cut-off

## 1.1 Naive Approach
### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butterworth"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")


init_experiment_params(exp_config=P)




for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)
    
    P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
    print(f"cut_off_freq: {P.cut_off_freq}")

    ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)


    dfList_dict={
                "dfList_exp1": ffted_dfList_exp1,
                "dfList_exp2": ffted_dfList_exp2,
                "dfList_exp1_user_47": ffted_dfList_exp1_user_47,
                "dfList_exp2_user_47": ffted_dfList_exp2_user_47
    }
    
    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_1_1}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5, 47]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
reseting experiment params successful!


  0%|          | 0/3 [00:00<?, ?it/s]

cut_off_freq: 41



  0%|          | 0/9 [00:00<?, ?it/s][A

Seed was set to: 567
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 337
len_exp2_user_47: 289




  0%|          | 0/20 [00:00<?, ?it/s][A[A

  5%|▌         | 1/20 [00:16<05:06, 16.12s/it][A[A

 10%|█         | 2/20 [00:32<04:52, 16.22s/it][A[A

 15%|█▌        | 3/20 [00:48<04:33, 16.10s/it][A[A

 20%|██        | 4/20 [01:04<04:16, 16.03s/it][A[A

 25%|██▌       | 5/20 [01:20<03:59, 15.97s/it][A[A

 30%|███       | 6/20 [01:36<03:43, 15.96s/it][A[A

 35%|███▌      | 7/20 [01:51<03:27, 15.93s/it][A[A

 40%|████      | 8/20 [02:08<03:12, 16.05s/it][A[A

 45%|████▌     | 9/20 [02:24<02:55, 15.99s/it][A[A

 50%|█████     | 10/20 [02:40<02:39, 15.99s/it][A[A

 55%|█████▌    | 11/20 [02:55<02:23, 15.96s/it][A[A

 60%|██████    | 12/20 [03:11<02:07, 15.94s/it][A[A

 65%|██████▌   | 13/20 [03:27<01:51, 15.92s/it][A[A

 70%|███████   | 14/20 [03:43<01:35, 15.99s/it][A[A

 75%|███████▌  | 15/20 [03:59<01:19, 15.94s/it][A[A

 80%|████████  | 16/20 [04:15<01:03, 15.92s/it][A[A

## 1.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butterworth"



preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
    
    

init_experiment_params(exp_config=P)






P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }





for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)
    
    P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
    
    P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
    print(f"cut_off_freq: {P.cut_off_freq}")

    
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    
    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": ffted_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": ffted_dfList_exp2_user_47
    }

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_1_2}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# 2. Butterworth frequency Cut-off + EMA span
## 2.1 Naive Approach
### Optimizing and Testing

In [37]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"


preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")


rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
P.span=rival_test_hyperparameters_df["EMA_span"][0]

print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")
rival_test_hyperparameters_df

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5, 47]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
cut_off_freq: 39, EMA span: 41


Unnamed: 0,window_size,step_width,Mean_EER,median_n_neighbors,median_contamination,cut_off_freq,EMA_span
0,125,62,0.18274,7,0.051069,39,41
1,250,125,0.142083,9,0.051069,39,41
2,500,250,0.135877,13,0.053108,39,41
3,750,375,0.141515,14,0.049031,39,41
4,1000,500,0.138618,13,0.049031,39,41
5,1250,625,0.13316,13,0.049031,39,41
6,1500,750,0.131276,12,0.049031,39,41
7,1750,875,0.147343,13,0.046992,39,41
8,2000,1000,0.154167,11,0.040876,39,41


In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")




init_experiment_params(exp_config=P)









for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
    P.span=rival_test_hyperparameters_df["EMA_span"][0]

    print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")

    
    ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    EMAed_dfList_exp1 = get_EMAed_dfList(ffted_dfList_exp1, span=P.span)
    EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)
    
    ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    EMAed_dfList_exp1_user_47 = get_EMAed_dfList(ffted_dfList_exp1_user_47, span=P.span)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)
    
    dfList_dict={
                "dfList_exp1": EMAed_dfList_exp1,
                "dfList_exp2": EMAed_dfList_exp2,
                "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_2_1}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

## 2.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"



preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")



init_experiment_params(exp_config=P)





P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }





for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
    P.span=rival_test_hyperparameters_df["EMA_span"][0]

    print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")
    
    P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
    P.EMA_per_win_span=P.span


    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)
    
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)
    
    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": EMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
    }
    


    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_2_2}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# 3. EMA span
## 3.1 Naive Approach
### Optimizing and Testing

In [40]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

P.span=rival_test_hyperparameters_df["EMA_span"][0]

print(f"EMA span: {P.span}")
rival_test_hyperparameters_df

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5, 47]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
EMA span: 49


Unnamed: 0,window_size,step_width,Mean_EER,median_n_neighbors,median_contamination,EMA_span
0,125,62,0.160386,9,0.049031,49
1,250,125,0.135529,12,0.051069,49
2,500,250,0.12838,11,0.049031,49
3,750,375,0.128788,16,0.049031,49
4,1000,500,0.131301,9,0.049031,49
5,1250,625,0.140104,15,0.046992,49
6,1500,750,0.150206,14,0.049031,49
7,1750,875,0.154831,13,0.049031,49
8,2000,1000,0.186389,11,0.046992,49


In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
    
    
init_experiment_params(exp_config=P)







for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.span=rival_test_hyperparameters_df["EMA_span"][0]
    print(f"EMA span: {P.span}")

    
    EMAed_dfList_exp1 = get_EMAed_dfList(raw_dfList_exp1, span=P.span)
    EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)
    
    EMAed_dfList_exp1_user_47 = get_EMAed_dfList(raw_dfList_exp1_user_47, span=P.span)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

    
    dfList_dict={
                "dfList_exp1": EMAed_dfList_exp1,
                "dfList_exp2": EMAed_dfList_exp2,
                "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_3_1}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)


## 3.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"



preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")


init_experiment_params(exp_config=P)






for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.span=rival_test_hyperparameters_df["EMA_span"][0]
    print(f"EMA span: {P.span}")

    P.EMA_per_win_span=P.span

    EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)
    
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

    
    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": EMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_3_2}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# 4. SMA winsize
## 4.1 Naive Approach
### Optimizing and Testing

In [43]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

P.winsize=rival_test_hyperparameters_df["SMA_winsize"][0]
print(f"SMA winsize: {P.winsize}")

rival_test_hyperparameters_df

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5, 47]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
SMA winsize: 49


Unnamed: 0,window_size,step_width,Mean_EER,median_n_neighbors,median_contamination,SMA_winsize
0,125,62,0.169453,9,0.053108,49
1,250,125,0.146008,12,0.048011,49
2,500,250,0.146319,16,0.049031,49
3,750,375,0.141515,12,0.049031,49
4,1000,500,0.153523,13,0.049031,49
5,1250,625,0.138542,12,0.053108,49
6,1500,750,0.182922,16,0.049031,49
7,1750,875,0.170773,11,0.055147,49
8,2000,1000,0.181111,9,0.053108,49


In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

init_experiment_params(exp_config=P)

    
    







for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.winsize=rival_test_hyperparameters_df["SMA_winsize"][0]
    print(f"SMA winsize: {P.winsize}")


    SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
    SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
    
    SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

    
    dfList_dict={
                "dfList_exp1": SMAed_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_4_1}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

## 4.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"



preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
init_experiment_params(exp_config=P)

    
    







for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.winsize=rival_test_hyperparameters_df["SMA_winsize"][0]
    print(f"SMA winsize: {P.winsize}")

    P.SMA_per_win_winsize=P.winsize


    SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
    
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

    
    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_4_2}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# 5. Butterworth frequency Cut-off + SMA winsize
## 5.1 Naive Approach
### Optimizing and Testin

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")


rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

P.winsize=rival_test_hyperparameters_df["SMA_winsize"][0]
P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")
rival_test_hyperparameters_df

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"



preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
init_experiment_params(exp_config=P)

    












for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.winsize=rival_test_hyperparameters_df["SMA_winsize"][0]
    P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
    print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")

    
    ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
    SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)
    
    ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)
    
    dfList_dict={
                "dfList_exp1": SMAed_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_5_1}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

## 5.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"



preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
init_experiment_params(exp_config=P)









P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }





for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    
    rival_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/WACA-{clf_type}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
    rival_test_hyperparameters_df = pd.read_json(rival_test_hyperparameters_file_name)

    P.winsize=rival_test_hyperparameters_df["SMA_winsize"][0]
    P.cut_off_freq=rival_test_hyperparameters_df["cut_off_freq"][0]
    print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")
    
    P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
    P.SMA_per_win_winsize=P.winsize

    
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)
    
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)
    
    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }
    

    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_5_2}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=OVERLAP, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# 6. The effect of Varying Overlap

In [None]:
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")



time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

init_experiment_params(exp_config=P)





for clf_type in tqdm(CLASSIFIER_TYPE_LST):
    overlap*=0.01
    max_window_size=2000
    step_width = int(max_window_size * (1-overlap))
    max_num_windows=max(len(getIndices(sampleSize=max_window_size, step=step_width, numSamplePoints=P.num_sample_points_per_exp)), 100)
    
    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": raw_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": raw_dfList_exp2_user_47
    }
    
    save_info_dict={"model_classifier_type": f"SCNN-{clf_type}", "exp_num": EXP_NUM_6}
    model_training_with_training_config_dict(dfList_dict=dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, overlap=overlap, 
                                             training_config_dict=TRAINING_CONFIG_DICT, save_info_dict=save_info_dict)

# Summary

In [None]:
df=return_and_save_final_result_df_as_json(final_exp_results_path=FINAL_EXP_RESULTS_PATH, exp_path_name=EXP_PATH_NAME, window_size_lst=WINDOW_SIZE_LST)
df

In [None]:
relative_df=return_and_save_final_relative_result_df_as_json(df, base_case_index=0, final_exp_results_path=FINAL_EXP_RESULTS_PATH, 
                                                             exp_path_name=EXP_PATH_NAME, window_size_lst=WINDOW_SIZE_LST)
relative_df

In [None]:
df.style.hide(axis='index').to_latex()


In [None]:
relative_df.style.hide(axis='index').to_latex()
