In [29]:
import joblib
# Sub-directory name for this experiment's result files; it is interpolated
# into the train/test report paths built in the experiment cells below.
EXP_PATH_NAME="WACA-LOF"
# Last expression of the cell: displays the CPU count reported by joblib.
joblib.cpu_count()

32

In [30]:
# !pip install --upgrade pip

import os
from joblib import Parallel, delayed

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import dataclasses
from sklearn.svm import OneClassSVM
from dataclasses import asdict
from tqdm import tqdm
import warnings
import random
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold # Feature selector

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
import seaborn as sns


# Global utility functions are in a separate notebook
%run ./Classification_utility-functions.ipynb
%run ./SEED-CONSTANTS.ipynb

# SEED is not assigned in this cell; presumably it is defined by the
# %run of SEED-CONSTANTS.ipynb above — verify.
# Only NumPy's global RNG is seeded here.
np.random.seed(SEED)
print(f"Numpy Seed was set to: {SEED}")

print("Setup Complete")

[32mSEED: 567[0m
FINAL_EXP_RESULTS_PATH: clip=False_experiments_results
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
EER: 0.333, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.400, Threshold: 0.200 <-- Worse case
EER: 0.167, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.333, Threshold: 1.000 <-- Worse case
--------------------[32mUtility functions imported[0m--------------------
[32mSEED: 567[0m
FINAL_EXP_RESULTS_PATH: clip=False_experiments_results
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
--------------------[32mPreprocessing utility functions imported[0m--------------------
[32mSEED: 567[0m
FINAL_EXP_RESULTS_PATH: clip=False_experiments_results
--------------------[32mSEED and 

In [3]:
# Record the scikit-learn version used for this run (shown as the cell output).
# The former `sklearn.__dir__()` call was dropped: its result was discarded,
# so it had no effect.
import sklearn
sklearn.__version__

'1.0.2'

In [4]:
@dataclasses.dataclass
class ExperimentParameters:
    """Container for the settings of one experiment configuration.

    Every field below is a required argument of the generated ``__init__``
    (none of them has a default). ``__post_init__`` additionally sets the
    derived attribute ``table_name``; it is a plain instance attribute rather
    than a dataclass field.
    """

    name: str  # Name of Parameter set. Used as identifier for charts etc.
    frequency: int  # Interpolated into table_name as f"sensors_{frequency}hz"
    max_subjects: int
    max_test_subjects: int
        
    user_ids: list  # Subject ids; passed to load_data_frames when loading data
    num_sample_points_per_exp: int  # Passed to load_data_frames
    exp_begin_cutoff_idx: int  # Passed to load_data_frames
    exp_end_cutoff_idx: int  # Passed to load_data_frames (negative in the instances below)
        
    
    seconds_per_subject_train: float
    seconds_per_subject_test: float
    window_size: int  # After resampling
    ocsvm_step_width: int  # After resampling
    scaler: str  # StandardScaler, MinMaxScaler, Normalizer, MaxAbsScaler, RobustScaler, PowerTransformer
    scaler_scope: str  # {"subject", "session"}
    scaler_global: bool  # fit transform scale on all data (True) or fit on training only (False)
    ocsvm_kernel: str # ocsvm kernel
    ocsvm_nu: float  # Best value found in random search, used for final model (some instances pass None)
    ocsvm_gamma: float  # Best value found in random search, used for final model (some instances pass None)
    feature_cols: list  # Columns used as features
    exclude_subjects: list  # Don't load data from those users
        
    # Calculated values
    def __post_init__(self):
        """Set attributes that are derived from the constructor arguments."""
        # HDF key of table:
        self.table_name = f"sensors_{self.frequency}hz"

        

# INSTANCES
# ===========================================================

# NAIVE_APPROACH
# -----------------------------------------------------------
# Base parameter set. The other configurations in this cell are derived from
# it with dataclasses.replace, overriding only the fields that differ.
NAIVE_MINMAX_OCSVM = ExperimentParameters(
    name="NAIVE-MINMAX_OCSVM",
    # --- data selection ---
    frequency=100,
    max_subjects=29,
    max_test_subjects=10,
    user_ids=[
        1, 2, 3, 4, 5, 6, 7, 8, 19, 21, 22, 26, 27, 28, 29,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49,
    ],
    exclude_subjects=[],
    feature_cols=["x_a", "y_a", "z_a", "x_g", "y_g", "z_g"],
    # --- trimming / sampling ---
    num_sample_points_per_exp=21000,
    exp_begin_cutoff_idx=500,
    exp_end_cutoff_idx=-500,
    seconds_per_subject_train=210,
    seconds_per_subject_test=210,
    # --- windowing ---
    window_size=250,
    ocsvm_step_width=250,
    # --- scaling ---
    # NOTE(review): scaler identifiers are spelled differently across the
    # configurations in this notebook ("minmax", "robust", "RobustScaler",
    # "Normalizer"); confirm which spellings the consuming code accepts.
    scaler="minmax",
    scaler_scope="subject",
    scaler_global=True,
    # --- model ---
    ocsvm_kernel="rbf",
    ocsvm_nu=None,
    ocsvm_gamma=None,
)

# VALID_APPROACH
# -----------------------------------------------------------
VALID_MINMAX_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-MINMAX-OCSVM",
    scaler_global=False,
    ocsvm_gamma=0.039,
    ocsvm_nu=0.165,
)

# NAIVE_ROBUST_APPROACH
# -----------------------------------------------------------
NAIVE_ROBUST_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="NAIVE-ROBUST-OCSVM",
    scaler="robust",
    scaler_global=True,
    ocsvm_gamma=0.091,  # below median, selected by chart
    ocsvm_nu=0.153,
)

# ROBUST_APPROACH (VALID)
# -----------------------------------------------------------
def _make_valid_robust_ocsvm(window_size):
    """Derive the "VALID-ROBUST-OCSVM" parameter set for one window size.

    Args:
        window_size: Value for the ``window_size`` field of the new instance.

    Returns:
        A new ExperimentParameters copied from NAIVE_MINMAX_OCSVM with
        ``scaler="RobustScaler"``, ``scaler_global=False`` and the given
        ``window_size``. ``ocsvm_nu`` / ``ocsvm_gamma`` are not overridden,
        so they keep the base values.
    """
    return dataclasses.replace(
        NAIVE_MINMAX_OCSVM,
        name="VALID-ROBUST-OCSVM",
        scaler="RobustScaler",
        scaler_global=False,
        window_size=window_size,
    )


# One independent instance per window size. All of them share the same
# `name`, so the variable name is the only thing that tells them apart.
VALID_ROBUST_OCSVM_125 = _make_valid_robust_ocsvm(125)
VALID_ROBUST_OCSVM_250 = _make_valid_robust_ocsvm(250)
VALID_ROBUST_OCSVM_500 = _make_valid_robust_ocsvm(500)
VALID_ROBUST_OCSVM_750 = _make_valid_robust_ocsvm(750)
VALID_ROBUST_OCSVM_1000 = _make_valid_robust_ocsvm(1000)
VALID_ROBUST_OCSVM_1250 = _make_valid_robust_ocsvm(1250)
VALID_ROBUST_OCSVM_1500 = _make_valid_robust_ocsvm(1500)
VALID_ROBUST_OCSVM_1750 = _make_valid_robust_ocsvm(1750)
VALID_ROBUST_OCSVM_2000 = _make_valid_robust_ocsvm(2000)

# NORMALIZER_APPROACH (VALID)
# -----------------------------------------------------------
VALID_NORMALIZER_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-NORMALIZER-OCSVM",
    scaler="Normalizer",
    scaler_global=False,
    ocsvm_nu=0.074,
    ocsvm_gamma= 0.029,
)

In [5]:
# Active parameter set for this notebook.
# Take a copy of the template: plain assignment would alias
# VALID_ROBUST_OCSVM_2000, and every attribute set on P below (and by later
# cells) would silently modify the shared module-level template as well.
P = dataclasses.replace(VALID_ROBUST_OCSVM_2000)
P.ocsvm_step_width = int(P.window_size * .5)  # step is half the window size
P.classifier="LOF"

P.train_cores=1 # 20 cores for every user and 1 core for the nested crossval function
P.test_cores=2 # 10 cores for every user and 2 for the nested crossval function

# Hyper-parameter search space, handed to the training helpers as `param_dist`.
# The "model__" prefix presumably addresses a pipeline step named "model" — confirm.
param_dist = {
    'model__n_neighbors': np.arange(1, 20),
    'model__contamination': np.linspace(0.0001, 0.1, 50),
}
# The distance metric is fixed rather than searched over.
P.LOF_metric="minkowski"
P.p=2

P.scaler_clip=False
P.is_NN=False


In [6]:
# Display the candidate n_neighbors values of the search space.
param_dist['model__n_neighbors']

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19])

In [7]:
# Show the active parameter set P; utils_ppp is not defined in the visible
# cells (presumably it comes from the %run utility notebook — verify).
utils_ppp(P)

Unnamed: 0,Value
name,VALID-ROBUST-OCSVM
frequency,100
max_subjects,29
max_test_subjects,10
user_ids,"[1, 2, 3, 4, 5, 6, 7, 8, 19, 21, 22, 26, 27, 2..."
num_sample_points_per_exp,21000
exp_begin_cutoff_idx,500
exp_end_cutoff_idx,-500
seconds_per_subject_train,210
seconds_per_subject_test,210


In [8]:
# Display the scorer object; the name is not defined in the visible cells
# (presumably it comes from the %run utility notebook — verify).
utils_eer_scorer

make_scorer(utils_eer, greater_is_better=False)

In [9]:
# Load the data frames for every subject in P.user_ids.
# User 47 is not part of P.user_ids and is loaded separately later on.
df_exps_dict = load_data_frames(
    P.user_ids,
    P.exp_begin_cutoff_idx,
    P.exp_end_cutoff_idx,
    P.num_sample_points_per_exp,
)
raw_dfList_exp1 = df_exps_dict['dfList_exp1']
raw_dfList_exp2 = df_exps_dict['dfList_exp2']

Loading exp1 data:
1) accel_count: 28388, gyro_count: 31997
2) accel_count: 26010, gyro_count: 28954
3) accel_count: 28227, gyro_count: 31814
4) accel_count: 24860, gyro_count: 26105
5) accel_count: 24270, gyro_count: 24347
6) accel_count: 25012, gyro_count: 25060
7) accel_count: 25301, gyro_count: 25382
8) accel_count: 21975, gyro_count: 21658
19) accel_count: 24110, gyro_count: 25050
21) accel_count: 24326, gyro_count: 23809
22) accel_count: 29123, gyro_count: 28724
26) accel_count: 23148, gyro_count: 24291
27) accel_count: 24299, gyro_count: 23589
28) accel_count: 23807, gyro_count: 24523
29) accel_count: 24030, gyro_count: 23457
35) accel_count: 24388, gyro_count: 23673
36) accel_count: 24228, gyro_count: 24208
37) accel_count: 31945, gyro_count: 31816
38) accel_count: 22135, gyro_count: 22327
39) accel_count: 23573, gyro_count: 23459
40) accel_count: 23057, gyro_count: 24296
41) accel_count: 24102, gyro_count: 23681
42) accel_count: 24074, gyro_count: 24328
43) accel_count: 22631,

In [10]:
# Split the subjects into a training and a test group.
# The values are positions into P.user_ids (0..len-1), not the user ids
# themselves. A dedicated random.Random(SEED) instance is used so the shuffle
# does not depend on, or disturb, the global `random` state.
randomized_data_idx = list(range(len(P.user_ids)))
random.Random(SEED).shuffle(randomized_data_idx)

# The first 2 * (n // 3) + 1 shuffled positions are used for training,
# the remainder for testing.
split_idx = 2 * (len(randomized_data_idx)//3) + 1
train_set = randomized_data_idx[: split_idx]
test_set = randomized_data_idx[split_idx: ]

# Sanity check: the two groups must not share a subject.
assert not set(train_set) & set(test_set), "train_set and test_set overlap"

# Printed once; the earlier second identical print was a leftover.
print(f"train_set: {train_set}\ntest_set: {test_set}")

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


In [11]:
# User 47 is loaded on its own, with 18000 sample points per experiment
# instead of P.num_sample_points_per_exp.
num_sample_points_per_exp_user_47 = 18000
df_exps_dict_user_47 = load_data_frames(
    [47],
    P.exp_begin_cutoff_idx,
    P.exp_end_cutoff_idx,
    num_sample_points_per_exp_user_47,
)
dfList_exp1_user_47 = df_exps_dict_user_47['dfList_exp1']
dfList_exp2_user_47 = df_exps_dict_user_47['dfList_exp2']

# The raw_* names refer to the very same list objects (no copy is made).
raw_dfList_exp1_user_47, raw_dfList_exp2_user_47 = dfList_exp1_user_47, dfList_exp2_user_47

Loading exp1 data:
47) accel_count: 22777, gyro_count: 22226
Loading exp2 data:
47) accel_count: 17718, gyro_count: 18353


In [12]:
# Re-print the training positions (indices into P.user_ids) for reference.
print(f"train_set: {train_set}")
# print(f"X_exp1_train_dic: {X_exp1_train_dic.keys()}")
# print(f"X_exp2_train_dic: {X_exp2_train_dic.keys()}")

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]


In [13]:
# Re-print the test positions (indices into P.user_ids) for reference.
print(f"test_set: {test_set}")
# print(f"X_exp1_test_dic: {X_exp1_test_dic.keys()}")
# print(f"X_exp2_test_dic: {X_exp2_test_dic.keys()}")

test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


In [14]:
# init_experiment_params(exp_config=P)

# print(f"train_set: {train_set+[47]}")
# print(f"test_set: {test_set}")
# P.smoothing = None
# # P.p=1 UNCOMMENT WHEN YOU RESET IT FOR THE REST OF EXPS


# preprocessing_method=None
# time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
# train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/Manhattan-dist_{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
# test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/Manhattan-dist_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# with open(train_file_name, "w") as f:
#     f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
#     f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")
    
# with open(test_file_name, "w") as f:
#     f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
#     f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")
    



# EER_df_train_dict={}
# EER_df_test_dict={}


# dfList_dict={
#             "dfList_exp1": raw_dfList_exp1,
#             "dfList_exp2": raw_dfList_exp2,
#             "dfList_exp1_user_47": raw_dfList_exp1_user_47,
#             "dfList_exp2_user_47": raw_dfList_exp2_user_47
# }

# test_dict_key=DASH_MACRO_NUM
# EER_df_train_dict[test_dict_key] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
#                                                                                                 extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
#                                                                                                 param_dist=param_dist)

# with open(train_file_name, "a") as f:
#     f.write("\n" + "-"*22 + "\n")
#     f.write(f"\ntest_dict_key: {test_dict_key}\n")
#     f.write(EER_df_train_dict[test_dict_key].to_string())

        
# mean_EER_train_dict={}
# for key in EER_df_train_dict:
#     mean_EER_train_dict[key] = EER_df_train_dict[key]["Mean_EER"].mean()
    
# #-------
# train_lst = list(mean_EER_train_dict.items())
# train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

# with open(train_file_name, "a") as f:
#     f.write("\n" + "-"*22 + "\n")
#     f.write(f"\nSorting based on Mean EER among windows\n")
#     for i in range(len(train_lst)):
#         f.write(f"{i+1}) {train_lst[i]}\n")
        

# min_key=train_lst[0][0]
# EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
#                                                                                    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
#                                                                                    best_param_df=EER_df_train_dict[min_key])
# with open(test_file_name, "a") as f:
#     f.write("\n" + "-"*22 + "\n")
#     f.write(f"\Top smoothing parameter/s: {min_key}\n")
#     f.write(EER_df_test_dict[min_key].to_string())
# #-------
# #-------
# key_column= ["cut_off_freq"]
# EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
# eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

# EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
# eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
# #-------

# 0. No Smoothing
### Optimizing and Testing

In [None]:
# Baseline experiment: cross-validated train/test run on the unfiltered
# (raw) signals, i.e. without any smoothing.
init_experiment_params(exp_config=P)
P.smoothing = None

preprocessing_method=None
# NOTE(review): `time` is not imported in the visible cells; presumably it is
# brought into the namespace by one of the %run notebooks — verify.
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"

# open(..., "w") does not create missing directories, so make sure the
# result folder exists before the report files are started.
os.makedirs(os.path.dirname(train_file_name), exist_ok=True)

# Start both report files from scratch with a timestamp banner followed by a
# banner holding the file's own path.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")

with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")

# Reset the per-run result containers.
EER_df_train_dict={}
EER_df_test_dict={}

# No filtering in this run: the raw frames are handed over as they are.
dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": raw_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": raw_dfList_exp2_user_47
}

# DASH_MACRO_NUM is passed as the preprocessing parameter for this run
# (presumably a placeholder meaning "no smoothing parameter" — confirm).
min_key=DASH_MACRO_NUM
key_column= ["cut_off_freq"]
#-----CV_FOLD-------
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(
                                cv_fold_idx=cv_fold_idx, 
                                cv_sets=THREE_FOLD_CV, 
                                dfList_dict=dfList_dict, 
                                window_size_lst=WINDOW_SIZE_LST, 
                                exp_config=P, 
                                extract_features_dict=EXTRACT_WACA_features_DICT, 
                                overlap=OVERLAP, 
                                param_dist=param_dist, 
                                train_file_name=train_file_name, 
                                test_file_name=test_file_name, 
                                preprocessing_params=min_key, 
                                key_column=key_column,
                                )

# 1. Butterworth frequency Cut-off

## 1.1 Naive Approach
### Optimizing and Testing

In [None]:
# Butterworth / naive preprocessing: every session is filtered up front with
# the cut-off frequency stored by an earlier run, then the CV folds are run.
init_experiment_params(exp_config=P)
P.smoothing = "Butterworth"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict={}

# Read the stored results of an earlier run and keep a timestamped copy.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Reuse the cut-off frequency found under index label 0 of the stored frame.
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"][0]
min_key=P.cut_off_freq
print(f"cut_off_freq: {P.cut_off_freq}")

# The same filter settings are applied to all four session lists.
butter_kwargs = dict(
    cut_off_freq=P.cut_off_freq,
    filter_order=P.filter_order,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, **butter_kwargs)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, **butter_kwargs)
ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, **butter_kwargs)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, **butter_kwargs)

dfList_dict = {
    "dfList_exp1": ffted_dfList_exp1,
    "dfList_exp2": ffted_dfList_exp2,
    "dfList_exp1_user_47": ffted_dfList_exp1_user_47,
    "dfList_exp2_user_47": ffted_dfList_exp2_user_47,
}

key_column = ["cut_off_freq"]
# One train/test pass per cross-validation fold.
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(
        cv_fold_idx=cv_fold_idx,
        cv_sets=THREE_FOLD_CV,
        dfList_dict=dfList_dict,
        window_size_lst=WINDOW_SIZE_LST,
        exp_config=P,
        extract_features_dict=EXTRACT_WACA_features_DICT,
        overlap=OVERLAP,
        param_dist=param_dist,
        train_file_name=train_file_name,
        test_file_name=test_file_name,
        preprocessing_params=min_key,
        key_column=key_column,
    )

In [None]:
# Re-runs the test evaluation for the Butterworth / naive setup, using the
# cut-off frequency read back from a previously saved *_raw_df.json file.
#
# NOTE(review): this cell reads EER_df_train_dict[min_key] further down but
# never assigns EER_df_train_dict itself. It therefore only works when another
# cell has already filled that dict, in the same kernel, with an entry for the
# stored cut-off frequency.
init_experiment_params(exp_config=P)

print(f"test_set: {test_set}")
P.smoothing = "Butterworth"

preprocessing_method="Naive"
# NOTE(review): this timestamp contains ':' and is embedded in
# old_test_file_name below; such names are not valid on every file system.
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}

# This path equals test_file_name[:-4] + "_raw_df.json", i.e. the very file
# that is rewritten at the end of this cell. The to_json call right after the
# read stores a timestamped "old_" copy before that happens.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Index-label lookup: takes the "cut_off_freq" value stored under label 0.
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"][0]
min_key=P.cut_off_freq
print(f"cut_off_freq: {P.cut_off_freq}")

# Filter all four session lists with the same settings taken from P.
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)


dfList_dict={
            "dfList_exp1": ffted_dfList_exp1,
            "dfList_exp2": ffted_dfList_exp2,
            "dfList_exp1_user_47": ffted_dfList_exp1_user_47,
            "dfList_exp2_user_47": ffted_dfList_exp2_user_47
}


# #-------

# Uses the global test_set and the training results stored under min_key
# (see the note at the top of this cell).
EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
# Append mode: the existing text report is extended, not replaced.
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write(f"cut_off_freq: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
# #-------
# #-------
key_column= ["cut_off_freq"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)


# test_file_name[:-4] strips the ".txt" suffix; the first write below replaces
# the JSON file that was read near the top of this cell.
EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
# #-------

In [None]:
# Butterworth / naive preprocessing: grid search over the cut-off frequency.
# For every candidate in CUT_OFF_FREQ_RANGE all sessions are filtered and the
# training routine is run; the candidate with the lowest mean training EER is
# then evaluated on the test subjects.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butterworth"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# open(..., "w") does not create missing directories.
os.makedirs(os.path.dirname(train_file_name), exist_ok=True)

# Start both report files from scratch with a timestamp and path banner.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")

with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")


def _naive_butterworth_dfList_dict(cut_off_freq):
    """Filter all four raw session lists with one cut-off frequency.

    Args:
        cut_off_freq: Cut-off frequency forwarded to get_ffted_dfList.

    Returns:
        dict with the keys "dfList_exp1", "dfList_exp2",
        "dfList_exp1_user_47" and "dfList_exp2_user_47", each holding the
        filtered counterpart of the corresponding raw list. The remaining
        filter settings are read from the global P.
    """
    butter_kwargs = dict(
        cut_off_freq=cut_off_freq,
        filter_order=P.filter_order,
        sampling_freq=P.sampling_freq,
        filtfilt=P.filtfilt,
    )
    return {
        "dfList_exp1": get_ffted_dfList(raw_dfList_exp1, **butter_kwargs),
        "dfList_exp2": get_ffted_dfList(raw_dfList_exp2, **butter_kwargs),
        "dfList_exp1_user_47": get_ffted_dfList(raw_dfList_exp1_user_47, **butter_kwargs),
        "dfList_exp2_user_47": get_ffted_dfList(raw_dfList_exp2_user_47, **butter_kwargs),
    }


EER_df_train_dict={}
EER_df_test_dict={}
for cut_off_freq in tqdm(CUT_OFF_FREQ_RANGE):
    P.cut_off_freq=cut_off_freq
    print(f"cut_off_freq: {P.cut_off_freq}")

    dfList_dict = _naive_butterworth_dfList_dict(P.cut_off_freq)

    EER_df_train_dict[P.cut_off_freq] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
                                                                                                    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                                    param_dist=param_dist)

    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # Was "\cut_off_freq": an invalid escape that wrote a stray backslash.
        f.write(f"\ncut_off_freq: {P.cut_off_freq}\n")
        f.write(EER_df_train_dict[P.cut_off_freq].to_string())


# Mean over the "Mean_EER" column of each candidate's training result frame.
mean_EER_train_dict = {
    key: train_df["Mean_EER"].mean() for key, train_df in EER_df_train_dict.items()
}

#-------
# Ascending order, because a lower error rate is better.
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write(f"\nSorting based on Mean EER among windows\n")
    for i in range(len(train_lst)):
        f.write(f"{i+1}) {train_lst[i]}\n")


min_key=train_lst[0][0]

# After the loop, P.cut_off_freq and dfList_dict still belong to the LAST
# candidate of CUT_OFF_FREQ_RANGE. Re-filter with the selected cut-off so the
# test evaluation sees data that matches best_param_df.
P.cut_off_freq = min_key
dfList_dict = _naive_butterworth_dfList_dict(min_key)
# Keep the historical global names bound, now to the selected cut-off's data.
ffted_dfList_exp1 = dfList_dict["dfList_exp1"]
ffted_dfList_exp2 = dfList_dict["dfList_exp2"]
ffted_dfList_exp1_user_47 = dfList_dict["dfList_exp1_user_47"]
ffted_dfList_exp2_user_47 = dfList_dict["dfList_exp2_user_47"]

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # Was "\Top": an invalid escape that wrote a stray backslash.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
key_column= ["cut_off_freq"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

# test_file_name[:-4] strips the ".txt" suffix.
EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {
    key: test_df["Mean_EER"].mean() for key, test_df in EER_df_test_dict.items()
}

test_lst = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write(f"\nSorting based on Mean EER among windows\n")
    for i in range(len(test_lst)):
        f.write(f"{i+1}) {test_lst[i]}\n")

## 1.2 Real-world Approach (filter applied per unknown window)
### Optimizing and Testing

In [None]:
# Butterworth / "Realworld-per_unknown_window" preprocessing: only the exp2
# lists are filtered up front; the exp1 lists are passed on raw, together with
# the filter settings stored in P.Butter_per_win_argdict.
init_experiment_params(exp_config=P)
P.smoothing = "Butterworth"

preprocessing_method="Realworld-per_unknown_window"

time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Filter settings kept on P; the cut-off is filled in once it has been read.
P.Butter_per_win_argdict = {
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
}

EER_df_test_dict={}

# Read the stored results of an earlier run and keep a timestamped copy.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Reuse the cut-off frequency found under index label 0 of the stored frame.
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"][0]
P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
min_key=P.cut_off_freq
print(f"cut_off_freq: {P.cut_off_freq}")

# Same filter settings for both exp2 lists.
butter_kwargs = dict(
    cut_off_freq=P.cut_off_freq,
    filter_order=P.filter_order,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, **butter_kwargs)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, **butter_kwargs)

dfList_dict = {
    "dfList_exp1": raw_dfList_exp1,
    "dfList_exp2": ffted_dfList_exp2,
    "dfList_exp1_user_47": raw_dfList_exp1_user_47,
    "dfList_exp2_user_47": ffted_dfList_exp2_user_47,
}

key_column = ["cut_off_freq"]
# One train/test pass per cross-validation fold.
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(
        cv_fold_idx=cv_fold_idx,
        cv_sets=THREE_FOLD_CV,
        dfList_dict=dfList_dict,
        window_size_lst=WINDOW_SIZE_LST,
        exp_config=P,
        extract_features_dict=EXTRACT_WACA_features_DICT,
        overlap=OVERLAP,
        param_dist=param_dist,
        train_file_name=train_file_name,
        test_file_name=test_file_name,
        preprocessing_params=min_key,
        key_column=key_column,
    )

In [None]:
# Butterworth cut-off search, "Realworld-per_unknown_window" variant.
# For every candidate cut-off, the exp2 recordings are filtered up front while exp1 is
# passed raw; the filter arguments are also stored in P.Butter_per_win_argdict
# (presumably so the EER helpers can filter exp1 window by window -- TODO confirm).
# The cut-off with the lowest mean training EER is then scored on the test set.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butterworth"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Mode "w" truncates the logs of a previous run; every later write appends.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

P.Butter_per_win_argdict = {
    "filter_order": P.filter_order,
    "cut_off_freq": None,  # filled in for each candidate below
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }

EER_df_train_dict = {}
EER_df_test_dict = {}

for cut_off_freq in tqdm(CUT_OFF_FREQ_RANGE):
    P.cut_off_freq = cut_off_freq
    P.Butter_per_win_argdict["cut_off_freq"] = cut_off_freq
    print(f"cut_off_freq: {P.cut_off_freq}")

    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    dfList_dict = {
        "dfList_exp1": raw_dfList_exp1,
        "dfList_exp2": ffted_dfList_exp2,
        "dfList_exp1_user_47": raw_dfList_exp1_user_47,
        "dfList_exp2_user_47": ffted_dfList_exp2_user_47,
    }

    EER_df_train_dict[P.cut_off_freq] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        param_dist=param_dist)

    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\ncut_off_freq: {P.cut_off_freq}\n")
        f.write(EER_df_train_dict[P.cut_off_freq].to_string())


# Rank the candidates by their mean EER over all window sizes (lower is better).
mean_EER_train_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key = train_lst[0][0]

# Bug fix: after the loop, P and dfList_dict still describe the LAST candidate that was
# tried, not the winner. Re-apply the selected cut-off so the test data is preprocessed
# with the same setting that produced best_param_df.
P.cut_off_freq = min_key
P.Butter_per_win_argdict["cut_off_freq"] = min_key
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
dfList_dict = {
    "dfList_exp1": raw_dfList_exp1,
    "dfList_exp2": ffted_dfList_exp2,
    "dfList_exp1_user_47": raw_dfList_exp1_user_47,
    "dfList_exp2_user_47": ffted_dfList_exp2_user_47,
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text log (".txt" suffix stripped).
key_column = ["cut_off_freq"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_test_dict.items()}

l = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

# 2. Butterworth frequency Cut-off + EMA span
## 2.1 Naive Approach
### Optimizing and Testing

In [None]:
# Butter+EMA, "Naive" variant: re-run the 3-fold CV with the cut-off frequency / EMA span
# stored in the raw-results JSON of an earlier search. Both exp1 and exp2 recordings are
# Butterworth-filtered and then EMA-smoothed before being handed to the fold processor.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)
P.smoothing = "Butter+EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the previously selected hyperparameters and keep a timestamped copy of that file.
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# The entry labelled 0 of each column carries the stored value.
P.cut_off_freq = old_test_hyperparameters_df["cut_off_freq"][0]
P.span = old_test_hyperparameters_df["EMA_span"][0]

min_key = (P.cut_off_freq, P.span)
print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")

# Same Butterworth settings for every recording list.
butter_kwargs = dict(
    cut_off_freq=P.cut_off_freq,
    filter_order=P.filter_order,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)

ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, **butter_kwargs)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, **butter_kwargs)

EMAed_dfList_exp1 = get_EMAed_dfList(ffted_dfList_exp1, span=P.span)
EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, **butter_kwargs)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, **butter_kwargs)

EMAed_dfList_exp1_user_47 = get_EMAed_dfList(ffted_dfList_exp1_user_47, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=EMAed_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=EMAed_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["cut_off_freq", "EMA_span"]
#-----CV_FOLD-------
# Arguments that are identical for every fold; only the fold index varies.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    param_dist=param_dist,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# Butter+EMA random search, "Naive" variant.
# CHOICE_NUM_PAIRS (cut_off_freq, EMA span) pairs are sampled without replacement from
# the 49 x 49 grid of values 1..49. For each pair all recordings (exp1 and exp2) are
# Butterworth-filtered and EMA-smoothed, and the training EER is computed. The pair with
# the lowest mean training EER is then scored on the test set.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Mode "w" truncates the logs of a previous run; every later write appends.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

# Every (cut_off_freq, span) combination with both values in 1..49, one pair per row.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

print(f"total cut_off_span_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
# Draws from NumPy's global RNG, so the sample depends on the seed set earlier in the notebook.
cut_off_span_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]

EER_df_train_dict = {}
EER_df_test_dict = {}

for key_pair in tqdm(cut_off_span_pairs):

    key_pair = tuple(key_pair)  # array row -> hashable dict key
    cut_off_freq, span = key_pair[0], key_pair[1]
    P.cut_off_freq = cut_off_freq
    P.span = span
    print(f"cut_off_freq: {P.cut_off_freq}")
    print(f"span: {P.span}")

    ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    EMAed_dfList_exp1 = get_EMAed_dfList(ffted_dfList_exp1, span=P.span)
    EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

    ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    EMAed_dfList_exp1_user_47 = get_EMAed_dfList(ffted_dfList_exp1_user_47, span=P.span)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

    dfList_dict = {
        "dfList_exp1": EMAed_dfList_exp1,
        "dfList_exp2": EMAed_dfList_exp2,
        "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
        "dfList_exp2_user_47": EMAed_dfList_exp2_user_47,
    }

    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\ncut_off_freq, span: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())


# Rank the sampled pairs by their mean EER over all window sizes (lower is better).
mean_EER_train_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key = train_lst[0][0]

# Bug fix: after the loop, P and dfList_dict still describe the LAST pair that was tried,
# not the winner. Re-apply the selected pair so the test data is preprocessed with the
# same settings that produced best_param_df.
P.cut_off_freq, P.span = min_key

ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

EMAed_dfList_exp1 = get_EMAed_dfList(ffted_dfList_exp1, span=P.span)
EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

EMAed_dfList_exp1_user_47 = get_EMAed_dfList(ffted_dfList_exp1_user_47, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

dfList_dict = {
    "dfList_exp1": EMAed_dfList_exp1,
    "dfList_exp2": EMAed_dfList_exp2,
    "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
    "dfList_exp2_user_47": EMAed_dfList_exp2_user_47,
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text log (".txt" suffix stripped).
key_column = ["cut_off_freq", "EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_test_dict.items()}

l = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")
        

## 2.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# Butter+EMA, "Realworld-per_unknown_window" variant: re-run the 3-fold CV with the
# cut-off frequency / EMA span stored in the raw-results JSON of an earlier search.
# exp1 recordings are passed raw; exp2 recordings are filtered and smoothed up front.
# The same settings are also stored on P (Butter_per_win_argdict, EMA_per_win_span),
# presumably for per-window filtering inside the fold processor -- TODO confirm.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)
P.smoothing = "Butter+EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# The cut-off is not known yet; it is filled in once the stored value has been read.
P.Butter_per_win_argdict = dict(
    filter_order=P.filter_order,
    cut_off_freq=None,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)

EER_df_test_dict = {}

# Load the previously selected hyperparameters and keep a timestamped copy of that file.
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# The entry labelled 0 of each column carries the stored value.
P.cut_off_freq = old_test_hyperparameters_df["cut_off_freq"][0]
P.span = old_test_hyperparameters_df["EMA_span"][0]

P.Butter_per_win_argdict["cut_off_freq"] = P.cut_off_freq
P.EMA_per_win_span = P.span

min_key = (P.cut_off_freq, P.span)
print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")

# Same Butterworth settings for both exp2 recording lists.
butter_kwargs = dict(
    cut_off_freq=P.cut_off_freq,
    filter_order=P.filter_order,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)

ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, **butter_kwargs)
EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, **butter_kwargs)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["cut_off_freq", "EMA_span"]
#-----CV_FOLD-------
# Arguments that are identical for every fold; only the fold index varies.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    param_dist=param_dist,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# Butter+EMA random search, "Realworld-per_unknown_window" variant.
# CHOICE_NUM_PAIRS (cut_off_freq, EMA span) pairs are sampled without replacement from
# the 49 x 49 grid of values 1..49. For each pair the exp2 recordings are filtered and
# smoothed up front while exp1 is passed raw; the same settings are stored on P
# (Butter_per_win_argdict, EMA_per_win_span), presumably for per-window filtering inside
# the EER helpers -- TODO confirm. The pair with the lowest mean training EER is then
# scored on the test set.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Mode "w" truncates the logs of a previous run; every later write appends.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

# Every (cut_off_freq, span) combination with both values in 1..49, one pair per row.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

print(f"total cut_off_span_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
# Draws from NumPy's global RNG, so the sample depends on the seed set earlier in the notebook.
cut_off_span_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]

P.Butter_per_win_argdict = {
    "filter_order": P.filter_order,
    "cut_off_freq": None,  # filled in for each candidate below
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }

EER_df_train_dict = {}
EER_df_test_dict = {}

for key_pair in tqdm(cut_off_span_pairs):
    key_pair = tuple(key_pair)  # array row -> hashable dict key
    cut_off_freq, span = key_pair[0], key_pair[1]
    P.cut_off_freq = cut_off_freq
    P.Butter_per_win_argdict["cut_off_freq"] = cut_off_freq

    P.span = span
    P.EMA_per_win_span = span

    print(f"cut_off_freq: {P.cut_off_freq}")
    print(f"span: {P.span}")

    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

    dfList_dict = {
        "dfList_exp1": raw_dfList_exp1,
        "dfList_exp2": EMAed_dfList_exp2,
        "dfList_exp1_user_47": raw_dfList_exp1_user_47,
        "dfList_exp2_user_47": EMAed_dfList_exp2_user_47,
    }

    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\ncut_off_freq, span: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())


# Rank the sampled pairs by their mean EER over all window sizes (lower is better).
mean_EER_train_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key = train_lst[0][0]

# Bug fix: after the loop, P (including the per-window settings) and dfList_dict still
# describe the LAST pair that was tried, not the winner. Re-apply the selected pair so
# the test data is preprocessed with the same settings that produced best_param_df.
P.cut_off_freq, P.span = min_key
P.Butter_per_win_argdict["cut_off_freq"] = P.cut_off_freq
P.EMA_per_win_span = P.span

ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

dfList_dict = {
    "dfList_exp1": raw_dfList_exp1,
    "dfList_exp2": EMAed_dfList_exp2,
    "dfList_exp1_user_47": raw_dfList_exp1_user_47,
    "dfList_exp2_user_47": EMAed_dfList_exp2_user_47,
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text log (".txt" suffix stripped).
key_column = ["cut_off_freq", "EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_test_dict.items()}

l = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

# 3. EMA span
## 3.1 Naive Approach
### Optimizing and Testing

In [None]:
# EMA, "Naive" variant: re-run the 3-fold CV with the EMA span stored in the raw-results
# JSON of an earlier search. Both exp1 and exp2 recordings are EMA-smoothed before being
# handed to the fold processor.
init_experiment_params(exp_config=P)
P.smoothing = "EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the previously selected span and keep a timestamped copy of that file.
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# The entry labelled 0 of the column carries the stored value.
P.span = old_test_hyperparameters_df["EMA_span"][0]
min_key = P.span
print(f"EMA span: {P.span}")

EMAed_dfList_exp1 = get_EMAed_dfList(raw_dfList_exp1, span=P.span)
EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)

EMAed_dfList_exp1_user_47 = get_EMAed_dfList(raw_dfList_exp1_user_47, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=EMAed_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=EMAed_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["EMA_span"]
#-----CV_FOLD-------
# Arguments that are identical for every fold; only the fold index varies.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    param_dist=param_dist,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# EMA span search, "Naive" variant.
# For every span in EMA_SPAN_RANGE all recordings (exp1 and exp2) are EMA-smoothed and
# the training EER is computed. The span with the lowest mean training EER is then
# scored on the test set.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Mode "w" truncates the logs of a previous run; every later write appends.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

EER_df_train_dict = {}
EER_df_test_dict = {}
for span in tqdm(EMA_SPAN_RANGE):
    P.span = span
    print(f"EMA span: {P.span}")

    EMAed_dfList_exp1 = get_EMAed_dfList(raw_dfList_exp1, span=P.span)
    EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)

    EMAed_dfList_exp1_user_47 = get_EMAed_dfList(raw_dfList_exp1_user_47, span=P.span)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

    dfList_dict = {
        "dfList_exp1": EMAed_dfList_exp1,
        "dfList_exp2": EMAed_dfList_exp2,
        "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
        "dfList_exp2_user_47": EMAed_dfList_exp2_user_47,
    }

    EER_df_train_dict[P.span] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\E" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\nEMA span: {P.span}\n")
        f.write(EER_df_train_dict[P.span].to_string())


# Rank the spans by their mean EER over all window sizes (lower is better).
mean_EER_train_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key = train_lst[0][0]

# Bug fix: after the loop, P.span and dfList_dict still describe the LAST span that was
# tried, not the winner. Re-apply the selected span so the test data is smoothed with
# the same setting that produced best_param_df.
P.span = min_key

EMAed_dfList_exp1 = get_EMAed_dfList(raw_dfList_exp1, span=P.span)
EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)

EMAed_dfList_exp1_user_47 = get_EMAed_dfList(raw_dfList_exp1_user_47, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

dfList_dict = {
    "dfList_exp1": EMAed_dfList_exp1,
    "dfList_exp2": EMAed_dfList_exp2,
    "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
    "dfList_exp2_user_47": EMAed_dfList_exp2_user_47,
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text log (".txt" suffix stripped).
key_column = ["EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_test_dict.items()}

l = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

## 3.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# EMA, "Realworld-per_unknown_window" variant: re-run the 3-fold CV with the EMA span
# stored in the raw-results JSON of an earlier search. exp1 recordings are passed raw;
# exp2 recordings are EMA-smoothed up front. The span is also stored in
# P.EMA_per_win_span, presumably for per-window smoothing inside the fold processor
# -- TODO confirm.
init_experiment_params(exp_config=P)
P.smoothing = "EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the previously selected span and keep a timestamped copy of that file.
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# The entry labelled 0 of the column carries the stored value.
P.span = old_test_hyperparameters_df["EMA_span"][0]
P.EMA_per_win_span = P.span
min_key = P.span
print(f"EMA span: {P.span}")

EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)

EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["EMA_span"]
#-----CV_FOLD-------
# Arguments that are identical for every fold; only the fold index varies.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    param_dist=param_dist,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# EMA smoothing, "Realworld-per_unknown_window" preprocessing: search EMA_SPAN_RANGE
# on the training users, then evaluate the best span on the held-out test users.
# exp1 lists stay raw; only the exp2 lists are EMA-smoothed here, and the span is
# also published as P.EMA_per_win_span for the evaluation helpers.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"

preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Truncate both report files and start them with a timestamped banner.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")


EER_df_train_dict={}
EER_df_test_dict={}
for span in tqdm(EMA_SPAN_RANGE):
    P.span=span
    print(f"EMA span: {P.span}")

    P.EMA_per_win_span=P.span

    EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)
    EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": EMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
    }

    EER_df_train_dict[P.span] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
                                                                                    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                    param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\E" was an invalid escape sequence; a newline matches the other section headers.
        f.write(f"\nEMA span: {P.span}\n")
        f.write(EER_df_train_dict[P.span].to_string())


# Average the per-window-size Mean_EER of every candidate span.
mean_EER_train_dict={span_key: eer_df["Mean_EER"].mean() for span_key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]

# After the search loop, P and dfList_dict still describe the LAST span tried.
# Re-apply the winning span so the held-out evaluation uses the selected
# smoothing parameter (same set-up the cross-validation cells use).
P.span=min_key
P.EMA_per_win_span=P.span
EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)
dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": EMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; replaced by a newline.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report ("<report>_raw_df.json", ...).
key_column= ["EMA_span"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={span_key: eer_df["Mean_EER"].mean() for span_key, eer_df in EER_df_test_dict.items()}

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

# 4. SMA winsize
## 4.1 Naive Approach
### Optimizing and Testing

In [None]:
# Cross-validated run for SMA smoothing with the "Naive" preprocessing:
# all four dataframe lists (exp1, exp2 and both user-47 lists) are smoothed
# with the same SMA window before being handed to the CV routine.
init_experiment_params(exp_config=P)  # defined outside this cell; presumably resets P -- TODO confirm
P.smoothing = "SMA"


preprocessing_method="Naive"
# NOTE(review): this timestamp becomes part of old_test_file_name; the ':' characters
# are not allowed in file names on Windows.
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}  # reset here; not read again inside this cell

# The SMA window size is loaded from "<smoothing>_Mean_EER_<prep>_df_test_dict_raw_df.json".
# That is the file name the SMA/Naive hyper-parameter search cell writes via
# `{test_file_name[:-4]}_raw_df.json`, so the search must have been run at least once
# before this cell can execute.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the loaded hyper-parameter frame.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Label 0 of the "SMA_winsize" column -- presumably the first/only row; verify against the JSON.
P.winsize=old_test_hyperparameters_df["SMA_winsize"][0]
min_key=P.winsize  # forwarded below as `preprocessing_params`
print(f"SMA winsize: {P.winsize}")


# Smooth every list with the loaded window size.
SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)

SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)


dfList_dict={
            "dfList_exp1": SMAed_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}
    


key_column= ["SMA_winsize"]
#-----CV_FOLD-------
# One call per entry of THREE_FOLD_CV; the helper receives both report file names.
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(\
                                cv_fold_idx=cv_fold_idx, 
                                cv_sets=THREE_FOLD_CV, 
                                dfList_dict=dfList_dict, 
                                window_size_lst=WINDOW_SIZE_LST, 
                                exp_config=P, 
                                extract_features_dict=EXTRACT_WACA_features_DICT, 
                                overlap=OVERLAP, 
                                param_dist=param_dist, 
                                train_file_name=train_file_name, 
                                test_file_name=test_file_name, 
                                preprocessing_params=min_key, 
                                key_column=key_column,
                                )

In [None]:
# SMA smoothing, "Naive" preprocessing: search SMA_WINSIZE_RANGE on the training
# users, then evaluate the best window size on the held-out test users.
# All four dataframe lists are smoothed with the candidate window size.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Truncate both report files and start them with a timestamped banner.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")


EER_df_train_dict={}
EER_df_test_dict={}
for winsize in tqdm(SMA_WINSIZE_RANGE):
    P.winsize=winsize
    print(f"SMA winsize: {P.winsize}")

    SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
    SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)

    SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": SMAed_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    EER_df_train_dict[P.winsize] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
                                                                                       extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                       param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\S" was an invalid escape sequence; a newline matches the other section headers.
        f.write(f"\nSMA winsize: {P.winsize}\n")
        f.write(EER_df_train_dict[P.winsize].to_string())


# Average the per-window-size Mean_EER of every candidate SMA window.
mean_EER_train_dict={winsize_key: eer_df["Mean_EER"].mean() for winsize_key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]

# After the search loop, P.winsize and dfList_dict still describe the LAST window
# size tried. Rebuild them for the winning window size so the held-out evaluation
# uses the selected smoothing parameter (same set-up the cross-validation cells use).
P.winsize=min_key
SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)
dfList_dict={
            "dfList_exp1": SMAed_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; replaced by a newline.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report ("<report>_raw_df.json", ...).
key_column= ["SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={winsize_key: eer_df["Mean_EER"].mean() for winsize_key, eer_df in EER_df_test_dict.items()}

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

## 4.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# Cross-validated run for SMA smoothing with the "Realworld-per_unknown_window"
# preprocessing: the exp1 lists are passed on raw, only the exp2 lists are
# SMA-smoothed here, and the window size is also stored in P.SMA_per_win_winsize.
init_experiment_params(exp_config=P)  # defined outside this cell; presumably resets P -- TODO confirm
P.smoothing = "SMA"



preprocessing_method="Realworld-per_unknown_window"
# NOTE(review): this timestamp becomes part of old_test_file_name; the ':' characters
# are not allowed in file names on Windows.
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}  # reset here; not read again inside this cell

# The SMA window size is loaded from "<smoothing>_Mean_EER_<prep>_df_test_dict_raw_df.json".
# That is the file name the SMA/Realworld hyper-parameter search cell writes via
# `{test_file_name[:-4]}_raw_df.json`, so the search must have been run at least once
# before this cell can execute.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the loaded hyper-parameter frame.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Label 0 of the "SMA_winsize" column -- presumably the first/only row; verify against the JSON.
P.winsize=old_test_hyperparameters_df["SMA_winsize"][0]
# Same window size exposed under the per-window name; presumably read by the
# evaluation helpers when filtering individual windows -- TODO confirm.
P.SMA_per_win_winsize=P.winsize
min_key=P.winsize  # forwarded below as `preprocessing_params`
print(f"SMA winsize: {P.winsize}")


# Only the exp2 lists are smoothed in this cell.
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)

SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)


dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}


key_column= ["SMA_winsize"]
#-----CV_FOLD-------
# One call per entry of THREE_FOLD_CV; the helper receives both report file names.
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(\
                                cv_fold_idx=cv_fold_idx, 
                                cv_sets=THREE_FOLD_CV, 
                                dfList_dict=dfList_dict, 
                                window_size_lst=WINDOW_SIZE_LST, 
                                exp_config=P, 
                                extract_features_dict=EXTRACT_WACA_features_DICT, 
                                overlap=OVERLAP, 
                                param_dist=param_dist, 
                                train_file_name=train_file_name, 
                                test_file_name=test_file_name, 
                                preprocessing_params=min_key, 
                                key_column=key_column,
                                )

In [None]:
# SMA smoothing, "Realworld-per_unknown_window" preprocessing: search
# SMA_WINSIZE_RANGE on the training users, then evaluate the best window size on
# the held-out test users. exp1 lists stay raw; only the exp2 lists are smoothed
# here, and the window size is also published as P.SMA_per_win_winsize.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"

preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Truncate both report files and start them with a timestamped banner.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")


EER_df_train_dict={}
EER_df_test_dict={}
for winsize in tqdm(SMA_WINSIZE_RANGE):
    P.winsize=winsize
    P.SMA_per_win_winsize=P.winsize

    print(f"SMA winsize: {P.winsize}")

    SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    EER_df_train_dict[P.winsize] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
                                                                                       extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                       param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\S" was an invalid escape sequence; a newline matches the other section headers.
        f.write(f"\nSMA winsize: {P.winsize}\n")
        f.write(EER_df_train_dict[P.winsize].to_string())


# Average the per-window-size Mean_EER of every candidate SMA window.
mean_EER_train_dict={winsize_key: eer_df["Mean_EER"].mean() for winsize_key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]

# After the search loop, P and dfList_dict still describe the LAST window size
# tried. Re-apply the winning window size (including the per-window setting) so
# the held-out evaluation uses the selected smoothing parameter.
P.winsize=min_key
P.SMA_per_win_winsize=P.winsize
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)
dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; replaced by a newline.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report ("<report>_raw_df.json", ...).
key_column= ["SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={winsize_key: eer_df["Mean_EER"].mean() for winsize_key, eer_df in EER_df_test_dict.items()}

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

# 5. Butterworth frequency Cut-off + SMA winsize
## 5.1 Naive Approach
### Optimizing and Testing

In [None]:
# Cross-validated run for the combined "Butter+SMA" smoothing with the "Naive"
# preprocessing: every dataframe list goes through get_ffted_dfList and then
# get_SMAed_dfList before being handed to the CV routine.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)  # defined outside this cell; presumably resets P for experiment 5 -- TODO confirm
P.smoothing = "Butter+SMA"


preprocessing_method="Naive"
# NOTE(review): this timestamp becomes part of old_test_file_name; the ':' characters
# are not allowed in file names on Windows.
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}  # reset here; not read again inside this cell

    
# Both hyper-parameters are loaded from "<smoothing>_Mean_EER_<prep>_df_test_dict_raw_df.json".
# That is the file name the Butter+SMA/Naive hyper-parameter search cell writes via
# `{test_file_name[:-4]}_raw_df.json`, so the search must have been run at least once
# before this cell can execute.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the loaded hyper-parameter frame.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Label 0 of each column -- presumably the first/only row; verify against the JSON.
P.winsize=old_test_hyperparameters_df["SMA_winsize"][0]
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"][0]

# Tuple ordered like key_column below: (cut_off_freq, SMA_winsize).
min_key= P.cut_off_freq, P.winsize
print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")


# Stage 1: frequency filter (cut-off, order, sampling frequency and filtfilt flag come from P).
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

# Stage 2: SMA applied to the filtered lists.
SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

# Same two stages for the user-47 lists.
ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict={
            "dfList_exp1": SMAed_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}



key_column= ["cut_off_freq", "SMA_winsize"]
#-----CV_FOLD-------
# One call per entry of THREE_FOLD_CV; the helper receives both report file names.
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(\
                                cv_fold_idx=cv_fold_idx, 
                                cv_sets=THREE_FOLD_CV, 
                                dfList_dict=dfList_dict, 
                                window_size_lst=WINDOW_SIZE_LST, 
                                exp_config=P, 
                                extract_features_dict=EXTRACT_WACA_features_DICT, 
                                overlap=OVERLAP, 
                                param_dist=param_dist, 
                                train_file_name=train_file_name, 
                                test_file_name=test_file_name, 
                                preprocessing_params=min_key, 
                                key_column=key_column,
                                )

In [None]:
# "Butter+SMA" smoothing, "Naive" preprocessing: random search over
# (cut_off_freq, winsize) pairs on the training users, then evaluation of the
# best pair on the held-out test users. All four dataframe lists are filtered
# with get_ffted_dfList and then smoothed with get_SMAed_dfList.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Truncate both report files and start them with a timestamped banner.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

# Candidate grid: every (cut_off_freq, winsize) combination with both values in 1..49.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

print(f"total cut_off_winsize_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
# Draw CHOICE_NUM_PAIRS distinct pairs from NumPy's global RNG; the draw therefore
# depends on the RNG state at the time this cell runs.
cut_off_winsize_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]


EER_df_train_dict={}
EER_df_test_dict={}

for key_pair in tqdm(cut_off_winsize_pairs):

    key_pair = tuple(key_pair)  # hashable dict key: (cut_off_freq, winsize)
    cut_off_freq, winsize = key_pair[0], key_pair[1]
    P.cut_off_freq=cut_off_freq
    P.winsize=winsize
    print(f"cut_off_freq: {P.cut_off_freq}")
    print(f"winsize: {P.winsize}")

    ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
    SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

    ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": SMAed_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
                                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                      param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence; a newline matches the other section headers.
        f.write(f"\ncut_off_freq, winsize: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())


# Average the per-window-size Mean_EER of every sampled pair.
mean_EER_train_dict={pair_key: eer_df["Mean_EER"].mean() for pair_key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]

# After the search loop, P and dfList_dict still describe the LAST pair tried.
# Rebuild them for the winning (cut_off_freq, winsize) pair so the held-out
# evaluation uses the selected smoothing parameters.
P.cut_off_freq, P.winsize = min_key
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)
ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)
dfList_dict={
            "dfList_exp1": SMAed_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; replaced by a newline.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report ("<report>_raw_df.json", ...).
key_column= ["cut_off_freq", "SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={pair_key: eer_df["Mean_EER"].mean() for pair_key, eer_df in EER_df_test_dict.items()}

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

## 5.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# Cross-validated run for the combined "Butter+SMA" smoothing with the
# "Realworld-per_unknown_window" preprocessing: the exp1 lists are passed on raw,
# only the exp2 lists are filtered and SMA-smoothed here, and the parameters are
# also stored in P.Butter_per_win_argdict / P.SMA_per_win_winsize.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)  # defined outside this cell; presumably resets P for experiment 5 -- TODO confirm
P.smoothing = "Butter+SMA"


preprocessing_method="Realworld-per_unknown_window"
# NOTE(review): this timestamp becomes part of old_test_file_name; the ':' characters
# are not allowed in file names on Windows.
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


# Filter arguments kept on P under the per-window name; "cut_off_freq" is a
# placeholder until the value is loaded from the JSON file below.
P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }


EER_df_test_dict={}  # reset here; not read again inside this cell

    
# Both hyper-parameters are loaded from "<smoothing>_Mean_EER_<prep>_df_test_dict_raw_df.json".
# That is the file name the Butter+SMA/Realworld hyper-parameter search cell writes via
# `{test_file_name[:-4]}_raw_df.json`, so the search must have been run at least once
# before this cell can execute.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the loaded hyper-parameter frame.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Label 0 of each column -- presumably the first/only row; verify against the JSON.
P.winsize=old_test_hyperparameters_df["SMA_winsize"][0]
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"][0]

# Mirror the loaded values into the per-window settings; presumably read by the
# evaluation helpers when filtering individual windows -- TODO confirm.
P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
P.SMA_per_win_winsize=P.winsize

# Tuple ordered like key_column below: (cut_off_freq, SMA_winsize).
min_key= P.cut_off_freq, P.winsize
print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")




# Only the exp2 lists are filtered and then SMA-smoothed in this cell.
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}


key_column= ["cut_off_freq", "SMA_winsize"]
#-----CV_FOLD-------
# One call per entry of THREE_FOLD_CV; the helper receives both report file names.
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_OCSVM_LOF_IF(\
                                cv_fold_idx=cv_fold_idx, 
                                cv_sets=THREE_FOLD_CV, 
                                dfList_dict=dfList_dict, 
                                window_size_lst=WINDOW_SIZE_LST, 
                                exp_config=P, 
                                extract_features_dict=EXTRACT_WACA_features_DICT, 
                                overlap=OVERLAP, 
                                param_dist=param_dist, 
                                train_file_name=train_file_name, 
                                test_file_name=test_file_name, 
                                preprocessing_params=min_key, 
                                key_column=key_column,
                                )

In [None]:
# "Butter+SMA" smoothing, "Realworld-per_unknown_window" preprocessing: random
# search over (cut_off_freq, winsize) pairs on the training users, then evaluation
# of the best pair on the held-out test users. exp1 lists stay raw; only the exp2
# lists are filtered and smoothed here, and the parameters are also published as
# P.Butter_per_win_argdict / P.SMA_per_win_winsize.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"

preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# Truncate both report files and start them with a timestamped banner.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

# Candidate grid: every (cut_off_freq, winsize) combination with both values in 1..49.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

print(f"total cut_off_winsize_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
# Draw CHOICE_NUM_PAIRS distinct pairs from NumPy's global RNG; the draw therefore
# depends on the RNG state at the time this cell runs.
cut_off_winsize_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]


# Per-window filter arguments; "cut_off_freq" is filled in for every candidate below.
P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }


EER_df_train_dict={}
EER_df_test_dict={}

for key_pair in tqdm(cut_off_winsize_pairs):

    key_pair = tuple(key_pair)  # hashable dict key: (cut_off_freq, winsize)
    cut_off_freq, winsize = key_pair[0], key_pair[1]
    P.cut_off_freq=cut_off_freq
    P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
    P.winsize=winsize
    P.SMA_per_win_winsize=P.winsize

    print(f"cut_off_freq: {P.cut_off_freq}")
    print(f"winsize: {P.winsize}")

    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
                                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                      param_dist=param_dist)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence; a newline matches the other section headers.
        f.write(f"\ncut_off_freq, winsize: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())


# Average the per-window-size Mean_EER of every sampled pair.
mean_EER_train_dict={pair_key: eer_df["Mean_EER"].mean() for pair_key, eer_df in EER_df_train_dict.items()}

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]

# After the search loop, P and dfList_dict still describe the LAST pair tried.
# Re-apply the winning (cut_off_freq, winsize) pair, including the per-window
# settings, so the held-out evaluation uses the selected smoothing parameters.
P.cut_off_freq, P.winsize = min_key
P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
P.SMA_per_win_winsize=P.winsize
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)
dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
                                                                                   extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
                                                                                   best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; replaced by a newline.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report ("<report>_raw_df.json", ...).
key_column= ["cut_off_freq", "SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={pair_key: eer_df["Mean_EER"].mean() for pair_key, eer_df in EER_df_test_dict.items()}

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

# 6. The effect of Varying Overlap

In [None]:
# Experiment 6: sweep the window overlap and run the 3-fold CV pipeline for each value.
init_experiment_params(exp_config=P)

time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

# This experiment uses the raw (unsmoothed) frames.
dfList_dict = {
    "dfList_exp1": raw_dfList_exp1,
    "dfList_exp2": raw_dfList_exp2,
    "dfList_exp1_user_47": raw_dfList_exp1_user_47,
    "dfList_exp2_user_47": raw_dfList_exp2_user_47,
}

# Values that do not change between overlap settings.
key_column = ["overlap"]
max_window_size = 2000
results_dir = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}"
# open(..., "w") below fails if the directory is missing; create it up front.
os.makedirs(results_dir, exist_ok=True)
run_header = "\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n"

# NOTE(review): the [14:] slice skips the first 14 overlap values -- this looks
# like a leftover from resuming an interrupted run; confirm before a full re-run.
for overlap_pct in tqdm(OVERLAP_EXP_RANGE[14:]):

    # File names carry the overlap as given in OVERLAP_EXP_RANGE (a percentage).
    train_file_name = f"{results_dir}/overlap={overlap_pct}_Mean_EER_df_train_dict.txt"
    test_file_name = f"{results_dir}/overlap={overlap_pct}_Mean_EER_df_test_dict.txt"

    # Start both logs fresh ("w" truncates) with the same timestamped header.
    for log_path in (train_file_name, test_file_name):
        with open(log_path, "w") as f:
            f.write(run_header)

    # Percentage -> fraction. Kept as `* 0.01` so the value passed downstream is
    # unchanged (e.g. 57 -> 0.5700000000000001, as seen in the recorded output).
    # NOTE(review): int() below truncates, so this float drift can shave one
    # sample off step_width; consider round() once the helper's own windowing
    # has been checked for the same pattern.
    overlap = overlap_pct * 0.01
    print(f"overlap: {overlap}")
    step_width = int(max_window_size * (1 - overlap))
    max_num_windows = min(
        len(getIndices(sampleSize=max_window_size, step=step_width, numSamplePoints=P.num_sample_points_per_exp)),
        param_dist['model__n_neighbors'][-1] + 1,
    )
    # NOTE(review): `n_neighbors_params` is computed but never used in this cell;
    # `param_dist` is forwarded unchanged. Presumably it was meant to cap
    # param_dist['model__n_neighbors'] -- confirm and wire it in, or drop it.
    n_neighbors_params = np.arange(1, max_num_windows)

    #-----CV_FOLD-------
    for cv_fold_idx in range(len(THREE_FOLD_CV)):
        process_cv_fold_OCSVM_LOF_IF(
            cv_fold_idx=cv_fold_idx,
            cv_sets=THREE_FOLD_CV,
            dfList_dict=dfList_dict,
            window_size_lst=WINDOW_SIZE_LST,
            exp_config=P,
            extract_features_dict=EXTRACT_WACA_features_DICT,
            overlap=overlap,
            param_dist=param_dist,
            train_file_name=train_file_name,
            test_file_name=test_file_name,
            preprocessing_params=overlap,
            key_column=key_column,
        )

reseting experiment params successful!


  0%|          | 0/11 [00:00<?, ?it/s]

overlap: 0.5700000000000001
train_set: {0, 1, 2, 3, 4, 5, 6, 9, 10, 12, 14, 15, 16, 18, 19, 22, 23, 24, 25, 28}
test_set: {7, 8, 11, 13, 17, 20, 21, 26, 27, 29}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  17.206865056417882
MakeWACAXExpDicUnknown Time:  113.9773410698399
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 25191.02it/s]


 33%|███▎      | 1/3 [01:04<02:08, 64.22s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 17597.25it/s]


 67%|██████▋   | 2/3 [02:13<01:06, 66.95s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 27648.68it/s]


100%|██████████| 3/3 [03:33<00:00, 71.08s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 21129.99it/s]


 20%|██        | 1/5 [00:06<00:27,  6.85s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 13313.14it/s]


 40%|████      | 2/5 [00:13<00:20,  6.79s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 22017.34it/s]


 60%|██████    | 3/5 [00:20<00:13,  6.80s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 20555.28it/s]


 80%|████████  | 4/5 [00:27<00:06,  6.81s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 16548.84it/s]


100%|██████████| 5/5 [00:34<00:00,  6.80

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  8.347721057012677
MakeWACAXExpDicUnknown Time:  57.64509401470423
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 16650.67it/s]


 33%|███▎      | 1/3 [00:27<00:54, 27.11s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 21896.65it/s]


 67%|██████▋   | 2/3 [00:55<00:27, 27.81s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 24321.86it/s]


100%|██████████| 3/3 [01:23<00:00, 27.99s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 17443.56it/s]


 20%|██        | 1/5 [00:03<00:13,  3.41s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 23831.27it/s]


 40%|████      | 2/5 [00:06<00:09,  3.30s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 12877.81it/s]


 60%|██████    | 3/5 [00:09<00:06,  3.28s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 15589.31it/s]


 80%|████████  | 4/5 [00:13<00:03,  3.26s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 10724.38it/s]


100%|██████████| 5/5 [00:16<00:00,  3.30

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  4.76131768617779
MakeWACAXExpDicUnknown Time:  30.098450399935246
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 10619.84it/s]


 33%|███▎      | 1/3 [00:13<00:27, 13.95s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 14222.80it/s]


 67%|██████▋   | 2/3 [00:28<00:14, 14.08s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 22262.76it/s]


100%|██████████| 3/3 [00:42<00:00, 14.25s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 11780.10it/s]


 20%|██        | 1/5 [00:01<00:06,  1.66s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 22556.08it/s]


 40%|████      | 2/5 [00:03<00:04,  1.65s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 16591.39it/s]


 60%|██████    | 3/5 [00:04<00:03,  1.62s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 22598.62it/s]


 80%|████████  | 4/5 [00:06<00:01,  1.60s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 24686.90it/s]


100%|██████████| 5/5 [00:08<00:00,  1.62

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  3.995430397801101
MakeWACAXExpDicUnknown Time:  17.510494044981897
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 21675.99it/s]


 33%|███▎      | 1/3 [00:08<00:17,  8.59s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 26487.55it/s]


 67%|██████▋   | 2/3 [00:16<00:08,  8.46s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 18682.87it/s]


100%|██████████| 3/3 [00:26<00:00,  8.71s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 21586.74it/s]


 20%|██        | 1/5 [00:01<00:04,  1.19s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15221.57it/s]


 40%|████      | 2/5 [00:02<00:03,  1.16s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 16597.96it/s]


 60%|██████    | 3/5 [00:03<00:02,  1.16s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 25078.05it/s]


 80%|████████  | 4/5 [00:04<00:01,  1.17s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 16693.75it/s]


100%|██████████| 5/5 [00:05<00:00,  1.17

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  3.0701964888721704
MakeWACAXExpDicUnknown Time:  15.606147618032992
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 19226.70it/s]


 33%|███▎      | 1/3 [00:07<00:15,  7.96s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 20641.26it/s]


 67%|██████▋   | 2/3 [00:18<00:09,  9.25s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 19991.92it/s]


100%|██████████| 3/3 [00:29<00:00,  9.71s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 19654.66it/s]


 20%|██        | 1/5 [00:01<00:04,  1.10s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 21525.81it/s]


 40%|████      | 2/5 [00:02<00:03,  1.11s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 20126.22it/s]


 60%|██████    | 3/5 [00:03<00:02,  1.11s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 17832.93it/s]


 80%|████████  | 4/5 [00:04<00:01,  1.11s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 16396.81it/s]


100%|██████████| 5/5 [00:05<00:00,  1.09

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.6148091182112694
MakeWACAXExpDicUnknown Time:  13.915659005753696
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 22751.85it/s]


 33%|███▎      | 1/3 [00:09<00:19,  9.96s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 23269.37it/s]


 67%|██████▋   | 2/3 [00:20<00:10, 10.06s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 16601.24it/s]


100%|██████████| 3/3 [00:30<00:00, 10.01s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 20049.25it/s]


 20%|██        | 1/5 [00:00<00:03,  1.01it/s][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15161.05it/s]


 40%|████      | 2/5 [00:01<00:02,  1.02it/s][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 15265.89it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.03it/s][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 19517.47it/s]


 80%|████████  | 4/5 [00:03<00:00,  1.03it/s][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 24223.53it/s]


100%|██████████| 5/5 [00:04<00:00,  1.03

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.4741414673626423
MakeWACAXExpDicUnknown Time:  15.233926696702838
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 16011.85it/s]


 33%|███▎      | 1/3 [00:06<00:13,  6.74s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 18872.01it/s]


 67%|██████▋   | 2/3 [00:14<00:07,  7.30s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 25661.08it/s]


100%|██████████| 3/3 [00:20<00:00,  6.98s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 24973.53it/s]


 20%|██        | 1/5 [00:00<00:03,  1.11it/s][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 16581.55it/s]


 40%|████      | 2/5 [00:01<00:02,  1.12it/s][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 27971.35it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.09it/s][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 24230.53it/s]


 80%|████████  | 4/5 [00:03<00:00,  1.04it/s][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 26124.60it/s]


100%|██████████| 5/5 [00:04<00:00,  1.08

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.1934791123494506
MakeWACAXExpDicUnknown Time:  11.04429057892412
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 21258.51it/s]


 33%|███▎      | 1/3 [00:08<00:17,  8.61s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 26852.14it/s]


 67%|██████▋   | 2/3 [00:18<00:09,  9.12s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 22714.89it/s]


100%|██████████| 3/3 [00:25<00:00,  8.54s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 18546.56it/s]


 20%|██        | 1/5 [00:00<00:03,  1.01it/s][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 26181.67it/s]


 40%|████      | 2/5 [00:01<00:02,  1.05it/s][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 15953.99it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.04it/s][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 17719.92it/s]


 80%|████████  | 4/5 [00:03<00:00,  1.06it/s][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 16390.40it/s]


100%|██████████| 5/5 [00:04<00:00,  1.05

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.1472991723567247
MakeWACAXExpDicUnknown Time:  9.717151207849383
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 22151.06it/s]


 33%|███▎      | 1/3 [00:08<00:16,  8.47s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 19590.40it/s]


 67%|██████▋   | 2/3 [00:17<00:08,  8.81s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 19315.24it/s]


100%|██████████| 3/3 [00:25<00:00,  8.55s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 21692.81it/s]


 20%|██        | 1/5 [00:00<00:03,  1.14it/s][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 23211.42it/s]


 40%|████      | 2/5 [00:01<00:02,  1.08it/s][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 14490.60it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.15it/s][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 15221.57it/s]


 80%|████████  | 4/5 [00:03<00:00,  1.17it/s][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 26083.98it/s]


100%|██████████| 5/5 [00:04<00:00,  1.15

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 394
len_exp2_user_47: 338
MakeWACAXExpDicOwner Time:  6.580041076056659
MakeWACAXExpDicUnknown Time:  40.59521010797471
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 8172.84it/s]


 20%|██        | 1/5 [00:01<00:04,  1.15s/it][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 9068.77it/s]


 40%|████      | 2/5 [00:02<00:03,  1.18s/it][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 8139.54it/s]


 60%|██████    | 3/5 [00:03<00:02,  1.13s/it][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 9149.88it/s]


 80%|████████  | 4/5 [00:04<00:01,  1.12s/it][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 9704.54it/s]


100%|██████████| 5/5 [00:05<00:00,  1.12s/it][A[A

 11%|█         | 1/9 [00:54<07:14, 54.31s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 194
len_exp2_user_47: 166
MakeWACAXExpDicOwner Time:  3.637112201191485
MakeWACAXExpDicUnknown Time:  31.080385024659336
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 11719.21it/s]


 20%|██        | 1/5 [00:00<00:03,  1.17it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 11573.69it/s]


 40%|████      | 2/5 [00:01<00:02,  1.15it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 14126.99it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.10it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 13464.86it/s]


 80%|████████  | 4/5 [00:03<00:00,  1.10it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 7326.30it/s]


100%|██████████| 5/5 [00:04<00:00,  1.10it/s][A[A

 22%|██▏       | 2/9 [01:34<05:20, 45.84s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 96
len_exp2_user_47: 82
MakeWACAXExpDicOwner Time:  2.27609524410218
MakeWACAXExpDicUnknown Time:  18.020002147182822
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 10295.30it/s]


 20%|██        | 1/5 [00:00<00:01,  2.28it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 11125.47it/s]


 40%|████      | 2/5 [00:00<00:01,  2.34it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 14873.42it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.30it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 10798.93it/s]


 80%|████████  | 4/5 [00:02<00:00,  1.69it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 11104.86it/s]


100%|██████████| 5/5 [00:03<00:00,  1.49it/s][A[A

 33%|███▎      | 3/9 [01:58<03:35, 35.92s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 63
len_exp2_user_47: 54
MakeWACAXExpDicOwner Time:  3.905243707820773
MakeWACAXExpDicUnknown Time:  25.31637274939567
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 7980.03it/s]


 20%|██        | 1/5 [00:01<00:04,  1.12s/it][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 14339.50it/s]


 40%|████      | 2/5 [00:02<00:03,  1.23s/it][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 11287.15it/s]


 60%|██████    | 3/5 [00:03<00:02,  1.25s/it][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 9747.39it/s]


 80%|████████  | 4/5 [00:04<00:01,  1.20s/it][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 8130.07it/s]


100%|██████████| 5/5 [00:06<00:00,  1.22s/it][A[A

 44%|████▍     | 4/9 [02:34<03:00, 36.03s/it][A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 47
len_exp2_user_47: 40
MakeWACAXExpDicOwner Time:  3.8883924279361963
MakeWACAXExpDicUnknown Time:  21.136013022623956
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 5283.16it/s]


 20%|██        | 1/5 [00:01<00:04,  1.11s/it][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 10300.35it/s]


 40%|████      | 2/5 [00:02<00:03,  1.16s/it][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 6561.80it/s]


 60%|██████    | 3/5 [00:03<00:02,  1.18s/it][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 8505.99it/s]


 80%|████████  | 4/5 [00:04<00:01,  1.11s/it][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 8907.00it/s]


100%|██████████| 5/5 [00:05<00:00,  1.13s/it][A[A

 56%|█████▌    | 5/9 [03:06<02:17, 34.46s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 37
len_exp2_user_47: 32
MakeWACAXExpDicOwner Time:  3.62245090585202
MakeWACAXExpDicUnknown Time:  6.2483347076922655
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 8662.34it/s]


 20%|██        | 1/5 [00:00<00:01,  3.09it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 8442.64it/s]


 40%|████      | 2/5 [00:00<00:01,  2.88it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 9267.13it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.49it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 8709.10it/s]


 80%|████████  | 4/5 [00:01<00:00,  2.39it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 7812.08it/s]


100%|██████████| 5/5 [00:02<00:00,  2.44it/s][A[A

 67%|██████▋   | 6/9 [03:18<01:21, 27.08s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 31
len_exp2_user_47: 26
MakeWACAXExpDicOwner Time:  1.4797136653214693
MakeWACAXExpDicUnknown Time:  8.833459904417396
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 7214.15it/s]


 20%|██        | 1/5 [00:00<00:01,  2.23it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 11069.69it/s]


 40%|████      | 2/5 [00:01<00:01,  1.85it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 9974.56it/s]


 60%|██████    | 3/5 [00:01<00:01,  1.65it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 9631.01it/s]


 80%|████████  | 4/5 [00:02<00:00,  1.64it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 11848.32it/s]


100%|██████████| 5/5 [00:02<00:00,  1.68it/s][A[A

 78%|███████▊  | 7/9 [03:32<00:45, 22.69s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 26
len_exp2_user_47: 22
MakeWACAXExpDicOwner Time:  2.011291641741991
MakeWACAXExpDicUnknown Time:  12.076810038648546
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 9834.24it/s]


 20%|██        | 1/5 [00:00<00:02,  1.39it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 7918.26it/s]


 40%|████      | 2/5 [00:01<00:02,  1.41it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 8335.26it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.40it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 204.87it/s]


 80%|████████  | 4/5 [00:02<00:00,  1.42it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 12969.40it/s]


100%|██████████| 5/5 [00:03<00:00,  1.49it/s][A[A

 89%|████████▉ | 8/9 [03:50<00:21, 21.18s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 23
len_exp2_user_47: 19
MakeWACAXExpDicOwner Time:  1.759348377585411
MakeWACAXExpDicUnknown Time:  7.5285140024498105
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 8633.81it/s]


 20%|██        | 1/5 [00:00<00:01,  3.86it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 10315.55it/s]


 40%|████      | 2/5 [00:00<00:00,  3.68it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 8420.61it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.65it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 2696.61it/s]


 80%|████████  | 4/5 [00:02<00:00,  1.60it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 5143.86it/s]


100%|██████████| 5/5 [00:03<00:00,  1.59it/s][A[A

100%|██████████| 9/9 [04:03<00:00, 27.04s/it][A


train_set: {0, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 23, 26, 27, 29}
test_set: {1, 3, 5, 6, 16, 19, 22, 24, 25, 28}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 394
len_exp2_user_47: 338
MakeWACAXExpDicOwner Time:  63.70132458303124
MakeWACAXExpDicUnknown Time:  231.16149803437293
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 18102.30it/s]


 33%|███▎      | 1/3 [02:40<05:21, 160.71s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 20550.24it/s]


 67%|██████▋   | 2/3 [06:19<03:14, 194.93s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 15090.14it/s]


100%|██████████| 3/3 [10:35<00:00, 211.90s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 12246.14it/s]


 20%|██        | 1/5 [00:16<01:04, 16.18s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15363.75it/s]


 40%|████      | 2/5 [00:29<00:43, 14.56s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 18176.83it/s]


 60%|██████    | 3/5 [00:40<00:25, 12.89s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 18220.26it/s]


 80%|████████  | 4/5 [00:56<00:14, 14.12s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 13709.12it/s]


100%|██████████| 5/5 [01:07<00:00, 13

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 194
len_exp2_user_47: 166
MakeWACAXExpDicOwner Time:  19.360864027403295
MakeWACAXExpDicUnknown Time:  122.35135687049478
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 9434.94it/s]


 33%|███▎      | 1/3 [00:57<01:55, 57.91s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15352.50it/s]


 67%|██████▋   | 2/3 [01:36<00:46, 46.68s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 21242.36it/s]


100%|██████████| 3/3 [02:23<00:00, 47.77s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 19143.33it/s]


 20%|██        | 1/5 [00:07<00:30,  7.69s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 16939.84it/s]


 40%|████      | 2/5 [00:14<00:21,  7.16s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 12442.31it/s]


 60%|██████    | 3/5 [00:21<00:14,  7.09s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 21421.37it/s]


 80%|████████  | 4/5 [00:27<00:06,  6.82s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 12599.29it/s]


100%|██████████| 5/5 [00:34<00:00,  6.85s

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 96
len_exp2_user_47: 82
MakeWACAXExpDicOwner Time:  13.503367246128619
MakeWACAXExpDicUnknown Time:  104.30809350218624
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 20223.26it/s]


 33%|███▎      | 1/3 [00:39<01:19, 39.65s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 17001.64it/s]


 67%|██████▋   | 2/3 [01:27<00:44, 44.62s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 12068.20it/s]


100%|██████████| 3/3 [02:28<00:00, 49.59s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 19445.08it/s]


 20%|██        | 1/5 [00:05<00:22,  5.62s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 13003.58it/s]


 40%|████      | 2/5 [00:11<00:16,  5.60s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 24001.74it/s]


 60%|██████    | 3/5 [00:16<00:11,  5.52s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 23994.87it/s]


 80%|████████  | 4/5 [00:19<00:04,  4.60s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 22483.54it/s]


100%|██████████| 5/5 [00:22<00:00,  4.58

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 63
len_exp2_user_47: 54
MakeWACAXExpDicOwner Time:  8.60724260378629
MakeWACAXExpDicUnknown Time:  49.406780424527824
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 9733.82it/s]


 33%|███▎      | 1/3 [00:24<00:48, 24.25s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 22813.73it/s]


 67%|██████▋   | 2/3 [00:46<00:23, 23.27s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 15732.57it/s]


100%|██████████| 3/3 [01:12<00:00, 24.02s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 17060.42it/s]


 20%|██        | 1/5 [00:02<00:08,  2.21s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15321.66it/s]


 40%|████      | 2/5 [00:04<00:05,  1.99s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 19355.35it/s]


 60%|██████    | 3/5 [00:05<00:03,  1.77s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 15595.11it/s]


 80%|████████  | 4/5 [00:06<00:01,  1.57s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 20400.31it/s]


100%|██████████| 5/5 [00:08<00:00,  1.62s

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 47
len_exp2_user_47: 40
MakeWACAXExpDicOwner Time:  3.330050602555275
MakeWACAXExpDicUnknown Time:  27.881362412124872
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 19723.98it/s]


 33%|███▎      | 1/3 [00:22<00:44, 22.29s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 13920.69it/s]


 67%|██████▋   | 2/3 [00:46<00:23, 23.71s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 15414.57it/s]


100%|██████████| 3/3 [01:09<00:00, 23.30s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 20034.89it/s]


 20%|██        | 1/5 [00:02<00:08,  2.17s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 14543.36it/s]


 40%|████      | 2/5 [00:04<00:07,  2.38s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 16203.61it/s]


 60%|██████    | 3/5 [00:06<00:04,  2.33s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 22221.48it/s]


 80%|████████  | 4/5 [00:09<00:02,  2.30s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 23134.61it/s]


100%|██████████| 5/5 [00:11<00:00,  2.20

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 37
len_exp2_user_47: 32
MakeWACAXExpDicOwner Time:  5.320429272018373
MakeWACAXExpDicUnknown Time:  33.536738839931786
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 23051.96it/s]


 33%|███▎      | 1/3 [00:16<00:33, 16.88s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15926.73it/s]


 67%|██████▋   | 2/3 [00:31<00:15, 15.68s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 17130.10it/s]


100%|██████████| 3/3 [00:52<00:00, 17.47s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 21437.79it/s]


 20%|██        | 1/5 [00:01<00:07,  1.81s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 13516.93it/s]


 40%|████      | 2/5 [00:03<00:05,  1.73s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 20296.66it/s]


 60%|██████    | 3/5 [00:04<00:03,  1.61s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 13895.33it/s]


 80%|████████  | 4/5 [00:06<00:01,  1.58s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 17844.31it/s]


100%|██████████| 5/5 [00:08<00:00,  1.80

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 31
len_exp2_user_47: 26
MakeWACAXExpDicOwner Time:  6.421873078681529
MakeWACAXExpDicUnknown Time:  33.8069309014827
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 12420.21it/s]


 33%|███▎      | 1/3 [00:21<00:42, 21.33s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 14755.69it/s]


 67%|██████▋   | 2/3 [00:42<00:21, 21.02s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 25458.60it/s]


100%|██████████| 3/3 [01:04<00:00, 21.43s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 17056.95it/s]


 20%|██        | 1/5 [00:04<00:19,  4.80s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 10388.37it/s]


 40%|████      | 2/5 [00:09<00:13,  4.62s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 20184.33it/s]


 60%|██████    | 3/5 [00:12<00:08,  4.20s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 22869.71it/s]


 80%|████████  | 4/5 [00:17<00:04,  4.28s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 20867.18it/s]


100%|██████████| 5/5 [00:22<00:00,  4.42

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 26
len_exp2_user_47: 22
MakeWACAXExpDicOwner Time:  9.99949957150966
MakeWACAXExpDicUnknown Time:  47.005152026191354
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 15213.29it/s]


 33%|███▎      | 1/3 [00:35<01:10, 35.20s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 16516.26it/s]


 67%|██████▋   | 2/3 [01:09<00:34, 34.92s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 22127.69it/s]


100%|██████████| 3/3 [01:29<00:00, 29.68s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 13041.99it/s]


 20%|██        | 1/5 [00:02<00:09,  2.41s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15471.43it/s]


 40%|████      | 2/5 [00:04<00:07,  2.40s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 16871.70it/s]


 60%|██████    | 3/5 [00:07<00:04,  2.49s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 21492.72it/s]


 80%|████████  | 4/5 [00:09<00:02,  2.38s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 23790.72it/s]


100%|██████████| 5/5 [00:11<00:00,  2.24

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 23
len_exp2_user_47: 19
MakeWACAXExpDicOwner Time:  3.846177263185382
MakeWACAXExpDicUnknown Time:  25.950133375823498
Done extracting features




  0%|          | 0/3 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 13792.52it/s]


 33%|███▎      | 1/3 [00:28<00:56, 28.12s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 15821.59it/s]


 67%|██████▋   | 2/3 [01:05<00:33, 33.58s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 10909.88it/s]


100%|██████████| 3/3 [01:33<00:00, 31.27s/it][A[A


  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 20/20 [00:00<00:00, 15372.20it/s]


 20%|██        | 1/5 [00:04<00:18,  4.60s/it][A[A


run: 1: 100%|██████████| 20/20 [00:00<00:00, 20595.65it/s]


 40%|████      | 2/5 [00:09<00:13,  4.55s/it][A[A


run: 2: 100%|██████████| 20/20 [00:00<00:00, 20836.09it/s]


 60%|██████    | 3/5 [00:13<00:08,  4.48s/it][A[A


run: 3: 100%|██████████| 20/20 [00:00<00:00, 18695.36it/s]


 80%|████████  | 4/5 [00:17<00:04,  4.40s/it][A[A


run: 4: 100%|██████████| 20/20 [00:00<00:00, 18094.50it/s]


100%|██████████| 5/5 [00:21<00:00,  4.24

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  17.922545137815177
MakeWACAXExpDicUnknown Time:  73.47802932932973
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 9825.03it/s]


 20%|██        | 1/5 [00:01<00:07,  1.95s/it][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 8015.10it/s]


 40%|████      | 2/5 [00:03<00:05,  1.94s/it][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 10315.55it/s]


 60%|██████    | 3/5 [00:05<00:03,  1.67s/it][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 8916.46it/s]


 80%|████████  | 4/5 [00:06<00:01,  1.50s/it][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 12111.76it/s]


100%|██████████| 5/5 [00:07<00:00,  1.51s/it][A[A

 11%|█         | 1/9 [01:45<14:07, 105.89s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  4.1964131006971
MakeWACAXExpDicUnknown Time:  31.93518601357937
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 8893.77it/s]


 20%|██        | 1/5 [00:00<00:03,  1.31it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 8062.87it/s]


 40%|████      | 2/5 [00:01<00:02,  1.25it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 9273.28it/s]


 60%|██████    | 3/5 [00:02<00:01,  1.31it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 7833.96it/s]


 80%|████████  | 4/5 [00:03<00:00,  1.33it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 13005.59it/s]


100%|██████████| 5/5 [00:03<00:00,  1.33it/s][A[A

 22%|██▏       | 2/9 [02:26<07:52, 67.55s/it] [A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  3.1785020008683205
MakeWACAXExpDicUnknown Time:  17.939189607277513
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 10694.30it/s]


 20%|██        | 1/5 [00:00<00:02,  1.82it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 5960.36it/s]


 40%|████      | 2/5 [00:01<00:01,  1.90it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 8365.19it/s]


 60%|██████    | 3/5 [00:01<00:01,  1.84it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 7788.87it/s]


 80%|████████  | 4/5 [00:02<00:00,  1.87it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 11994.01it/s]


100%|██████████| 5/5 [00:02<00:00,  1.90it/s][A[A

 33%|███▎      | 3/9 [02:50<04:46, 47.81s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.9936387790367007
MakeWACAXExpDicUnknown Time:  14.874823888763785
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 8451.15it/s]


 20%|██        | 1/5 [00:00<00:02,  1.92it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 8528.47it/s]


 40%|████      | 2/5 [00:00<00:01,  2.06it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 10190.24it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.18it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 16384.00it/s]


 80%|████████  | 4/5 [00:01<00:00,  2.28it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 7355.85it/s]


100%|██████████| 5/5 [00:02<00:00,  2.24it/s][A[A

 44%|████▍     | 4/9 [03:10<03:03, 36.62s/it][A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.9378726482391357
MakeWACAXExpDicUnknown Time:  12.126787717454135
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 9532.51it/s]


 20%|██        | 1/5 [00:00<00:01,  2.28it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 8884.36it/s]


 40%|████      | 2/5 [00:00<00:01,  2.60it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 13050.11it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.58it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 11403.76it/s]


 80%|████████  | 4/5 [00:01<00:00,  2.32it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 9984.06it/s]


100%|██████████| 5/5 [00:02<00:00,  2.32it/s][A[A

 56%|█████▌    | 5/9 [03:26<01:57, 29.37s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.8972551068291068
MakeWACAXExpDicUnknown Time:  8.967617691494524
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 4255.15it/s]


 20%|██        | 1/5 [00:00<00:01,  2.37it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 10951.19it/s]


 40%|████      | 2/5 [00:00<00:01,  2.54it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 4980.77it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.58it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 16403.22it/s]


 80%|████████  | 4/5 [00:01<00:00,  2.60it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 9057.02it/s]


100%|██████████| 5/5 [00:01<00:00,  2.58it/s][A[A

 67%|██████▋   | 6/9 [03:40<01:11, 23.87s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  2.3729104064404964
MakeWACAXExpDicUnknown Time:  8.454355494119227
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 10363.98it/s]


 20%|██        | 1/5 [00:00<00:01,  2.53it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 9293.83it/s]


 40%|████      | 2/5 [00:00<00:01,  2.59it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 12122.27it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.57it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 7430.12it/s]


 80%|████████  | 4/5 [00:01<00:00,  2.62it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 8182.41it/s]


100%|██████████| 5/5 [00:01<00:00,  2.74it/s][A[A

 78%|███████▊  | 7/9 [03:52<00:40, 20.28s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.2418386796489358
MakeWACAXExpDicUnknown Time:  6.773800622671843
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 6980.04it/s]


 20%|██        | 1/5 [00:00<00:01,  2.87it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 12729.30it/s]


 40%|████      | 2/5 [00:00<00:01,  2.60it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 6137.41it/s]


 60%|██████    | 3/5 [00:01<00:00,  2.30it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 7027.99it/s]


 80%|████████  | 4/5 [00:01<00:00,  2.35it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 10594.35it/s]


100%|██████████| 5/5 [00:02<00:00,  2.34it/s][A[A

 89%|████████▉ | 8/9 [04:03<00:17, 17.14s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.2952119894325733
MakeWACAXExpDicUnknown Time:  5.796888489276171
Done extracting features




  0%|          | 0/5 [00:00<?, ?it/s][A[A


run: 0: 100%|██████████| 10/10 [00:00<00:00, 10060.70it/s]


 20%|██        | 1/5 [00:00<00:01,  2.85it/s][A[A


run: 1: 100%|██████████| 10/10 [00:00<00:00, 10675.25it/s]


 40%|████      | 2/5 [00:00<00:01,  2.95it/s][A[A


run: 2: 100%|██████████| 10/10 [00:00<00:00, 7873.67it/s]


 60%|██████    | 3/5 [00:00<00:00,  3.15it/s][A[A


run: 3: 100%|██████████| 10/10 [00:00<00:00, 8937.36it/s]


 80%|████████  | 4/5 [00:01<00:00,  3.14it/s][A[A


run: 4: 100%|██████████| 10/10 [00:00<00:00, 16339.32it/s]


100%|██████████| 5/5 [00:01<00:00,  3.11it/s][A[A

100%|██████████| 9/9 [04:12<00:00, 28.04s/it][A


train_set: {1, 3, 5, 6, 7, 8, 11, 13, 16, 17, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29}
test_set: {0, 2, 4, 9, 10, 12, 14, 15, 18, 23}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 394
len_exp2_user_47: 338
MakeWACAXExpDicOwner Time:  32.48487980943173


In [20]:
# Overlap experiment: sweep the window overlap on the training users, select the
# overlap with the lowest mean EER, and evaluate only that overlap on the test users.
# Human-readable logs go to two .txt files; the test results are also saved as JSON.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")

time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/overlap_Mean_EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/overlap_Mean_EER_df_test_dict.txt"

# Make sure the results directory exists before the first write.
os.makedirs(os.path.dirname(train_file_name), exist_ok=True)

# Start both logs afresh ("w" truncates) with a timestamped banner.
run_banner = "\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n"
for log_file_name in (train_file_name, test_file_name):
    with open(log_file_name, "w") as f:
        f.write(run_banner)

# The raw data frames do not depend on the overlap, so the dict is built once
# here; it is needed both inside the sweep and for the final test evaluation.
dfList_dict = {
    "dfList_exp1": raw_dfList_exp1,
    "dfList_exp2": raw_dfList_exp2,
    "dfList_exp1_user_47": raw_dfList_exp1_user_47,
    "dfList_exp2_user_47": raw_dfList_exp2_user_47,
}

EER_df_train_dict = {}
EER_df_test_dict = {}
for overlap_pct in tqdm(OVERLAP_EXP_RANGE):
    # Entries of OVERLAP_EXP_RANGE are scaled by 0.01, i.e. treated as percentages.
    overlap = overlap_pct * 0.01

    max_window_size = 2000
    step_width = int(max_window_size * (1-overlap))
    max_num_windows = min(
        len(getIndices(sampleSize=max_window_size, step=step_width, numSamplePoints=P.num_sample_points_per_exp)),
        param_dist['model__n_neighbors'][-1]+1,
    )
    # NOTE(review): n_neighbors_params is computed but never handed to the search
    # below (param_dist is passed unchanged). Presumably it was meant to cap
    # 'model__n_neighbors' by the number of available windows -- confirm intent.
    n_neighbors_params = np.arange(1, max_num_windows)

    # Pass the overlap of this iteration. The original passed the constant
    # OVERLAP, so every iteration evaluated the same configuration.
    EER_df_train_dict[overlap] = calculate_EER_different_window_sizes_train_OCSVM_IF_LOF(
        dfList_dict,
        window_size_lst=WINDOW_SIZE_LST,
        train_set=train_set,
        exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT,
        overlap=overlap,
        param_dist=param_dist,
    )
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        f.write(f"overlap: {overlap}\n")
        f.write(EER_df_train_dict[overlap].to_string())


# Mean EER across all window sizes, per overlap value.
mean_EER_train_dict = {
    key: EER_df_train_dict[key]["Mean_EER"].mean() for key in EER_df_train_dict
}

#-------
# Sort ascending: EER is an error rate, so the smallest value is the best.
train_lst = sorted(mean_EER_train_dict.items(), key=lambda i: i[1])

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


# Evaluate only the best training overlap on the held-out test users.
min_key = train_lst[0][0]
overlap = min_key
EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test_OCSVM_IF_LOF(
    dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    test_set=test_set,
    exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT,
    overlap=overlap,
    best_param_df=EER_df_train_dict[min_key],
)
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write(f"Top overlap: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
key_column = ["overlap"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

# test_file_name[:-4] strips the ".txt" extension to derive the JSON file names.
EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {
    key: EER_df_test_dict[key]["Mean_EER"].mean() for key in EER_df_test_dict
}

l = sorted(mean_EER_test_dict.items(), key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

reseting experiment params successful!
train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5, 47]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


  0%|          | 0/11 [00:00<?, ?it/s]
  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19


Process LokyProcess-81:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/opt/conda/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 451, in _process_worker
    _process_reference_size = _get_memory_usage(pid, force_gc=True)
  File "/opt/conda/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py", line 117, in _get_memory_usage
    gc.collect()
KeyboardInterrupt
  0%|          | 0/9 [00:15<?, ?it/s]
  0%|          | 0/11 [00:15<?, ?it/s]


KeyboardInterrupt: 

In [19]:
# Position of the entry 57 within OVERLAP_EXP_RANGE (ad-hoc lookup; displayed as the cell output).
OVERLAP_EXP_RANGE.index(57)

14

In [None]:
# Stand-alone check of the n_neighbors grid for a single overlap value (0.01),
# repeating the computation done inside the overlap sweep loop.
overlap = 0.01
max_window_size = 2000
step_width = int((1 - overlap) * max_window_size)

# Number of windows produced for the largest window size at this step width ...
max_num_windows = len(
    getIndices(
        sampleSize=max_window_size,
        step=step_width,
        numSamplePoints=P.num_sample_points_per_exp,
    )
)
# ... capped at one past the last n_neighbors candidate of the search grid.
max_num_windows = min(max_num_windows, param_dist['model__n_neighbors'][-1] + 1)

n_neighbors_params = np.arange(1, max_num_windows)
n_neighbors_params

# Summary

In [None]:
#clip=True
df=return_and_save_final_result_df_as_json(final_exp_results_path=FINAL_EXP_RESULTS_PATH, exp_path_name=EXP_PATH_NAME, window_size_lst=WINDOW_SIZE_LST)
df

In [31]:
# Summary table restricted to the window sizes in WINDOW_SIZE_LST_NN; the
# "-max1000" suffix is passed on as the save-file suffix.
df = return_and_save_final_result_df_as_json(
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST_NN,
    save_file_suffix="-max1000",
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Window Size,Window Size,Window Size,Window Size,Window Size,Unnamed: 11_level_0,Unnamed: 12_level_0
Unnamed: 0_level_1,mean rank,cut_off_freq,EMA_span,SMA_winsize,type,125,250,500,750,1000,mean,variance
0,10,-,-,-,-,0.1596,0.10214,0.066176,0.054276,0.046387,0.085716,0.002161
1,3,37,-,-,Naive,0.151841,0.09481,0.061223,0.047542,0.042864,0.079656,0.002041
2,6,33,-,-,Real,0.156094,0.095819,0.062963,0.04771,0.046477,0.081813,0.00212
3,1,-,49,-,Naive,0.129547,0.090563,0.062048,0.05037,0.041057,0.074717,0.001286
4,10,-,1,-,Real,0.1596,0.10214,0.066176,0.054276,0.046387,0.085716,0.002161
5,4,-,-,49,Naive,0.137119,0.096684,0.071598,0.054411,0.044399,0.080842,0.001383
6,9,-,-,1,Real,0.159611,0.101519,0.065819,0.054242,0.046387,0.085516,0.002161
7,2,41,48,-,Naive,0.130591,0.091905,0.061089,0.049764,0.042367,0.075143,0.001318
8,7,40,3,-,Real,0.157539,0.100299,0.064101,0.049596,0.043315,0.08297,0.002226
9,8,43,-,34,Naive,0.145928,0.100455,0.074431,0.054646,0.04467,0.084026,0.001651


In [None]:
#clip=False
df=return_and_save_final_result_df_as_json(final_exp_results_path=FINAL_EXP_RESULTS_PATH, exp_path_name=EXP_PATH_NAME, window_size_lst=WINDOW_SIZE_LST)
df

In [None]:
# Table of results relative to the base case (row 0 of df), built from whichever
# df the preceding summary cell produced.
relative_df = return_and_save_final_relative_result_df_as_json(
    df,
    base_case_index=0,
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
relative_df

In [None]:
# Same relative-results computation as the previous cell (base case = row 0 of df).
# NOTE(review): this cell duplicates the one above; presumably one is meant for
# each clip setting -- confirm or remove.
relative_df = return_and_save_final_relative_result_df_as_json(
    df,
    base_case_index=0,
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
relative_df

In [None]:
# Render the summary table as LaTeX source with the index hidden.
(
    df.style
    .format(STYLER_ERR_FORMAT_DICT)
    .hide(axis='index')
    .to_latex()
)


In [None]:
# Render the relative-results table as LaTeX source with the index hidden.
(
    relative_df.style
    .format(STYLER_IMPROVEMENT_FORMAT_DICT)
    .hide(axis='index')
    .to_latex()
)


## Gini coefficient

In [None]:
# Build (and save as JSON) the Gini result table for this experiment path.
gini_result_df = return_and_save_final_Gini_df_as_json(
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
gini_result_df

In [None]:
# Gini results relative to the base case (row 0); the return value is displayed
# as the cell output rather than stored.
return_and_save_final_relative_gini_result_df_as_json(
    gini_result_df,
    base_case_index=0,
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)

# 3. Overlap

## 3.1 EER

In [27]:
# Build (and save as JSON) the EER summary table of the overlap experiment.
overlap_df = return_and_save_final_result_df_as_json_overlap(
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
overlap_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Unnamed: 12_level_0,Unnamed: 13_level_0
Unnamed: 0_level_1,mean rank,overlap,125,250,500,750,1000,1250,1500,1750,2000,mean,variance
0,13,0.01,0.156656,0.106776,0.069004,0.051389,0.047354,0.041551,0.036905,0.038889,0.039815,0.065371,0.001664
1,18,0.05,0.15747,0.103136,0.064983,0.054087,0.046549,0.042048,0.043386,0.039198,0.041111,0.065774,0.001586
2,22,0.09,0.163023,0.110668,0.064412,0.055247,0.049242,0.047119,0.037407,0.037037,0.040505,0.067185,0.001807
3,21,0.13,0.168213,0.101948,0.064892,0.050347,0.04839,0.042885,0.040494,0.039601,0.042357,0.06657,0.001842
4,11,0.17,0.165919,0.107572,0.066519,0.052581,0.042815,0.043056,0.038657,0.033995,0.03642,0.065281,0.00195
5,19,0.21,0.164616,0.106569,0.063746,0.05381,0.050356,0.04418,0.037582,0.037434,0.036895,0.066132,0.00184
6,8,0.25,0.157811,0.108959,0.064949,0.051401,0.047051,0.038047,0.037037,0.037531,0.040883,0.064852,0.001737
7,16,0.29,0.164045,0.105163,0.066794,0.052564,0.049936,0.04066,0.042203,0.032986,0.036772,0.06568,0.001842
8,10,0.33,0.161809,0.107704,0.066458,0.052981,0.045,0.040201,0.039259,0.035512,0.035926,0.064983,0.001842
9,9,0.37,0.160925,0.104511,0.063664,0.056288,0.049074,0.039744,0.038272,0.035185,0.03669,0.064928,0.001767


## 3.2 EER relative

In [28]:
# Overlap results relative to the base case (row 0 of overlap_df).
relative_overlap_df = return_and_save_final_relative_result_df_as_json_overlap(
    overlap_df,
    base_case_index=0,
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
relative_overlap_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Window Size,Unnamed: 12_level_0,Unnamed: 13_level_0
Unnamed: 0_level_1,mean rank,overlap,125,250,500,750,1000,1250,1500,1750,2000,mean,variance
0,13,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,18,0.05,-0.519871,3.409091,5.826314,-5.250078,1.701371,-1.196133,-17.562724,-0.793651,-3.255814,-0.616937,4.680369
2,22,0.09,-4.064484,-3.645662,6.653702,-7.507508,-3.986795,-13.401424,-1.362007,4.761905,-1.733615,-2.774603,-8.602594
3,21,0.13,-7.377503,4.520974,5.958466,2.027027,-2.186058,-3.210673,-9.725209,-1.831502,-6.384778,-1.833885,-10.676362
4,11,0.17,-5.912871,-0.746261,3.601278,-2.320502,9.586592,-3.62117,-4.749104,12.585034,8.527132,0.136741,-17.171179
5,19,0.21,-5.081211,0.193645,7.61858,-4.710425,-6.338633,-6.327099,-1.834282,3.741497,7.334526,-1.164139,-10.541425
6,8,0.25,-0.737238,-2.044812,5.875109,-0.024349,0.641423,8.432515,-0.358423,3.492064,-2.683363,0.793445,-4.39426
7,16,0.29,-4.716541,1.510069,3.201498,-2.286902,-5.451743,2.143636,-14.355782,15.178571,7.641196,-0.473575,-10.701531
8,10,0.33,-3.289669,-0.86921,3.689581,-3.09822,4.972067,3.249768,-6.379928,8.683473,9.767442,0.592915,-10.681302
9,9,0.37,-2.724937,2.120635,7.737438,-9.532789,-3.631285,4.349689,-3.703704,9.52381,7.848837,0.677414,-6.189105
