In [None]:
import joblib
# Sub-directory name for this notebook's results; it is joined with
# FINAL_EXP_RESULTS_PATH when the output file paths are built further down.
EXP_PATH_NAME="WACA-kNN"
# Display the number of CPUs joblib reports for this machine.
joblib.cpu_count()

In [2]:
# !pip install --upgrade pip

import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import dataclasses
from dataclasses import asdict
from tqdm import tqdm
import warnings
import random
import time

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
import seaborn as sns



%run ./Classification_utility-functions.ipynb
%run ./SEED-CONSTANTS.ipynb

# Seed NumPy's global RNG. SEED is not defined in this cell; it is expected to
# come from the notebooks pulled in with %run above.
np.random.seed(SEED)
print(f"Numpy Seed was set to: {SEED}")

print("Setup Complete")

[32mSEED: 567[0m
FINAL_EXP_RESULTS_PATH: clip=False_experiments_results
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
EER: 0.333, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.400, Threshold: 0.200 <-- Worse case
EER: 0.167, Threshold: 0.600 <-- Arbitrary case
EER: 0.000, Threshold: 0.900 <-- Best case
EER: 1.000, Threshold: 0.900 <-- Worse case
EER: 0.333, Threshold: 1.000 <-- Worse case
--------------------[32mUtility functions imported[0m--------------------
[32mSEED: 567[0m
FINAL_EXP_RESULTS_PATH: clip=False_experiments_results
--------------------[32mSEED and CONSTANTS imported[0m--------------------
[32mSEED: 567[0m
Python 3.9.10
--------------------[32mPreprocessing utility functions imported[0m--------------------
[32mSEED: 567[0m
FINAL_EXP_RESULTS_PATH: clip=False_experiments_results
--------------------[32mSEED and 

In [3]:
import sklearn
# NOTE(review): the result of this call is discarded (only the last expression
# of a cell is displayed) - looks like leftover exploration.
sklearn.__dir__()
# Display the installed scikit-learn version.
sklearn.__version__

'1.0.2'

In [4]:
@dataclasses.dataclass
class ExperimentParameters:
    """Contains all relevant parameters to run an experiment.

    Only the fields declared here are dataclass fields. The notebook also
    attaches further attributes to an instance after construction (e.g. ``p``,
    ``scaler_clip``, ``is_NN``, ``smoothing``, ``cut_off_freq``); those are plain
    instance attributes, so ``dataclasses.replace`` and ``dataclasses.asdict``
    do not carry them over.
    """

    name: str  # Name of Parameter set. Used as identifier for charts etc.
    frequency: int  # Used to build table_name ("sensors_<frequency>hz") in __post_init__
    max_subjects: int
    max_test_subjects: int

    user_ids: list  # Subject IDs handed to load_data_frames
    num_sample_points_per_exp: int  # Handed to load_data_frames
    exp_begin_cutoff_idx: int  # Handed to load_data_frames
    exp_end_cutoff_idx: int  # Handed to load_data_frames (negative in the instances defined in this notebook)


    seconds_per_subject_train: float
    seconds_per_subject_test: float
    window_size: int  # After resampling
    ocsvm_step_width: int  # After resampling
    # NOTE(review): the instances in this notebook pass "minmax", "robust",
    # "RobustScaler" and "Normalizer" - confirm which spellings the scaler lookup accepts.
    scaler: str  # StandardScaler, MinMaxScaler, Normalizer, MaxAbsScaler, RobustScaler, PowerTransformer
    scaler_scope: str  # {"subject", "session"}
    scaler_global: bool  # fit transform scale on all data (True) or fit on training only (False)
    ocsvm_kernel: str # ocsvm kernel
    ocsvm_nu: float  # Best value found in random search, used for final model (None in NAIVE_MINMAX_OCSVM)
    ocsvm_gamma: float  # Best value found in random search, used for final model (None in NAIVE_MINMAX_OCSVM)
    feature_cols: list  # Columns used as features
    exclude_subjects: list  # Don't load data from those users

    # Calculated values
    def __post_init__(self) -> None:
        """Derive values that depend on the declared fields."""
        # HDF key of table:
        self.table_name = f"sensors_{self.frequency}hz"

        

# PARAMETER SETS
# ===========================================================
# NAIVE_MINMAX_OCSVM is the base configuration. Every other set is derived from
# it with dataclasses.replace, so only the overridden fields are spelled out.

# Naive approach, min-max scaling (base configuration)
# -----------------------------------------------------------
NAIVE_MINMAX_OCSVM = ExperimentParameters(
    name="NAIVE-MINMAX_OCSVM",
    # -- subjects / data loading --
    frequency=100,
    max_subjects=29,
    max_test_subjects=10,
    user_ids=[
        1, 2, 3, 4, 5, 6, 7, 8, 19, 21, 22, 26, 27, 28, 29,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49,
    ],
    exclude_subjects=[],
    num_sample_points_per_exp=21000,
    exp_begin_cutoff_idx=500,
    exp_end_cutoff_idx=-500,
    feature_cols=["x_a", "y_a", "z_a", "x_g", "y_g", "z_g"],
    # -- windowing --
    seconds_per_subject_train=210,
    seconds_per_subject_test=210,
    window_size=250,
    ocsvm_step_width=250,
    # -- scaling --
    scaler="minmax",
    scaler_scope="subject",
    scaler_global=True,
    # -- OCSVM hyperparameters (nu / gamma left unset in the base set) --
    ocsvm_kernel="rbf",
    ocsvm_nu=None,
    ocsvm_gamma=None,
)

# Valid approach, min-max scaling
# -----------------------------------------------------------
VALID_MINMAX_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-MINMAX-OCSVM",
    scaler_global=False,
    ocsvm_nu=0.165, ocsvm_gamma=0.039,
)

# Naive approach, robust scaling
# -----------------------------------------------------------
# NOTE(review): this set spells the scaler "robust" while VALID_ROBUST_OCSVM uses
# "RobustScaler" - confirm that both spellings are accepted downstream.
NAIVE_ROBUST_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="NAIVE-ROBUST-OCSVM",
    scaler="robust",
    scaler_global=True,
    ocsvm_nu=0.153,
    ocsvm_gamma=0.091,  # below median, selected by chart
)

# Valid approach, robust scaling
# -----------------------------------------------------------
VALID_ROBUST_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-ROBUST-OCSVM",
    scaler="RobustScaler",
    scaler_global=False,
    ocsvm_nu=0.037, ocsvm_gamma=0.001,
)

# Valid approach, Normalizer scaling
# -----------------------------------------------------------
VALID_NORMALIZER_OCSVM = dataclasses.replace(
    NAIVE_MINMAX_OCSVM,
    name="VALID-NORMALIZER-OCSVM",
    scaler="Normalizer",
    scaler_global=False,
    ocsvm_nu=0.074, ocsvm_gamma=0.029,
)

In [5]:
# Active parameter set for this notebook. P is the same object as
# VALID_ROBUST_OCSVM (no copy is made), so the attributes attached below are
# set on that shared instance.
P = VALID_ROBUST_OCSVM
# P = VALID_NORMALIZER_OCSVM
# Candidate values 1..19, passed as n_neighbors_params to the kNN evaluation helpers.
N_NEIGHBORS_PARAMS = np.arange(1,20)
# Shows up as "Minkowski_P=<p>" in the result file names; presumably the
# Minkowski distance order used by kNN - confirm in the utility notebook.
P.p=2
# n_neighbors_params = np.arange(1, 50, 5)

# scaler_clip is embedded in the result file names ("clip=<value>"); both flags
# are presumably read by the utility functions through exp_config - TODO confirm.
P.scaler_clip=False
P.is_NN=False

In [6]:
# Display the active parameter set P as a name/value table.
utils_ppp(P)

Unnamed: 0,Value
name,VALID-ROBUST-OCSVM
frequency,100
max_subjects,29
max_test_subjects,10
user_ids,"[1, 2, 3, 4, 5, 6, 7, 8, 19, 21, 22, 26, 27, 2..."
num_sample_points_per_exp,21000
exp_begin_cutoff_idx,500
exp_end_cutoff_idx,-500
seconds_per_subject_train,210
seconds_per_subject_test,210


In [7]:
# Inspect the EER scorer object (the cell output shows its make_scorer repr).
utils_eer_scorer

make_scorer(utils_eer, greater_is_better=False)

In [8]:
# Load both experiment sessions for every subject listed in P.user_ids.
# User 47 is not part of that list; it is loaded on its own in a later cell.
# user_ids = [9]
df_exps_dict = load_data_frames(
    P.user_ids,
    P.exp_begin_cutoff_idx,
    P.exp_end_cutoff_idx,
    P.num_sample_points_per_exp,
)
raw_dfList_exp1 = df_exps_dict["dfList_exp1"]
raw_dfList_exp2 = df_exps_dict["dfList_exp2"]

Loading exp1 data:
1) accel_count: 28388, gyro_count: 31997
2) accel_count: 26010, gyro_count: 28954
3) accel_count: 28227, gyro_count: 31814
4) accel_count: 24860, gyro_count: 26105
5) accel_count: 24270, gyro_count: 24347
6) accel_count: 25012, gyro_count: 25060
7) accel_count: 25301, gyro_count: 25382
8) accel_count: 21975, gyro_count: 21658
19) accel_count: 24110, gyro_count: 25050
21) accel_count: 24326, gyro_count: 23809
22) accel_count: 29123, gyro_count: 28724
26) accel_count: 23148, gyro_count: 24291
27) accel_count: 24299, gyro_count: 23589
28) accel_count: 23807, gyro_count: 24523
29) accel_count: 24030, gyro_count: 23457
35) accel_count: 24388, gyro_count: 23673
36) accel_count: 24228, gyro_count: 24208
37) accel_count: 31945, gyro_count: 31816
38) accel_count: 22135, gyro_count: 22327
39) accel_count: 23573, gyro_count: 23459
40) accel_count: 23057, gyro_count: 24296
41) accel_count: 24102, gyro_count: 23681
42) accel_count: 24074, gyro_count: 24328
43) accel_count: 22631,

In [9]:
# Shuffle the positional indices of P.user_ids with a dedicated, seeded RNG and
# split them into a training part (first split_idx entries) and a test part.
randomized_data_idx = [*range(len(P.user_ids))]
random.Random(SEED).shuffle(randomized_data_idx)

split_idx = (len(randomized_data_idx) // 3) * 2 + 1
train_set, test_set = randomized_data_idx[:split_idx], randomized_data_idx[split_idx:]
# train_set = randomized_data_idx

# The report is printed twice on purpose to match the original cell: the second
# print used to follow an (now commented-out) train/test swap.
split_report = f"train_set: {train_set}\ntest_set: {test_set}"
print(split_report)
# train_set = test_set
# test_set = train_set
print(split_report)

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]
train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]
test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


In [10]:
# User 47 is loaded separately with a smaller number of sample points per
# experiment - presumably because its exp2 recording is shorter (see the counts
# in the cell output).
num_sample_points_per_exp_user_47 = 18000
df_exps_dict_user_47 = load_data_frames(
    [47],
    P.exp_begin_cutoff_idx,
    P.exp_end_cutoff_idx,
    num_sample_points_per_exp_user_47,
)

# Both the plain and the raw_-prefixed names refer to the same loaded lists.
dfList_exp1_user_47 = raw_dfList_exp1_user_47 = df_exps_dict_user_47["dfList_exp1"]
dfList_exp2_user_47 = raw_dfList_exp2_user_47 = df_exps_dict_user_47["dfList_exp2"]

Loading exp1 data:
47) accel_count: 22777, gyro_count: 22226
Loading exp2 data:
47) accel_count: 17718, gyro_count: 18353


In [11]:
# Sanity check: positional indices into P.user_ids that were assigned to training.
print(f"train_set: {train_set}")
# print(f"X_exp1_train_dic: {X_exp1_train_dic.keys()}")
# print(f"X_exp2_train_dic: {X_exp2_train_dic.keys()}")

train_set: [7, 24, 8, 11, 13, 19, 28, 21, 26, 3, 20, 22, 6, 25, 16, 1, 17, 27, 5]


In [12]:
# Sanity check: positional indices into P.user_ids that were held out for testing.
print(f"test_set: {test_set}")
# print(f"X_exp1_test_dic: {X_exp1_test_dic.keys()}")
# print(f"X_exp2_test_dic: {X_exp2_test_dic.keys()}")

test_set: [0, 12, 14, 9, 18, 23, 2, 15, 10, 4]


In [None]:
# init_experiment_params(exp_config=P)
# P.smoothing = None
# P.p=1. #UNCOMMENT WHEN YOU RESET IT FOR THE REST OF EXPS


# preprocessing_method=None
# time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
# train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/Manhattan-dist_{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
# test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/Manhattan-dist_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"
# with open(train_file_name, "w") as f:
#     f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
#     f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")
    
# with open(test_file_name, "w") as f:
#     f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
#     f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")
    



# EER_df_train_dict={}
# EER_df_test_dict={}


# dfList_dict={
#             "dfList_exp1": raw_dfList_exp1,
#             "dfList_exp2": raw_dfList_exp2,
#             "dfList_exp1_user_47": raw_dfList_exp1_user_47,
#             "dfList_exp2_user_47": raw_dfList_exp2_user_47
# }

# test_dict_key=DASH_MACRO_NUM
# EER_df_train_dict[test_dict_key] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P, 
#                                                                                                 extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
#                                                                                                 n_neighbors_params=N_NEIGHBORS_PARAMS)

# with open(train_file_name, "a") as f:
#     f.write("\n" + "-"*22 + "\n")
#     f.write(f"\ntest_dict_key: {test_dict_key}\n")
#     f.write(EER_df_train_dict[test_dict_key].to_string())

        
# mean_EER_train_dict={}
# for key in EER_df_train_dict:
#     mean_EER_train_dict[key] = EER_df_train_dict[key]["Mean_EER"].mean()
    
# #-------
# train_lst = list(mean_EER_train_dict.items())
# train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

# with open(train_file_name, "a") as f:
#     f.write("\n" + "-"*22 + "\n")
#     f.write(f"\nSorting based on Mean EER among windows\n")
#     for i in range(len(train_lst)):
#         f.write(f"{i+1}) {train_lst[i]}\n")
        

# min_key=train_lst[0][0]
# EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P, 
#                                                                                    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP, 
#                                                                                    best_param_df=EER_df_train_dict[min_key])
# with open(test_file_name, "a") as f:
#     f.write("\n" + "-"*22 + "\n")
#     f.write(f"\Top smoothing parameter/s: {min_key}\n")
#     f.write(EER_df_test_dict[min_key].to_string())
# #-------
# #-------
# key_column= ["cut_off_freq"]
# EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
# eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

# EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
# eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
# #-------

In [None]:
# WINDOW_SIZE_LST=[2000]
# Display the p value currently set on P (it appears as "Minkowski_P" in the
# result file names built by the experiment cells below).
P.p

# 0. No Smoothing
### Optimizing and Testing

In [None]:
# Baseline experiment: no smoothing. The raw data lists are handed to
# process_cv_fold_kNN once per fold of THREE_FOLD_CV.
init_experiment_params(exp_config=P)
P.smoothing = None

preprocessing_method=None
# Only written inside the log files below, never used as part of a path.
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

# open(..., "w") does not create missing parent directories, so make sure the
# results folder exists before the log files are initialised.
os.makedirs(f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}", exist_ok=True)
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"

# train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/tmp_train.txt"
# test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/tmp_test.txt"

# Start both log files from scratch with a timestamp banner and a file-name banner.
for log_file_name in (train_file_name, test_file_name):
    with open(log_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{log_file_name}" + "-"*40 + "\n")


EER_df_train_dict={}
EER_df_test_dict={}

# Unfiltered data for the regular subjects and for user 47.
dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": raw_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": raw_dfList_exp2_user_47
}

#---------
key_column= ["cut_off_freq"]
# No cut-off frequency applies without smoothing; DASH_MACRO_NUM is presumably
# the placeholder value for that column - confirm in SEED-CONSTANTS.
min_key=DASH_MACRO_NUM
#-----CV_FOLD-------
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(
                        cv_fold_idx=cv_fold_idx,
                        cv_sets=THREE_FOLD_CV,
                        dfList_dict=dfList_dict,
                        window_size_lst=WINDOW_SIZE_LST,
                        exp_config=P,
                        extract_features_dict=EXTRACT_WACA_features_DICT,
                        overlap=OVERLAP,
                        n_neighbors_params=N_NEIGHBORS_PARAMS,
                        train_file_name=train_file_name,
                        test_file_name=test_file_name,
                        preprocessing_params=min_key,
                        key_column=key_column,
                       )

# 1. Butterworth frequency Cut-off

## 1.1 Naive Approach
### Optimizing and Testing

In [None]:
# Butterworth / "Naive" preprocessing, cross-validated: reuse the cut-off
# frequency stored in the saved results JSON of this smoothing/preprocessing
# combination, filter every data list as a whole, then run all CV folds.
init_experiment_params(exp_config=P)
P.smoothing = "Butterworth"

preprocessing_method="Naive"
# This timestamp becomes part of a file name, so it must not contain ":"
# (not allowed in file names on Windows).
time_of_execution = time.strftime("%Y-%m-%d-%H-%M-%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}

# This JSON is written by the cut-off frequency grid-search cell (the one that
# saves "<test_file_name>_raw_df.json"), which sits further down in the notebook.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
# Fail early with an actionable message instead of a parser error from read_json.
if not os.path.isfile(old_test_hyperparameters_file_name):
    raise FileNotFoundError(
        f"{old_test_hyperparameters_file_name} not found; run the cut-off frequency "
        "grid-search cell for this smoothing/preprocessing combination first."
    )
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the hyperparameters this run is based on.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Positional access: take the first stored row however the index is labelled.
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"].iloc[0]
min_key=P.cut_off_freq
print(f"cut_off_freq: {P.cut_off_freq}")

# "Naive": both sessions of every subject are filtered as a whole.
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)


dfList_dict={
            "dfList_exp1": ffted_dfList_exp1,
            "dfList_exp2": ffted_dfList_exp2,
            "dfList_exp1_user_47": ffted_dfList_exp1_user_47,
            "dfList_exp2_user_47": ffted_dfList_exp2_user_47
}

#---------
key_column= ["cut_off_freq"]
#-----CV_FOLD-------
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(
                        cv_fold_idx=cv_fold_idx,
                        cv_sets=THREE_FOLD_CV,
                        dfList_dict=dfList_dict,
                        window_size_lst=WINDOW_SIZE_LST,
                        exp_config=P,
                        extract_features_dict=EXTRACT_WACA_features_DICT,
                        overlap=OVERLAP,
                        n_neighbors_params=N_NEIGHBORS_PARAMS,
                        train_file_name=train_file_name,
                        test_file_name=test_file_name,
                        preprocessing_params=min_key,
                        key_column=key_column,
                       )

In [None]:
# Butterworth / "Naive" preprocessing, grid search: for every candidate cut-off
# frequency, filter all data lists as a whole and tune n_neighbors on the
# training subjects; then evaluate the best cut-off on the test subjects.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butterworth"

preprocessing_method="Naive"
# Only written inside the log files below, never used as part of a path.
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
# open(..., "w") does not create missing parent directories.
os.makedirs(f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}", exist_ok=True)
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Start both log files from scratch with a timestamp banner and a file-name banner.
for log_file_name in (train_file_name, test_file_name):
    with open(log_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{log_file_name}" + "-"*40 + "\n")


def _naive_butterworth_dfList_dict(cut_off_freq):
    """Filter all four raw data lists with get_ffted_dfList at `cut_off_freq`.

    Returns a dict with the keys this notebook passes to the EER helpers
    ("dfList_exp1", "dfList_exp2" and their "_user_47" counterparts).
    """
    filter_kwargs = dict(
        cut_off_freq=cut_off_freq,
        filter_order=P.filter_order,
        sampling_freq=P.sampling_freq,
        filtfilt=P.filtfilt,
    )
    return {
        "dfList_exp1": get_ffted_dfList(raw_dfList_exp1, **filter_kwargs),
        "dfList_exp2": get_ffted_dfList(raw_dfList_exp2, **filter_kwargs),
        "dfList_exp1_user_47": get_ffted_dfList(raw_dfList_exp1_user_47, **filter_kwargs),
        "dfList_exp2_user_47": get_ffted_dfList(raw_dfList_exp2_user_47, **filter_kwargs),
    }


EER_df_train_dict={}
EER_df_test_dict={}
for cut_off_freq in tqdm(CUT_OFF_FREQ_RANGE):
    P.cut_off_freq=cut_off_freq
    print(f"cut_off_freq: {P.cut_off_freq}")

    dfList_dict = _naive_butterworth_dfList_dict(P.cut_off_freq)

    EER_df_train_dict[P.cut_off_freq] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
                                                                                                    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                                                    n_neighbors_params=N_NEIGHBORS_PARAMS)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\n", not "\c": the latter is an invalid escape that wrote a literal backslash.
        f.write(f"\ncut_off_freq: {P.cut_off_freq}\n")
        f.write(EER_df_train_dict[P.cut_off_freq].to_string())


# Mean (over window sizes) of the Mean_EER column, per cut-off frequency.
mean_EER_train_dict = {key: df["Mean_EER"].mean() for key, df in EER_df_train_dict.items()}

#-------
# Ascending order: it is an error rate, so the best cut-off comes first.
train_lst = sorted(mean_EER_train_dict.items(), key=lambda i: i[1])

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]
# After the loop, P.cut_off_freq and dfList_dict still belong to the LAST
# cut-off tried, not to the best one. Rebuild both for min_key so the test run
# really evaluates the selected cut-off frequency.
P.cut_off_freq = min_key
dfList_dict = _naive_butterworth_dfList_dict(min_key)
# Keep the filtered lists available under their usual notebook names.
ffted_dfList_exp1 = dfList_dict["dfList_exp1"]
ffted_dfList_exp2 = dfList_dict["dfList_exp2"]
ffted_dfList_exp1_user_47 = dfList_dict["dfList_exp1_user_47"]
ffted_dfList_exp2_user_47 = dfList_dict["dfList_exp2_user_47"]

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                      best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\n", not "\T": same invalid-escape typo as above.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON; the cross-validation cell for this
# smoothing/preprocessing combination reads "<test_file_name>_raw_df.json" back.
key_column= ["cut_off_freq"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: df["Mean_EER"].mean() for key, df in EER_df_test_dict.items()}

l = sorted(mean_EER_test_dict.items(), key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

## 1.2 Real-world Approach (filter applied per unknown window)
### Optimizing and Testing

In [None]:
# Butterworth / "Realworld-per_unknown_window" preprocessing, cross-validated:
# reuse the cut-off frequency stored in the saved results JSON of this
# smoothing/preprocessing combination, then run all CV folds.
init_experiment_params(exp_config=P)
P.smoothing = "Butterworth"

preprocessing_method="Realworld-per_unknown_window"

# This timestamp becomes part of a file name, so it must not contain ":"
# (not allowed in file names on Windows).
time_of_execution = time.strftime("%Y-%m-%d-%H-%M-%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}

# This JSON is written by the cut-off frequency grid-search cell (the one that
# saves "<test_file_name>_raw_df.json"), which sits further down in the notebook.
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
# Fail early with an actionable message instead of a parser error from read_json.
if not os.path.isfile(old_test_hyperparameters_file_name):
    raise FileNotFoundError(
        f"{old_test_hyperparameters_file_name} not found; run the cut-off frequency "
        "grid-search cell for this smoothing/preprocessing combination first."
    )
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the hyperparameters this run is based on.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Positional access: take the first stored row however the index is labelled.
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"].iloc[0]
min_key=P.cut_off_freq
print(f"cut_off_freq: {P.cut_off_freq}")

# Filter settings attached to P; presumably consumed by the utility functions
# to filter exp1 windows one at a time - confirm in the utility notebook.
P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": P.cut_off_freq,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }


# Only the exp2 lists are filtered up front; the exp1 lists are passed on raw.
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)


dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": ffted_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": ffted_dfList_exp2_user_47
}

key_column= ["cut_off_freq"]
#-----CV_FOLD-------
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(
                        cv_fold_idx=cv_fold_idx,
                        cv_sets=THREE_FOLD_CV,
                        dfList_dict=dfList_dict,
                        window_size_lst=WINDOW_SIZE_LST,
                        exp_config=P,
                        extract_features_dict=EXTRACT_WACA_features_DICT,
                        overlap=OVERLAP,
                        n_neighbors_params=N_NEIGHBORS_PARAMS,
                        train_file_name=train_file_name,
                        test_file_name=test_file_name,
                        preprocessing_params=min_key,
                        key_column=key_column,
                       )

In [None]:
# Butterworth / "Realworld-per_unknown_window" preprocessing, grid search: for
# every candidate cut-off frequency, tune n_neighbors on the training subjects;
# then evaluate the best cut-off on the test subjects.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butterworth"

preprocessing_method="Realworld-per_unknown_window"
# Only written inside the log files below, never used as part of a path.
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
# open(..., "w") does not create missing parent directories.
os.makedirs(f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}", exist_ok=True)
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Start both log files from scratch with a timestamp banner and a file-name banner.
for log_file_name in (train_file_name, test_file_name):
    with open(log_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{log_file_name}" + "-"*40 + "\n")


# Filter settings attached to P; presumably consumed by the utility functions
# to filter exp1 windows one at a time - confirm in the utility notebook.
P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }


def _realworld_butterworth_dfList_dict(cut_off_freq):
    """Build the data dict for one cut-off: exp1 lists stay raw, exp2 lists are filtered.

    The exp2 lists are filtered with get_ffted_dfList at `cut_off_freq`; the
    returned dict uses the keys this notebook passes to the EER helpers.
    """
    filter_kwargs = dict(
        cut_off_freq=cut_off_freq,
        filter_order=P.filter_order,
        sampling_freq=P.sampling_freq,
        filtfilt=P.filtfilt,
    )
    return {
        "dfList_exp1": raw_dfList_exp1,
        "dfList_exp2": get_ffted_dfList(raw_dfList_exp2, **filter_kwargs),
        "dfList_exp1_user_47": raw_dfList_exp1_user_47,
        "dfList_exp2_user_47": get_ffted_dfList(raw_dfList_exp2_user_47, **filter_kwargs),
    }


EER_df_train_dict={}
EER_df_test_dict={}

for cut_off_freq in tqdm(CUT_OFF_FREQ_RANGE):
    P.cut_off_freq=cut_off_freq
    P.Butter_per_win_argdict["cut_off_freq"]=cut_off_freq
    print(f"cut_off_freq: {P.cut_off_freq}")

    dfList_dict = _realworld_butterworth_dfList_dict(P.cut_off_freq)

    EER_df_train_dict[P.cut_off_freq] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
                                                                                                    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                                                    n_neighbors_params=N_NEIGHBORS_PARAMS)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\n", not "\c": the latter is an invalid escape that wrote a literal backslash.
        f.write(f"\ncut_off_freq: {P.cut_off_freq}\n")
        f.write(EER_df_train_dict[P.cut_off_freq].to_string())


# Mean (over window sizes) of the Mean_EER column, per cut-off frequency.
mean_EER_train_dict = {key: df["Mean_EER"].mean() for key, df in EER_df_train_dict.items()}

#-------
# Ascending order: it is an error rate, so the best cut-off comes first.
train_lst = sorted(mean_EER_train_dict.items(), key=lambda i: i[1])

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")


min_key=train_lst[0][0]
# After the loop, P.cut_off_freq, P.Butter_per_win_argdict and dfList_dict still
# belong to the LAST cut-off tried, not to the best one. Rebuild all three for
# min_key so the test run really evaluates the selected cut-off frequency.
P.cut_off_freq = min_key
P.Butter_per_win_argdict["cut_off_freq"] = min_key
dfList_dict = _realworld_butterworth_dfList_dict(min_key)
# Keep the filtered lists available under their usual notebook names.
ffted_dfList_exp2 = dfList_dict["dfList_exp2"]
ffted_dfList_exp2_user_47 = dfList_dict["dfList_exp2_user_47"]

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                      best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\n", not "\T": same invalid-escape typo as above.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON; the cross-validation cell for this
# smoothing/preprocessing combination reads "<test_file_name>_raw_df.json" back.
key_column= ["cut_off_freq"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: df["Mean_EER"].mean() for key, df in EER_df_test_dict.items()}

l = sorted(mean_EER_test_dict.items(), key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

# 2. Butterworth frequency Cut-off + EMA span
## 2.1 Naive Approach
### Optimizing and Testing

In [None]:
# Butterworth + EMA / "Naive" preprocessing, cross-validated: reuse the cut-off
# frequency and EMA span stored in the saved results JSON of this
# smoothing/preprocessing combination and prepare the smoothed data lists.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)
P.smoothing = "Butter+EMA"

preprocessing_method="Naive"
# This timestamp becomes part of a file name, so it must not contain ":"
# (not allowed in file names on Windows).
time_of_execution = time.strftime("%Y-%m-%d-%H-%M-%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"


EER_df_test_dict={}

# Previously saved results holding the selected "cut_off_freq" and "EMA_span".
old_test_hyperparameters_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
# Fail early with an actionable message instead of a parser error from read_json.
if not os.path.isfile(old_test_hyperparameters_file_name):
    raise FileNotFoundError(
        f"{old_test_hyperparameters_file_name} not found; the hyperparameter search for "
        "this smoothing/preprocessing combination must have saved its results before this cell can run."
    )
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
# Keep a timestamped copy of the hyperparameters this run is based on.
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Positional access: take the first stored row however the index is labelled.
P.cut_off_freq=old_test_hyperparameters_df["cut_off_freq"].iloc[0]
P.span=old_test_hyperparameters_df["EMA_span"].iloc[0]

# Tuple key, matching the two entries of key_column below.
min_key= P.cut_off_freq, P.span
print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")


# Step 1: Butterworth filter on every raw data list.
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

# Step 2: EMA on top of the filtered lists.
EMAed_dfList_exp1 = get_EMAed_dfList(ffted_dfList_exp1, span=P.span)
EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

# Same two steps for user 47.
ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

EMAed_dfList_exp1_user_47 = get_EMAed_dfList(ffted_dfList_exp1_user_47, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

dfList_dict={
            "dfList_exp1": EMAed_dfList_exp1,
            "dfList_exp2": EMAed_dfList_exp2,
            "dfList_exp1_user_47": EMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": EMAed_dfList_exp2_user_47
}


key_column= ["cut_off_freq", "EMA_span"]
#-----CV_FOLD-------
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(\
                        cv_fold_idx=cv_fold_idx, 
                        cv_sets=THREE_FOLD_CV, 
                        dfList_dict=dfList_dict, 
                        window_size_lst=WINDOW_SIZE_LST, 
                        exp_config=P, 
                        extract_features_dict=EXTRACT_WACA_features_DICT, 
                        overlap=OVERLAP, 
                        n_neighbors_params=N_NEIGHBORS_PARAMS, 
                        train_file_name=train_file_name, 
                        test_file_name=test_file_name, 
                        preprocessing_params=min_key, 
                        key_column=key_column,
                       )

In [None]:
# Section 2.1 (Butterworth + EMA, "Naive" preprocessing): random hyperparameter search.
# Each sampled (cut_off_freq, span) pair is applied to all four raw data lists and scored on
# the training subjects; the pair with the lowest mean training EER is then scored on the
# test subjects and the results are written to text/JSON report files.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Start both report files from scratch ("w") with a timestamp banner and a file-name banner.
for report_file_name in (train_file_name, test_file_name):
    with open(report_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{report_file_name}" + "-"*40 + "\n")

# Candidate grid: every (cut_off_freq, span) combination with both values in 1..49.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

# CHOICE_NUM_PAIRS distinct pairs are drawn with NumPy's global RNG, so the sample depends
# on the seed set at the top of the notebook and on any random draws made before this cell.
print(f"total cut_off_span_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
cut_off_span_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]


def _build_butter_ema_naive_dfList_dict(cut_off_freq, span):
    """Store the smoothing parameters in P and return the smoothed data lists.

    Butterworth filtering followed by EMA smoothing is applied to all four raw data
    lists (exp1, exp2 and their user-47 counterparts).

    Args:
        cut_off_freq: Butterworth cut-off frequency; written to ``P.cut_off_freq``.
        span: EMA span; written to ``P.span``.

    Returns:
        dict with the keys "dfList_exp1", "dfList_exp2", "dfList_exp1_user_47" and
        "dfList_exp2_user_47", as passed to the EER helpers via ``dfList_dict``.
    """
    P.cut_off_freq = cut_off_freq
    P.span = span
    butter_kwargs = dict(cut_off_freq=P.cut_off_freq, filter_order=P.filter_order,
                         sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    def smooth(raw_dfList):
        # Butterworth first, EMA on top of the filtered signal.
        return get_EMAed_dfList(get_ffted_dfList(raw_dfList, **butter_kwargs), span=P.span)

    return {
        "dfList_exp1": smooth(raw_dfList_exp1),
        "dfList_exp2": smooth(raw_dfList_exp2),
        "dfList_exp1_user_47": smooth(raw_dfList_exp1_user_47),
        "dfList_exp2_user_47": smooth(raw_dfList_exp2_user_47),
    }


EER_df_train_dict = {}
EER_df_test_dict = {}

for key_pair in tqdm(cut_off_span_pairs):
    key_pair = tuple(key_pair)  # hashable dictionary key
    cut_off_freq, span = key_pair
    print(f"cut_off_freq: {cut_off_freq}")
    print(f"span: {span}")

    dfList_dict = _build_butter_ema_naive_dfList_dict(cut_off_freq, span)

    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_optimize_num_neighbors(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        n_neighbors_params=N_NEIGHBORS_PARAMS)

    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # Was "\cut_off_freq" (invalid escape "\c", wrote a literal backslash); "\n" was intended.
        f.write(f"\ncut_off_freq, span: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())

# Rank the candidates by their mean "Mean_EER" over the window sizes.
mean_EER_train_dict = {key: EER_df_train_dict[key]["Mean_EER"].mean() for key in EER_df_train_dict}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")

min_key = train_lst[0][0]
# Bug fix: after the loop, P and dfList_dict still describe the LAST sampled pair, so the
# test EER reported under min_key used to be computed on data smoothed with the wrong
# parameters. Re-apply the winning pair before evaluating on the test subjects.
best_cut_off_freq, best_span = min_key
dfList_dict = _build_butter_ema_naive_dfList_dict(best_cut_off_freq, best_span)

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # Was "\Top" (invalid escape "\T"); "\n" was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())

#-------
# Export the test results as JSON next to the text report ("[:-4]" strips ".txt").
# The "_raw_df.json" file is what the CV re-run cell of this section reads its parameters from.
key_column = ["cut_off_freq", "EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: EER_df_test_dict[key]["Mean_EER"].mean() for key in EER_df_test_dict}
test_lst = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(test_lst, start=1):
        f.write(f"{rank}) {entry}\n")
        

## 2.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# Section 2.2 (Butterworth + EMA, "Realworld-per_unknown_window" preprocessing): cross-validated re-run.
# Hyperparameters are read back from the JSON file written by this section's random-search
# cell, so that file must already exist when this cell runs.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)
P.smoothing = "Butter+EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Report files handed to process_cv_fold_kNN, plus a timestamped "old_" name used for the backup below.
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Per-window Butterworth settings stored on P; the cut-off is filled in once it has been loaded.
P.Butter_per_win_argdict = dict(
    filter_order=P.filter_order,
    cut_off_freq=None,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)

EER_df_test_dict = {}

# Load the previously stored hyperparameters and keep a timestamped copy of them
# ("[:-4]" drops the ".txt" suffix before "_raw_df.json" is appended).
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

P.cut_off_freq = old_test_hyperparameters_df["cut_off_freq"][0]
P.span = old_test_hyperparameters_df["EMA_span"][0]

# Mirror the loaded values into the per-window settings.
P.Butter_per_win_argdict["cut_off_freq"] = P.cut_off_freq
P.EMA_per_win_span = P.span

min_key = (P.cut_off_freq, P.span)
print(f"cut_off_freq: {P.cut_off_freq}, EMA span: {P.span}")

# Only the exp2 lists are smoothed up front; the exp1 lists are passed on raw.
butter_kwargs = dict(
    cut_off_freq=P.cut_off_freq,
    filter_order=P.filter_order,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)

ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, **butter_kwargs)
EMAed_dfList_exp2 = get_EMAed_dfList(ffted_dfList_exp2, span=P.span)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, **butter_kwargs)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(ffted_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["cut_off_freq", "EMA_span"]

#-----CV_FOLD-------
# Arguments shared by every fold; only cv_fold_idx changes between calls.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# Section 2.2 (Butterworth + EMA, "Realworld-per_unknown_window" preprocessing): random
# hyperparameter search. Each sampled (cut_off_freq, span) pair is scored on the training
# subjects; the pair with the lowest mean training EER is then scored on the test subjects.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_2)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Start both report files from scratch ("w") with a timestamp banner and a file-name banner.
for report_file_name in (train_file_name, test_file_name):
    with open(report_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{report_file_name}" + "-"*40 + "\n")

# Candidate grid: every (cut_off_freq, span) combination with both values in 1..49.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

# CHOICE_NUM_PAIRS distinct pairs are drawn with NumPy's global RNG, so the sample depends
# on the seed set at the top of the notebook and on any random draws made before this cell.
print(f"total cut_off_span_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
cut_off_span_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]

# Per-window Butterworth settings stored on P; the cut-off is filled in per candidate.
P.Butter_per_win_argdict = {
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
}


def _build_butter_ema_realworld_dfList_dict(cut_off_freq, span):
    """Store the smoothing parameters in P and return the data lists for this approach.

    Only the exp2 lists (including user 47) are Butterworth-filtered and EMA-smoothed
    here; the exp1 lists are returned raw. The same parameters are mirrored into the
    per-window settings ``P.Butter_per_win_argdict`` and ``P.EMA_per_win_span``
    (presumably consumed by the EER helpers for the raw lists -- confirm).

    Args:
        cut_off_freq: Butterworth cut-off frequency.
        span: EMA span.

    Returns:
        dict with the keys "dfList_exp1", "dfList_exp2", "dfList_exp1_user_47" and
        "dfList_exp2_user_47", as passed to the EER helpers via ``dfList_dict``.
    """
    P.cut_off_freq = cut_off_freq
    P.Butter_per_win_argdict["cut_off_freq"] = cut_off_freq
    P.span = span
    P.EMA_per_win_span = span
    butter_kwargs = dict(cut_off_freq=P.cut_off_freq, filter_order=P.filter_order,
                         sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    def smooth(raw_dfList):
        # Butterworth first, EMA on top of the filtered signal.
        return get_EMAed_dfList(get_ffted_dfList(raw_dfList, **butter_kwargs), span=P.span)

    return {
        "dfList_exp1": raw_dfList_exp1,
        "dfList_exp2": smooth(raw_dfList_exp2),
        "dfList_exp1_user_47": raw_dfList_exp1_user_47,
        "dfList_exp2_user_47": smooth(raw_dfList_exp2_user_47),
    }


EER_df_train_dict = {}
EER_df_test_dict = {}

for key_pair in tqdm(cut_off_span_pairs):
    key_pair = tuple(key_pair)  # hashable dictionary key
    cut_off_freq, span = key_pair
    print(f"cut_off_freq: {cut_off_freq}")
    print(f"span: {span}")

    dfList_dict = _build_butter_ema_realworld_dfList_dict(cut_off_freq, span)

    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_optimize_num_neighbors(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        n_neighbors_params=N_NEIGHBORS_PARAMS)

    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # Was "\cut_off_freq" (invalid escape "\c", wrote a literal backslash); "\n" was intended.
        f.write(f"\ncut_off_freq, span: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())

# Rank the candidates by their mean "Mean_EER" over the window sizes.
mean_EER_train_dict = {key: EER_df_train_dict[key]["Mean_EER"].mean() for key in EER_df_train_dict}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")

min_key = train_lst[0][0]
# Bug fix: after the loop, P (including the per-window settings) and dfList_dict still
# describe the LAST sampled pair, so the test EER reported under min_key used to be
# computed with the wrong parameters. Re-apply the winning pair before testing.
best_cut_off_freq, best_span = min_key
dfList_dict = _build_butter_ema_realworld_dfList_dict(best_cut_off_freq, best_span)

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # Was "\Top" (invalid escape "\T"); "\n" was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())

#-------
# Export the test results as JSON next to the text report ("[:-4]" strips ".txt").
# The "_raw_df.json" file is what the CV re-run cell of this section reads its parameters from.
key_column = ["cut_off_freq", "EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: EER_df_test_dict[key]["Mean_EER"].mean() for key in EER_df_test_dict}
test_lst = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(test_lst, start=1):
        f.write(f"{rank}) {entry}\n")

# 3. EMA span
## 3.1 Naive Approach
### Optimizing and Testing

In [None]:
# Section 3.1 (EMA, "Naive" preprocessing): cross-validated re-run.
# The EMA span is read back from the JSON file written by this section's search cell,
# so that file must already exist when this cell runs.
init_experiment_params(exp_config=P)
P.smoothing = "EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Report files handed to process_cv_fold_kNN, plus a timestamped "old_" name used for the backup below.
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the previously stored hyperparameters and keep a timestamped copy of them
# ("[:-4]" drops the ".txt" suffix before "_raw_df.json" is appended).
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

P.span = old_test_hyperparameters_df["EMA_span"][0]
min_key = P.span
print(f"EMA span: {P.span}")

# Naive approach: EMA smoothing applied to all four raw data lists.
EMAed_dfList_exp1 = get_EMAed_dfList(raw_dfList_exp1, span=P.span)
EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)
EMAed_dfList_exp1_user_47 = get_EMAed_dfList(raw_dfList_exp1_user_47, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=EMAed_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=EMAed_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["EMA_span"]

#-----CV_FOLD-------
# Arguments shared by every fold; only cv_fold_idx changes between calls.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# Section 3.1 (EMA, "Naive" preprocessing): search over EMA_SPAN_RANGE.
# Each span is applied to all four raw data lists and scored on the training subjects;
# the span with the lowest mean training EER is then scored on the test subjects.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Start both report files from scratch ("w") with a timestamp banner and a file-name banner.
for report_file_name in (train_file_name, test_file_name):
    with open(report_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{report_file_name}" + "-"*40 + "\n")


def _build_ema_naive_dfList_dict(span):
    """Store the EMA span in P and return all four raw data lists EMA-smoothed with it.

    Args:
        span: EMA span; written to ``P.span``.

    Returns:
        dict with the keys "dfList_exp1", "dfList_exp2", "dfList_exp1_user_47" and
        "dfList_exp2_user_47", as passed to the EER helpers via ``dfList_dict``.
    """
    P.span = span
    return {
        "dfList_exp1": get_EMAed_dfList(raw_dfList_exp1, span=P.span),
        "dfList_exp2": get_EMAed_dfList(raw_dfList_exp2, span=P.span),
        "dfList_exp1_user_47": get_EMAed_dfList(raw_dfList_exp1_user_47, span=P.span),
        "dfList_exp2_user_47": get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span),
    }


EER_df_train_dict = {}
EER_df_test_dict = {}
for span in tqdm(EMA_SPAN_RANGE):
    print(f"EMA span: {span}")

    dfList_dict = _build_ema_naive_dfList_dict(span)

    EER_df_train_dict[P.span] = calculate_EER_different_window_sizes_optimize_num_neighbors(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        n_neighbors_params=N_NEIGHBORS_PARAMS)

    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # Was "\EMA span" (invalid escape "\E", wrote a literal backslash); "\n" was intended.
        f.write(f"\nEMA span: {P.span}\n")
        f.write(EER_df_train_dict[P.span].to_string())

# Rank the candidates by their mean "Mean_EER" over the window sizes.
mean_EER_train_dict = {key: EER_df_train_dict[key]["Mean_EER"].mean() for key in EER_df_train_dict}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")

min_key = train_lst[0][0]
# Bug fix: after the loop, P.span and dfList_dict still describe the LAST span of
# EMA_SPAN_RANGE, so the test EER reported under min_key used to be computed on data
# smoothed with the wrong span. Re-apply the winning span before testing.
dfList_dict = _build_ema_naive_dfList_dict(min_key)

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # Was "\Top" (invalid escape "\T"); "\n" was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())

#-------
# Export the test results as JSON next to the text report ("[:-4]" strips ".txt").
# The "_raw_df.json" file is what the CV re-run cell of this section reads its span from.
key_column = ["EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: EER_df_test_dict[key]["Mean_EER"].mean() for key in EER_df_test_dict}
test_lst = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(test_lst, start=1):
        f.write(f"{rank}) {entry}\n")

## 3.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# Section 3.2 (EMA, "Realworld-per_unknown_window" preprocessing): cross-validated re-run.
# The EMA span is read back from the JSON file written by this section's search cell,
# so that file must already exist when this cell runs.
init_experiment_params(exp_config=P)
P.smoothing = "EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Report files handed to process_cv_fold_kNN, plus a timestamped "old_" name used for the backup below.
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the previously stored hyperparameters and keep a timestamped copy of them
# ("[:-4]" drops the ".txt" suffix before "_raw_df.json" is appended).
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

P.span = old_test_hyperparameters_df["EMA_span"][0]
P.EMA_per_win_span = P.span  # mirror the span into the per-window setting
min_key = P.span
print(f"EMA span: {P.span}")

# Only the exp2 lists are smoothed up front; the exp1 lists are passed on raw.
EMAed_dfList_exp2 = get_EMAed_dfList(raw_dfList_exp2, span=P.span)
EMAed_dfList_exp2_user_47 = get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span)

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=EMAed_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=EMAed_dfList_exp2_user_47,
)

key_column = ["EMA_span"]

#-----CV_FOLD-------
# Arguments shared by every fold; only cv_fold_idx changes between calls.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# Section 3.2 (EMA, "Realworld-per_unknown_window" preprocessing): search over EMA_SPAN_RANGE.
# Each span is scored on the training subjects; the span with the lowest mean training
# EER is then scored on the test subjects.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "EMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Start both report files from scratch ("w") with a timestamp banner and a file-name banner.
for report_file_name in (train_file_name, test_file_name):
    with open(report_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{report_file_name}" + "-"*40 + "\n")


def _build_ema_realworld_dfList_dict(span):
    """Store the EMA span in P and return the data lists for this approach.

    Only the exp2 lists (including user 47) are EMA-smoothed here; the exp1 lists are
    returned raw. The span is mirrored into ``P.EMA_per_win_span`` (presumably consumed
    by the EER helpers for the raw lists -- confirm).

    Args:
        span: EMA span; written to ``P.span`` and ``P.EMA_per_win_span``.

    Returns:
        dict with the keys "dfList_exp1", "dfList_exp2", "dfList_exp1_user_47" and
        "dfList_exp2_user_47", as passed to the EER helpers via ``dfList_dict``.
    """
    P.span = span
    P.EMA_per_win_span = P.span
    return {
        "dfList_exp1": raw_dfList_exp1,
        "dfList_exp2": get_EMAed_dfList(raw_dfList_exp2, span=P.span),
        "dfList_exp1_user_47": raw_dfList_exp1_user_47,
        "dfList_exp2_user_47": get_EMAed_dfList(raw_dfList_exp2_user_47, span=P.span),
    }


EER_df_train_dict = {}
EER_df_test_dict = {}
for span in tqdm(EMA_SPAN_RANGE):
    print(f"EMA span: {span}")

    dfList_dict = _build_ema_realworld_dfList_dict(span)

    EER_df_train_dict[P.span] = calculate_EER_different_window_sizes_optimize_num_neighbors(
        dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
        n_neighbors_params=N_NEIGHBORS_PARAMS)

    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # Was "\EMA span" (invalid escape "\E", wrote a literal backslash); "\n" was intended.
        f.write(f"\nEMA span: {P.span}\n")
        f.write(EER_df_train_dict[P.span].to_string())

# Rank the candidates by their mean "Mean_EER" over the window sizes.
mean_EER_train_dict = {key: EER_df_train_dict[key]["Mean_EER"].mean() for key in EER_df_train_dict}

#-------
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])  # ascending: it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")

min_key = train_lst[0][0]
# Bug fix: after the loop, P.span / P.EMA_per_win_span and dfList_dict still describe the
# LAST span of EMA_SPAN_RANGE, so the test EER reported under min_key used to be computed
# with the wrong span. Re-apply the winning span before testing.
dfList_dict = _build_ema_realworld_dfList_dict(min_key)

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(
    dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
    best_param_df=EER_df_train_dict[min_key])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # Was "\Top" (invalid escape "\T"); "\n" was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())

#-------
# Export the test results as JSON next to the text report ("[:-4]" strips ".txt").
# The "_raw_df.json" file is what the CV re-run cell of this section reads its span from.
key_column = ["EMA_span"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {key: EER_df_test_dict[key]["Mean_EER"].mean() for key in EER_df_test_dict}
test_lst = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(test_lst, start=1):
        f.write(f"{rank}) {entry}\n")

# 4. SMA winsize
## 4.1 Naive Approach
### Optimizing and Testing

In [None]:
# Section 4.1 (SMA, "Naive" preprocessing): cross-validated re-run.
# The SMA window size is read back from a previously written
# "<smoothing>_Mean_EER_<prep>_df_test_dict_raw_df.json" file, which must already exist.
init_experiment_params(exp_config=P)
P.smoothing = "SMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Report files handed to process_cv_fold_kNN, plus a timestamped "old_" name used for the backup below.
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}-EER_df_test_dict.txt"
old_test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/old_{time_of_execution}_{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the previously stored hyperparameters and keep a timestamped copy of them
# ("[:-4]" drops the ".txt" suffix before "_raw_df.json" is appended).
old_test_hyperparameters_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

P.winsize = old_test_hyperparameters_df["SMA_winsize"][0]
min_key = P.winsize
print(f"SMA winsize: {P.winsize}")

# Naive approach: SMA smoothing applied to all four raw data lists.
SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict = dict(
    dfList_exp1=SMAed_dfList_exp1,
    dfList_exp2=SMAed_dfList_exp2,
    dfList_exp1_user_47=SMAed_dfList_exp1_user_47,
    dfList_exp2_user_47=SMAed_dfList_exp2_user_47,
)

key_column = ["SMA_winsize"]

#-----CV_FOLD-------
# Arguments shared by every fold; only cv_fold_idx changes between calls.
cv_fold_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **cv_fold_kwargs)

In [None]:
# SMA smoothing, "Naive" preprocessing: grid-search the SMA window size on the training users,
# rank the candidates by mean EER, then evaluate the best candidate on the test users.
# Every recording list (exp1/exp2, regular users and user 47) is smoothed as a whole here.
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Truncate both report files and stamp each with the run time and its own path.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")

with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")


EER_df_train_dict={}
EER_df_test_dict={}
for winsize in tqdm(SMA_WINSIZE_RANGE):
    P.winsize=winsize
    print(f"SMA winsize: {P.winsize}")

    SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
    SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)

    SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": SMAed_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    # One result frame per candidate window size (the helper also tunes n_neighbors, per its name).
    EER_df_train_dict[P.winsize] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
                                                                                               extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                                               n_neighbors_params=N_NEIGHBORS_PARAMS)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\S" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\nSMA winsize: {P.winsize}\n")
        f.write(EER_df_train_dict[P.winsize].to_string())


# Collapse each candidate's frame to a single score: the mean of its "Mean_EER" column.
mean_EER_train_dict={}
for key in EER_df_train_dict:
    mean_EER_train_dict[key] = EER_df_train_dict[key]["Mean_EER"].mean()

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(train_lst)):
        f.write(f"{i+1}) {train_lst[i]}\n")


min_key=train_lst[0][0]

# The loop above leaves P.winsize and dfList_dict set to the LAST window size tried, not the best
# one. Restore the winning window size and re-smooth the data so that the test evaluation stored
# under `min_key` is really computed with that smoothing.
P.winsize=min_key
SMAed_dfList_exp1 = get_SMAed_dfList(raw_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp1_user_47 = get_SMAed_dfList(raw_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)
dfList_dict={
            "dfList_exp1": SMAed_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                      best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report (".txt" is stripped via [:-4]).
key_column= ["SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={}
for key in EER_df_test_dict:
    mean_EER_test_dict[key] = EER_df_test_dict[key]["Mean_EER"].mean()

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(l)):
        f.write(f"{i+1}) {l[i]}\n")

## 4.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# SMA smoothing, "Realworld-per_unknown_window" preprocessing: run the kNN pipeline on every
# cross-validation fold, reusing an SMA window size stored in an earlier JSON result file.
init_experiment_params(exp_config=P)
P.smoothing = "SMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Shared path fragments; the resulting file names are identical to spelling each one out.
_results_dir = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}"
_run_tag = f"clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}"
_legacy_tag = f"{P.smoothing}_Mean_EER_{preprocessing_method}"

train_file_name = f"{_results_dir}/{_run_tag}-EER_df_train_dict.txt"
test_file_name = f"{_results_dir}/{_run_tag}-EER_df_test_dict.txt"
old_test_file_name = f"{_results_dir}/old_{time_of_execution}_{_legacy_tag}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the stored hyperparameters and keep a timestamped copy of them
# (the ".txt" suffix of old_test_file_name is dropped via [:-4]).
old_test_hyperparameters_file_name = f"{_results_dir}/{_legacy_tag}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Entry labelled 0 of the "SMA_winsize" column is the window size to reuse.
min_key = old_test_hyperparameters_df["SMA_winsize"][0]
P.winsize = min_key
P.SMA_per_win_winsize = min_key  # presumably consumed by the per-window smoothing step -- TODO confirm
print(f"SMA winsize: {P.winsize}")

# Only the exp2 recordings are smoothed up front; the exp1 recordings are passed on raw.
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=SMAed_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=SMAed_dfList_exp2_user_47,
)

key_column = ["SMA_winsize"]

#-----CV_FOLD-------
# Everything except the fold index is the same for each fold.
_cv_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **_cv_kwargs)

In [None]:
# SMA smoothing, "Realworld-per_unknown_window" preprocessing: grid-search the SMA window size on
# the training users, rank the candidates by mean EER, then evaluate the best one on the test users.
# Only the exp2 recordings are smoothed up front; the exp1 recordings are passed on raw and
# P.SMA_per_win_winsize carries the window size (presumably for per-window smoothing -- TODO confirm).
init_experiment_params(exp_config=P)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "SMA"

preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Truncate both report files and stamp each with the run time and its own path.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")

with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")


EER_df_train_dict={}
EER_df_test_dict={}
for winsize in tqdm(SMA_WINSIZE_RANGE):
    P.winsize=winsize
    P.SMA_per_win_winsize=P.winsize

    print(f"SMA winsize: {P.winsize}")

    SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)

    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    # One result frame per candidate window size (the helper also tunes n_neighbors, per its name).
    EER_df_train_dict[P.winsize] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
                                                                                               extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                                               n_neighbors_params=N_NEIGHBORS_PARAMS)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\S" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\nSMA winsize: {P.winsize}\n")
        f.write(EER_df_train_dict[P.winsize].to_string())


# Collapse each candidate's frame to a single score: the mean of its "Mean_EER" column.
mean_EER_train_dict={}
for key in EER_df_train_dict:
    mean_EER_train_dict[key] = EER_df_train_dict[key]["Mean_EER"].mean()

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(train_lst)):
        f.write(f"{i+1}) {train_lst[i]}\n")


min_key=train_lst[0][0]

# The loop above leaves P.winsize, P.SMA_per_win_winsize and dfList_dict set to the LAST window
# size tried, not the best one. Restore the winning window size and re-smooth the exp2 data so
# that the test evaluation stored under `min_key` is really computed with that smoothing.
P.winsize=min_key
P.SMA_per_win_winsize=P.winsize
SMAed_dfList_exp2 = get_SMAed_dfList(raw_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(raw_dfList_exp2_user_47, winsize=P.winsize)
dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                      best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report (".txt" is stripped via [:-4]).
key_column= ["SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={}
for key in EER_df_test_dict:
    mean_EER_test_dict[key] = EER_df_test_dict[key]["Mean_EER"].mean()

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(l)):
        f.write(f"{i+1}) {l[i]}\n")

# 5. Butterworth frequency Cut-off + SMA winsize
## 5.1 Naive Approach
### Optimizing and Testing

In [None]:
# Butterworth + SMA smoothing, "Naive" preprocessing: run the kNN pipeline on every
# cross-validation fold, reusing a (cut-off frequency, SMA window size) pair stored in an
# earlier JSON result file. All four recording lists are filtered and smoothed as a whole.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)
P.smoothing = "Butter+SMA"

preprocessing_method = "Naive"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Shared path fragments; the resulting file names are identical to spelling each one out.
_results_dir = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}"
_run_tag = f"clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}"
_legacy_tag = f"{P.smoothing}_Mean_EER_{preprocessing_method}"

train_file_name = f"{_results_dir}/{_run_tag}-EER_df_train_dict.txt"
test_file_name = f"{_results_dir}/{_run_tag}-EER_df_test_dict.txt"
old_test_file_name = f"{_results_dir}/old_{time_of_execution}_{_legacy_tag}_df_test_dict.txt"

EER_df_test_dict = {}

# Load the stored hyperparameters and keep a timestamped copy of them
# (the ".txt" suffix of old_test_file_name is dropped via [:-4]).
old_test_hyperparameters_file_name = f"{_results_dir}/{_legacy_tag}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Entry labelled 0 of each column holds the value to reuse.
P.winsize = old_test_hyperparameters_df["SMA_winsize"][0]
P.cut_off_freq = old_test_hyperparameters_df["cut_off_freq"][0]

min_key = (P.cut_off_freq, P.winsize)
print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")

# Stage 1: Butterworth filtering of the raw recordings (regular users, then user 47).
ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

# Stage 2: SMA smoothing of the filtered recordings.
SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)
SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict = dict(
    dfList_exp1=SMAed_dfList_exp1,
    dfList_exp2=SMAed_dfList_exp2,
    dfList_exp1_user_47=SMAed_dfList_exp1_user_47,
    dfList_exp2_user_47=SMAed_dfList_exp2_user_47,
)

key_column = ["cut_off_freq", "SMA_winsize"]

#-----CV_FOLD-------
# Everything except the fold index is the same for each fold.
_cv_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **_cv_kwargs)

In [None]:
# Butterworth + SMA smoothing, "Naive" preprocessing: random search over (cut_off_freq, winsize)
# pairs on the training users, rank the pairs by mean EER, then evaluate the best pair on the
# test users. All four recording lists are filtered and then smoothed as a whole.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"

preprocessing_method="Naive"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Truncate both report files and stamp each with the run time and its own path.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")

with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")


# All (cut_off_freq, winsize) combinations with both values in 1..49, one pair per row;
# CHOICE_NUM_PAIRS of them are drawn without replacement from NumPy's global RNG.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

print(f"total cut_off_winsize_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
cut_off_winsize_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]


EER_df_train_dict={}
EER_df_test_dict={}

for key_pair in tqdm(cut_off_winsize_pairs):

    # Array rows are unhashable; a tuple is needed to use the pair as a dict key.
    key_pair = tuple(key_pair)
    cut_off_freq, winsize = key_pair[0], key_pair[1]
    P.cut_off_freq=cut_off_freq
    P.winsize=winsize
    print(f"cut_off_freq: {P.cut_off_freq}")
    print(f"winsize: {P.winsize}")

    ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
    SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

    ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)

    SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": SMAed_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    # One result frame per candidate pair (the helper also tunes n_neighbors, per its name).
    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
                                                                                              extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                                              n_neighbors_params=N_NEIGHBORS_PARAMS)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\ncut_off_freq, winsize: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())


# Collapse each candidate's frame to a single score: the mean of its "Mean_EER" column.
mean_EER_train_dict={}
for key in EER_df_train_dict:
    mean_EER_train_dict[key] = EER_df_train_dict[key]["Mean_EER"].mean()

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(train_lst)):
        f.write(f"{i+1}) {train_lst[i]}\n")


min_key=train_lst[0][0]

# The loop above leaves P.cut_off_freq, P.winsize and dfList_dict set to the LAST pair tried, not
# the best one. Restore the winning pair and re-filter/re-smooth the data so that the test
# evaluation stored under `min_key` is really computed with that preprocessing.
P.cut_off_freq, P.winsize = min_key

ffted_dfList_exp1 = get_ffted_dfList(raw_dfList_exp1, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp1 = get_SMAed_dfList(ffted_dfList_exp1, winsize=P.winsize)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

ffted_dfList_exp1_user_47 = get_ffted_dfList(raw_dfList_exp1_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp1_user_47 = get_SMAed_dfList(ffted_dfList_exp1_user_47, winsize=P.winsize)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict={
            "dfList_exp1": SMAed_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": SMAed_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                      best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report (".txt" is stripped via [:-4]).
key_column= ["cut_off_freq", "SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={}
for key in EER_df_test_dict:
    mean_EER_test_dict[key] = EER_df_test_dict[key]["Mean_EER"].mean()

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(l)):
        f.write(f"{i+1}) {l[i]}\n")

## 5.2 Realworld Approach (per unknown window application of filter)
### Optimizing and Testing

In [None]:
# Butterworth + SMA smoothing, "Realworld-per_unknown_window" preprocessing: run the kNN pipeline
# on every cross-validation fold, reusing a (cut-off frequency, SMA window size) pair stored in an
# earlier JSON result file. Only the exp2 recordings are filtered and smoothed up front; the exp1
# recordings are passed on raw.
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)
P.smoothing = "Butter+SMA"

preprocessing_method = "Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y-%m-%d-%H:%M:%S")

# Shared path fragments; the resulting file names are identical to spelling each one out.
_results_dir = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}"
_run_tag = f"clip={P.scaler_clip}-Minkowski_P={P.p}-Smoothing={P.smoothing}-Prep={preprocessing_method}"
_legacy_tag = f"{P.smoothing}_Mean_EER_{preprocessing_method}"

train_file_name = f"{_results_dir}/{_run_tag}-EER_df_train_dict.txt"
test_file_name = f"{_results_dir}/{_run_tag}-EER_df_test_dict.txt"
old_test_file_name = f"{_results_dir}/old_{time_of_execution}_{_legacy_tag}_df_test_dict.txt"

# Filter arguments stored on the config (presumably for per-window filtering -- TODO confirm);
# the cut-off frequency is filled in once the stored hyperparameters are loaded.
P.Butter_per_win_argdict = dict(
    filter_order=P.filter_order,
    cut_off_freq=None,
    sampling_freq=P.sampling_freq,
    filtfilt=P.filtfilt,
)

EER_df_test_dict = {}

# Load the stored hyperparameters and keep a timestamped copy of them
# (the ".txt" suffix of old_test_file_name is dropped via [:-4]).
old_test_hyperparameters_file_name = f"{_results_dir}/{_legacy_tag}_df_test_dict_raw_df.json"
old_test_hyperparameters_df = pd.read_json(old_test_hyperparameters_file_name)
old_test_hyperparameters_df.to_json(f'{old_test_file_name[:-4]}_raw_df.json')

# Entry labelled 0 of each column holds the value to reuse.
P.winsize = old_test_hyperparameters_df["SMA_winsize"][0]
P.cut_off_freq = old_test_hyperparameters_df["cut_off_freq"][0]

P.Butter_per_win_argdict["cut_off_freq"] = P.cut_off_freq
P.SMA_per_win_winsize = P.winsize

min_key = (P.cut_off_freq, P.winsize)
print(f"cut_off_freq: {P.cut_off_freq}, winsize: {P.winsize}")

# Filter, then smooth, the exp2 recordings (regular users, then user 47).
ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=SMAed_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=SMAed_dfList_exp2_user_47,
)

key_column = ["cut_off_freq", "SMA_winsize"]

#-----CV_FOLD-------
# Everything except the fold index is the same for each fold.
_cv_kwargs = dict(
    cv_sets=THREE_FOLD_CV,
    dfList_dict=dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    exp_config=P,
    extract_features_dict=EXTRACT_WACA_features_DICT,
    overlap=OVERLAP,
    n_neighbors_params=N_NEIGHBORS_PARAMS,
    train_file_name=train_file_name,
    test_file_name=test_file_name,
    preprocessing_params=min_key,
    key_column=key_column,
)
for cv_fold_idx in range(len(THREE_FOLD_CV)):
    process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **_cv_kwargs)

In [None]:
# Sanity check of the smoothing mode and preprocessing method currently in effect.
# Only the last expression of a cell is displayed, so a bare `P.smoothing` on its own line is
# evaluated and thrown away; a single tuple expression shows both values.
P.smoothing, preprocessing_method

In [None]:
# Butterworth + SMA smoothing, "Realworld-per_unknown_window" preprocessing: random search over
# (cut_off_freq, winsize) pairs on the training users, rank the pairs by mean EER, then evaluate
# the best pair on the test users. Only the exp2 recordings are filtered and smoothed up front;
# the exp1 recordings are passed on raw, with P.Butter_per_win_argdict and P.SMA_per_win_winsize
# carrying the parameters (presumably for per-window filtering/smoothing -- TODO confirm).
init_experiment_params(exp_config=P, exp_num=EXP_NUM_5)

print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")
P.smoothing = "Butter+SMA"

preprocessing_method="Realworld-per_unknown_window"
time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_train_dict.txt"
test_file_name=f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/{P.smoothing}_Mean_EER_{preprocessing_method}_df_test_dict.txt"

# Truncate both report files and stamp each with the run time and its own path.
with open(train_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{train_file_name}" + "-"*40 + "\n")

with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")


# All (cut_off_freq, winsize) combinations with both values in 1..49, one pair per row;
# CHOICE_NUM_PAIRS of them are drawn without replacement from NumPy's global RNG.
indices = list(range(1, 50))
mesh = np.array(np.meshgrid(indices, indices))
index_pairs = mesh.T.reshape(-1, 2)

print(f"total cut_off_winsize_pairs: {index_pairs.shape}, choice_num: {CHOICE_NUM_PAIRS}")
cut_off_winsize_pairs = index_pairs[np.random.choice(index_pairs.shape[0], size=CHOICE_NUM_PAIRS, replace=False), :]


# Filter arguments stored on the config; the cut-off frequency is set per candidate below.
P.Butter_per_win_argdict={
    "filter_order": P.filter_order,
    "cut_off_freq": None,
    "sampling_freq": P.sampling_freq,
    "filtfilt": P.filtfilt,
    }


EER_df_train_dict={}
EER_df_test_dict={}

for key_pair in tqdm(cut_off_winsize_pairs):

    # Array rows are unhashable; a tuple is needed to use the pair as a dict key.
    key_pair = tuple(key_pair)
    cut_off_freq, winsize = key_pair[0], key_pair[1]
    P.cut_off_freq=cut_off_freq
    P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq

    P.winsize=winsize
    P.SMA_per_win_winsize=P.winsize

    print(f"cut_off_freq: {P.cut_off_freq}")
    print(f"winsize: {P.winsize}")

    ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

    ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
    SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

    dfList_dict={
                "dfList_exp1": raw_dfList_exp1,
                "dfList_exp2": SMAed_dfList_exp2,
                "dfList_exp1_user_47": raw_dfList_exp1_user_47,
                "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
    }

    # One result frame per candidate pair (the helper also tunes n_neighbors, per its name).
    EER_df_train_dict[key_pair] = calculate_EER_different_window_sizes_optimize_num_neighbors(dfList_dict, window_size_lst=WINDOW_SIZE_LST, train_set=train_set, exp_config=P,
                                                                                              extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                                              n_neighbors_params=N_NEIGHBORS_PARAMS)
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        # "\c" was an invalid escape sequence (it wrote a literal backslash); a newline was intended.
        f.write(f"\ncut_off_freq, winsize: {key_pair}\n")
        f.write(EER_df_train_dict[key_pair].to_string())


# Collapse each candidate's frame to a single score: the mean of its "Mean_EER" column.
mean_EER_train_dict={}
for key in EER_df_train_dict:
    mean_EER_train_dict[key] = EER_df_train_dict[key]["Mean_EER"].mean()

#-------
train_lst = list(mean_EER_train_dict.items())
train_lst.sort(key=lambda i: i[1], reverse=False) #sort ascending as it is an error rate

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(train_lst)):
        f.write(f"{i+1}) {train_lst[i]}\n")


min_key=train_lst[0][0]

# The loop above leaves P (cut_off_freq, winsize and their per-window counterparts) and
# dfList_dict set to the LAST pair tried, not the best one. Restore the winning pair and
# re-filter/re-smooth the exp2 data so that the test evaluation stored under `min_key` is
# really computed with that preprocessing.
P.cut_off_freq, P.winsize = min_key
P.Butter_per_win_argdict["cut_off_freq"]=P.cut_off_freq
P.SMA_per_win_winsize=P.winsize

ffted_dfList_exp2 = get_ffted_dfList(raw_dfList_exp2, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2 = get_SMAed_dfList(ffted_dfList_exp2, winsize=P.winsize)

ffted_dfList_exp2_user_47 = get_ffted_dfList(raw_dfList_exp2_user_47, cut_off_freq=P.cut_off_freq, filter_order=P.filter_order, sampling_freq=P.sampling_freq, filtfilt=P.filtfilt)
SMAed_dfList_exp2_user_47 = get_SMAed_dfList(ffted_dfList_exp2_user_47, winsize=P.winsize)

dfList_dict={
            "dfList_exp1": raw_dfList_exp1,
            "dfList_exp2": SMAed_dfList_exp2,
            "dfList_exp1_user_47": raw_dfList_exp1_user_47,
            "dfList_exp2_user_47": SMAed_dfList_exp2_user_47
}

EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(dfList_dict, window_size_lst=WINDOW_SIZE_LST, test_set=test_set, exp_config=P,
                                                                      extract_features_func_dict=EXTRACT_WACA_features_DICT, overlap=OVERLAP,
                                                                      best_param_df=EER_df_train_dict[min_key])
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    # "\T" was an invalid escape sequence; a newline was intended.
    f.write(f"\nTop smoothing parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Persist the test results as JSON next to the text report (".txt" is stripped via [:-4]).
key_column= ["cut_off_freq", "SMA_winsize"]
EER_df_test_dict_df=make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df =make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict={}
for key in EER_df_test_dict:
    mean_EER_test_dict[key] = EER_df_test_dict[key]["Mean_EER"].mean()

l = list(mean_EER_test_dict.items())
l.sort(key=lambda i: i[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for i in range(len(l)):
        f.write(f"{i+1}) {l[i]}\n")

# 6. The effect of Varying Overlap

In [None]:
# Overlap experiment: for every overlap percentage in OVERLAP_EXP_RANGE, run the kNN pipeline on
# every cross-validation fold using the raw (unsmoothed) recordings.
init_experiment_params(exp_config=P)

time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")

dfList_dict = dict(
    dfList_exp1=raw_dfList_exp1,
    dfList_exp2=raw_dfList_exp2,
    dfList_exp1_user_47=raw_dfList_exp1_user_47,
    dfList_exp2_user_47=raw_dfList_exp2_user_47,
)

for overlap_percent in tqdm(OVERLAP_EXP_RANGE):

    # Report files are named after the percentage value, before it is scaled to a fraction.
    train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/overlap={overlap_percent}_Mean_EER_df_train_dict.txt"
    test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/overlap={overlap_percent}_Mean_EER_df_test_dict.txt"

    # Truncate both report files and stamp them with the run time.
    for _report_path in (train_file_name, test_file_name):
        with open(_report_path, "w") as f:
            f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")

    # Percentage -> fraction.
    overlap = overlap_percent * 0.01
    print(f"overlap: {overlap}")

    # Cap the n_neighbors candidates: the upper bound is the smaller of the number of index
    # entries getIndices returns for a 2000-sample window at this overlap and
    # N_NEIGHBORS_PARAMS[-1]+1. np.arange excludes that bound.
    max_window_size = 2000
    step_width = int(max_window_size * (1 - overlap))
    _window_indices = getIndices(sampleSize=max_window_size, step=step_width, numSamplePoints=P.num_sample_points_per_exp)
    max_num_windows = min(len(_window_indices), N_NEIGHBORS_PARAMS[-1] + 1)
    n_neighbors_params = np.arange(1, max_num_windows)

    key_column = ["overlap"]

    #-----CV_FOLD-------
    # Everything except the fold index is the same for each fold.
    _cv_kwargs = dict(
        cv_sets=THREE_FOLD_CV,
        dfList_dict=dfList_dict,
        window_size_lst=WINDOW_SIZE_LST,
        exp_config=P,
        extract_features_dict=EXTRACT_WACA_features_DICT,
        overlap=overlap,
        n_neighbors_params=n_neighbors_params,
        train_file_name=train_file_name,
        test_file_name=test_file_name,
        preprocessing_params=overlap,
        key_column=key_column,
    )
    for cv_fold_idx in range(len(THREE_FOLD_CV)):
        process_cv_fold_kNN(cv_fold_idx=cv_fold_idx, **_cv_kwargs)

reseting experiment params successful!


  0%|          | 0/25 [00:00<?, ?it/s]

overlap: 0.01
train_set: {0, 1, 2, 3, 4, 5, 6, 9, 10, 12, 14, 15, 16, 18, 19, 22, 23, 24, 25, 28}
test_set: {7, 8, 11, 13, 17, 20, 21, 26, 27, 29}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  8.529141063801944
MakeWACAXExpDicUnknown Time:  57.842547392472625
Done extracting features



 11%|█         | 1/9 [02:23<19:05, 143.20s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  4.104598788544536
MakeWACAXExpDicUnknown Time:  44.01284077949822
Done extracting features



 22%|██▏       | 2/9 [04:39<16:13, 139.14s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  3.1239006482064724
MakeWACAXExpDicUnknown Time:  29.61167077999562
Done extracting features



 33%|███▎      | 3/9 [05:23<09:35, 95.84s/it] [A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.9025398483499885
MakeWACAXExpDicUnknown Time:  20.479793081991374
Done extracting features



 44%|████▍     | 4/9 [05:58<05:57, 71.55s/it][A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.589440457522869
MakeWACAXExpDicUnknown Time:  16.079033163376153
Done extracting features



 56%|█████▌    | 5/9 [06:26<03:44, 56.05s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  2.5765244904905558
MakeWACAXExpDicUnknown Time:  15.51045602466911
Done extracting features



 67%|██████▋   | 6/9 [06:58<02:23, 47.82s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  3.000904256477952
MakeWACAXExpDicUnknown Time:  16.347674359567463
Done extracting features



 78%|███████▊  | 7/9 [07:31<01:26, 43.12s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  3.1800068616867065
MakeWACAXExpDicUnknown Time:  14.07335225213319
Done extracting features



 89%|████████▉ | 8/9 [08:03<00:39, 39.36s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.524534814991057
MakeWACAXExpDicUnknown Time:  7.583844147622585
Done extracting features



100%|██████████| 9/9 [08:24<00:00, 56.00s/it][A

  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 170
len_exp2_user_47: 146
MakeWACAXExpDicOwner Time:  7.727369498461485
MakeWACAXExpDicUnknown Time:  53.58146076556295
Done extracting features



 11%|█         | 1/9 [01:07<08:57, 67.21s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 85
len_exp2_user_47: 72
MakeWACAXExpDicOwner Time:  3.986661206930876
MakeWACAXExpDicUnknown Time:  24.7096864329651
Done extracting features



 22%|██▏       | 2/9 [01:40<05:32, 47.50s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 42
len_exp2_user_47: 36
MakeWACAXExpDicOwner Time:  2.526345049031079
MakeWACAXExpDicUnknown Time:  15.35869756899774
Done extracting features



 33%|███▎      | 3/9 [01:59<03:26, 34.40s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 28
len_exp2_user_47: 24
MakeWACAXExpDicOwner Time:  1.5791514674201608
MakeWACAXExpDicUnknown Time:  7.735887188464403
Done extracting features



 44%|████▍     | 4/9 [02:09<02:03, 24.72s/it][A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 21
len_exp2_user_47: 18
MakeWACAXExpDicOwner Time:  1.1907092239707708
MakeWACAXExpDicUnknown Time:  5.696951903402805
Done extracting features



 56%|█████▌    | 5/9 [02:17<01:13, 18.48s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 16
len_exp2_user_47: 14
MakeWACAXExpDicOwner Time:  0.8351087812334299
MakeWACAXExpDicUnknown Time:  5.049308926798403
Done extracting features



 67%|██████▋   | 6/9 [02:23<00:43, 14.37s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 14
len_exp2_user_47: 12
MakeWACAXExpDicOwner Time:  0.9827857287600636
MakeWACAXExpDicUnknown Time:  4.0595796490088105
Done extracting features



 78%|███████▊  | 7/9 [02:28<00:22, 11.48s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 12
len_exp2_user_47: 10
MakeWACAXExpDicOwner Time:  0.6299756839871407



 89%|████████▉ | 8/9 [02:32<00:09,  9.09s/it][A

MakeWACAXExpDicUnknown Time:  3.0040828743949533
Done extracting features
window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 10
len_exp2_user_47: 9
MakeWACAXExpDicOwner Time:  0.4312313152477145



100%|██████████| 9/9 [02:36<00:00, 17.35s/it][A


MakeWACAXExpDicUnknown Time:  2.536687278188765
Done extracting features
train_set: {0, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 23, 26, 27, 29}
test_set: {1, 3, 5, 6, 16, 19, 22, 24, 25, 28}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 170
len_exp2_user_47: 146
MakeWACAXExpDicOwner Time:  13.964055831544101
MakeWACAXExpDicUnknown Time:  125.42391323298216
Done extracting features



 11%|█         | 1/9 [04:40<37:26, 280.81s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 85
len_exp2_user_47: 72
MakeWACAXExpDicOwner Time:  8.592084778472781
MakeWACAXExpDicUnknown Time:  61.31079043354839
Done extracting features



 22%|██▏       | 2/9 [07:54<26:45, 229.41s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 42
len_exp2_user_47: 36
MakeWACAXExpDicOwner Time:  2.3949658358469605
MakeWACAXExpDicUnknown Time:  24.81192760542035
Done extracting features



 33%|███▎      | 3/9 [08:31<14:11, 141.84s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 28
len_exp2_user_47: 24
MakeWACAXExpDicOwner Time:  2.3614584440365434
MakeWACAXExpDicUnknown Time:  16.555331209674478
Done extracting features



 44%|████▍     | 4/9 [09:00<08:05, 97.09s/it] [A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 21
len_exp2_user_47: 18
MakeWACAXExpDicOwner Time:  2.106020174920559
MakeWACAXExpDicUnknown Time:  13.087629702873528
Done extracting features



 56%|█████▌    | 5/9 [09:24<04:43, 70.79s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 16
len_exp2_user_47: 14
MakeWACAXExpDicOwner Time:  2.1149986535310745
MakeWACAXExpDicUnknown Time:  11.702674079686403
Done extracting features



 67%|██████▋   | 6/9 [09:47<02:43, 54.62s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 14
len_exp2_user_47: 12
MakeWACAXExpDicOwner Time:  2.8999367421492934
MakeWACAXExpDicUnknown Time:  14.8975644512102
Done extracting features



 78%|███████▊  | 7/9 [10:17<01:33, 46.50s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 12
len_exp2_user_47: 10
MakeWACAXExpDicOwner Time:  2.3820012593641877
MakeWACAXExpDicUnknown Time:  12.303803419694304
Done extracting features



 89%|████████▉ | 8/9 [10:42<00:39, 39.72s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 10
len_exp2_user_47: 9
MakeWACAXExpDicOwner Time:  1.8222108287736773
MakeWACAXExpDicUnknown Time:  10.550780978053808
Done extracting features



100%|██████████| 9/9 [11:06<00:00, 74.08s/it][A

  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  6.684238580986857
MakeWACAXExpDicUnknown Time:  41.584088671952486
Done extracting features



 11%|█         | 1/9 [00:53<07:07, 53.39s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  3.9444988081231713
MakeWACAXExpDicUnknown Time:  19.584825783967972
Done extracting features



 22%|██▏       | 2/9 [01:19<04:22, 37.46s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.1426727278158069



 33%|███▎      | 3/9 [01:30<02:32, 25.37s/it][A

MakeWACAXExpDicUnknown Time:  9.295451014302671
Done extracting features
window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.7563139069825411



 44%|████▍     | 4/9 [01:36<01:29, 17.82s/it][A

MakeWACAXExpDicUnknown Time:  5.1443654438480735
Done extracting features
window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.8350281175225973
MakeWACAXExpDicUnknown Time:  5.285982830449939
Done extracting features



 56%|█████▌    | 5/9 [01:43<00:54, 13.74s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.8127550231292844



 67%|██████▋   | 6/9 [01:48<00:32, 10.89s/it][A

MakeWACAXExpDicUnknown Time:  4.165617844089866
Done extracting features
window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.6206342205405235



 78%|███████▊  | 7/9 [01:53<00:17,  8.97s/it][A

MakeWACAXExpDicUnknown Time:  4.038254698738456
Done extracting features
window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.4406109619885683



 89%|████████▉ | 8/9 [01:56<00:07,  7.02s/it][A

MakeWACAXExpDicUnknown Time:  2.094666037708521
Done extracting features
window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.33545260690152645



100%|██████████| 9/9 [01:58<00:00, 13.22s/it][A


MakeWACAXExpDicUnknown Time:  1.6946416683495045
Done extracting features
train_set: {1, 3, 5, 6, 7, 8, 11, 13, 16, 17, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29}
test_set: {0, 2, 4, 9, 10, 12, 14, 15, 18, 23}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 170
len_exp2_user_47: 146
MakeWACAXExpDicOwner Time:  6.665308497846127
MakeWACAXExpDicUnknown Time:  74.66903078276664
Done extracting features



 11%|█         | 1/9 [03:09<25:18, 189.78s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 85
len_exp2_user_47: 72
MakeWACAXExpDicOwner Time:  5.825105699710548
MakeWACAXExpDicUnknown Time:  61.63219138979912
Done extracting features



 22%|██▏       | 2/9 [06:03<21:03, 180.50s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 42
len_exp2_user_47: 36
MakeWACAXExpDicOwner Time:  3.3274094499647617
MakeWACAXExpDicUnknown Time:  26.67732273414731
Done extracting features



 33%|███▎      | 3/9 [06:45<11:43, 117.30s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 28
len_exp2_user_47: 24
MakeWACAXExpDicOwner Time:  2.713779370300472
MakeWACAXExpDicUnknown Time:  20.56868146918714
Done extracting features



 44%|████▍     | 4/9 [07:21<07:04, 84.93s/it] [A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 21
len_exp2_user_47: 18
MakeWACAXExpDicOwner Time:  3.284764466807246
MakeWACAXExpDicUnknown Time:  12.495940098538995
Done extracting features



 56%|█████▌    | 5/9 [07:47<04:14, 63.72s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 16
len_exp2_user_47: 14
MakeWACAXExpDicOwner Time:  2.295332412235439
MakeWACAXExpDicUnknown Time:  12.284452006220818
Done extracting features



 67%|██████▋   | 6/9 [08:14<02:34, 51.37s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 14
len_exp2_user_47: 12
MakeWACAXExpDicOwner Time:  2.269710618071258
MakeWACAXExpDicUnknown Time:  8.419622788205743
Done extracting features



 78%|███████▊  | 7/9 [08:31<01:20, 40.05s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 12
len_exp2_user_47: 10
MakeWACAXExpDicOwner Time:  1.1117931958287954
MakeWACAXExpDicUnknown Time:  8.458165244199336
Done extracting features



 89%|████████▉ | 8/9 [08:49<00:32, 32.99s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 10
len_exp2_user_47: 9
MakeWACAXExpDicOwner Time:  1.5426009558141232
MakeWACAXExpDicUnknown Time:  8.066852522082627
Done extracting features



100%|██████████| 9/9 [09:08<00:00, 60.95s/it][A

  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  7.814072878099978
MakeWACAXExpDicUnknown Time:  43.495653483085334
Done extracting features



 11%|█         | 1/9 [00:58<07:49, 58.67s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  2.8536337167024612
MakeWACAXExpDicUnknown Time:  19.56613949779421
Done extracting features



 22%|██▏       | 2/9 [01:27<04:46, 40.87s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  2.291360864415765
MakeWACAXExpDicUnknown Time:  11.59154006652534
Done extracting features



 33%|███▎      | 3/9 [01:41<02:54, 29.01s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.678167698904872
MakeWACAXExpDicUnknown Time:  9.702310654334724
Done extracting features



 44%|████▍     | 4/9 [01:54<01:51, 22.35s/it][A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.7305980706587434
MakeWACAXExpDicUnknown Time:  6.557984737679362
Done extracting features



 56%|█████▌    | 5/9 [02:02<01:09, 17.49s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.033795777708292



 67%|██████▋   | 6/9 [02:08<00:40, 13.58s/it][A

MakeWACAXExpDicUnknown Time:  4.596187447197735
Done extracting features
window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.5034039858728647
MakeWACAXExpDicUnknown Time:  4.295810552313924
Done extracting features



 78%|███████▊  | 7/9 [02:14<00:21, 10.87s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  1.008501029573381
MakeWACAXExpDicUnknown Time:  4.010229877196252
Done extracting features



 89%|████████▉ | 8/9 [02:19<00:09,  9.16s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 10
len(exp2_df_user_set_dict): 10
MakeWACAXExpDicOwner Time:  0.7144563039764762
MakeWACAXExpDicUnknown Time:  2.9413887467235327
Done extracting features



100%|██████████| 9/9 [02:23<00:00, 15.98s/it][A
  4%|▍         | 1/25 [35:38<14:15:21, 2138.39s/it]

overlap: 0.05
train_set: {0, 1, 2, 3, 4, 5, 6, 9, 10, 12, 14, 15, 16, 18, 19, 22, 23, 24, 25, 28}
test_set: {7, 8, 11, 13, 17, 20, 21, 26, 27, 29}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  17.40810028463602
MakeWACAXExpDicUnknown Time:  110.20090768299997
Done extracting features



 11%|█         | 1/9 [04:49<38:33, 289.21s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  9.691141751594841
MakeWACAXExpDicUnknown Time:  55.02368037123233
Done extracting features



 22%|██▏       | 2/9 [07:53<26:32, 227.43s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  4.2208108846098185
MakeWACAXExpDicUnknown Time:  25.02827851381153
Done extracting features



 33%|███▎      | 3/9 [08:31<14:06, 141.08s/it][A

window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.8943397961556911
MakeWACAXExpDicUnknown Time:  16.990974615328014
Done extracting features



 44%|████▍     | 4/9 [08:59<08:01, 96.24s/it] [A

window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.5095852194353938
MakeWACAXExpDicUnknown Time:  8.815456349402666
Done extracting features



 56%|█████▌    | 5/9 [09:17<04:31, 67.96s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.3175738770514727
MakeWACAXExpDicUnknown Time:  7.21937223803252
Done extracting features



 67%|██████▋   | 6/9 [09:32<02:30, 50.17s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.2123791249468923
MakeWACAXExpDicUnknown Time:  6.053384591825306
Done extracting features



 78%|███████▊  | 7/9 [09:47<01:17, 38.71s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.6751910913735628
MakeWACAXExpDicUnknown Time:  10.177837694063783
Done extracting features



 89%|████████▉ | 8/9 [10:09<00:33, 33.17s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 20
len(exp2_df_user_set_dict): 20
MakeWACAXExpDicOwner Time:  1.2822447065263987
MakeWACAXExpDicUnknown Time:  7.635876663029194
Done extracting features



100%|██████████| 9/9 [10:27<00:00, 69.67s/it][A

  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 177
len_exp2_user_47: 152
MakeWACAXExpDicOwner Time:  5.312436331063509
MakeWACAXExpDicUnknown Time:  30.197219403460622
Done extracting features



 11%|█         | 1/9 [00:39<05:14, 39.28s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 88
len_exp2_user_47: 75
MakeWACAXExpDicOwner Time:  2.0988353257998824
MakeWACAXExpDicUnknown Time:  15.348136495798826
Done extracting features



 22%|██▏       | 2/9 [00:59<03:15, 27.94s/it][A

window_size: 500
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 44
len_exp2_user_47: 37
MakeWACAXExpDicOwner Time:  1.5875273365527391



 33%|███▎      | 3/9 [01:09<01:59, 19.95s/it][A

MakeWACAXExpDicUnknown Time:  8.307304156012833
Done extracting features
window_size: 750
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 29
len_exp2_user_47: 25
MakeWACAXExpDicOwner Time:  0.9138644058257341



 44%|████▍     | 4/9 [01:16<01:14, 14.81s/it][A

MakeWACAXExpDicUnknown Time:  5.65815007314086
Done extracting features
window_size: 1000
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 22
len_exp2_user_47: 18
MakeWACAXExpDicOwner Time:  0.5903041576966643
MakeWACAXExpDicUnknown Time:  5.408685807138681
Done extracting features



 56%|█████▌    | 5/9 [01:23<00:47, 11.77s/it][A

window_size: 1250
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 17
len_exp2_user_47: 15
MakeWACAXExpDicOwner Time:  0.8007869580760598
MakeWACAXExpDicUnknown Time:  4.314198703505099
Done extracting features



 67%|██████▋   | 6/9 [01:28<00:28,  9.65s/it][A

window_size: 1500
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 14
len_exp2_user_47: 12
MakeWACAXExpDicOwner Time:  0.6245483737438917
MakeWACAXExpDicUnknown Time:  3.5519499722868204
Done extracting features



 78%|███████▊  | 7/9 [01:33<00:15,  7.98s/it][A

window_size: 1750
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 12
len_exp2_user_47: 10
MakeWACAXExpDicOwner Time:  0.6883842786774039
MakeWACAXExpDicUnknown Time:  3.614115529693663
Done extracting features



 89%|████████▉ | 8/9 [01:37<00:06,  6.94s/it][A

window_size: 2000
WACA preprocessing
len(exp1_df_user_set_dict): 9
len(exp2_df_user_set_dict): 9
len(X_exp1_dict_user_47[47]): 11
len_exp2_user_47: 9
MakeWACAXExpDicOwner Time:  0.6994125079363585
MakeWACAXExpDicUnknown Time:  3.8005515662953258
Done extracting features



100%|██████████| 9/9 [01:42<00:00, 11.42s/it][A


train_set: {0, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 23, 26, 27, 29}
test_set: {1, 3, 5, 6, 16, 19, 22, 24, 25, 28}



  0%|          | 0/9 [00:00<?, ?it/s][A

window_size: 125
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 177
len_exp2_user_47: 152
MakeWACAXExpDicOwner Time:  15.290480955503881
MakeWACAXExpDicUnknown Time:  113.0548558337614
Done extracting features



 11%|█         | 1/9 [04:33<36:24, 273.00s/it][A

window_size: 250
WACA preprocessing
len(exp1_df_user_set_dict): 19
len(exp2_df_user_set_dict): 19
len(X_exp1_dict_user_47[47]): 88
len_exp2_user_47: 75
MakeWACAXExpDicOwner Time:  9.471507349982858
MakeWACAXExpDicUnknown Time:  54.19350157491863
Done extracting features


In [None]:
# Overlap experiment: for every candidate window overlap, evaluate the training EER table,
# pick the overlap with the lowest mean EER and evaluate that single setting on the test set.
# Results are logged to two text files and the test tables are exported as JSON.
init_experiment_params(exp_config=P)

# `train_set + [47]` requires train_set to be a list here; 47 is appended for display only.
print(f"train_set: {train_set+[47]}")
print(f"test_set: {test_set}")

time_of_execution = time.strftime("%Y/%m/%d-%H:%M:%S")
train_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/overlap_Mean_EER_df_train_dict.txt"
test_file_name = f"{FINAL_EXP_RESULTS_PATH}/{EXP_PATH_NAME}/overlap_Mean_EER_df_test_dict.txt"

# Both logs live in the same directory; create it so a fresh checkout does not fail on open().
os.makedirs(os.path.dirname(train_file_name), exist_ok=True)

# Truncate both logs and write a timestamp / file-name header.
for _log_file_name in (train_file_name, test_file_name):
    with open(_log_file_name, "w") as f:
        f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
        f.write("\n" + "-"*40 + f"{_log_file_name}" + "-"*40 + "\n")

# Loop-invariant inputs are built once, before the loop, because dfList_dict is also
# needed by the test evaluation below.
max_window_size = 2000
dfList_dict = {
    "dfList_exp1": raw_dfList_exp1,
    "dfList_exp2": raw_dfList_exp2,
    "dfList_exp1_user_47": raw_dfList_exp1_user_47,
    "dfList_exp2_user_47": raw_dfList_exp2_user_47,
}

EER_df_train_dict = {}
EER_df_test_dict = {}
for overlap_percent in tqdm(OVERLAP_EXP_RANGE):
    # OVERLAP_EXP_RANGE holds percentages; the rest of the cell works with fractions.
    overlap = overlap_percent * 0.01
    step_width = int(max_window_size * (1-overlap))
    # Cap the neighbour search by the number of windows available at the largest window
    # size, and by the largest configured neighbour count.
    max_num_windows = min(
        len(getIndices(sampleSize=max_window_size, step=step_width, numSamplePoints=P.num_sample_points_per_exp)),
        N_NEIGHBORS_PARAMS[-1]+1,
    )
    n_neighbors_params = np.arange(1, max_num_windows)

    # Pass the loop's overlap (not the global OVERLAP constant), otherwise every iteration
    # evaluates the same configuration and the sweep is meaningless.
    EER_df_train_dict[overlap] = calculate_EER_different_window_sizes_optimize_num_neighbors(
        dfList_dict,
        window_size_lst=WINDOW_SIZE_LST,
        train_set=train_set,
        exp_config=P,
        extract_features_func_dict=EXTRACT_WACA_features_DICT,
        overlap=overlap,
        n_neighbors_params=n_neighbors_params,
    )
    with open(train_file_name, "a") as f:
        f.write("\n" + "-"*22 + "\n")
        f.write(f"\noverlap: {overlap}\n")
        f.write(EER_df_train_dict[overlap].to_string())

# Mean EER over all window sizes, per overlap value.
mean_EER_train_dict = {
    key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_train_dict.items()
}

#-------
# Ascending sort: EER is an error rate, so the first entry is the best overlap.
train_lst = sorted(mean_EER_train_dict.items(), key=lambda item: item[1])
if not train_lst:
    raise ValueError("OVERLAP_EXP_RANGE is empty: no overlap value was evaluated")

with open(train_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(train_lst, start=1):
        f.write(f"{rank}) {entry}\n")

# Evaluate only the best training overlap on the test set, using that same overlap.
min_key = train_lst[0][0]
overlap = min_key
EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(
    dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    test_set=test_set,
    exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT,
    overlap=overlap,
    best_param_df=EER_df_train_dict[min_key],
)
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write(f"\nTop overlap parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Export the test results as JSON next to the text log (file name minus the ".txt" suffix).
key_column = ["overlap"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

mean_EER_test_dict = {
    key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_test_dict.items()
}

l = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

In [None]:
# Display the training EER table stored under the fractional overlap key 0.01.
# NOTE(review): keys are floats produced by `percent * 0.01`, so an exact-match lookup is fragile — confirm the key exists.
EER_df_train_dict[0.01]

In [None]:
# Re-run of the test evaluation only. This cell defines none of its inputs itself:
# train_lst, EER_df_train_dict, EER_df_test_dict, dfList_dict, time_of_execution and
# test_file_name must already exist in the kernel from the experiment cell.

# Truncate the test log and rewrite its header.
with open(test_file_name, "w") as f:
    f.write("\n" + "-"*40 + f"{time_of_execution}" + "-"*40 + "\n")
    f.write("\n" + "-"*40 + f"{test_file_name}" + "-"*40 + "\n")

# train_lst is sorted ascending by mean EER, so its first entry is the best overlap.
min_key = train_lst[0][0]
overlap = min_key
# Evaluate with the selected overlap (not the global OVERLAP constant) so the test run
# matches the configuration that was chosen.
EER_df_test_dict[min_key] = calculate_EER_different_window_sizes_test(
    dfList_dict,
    window_size_lst=WINDOW_SIZE_LST,
    test_set=test_set,
    exp_config=P,
    extract_features_func_dict=EXTRACT_WACA_features_DICT,
    overlap=overlap,
    best_param_df=EER_df_train_dict[min_key],
)
with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write(f"\nTop overlap parameter/s: {min_key}\n")
    f.write(EER_df_test_dict[min_key].to_string())
#-------
#-------
# Export the test results as JSON next to the text log (file name minus the ".txt" suffix).
key_column = ["overlap"]
EER_df_test_dict_df = make_raw_exp_df_results(EER_df_test_dict, key_column)
eer_per_window_size_col_df = make_eer_per_window_size_col_df(EER_df_test_dict_df, key_column, window_size_lst=WINDOW_SIZE_LST)

EER_df_test_dict_df.to_json(f'{test_file_name[:-4]}_raw_df.json')
eer_per_window_size_col_df.to_json(f'{test_file_name[:-4]}_eer_per_window_size_col_df.json')
#-------

# Mean EER over all window sizes, per evaluated overlap.
mean_EER_test_dict = {
    key: eer_df["Mean_EER"].mean() for key, eer_df in EER_df_test_dict.items()
}

l = sorted(mean_EER_test_dict.items(), key=lambda item: item[1])

with open(test_file_name, "a") as f:
    f.write("\n" + "-"*22 + "\n")
    f.write("\nSorting based on Mean EER among windows\n")
    for rank, entry in enumerate(l, start=1):
        f.write(f"{rank}) {entry}\n")

In [None]:
# Scratch check of the neighbour-count range for a 50% overlap at the largest window size.
# Note: this rebinds the notebook-level names overlap, step_width, max_num_windows and n_neighbors_params.
max_window_size = 2000
overlap = 50 * 0.01
step_width = int(max_window_size * (1 - overlap))
max_num_windows = len(
    getIndices(
        sampleSize=max_window_size,
        step=step_width,
        numSamplePoints=P.num_sample_points_per_exp,
    )
)
n_neighbors_params = np.arange(1, max_num_windows)
# Displayed value: the configured upper bound that the experiment cell compares against.
N_NEIGHBORS_PARAMS[-1] + 1

In [None]:
# Preview the integers 1, 5, 9, ... below 100 (step 4).
[*range(1, 100, 4)]

In [None]:
# Bare name that is not the last expression of the cell; with IPython's default display
# settings it produces no output here.
EXP_PATH_NAME
# Execute the utility notebook again (it is also %run in the setup cell at the top).
%run ./Classification_utility-functions.ipynb


# Summary

In [None]:

# Build the final result table for this experiment path and display it.
# The helper is defined outside this cell; by its name it also saves the table as JSON — confirm.
df = return_and_save_final_result_df_as_json(
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
df

In [None]:
# np.mean([df[i]["('Window Size', 2000)"] for i in range(3)])

In [None]:
# Build the relative result table from df, using row index 0 as the base case, and display it.
# The helper is defined outside this cell; by its name it also saves the table as JSON — confirm.
relative_df = return_and_save_final_relative_result_df_as_json(
    df,
    base_case_index=0,
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
relative_df

In [None]:
# Display the column labels of the final result table.
df.columns

In [None]:

# Render the result table as LaTeX source: apply the error-rate formatters, drop the index column.
(
    df.style
    .format(STYLER_ERR_FORMAT_DICT)
    .hide(axis='index')
    .to_latex()
)


In [None]:
# Render the relative result table as LaTeX source: apply the improvement formatters, drop the index column.
(
    relative_df.style
    .format(STYLER_IMPROVEMENT_FORMAT_DICT)
    .hide(axis='index')
    .to_latex()
)


## Gini coef

In [None]:
# Build the Gini result table for this experiment path and display it.
# The helper is defined outside this cell; by its name it also saves the table as JSON — confirm.
gini_result_df = return_and_save_final_Gini_df_as_json(
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)
gini_result_df

In [None]:
# for winsize in WINDOW_SIZE_LST:
#     print(np.mean([gini_result_df[i][('Window Size', winsize)] for i in range(3)]))



In [None]:
# Build the relative Gini table from gini_result_df, using row index 0 as the base case; the return value is displayed.
# The helper is defined outside this cell; by its name it also saves the table as JSON — confirm.
return_and_save_final_relative_gini_result_df_as_json(
    gini_result_df,
    base_case_index=0,
    final_exp_results_path=FINAL_EXP_RESULTS_PATH,
    exp_path_name=EXP_PATH_NAME,
    window_size_lst=WINDOW_SIZE_LST,
)