# This notebook will just attempt to correct some columns of the generated dataframes from the feature extractor functions

In [38]:
import datetime
import math
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
import requests
import re
import tensorflow as tf

from concurrent.futures import ThreadPoolExecutor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# import and load model architectures as well as decoder
from models.cueva import LSTM_FE
from models.llanes_jurado import LSTM_CNN
from utilities.preprocessors import correct_signals
from utilities.loaders import load_meta_data, concur_load_data, charge_raw_data, _combine_data

from utilities.visualizers import (
    view_time_frame,
    view_wavelet_coeffs,
    analyze,
    data_split_metric_values,
    view_value_frequency,
    multi_class_heatmap,
    view_metric_values,
    view_classified_labels,
    view_label_freq,
    disp_cat_feat,
    plot_all_features,
    describe_col,
    ModelResults,
    view_all_splits_results)

from utilities.feature_extractors import (
    concur_extract_features_from_all,
    extract_features,
    extract_features_hybrid,
    extract_features_per_hour)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
train_files = os.listdir('./data/Electrodermal Activity artifact correction BEnchmark (EDABE)/Train/')
train_files

['ahixac_expert1.csv',
 'akakip_expert2.csv',
 'aqamom_expert2.csv',
 'aretez_expert1.csv',
 'asifex_expert2.csv',
 'axeyoh_expert2.csv',
 'efawep_expert2.csv',
 'egemow_expert2.csv',
 'ejofeq_expert2.csv',
 'erecij_expert1.csv',
 'esirur_expert1.csv',
 'ewehov_expert2.csv',
 'exozef_expert2.csv',
 'idagah_expert2.csv',
 'ihikay_expert1.csv',
 'ihinot_expert1.csv',
 'imocac_expert2.csv',
 'iqiyat_expert2.csv',
 'obujoh_expert2.csv',
 'ohayeh_expert1.csv',
 'ohufow_expert1.csv',
 'ojotew_expert1.csv',
 'onivuk_expert1.csv',
 'opunad_expert1.csv',
 'otecab_expert2.csv',
 'otuqom_expert1.csv',
 'owegud_expert2.csv',
 'oxisux_expert1.csv',
 'tchgij_expert2.csv',
 'ufoyek_expert2.csv',
 'uqozew_expert1.csv',
 'urogif_expert1.csv',
 'uzefow_expert1.csv']

In [40]:
test_files = os.listdir('./data/Electrodermal Activity artifact correction BEnchmark (EDABE)/Test/')
test_files

['afegip_expert1.csv',
 'ajeric_expert2.csv',
 'ekamis_expert2.csv',
 'iguted_expert1.csv',
 'inefoh_expert1.csv',
 'otafeh_expert1.csv',
 'oxused_expert2.csv',
 'pqbqpr_expert2.csv',
 'uhepah_expert1.csv',
 'ukudab_expert2.csv']

In [41]:
train_subjects_names = list(set([re.sub(r".csv", "", file) for file in train_files]))
train_subjects_names

['otecab_expert2',
 'uqozew_expert1',
 'uzefow_expert1',
 'asifex_expert2',
 'ejofeq_expert2',
 'idagah_expert2',
 'exozef_expert2',
 'ohufow_expert1',
 'iqiyat_expert2',
 'axeyoh_expert2',
 'ewehov_expert2',
 'obujoh_expert2',
 'opunad_expert1',
 'oxisux_expert1',
 'egemow_expert2',
 'tchgij_expert2',
 'otuqom_expert1',
 'ufoyek_expert2',
 'ihinot_expert1',
 'aqamom_expert2',
 'akakip_expert2',
 'onivuk_expert1',
 'ihikay_expert1',
 'ojotew_expert1',
 'urogif_expert1',
 'imocac_expert2',
 'efawep_expert2',
 'erecij_expert1',
 'owegud_expert2',
 'esirur_expert1',
 'ohayeh_expert1',
 'aretez_expert1',
 'ahixac_expert1']

In [42]:
test_subjects_names = list(set([re.sub(r".csv", "", file) for file in test_files]))
test_subjects_names

['inefoh_expert1',
 'pqbqpr_expert2',
 'iguted_expert1',
 'uhepah_expert1',
 'afegip_expert1',
 'ekamis_expert2',
 'ajeric_expert2',
 'otafeh_expert1',
 'oxused_expert2',
 'ukudab_expert2']

In [43]:
ahixac_lof = pd.read_csv(f'./data/Hybrid Artifact Detection Data/train/ahixac_expert1_lof.csv', index_col=0)
ahixac_lof

Unnamed: 0,raw_128hz_max,raw_128hz_min,raw_128hz_amp,raw_128hz_median,raw_128hz_std,raw_128hz_range,raw_128hz_shannon_entropy,raw_128hz_1d_max,raw_128hz_1d_min,raw_128hz_1d_amp,...,first_4thofa_sec_mean,second_4thofa_sec_mean,first_4thofa_sec_std,second_4thofa_sec_std,first_4thofa_sec_median,second_4thofa_sec_median,first_4thofa_sec_range,second_4thofa_sec_range,first_4thofa_sec_n_coeffs_above_zero,second_4thofa_sec_n_coeffs_above_zero
0,0.000222,0.000000,0.000212,0.000222,4.692443e-05,0.000222,0.189209,0.000111,0.000000,0.000106,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
1,0.000222,0.000222,0.000222,0.000222,5.421011e-20,0.000000,0.000000,0.000111,0.000111,0.000111,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
2,0.000222,0.000222,0.000222,0.000222,5.421011e-20,0.000000,0.000000,0.000111,0.000111,0.000111,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
3,0.000222,0.000222,0.000222,0.000222,5.421011e-20,0.000000,0.000000,0.000111,0.000111,0.000111,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
4,0.001998,0.000222,0.000232,0.000222,1.048551e-04,0.001776,0.074234,0.000999,0.000111,0.000116,...,0.035355,0.025000,0.158114,0.111803,0.000000,0.000000,0.707107,0.500000,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12998,2.708425,2.415385,2.517207,2.523077,6.941735e-02,0.293040,4.844371,1.353480,1.207692,1.258372,...,0.022097,0.040030,0.013441,0.030891,0.016836,0.035714,0.042090,0.104167,20.0,19.0
12999,2.726007,2.415385,2.539657,2.535531,8.540504e-02,0.310623,4.926130,1.363004,1.207692,1.269694,...,0.020093,0.035109,0.012448,0.029095,0.015456,0.028689,0.038640,0.095628,20.0,19.0
13000,2.733333,2.415385,2.565998,2.546520,8.968053e-02,0.317949,4.966927,1.366667,1.207692,1.283035,...,0.018818,0.033333,0.012547,0.029336,0.015207,0.028226,0.038016,0.094086,20.0,19.0
13001,2.733333,2.427106,2.589709,2.563370,8.095927e-02,0.306227,4.938119,1.366667,1.213553,1.294951,...,0.026159,0.043353,0.028361,0.032618,0.016349,0.037572,0.130794,0.098266,20.0,20.0


In [44]:
ahixac_lof['raw_128hz_skewness.1']

0         16.382514
1          0.000000
2          0.000000
3          0.000000
4        179.086260
            ...    
12998     -0.334008
12999     -0.734671
13000     -1.046450
13001     -1.266623
13002     -1.454367
Name: raw_128hz_skewness.1, Length: 13003, dtype: float64

In [45]:
ahixac_lof['filt_16hz_skewness.1']

0        0.815185
1        6.790413
2        9.458407
3        8.176298
4        9.333589
           ...   
12998   -1.189077
12999   -0.243388
13000   -0.592488
13001   -1.006455
13002   -1.226658
Name: filt_16hz_skewness.1, Length: 13003, dtype: float64

In [46]:
ren_ahixac_lof = ahixac_lof.rename(columns={
        'raw_128hz_skewness.1': 'raw_128hz_kurt',
        'filt_128hz_skewness.1': 'filt_128hz_kurt',
        'raw_16hz_skewness.1': 'raw_16hz_kurt',
        'filt_16hz_skewness.1': 'filt_16hz_kurt',
})

In [47]:
ren_ahixac_lof['raw_128hz_kurt']

0         16.382514
1          0.000000
2          0.000000
3          0.000000
4        179.086260
            ...    
12998     -0.334008
12999     -0.734671
13000     -1.046450
13001     -1.266623
13002     -1.454367
Name: raw_128hz_kurt, Length: 13003, dtype: float64

In [48]:
for index, train_subject_name in enumerate(train_subjects_names):
    print(f'subject: {train_subject_name}')

    # save both lstm features and lstm labels
    train_subject_lof = pd.read_csv(f'./data/Hybrid Artifact Detection Data/train/{train_subject_name}_lof.csv', index_col=0)
    train_subject_lof.rename(columns={
        'raw_128hz_skewness.1': 'raw_128hz_kurt',
        'filt_128hz_skewness.1': 'filt_128hz_kurt',
        'raw_16hz_skewness.1': 'raw_16hz_kurt',
        'filt_16hz_skewness.1': 'filt_16hz_kurt',
    }, inplace=True)
    train_subject_lof.to_csv(f'./data/Hybrid Artifact Detection Data/train/{train_subject_name}_lof.csv')

subject: otecab_expert2
subject: uqozew_expert1
subject: uzefow_expert1
subject: asifex_expert2
subject: ejofeq_expert2
subject: idagah_expert2
subject: exozef_expert2
subject: ohufow_expert1
subject: iqiyat_expert2
subject: axeyoh_expert2
subject: ewehov_expert2
subject: obujoh_expert2
subject: opunad_expert1
subject: oxisux_expert1
subject: egemow_expert2
subject: tchgij_expert2
subject: otuqom_expert1
subject: ufoyek_expert2
subject: ihinot_expert1
subject: aqamom_expert2
subject: akakip_expert2
subject: onivuk_expert1
subject: ihikay_expert1
subject: ojotew_expert1
subject: urogif_expert1
subject: imocac_expert2
subject: efawep_expert2
subject: erecij_expert1
subject: owegud_expert2
subject: esirur_expert1
subject: ohayeh_expert1
subject: aretez_expert1
subject: ahixac_expert1


In [49]:
for index, test_subject_name in enumerate(test_subjects_names):
    print(f'subject: {test_subject_name}')

    # save both lstm features and lstm labels
    test_subject_lof = pd.read_csv(f'./data/Hybrid Artifact Detection Data/test/{test_subject_name}_lof.csv', index_col=0)
    test_subject_lof.rename(columns={
        'raw_128hz_skewness.1': 'raw_128hz_kurt',
        'filt_128hz_skewness.1': 'filt_128hz_kurt',
        'raw_16hz_skewness.1': 'raw_16hz_kurt',
        'filt_16hz_skewness.1': 'filt_16hz_kurt',
    }, inplace=True)
    test_subject_lof.to_csv(f'./data/Hybrid Artifact Detection Data/test/{test_subject_name}_lof.csv')

subject: inefoh_expert1
subject: pqbqpr_expert2
subject: iguted_expert1
subject: uhepah_expert1
subject: afegip_expert1
subject: ekamis_expert2
subject: ajeric_expert2
subject: otafeh_expert1
subject: oxused_expert2
subject: ukudab_expert2
