In [141]:
import pandas as pd
import mne as mne
import os 
import time
import numpy as np
import matplotlib.pyplot as plt
import joblib
import constants
from IPython.utils import io
import time
import sys
import yasa
from scipy.signal import welch

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import accuracy_score

#Import my modules
import format_eeg_data
import constants
import eeg_stat_ts
import run_expts

#TS Fresh Parameter Settings
from tsfresh.feature_extraction import ComprehensiveFCParameters
from tsfresh.feature_extraction import MinimalFCParameters
from tsfresh.feature_extraction import EfficientFCParameters
from tsfresh.feature_extraction import extract_features

# Cap how much of a DataFrame pandas renders: at most 50 rows and 160 columns
# are displayed before the output is truncated.
for option_name, option_value in (
    ('display.max_rows', 50),       # Show rows
    ('display.max_columns', 160),   # Show columns
):
    pd.set_option(option_name, option_value)

In [142]:
# Names of the data types (Wake / N1 / N2 / N3 / REM); only the Wake and N1
# feature tables are loaded in this cell.
data_types = ['Wake','N1', 'N2', 'N3', 'REM']

# Pre-computed feature tables, one per data type.
# NOTE(review): file names suggest tsfresh "Efficient" statistics - confirm.
# joblib.load unpickles the file, so only load files from a trusted source.
df_Wake = joblib.load('Data/TS_Fresh_Stats_Efficient_10_min_Wake.pkl')
df_N1 = joblib.load('Data/TS_Fresh_Stats_Efficient_10_min_N1.pkl')

In [143]:
# Region name -> list of channel names, taken from the project constants module.
region_channel_dict = constants.region_to_channel_dict

# Choose the data type ONCE here; every statement below follows this choice.
# NOTE(review): keep this in step with the `data_type` used to load y / groups.
source_df = df_Wake
# source_df = df_N1

#Extract cols in order to be used for all channels
channel_cols_order = [col.removeprefix('Fp1_') for col in source_df.columns if col.startswith('Fp1_')]

region_dfs = []

for region in region_channel_dict.keys():
    # The region-prefixed column names are identical for every channel of the
    # region, so they are built once per region rather than once per channel.
    region_cols = [region + '_' + col for col in channel_cols_order]

    channel_dfs = []
    for channel in region_channel_dict[region]:
        channel_cols = [channel + '_' + col for col in channel_cols_order]

        # Take this channel's features and relabel them with the region prefix
        # so the per-channel frames of one region align column-by-column.
        df = source_df[channel_cols].copy()
        df.columns = region_cols
        channel_dfs.append(df)

    # All regions have this col '__query_similarity_count__query_None__threshold_0.0' as containing NANs
    # Element-wise average over the region's channels; a NaN in any channel
    # stays NaN in the average, and columns containing NaN are dropped.
    region_df = sum(channel_dfs) / len(channel_dfs)
    region_df = region_df.dropna(axis=1)
    region_dfs.append(region_df)

# One wide frame holding the averaged features of every region side by side.
feature_df = pd.concat(region_dfs, axis=1)

#### Load labels (y data)

In [144]:
# Pick which data type's labels to load; change the index to switch.
available_data_types = ['Wake', 'N1', 'N2','N3', 'REM']
selected_index = 0
data_type = available_data_types[selected_index]
print(data_type)

Wake


In [145]:
# Folder holding the band power time series files; change load_path to point
# at a different location.
# NOTE(review): this is an absolute, user-specific path - consider making it configurable.
load_path = '/user/home/ko20929/work/RBD_using_custom_package/Data/freq_6_second_files/'

# Labels (y) and groups are stored per data type as HDF5 files under key 'df'.
# NOTE(review): `groups` presumably identifies which rows belong together for
# grouped splitting - confirm against run_expts.
y_path = load_path + data_type +  '_y.h5'
groups_path = load_path + data_type +  '_groups.h5'

y = pd.read_hdf(y_path, key='df', mode='r')
groups = pd.read_hdf(groups_path, key='df', mode='r')

##### Create regional_features_dict

In [146]:
# Build a region -> feature-column lookup so experiments can be run one region
# at a time, as before. A column belongs to a region when its name starts with
# '<region>_'.
region_channel_dict = constants.region_to_channel_dict
regions = list(region_channel_dict.keys())
regional_features_dict = {
    region: [col for col in feature_df.columns if col.startswith(region +'_')]
    for region in regions
}

#### Create feature selection pipelines

In [147]:
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, chi2, SelectFromModel

In [148]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_classif

In [149]:
# Settings shared by every feature-selection pipeline, named once instead of
# being repeated inline for each classifier.
N_SELECTED_FEATURES = 100   # k passed to SelectKBest
CLF_RANDOM_STATE = 5        # seed handed to every classifier


def _skb_pipeline(step_name, classifier, k=N_SELECTED_FEATURES):
    """Return a Pipeline that runs SelectKBest(f_classif, k) and then `classifier`.

    step_name  -- name given to the classifier step inside the pipeline
    classifier -- unfitted estimator used as the final pipeline step
    k          -- number of top-scoring features SelectKBest keeps
    """
    return Pipeline([('skb', SelectKBest(f_classif, k = k)), (step_name, classifier)])


# Classifier name -> feature-selection pipeline. Dict keys and step names are
# kept exactly as before ('Ada_B' key, 'Ada' step).
# NOTE(review): the experiment output logs "... are constant" / "f = msb / msw"
# warnings, presumably raised by f_classif on zero-variance columns - consider
# removing constant features before selection.
clfs_dict_feature_selection = {
    'RF' : _skb_pipeline('RF', RandomForestClassifier(random_state = CLF_RANDOM_STATE)),
    'DT' : _skb_pipeline('DT', DecisionTreeClassifier(random_state = CLF_RANDOM_STATE)),
    'Ada_B' : _skb_pipeline('Ada', AdaBoostClassifier(random_state = CLF_RANDOM_STATE)),
}

#### Run a test experiment on feature_df

In [121]:
# Baseline classifiers without feature selection. Every estimator gets the same
# fixed seed (the decision tree included) so repeated runs are reproducible and
# consistent with the seeded pipelines in clfs_dict_feature_selection.
clfs_dict = {
    'RF' : RandomForestClassifier(random_state = 5),
    'DT' : DecisionTreeClassifier(random_state = 5),
    'Ada_B' : AdaBoostClassifier(random_state = 5),
}
# clfs_dict = {'DT' : DecisionTreeClassifier(random_state = 5)}

In [150]:
# Feature matrix for the experiment: an independent (deep) copy of feature_df.
# y and groups are already defined above.
X = feature_df.copy(deep=True)

In [151]:
# Time the whole experiment with perf_counter: it is a monotonic clock meant
# for measuring intervals, so the elapsed value is not affected by system
# clock adjustments the way time.time() differences can be.
t1 = time.perf_counter()
expt_num = 2

#1.Generate expt specific X,y,groups
X_expt, y_expt, groups_expt, expt_info = run_expts.generate_expt_x_y_groups(X, y, groups, expt_num)

#2.Run the feature-selection pipelines on every regional feature subset.
# NOTE(review): presumably one evaluation per entry of random_states - confirm in run_expts.
results_df_regional = run_expts.run_mv_tsc(
    X_expt,
    y_expt,
    groups_expt,
    clfs_dict_feature_selection,
    return_df = True,
    subset_names_and_cols = regional_features_dict,
    random_states = [1,2],
)
t2 = time.perf_counter()

# Elapsed seconds, shown as the cell output.
t2-t1

  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 669 670] are constant.
  f = msb / msw
  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 668 669 670] are constant.
  f = msb / msw
  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 669 670] are constant.
  f = msb / msw
  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 669 670] are constant.
  f = msb / msw
  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 669 670] are constant.
  f = msb / msw
  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 669 670] are constant.
  f = msb / msw
  58  59  60  61  62  63  66  67  68  69  70  71  72  73  74  75  76  77
  78  79  80  81  82  91 104 364 668 669 670] are con

25.313249111175537

In [152]:
# Work on a copy so results_df_regional itself is left untouched.
raw_results_df = results_df_regional.copy()

# Summarise the raw results; the displayed frame has one row per random_state
# and an `_acc` / `_std` column pair for each region-classifier combination.
results_df = run_expts.generate_subset_acc_std(
    raw_results_df,
    return_df = True,
)
results_df

Unnamed: 0,random_state,Prefrontal_RF_acc,Prefrontal_RF_std,Prefrontal_DT_acc,Prefrontal_DT_std,Prefrontal_Ada_B_acc,Prefrontal_Ada_B_std,Frontal_RF_acc,Frontal_RF_std,Frontal_DT_acc,Frontal_DT_std,Frontal_Ada_B_acc,Frontal_Ada_B_std,Left Frontal_RF_acc,Left Frontal_RF_std,Left Frontal_DT_acc,Left Frontal_DT_std,Left Frontal_Ada_B_acc,Left Frontal_Ada_B_std,Right Frontal_RF_acc,Right Frontal_RF_std,Right Frontal_DT_acc,Right Frontal_DT_std,Right Frontal_Ada_B_acc,Right Frontal_Ada_B_std,Central_RF_acc,Central_RF_std,Central_DT_acc,Central_DT_std,Central_Ada_B_acc,Central_Ada_B_std,Left Central_RF_acc,Left Central_RF_std,Left Central_DT_acc,Left Central_DT_std,Left Central_Ada_B_acc,Left Central_Ada_B_std,Right Central_RF_acc,Right Central_RF_std,Right Central_DT_acc,Right Central_DT_std,Right Central_Ada_B_acc,Right Central_Ada_B_std,Left Temporal_RF_acc,Left Temporal_RF_std,Left Temporal_DT_acc,Left Temporal_DT_std,Left Temporal_Ada_B_acc,Left Temporal_Ada_B_std,Right Temporal_RF_acc,Right Temporal_RF_std,Right Temporal_DT_acc,Right Temporal_DT_std,Right Temporal_Ada_B_acc,Right Temporal_Ada_B_std,Parietal_RF_acc,Parietal_RF_std,Parietal_DT_acc,Parietal_DT_std,Parietal_Ada_B_acc,Parietal_Ada_B_std,Left Parietal_RF_acc,Left Parietal_RF_std,Left Parietal_DT_acc,Left Parietal_DT_std,Left Parietal_Ada_B_acc,Left Parietal_Ada_B_std,Right Parietal_RF_acc,Right Parietal_RF_std,Right Parietal_DT_acc,Right Parietal_DT_std,Right Parietal_Ada_B_acc,Right Parietal_Ada_B_std,Occipital_RF_acc,Occipital_RF_std,Occipital_DT_acc,Occipital_DT_std,Occipital_Ada_B_acc,Occipital_Ada_B_std
0,1,0.714286,0.173499,0.571429,0.173499,0.628571,0.111461,0.414286,0.159479,0.521429,0.23129,0.446429,0.121638,0.685714,0.166215,0.453571,0.258676,0.657143,0.181898,0.614286,0.215946,0.45,0.109731,0.503571,0.115396,0.55,0.076097,0.532143,0.110887,0.610714,0.131901,0.632143,0.091473,0.525,0.067386,0.603571,0.083299,0.496429,0.132865,0.557143,0.114842,0.582143,0.113277,0.560714,0.16115,0.496429,0.13988,0.560714,0.16115,0.55,0.148719,0.728571,0.136183,0.678571,0.148547,0.846429,0.121953,0.6,0.103879,0.671429,0.23217,0.607143,0.145949,0.732143,0.097153,0.707143,0.165446,0.5,0.091054,0.539286,0.259857,0.557143,0.114842,0.635714,0.14357,0.525,0.067386,0.45,0.175473
1,2,0.703571,0.185439,0.557143,0.160277,0.678571,0.117911,0.546429,0.219461,0.65,0.199041,0.632143,0.249387,0.621429,0.198013,0.546429,0.247333,0.621429,0.176199,0.55,0.076097,0.471429,0.034993,0.521429,0.142589,0.5,0.14419,0.607143,0.07053,0.528571,0.086455,0.596429,0.203791,0.525,0.067386,0.592857,0.248311,0.439286,0.16115,0.492857,0.128074,0.521429,0.201208,0.525,0.214167,0.55,0.076097,0.471429,0.125051,0.625,0.14419,0.7,0.217065,0.760714,0.133917,0.678571,0.117911,0.55,0.148719,0.728571,0.181547,0.65,0.205039,0.707143,0.165446,0.657143,0.143481,0.417857,0.113277,0.467857,0.110887,0.389286,0.096097,0.392857,0.132191,0.442857,0.160277,0.442857,0.114842


In [None]:
# Wake data
# 74.6% best accuracy on Wake data across two seeds
# 76.25% best accuracy on Wake data across two seeds with feature selection added (f_classif)

# N1 data
# 60% best accuracy on N1 data across two seeds
# 56% best accuracy on N1 data with feature selection added (f_classif)


In [153]:
# Best accuracy after averaging each column over the random states.
# Only the '*_acc' columns take part: results_df also holds '*_std' columns
# (and random_state), and a spread value must never be reported as an accuracy.
mean_acc_per_subset_clf = results_df.filter(regex='_acc$').mean(axis=0)
mean_acc_per_subset_clf.max()

0.7625

In [139]:
# Average every accuracy / std column over the random states; the first
# column (random_state) is skipped by position.
metric_columns = results_df.iloc[:, 1:]
metric_columns.mean(axis=0)

Prefrontal_RF_acc       0.522222
Prefrontal_RF_std       0.123710
Prefrontal_DT_acc       0.523333
Prefrontal_DT_std       0.086250
Prefrontal_Ada_B_acc    0.502222
                          ...   
Occipital_RF_std        0.231800
Occipital_DT_acc        0.478889
Occipital_DT_std        0.090567
Occipital_Ada_B_acc     0.473333
Occipital_Ada_B_std     0.149921
Length: 78, dtype: float64