### Combining Feature Sets  
  
This notebook will be dedicated to further feature evaluation. The goal is to combine feature sets to get an idea of how each if affecting performance and then to reduce dimensions by averaging channels and selecting the most important features from whatever features remain.  
  
I will also be trying PCA with some of these feature sets (especially the wavelet feature sets), but this will be done in a separate notebook.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fastparquet, pyarrow
import mne
from mne.decoding import Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score, precision_score, recall_score
from utils import *

In [2]:
mne.set_log_level('WARNING')

In [3]:
df = pd.read_csv('by_patient.csv')

In [4]:
other_df = activity_df(df, 'Other', 'expert_consensus')
seizure_df = activity_df(df, 'Seizure', 'expert_consensus')
gpd_df = activity_df(df, 'GPD', 'expert_consensus')
lpd_df = activity_df(df, 'LPD', 'expert_consensus')
grda_df = activity_df(df, 'GRDA', 'expert_consensus')
lrda_df = activity_df(df, 'LRDA', 'expert_consensus')
activity_df_list = [other_df, seizure_df, gpd_df, lpd_df, grda_df, lrda_df]

In [5]:
#other_indexes = get_indexes(other_df, 2000)
#seizure_indexes = get_indexes(seizure_df, 2000)
#gpd_indexes = get_indexes(gpd_df, 2000)
#lpd_indexes = get_indexes(lpd_df, 2000)
#grda_indexes = get_indexes(grda_df, 2000)
#lrda_indexes = get_indexes(lrda_df, 2000)

In [6]:
#activity_indexes = [other_indexes, seizure_indexes, gpd_indexes, lpd_indexes,
#                   grda_indexes, lrda_indexes]

In [11]:
#activity_indexes = pd.DataFrame(activity_indexes, index = ['Other', 'Seizure', 'GPD', 'LPD', 'GRDA', 'LRDA']).transpose()

In [12]:
#activity_indexes.to_csv('activity_indexes.csv', index = None)

In [6]:
activity_indexes = pd.read_csv('activity_indexes.csv')

In [7]:
index_lists = [activity_indexes[col] for col in activity_indexes.columns]

In [8]:
y = get_yvals(2000)['activity']

### Band Power Features

In [19]:
#band_pow_df = full_band_df(activity_df_list, index_lists, 1, None, bandpass = True,
#                          notch = False, reref = True)
#band_pow_df.to_csv('band_pow_df.csv', index = None)

In [22]:
X = pd.read_csv('band_pow_df.csv')

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [23]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power Training')
print(classification_report(y_train, training_yhat))
print('Band Power Testing')
print(classification_report(y_test, testing_yhat))
band_power_acc = accuracy_score(y_test, testing_yhat)
band_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
band_power_f1 = f1_score(y_test, testing_yhat, average = None)
band_power_prec = precision_score(y_test, testing_yhat, average = None)
band_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power Training
              precision    recall  f1-score   support

         GPD       0.83      0.94      0.88      1794
        GRDA       0.79      0.89      0.84      1819
         LPD       0.78      0.89      0.83      1800
        LRDA       0.84      0.90      0.87      1795
       Other       0.80      0.61      0.69      1808
     Seizure       0.91      0.69      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.82     10800
weighted avg       0.82      0.82      0.82     10800

Band Power Testing
              precision    recall  f1-score   support

         GPD       0.78      0.89      0.83       206
        GRDA       0.64      0.78      0.71       181
         LPD       0.69      0.81      0.75       200
        LRDA       0.74      0.81      0.78       205
       Other       0.54      0.44      0.49       192
     Seizure       0.82      0.50      0.62       216

    accuracy                          

### Adding Hjorth Complexity to Band Power Set  
  
**Time-Domain**

In [25]:
#time_comp_df = full_complexity_df(activity_df_list, index_lists, 1, None,
#                                 bandpass = True, notch = False, reref = True, spectral = False)
#time_comp_df.to_csv('time_comp_df.csv', index = None)

In [26]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('time_comp_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [27]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Hjorth Complexity (Time) Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Hjorth Complexity (Time) Testing')
print(classification_report(y_test, testing_yhat))
comp_band_power_acc = accuracy_score(y_test, testing_yhat)
comp_band_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
comp_band_power_f1 = f1_score(y_test, testing_yhat, average = None)
comp_band_power_prec = precision_score(y_test, testing_yhat, average = None)
comp_band_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Hjorth Complexity (Time) Training
              precision    recall  f1-score   support

         GPD       0.83      0.94      0.88      1794
        GRDA       0.80      0.88      0.84      1819
         LPD       0.77      0.90      0.83      1800
        LRDA       0.84      0.90      0.87      1795
       Other       0.80      0.62      0.70      1808
     Seizure       0.90      0.69      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.82     10800
weighted avg       0.82      0.82      0.82     10800

Band Power and Hjorth Complexity (Time) Testing
              precision    recall  f1-score   support

         GPD       0.81      0.89      0.85       206
        GRDA       0.65      0.80      0.72       181
         LPD       0.68      0.82      0.74       200
        LRDA       0.76      0.84      0.80       205
       Other       0.56      0.42      0.48       192
     Seizure       0.81      0.52  

In [28]:
precisions = pd.DataFrame(band_power_prec, columns = ['Band Power'])
precisions['Power and Complexity'] = comp_band_power_prec

recalls = pd.DataFrame(band_power_rec, columns = ['Band Power'])
recalls['Power and Complexity'] = comp_band_power_rec

f1_scores = pd.DataFrame(band_power_f1, columns = ['Band Power'])
f1_scores['Power and Complexity'] = comp_band_power_f1

### Notes on Results  
  
Essentially no change in results by adding the time-domain complexity data. That doesn't mean there isn't useful data in that feature set, but more work will need to be done in terms of dimensionality reduction and feature selection in order to see improvement in results.

In [29]:
precisions.round(decimals = 3)

Unnamed: 0,Band Power,Power and Complexity
0,0.779,0.806
1,0.643,0.647
2,0.694,0.679
3,0.744,0.764
4,0.545,0.559
5,0.82,0.812


In [30]:
recalls.round(decimals = 3)

Unnamed: 0,Band Power,Power and Complexity
0,0.888,0.888
1,0.785,0.801
2,0.805,0.825
3,0.81,0.839
4,0.443,0.417
5,0.505,0.519


In [31]:
f1_scores.round(decimals = 3)

Unnamed: 0,Band Power,Power and Complexity
0,0.83,0.845
1,0.706,0.716
2,0.745,0.745
3,0.776,0.8
4,0.489,0.478
5,0.625,0.633


**Frequency-Domain**

In [32]:
#freq_comp_df = full_complexity_df(activity_df_list, index_lists, 1, None,
#                                 bandpass = True, notch = False, reref = True, spectral = True)
#freq_comp_df.to_csv('freq_comp_df.csv', index = None)

In [33]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('freq_comp_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [34]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Hjorth Complexity (Frequency) Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Hjorth Complexity (Frequency) Testing')
print(classification_report(y_test, testing_yhat))
comp2_band_power_acc = accuracy_score(y_test, testing_yhat)
comp2_band_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
comp2_band_power_f1 = f1_score(y_test, testing_yhat, average = None)
comp2_band_power_prec = precision_score(y_test, testing_yhat, average = None)
comp2_band_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Hjorth Complexity (Frequency) Training
              precision    recall  f1-score   support

         GPD       0.82      0.94      0.88      1794
        GRDA       0.79      0.88      0.83      1819
         LPD       0.78      0.89      0.83      1800
        LRDA       0.84      0.90      0.87      1795
       Other       0.81      0.62      0.70      1808
     Seizure       0.90      0.69      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.82     10800
weighted avg       0.82      0.82      0.82     10800

Band Power and Hjorth Complexity (Frequency) Testing
              precision    recall  f1-score   support

         GPD       0.77      0.88      0.82       206
        GRDA       0.69      0.80      0.74       181
         LPD       0.70      0.80      0.75       200
        LRDA       0.75      0.82      0.78       205
       Other       0.56      0.46      0.51       192
     Seizure       0.81  

In [35]:
precisions['Power and Spectral Complexity'] = comp2_band_power_prec
recalls['Power and Spectral Complexity'] = comp2_band_power_rec
f1_scores['Power and Spectral Complexity'] = comp2_band_power_f1

### Higuchi

In [36]:
#higuchi_fd_set = full_higuchi_df(activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#higuchi_fd_set.to_csv('higuchi_fd_set.csv', index = None)

In [37]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('higuchi_fd_set.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [38]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Higuchi Fractal Dimension Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Higuchi Fractal Dimension Testing')
print(classification_report(y_test, testing_yhat))
higuchi_power_acc = accuracy_score(y_test, testing_yhat)
higuchi_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
higuchi_power_f1 = f1_score(y_test, testing_yhat, average = None)
higuchi_power_prec = precision_score(y_test, testing_yhat, average = None)
higuchi_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Higuchi Fractal Dimension Training
              precision    recall  f1-score   support

         GPD       0.84      0.94      0.88      1794
        GRDA       0.80      0.88      0.84      1819
         LPD       0.77      0.90      0.83      1800
        LRDA       0.84      0.90      0.87      1795
       Other       0.80      0.62      0.70      1808
     Seizure       0.89      0.69      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.82     10800
weighted avg       0.82      0.82      0.82     10800

Band Power and Higuchi Fractal Dimension Testing
              precision    recall  f1-score   support

         GPD       0.80      0.91      0.85       206
        GRDA       0.65      0.80      0.72       181
         LPD       0.68      0.81      0.74       200
        LRDA       0.75      0.80      0.77       205
       Other       0.56      0.41      0.47       192
     Seizure       0.81      0.54

In [39]:
precisions['Power and Higuchi'] = higuchi_power_prec
recalls['Power and Higuchi'] = higuchi_power_rec
f1_scores['Power and Higuchi'] = higuchi_power_f1

### Katz

In [41]:
#katz_fd_set = full_katz_df(activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#katz_fd_set.to_csv('katz_fd_set.csv', index = None)

In [42]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('katz_fd_set.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [43]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Katz Fractal Dimension Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Katz Fractal Dimension Testing')
print(classification_report(y_test, testing_yhat))
katz_power_acc = accuracy_score(y_test, testing_yhat)
katz_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
katz_power_f1 = f1_score(y_test, testing_yhat, average = None)
katz_power_prec = precision_score(y_test, testing_yhat, average = None)
katz_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Katz Fractal Dimension Training
              precision    recall  f1-score   support

         GPD       0.83      0.94      0.88      1794
        GRDA       0.79      0.88      0.84      1819
         LPD       0.77      0.90      0.83      1800
        LRDA       0.84      0.89      0.86      1795
       Other       0.79      0.61      0.69      1808
     Seizure       0.91      0.69      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.81     10800
weighted avg       0.82      0.82      0.81     10800

Band Power and Katz Fractal Dimension Testing
              precision    recall  f1-score   support

         GPD       0.78      0.88      0.83       206
        GRDA       0.64      0.79      0.71       181
         LPD       0.68      0.81      0.74       200
        LRDA       0.73      0.80      0.77       205
       Other       0.54      0.42      0.47       192
     Seizure       0.83      0.50      

In [44]:
precisions['Power and Katz'] = katz_power_prec
recalls['Power and Katz'] = katz_power_rec
f1_scores['Power and Katz'] = katz_power_f1

### Wavelet Coef Energy

In [46]:
#coef_energy_df = full_coef_energy_df('db4', activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#coef_energy_df.to_csv('coef_energy_df.csv', index = None)

In [47]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('coef_energy_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [48]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Wavelet Coef Energy Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Wavelet Coef Energy Testing')
print(classification_report(y_test, testing_yhat))
coef_energy_power_acc = accuracy_score(y_test, testing_yhat)
coef_energy_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
coef_energy_power_f1 = f1_score(y_test, testing_yhat, average = None)
coef_energy_power_prec = precision_score(y_test, testing_yhat, average = None)
coef_energy_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Wavelet Coef Energy Training
              precision    recall  f1-score   support

         GPD       0.84      0.94      0.88      1794
        GRDA       0.79      0.88      0.83      1819
         LPD       0.78      0.90      0.84      1800
        LRDA       0.83      0.89      0.86      1795
       Other       0.78      0.64      0.70      1808
     Seizure       0.92      0.66      0.77      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.81     10800
weighted avg       0.82      0.82      0.81     10800

Band Power and Wavelet Coef Energy Testing
              precision    recall  f1-score   support

         GPD       0.75      0.89      0.82       206
        GRDA       0.66      0.76      0.70       181
         LPD       0.68      0.83      0.75       200
        LRDA       0.77      0.80      0.79       205
       Other       0.56      0.45      0.50       192
     Seizure       0.84      0.51      0.64  

In [49]:
precisions['Power and Coef Energy'] = coef_energy_power_prec
recalls['Power and Coef Energy'] = coef_energy_power_rec
f1_scores['Power and Coef Energy'] = coef_energy_power_f1

### Teager-Kaiser Energy

In [50]:
#tk_energy_df = full_tk_energy_df('db4', activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#tk_energy_df.to_csv('tk_energy_df.csv', index = None)

In [51]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('tk_energy_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [52]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Teager-Kaiser Energy Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Teager-Kaiser Energy Testing')
print(classification_report(y_test, testing_yhat))
tk_energy_power_acc = accuracy_score(y_test, testing_yhat)
tk_energy_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
tk_energy_power_f1 = f1_score(y_test, testing_yhat, average = None)
tk_energy_power_prec = precision_score(y_test, testing_yhat, average = None)
tk_energy_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Teager-Kaiser Energy Training
              precision    recall  f1-score   support

         GPD       0.84      0.94      0.89      1794
        GRDA       0.78      0.89      0.83      1819
         LPD       0.78      0.88      0.83      1800
        LRDA       0.82      0.91      0.86      1795
       Other       0.79      0.63      0.70      1808
     Seizure       0.90      0.65      0.76      1784

    accuracy                           0.81     10800
   macro avg       0.82      0.81      0.81     10800
weighted avg       0.82      0.81      0.81     10800

Band Power and Teager-Kaiser Energy Testing
              precision    recall  f1-score   support

         GPD       0.73      0.88      0.80       206
        GRDA       0.63      0.76      0.69       181
         LPD       0.67      0.81      0.73       200
        LRDA       0.75      0.79      0.77       205
       Other       0.57      0.41      0.47       192
     Seizure       0.83      0.53      0.64

In [53]:
precisions['Power and TK Energy'] = tk_energy_power_prec
recalls['Power and TK Energy'] = tk_energy_power_rec
f1_scores['Power and TK Energy'] = tk_energy_power_f1

### Zero Crossings

In [54]:
#zero_xing_df = full_zero_xing_df(activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#zero_xing_df.to_csv('zero_xing_df.csv', index = None)

In [55]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('zero_xing_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [56]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Zero Crossings Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Zero Crossings Testing')
print(classification_report(y_test, testing_yhat))
zero_xing_power_acc = accuracy_score(y_test, testing_yhat)
zero_xing_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
zero_xing_power_f1 = f1_score(y_test, testing_yhat, average = None)
zero_xing_power_prec = precision_score(y_test, testing_yhat, average = None)
zero_xing_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Zero Crossings Training
              precision    recall  f1-score   support

         GPD       0.83      0.93      0.88      1794
        GRDA       0.79      0.89      0.84      1819
         LPD       0.78      0.90      0.83      1800
        LRDA       0.84      0.90      0.87      1795
       Other       0.79      0.61      0.69      1808
     Seizure       0.90      0.70      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.82     10800
weighted avg       0.82      0.82      0.82     10800

Band Power and Zero Crossings Testing
              precision    recall  f1-score   support

         GPD       0.79      0.91      0.84       206
        GRDA       0.68      0.79      0.73       181
         LPD       0.68      0.82      0.74       200
        LRDA       0.77      0.83      0.80       205
       Other       0.55      0.41      0.47       192
     Seizure       0.82      0.56      0.67       216



In [57]:
precisions['Power and Zero Crossings'] = zero_xing_power_prec
recalls['Power and Zero Crossings'] = zero_xing_power_rec
f1_scores['Power and Zero Crossings'] = zero_xing_power_f1

### Line Length

In [64]:
#line_length_df = full_linelength_df(activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#line_length_df.to_csv('line_length_df.csv', index = None)

In [65]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('line_length_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [66]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Line Length Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Line Length Testing')
print(classification_report(y_test, testing_yhat))
line_length_power_acc = accuracy_score(y_test, testing_yhat)
line_length_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
line_length_power_f1 = f1_score(y_test, testing_yhat, average = None)
line_length_power_prec = precision_score(y_test, testing_yhat, average = None)
line_length_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Line Length Training
              precision    recall  f1-score   support

         GPD       0.83      0.94      0.88      1794
        GRDA       0.80      0.88      0.84      1819
         LPD       0.76      0.91      0.83      1800
        LRDA       0.85      0.89      0.87      1795
       Other       0.81      0.62      0.70      1808
     Seizure       0.91      0.69      0.79      1784

    accuracy                           0.82     10800
   macro avg       0.83      0.82      0.82     10800
weighted avg       0.83      0.82      0.82     10800

Band Power and Line Length Testing
              precision    recall  f1-score   support

         GPD       0.79      0.89      0.84       206
        GRDA       0.68      0.77      0.73       181
         LPD       0.69      0.81      0.74       200
        LRDA       0.73      0.80      0.77       205
       Other       0.55      0.45      0.50       192
     Seizure       0.84      0.55      0.66       216

    ac

In [67]:
precisions['Power and Line Length'] = line_length_power_prec
recalls['Power and Line Length'] = line_length_power_rec
f1_scores['Power and Line Length'] = line_length_power_f1

### Spectral Slope

In [68]:
#spectslope_df = full_spect_slope_df(activity_df_list, index_lists, 1, None, bandpass = True, 
#                                notch = False, reref = True)
#spectslope_df.to_csv('spectslope_df.csv', index = None)

In [69]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('spectslope_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [70]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Spectral Slope Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Spectral Slope Testing')
print(classification_report(y_test, testing_yhat))
spectslope_power_acc = accuracy_score(y_test, testing_yhat)
spectslope_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
spectslope_power_f1 = f1_score(y_test, testing_yhat, average = None)
spectslope_power_prec = precision_score(y_test, testing_yhat, average = None)
spectslope_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Spectral Slope Training
              precision    recall  f1-score   support

         GPD       0.84      0.94      0.89      1794
        GRDA       0.78      0.88      0.83      1819
         LPD       0.79      0.89      0.83      1800
        LRDA       0.84      0.89      0.86      1795
       Other       0.80      0.62      0.70      1808
     Seizure       0.90      0.70      0.79      1784

    accuracy                           0.82     10800
   macro avg       0.83      0.82      0.82     10800
weighted avg       0.82      0.82      0.82     10800

Band Power and Spectral Slope Testing
              precision    recall  f1-score   support

         GPD       0.78      0.88      0.83       206
        GRDA       0.66      0.76      0.71       181
         LPD       0.66      0.80      0.73       200
        LRDA       0.70      0.81      0.75       205
       Other       0.55      0.42      0.48       192
     Seizure       0.81      0.50      0.62       216



In [71]:
precisions['Power and Spectral Slope'] = spectslope_power_prec
recalls['Power and Spectral Slope'] = spectslope_power_rec
f1_scores['Power and Spectral Slope'] = spectslope_power_f1

### Hjorth Mobility: Time

In [72]:
#time_mob_df = full_mobility_df(activity_df_list, index_lists, 1, None, bandpass = True, notch = False, 
#                              reref = True, spectral = False)
#time_mob_df.to_csv('time_mob_df.csv', index = True)

In [73]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('time_mob_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

### Notes on Results  
  
This is a big shift from the trend above. Rather than little to no effect, testing accuracy has increased to 78%. I'm not sure why and need to check the code for this function as well as the resulting feature data to make sure something else isn't going on.

In [74]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Hjorth Mobility Time Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Hjorth Mobility Time Testing')
print(classification_report(y_test, testing_yhat))
time_mob_power_acc = accuracy_score(y_test, testing_yhat)
time_mob_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
time_mob_power_f1 = f1_score(y_test, testing_yhat, average = None)
time_mob_power_prec = precision_score(y_test, testing_yhat, average = None)
time_mob_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Hjorth Mobility Time Training
              precision    recall  f1-score   support

         GPD       0.85      0.95      0.89      1794
        GRDA       0.87      0.91      0.89      1819
         LPD       0.83      0.91      0.87      1800
        LRDA       0.92      0.91      0.92      1795
       Other       0.85      0.79      0.82      1808
     Seizure       0.90      0.73      0.81      1784

    accuracy                           0.87     10800
   macro avg       0.87      0.87      0.87     10800
weighted avg       0.87      0.87      0.87     10800

Band Power and Hjorth Mobility Time Testing
              precision    recall  f1-score   support

         GPD       0.80      0.91      0.85       206
        GRDA       0.78      0.86      0.82       181
         LPD       0.71      0.85      0.78       200
        LRDA       0.86      0.83      0.85       205
       Other       0.68      0.63      0.66       192
     Seizure       0.84      0.58      0.68

In [75]:
precisions['Power and Time Mobility'] = time_mob_power_prec
recalls['Power and Time Mobility'] = time_mob_power_rec
f1_scores['Power and Time Mobility'] = time_mob_power_f1

### Hjorth Mobility: Frequency

In [77]:
#spectral_mob_df = full_mobility_df(activity_df_list, index_lists, 1, None, bandpass = True, notch = False, 
#                              reref = True, spectral = True)
#spectral_mob_df.to_csv('spectral_mob_df.csv', index = True)

In [78]:
X1 = pd.read_csv('band_pow_df.csv')
X2 = pd.read_csv('spectral_mob_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

### Hjorth Mobility Results  
  
The Hjorth mobility features both do significantly better than the other features when combined with the band power features. I'm not sure why that is, but it's true for both Hjorth mobility in the time-domain and in the frequency-domain. I don't really know what to do with this information other than prioritizing these feature types going forward.

In [79]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Band Power and Spectral Mobility Training')
print(classification_report(y_train, training_yhat))
print('Band Power and Spectral Mobility Testing')
print(classification_report(y_test, testing_yhat))
spectral_mob_power_acc = accuracy_score(y_test, testing_yhat)
spectral_mob_power_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
spectral_mob_power_f1 = f1_score(y_test, testing_yhat, average = None)
spectral_mob_power_prec = precision_score(y_test, testing_yhat, average = None)
spectral_mob_power_rec = recall_score(y_test, testing_yhat, average = None)

Band Power and Spectral Mobility Training
              precision    recall  f1-score   support

         GPD       0.84      0.95      0.89      1794
        GRDA       0.88      0.91      0.90      1819
         LPD       0.82      0.92      0.87      1800
        LRDA       0.92      0.92      0.92      1795
       Other       0.86      0.80      0.83      1808
     Seizure       0.91      0.73      0.81      1784

    accuracy                           0.87     10800
   macro avg       0.87      0.87      0.87     10800
weighted avg       0.87      0.87      0.87     10800

Band Power and Spectral Mobility Testing
              precision    recall  f1-score   support

         GPD       0.78      0.90      0.84       206
        GRDA       0.79      0.87      0.83       181
         LPD       0.73      0.84      0.78       200
        LRDA       0.87      0.85      0.86       205
       Other       0.70      0.67      0.69       192
     Seizure       0.86      0.59      0.70      

In [80]:
precisions['Power and Spectral Mobility'] = spectral_mob_power_prec
recalls['Power and Spectral Mobility'] = spectral_mob_power_rec
f1_scores['Power and Spectral Mobility'] = spectral_mob_power_f1

### Hjorth Altogether

In [9]:
X1 = pd.read_csv('time_mob_df.csv')
X2 = pd.read_csv('spectral_mob_df.csv')
X3 = pd.read_csv('time_comp_df.csv')
X4 = pd.read_csv('freq_comp_df.csv')
cols1, cols2, cols3, cols4 = X1.columns, X2.columns, X3.columns, X4.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

scaler3 = StandardScaler()
X3 = scaler3.fit_transform(X3)
X3 = pd.DataFrame(X3, columns = cols3)

scaler4 = StandardScaler()
X4 = scaler4.fit_transform(X4)
X4 = pd.DataFrame(X4, columns = cols4)

X = pd.concat([X1, X2], axis = 1)
X = pd.concat([X, X3], axis = 1)
X = pd.concat([X, X4], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

### Notes on Results  
  
The Hjorth feature set has 76 columns. For frequency band power, it was 95 because there were 5 frequency band powers and 19 EEG channels. Here, there are two complexity and two mobility numbers and 19 EEG channels. Lower dimensionality and much better results. This may be the feature type to prioritize going forward.

In [10]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Hjorth Altogether Training')
print(classification_report(y_train, training_yhat))
print('Hjorth Altogether Testing')
print(classification_report(y_test, testing_yhat))
hjorth_acc = accuracy_score(y_test, testing_yhat)
hjorth_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
hjorth_f1 = f1_score(y_test, testing_yhat, average = None)
hjorth_prec = precision_score(y_test, testing_yhat, average = None)
hjorth_rec = recall_score(y_test, testing_yhat, average = None)

Hjorth Altogether Training
              precision    recall  f1-score   support

         GPD       0.85      0.95      0.89      1794
        GRDA       0.90      0.90      0.90      1819
         LPD       0.88      0.91      0.90      1800
        LRDA       0.95      0.93      0.94      1795
       Other       0.91      0.81      0.86      1808
     Seizure       0.85      0.84      0.85      1784

    accuracy                           0.89     10800
   macro avg       0.89      0.89      0.89     10800
weighted avg       0.89      0.89      0.89     10800

Hjorth Altogether Testing
              precision    recall  f1-score   support

         GPD       0.79      0.89      0.84       206
        GRDA       0.82      0.83      0.82       181
         LPD       0.82      0.84      0.83       200
        LRDA       0.93      0.92      0.92       205
       Other       0.81      0.69      0.75       192
     Seizure       0.75      0.73      0.74       216

    accuracy            

### Wavelet Altogether

In [11]:
X1 = pd.read_csv('coef_energy_df.csv')
X2 = pd.read_csv('tk_energy_df.csv')
cols1, cols2 = X1.columns, X2.columns

scaler1 = StandardScaler()
X1 = scaler1.fit_transform(X1)
X1 = pd.DataFrame(X1, columns = cols1)

scaler2 = StandardScaler()
X2 = scaler2.fit_transform(X2)
X2 = pd.DataFrame(X2, columns = cols2)

X = pd.concat([X1, X2], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

### Wavelet Results  
  
380 columns and a testing accuracy of 66%. I do intend to try including the coef energy feature data in the overall dataset. I intend to reduce the number of columns by averaging EEG signals by on specific electrode chains which are identified by common EEG montages. Then feature selection and maybe PCA will further reduce that number. The Hjorth parameters are the priority features. From there, some of the band power info, wavelet coef info, fractal dimension info, etc. should be useful.

In [12]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Wavelet Altogether Training')
print(classification_report(y_train, training_yhat))
print('Wavelet Altogether Testing')
print(classification_report(y_test, testing_yhat))
wavelet_acc = accuracy_score(y_test, testing_yhat)
wavelet_macro_f1 = f1_score(y_test, testing_yhat, average = 'macro')
wavelet_f1 = f1_score(y_test, testing_yhat, average = None)
wavelet_prec = precision_score(y_test, testing_yhat, average = None)
wavelet_rec = recall_score(y_test, testing_yhat, average = None)

Wavelet Altogether Training
              precision    recall  f1-score   support

         GPD       0.81      0.94      0.87      1794
        GRDA       0.78      0.88      0.82      1819
         LPD       0.78      0.86      0.82      1800
        LRDA       0.81      0.90      0.85      1795
       Other       0.79      0.62      0.69      1808
     Seizure       0.87      0.63      0.74      1784

    accuracy                           0.80     10800
   macro avg       0.81      0.80      0.80     10800
weighted avg       0.81      0.80      0.80     10800

Wavelet Altogether Testing
              precision    recall  f1-score   support

         GPD       0.71      0.88      0.79       206
        GRDA       0.66      0.72      0.69       181
         LPD       0.64      0.72      0.68       200
        LRDA       0.67      0.76      0.71       205
       Other       0.52      0.43      0.47       192
     Seizure       0.75      0.46      0.57       216

    accuracy          

### Brief Note on Results  
  
If I remember correctly, the first KNN model I ran without any preprocessing done other than standardization had a testing accuracy of 64% or 65%. Highpass filtering and re-referencing brought that up to about 70%. Adding new features to the feature set is having little to no effect. I don't really know why that is. I think what I'm likely to end up doing is signal averaging to handle some of the high correlation between columns and then use most of the feature types tested above in a single feature set. From there, I can determine which features are most important and may try PCA as well.

In [84]:
activity_list = ['GPD', 'GRDA', 'LPD', 'LRDA', 'Other', 'Seizure']
precisions.index = activity_list
recalls.index = activity_list
f1_scores.index = activity_list

In [85]:
precisions.round(decimals = 3)

Unnamed: 0,Band Power,Power and Complexity,Power and Spectral Complexity,Power and Higuchi,Power and Katz,Power and Coef Energy,Power and TK Energy,Power and Zero Crossings,Power and Line Length,Power and Spectral Slope,Power and Time Mobility,Power and Spectral Mobility
GPD,0.779,0.806,0.768,0.799,0.781,0.754,0.733,0.786,0.786,0.778,0.799,0.785
GRDA,0.643,0.647,0.689,0.655,0.644,0.656,0.626,0.684,0.683,0.662,0.776,0.789
LPD,0.694,0.679,0.704,0.682,0.678,0.679,0.665,0.679,0.686,0.664,0.71,0.725
LRDA,0.744,0.764,0.747,0.745,0.73,0.767,0.752,0.766,0.73,0.702,0.864,0.874
Other,0.545,0.559,0.56,0.556,0.537,0.565,0.569,0.553,0.554,0.555,0.684,0.701
Seizure,0.82,0.812,0.812,0.807,0.832,0.841,0.826,0.823,0.838,0.813,0.839,0.858


In [86]:
recalls.round(decimals = 3)

Unnamed: 0,Band Power,Power and Complexity,Power and Spectral Complexity,Power and Higuchi,Power and Katz,Power and Coef Energy,Power and TK Energy,Power and Zero Crossings,Power and Line Length,Power and Spectral Slope,Power and Time Mobility,Power and Spectral Mobility
GPD,0.888,0.888,0.883,0.908,0.883,0.893,0.879,0.908,0.893,0.883,0.908,0.903
GRDA,0.785,0.801,0.796,0.796,0.79,0.757,0.757,0.79,0.773,0.757,0.862,0.867
LPD,0.805,0.825,0.795,0.815,0.81,0.835,0.815,0.825,0.81,0.8,0.855,0.845
LRDA,0.81,0.839,0.82,0.8,0.805,0.805,0.785,0.829,0.805,0.815,0.834,0.849
Other,0.443,0.417,0.464,0.411,0.417,0.453,0.406,0.406,0.453,0.422,0.63,0.672
Seizure,0.505,0.519,0.542,0.542,0.505,0.514,0.528,0.56,0.551,0.505,0.579,0.588


In [87]:
f1_scores.round(decimals = 3)

Unnamed: 0,Band Power,Power and Complexity,Power and Spectral Complexity,Power and Higuchi,Power and Katz,Power and Coef Energy,Power and TK Energy,Power and Zero Crossings,Power and Line Length,Power and Spectral Slope,Power and Time Mobility,Power and Spectral Mobility
GPD,0.83,0.845,0.822,0.85,0.829,0.818,0.799,0.842,0.836,0.827,0.85,0.84
GRDA,0.706,0.716,0.738,0.718,0.71,0.703,0.685,0.733,0.725,0.706,0.817,0.826
LPD,0.745,0.745,0.746,0.743,0.738,0.749,0.733,0.745,0.743,0.726,0.776,0.781
LRDA,0.776,0.8,0.781,0.772,0.766,0.786,0.768,0.796,0.766,0.754,0.849,0.861
Other,0.489,0.478,0.507,0.473,0.469,0.503,0.474,0.468,0.499,0.479,0.656,0.686
Seizure,0.625,0.633,0.65,0.648,0.628,0.638,0.644,0.667,0.665,0.623,0.685,0.698
