In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fastparquet, pyarrow
import mne
from mne.decoding import Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score
from utils import *

In [2]:
mne.set_log_level('WARNING')

In [3]:
df = pd.read_csv('by_patient.csv')

In [4]:
other_df = activity_df(df, 'Other', 'expert_consensus')
seizure_df = activity_df(df, 'Seizure', 'expert_consensus')
gpd_df = activity_df(df, 'GPD', 'expert_consensus')
lpd_df = activity_df(df, 'LPD', 'expert_consensus')
grda_df = activity_df(df, 'GRDA', 'expert_consensus')
lrda_df = activity_df(df, 'LRDA', 'expert_consensus')
activity_df_list = [other_df, seizure_df, gpd_df, lpd_df, grda_df, lrda_df]

### Testing Each Step of Preprocessing Again  
  
I realized that the way I wrote my nested function in utils likely might have meant that even if I set bandpass and notch and reref as True, one of the functions within that overall function had them defaulted to False so that function would run bandpass = False when called. Re-Referencing having the effect that it did makes me think this isn't true and that setting the overall function's value for bandpass would set all internal functions' values and override whatever those internal functions' defaults are. But I'm going to run a few tests to see if I get the same results I was getting before altering my code so that the only place these values are set is the overall function.  
  
### No Filtering or Re-Rereferencing

In [8]:
y = get_yvals(2000)['activity']

In [7]:
#nofilt_df = full_band_df(2000, activity_df_list, None, None, bandpass = False, notch = False, reref = False)
#nofilt_df.to_csv('nofilt_df.csv', index = None)

In [8]:
pd.read_csv('nofilt_df.csv').shape

(12000, 95)

In [9]:
X = pd.read_csv('nofilt_df.csv')

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [10]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Training')
print(classification_report(y_train, training_yhat))
print('Testing')
print(classification_report(y_test, testing_yhat))

Training
              precision    recall  f1-score   support

         GPD       0.79      0.92      0.85      1794
        GRDA       0.72      0.85      0.78      1819
         LPD       0.75      0.86      0.80      1800
        LRDA       0.81      0.87      0.84      1795
       Other       0.76      0.55      0.64      1808
     Seizure       0.87      0.62      0.72      1784

    accuracy                           0.78     10800
   macro avg       0.78      0.78      0.77     10800
weighted avg       0.78      0.78      0.77     10800

Testing
              precision    recall  f1-score   support

         GPD       0.72      0.88      0.79       206
        GRDA       0.62      0.78      0.69       181
         LPD       0.65      0.74      0.70       200
        LRDA       0.71      0.82      0.76       205
       Other       0.55      0.32      0.41       192
     Seizure       0.77      0.50      0.61       216

    accuracy                           0.68      1200
   mac

### Highpass Only (1 Hz)

In [None]:
#lowfreqfilt_df = full_band_df(2000, activity_df_list, 1, None, bandpass = True, notch = False, reref = False)
#lowfreqfilt_df.to_csv('lowfreqfilt_df.csv', index = None)

In [12]:
X = pd.read_csv('lowfreqfilt_df.csv')

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [14]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Training')
print(classification_report(y_train, training_yhat))
print('Testing')
print(classification_report(y_test, testing_yhat))

Training
              precision    recall  f1-score   support

         GPD       0.80      0.93      0.86      1794
        GRDA       0.75      0.88      0.81      1819
         LPD       0.76      0.87      0.81      1800
        LRDA       0.81      0.88      0.85      1795
       Other       0.77      0.56      0.65      1808
     Seizure       0.89      0.63      0.74      1784

    accuracy                           0.79     10800
   macro avg       0.80      0.79      0.79     10800
weighted avg       0.80      0.79      0.78     10800

Testing
              precision    recall  f1-score   support

         GPD       0.69      0.84      0.76       206
        GRDA       0.61      0.71      0.65       181
         LPD       0.63      0.79      0.70       200
        LRDA       0.75      0.81      0.78       205
       Other       0.58      0.40      0.47       192
     Seizure       0.84      0.52      0.64       216

    accuracy                           0.68      1200
   mac

### Default Notch Filtering

In [6]:
#default_notch_df = full_band_df(2000, activity_df_list, 1, None, bandpass = True, notch = True, reref = False)
#default_notch_df.to_csv('default_notch_df.csv', index = None)

In [9]:
X = pd.read_csv('default_notch_df.csv')

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [10]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Training')
print(classification_report(y_train, training_yhat))
print('Testing')
print(classification_report(y_test, testing_yhat))

Training
              precision    recall  f1-score   support

         GPD       0.78      0.92      0.84      1794
        GRDA       0.74      0.88      0.81      1819
         LPD       0.74      0.87      0.80      1800
        LRDA       0.82      0.85      0.84      1795
       Other       0.73      0.55      0.63      1808
     Seizure       0.89      0.59      0.71      1784

    accuracy                           0.78     10800
   macro avg       0.78      0.78      0.77     10800
weighted avg       0.78      0.78      0.77     10800

Testing
              precision    recall  f1-score   support

         GPD       0.67      0.85      0.75       206
        GRDA       0.62      0.79      0.69       181
         LPD       0.62      0.73      0.67       200
        LRDA       0.75      0.82      0.78       205
       Other       0.50      0.31      0.39       192
     Seizure       0.76      0.44      0.55       216

    accuracy                           0.66      1200
   mac

### Global Average Re-Referencing

In [11]:
#glb_avg_df = full_band_df(2000, activity_df_list, 1, None, bandpass = True, notch = False, reref = True)
#glb_avg_df.to_csv('glb_avg_df.csv', index = None)

In [12]:
X = pd.read_csv('glb_avg_df.csv')

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)

In [13]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
training_yhat = knn.predict(X_train)
testing_yhat = knn.predict(X_test)
print('Training')
print(classification_report(y_train, training_yhat))
print('Testing')
print(classification_report(y_test, testing_yhat))

Training
              precision    recall  f1-score   support

         GPD       0.83      0.93      0.88      1794
        GRDA       0.78      0.89      0.83      1819
         LPD       0.79      0.88      0.83      1800
        LRDA       0.85      0.89      0.87      1795
       Other       0.78      0.63      0.70      1808
     Seizure       0.90      0.69      0.78      1784

    accuracy                           0.82     10800
   macro avg       0.82      0.82      0.81     10800
weighted avg       0.82      0.82      0.81     10800

Testing
              precision    recall  f1-score   support

         GPD       0.73      0.87      0.80       206
        GRDA       0.66      0.81      0.72       181
         LPD       0.70      0.78      0.74       200
        LRDA       0.80      0.79      0.80       205
       Other       0.51      0.45      0.48       192
     Seizure       0.81      0.53      0.64       216

    accuracy                           0.70      1200
   mac

### Conclusion  
  
The way the functions were written wasn't an issue. These results are equivalent to results from before changes were made. The next step today is going to be testing different feature types.