In [None]:
# Mount Google Drive at /content/drive so the notebook can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os 
import sys
import glob
import pickle

import numpy as np
import pandas as pd     # dataframes
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's global random number generator.
np.random.seed(0)

# Glob pattern (relative to the working directory) matching the feature-data folders on Drive.
FILE_PATH = './drive/MyDrive/BME1473_Project/FeatureData/*'
# Root folder under which the per-dataset result folders are created.
RESULTS_PATH = './drive/MyDrive/BME1473_Project/Classification'

## Class Labeling - X and Y Data

In [None]:
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn import metrics
# Display the installed scikit-learn version as the cell output.
sklearn.__version__

'1.0.2'

In [None]:
def XYDataExtraction(data_frame_name, shuffle = True):
  """Load one feature table and split it into a feature matrix and encoded labels.

  Parameters
  ----------
  data_frame_name : str
      Path to a gzip-compressed JSON file written with ``orient='index'``.
  shuffle : bool, default True
      When True the rows are shuffled with a fixed seed (``random_state=0``)
      so the order is reproducible. When False the rows keep the order in
      which they were read from the file.

  Returns
  -------
  feature_df : pandas.DataFrame
      The full table (shuffled if requested), including the metadata columns.
  feature_df_drop : pandas.DataFrame
      ``feature_df`` without the metadata columns and the two entropy columns.
  X : numpy.ndarray
      ``feature_df_drop.values``.
  Y : numpy.ndarray
      Integer-encoded ``class`` column (``LabelEncoder.fit_transform``).
  """
  feature_df = pd.read_json(data_frame_name, orient='index', compression = 'gzip')

  # Honour the flag: previously the rows were shuffled even with shuffle=False.
  if shuffle:
    feature_df = feature_df.sample(frac = 1, random_state = 0)

  # Metadata columns plus two entropy features that are excluded from X.
  # NOTE(review): the reason the entropy columns are excluded is not visible here.
  excluded_columns = ['class', 'file_id', 'location', 'CZ|sample_entropy', 'CZ|spec_entropy']
  feature_df_drop = feature_df.drop(excluded_columns, axis='columns')

  X = feature_df_drop.values

  # Change the string class labels to ints.
  Y = LabelEncoder().fit_transform(feature_df['class'])

  return feature_df, feature_df_drop, X, Y

### Obtaining Pair Plots between Pairs of Features

In [None]:
# Apply seaborn's default theme globally, with color_codes enabled.
sns.set(color_codes=True)

def plot_pairplot(data_x, data_y, feature_list, save_path, x_axis = 'CZ|D3_ratio', y_axis = 'CZ|D2_ratio'):
    """Draw, save and show a seaborn pair plot for two standardized features.

    All columns of ``data_x`` are standardized with ``StandardScaler``; the two
    columns named by ``x_axis`` and ``y_axis`` (looked up by position in
    ``feature_list``) are then plotted, coloured by class. Labels 0 and 1 in
    ``data_y`` are shown as 'Baseline' and 'Seizure'. The figure is written to
    ``save_path`` as ``pairplot_<x_axis>_<y_axis>.png``.
    """
    standardized = StandardScaler().fit_transform(data_x)
    pair_columns = [feature_list.index(x_axis), feature_list.index(y_axis)]

    # Two-column frame holding only the selected feature pair.
    data_plot = pd.DataFrame(standardized[:, pair_columns], columns=[x_axis, y_axis])

    # Translate the integer labels into readable class names for the legend.
    class_names = {0:'Baseline', 1:'Seizure'}
    data_plot['class'] = np.vectorize(class_names.get)(data_y)

    sns.pairplot(data_plot,
                 hue = 'class',
                 hue_order = ['Baseline','Seizure'],
                 markers=["o", "s"],
                 plot_kws=dict(alpha = 0.5))

    output_file = os.path.join(save_path, f'pairplot_{x_axis}_{y_axis}.png')
    plt.savefig(output_file)
    plt.show()

## Binary Classification

Support Vector Machine (SVM)

In [None]:
from mlxtend.plotting import plot_decision_regions
from itertools import combinations

def regionplot(X, Y, classifier, feature_list, savepath, classifier_name, x_axis, y_axis):
  """Fit ``classifier`` on two standardized features and plot its decision regions.

  ``X`` is standardized with ``StandardScaler`` and reduced to the columns
  named ``x_axis`` and ``y_axis`` (positions taken from ``feature_list``).
  ``classifier`` is fitted in place on that 2-D data, so the caller's object
  is left fitted on two features only. The title reports the accuracy on the
  same data the classifier was fitted on. The figure is saved to ``savepath``
  as ``<classifier_name>_<x_axis>_<y_axis>_boundary.png`` and then shown.
  """
  pair_columns = [feature_list.index(x_axis), feature_list.index(y_axis)]
  vis_data = StandardScaler().fit_transform(X)[:, pair_columns]

  classifier.fit(vis_data, Y)
  plot_decision_regions(vis_data, Y, clf = classifier)

  # Accuracy on the fitting data itself (not a held-out estimate).
  accuracy_pct = classifier.score(vis_data, Y) * 100

  plt.xlabel(x_axis)
  plt.ylabel(y_axis)
  plt.title(f'Classification Accuracy: {accuracy_pct:.3f}%')

  output_file = os.path.join(savepath, f'{classifier_name}_{x_axis}_{y_axis}_boundary.png')
  plt.savefig(output_file)
  plt.show()

def evaluation(X, Y, classifier, threshold, savepath, classifier_name):
  """Evaluate a fitted binary classifier and write the metrics to a CSV file.

  Parameters
  ----------
  X, Y : array-like
      Evaluation features and their true 0/1 labels.
  classifier : fitted estimator exposing ``predict_proba``.
  threshold : float
      A sample is predicted as class 1 when its class-1 probability exceeds
      this value.
  savepath : str
      Folder that receives ``evaluation_<classifier_name>.csv``.
  classifier_name : str
      Used in the CSV file name.

  The CSV holds the classification report (one row per class / average) plus
  'Specificity' and 'ROC' rows; the first column carries the row labels.
  The ROC curve is drawn on its own figure, shown, and closed.
  """
  # Class-1 probabilities and thresholded hard predictions.
  Y_probs = classifier.predict_proba(X)[:,1]
  Y_Predict = (Y_probs > threshold).astype(int)

  # NOTE(review): class 0 is called 'Baseline' in plot_pairplot; 'Intra-Ictal'
  # may be meant as 'Inter-Ictal' - confirm before relying on the row label.
  eval_results = metrics.classification_report(Y, Y_Predict, target_names = ['Intra-Ictal', 'Ictal'], output_dict=True)

  # Pin the labels so the matrix is always 2x2 and the unpacking cannot fail
  # when one class is missing from the predictions.
  tn, fp, fn, tp = metrics.confusion_matrix(Y, Y_Predict, labels = [0, 1]).ravel()
  specificity = tn / (tn + fp) if (tn + fp) > 0 else float('nan')

  fpr, tpr, _ = metrics.roc_curve(Y, Y_probs)
  roc_value = metrics.roc_auc_score(Y, Y_probs)

  # Draw on a dedicated figure and close it afterwards. Left open, this figure
  # stays pyplot's current figure and later pyplot-based plots are drawn on
  # top of the ROC curve.
  fig, ax = plt.subplots()
  ax.plot(fpr, tpr)
  ax.set_title(f'ROC = {roc_value * 100 :.3f}%')
  ax.set_xlabel('1 - Specificity')
  ax.set_ylabel('Sensitivity')
  plt.show()
  plt.close(fig)

  eval_df = pd.DataFrame(eval_results)
  # Scalars broadcast to every row, whatever the report's row count is.
  eval_df['Specificity'] = specificity
  eval_df['ROC'] = roc_value
  eval_df = eval_df.T

  # After the transpose the index holds the row names (classes, averages,
  # Specificity, ROC); write it so the rows of the CSV remain identifiable.
  eval_df.to_csv(os.path.join(savepath, f'evaluation_{classifier_name}.csv'), index_label = 'label')

def classifysvc(X, Y, feature_list, savepath, kernel = 'linear'):
  """Plot decision regions for, then train and evaluate, a scaled SVM classifier.

  Parameters
  ----------
  X, Y : array-like
      Feature matrix and 0/1 labels.
  feature_list : list of str
      Column names of ``X``, in column order.
  savepath : str
      Folder that receives the figures and the evaluation CSV.
  kernel : str, default 'linear'
      Kernel passed to ``SVC``; also used in the output file names.

  Steps: (1) for three feature pairs, ``regionplot`` fits the pipeline on the
  pair and saves the decision-region figure; (2) the pipeline is refitted on a
  stratified 67/33 train split of all features and scored on the test split
  with ``evaluation`` at a 0.5 probability threshold.
  """
  # Import the submodule explicitly: a bare `import sklearn` does not load
  # `sklearn.model_selection`, so attribute access only works if some other
  # import happened to load it first.
  from sklearn.model_selection import train_test_split

  pipe_svc = Pipeline([('scl', StandardScaler()),
                        ('clf', SVC(C=100,
                                    kernel = kernel,
                                    probability = True,
                                    random_state = 0))])

  # Only the 6th-8th feature pairs (in combination order) are visualized.
  feat_plots = list(combinations(feature_list, 2))[5:8]

  for feat_ in feat_plots:
    regionplot(X, Y, pipe_svc, feature_list, savepath, f'svc_{kernel}',  feat_[0], feat_[1])

  # regionplot left the pipeline fitted on two features; refit on all of them.
  X_Train, X_Test, Y_Train, Y_Test = train_test_split(X, Y, test_size = 0.33, random_state = 0, stratify = Y)
  pipe_svc.fit(X_Train, Y_Train)
  evaluation(X_Test, Y_Test, pipe_svc, 0.5, savepath, f'svc_{kernel}')


In [None]:
# Build an index of every feature file found under FILE_PATH:
#   file_paths['denoised'][dataset]                 -> sorted list of files
#   file_paths['fir_filtering'][dataset][setting]   -> sorted list of files
#   file_paths['dwt_filtering'][dataset][filter]    -> sorted list of files
# Any other top-level folder gets an empty dict.
CONFIG_DIRS = sorted(glob.glob(FILE_PATH))

file_paths = {}

for config_dir in CONFIG_DIRS:
  config_name = config_dir.rsplit('/', 1)[-1]
  SPECS_CONFIG_DIR = sorted(glob.glob(os.path.join(config_dir, '*')))

  if config_name == 'denoised':
    # One level: dataset folder -> files.
    file_paths[config_name] = {
        dataset_dir.rsplit('/', 1)[-1]: sorted(glob.glob(os.path.join(dataset_dir, '*')))
        for dataset_dir in SPECS_CONFIG_DIR
    }

  elif config_name in ('fir_filtering', 'dwt_filtering'):
    # Two levels: dataset folder -> setting/filter folder -> files.
    file_paths[config_name] = {
        dataset_dir.rsplit('/', 1)[-1]: {
            sub_dir.rsplit('/', 1)[-1]: sorted(glob.glob(os.path.join(sub_dir, '*')))
            for sub_dir in sorted(glob.glob(os.path.join(dataset_dir, '*')))
        }
        for dataset_dir in SPECS_CONFIG_DIR
    }

  else:
    file_paths[config_name] = {}

### Classification results for features related to data whitening steps

In [None]:
# for config_dir in file_paths.keys():
#   if config_dir == 'denoised':
#     for data_ in file_paths[config_dir].keys():
#       for path_ in file_paths[config_dir][data_]:
#         feat_type = path_.split('/')[-1].split('.')[0]
#         save_path = f'{RESULTS_PATH}/{config_dir}/{data_}_{feat_type}'

#         os.makedirs(save_path, exist_ok = True)

#         feature_df, feature_df_drop, X, Y = XYDataExtraction(path_, shuffle = True)
#         feature_list = list(feature_df_drop.columns)

#         feat_plots = list(combinations(feature_list, 2))[5:8]
#         for feat_ in feat_plots:
#           plot_pairplot(X, Y, feature_list, save_path, feat_[0], feat_[1])

#         classifysvc(X, Y, feature_list, save_path, kernel = 'linear')
#         classifysvc(X, Y, feature_list, save_path, kernel = 'rbf')

### Classification results for features related to FIR filtered EEG Signals

In [None]:
# Pair plots plus linear and RBF SVM results for every FIR-filtered feature file.
# Nothing runs when file_paths has no 'fir_filtering' entry.
config_dir = 'fir_filtering'

for data_, files_by_config in file_paths.get(config_dir, {}).items():
  for config_, feature_files in files_by_config.items():
    for path_ in feature_files:
      # File name without its extension(s) identifies the feature type.
      feat_type = path_.split('/')[-1].split('.')[0]
      save_path = f'{RESULTS_PATH}/{config_dir}/{data_}_{config_}_{feat_type}'
      os.makedirs(save_path, exist_ok = True)

      feature_df, feature_df_drop, X, Y = XYDataExtraction(path_, shuffle = True)
      feature_list = list(feature_df_drop.columns)

      # Same three feature pairs that classifysvc uses for its region plots.
      for feat_ in list(combinations(feature_list, 2))[5:8]:
        plot_pairplot(X, Y, feature_list, save_path, feat_[0], feat_[1])

      for kernel_ in ('linear', 'rbf'):
        classifysvc(X, Y, feature_list, save_path, kernel = kernel_)

Output hidden; open in https://colab.research.google.com to view.

### Classification results for features extracted from DWT denoised signals

In [None]:
# Pair plots plus linear and RBF SVM results for every DWT-denoised feature file.
# Nothing runs when file_paths has no 'dwt_filtering' entry.
config_dir = 'dwt_filtering'

for data_, files_by_filter in file_paths.get(config_dir, {}).items():
  for filt_, feature_files in files_by_filter.items():
    for path_ in feature_files:
      # File name without its extension(s) identifies the feature type.
      feat_type = path_.split('/')[-1].split('.')[0]
      save_path = f'{RESULTS_PATH}/{config_dir}/{data_}_{filt_}_{feat_type}'
      os.makedirs(save_path, exist_ok = True)

      feature_df, feature_df_drop, X, Y = XYDataExtraction(path_, shuffle = True)
      feature_list = list(feature_df_drop.columns)

      # Same three feature pairs that classifysvc uses for its region plots.
      for feat_ in list(combinations(feature_list, 2))[5:8]:
        plot_pairplot(X, Y, feature_list, save_path, feat_[0], feat_[1])

      # Zero out NaN features in place, after the pair plots and before
      # classification (presumably because the SVM fit rejects NaNs - confirm).
      nan_mask = np.isnan(X)
      if nan_mask.any():
        X[nan_mask] = 0

      for kernel_ in ('linear', 'rbf'):
        classifysvc(X, Y, feature_list, save_path, kernel = kernel_)