In [None]:
# Imports #
import os
import time
import shutil
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.python.keras.utils.data_utils import Sequence

from sklearn import metrics
from sklearn import datasets
from sklearn import model_selection
from sklearn import linear_model
from sklearn.metrics import confusion_matrix, roc_curve, auc, roc_auc_score
from sklearn.utils.multiclass import unique_labels
from sklearn.model_selection import StratifiedKFold, KFold

In [None]:
# Utils #
def reshape_model(x_train, x_test):
    """Insert a singleton axis at position 1 of the train and test arrays.

    Each input is reshaped with ``np.reshape`` from its first three
    dimensions ``(n, a, b)`` to ``(n, 1, a, b)``.

    Args:
        x_train: array exposing ``.shape`` with at least three dimensions.
        x_test: array exposing ``.shape`` with at least three dimensions.

    Returns:
        tuple: ``(reshaped_train, reshaped_test)``.
    """
    def _with_unit_axis(batch):
        # Only the first three dimensions are read, exactly as many as the
        # target shape needs.
        samples, rows, cols = batch.shape[0], batch.shape[1], batch.shape[2]
        return np.reshape(batch, (samples, 1, rows, cols))

    train_4d = _with_unit_axis(x_train)
    test_4d = _with_unit_axis(x_test)
    return train_4d, test_4d

def plot_confusion_matrix(cm, x_values, y_values,
                          cmap=plt.cm.Blues,
                          title="Precision",
                          xlabel='Kernel Length',
                          ylabel='Dropout'):
    """Draw a 2-D matrix as an annotated heat map and show it.

    Every cell is annotated with its value; the text is white on cells above
    half of the matrix maximum and black otherwise.

    Args:
        cm: 2-D matrix of values (anything ``np.asarray`` accepts).
        x_values: tick labels for the columns.
        y_values: tick labels for the rows.
        cmap: matplotlib colormap used by ``imshow``.
        title: figure title.
        xlabel: label of the horizontal axis.
        ylabel: label of the vertical axis.
    """
    cm = np.asarray(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    plt.xticks(np.arange(len(x_values)), x_values, rotation=45)
    plt.yticks(np.arange(len(y_values)), y_values)

    # The 'd' format code only accepts integers; a matrix of fractional
    # scores would raise ValueError, so floats are printed with two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.show()
    
## Multiclass or binary report
## Pass binary=True when y_true already holds class numbers (e.g. sigmoid
## output); pass binary=False when y_true is one-hot encoded.
def full_multiclass_report(model,
                           x,
                           y_true,
                           classes,
                           batch_size=32,
                           binary=True):
    """Print accuracy, a classification report and the confusion matrix.

    Args:
        model: object exposing ``predict(x, batch_size=...)`` that returns
            per-sample scores.
        x: inputs forwarded to ``model.predict``.
        y_true: class numbers, or one-hot rows when ``binary`` is False.
        classes: tick labels used for both axes of the confusion-matrix plot.
        batch_size: batch size forwarded to ``model.predict``.
        binary: when False, ``y_true`` is converted from one-hot rows to
            class numbers with ``argmax``.
    """
    # 1. Transform one-hot encoded y_true into their class number
    if not binary:
        y_true = np.argmax(y_true, axis=1)

    # 2. Predict class numbers. ``predict_classes`` is not available on every
    #    Keras model (nor in recent TensorFlow releases), so the scores from
    #    ``predict`` are converted here: argmax over several output columns,
    #    a 0.5 threshold for a single output column.
    scores = np.asarray(model.predict(x, batch_size=batch_size))
    if scores.ndim > 1 and scores.shape[-1] > 1:
        y_pred = scores.argmax(axis=-1)
    else:
        y_pred = (scores > 0.5).astype('int32').ravel()

    # 3. Print accuracy score (looked up on the imported ``metrics`` module;
    #    the bare name was never imported)
    print("Accuracy : " + str(metrics.accuracy_score(y_true, y_pred)))

    print("")

    # 4. Print classification report
    print("Classification Report")
    print(metrics.classification_report(y_true, y_pred, digits=5))

    # 5. Plot confusion matrix; the same labels serve as column and row ticks
    cnf_matrix = confusion_matrix(y_true, y_pred)
    print(cnf_matrix)
    plot_confusion_matrix(cnf_matrix, classes, classes)
    
def plot_history(histories):
    """Overlay the loss and accuracy curves of several training runs.

    Loss curves are drawn on matplotlib figure 1 and accuracy curves on
    figure 2; training curves are red, validation curves green. Each legend
    entry carries the metric's last recorded value.

    A history without any loss key is reported and skipped; the remaining
    histories are still drawn and the figures are still shown.

    Args:
        histories: iterable of objects whose ``history`` attribute maps a
            metric name to its list of per-epoch values.
    """
    def _draw(record, epochs, keys, style, prefix):
        # One line per matching metric, labelled with its final value.
        for key in keys:
            values = record[key]
            plt.plot(epochs, values, style,
                     label=prefix + ' (' + format(values[-1], '.5f') + ')')

    for history in histories:
        record = history.history
        loss_list = [s for s in record.keys() if 'loss' in s and 'val' not in s]
        val_loss_list = [s for s in record.keys() if 'loss' in s and 'val' in s]
        acc_list = [s for s in record.keys() if 'acc' in s and 'val' not in s]
        val_acc_list = [s for s in record.keys() if 'acc' in s and 'val' in s]

        if len(loss_list) == 0:
            print('Loss is missing in history')
            # Skip only this run; returning here would drop every later
            # history and never reach plt.show().
            continue

        # The epoch axis is derived from the first training-loss series
        epochs = range(1, len(record[loss_list[0]]) + 1)

        ## Loss
        plt.figure(1)
        _draw(record, epochs, loss_list, 'r', 'Training loss')
        _draw(record, epochs, val_loss_list, 'g', 'Validation loss')

        plt.title('Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        ## Accuracy
        plt.figure(2)
        _draw(record, epochs, acc_list, 'r', 'Training accuracy')
        _draw(record, epochs, val_acc_list, 'g', 'Validation accuracy')

        plt.title('Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()
    plt.show()
    
def plot_roc_curve(y_true, scores):
    """Overlay one ROC curve per score matrix and show the figure.

    For every entry of ``scores`` the ROC curve and its area are computed for
    column 0, column 1 and the flattened ("micro") arrays; only the column-1
    curve is drawn, together with the chance diagonal.

    Args:
        y_true: 2-D array indexed as ``y_true[:, i]`` for ``i`` in 0..1.
        scores: iterable of 2-D score arrays indexed the same way.
    """
    line_width = 2

    def _curve(labels, predictions):
        # Returns (false-positive rates, true-positive rates, area).
        false_pos, true_pos, _ = roc_curve(labels, predictions)
        return false_pos, true_pos, auc(false_pos, true_pos)

    for fold_scores in scores:
        # Per-column curves first, then the micro-average over all entries
        curves = {col: _curve(y_true[:, col], fold_scores[:, col])
                  for col in range(2)}
        curves["micro"] = _curve(y_true.ravel(), fold_scores.ravel())

        pos_fpr, pos_tpr, pos_area = curves[1]
        plt.plot(pos_fpr, pos_tpr, color='darkorange',
                 lw=line_width, label='ROC curve (area = %0.2f)' % pos_area)
        plt.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic example')
        plt.legend(loc="lower right")
    plt.show()

def box_plot(results, names, y_label, title="BoxPlot",save_name="box_plot.png"):
    """Draw a box plot, save it to ``save_name`` and show it.

    Args:
        results: data passed to ``Axes.boxplot`` (one box per entry).
        names: tick labels, one per box.
        y_label: label of the vertical axis.
        title: figure-level title.
        save_name: path the figure is written to.
    """
    fig = plt.figure()
    fig.suptitle(title)

    # Create the axes first and draw on it explicitly. Calling plt.ylabel
    # before any axes exists attaches the label to an implicitly created
    # axes, which is not guaranteed to be the one add_subplot returns.
    ax = fig.add_subplot(111)
    ax.set_ylabel(y_label)
    ax.boxplot(results)
    ax.set_xticklabels(names)

    fig.savefig(save_name)
    plt.show()

# Example of feeding training data through a Sequence
class BatchSequence(Sequence):
    """Serve consecutive ``batch_size``-long slices of ``x_set`` and ``y_set``."""

    def __init__(self, x_set, y_set, batch_size):
        self.x = x_set
        self.y = y_set
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches, rounding up so a shorter final batch counts."""
        batch_count = np.ceil(len(self.x) / float(self.batch_size))
        return int(batch_count)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(batch_x, batch_y)`` pair."""
        first = idx * self.batch_size
        last = (idx + 1) * self.batch_size
        window = slice(first, last)

        return self.x[window], self.y[window]
    
# Fix the random seeds for reproducibility. Assigning `seed` alone only
# affects the code that is explicitly handed the value, so NumPy and
# TensorFlow are seeded as well.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# Directory (under the current working directory) reserved for saved models
save_path = os.path.join(os.getcwd(), "Models")

# Callbacks
# Stop once val_accuracy has not improved by at least 1e-4 for 2 consecutive
# epochs, and roll the model back to its best weights.
earlystop_callback = EarlyStopping(monitor='val_accuracy', min_delta=0.0001,
                                   patience=2, verbose=1, mode='auto', restore_best_weights=True)

In [None]:
# Load the sample index (alternative input: 'files/train_200.csv') and give
# the unnamed first column the name 'ID'
train = pd.read_csv('files/train_20.csv').rename(columns={'Unnamed: 0': 'ID'})

# Every column except 'ID' forms the target, flattened by squeeze()
y = train.drop(columns=['ID']).to_numpy().squeeze()

# Hold out 20% of the rows as the test split (fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(
    train, y, test_size=0.2, random_state=0
)

In [None]:
# Read one "<ID>.csv" per held-out sample, drop its 'ID' column and stack the
# remaining values into a single array.
# NOTE: X_test is a DataFrame on entry and an ndarray on exit, so re-running
# this cell requires re-running the train/test split first.
matrixes = [
    pd.read_csv(sample + ".csv")
    .rename(columns={'Unnamed: 0': 'ID'})
    .drop(['ID'], axis=1)
    .to_numpy()
    for sample in X_test.ID
]

X_test = np.array(matrixes)

In [None]:
# Read one "<ID>.csv" per training sample, drop its 'ID' column and stack the
# remaining values into a single array (stored under the lower-case name
# x_train; X_train still holds the ID table at this point).
matrixes = [
    pd.read_csv(sample + ".csv")
    .rename(columns={'Unnamed: 0': 'ID'})
    .drop(['ID'], axis=1)
    .to_numpy()
    for sample in X_train.ID
]

x_train = np.array(matrixes)

In [None]:
# Add the singleton axis to both sample arrays (X_train is rebound from the
# ID table to the reshaped training array here)
X_train, X_test = reshape_model(x_train, X_test)

# One-hot encode the labels into two columns
Y_train, Y_test = (to_categorical(labels, 2) for labels in (y_train, y_test))

In [None]:
from EEGModels import EEGNet, ShallowConvNet, DeepConvNet
# Train Model
# Alternative input pipeline (kept for reference):
# batch_size = 30
# train_batch = BatchSequence(x_train, y_train, batch_size)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
cvloss = []
histories = []
roc_scores = []
cvacc = []

# The fold indices get their own names: calling them `train`/`test` would
# overwrite the `train` DataFrame created when the CSV was loaded.
for train_idx, val_idx in kfold.split(x_train, y_train):
    # create model
    model = EEGNet(nb_classes=2, Chans=60, Samples=200, kernLength=100)

    # compile the model and set the optimizers
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Fit on this fold's training rows, validating on its held-out rows
    history = model.fit(X_train[train_idx], Y_train[train_idx], epochs=20, batch_size=30,
                        validation_data=(X_train[val_idx], Y_train[val_idx]),
                        verbose=0, callbacks=[earlystop_callback])
    histories.append(history)

    # Evaluate the fold's model on the separate test split
    scores = model.evaluate(X_test, Y_test, verbose=0)
    y_score = model.predict(X_test, verbose=0)

    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("-------------------------------------------------")
    cvloss.append(scores[0])
    cvacc.append(scores[1] * 100)
    roc_scores.append(y_score)

# Summary over all folds: mean and standard deviation
print("loss: %.2f (+/- %.2f)" % (np.mean(cvloss), np.std(cvloss)))
print("acc: %.2f%% (+/- %.2f%%)" % (np.mean(cvacc), np.std(cvacc)))

plot_history(histories)

# Architecture of the model built for the last fold
model.summary()

# model_fit = model.fit(train_batch, epochs=12, callbacks=[earlystop_callback])

In [None]:
# HeatMap Plot
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
sns.set()


def save_accuracy_heatmap(results, image_path):
    """Pivot a long-form results table into a grid and save it as a heat map.

    Args:
        results: DataFrame with the columns "Dropout Rate", "KernLength" and
            "Accuracy".
        image_path: file the figure is written to.

    Returns:
        The pivoted table (rows: "Dropout Rate", columns: "KernLength",
        values: "Accuracy").
    """
    # Keyword arguments: pandas 2.x no longer accepts them positionally.
    grid = results.pivot(index="Dropout Rate", columns="KernLength", values="Accuracy")

    # Draw a heatmap with the numeric values in each cell
    fig, ax = plt.subplots(figsize=(5, 3))
    sns.heatmap(grid, annot=True, fmt='.2f', cmap='Blues', linewidths=.5, ax=ax)
    fig.savefig(image_path, bbox_inches='tight')
    return grid


clean_data = pd.read_csv("files/heatmap_10_clean.csv")
labels = save_accuracy_heatmap(clean_data, "images/heatmap_clean10.png")

raw_data = pd.read_csv("files/heatmap_10_raw.csv")
labels = save_accuracy_heatmap(raw_data, "images/heatmap_raw10.png")

In [None]:
# TN, FP, FN, TP
# pred = model.predict(X_test)
# normalize_pred = [1 if element >= 0.5 else 0 for element in pred[:, 1]]

# tn, fp, fn, tp = confusion_matrix(normalize_pred, Y_test[:, 1]).ravel()
# (tn, fp, fn, tp)