In [30]:
import numpy
from keras import Sequential
from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import StandardScaler


import os
import pandas as pd
import numpy as np
from time import time as t
from sklearn.metrics import classification_report
import h5py
from keras import optimizers
from keras import callbacks
import time
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model


In [31]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import ParameterGrid


In [32]:
from keras import backend as K

def tp_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    return true_positives
    
def fp_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return predicted_positives-true_positives

def tn_m(y_true, y_pred):
    print('y_true - 1 '+str((y_true-1)))
    false_negatives = K.sum(K.round(K.clip((y_true-1) * (y_pred-1), 0, 1)))
    return false_negatives
    
def fn_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return possible_positives-true_positives

def recall_m(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

def precision_m(y_true, y_pred):
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


In [33]:
numpy.random.seed(0)

In [34]:
n_splits=4
train_size=0.8
test_size=0.2

In [35]:
class EEGSchizoDatasetBalanced():
    """EEG Alco Train dataset."""

    def __init__(self):
        """
        Args:
            none.
        """
        h5f = h5py.File('schizo_scalars_unbalanced.h5','r')
        self.spikes_seizure_eeg = h5f['dataset_schizo_scalars_unbalanced'][:]
        self.spikes_seizure_eeg=np.swapaxes(self.spikes_seizure_eeg,1,2)


        scalers = {}        
        for i in range(self.spikes_seizure_eeg.shape[1]):
            scalers[i] = StandardScaler()
            self.spikes_seizure_eeg[:, i, :] = scalers[i].fit_transform(self.spikes_seizure_eeg[:, i, :]) 

        h5f.close()
        
        h5f = h5py.File('schizo_labels_unbalanced.h5','r')
        self.labels_seizure_eeg = h5f['dataset_schizo_labels_unbalanced'][:]
        print(str(np.sum(self.labels_seizure_eeg))+'/'+str(len(self.labels_seizure_eeg)))
        h5f.close()
        
    def get_data(self):
        #all folds
        dataArray = list()
        sss = StratifiedShuffleSplit(n_splits=n_splits, train_size=train_size, test_size=test_size, random_state=0)
        for train_index, test_index in sss.split(self.spikes_seizure_eeg, self.labels_seizure_eeg):
            
            trainLabels=self.labels_seizure_eeg[train_index]
            trainValues=self.spikes_seizure_eeg[train_index]
            testLabels=self.labels_seizure_eeg[test_index]
            testValues=self.spikes_seizure_eeg[test_index]

            #BALANCING TRAINING DATA
            positivesIndices=trainLabels==1
            positiveEEGs=trainValues[positivesIndices]
            negativeEEGs=trainValues[~positivesIndices]
            print('positiveEEGs: '+str(len(positiveEEGs)))
            print('negativeEEGs: '+str(len(negativeEEGs)))

            n=np.min([len(positiveEEGs),len(negativeEEGs)])
            print(n)

            trainValues=(np.concatenate((positiveEEGs[0:n],negativeEEGs[0:n]),axis=0))
            trainLabels=(np.concatenate((np.full((n),1),np.full((n),0)),axis=0))
            
            shuffle = np.random.RandomState(seed=0).permutation(len(trainValues))
            trainValues = trainValues[shuffle]
            trainLabels = trainLabels[shuffle]
            currentSplit = {'X_train': (trainValues), 'X_test': (testValues), 
                            'y_train': (trainLabels), 'y_test': (testLabels)}
            dataArray.append(currentSplit)
        return dataArray

    def __len__(self):
        return len(self.spikes_seizure_eeg)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        eeg = torch.tensor(self.spikes_seizure_eeg[idx])
        print('eeg size (in getitem): '+str(eeg.size()))
        label = self.labels_seizure_eeg[idx]
            
        sample = {'eeg': eeg, 'label': label}
        return sample

In [36]:
seizureDataset = EEGSchizoDatasetBalanced()
dataArray = seizureDataset.get_data()

45/84
positiveEEGs: 36
negativeEEGs: 31
31
positiveEEGs: 36
negativeEEGs: 31
31
positiveEEGs: 36
negativeEEGs: 31
31
positiveEEGs: 36
negativeEEGs: 31
31


In [37]:
starting_point = 0

In [38]:
data = 'schizo_lstm'

In [39]:
class TimeHistory(callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.time()
        print('epoch time start: '+str(self.epoch_time_start))

    def on_epoch_end(self, batch, logs={}):
        end_time = time.time() - self.epoch_time_start
        self.times.append(end_time)
        print('epoch time measured: '+str(end_time))

class TestMetricsCallback(callbacks.Callback):
    def __init__(self, test_data):
        self.test_data = test_data

    def on_epoch_end(self, epoch, logs={}):
        x, y = self.test_data
        loss, acc = self.model.evaluate(x, y, verbose=0)
        print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))


In [40]:
n_epochs=250

In [41]:
resultsFilename = str(data)+"_results_data_FINAL.txt"

if os.path.isfile(resultsFilename):
    print ("Results file exists")
else:
    print ("Results file doesn't exist, creating new file...")


overallPrecisionList = list()
overallRecallList = list()
overallAccuracyList = list()
overallF1List = list()
overallTPList = list()
overallTNList = list()
overallFPList = list()
overallFNList = list()
overallConvergenceEpochList = list()
overallTrainingTimeList = list()
overallMeanEpochTimeList = list()


with open(resultsFilename, "a") as text_file:
    print(f"Test Results :\n\n", file=text_file)
for fold in np.arange(n_splits):
    early_stopping_callback = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=250)
    checkpoint_callback = ModelCheckpoint('optimal_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)
    currentFold = fold
    # Train the network.
    print("Begin training for fold " + str(currentFold) + "\n")
    start = t()
    # create the model
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, 
                   input_shape=(16, 7680), activation='sigmoid'))
    #model.add(Dropout(0.2))
    model.add(LSTM(128, activation='sigmoid'))
    #model.add(Dropout(0.2))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999)

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy',f1_m,precision_m, recall_m, tp_m, fp_m, tn_m, fn_m])
    print(model.summary())
    time_callback = TimeHistory()
    print('input data shape: '+str(dataArray[currentFold]['X_train'].shape))


    modelHistory=model.fit(dataArray[currentFold]['X_train'], dataArray[currentFold]['y_train'], epochs=n_epochs,
              batch_size=50, 
              validation_data=(dataArray[currentFold]['X_test'], dataArray[currentFold]['y_test']),
              callbacks=[time_callback, early_stopping_callback, checkpoint_callback])

    stoppingEpoch=early_stopping_callback.stopped_epoch
    modelTimes = time_callback.times
    
    historyF1 = modelHistory.history['val_f1_m']
    historyPrecision = modelHistory.history['val_precision_m']
    historyRecall = modelHistory.history['val_recall_m']
    historyAccuracy = modelHistory.history['val_acc']
    historyTP = modelHistory.history['val_tp_m']
    historyFP = modelHistory.history['val_fp_m']
    historyTN = modelHistory.history['val_tn_m']
    historyFN = modelHistory.history['val_fn_m']
    
    convergenceEpochs=str(historyAccuracy.index(max(historyAccuracy)))
    totalTrainingTime=np.sum(np.array(modelTimes[int(convergenceEpochs)]).astype(np.float))
    print('type(totalTrainingTime): '+str(type(totalTrainingTime)))
    print('type(convergenceEpochs): '+str(type(convergenceEpochs)))
    meanTimePerEpoch=totalTrainingTime/int(convergenceEpochs)
    

    with open(resultsFilename, "a") as text_file:
        print(f"Training complete for fold {str(currentFold)}\n", file=text_file)
        print(f"Epoch Times: {str(modelTimes[0:int(convergenceEpochs)])}\n", file=text_file)
        print(f"Fold {str(currentFold)} Test Metrics:\n", file=text_file)
        print(f"Fold TP: {str(historyTP)}\nFold TN: {str(historyTN)}\nFold FP: {str(historyFP)}\nFold FN: {str(historyFN)}\n", file=text_file)
        print(f"Fold Accuracy: {str(historyAccuracy)}\nFold Precision: {str(historyPrecision)}\nFold Recall: {str(historyRecall)}\nFold F1: {str(historyF1)}\n", file=text_file)
        print(f"Accuracy Maxes at Epoch: {str(convergenceEpochs)}\n", file=text_file)
        print(f"Early stopping at epoch: {str(stoppingEpoch)}\n", file=text_file)
        print(f"Training Epochs until max accuracy: {str(convergenceEpochs)}\n", file=text_file)
        print(f"Total Training Time: {str(totalTrainingTime)}\n", file=text_file)
        print(f"Mean Time per Epoch: {str(meanTimePerEpoch)}\n", file=text_file)

    # load the saved model
    saved_model = load_model('optimal_model.h5', custom_objects={"f1_m": f1_m,
                                                              "precision_m": precision_m,
                                                              "recall_m": recall_m,
                                                            "tp_m":tp_m, 
                                                             "fp_m":fp_m,
                                                             "tn_m":tn_m,
                                                             "fn_m":fn_m
                                                            })

    loss, accuracy, f1_score, precision, recall, tp, fp, tn, fn  = saved_model.evaluate(dataArray[currentFold]['X_test'], dataArray[currentFold]['y_test'], 
                            verbose=1)
    
    predictions=(saved_model.predict(dataArray[currentFold]['X_test'])>0.5).reshape(-1)
    truelabels=dataArray[currentFold]['y_test']==1
    tp=np.sum(np.logical_and(predictions, truelabels))
    tn=np.sum(np.logical_and(np.invert(predictions), np.invert(truelabels)))
    fp=np.sum(np.logical_and(predictions, np.invert(truelabels)))
    fn=np.sum(np.logical_and(np.invert(predictions), (truelabels)))
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    accuracy = (tp+tn)/(tp+tn+fp+fn)
    f1_score = (2*precision*recall)/(precision+recall)

    
    with open(resultsFilename, "a") as text_file:
        print(f"Final Test Metrics for {str(currentFold)}:\n", file=text_file)
        print(f"TP: {str(tp)}\n TN: {str(tn)}\n FP: {str(fp)}\n FN: {str(fn)}\n", file=text_file)
        print(f"Accuracy: {str(accuracy)}\n Precision: {str(precision)}\n Recall: {str(recall)}\n F1: {str(f1_score)}\n", file=text_file)

    overallPrecisionList.append(historyPrecision)
    overallRecallList.append(historyRecall)
    overallAccuracyList.append(historyAccuracy)
    overallF1List.append(historyF1)
    overallTPList.append(historyTP)
    overallTNList.append(historyTN)
    overallFPList.append(historyFP)
    overallFNList.append(historyFN)
    overallConvergenceEpochList.append(convergenceEpochs)
    overallTrainingTimeList.append(totalTrainingTime)
    overallMeanEpochTimeList.append(meanTimePerEpoch)
    
precisionMeanOverFolds=np.average(np.array(overallPrecisionList), axis=0)
recallMeanOverFolds=np.average(np.array(overallRecallList), axis=0)
accuracyMeanOverFolds=np.average(np.array(overallAccuracyList), axis=0)
F1MeanOverFolds=np.average(np.array(overallF1List), axis=0)
TPMeanOverFolds = np.average(np.array(overallTPList), axis=0)
TNMeanOverFolds = np.average(np.array(overallTNList), axis=0)
FPMeanOverFolds = np.average(np.array(overallFPList), axis=0)
FNMeanOverFolds = np.average(np.array(overallFNList), axis=0)
convergenceMeanOverFolds = np.mean(np.array(overallConvergenceEpochList).astype(np.float))
totalTrainingTimeMeanOverFolds = np.mean(np.array(overallTrainingTimeList).astype(np.float))
EpochTimeMeanOverFolds = np.mean(np.array(overallMeanEpochTimeList).astype(np.float))


maxAccuracyIndex=int(np.argmax(accuracyMeanOverFolds))


with open(str(resultsFilename), "a") as text_file:
    print(f"Training complete for all folds.", file=text_file)
    print(f"Mean Test Metrics Over All Folds:\n", file=text_file)
    print(f"Final Accuracy: {str(accuracyMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final Precision: {str(precisionMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final Recall: {str(recallMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final F1: {str(F1MeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final TP: {str(TPMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final FP: {str(FPMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final TN: {str(TNMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Final FN: {str(FNMeanOverFolds[maxAccuracyIndex])}\n", file=text_file)
    print(f"Obs. Until Max Accuracy: {str(convergenceMeanOverFolds)}\n", file=text_file)
    print(
        f"TP: {str(list(TPMeanOverFolds))}\nTN: {str(list(TNMeanOverFolds))}\nFP: {str(list(FPMeanOverFolds))}\nFN: {str(list(FNMeanOverFolds))}\n",
        file=text_file)
    print(
        f"Accuracy: {str(list(accuracyMeanOverFolds))}\nPrecision: {str(list(precisionMeanOverFolds))}\nRecall: {str(list(recallMeanOverFolds))}\nF1: {str(list(F1MeanOverFolds))}\n",
        file=text_file)

Results file exists
Begin training for fold 0

y_true - 1 Tensor("metrics_10/tn_m/sub:0", shape=(?, ?), dtype=float32)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_13 (LSTM)               (None, 16, 128)           3998208   
_________________________________________________________________
lstm_14 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dense_13 (Dense)             (None, 30)                3870      
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 31        
Total params: 4,133,693
Trainable params: 4,133,693
Non-trainable params: 0
_________________________________________________________________
None
input data shape: (62, 16, 7680)
Train on 62 samples, validate on 17 samples
epoch time start: 1578163384.3404896
Epoch 1/250

Epoch 00001: 

In [42]:
import ast
f = open(resultsFilename)
accList=list()
precisionList=list()
recallList=list()
f1List=list()

for line in f:
    if(str(line[0:13])=='Fold Accuracy'):
        listStart1=line.find('[')
        listEnd1=line.find(']')
        list1=ast.literal_eval(line[listStart1:listEnd1+1])
        accList.append(list(list1))
    if(str(line[0:14])=='Fold Precision'):
        listStart2=line.find('[')
        listEnd2=line.find(']')
        list2=ast.literal_eval(line[listStart2:listEnd2+1])
        precisionList.append(list(list2))
    if(str(line[0:11])=='Fold Recall'):
        listStart3=line.find('[')
        listEnd3=line.find(']')
        list3=ast.literal_eval(line[listStart3:listEnd3+1])
        recallList.append(list(list3))
    if(str(line[0:7])=='Fold F1'):
        listStart4=line.find('[')
        listEnd4=line.find(']')
        list4=ast.literal_eval(line[listStart4:listEnd4+1])
        f1List.append(list(list4))
f.close()

In [43]:
import statistics

In [44]:
averageAccuracy = [statistics.mean(k) for k in zip(accList[0],accList[1],accList[2],accList[3])]
averagePrecisions=[statistics.mean(k) for k in zip(precisionList[0],precisionList[1],precisionList[2],precisionList[3])]
averageRecall=[statistics.mean(k) for k in zip(recallList[0],recallList[1],recallList[2],recallList[3])]
averageF1s=[statistics.mean(k) for k in zip(f1List[0],f1List[1],f1List[2],f1List[3])]

In [45]:
with open(resultsFilename, "a") as text_file:
    print(f"Average Accuracies List: {str(averageAccuracy)}", file=text_file)
    print(f"Average Precisions List: {str(averagePrecisions)}", file=text_file)
    print(f"Average Recalls List: {str(averageRecall)}", file=text_file)
    print(f"Average F1 List: {str(averageF1s)}", file=text_file)


