In [147]:
import sys, os
# Make the parent directory importable; presumably the project-local modules
# imported below (util_funcs, data_reader, keras_models, ...) live there — TODO confirm.
sys.path.append(os.path.realpath(".."))
# Placed in the environment before keras is imported further down in this notebook.
os.environ["TF_XLA_FLAGS"]="--tf_xla_cpu_global_jit"

import util_funcs
from importlib import reload
import data_reader as read
import pandas as pd
import numpy as np
import pickle as pkl
import matplotlib.pyplot as plt
import constants
import clinical_text_analysis as cta
import tsfresh
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, roc_curve
from os import path
import keras_models.dataGen as dg
import predictGenderConvExp as pg

In [148]:
import keras
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, Activation, Conv2D, Concatenate, Dropout, MaxPool2D, Conv3D, Flatten, LeakyReLU, BatchNormalization

# Importing the preprocessed data

In [149]:
# Load the pickled training split. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it open for the kernel's lifetime.
# NOTE: pickle can execute arbitrary code on load — only use files produced by this project.
with open("../standardized_combined_simple_ensemble_train_data.pkl", 'rb') as train_file:
    trainData = pkl.load(train_file)

In [150]:
# Load the pickled test split. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it open for the kernel's lifetime.
# NOTE: pickle can execute arbitrary code on load — only use files produced by this project.
with open("../standardized_combined_simple_ensemble_test_data.pkl", 'rb') as test_file:
    testData = pkl.load(test_file)

In [151]:
# Load the pickled validation split. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it open for the kernel's lifetime.
# NOTE(review): the file name contains both "valid" and "train" — confirm this
# really is the validation split.
# NOTE: pickle can execute arbitrary code on load — only use files produced by this project.
with open("../valid_standardized_combined_simple_ensemble_train_data.pkl", 'rb') as valid_file:
    validData = pkl.load(valid_file)

In [152]:
def generate_x_y(data):
    """Build model-ready arrays from a sequence of ``(features, label)`` pairs.

    Parameters
    ----------
    data : sequence
        Each element is indexed as ``datum[0]`` (a feature array; all of them
        must share one shape so they can be stacked) and ``datum[1]`` (an
        integer class label).

    Returns
    -------
    x_data : numpy.ndarray
        The stacked features with a trailing singleton axis appended, i.e.
        shape ``(len(data), *feature_shape, 1)``.
    y_data : numpy.ndarray
        One-hot labels as produced by ``keras.utils.to_categorical``.
    """
    x_data = np.stack([datum[0] for datum in data])
    # Append a trailing singleton axis.
    x_data = x_data.reshape((*x_data.shape, 1))
    # NOTE(review): an earlier version called ``x_data.transpose(0, 2, 1, 3)``
    # here but discarded the result. ``ndarray.transpose`` returns a new view
    # and never modifies the array in place, so that call had no effect and
    # has been removed; the axis order returned is unchanged. If swapping
    # axes 1 and 2 was actually intended, assign the transposed view instead.
    y_data = np.array([datum[1] for datum in data])
    y_data = keras.utils.to_categorical(y_data)
    return x_data, y_data

In [153]:
# Convert the test split into (inputs, one-hot labels) arrays, then drop the
# raw object. Because of the `del`, re-running this cell without first
# reloading `testData` raises NameError.
testDataX, testDataY = generate_x_y(testData)
del testData

# Making a quick set of architectures

In [154]:
from keras_models.vanPutten import inception_like

In [None]:
# (500, 21, 1) is the per-sample input shape; it matches the (None, 500, 21, 1)
# input layer shown by model.summary() further down.
model = inception_like((500, 21, 1), num_filters=100)

In [None]:
from keras.utils import multi_gpu_model
# Wrap the model for execution on 2 GPUs. `model` is rebound to the wrapper,
# so running this cell a second time would wrap the wrapper again.
# NOTE(review): the Keras docs advise saving through the template model rather
# than the object returned by multi_gpu_model; the ModelCheckpoint callbacks
# used later save whatever `model` is being fit — confirm the .h5 files reload.
model = multi_gpu_model(model, 2)

In [None]:
from keras.optimizers import Adam
# Adam optimizer with a learning rate of 0.002.
adam = Adam(lr=0.002)
# Targets are one-hot encoded, hence categorical cross-entropy. The "acc"
# metric is what the "val_acc" checkpoint monitor later in the notebook tracks.
model.compile(adam, loss="categorical_crossentropy", metrics=["acc"])

In [None]:
# Print the layer table to sanity-check the input and output shapes.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 500, 21, 1)   0                                            
__________________________________________________________________________________________________
lambda_5 (Lambda)               (None, 500, 21, 1)   0           input_3[0][0]                    
__________________________________________________________________________________________________
lambda_6 (Lambda)               (None, 500, 21, 1)   0           input_3[0][0]                    
__________________________________________________________________________________________________
model_5 (Model)                 (None, 2)            9394702     lambda_5[0][0]                   
                                                                 lambda_6[0][0]                   
__________

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
# Training callbacks:
#   * stop when the training loss has not improved for 3 epochs,
#   * stop when the validation loss has not improved for 5 epochs,
#   * keep only the best checkpoint in yolo.h5 (the training log shows it
#     being saved on val_loss improvements).
cb = [
    EarlyStopping(monitor="loss", patience=3, verbose=True),
    EarlyStopping(monitor="val_loss", patience=5, verbose=True),
    ModelCheckpoint("yolo.h5", save_best_only=True, verbose=True),
]

In [None]:
# Additionally keep the checkpoint with the best validation accuracy.
# This mutates `cb` in place, so re-running the cell adds a duplicate callback.
cb.append(ModelCheckpoint("yolo_bin_acc.h5",save_best_only=True, verbose=True, monitor="val_acc"))

In [None]:
# Re-import keras_models.dataGen so edits to it are picked up without a kernel restart.
reload(dg)

<module 'keras_models.dataGen' from '/home/ms994/dbmi_eeg_clustering/keras_models/dataGen.py'>

In [None]:
# Batch generator over the training split: 2 classes, batches of 64, shuffled.
# max_length=500 presumably corresponds to the 500-long axis of the model
# input — TODO confirm against EdfDataGenerator.
trainDataDg = dg.EdfDataGenerator(trainData, precache=True,
        time_first=True,
        n_classes=2, batch_size=64, max_length=500, shuffle=True)

In [None]:
# Batch generator over the validation split: same settings as the training
# generator except for the smaller batch size of 32.
# NOTE(review): shuffle=True is also set here — confirm that is intended for validation.
validDataDg = dg.EdfDataGenerator(validData, precache=True,
        time_first=True,
        n_classes=2, batch_size=32, max_length=500, shuffle=True)

In [None]:
# Shape of element 0 of batch 1 (presumably the input array of an
# (inputs, targets) pair) — should agree with the model's input shape.
trainDataDg[1][0].shape

(64, 500, 21, 1)

In [None]:
# Train from the generators with the callbacks in `cb`. epochs=1000 is only an
# upper bound: the EarlyStopping callbacks are expected to end training sooner.
# Batches are prepared by 4 worker processes.
history = model.fit_generator(trainDataDg, callbacks=cb, validation_data=validDataDg, epochs=1000, use_multiprocessing=True, workers=4)

Epoch 1/1000

Epoch 00001: val_loss improved from inf to 7.10428, saving model to yolo.h5

Epoch 00001: val_acc improved from -inf to 0.55556, saving model to yolo_bin_acc.h5
Epoch 2/1000

Epoch 00002: val_loss improved from 7.10428 to 7.05923, saving model to yolo.h5

Epoch 00002: val_acc improved from 0.55556 to 0.55903, saving model to yolo_bin_acc.h5
Epoch 3/1000

In [None]:
# Replace the in-memory model with the checkpoint that scored the best val_acc.
# `history` from the training run is unaffected by this rebinding.
model = keras.models.load_model("yolo_bin_acc.h5")

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Training loss per epoch, labelled so the figure can be read on its own.
plt.plot(history.history["loss"], label="loss")
plt.xlabel("epoch")
plt.ylabel("categorical cross-entropy")
plt.title("Training loss")
plt.legend();

In [None]:
# Validation loss per epoch, labelled so the figure can be read on its own.
plt.plot(history.history["val_loss"], label="val_loss")
plt.xlabel("epoch")
plt.ylabel("categorical cross-entropy")
plt.title("Validation loss")
plt.legend();

In [None]:
# Per-class scores for every test sample; the model's output layer is (None, 2).
y_pred = model.predict(testDataX)
# ROC AUC ranks samples by a continuous score. Passing arg-maxed 0/1 labels
# collapses the curve to a single operating point, so score with the model's
# output for class 1 instead. The true labels are recovered from the one-hot matrix.
roc_auc_score(testDataY.argmax(axis=1), y_pred[:, 1])

In [None]:
# Same metric as the preceding cell, recomputed from the existing `y_pred`.
# ROC AUC needs a continuous score, so use the model's output for class 1
# rather than arg-maxed 0/1 predictions.
roc_auc_score(testDataY.argmax(axis=1), y_pred[:, 1])

In [None]:
# Load the per-sample bookkeeping for the test split. The context manager
# closes the file handle as soon as unpickling finishes.
# NOTE: pickle can execute arbitrary code on load — only use files produced by this project.
with open("../test_standardized_edf_ensemble_sample_info.pkl", "rb") as sample_info_file:
    samplingInfo = pkl.load(sample_info_file)

In [None]:
# Re-import data_reader so edits to it are picked up without a kernel restart.
reload(read)

In [None]:
# Distinct "token_file_path" values across all sampling-info entries.
# De-duplication goes through a set, so the order of the list is arbitrary.
edf_tokens = list({samplingInfo[key]["token_file_path"] for key in samplingInfo.keys()})

In [None]:
# Build the ensembler from the known token files; generate_sample_info=False
# because the sampling info loaded from disk is attached in a later cell.
ensembler = read.EdfDatasetEnsembler("combined", "01_tcp_ar", generate_sample_info=False, edf_tokens=edf_tokens)

In [None]:
# Number of distinct token files represented in the test split.
len(edf_tokens)

In [None]:
# Attach the precomputed state directly to the ensembler's attributes.
ensembler.sampleInfo = samplingInfo
# Integer class labels recovered from the one-hot test targets.
# NOTE(review): assumes these labels are in the same order as the entries of
# samplingInfo — confirm against EdfDatasetEnsembler.
ensembler.labels = testDataY.argmax(axis=1)
ensembler.edf_tokens = edf_tokens

In [None]:
from addict import Dict

In [None]:
# Empty addict Dict; not referenced by any of the cells visible after this one.
pred_vs_true = Dict()

In [None]:
# Combine the per-sample predictions with the "equal_vote" mode; presumably
# this aggregates samples that come from the same token file — TODO confirm.
trueLabel, pred = ensembler.getEnsemblePrediction(y_pred, mode="equal_vote")

In [None]:
# ROC AUC of the ensembled predictions.
# NOTE(review): if `pred` holds hard votes rather than continuous scores, this
# AUC reflects a single operating point — confirm what getEnsemblePrediction returns.
roc_auc_score(trueLabel, pred)