In [1]:
def warn(*args, **kwargs):
    """Accept any arguments and do nothing.

    Installed below in place of ``warnings.warn`` so that code which emits
    warnings through that attribute stays silent in this notebook.
    """
    return None


import warnings

# Swap the module-level hook for the silent stand-in defined above.
warnings.warn = warn

import numpy as np
import pandas as pd
import pickle as pk
import matplotlib.pyplot as plt
from keras.models import Sequential, Model
from keras.layers import Input, BatchNormalization, Dense, Conv1D, Activation, Add, GlobalAveragePooling1D
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score, roc_auc_score

Using TensorFlow backend.


In [2]:
def _residual_block(block_input, in_channels, out_channels, normalize_input=False):
    """Build one residual block: three Conv1D+BatchNorm stages plus a shortcut.

    Args:
        block_input: Keras tensor entering the block.
        in_channels: Channel count (last axis) of ``block_input``.
        out_channels: Number of filters produced by every convolution here.
        normalize_input: When True, batch-normalise ``block_input`` before the
            first convolution (the shortcut still reads the raw input).

    Returns:
        The Keras tensor after the shortcut is added and ReLU is applied.
    """
    conv = BatchNormalization()(block_input) if normalize_input else block_input
    kernel_sizes = (8, 5, 3)
    for position, kernel_size in enumerate(kernel_sizes):
        conv = Conv1D(out_channels, kernel_size=kernel_size, strides=1, padding='same')(conv)
        conv = BatchNormalization()(conv)
        # The last stage stays linear; its ReLU is applied after the addition.
        if position < len(kernel_sizes) - 1:
            conv = Activation('relu')(conv)

    # Add() needs both branches to have the same channel count, so the
    # decision must be based on the tensor that actually enters THIS block.
    if in_channels != out_channels:
        shortcut = Conv1D(out_channels, kernel_size=1, strides=1, padding='same')(block_input)
        shortcut = BatchNormalization()(shortcut)
    else:
        shortcut = BatchNormalization()(block_input)

    merged = Add()([shortcut, conv])
    return Activation('relu')(merged)


def build_resnet(input_shape, n_feature_maps, nb_classes):
    """Build a three-block 1D ResNet classifier graph.

    The blocks have widths ``n_feature_maps``, ``2*n_feature_maps`` and
    ``2*n_feature_maps``; they are followed by global average pooling and a
    softmax Dense layer.

    Previously blocks 2 and 3 decided whether to project the shortcut by
    comparing the *network input's* channel count with the block width. That
    raised a shape mismatch in block 2 whenever
    ``input_shape[-1] == 2*n_feature_maps`` and added a needless 1x1
    projection in block 3. Each block now compares its own input width with
    its output width.

    Args:
        input_shape: Shape of one sample without the batch axis; the last
            entry is used as the channel count.
        n_feature_maps: Number of filters in the first residual block.
        nb_classes: Number of units in the softmax output layer.

    Returns:
        Tuple ``(x, out)`` of the input tensor and the softmax output tensor,
        suitable for ``Model(inputs=x, outputs=out)``.
    """
    x = Input(shape=input_shape)
    wide = n_feature_maps * 2

    y = _residual_block(x, input_shape[-1], n_feature_maps, normalize_input=True)
    y = _residual_block(y, n_feature_maps, wide)
    y = _residual_block(y, wide, wide)

    full = GlobalAveragePooling1D()(y)
    out = Dense(nb_classes, activation='softmax')(full)
    return x, out

In [3]:
FOLDS = 5
PADDING = 100
MAX_POWER = 10

# NOTE(review): pickle executes arbitrary code on load; only use trusted files.
# Context managers make sure the file handles are closed after reading.
with open('input_data.pkl', 'rb') as input_file:
    input_data = pk.load(input_file)
with open('target_data.pkl', 'rb') as target_file:
    target_data = pk.load(target_file)

dimention = []
auc = []
kappa = []

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True)
mms = MinMaxScaler()

# Width of the softmax layer; the one-hot targets below must match it even
# when a fold happens to miss the highest class index.
nb_classes = target_data[0].size

# Prepare fixed-length training samples once: this does not depend on `dim`.
model_input = []
model_target = []
for sequence, target in zip(input_data, target_data):
    # Left-pad with zeros and keep the LAST `PADDING` rows. Slicing from the
    # front (`[:PADDING]`) would keep the PADDING-1 zero rows plus only the
    # first timestep of every sequence.
    padded = np.concatenate([np.zeros((PADDING - 1, sequence.shape[1])), sequence])
    model_input.append(padded[-PADDING:, :])
    model_target.append(np.argmax(target))
model_input = np.array(model_input)
model_target = np.array(model_target)

one_hot_rows = np.eye(nb_classes)

for dim in np.power(2, np.arange(3, MAX_POWER+1)):
    print(f'nodes: {dim}')
    aucs = []
    kappas = []

    # Prepare k-fold training and test sets
    for train_index, test_index in skf.split(model_input, model_target):
        X_train, X_test = model_input[train_index], model_input[test_index]
        y_train = one_hot_rows[model_target[train_index]]
        y_test = one_hot_rows[model_target[test_index]]

        # Make the resnet
        x, y = build_resnet(X_train.shape[1:], dim, nb_classes)
        model = Model(inputs=x, outputs=y)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        es = EarlyStopping(monitor='val_loss', patience=5, min_delta=0, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=1000, validation_split=0.25, callbacks=[es], verbose=True)

        # Get the average auc and kappa for all affects and folds
        y_pred = model.predict(X_test, batch_size=1)
        for y_t, y_p in zip(y_test.T, y_pred.T):
            # Rescale each class's scores to [0, 1] over the test fold, then
            # flatten so the metric functions receive 1-D arrays.
            y_p = mms.fit_transform(y_p.reshape(-1, 1)).ravel()
            aucs.append(roc_auc_score(y_t, y_p))
            kappas.append(cohen_kappa_score(y_t, np.around(y_p)))

    dimention.append(dim)
    auc.append(np.mean(aucs))
    kappa.append(np.mean(kappas))
    print(f'auc: {auc[-1]}')
    print(f'kappa: {kappa[-1]}')

nodes: 8

Train on 1854 samples, validate on 618 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Train on 1854 samples, validate on 619 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Train on 1856 samples, validate on 619 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000


Train on 1857 samples, validate on 619 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Train on 1857 samples, validate on 619 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000


NameError: name 'layers' is not defined

In [None]:
# Plot the mean score per tested width. `dimention`, `auc` and `kappa` each
# hold one entry per width, so the full lists are plotted directly instead of
# being sliced with `i`, a variable leaked from an earlier loop.
for scores, y_label in ((auc, 'ROC AUC'), (kappa, 'Cohen\'s Kappa')):
    fig, ax = plt.subplots()
    # A labelled line gives legend() something to show.
    ax.plot(dimention, scores, marker='.', label='1d ResNet')
    ax.set_xlabel('Projected Dimensions')
    ax.set_ylabel(y_label)
    ax.set_title('1d ResNet')
    ax.legend()
    plt.show()