## Neural Network Model

Use 1D-CNN

Note: This notebook has to be run after preprocessing.ipynb.

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import numpy as np
import pandas as pd
import os
import pickle
from pathlib import Path
from keras import backend as K

Using TensorFlow backend.


In [2]:
from keras.models import Sequential
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import GlobalAveragePooling1D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense
from keras import regularizers
from keras.callbacks import ReduceLROnPlateau
from sklearn.utils import class_weight
from keras.layers import LeakyReLU

In [3]:
# Load the train/test DataFrames from pickle files in the working directory
# (presumably written by preprocessing.ipynb — TODO confirm).
# SECURITY: pickle.load can execute arbitrary code; only load files you produced yourself.
# Context managers make sure both file handles are closed after loading.
with open("./trainset.p", 'rb') as trainset_file:
    trainset = pickle.load(trainset_file)
with open("./testset.p", 'rb') as testset_file:
    testset = pickle.load(testset_file)

In [4]:
# NOTE(review): presumably toggles k-fold cross-validation, but no visible cell reads this flag — confirm it is still needed.
USE_KFOLD = True
# Per-class row counts of the test set, grouped on the stripPVC flag (shown as the cell output).
testset.groupby('stripPVC').count()

Unnamed: 0_level_0,R_interval_mean,R_interval_sd,age,annotation,gender,id,lead,p2p_mean,p2p_std,peak,strip,p2p_diff_form_norm
stripPVC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
False,937,937,937,937,937,937,937,937,937,937,937,937
True,674,674,674,674,674,674,674,674,674,674,674,674


In [5]:
# Model inputs come from the 'strip' column of each frame.
xtrain, xtest = trainset['strip'], testset['strip']
# Targets: the stripPVC flag encoded as 1 (equal to True) or 0 (anything else).
ytrain = [int(flag == True) for flag in trainset['stripPVC']]
ytest = [int(flag == True) for flag in testset['stripPVC']]

In [6]:
# Convert the per-row strips into 3-D arrays of shape (samples, steps, 1).

def _add_channel_axis(strips):
    """Stack the strips into one array and append a trailing axis of length 1."""
    stacked = np.stack(strips)
    n_samples, n_steps = stacked.shape[0], stacked.shape[1]
    return stacked.reshape((n_samples, n_steps, 1))

xtrain = _add_channel_axis(xtrain)
xtest = _add_channel_axis(xtest)
xtrain.shape

(2864, 3600, 1)

### 1D-CNN

In [7]:
def sensitivity(y_true, y_pred):
    """Keras metric: true positives divided by actual positives.

    Both tensors are clipped to [0, 1] and rounded before counting, and
    K.epsilon() is added to the denominator so that a batch without any
    positive label yields 0 instead of a division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    positive_mask = K.round(K.clip(y_true, 0, 1))
    n_hits = K.sum(hit_mask)
    n_positives = K.sum(positive_mask)
    return n_hits / (n_positives + K.epsilon())
def specificity(y_true, y_pred):
    """Keras metric: true negatives divided by actual negatives.

    Works on the complements (1 - y_true) and (1 - y_pred), clipped to [0, 1]
    and rounded before counting. K.epsilon() in the denominator keeps a batch
    without any negative label from dividing by zero.
    """
    neg_true = 1 - y_true
    neg_pred = 1 - y_pred
    n_correct_negatives = K.sum(K.round(K.clip(neg_true * neg_pred, 0, 1)))
    n_negatives = K.sum(K.round(K.clip(neg_true, 0, 1)))
    return n_correct_negatives / (n_negatives + K.epsilon())
def CNN_2(input_shape, blocks = 2, dropout = 0, regularization = 0.001, kernel_size = 16):
    """Build and compile a small 1D convolutional binary classifier.

    Layer order: Conv1D(100, relu) -> Dropout -> Conv1D(160, relu) ->
    MaxPooling1D(2) -> LeakyReLU -> LeakyReLU -> Dropout ->
    GlobalAveragePooling1D -> Dense(1, sigmoid).

    Parameters
    ----------
    input_shape : int
        Number of steps per sample; the first layer is given
        input_shape=(input_shape, 1).
    blocks : int
        NOTE(review): accepted but never read by this function.
    dropout : float
        Rate handed to both Dropout layers.
    regularization : float
        NOTE(review): accepted but never read by this function.
    kernel_size : int
        Kernel size of both Conv1D layers.

    Returns
    -------
    The Sequential model, compiled with the 'adam' optimizer,
    'binary_crossentropy' loss and the metrics sensitivity, specificity
    and accuracy.
    """
    classifier = Sequential()

    # NOTE(review): the two consecutive LeakyReLU layers are kept exactly as in
    # the original architecture; confirm whether the duplicate is intentional.
    layer_stack = [
        Conv1D(filters = 100, kernel_size = kernel_size, activation = 'relu', input_shape = (input_shape, 1)),
        Dropout(dropout),
        Conv1D(filters = 160, kernel_size = kernel_size, activation = 'relu'),
        MaxPooling1D(pool_size = 2),
        LeakyReLU(),
        LeakyReLU(),
        Dropout(dropout),
        GlobalAveragePooling1D(),
        Dense(1, activation='sigmoid'),
    ]
    for layer in layer_stack:
        classifier.add(layer)

    tracked_metrics = [sensitivity, specificity, 'accuracy']
    classifier.compile(optimizer = 'adam',
                       loss = 'binary_crossentropy',
                       metrics = tracked_metrics)
    return classifier

In [8]:
# Run configuration read by the model-building and training cell.
MODEL_NUMBER = '1'  # NOTE(review): not referenced in the visible cells — presumably a run label
MODEL = CNN_2  # builder function called to create the classifier
N_SPLITS = 5  # NOTE(review): not referenced in the visible cells — presumably the k-fold split count
BATCH_SIZE = 10  # passed to fit() as batch_size
EPOCHS = 10  # passed to fit() as epochs

In [9]:
# Hyper-parameters of this run; every entry is now forwarded or honoured below.
params = { 'blocks': 5, 'dropout': 0, 'regularization': 0, 'kernel_size': 16, 'reduceLR': True}
classifier = MODEL(input_shape=np.shape(xtrain)[1],
                   blocks=params['blocks'],
                   dropout=params['dropout'],
                   regularization=params['regularization'],
                   kernel_size=params['kernel_size'])

# Only schedule the learning rate when the run asks for it.
# min_lr has to sit below the optimizer's starting rate: the model is compiled
# with 'adam' (default learning rate 0.001), so the former min_lr=0.001 made
# the callback unable to ever lower the rate.
callbacks = []
if params['reduceLR']:
    callbacks.append(ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-5))

# Keras expects class_weight as a {class_label: weight} mapping, while
# compute_class_weight returns a bare array ordered like `classes`.
# Keyword arguments are used because newer scikit-learn rejects positional ones.
ytrain_array = np.asarray(ytrain)
classes = np.unique(ytrain_array)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=classes,
                                                     y=ytrain_array)
class_weights = {int(label): float(weight) for label, weight in zip(classes, balanced_weights)}

# Labels are passed as an ndarray so validation_split can slice them like the inputs.
history = classifier.fit(xtrain,
                         ytrain_array,
                         batch_size = BATCH_SIZE,
                         epochs = EPOCHS,
                         validation_split=0.2,
                         class_weight = class_weights,
                         callbacks = callbacks
                        )

Train on 2291 samples, validate on 573 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Hard 0/1 labels from the sigmoid output, shape (n_samples, 1).
# Sequential.predict_classes was removed from later Keras/TensorFlow releases;
# thresholding predict() at 0.5 is its documented replacement for a
# single-unit sigmoid output and also works on the older versions.
trainpred = (classifier.predict(xtrain) > 0.5).astype('int32')
testpred = (classifier.predict(xtest) > 0.5).astype('int32')

In [34]:
def metric(pred, real):
    """Print confusion matrix, precision, recall, accuracy and ROC AUC for binary labels.

    Parameters
    ----------
    pred : sequence of 0/1
        Predicted class labels.
    real : sequence of 0/1
        Ground-truth class labels, in the same order as `pred`.

    Returns
    -------
    None. All results are printed.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import roc_auc_score

    # scikit-learn takes (y_true, y_pred): rows are actual classes, columns are
    # predicted classes. labels=[0, 1] pins the negative-then-positive order
    # and keeps the matrix 2x2 even when one class is absent.
    counts = confusion_matrix(real, pred, labels=[0, 1])
    ConfusionMatrix = pd.DataFrame(counts,
                                   index=['Actual False', 'Actual True'],
                                   columns=['Predicted False', 'Predicted True'])
    tn, fp, fn, tp = (int(value) for value in counts.ravel())

    def _ratio(numerator, denominator):
        # An empty denominator (e.g. no positive predictions) reports nan instead of raising.
        return numerator / denominator if denominator else float('nan')

    print(ConfusionMatrix)
    print('----------------')
    print('Precision:', _ratio(tp, tp + fp))
    print('Recall:', _ratio(tp, tp + fn))
    print('Accuracy:', _ratio(tp + tn, tn + fp + fn + tp))
    # roc_auc_score also takes the ground truth first.
    print('AUC:', roc_auc_score(real, pred))

In [11]:
# Take the first entry of every prediction row (assumes testpred is a 2-D array — TODO confirm)
# and score those labels against the test targets.
test_labels = list(testpred[:, 0])
metric(test_labels, ytest)


### Integrate with random forest

In [12]:
from sklearn import preprocessing

# Columns excluded from the scaled feature matrix.
NON_FEATURE_COLS = ['peak','annotation','strip','stripPVC','id','lead']

# Add the CNN prediction BEFORE choosing the columns and fitting the scaler.
# Previously the scaler and `col` were built without 'cnnProb' while transform()
# received frames that already contained it, so a fresh run failed on the
# feature-count mismatch and only a second run of the cell worked.
# np.ravel flattens the (n, 1) prediction arrays into one value per row.
# NOTE(review): despite its name, 'cnnProb' holds hard 0/1 class predictions, not probabilities.
trainset['cnnProb'] = np.ravel(trainpred)
testset['cnnProb'] = np.ravel(testpred)

train_features = trainset.drop(NON_FEATURE_COLS, axis=1)
# Select the test columns in the training order so both matrices line up with `col`.
test_features = testset.drop(NON_FEATURE_COLS, axis=1)[train_features.columns]
col = train_features.columns.values

# Fit on the training features only, then apply the same scaling to both sets.
scale = preprocessing.StandardScaler()
scale.fit(train_features)
n_trainset = pd.DataFrame(scale.transform(train_features), columns=col)
n_testset = pd.DataFrame(scale.transform(test_features), columns=col)
