In [1]:
import keras
from keras.layers import Input, Dense, Dropout
from keras.models import Model, load_model
from keras.layers.merge import concatenate
from keras.utils import to_categorical, plot_model
from keras.utils.vis_utils import model_to_dot
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import scale
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
import os
import pydot as pyd
import seaborn as sb
import matplotlib.pyplot as plt

keras.utils.vis_utils.pydot = pyd

Using TensorFlow backend.


In [2]:
def base_network(net_input):
    output = Dense(3, activation='relu')(net_input)
    return output

In [3]:
def extra_network(net_input, hidden1_filter_num = 32, hidden2_filter_num = 64, hidden3_filter_num = 64, output_num=1):
    x = Dense(hidden1_filter_num, activation='relu')(net_input)
    x = Dense(hidden2_filter_num, activation='relu')(x)
    x = Dense(hidden3_filter_num, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(output_num, activation='relu')(x)
    return x

In [4]:
input_shape = (7,)

input1 = Input(input_shape, name = 'input1')
input2 = Input(input_shape, name = 'input2')
input3 = Input(input_shape, name = 'input3')
input4 = Input(input_shape, name = 'input4')
input5 = Input(input_shape, name = 'input5')
input6 = Input(input_shape, name = 'input6')
input7 = Input(input_shape, name = 'input7')
input8 = Input(input_shape, name = 'input8')

net1 = base_network(input1)
net2 = base_network(input2)
net3 = base_network(input3)
net4 = base_network(input4)
net5 = base_network(input5)
net6 = base_network(input6)
net7 = base_network(input7)
net8 = base_network(input8)

merged = concatenate([net1, net2, net3, net4, net5, net6, net7, net8])

net_output = extra_network(merged, 32, 64, 64, 2)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
model = Model(inputs = [input1, input2, input3, input4, input5, input6, input7, input8], outputs = net_output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()



__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input1 (InputLayer)             (None, 7)            0                                            
__________________________________________________________________________________________________
input2 (InputLayer)             (None, 7)            0                                            
__________________________________________________________________________________________________
input3 (InputLayer)             (None, 7)            0                                            
__________________________________________________________________________________________________
input4 (InputLayer)             (None, 7)            0                                            
__________________________________________________________________________________________________
input5 (

In [6]:
from contextlib import redirect_stdout
with open('model_summary.txt', 'w') as f:
    with redirect_stdout(f):
        model.summary()

In [7]:
label_path = './AMLTraining.csv'
all_label = pd.read_csv(label_path)
files = os.listdir('./CSV/')

take_normal_data = 1000
take_aml_data = 7000
x1 = pd.read_csv('./CSV/' + files[0]).iloc[:take_normal_data]
x2 = pd.read_csv('./CSV/' + files[1]).iloc[:take_normal_data]
x3 = pd.read_csv('./CSV/' + files[2]).iloc[:take_normal_data]
x4 = pd.read_csv('./CSV/' + files[3]).iloc[:take_normal_data]
x5 = pd.read_csv('./CSV/' + files[4]).iloc[:take_normal_data]
x6 = pd.read_csv('./CSV/' + files[5]).iloc[:take_normal_data]
x7 = pd.read_csv('./CSV/' + files[6]).iloc[:take_normal_data]
x8 = pd.read_csv('./CSV/' + files[7]).iloc[:take_normal_data]
y = all_label.iloc[:1]['Label']
for i in range(take_normal_data - 1):
    y = pd.concat([y, all_label.iloc[0:1]['Label']])
for file_num, each_file in enumerate(files):
    if file_num >= 8:
        if all_label.iloc[file_num]['Label'] == 'normal':
                take_data = take_normal_data
        elif all_label.iloc[file_num]['Label'] == 'aml':
            take_data = take_aml_data
        else:
            continue
        if file_num % 8 == 0:
            x1 = pd.concat([x1, pd.read_csv('./CSV/' + each_file)[:take_data]])
            for i in range(take_data):
                y = pd.concat([y, all_label.iloc[file_num:file_num+1]['Label']])
        if file_num % 8 == 1:
            x2 = pd.concat([x2, pd.read_csv('./CSV/' + each_file)[:take_data]])
        if file_num % 8 == 2:
            x3 = pd.concat([x3, pd.read_csv('./CSV/' + each_file)[:take_data]])
        if file_num % 8 == 3:
            x4 = pd.concat([x4, pd.read_csv('./CSV/' + each_file)[:take_data]])
        if file_num % 8 == 4:
            x5 = pd.concat([x5, pd.read_csv('./CSV/' + each_file)[:take_data]])
        if file_num % 8 == 5:
            x6 = pd.concat([x6, pd.read_csv('./CSV/' + each_file)[:take_data]])
        if file_num % 8 == 6:
            x7 = pd.concat([x7, pd.read_csv('./CSV/' + each_file)[:take_data]])
        if file_num % 8 == 7:
            x8 = pd.concat([x8, pd.read_csv('./CSV/' + each_file)[:take_data]])

x1['FS Lin'] = scale(x1['FS Lin'])
x2['FS Lin'] = scale(x2['FS Lin'])
x3['FS Lin'] = scale(x3['FS Lin'])
x4['FS Lin'] = scale(x4['FS Lin'])
x5['FS Lin'] = scale(x5['FS Lin'])
x6['FS Lin'] = scale(x6['FS Lin'])
x7['FS Lin'] = scale(x7['FS Lin'])
x8['FS Lin'] = scale(x8['FS Lin'])

# x1.to_csv('x1.csv', index = False)
# x2.to_csv('x2.csv', index = False)
# x3.to_csv('x3.csv', index = False)
# x4.to_csv('x4.csv', index = False)
# x5.to_csv('x5.csv', index = False)
# x6.to_csv('x6.csv', index = False)
# x7.to_csv('x7.csv', index = False)
# x8.to_csv('x8.csv', index = False)
# y.to_csv('y.csv', index = False, header = True)

le = LabelEncoder()
y = le.fit_transform(y)
y_binary = to_categorical(y)

In [8]:
# x1 = pd.read_csv('./x1.csv')
# x2 = pd.read_csv('./x2.csv')
# x3 = pd.read_csv('./x3.csv')
# x4 = pd.read_csv('./x4.csv')
# x5 = pd.read_csv('./x5.csv')
# x6 = pd.read_csv('./x6.csv')
# x7 = pd.read_csv('./x7.csv')
# x8 = pd.read_csv('./x8.csv')
# y = pd.read_csv('./y.csv')
# le = LabelEncoder()
# y = le.fit_transform(y)
# y_binary = to_categorical(y)

In [9]:
model.fit([x1, x2, x3, x4, x5, x6, x7, x8], y=y_binary, epochs=25, batch_size=32, validation_split=0.1, shuffle=True)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 285300 samples, validate on 31700 samples
Epoch 1/25





Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x2420630ee88>

In [10]:
model.save_weights('model_weights.h5')

In [11]:
# Save model
model.save('model.h5')

In [12]:
plot_model(model, to_file='model.png')

In [13]:
model = load_model('model.h5')
result = model.predict({'input1': x1, 'input2': x2, 'input3': x3, 'input4': x4, \
                        'input5': x5, 'input6': x6, 'input7': x7, 'input8': x8})
prediction = []
for i in range(len(result)):
    if result[i][0] > result[i][1]:
        prediction.append(0)
    else:
        prediction.append(1)

In [15]:
conf_mat = confusion_matrix(y, np.array(prediction))
row_labels = ['Truth: normal', 'Truth: AML']
column_labels = ['Pred: normal', 'Pred: AML']
df = pd.DataFrame(conf_mat, columns=column_labels, index=row_labels)
TN = conf_mat[0][0]/(np.sum(conf_mat))
FP = conf_mat[0][1]/(np.sum(conf_mat))
FN = conf_mat[1][0]/(np.sum(conf_mat))
TP = conf_mat[1][1]/(np.sum(conf_mat))
accuracy = (TN + TP)/(TN + FP + FN + TP)
error = 1 - accuracy
sensitivity = TP/(TP + FN)
specificity = TN/(FP + TN)
false_alarm = 1 - specificity
precision = TP/(TP + FP)
recall = TP/(TP + FN)
FDR = 1 - precision
F_measure = (2*TP)/(2*TP + FP + FN)
balanced_accuracy = (sensitivity + specificity)/2
MCC = (TP*TN - FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))
print(df)
print('Accuracy: ' + str(accuracy))
print('F-measure:' + str(F_measure))
print('Balanced accuracy: ' + str(balanced_accuracy))
print('MCC: ' + str(MCC))

               Pred: normal  Pred: AML
Truth: normal        146979      14021
Truth: AML             6196     149804
Accuracy: 0.9362239747634069
F-measure:0.9367873055577269
Balanced accuracy: 0.9365975473801561
MCC: 0.8735796160689991
