The code has three parts:
Part A: Coarse model training. All models are trained on the large-scale CheXpert dataset of lung-segmented images.

In [None]:
#load libraries
from keras.models import Sequential, Model, Input, load_model
from keras.layers import Conv2D, Dense, MaxPooling2D, SeparableConv2D, BatchNormalization, ZeroPadding2D, GlobalAveragePooling2D,Flatten,Average, Dropout
import time
import statistics
from keras import applications
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import classification_report,confusion_matrix, roc_curve, auc, accuracy_score, log_loss
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint
import scikitplot as skplt
from itertools import cycle
from sklearn.utils import class_weight
from keras.models import load_model, Model, Sequential, Input
import numpy as np
import itertools
from keras.utils import plot_model, to_categorical
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.densenet import DenseNet121
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import label_binarize
from scipy import interp
%matplotlib inline

In [None]:
# Important note on batch size and step counts:
# check whether the number of train, validation and test samples is exactly
# divisible by the batch size. If it is not, the number of steps used while
# fitting, evaluating and predicting has to be rounded up by one so that the
# final partial batch is included. Do not use workers=1, and reset the
# generators every time before using them, otherwise you will get weird
# results.
# Example for a sample count that is NOT divisible by the batch size:
#   train_generator, steps_per_epoch=nb_train_samples // batch_size + 1,
#   validation_steps=nb_validation_samples // batch_size + 1, verbose=1
#   scorecustom = custom_model.evaluate_generator(validation_generator, nb_validation_samples // batch_size + 1, verbose = 1)
#   custom_y_pred = custom_model.predict_generator(test_generator, nb_test_samples//batch_size + 1, verbose=1)

#%% Loading the training data
# data locations
train_data_dir = 'C:/Users/rajaramans2/codes/omsakthi_ensemble_visualization_kaggle/chexpert/data_binary_256/train/'
test_data_dir = 'C:/Users/rajaramans2/codes/omsakthi_ensemble_visualization_kaggle/chexpert/data_binary_256/test/'

# image dimensions (square images)
img_width = img_height = 256

# training hyper-parameters
num_classes = 2
batch_size = 8
epochs = 30

# Every model consumes data of the same shape, so a single input layer is
# created once here and passed to each model-building function.
input_shape = (img_width, img_height, 3)
model_input = Input(shape=input_shape)
print(model_input)

In [None]:
#define custom confusion matrix function

def plot_confusion_matrix(cm, classes,
                          normalize=False, #if true all values in confusion matrix is between 0 and 1
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat-map.

    The matrix is drawn on the current matplotlib figure; the caller is
    responsible for creating the figure and calling ``plt.show()``.

    Args:
        cm: 2-D confusion matrix array (rows = true labels,
            columns = predicted labels).
        classes: Sequence of class names used as tick labels on both axes.
        normalize: If True, every row is divided by its sum so the values
            lie between 0 and 1. Rows that sum to zero are left as zeros.
        title: Title of the plot.
        cmap: Matplotlib colour map used for the heat-map.
    """
    # Normalize BEFORE drawing so the image, the colour bar and the cell
    # annotations all show the same values.
    if normalize:
        cm_float = cm.astype('float')
        row_sums = cm_float.sum(axis=1)[:, np.newaxis]
        # Guard against empty rows (no samples of a class) to avoid NaNs.
        cm = np.divide(cm_float, row_sums,
                       out=np.zeros_like(cm_float), where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Fractions are shortened to two decimals; raw values are shown as-is.
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
#%% declaring image data generators, make sure to declare shuffle=False
#   for any generator whose predictions are compared against .classes

# Train/validation generator: 90/10 split, no augmentation except rescaling.
datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.1)

# Test generator: rescaling only. This object was previously used without
# being defined, which raised a NameError when the cell was run.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size, class_mode='categorical', subset = 'training')

validation_generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size, class_mode='categorical', subset = 'validation')

# shuffle=False keeps the prediction order aligned with test_generator.classes,
# which is used as the ground truth below.
test_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',shuffle=False)

#identify the number of samples
nb_train_samples = len(train_generator.filenames)
nb_validation_samples = len(validation_generator.filenames)
nb_test_samples = len(test_generator.filenames)

#check the class indices
print(train_generator.class_indices)
print(validation_generator.class_indices)
print(test_generator.class_indices)

#true labels (integer class index per test image)
Y_test=test_generator.classes
print(Y_test.shape)

#convert test labels to categorical (one-hot)
Y_test1=to_categorical(Y_test, num_classes=num_classes, dtype='float32')
print(Y_test1.shape)

In [None]:
#%% assign class weights to balance model training and penalize over-represented classes

# Keyword arguments are used because newer scikit-learn releases no longer
# accept `classes` and `y` positionally.
class_labels = np.unique(train_generator.classes)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=train_generator.classes)

# Keras expects `class_weight` as a dict mapping class index -> weight;
# compute_class_weight returns a plain array ordered like `class_labels`.
class_weights = {int(label): float(weight)
                 for label, weight in zip(class_labels, balanced_weights)}
print(class_weights)

In [None]:
#%% define custom model architecture

def custom_cnn(model_input):
    """Build the custom separable-convolution classifier.

    Three identical stages (5x5 SeparableConv2D with ReLU -> BatchNormalization
    -> 2x2 max-pooling) with 64, 128 and 256 filters are followed by global
    average pooling, dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor the network is built on.

    Returns:
        An uncompiled Keras `Model` named 'custom_cnn'.
    """
    features = model_input
    for n_filters in (64, 128, 256):
        features = SeparableConv2D(n_filters, (5, 5), padding='same', activation='relu')(features)
        features = BatchNormalization()(features)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(features)

    pooled = GlobalAveragePooling2D()(features)
    regularized = Dropout(0.5)(pooled)
    probabilities = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=model_input, outputs=probabilities, name='custom_cnn')

# build the model on the shared input tensor
custom_model = custom_cnn(model_input)

# print the layer-by-layer summary
custom_model.summary()

# save a diagram of the architecture
plot_model(custom_model, to_file='custom_model.png',show_shapes=True, show_layer_names=False)

In [None]:
#%% VGG16 model

def vgg16_cnn(model_input):
    """Build a VGG16-based classifier on top of `model_input`.

    The ImageNet-pretrained VGG16 convolutional base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the VGG16 base.

    Returns:
        An uncompiled Keras `Model` named 'vgg16_custom'.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_vgg16')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='vgg16_custom')

# build the model on the shared input tensor
vgg16_custom_model = vgg16_cnn(model_input)

# print the layer-by-layer summary
vgg16_custom_model.summary()

# save a diagram of the architecture
plot_model(vgg16_custom_model, to_file='vgg16_custom_model.png',show_shapes=True, show_layer_names=False)


In [None]:
#%% VGG19 model

def vgg19_cnn(model_input):
    """Build a VGG19-based classifier on top of `model_input`.

    The ImageNet-pretrained VGG19 convolutional base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the VGG19 base.

    Returns:
        An uncompiled Keras `Model` named 'vgg19_custom'.
    """
    backbone = VGG19(weights='imagenet', include_top=False, input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_vgg19')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='vgg19_custom')

# build the model on the shared input tensor
vgg19_custom_model = vgg19_cnn(model_input)

# print the layer-by-layer summary
vgg19_custom_model.summary()

# save a diagram of the architecture
plot_model(vgg19_custom_model, to_file='vgg19_custom_model.png',show_shapes=True, show_layer_names=False)


In [None]:
#%% Inception-V3 model

def inceptionv3_cnn(model_input):
    """Build an InceptionV3-based classifier on top of `model_input`.

    The ImageNet-pretrained InceptionV3 base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the InceptionV3 base.

    Returns:
        An uncompiled Keras `Model` named 'inceptionv3_custom'.
    """
    backbone = InceptionV3(weights='imagenet', include_top=False, input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_inceptionv3')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='inceptionv3_custom')

# build the model on the shared input tensor
inceptionv3_custom_model = inceptionv3_cnn(model_input)

# print the layer-by-layer summary
inceptionv3_custom_model.summary()

# save a diagram of the architecture
plot_model(inceptionv3_custom_model, to_file='inceptionv3_custom_model.png',show_shapes=True, show_layer_names=False)

In [None]:
#%% Xception model

def xception_cnn(model_input):
    """Build an Xception-based classifier on top of `model_input`.

    The ImageNet-pretrained Xception base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the Xception base.

    Returns:
        An uncompiled Keras `Model` named 'xception_custom'.
    """
    backbone = Xception(weights='imagenet', include_top=False, input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_xception')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='xception_custom')

# build the model on the shared input tensor
xception_custom_model = xception_cnn(model_input)

# print the layer-by-layer summary and save a diagram of the architecture
xception_custom_model.summary()
plot_model(xception_custom_model, to_file='xception_custom_model.png',show_shapes=True, show_layer_names=False)

In [None]:
#%% DenseNet121 model

def densenet_cnn(model_input):
    """Build a DenseNet121-based classifier on top of `model_input`.

    The ImageNet-pretrained DenseNet121 base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the DenseNet121 base.

    Returns:
        An uncompiled Keras `Model` named 'densenet121_custom'.
    """
    backbone = DenseNet121(weights='imagenet', include_top=False, input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_densenet')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='densenet121_custom')

# build the model on the shared input tensor
densenet_custom_model = densenet_cnn(model_input)

# print the layer-by-layer summary
densenet_custom_model.summary()

# save a diagram of the architecture
plot_model(densenet_custom_model, to_file='densenet_custom_model.png',show_shapes=True, show_layer_names=False)

In [None]:
#%% MobileNet model

def mobile_cnn(model_input):
    """Build a MobileNet-based classifier on top of `model_input`.

    The ImageNet-pretrained MobileNet base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the MobileNet base.

    Returns:
        An uncompiled Keras `Model` named 'mobile_custom'.
    """
    backbone = applications.mobilenet.MobileNet(weights='imagenet', include_top=False, input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_mobilenet')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='mobile_custom')

# build the model on the shared input tensor
mobile_custom_model = mobile_cnn(model_input)

# print the layer-by-layer summary
mobile_custom_model.summary()

# save a diagram of the architecture
plot_model(mobile_custom_model, to_file='mobile_custom_model.png',show_shapes=True, show_layer_names=False)

In [None]:
#%% NasNet Mobile model

def NASNET_cnn(model_input):
    """Build a NASNetMobile-based classifier on top of `model_input`.

    The ImageNet-pretrained NASNetMobile base (no top) is extended with
    zero-padding, a 3x3 convolution with 1024 filters, global average pooling,
    dropout (0.5) and a softmax layer with `num_classes` units.

    Args:
        model_input: Keras input tensor fed to the NASNetMobile base.

    Returns:
        An uncompiled Keras `Model` named 'NASNET_custom'.
    """
    backbone = applications.nasnet.NASNetMobile(weights='imagenet',
                                                include_top=False,
                                                input_tensor=model_input)
    padded = ZeroPadding2D(padding=(1, 1))(backbone.output)
    extra_conv = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_NASNET')(padded)
    pooled = GlobalAveragePooling2D()(extra_conv)
    regularized = Dropout(0.5)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)
    return Model(inputs=backbone.input, outputs=predictions, name='NASNET_custom')

# build the model on the shared input tensor
nasnet_custom_model = NASNET_cnn(model_input)

# print the layer-by-layer summary
nasnet_custom_model.summary()

# save a diagram of the architecture
plot_model(nasnet_custom_model, to_file='nasnet_custom_model.png',show_shapes=True, show_layer_names=False)

In [None]:
#compile and train the custom model on the CheXpert data.
#Repeat for other pretrained models as well.
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
custom_model.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)

callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("custom_coarse_model.png")

In [None]:
#%% compile and train the VGG16 custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)  
vgg16_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + vgg16_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = vgg16_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("vgg16_custom_coarse_model.png")

In [None]:
#%% compile and train the VGG19 custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)  
vgg19_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + vgg19_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = vgg19_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("vgg19_custom_coarse_model.png")

In [None]:
#%% compile and train the InceptionV3 custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)  
inceptionv3_custom_model.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + inceptionv3_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = inceptionv3_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("InceptionV3_custom_coarse_model.png")

In [None]:
#%% compile and train the Xception custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)  
xception_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + xception_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = xception_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("Xception_custom_coarse_model.png")

In [None]:
#%% compile and train the DenseNet121 custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)  
densenet_custom_model.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + densenet_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)

tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = densenet_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("DenseNet_custom_coarse_model.png")

In [None]:
#%% compile and train the Mobilenet custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)  
mobile_custom_model.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + mobile_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)

tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = mobile_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("Mobilenet_custom_coarse_model.png")

In [None]:
#compile and train the NASNET custom model
import os

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.95, nesterov=True)
nasnet_custom_model.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy']) 

# The best checkpoints are written under weights/; create the directory up
# front so saving does not fail in Keras versions that do not create it.
os.makedirs('weights', exist_ok=True)
filepath = 'weights/' + nasnet_custom_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, 
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
# halve the learning rate when val_acc has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)

callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division: a partial final batch is included, and no duplicate batch
# is added when the sample count is exactly divisible by the batch size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = nasnet_custom_model.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list, 
                                  validation_steps=validation_steps, verbose=1) 

In [None]:
# Plot the per-epoch training/validation loss and accuracy recorded in
# `history` and save the figure to disk.
N = epochs
epoch_axis = np.arange(1, N + 1)
# (history key, line colour, legend label) for every curve, in drawing order
curves = (
    ("loss", 'orange', "train_loss"),
    ("val_loss", 'red', "val_loss"),
    ("acc", 'blue', "train_acc"),
    ("val_acc", 'green', "val_acc"),
)

plt.style.use("ggplot")
plt.figure(figsize=(20, 10), dpi=300)
for metric_key, line_colour, legend_label in curves:
    plt.plot(epoch_axis, history.history[metric_key], line_colour, label=legend_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("nasnet_custom_coarse_model.png")

Model Evaluation: Evaluate the CheXpert Trained models on the test data

In [None]:
#Evaluate the custom model by loading the best weights
custom_model.load_weights('weights/custom_cnn.06-0.8891.h5')
custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)  
custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) 

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division gives exactly one pass over the test set. An unconditional
# "+ 1" step would yield batch_size extra predictions whenever the test count
# is divisible by the batch size, and the metrics computed against Y_test
# would then fail on mismatched lengths.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict class probabilities for every test image
custom_y_pred = custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the custom model, computed on hard class labels
# (argmax of the one-hot ground truth and of the predicted probabilities).
custom_true_labels = Y_test1.argmax(axis=-1)
custom_pred_labels = custom_y_pred.argmax(axis=-1)

# accuracy
accuracy = accuracy_score(custom_true_labels, custom_pred_labels)
print('The test accuracy of the Custom model is: ', accuracy)

# mean squared error
custom_mse = mean_squared_error(custom_true_labels, custom_pred_labels)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

# mean squared log error
custom_msle = mean_squared_log_error(custom_true_labels, custom_pred_labels)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

# Matthews correlation coefficient
custom_MCC = matthews_corrcoef(custom_true_labels, custom_pred_labels)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

target_names = ['class 0(abnormal)','class 1(normal)'] 

# hard class labels for the ground truth and the custom-model predictions
report_true_labels = Y_test1.argmax(axis=-1)
report_pred_labels = custom_y_pred.argmax(axis=-1)

print(classification_report(report_true_labels, report_pred_labels,
                            target_names=target_names, digits=4))

# confusion matrix as a raw count array
cnf_matrix = confusion_matrix(report_true_labels, report_pred_labels)
np.set_printoptions(precision=4)

# confusion matrix (normalize=False) drawn with scikit-plot
skplt.metrics.plot_confusion_matrix(report_true_labels, report_pred_labels,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# the same count matrix drawn with the notebook's own plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the custom model predictions
roc_plot_options = {
    'figsize': (20, 10),
    'title_fontsize': 'large',
    'text_fontsize': 'large',
}
skplt.metrics.plot_roc(Y_test, custom_y_pred, **roc_plot_options)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the VGG16 model by loading the best weights
vgg16_custom_model.load_weights('weights/vgg16_custom.10-0.9209.h5')
vgg16_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)  
vgg16_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) 

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division gives exactly one pass over the test set. An unconditional
# "+ 1" step would yield batch_size extra predictions whenever the test count
# is divisible by the batch size, and the metrics computed against Y_test
# would then fail on mismatched lengths.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict class probabilities for every test image
vgg16_custom_y_pred = vgg16_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the VGG16 model, computed on hard class labels
# (argmax of the one-hot ground truth and of the predicted probabilities).
# The printed messages now name the VGG16 model; they previously reported
# these results as belonging to "the Custom model".
# NOTE(review): accuracy / custom_mse / custom_msle / custom_MCC are the same
# names used for the custom-CNN metrics, so those values are overwritten here.
# The names are kept unchanged in case later cells read them.
vgg16_true_labels = Y_test1.argmax(axis=-1)
vgg16_pred_labels = vgg16_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(vgg16_true_labels, vgg16_pred_labels)
print('The test accuracy of the VGG16 custom model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(vgg16_true_labels, vgg16_pred_labels)
print('The Mean Squared Error of the VGG16 custom model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(vgg16_true_labels, vgg16_pred_labels)
print('The Mean Squared Log Error of the VGG16 custom model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(vgg16_true_labels, vgg16_pred_labels)
print('The Matthews correlation coefficient value (MCC) for the VGG16 custom model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

target_names = ['class 0(abnormal)','class 1(normal)'] 

# hard class labels for the ground truth and the VGG16 predictions
report_true_labels = Y_test1.argmax(axis=-1)
report_pred_labels = vgg16_custom_y_pred.argmax(axis=-1)

print(classification_report(report_true_labels, report_pred_labels,
                            target_names=target_names, digits=4))

# confusion matrix as a raw count array
cnf_matrix = confusion_matrix(report_true_labels, report_pred_labels)
np.set_printoptions(precision=4)

# confusion matrix (normalize=False) drawn with scikit-plot
skplt.metrics.plot_confusion_matrix(report_true_labels, report_pred_labels,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# the same count matrix drawn with the notebook's own plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the VGG16 model predictions
roc_plot_options = {
    'figsize': (20, 10),
    'title_fontsize': 'large',
    'text_fontsize': 'large',
}
skplt.metrics.plot_roc(Y_test, vgg16_custom_y_pred, **roc_plot_options)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the VGG19 model by loading the best weights
vgg19_custom_model.load_weights('weights/vgg19_custom.05-0.9197.h5')
vgg19_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
vgg19_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
vgg19_custom_y_pred = vgg19_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the VGG19 model. Labels and model outputs are reduced
# to class indices once (argmax over the last axis) and reused by every metric.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = vgg19_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the VGG19 model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the VGG19 model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the VGG19 model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the VGG19 model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = vgg19_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Confusion matrix drawn by scikit-plot (normalize=False, so cells show counts)
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Same matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the VGG19 model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, vgg19_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the Inception-V3 model by loading the best weights
inceptionv3_custom_model.load_weights('weights/inceptionv3_custom.10-0.9179.h5')
inceptionv3_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
inceptionv3_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
inceptionv3_custom_y_pred = inceptionv3_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the Inception-V3 model. Labels and model outputs are reduced
# to class indices once (argmax over the last axis) and reused by every metric.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = inceptionv3_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the Inception-V3 model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the Inception-V3 model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the Inception-V3 model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the Inception-V3 model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = inceptionv3_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Confusion matrix drawn by scikit-plot (normalize=False, so cells show counts)
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Same matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the Inception-V3 model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, inceptionv3_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the Xception model by loading the best weights
xception_custom_model.load_weights('weights/xception_custom.02-0.9167.h5')
xception_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
xception_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
xception_custom_y_pred = xception_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the Xception model. Labels and model outputs are reduced
# to class indices once (argmax over the last axis) and reused by every metric.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = xception_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the Xception model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the Xception model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the Xception model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the Xception model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = xception_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Confusion matrix drawn by scikit-plot (normalize=False, so cells show counts)
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Same matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the Xception model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, xception_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the Densenet-121 model by loading the best weights
densenet_custom_model.load_weights('weights/densenet121_custom.01-0.9155.h5')
densenet_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
densenet_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
densenet_custom_y_pred = densenet_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the DenseNet-121 model. Labels and model outputs are reduced
# to class indices once (argmax over the last axis) and reused by every metric.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = densenet_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the DenseNet-121 model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the DenseNet-121 model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the DenseNet-121 model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the DenseNet-121 model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = densenet_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Confusion matrix drawn by scikit-plot (normalize=False, so cells show counts)
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Same matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the DenseNet-121 model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, densenet_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the MobileNet model by loading the best weights
mobile_custom_model.load_weights('weights/mobile_custom.03-0.9172.h5')
mobile_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
mobile_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
mobile_custom_y_pred = mobile_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the MobileNet model. Labels and model outputs are reduced
# to class indices once (argmax over the last axis) and reused by every metric.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = mobile_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the MobileNet model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the MobileNet model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the MobileNet model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the MobileNet model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = mobile_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Confusion matrix drawn by scikit-plot (normalize=False, so cells show counts)
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Same matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the MobileNet model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, mobile_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Evaluate the NasNet Mobile model by loading the best weights
nasnet_custom_model.load_weights('weights/NASNET_custom.05-0.9178.h5')
nasnet_custom_model.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
nasnet_custom_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
nasnet_custom_y_pred = nasnet_custom_model.predict_generator(test_generator,
                                        test_steps, verbose=1)

In [None]:
# Test-set metrics for the NASNet Mobile model. Labels and model outputs are reduced
# to class indices once (argmax over the last axis) and reused by every metric.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = nasnet_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the NASNet Mobile model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the NASNet Mobile model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the NASNet Mobile model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the NASNet Mobile model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix
import itertools

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = nasnet_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Confusion matrix drawn by scikit-plot (normalize=False, so cells show counts)
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=False, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Same matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the NASNet Mobile model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, nasnet_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

Part B:

By this point, all the custom and pretrained models have been entirely retrained on the large-scale CheXpert data, and the best-performing weights of each model have been stored. The models are now reloaded and truncated at the intermediate layers (determined empirically) that gave the best performance on the Kaggle Pneumonia dataset.

In [None]:
#load the Kaggle abnormality classifier data

# Settings shared by all Part B fine-tuning cells
num_classes = 2
epochs = 30
batch_size = 8
img_width = img_height = 256
train_data_dir = 'C:/abnormality_classifier_binary/abnormality_aug/train'
test_data_dir = 'C:/abnormality_classifier_binary/abnormality_aug/test'

# Every model consumes images of the same shape, so one input layer
# is declared here for all of them.
input_shape = (img_width, img_height, 3)
model_input = Input(shape=input_shape)
print(model_input)

In [None]:
#%%declaring image data generators; the test generator must use shuffle=False
# so that its predictions stay in the same order as test_generator.classes

# train/validation: rescaling only, 10% of train_data_dir held out for validation
datagen = ImageDataGenerator(
        rescale=1./255,
        validation_split=0.1) #90/10, no augmentation except rescaling

# test: same rescaling, no split. Declared here so this cell does not rely on a
# `test_datagen` object left over from an earlier part of the notebook.
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size, class_mode='categorical', subset = 'training')

validation_generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size, class_mode='categorical', subset = 'validation')

test_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',shuffle=False)

#identify the number of samples
nb_train_samples = len(train_generator.filenames)
nb_validation_samples = len(validation_generator.filenames)
nb_test_samples = len(test_generator.filenames)

#check the class indices
print(train_generator.class_indices)
print(validation_generator.class_indices)
print(test_generator.class_indices)

#true labels (integer class index per test file)
Y_test=test_generator.classes
print(Y_test.shape)

#convert test labels to categorical (one-hot) form
Y_test1=to_categorical(Y_test, num_classes=num_classes, dtype='float32')
print(Y_test1.shape)

In [None]:
#%% assign class weights to balance model training and penalize over-represented classes

# `classes` and `y` are passed by keyword: current scikit-learn releases reject
# them as positional arguments, and older releases accept the keywords too.
train_classes = np.unique(train_generator.classes)
balanced_weights = class_weight.compute_class_weight(
               'balanced',
                classes=train_classes,
                y=train_generator.classes)

# Keras documents `class_weight` as a dict mapping class index -> weight,
# so the weight array is converted before it is handed to fit_generator.
class_weights = {int(cls): float(weight)
                 for cls, weight in zip(train_classes, balanced_weights)}
print(class_weights)

In [None]:
# Custom CNN: restore the stored weights, then cut the network at 'separable_conv2d_3'
custom_model.load_weights('weights/custom_cnn.06-0.8891.h5')
custom_model.summary()
base_model_custom = Model(inputs=custom_model.input,
                          outputs=custom_model.get_layer('separable_conv2d_3').output)

#adding the top layers: zero-pad -> 3x3 conv -> global average pooling -> dropout
x = base_model_custom.output
for top_layer in (ZeroPadding2D(padding=(1, 1)),
                  Conv2D(1024, (3, 3), activation='relu', name='extra_conv_custom'),
                  GlobalAveragePooling2D(),
                  Dropout(0.5)):
    x = top_layer(x)

# softmax classifier over num_classes outputs
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_custom = Model(inputs=base_model_custom.input, outputs=predictions, name = 'custom_finetuned')
model_custom.summary()

In [None]:
#%% compile and train the model

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_custom.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# checkpoint on improved validation accuracy; halve the learning rate after
# 5 epochs without improvement; log to TensorBoard
filepath = 'weights/' + model_custom.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division gives exactly one pass over the data per epoch. A fixed "+ 1"
# asks for one batch more than an epoch holds whenever the sample count is an
# exact multiple of batch_size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
val_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = model_custom.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=val_steps, verbose=1)

In [None]:
# Plot the loss and accuracy curves recorded in `history` and save the figure
N = epochs
epoch_axis = np.arange(1, N+1)
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
# (history key, line colour, legend label), drawn in this order
for metric_key, line_color, line_label in (("loss", 'orange', "train_loss"),
                                           ("val_loss", 'red', "val_loss"),
                                           ("acc", 'blue', "train_acc"),
                                           ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[metric_key], line_color, label=line_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/custom_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned custom model by loading the best weights
model_custom.load_weights('weights/custom_finetuned.04-0.8442.h5')
model_custom.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_custom.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
custom_y_pred = model_custom.predict_generator(test_generator,
                                               test_steps, verbose=1)

In [None]:
# Test-set metrics for the fine-tuned custom model. The one-hot labels and the
# softmax outputs are reduced to class indices once and shared by all metrics.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the Custom model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Normalized confusion matrix drawn by scikit-plot
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=True, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Count-based matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the fine-tuned custom model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#VGG16 model
# Restore the stored weights, then cut the network at 'block5_conv3'
vgg16_custom_model.load_weights('weights/vgg16_custom.10-0.9209.h5')
vgg16_custom_model.summary()
base_model_vgg16 = Model(inputs=vgg16_custom_model.input,
                         outputs=vgg16_custom_model.get_layer('block5_conv3').output)

#adding the top layers: zero-pad -> 3x3 conv -> global average pooling -> dropout
x = base_model_vgg16.output
for top_layer in (ZeroPadding2D(padding=(1, 1)),
                  Conv2D(1024, (3, 3), activation='relu', name='extra_conv_vgg16'),
                  GlobalAveragePooling2D(),
                  Dropout(0.5)):
    x = top_layer(x)

# softmax classifier over num_classes outputs
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_vgg16 = Model(inputs=base_model_vgg16.input, outputs=predictions, name = 'vgg16_finetuned')
model_vgg16.summary()

In [None]:
#%% compile and train the model

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_vgg16.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# checkpoint on improved validation accuracy; halve the learning rate after
# 5 epochs without improvement; log to TensorBoard
filepath = 'weights/' + model_vgg16.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division gives exactly one pass over the data per epoch. A fixed "+ 1"
# asks for one batch more than an epoch holds whenever the sample count is an
# exact multiple of batch_size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
val_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = model_vgg16.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=val_steps, verbose=1)

In [None]:
# Plot the loss and accuracy curves recorded in `history` and save the figure
N = epochs
epoch_axis = np.arange(1, N+1)
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
# (history key, line colour, legend label), drawn in this order
for metric_key, line_color, line_label in (("loss", 'orange', "train_loss"),
                                           ("val_loss", 'red', "val_loss"),
                                           ("acc", 'blue', "train_acc"),
                                           ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[metric_key], line_color, label=line_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/vgg16_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned VGG16 model by loading the best weights
model_vgg16.load_weights('weights/vgg16_finetuned.06-0.8946.h5')
model_vgg16.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_vgg16.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
vgg16_custom_y_pred = model_vgg16.predict_generator(test_generator,
                                                    test_steps, verbose=1)

In [None]:
# Test-set metrics for the fine-tuned VGG16 model. The one-hot labels and the
# softmax outputs are reduced to class indices once and shared by all metrics.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = vgg16_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the fine-tuned VGG16 model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the fine-tuned VGG16 model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the fine-tuned VGG16 model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the fine-tuned VGG16 model is: ', custom_MCC)

In [None]:
#%% print classification report and plot confusion matrix

# Class indices for ground truth and predictions, computed once for this cell
true_cls = Y_test1.argmax(axis=-1)
pred_cls = vgg16_custom_y_pred.argmax(axis=-1)

target_names = ['class 0(abnormal)','class 1(normal)']
print(classification_report(true_cls, pred_cls,
                            target_names=target_names, digits=4))

# Confusion matrix of raw counts; numpy output is printed with 4-digit precision
cnf_matrix = confusion_matrix(true_cls, pred_cls)
np.set_printoptions(precision=4)

# Normalized confusion matrix drawn by scikit-plot
skplt.metrics.plot_confusion_matrix(true_cls, pred_cls,
                                    normalize=True, x_tick_rotation=45, figsize=(20,10),
                                    title_fontsize='large', text_fontsize='medium')
plt.show()

# Count-based matrix drawn with plot_confusion_matrix (defined outside this cell)
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% compute the ROC-AUC values
# ROC curves from the true labels in Y_test and the fine-tuned VGG16 model outputs
roc_kwargs = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, vgg16_custom_y_pred, **roc_kwargs)
plt.legend(loc="lower right")
plt.show()

In [None]:
#VGG19 model
# Restore the stored weights, then cut the network at 'block4_pool'
vgg19_custom_model.load_weights('weights/vgg19_custom.05-0.9197.h5')
vgg19_custom_model.summary()
base_model_vgg19=Model(inputs=vgg19_custom_model.input,outputs=vgg19_custom_model.get_layer('block4_pool').output)

#adding the top layers
# The head must start from the VGG19 base output: building it on
# base_model_vgg16.output leaves it disconnected from base_model_vgg19.input,
# which is the input used for model_vgg19 below.
x = base_model_vgg19.output
x = ZeroPadding2D(padding=(1, 1))(x)
x = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_vgg19')(x)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
# softmax classifier over num_classes outputs
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_vgg19 = Model(inputs=base_model_vgg19.input, outputs=predictions, name = 'vgg19_finetuned')
model_vgg19.summary()

In [None]:
#%% compile and train the model

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_vgg19.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# checkpoint on improved validation accuracy; halve the learning rate after
# 5 epochs without improvement; log to TensorBoard
filepath = 'weights/' + model_vgg19.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

# Ceiling division gives exactly one pass over the data per epoch. A fixed "+ 1"
# asks for one batch more than an epoch holds whenever the sample count is an
# exact multiple of batch_size.
train_steps = (nb_train_samples + batch_size - 1) // batch_size
val_steps = (nb_validation_samples + batch_size - 1) // batch_size

#train the model
history = model_vgg19.fit_generator(train_generator, steps_per_epoch=train_steps,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=val_steps, verbose=1)

In [None]:
# Plot the loss and accuracy curves recorded in `history` and save the figure
N = epochs
epoch_axis = np.arange(1, N+1)
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
# (history key, line colour, legend label), drawn in this order
for metric_key, line_color, line_label in (("loss", 'orange', "train_loss"),
                                           ("val_loss", 'red', "val_loss"),
                                           ("acc", 'blue', "train_acc"),
                                           ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[metric_key], line_color, label=line_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/vgg19_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned VGG19 model by loading the best weights
model_vgg19.load_weights('weights/vgg19_finetuned.02-0.8921.h5')
model_vgg19.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_vgg19.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#measure performance on test data, first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: just enough batches to see every test sample once.
# A fixed "+ 1" asks for one batch too many whenever nb_test_samples is an
# exact multiple of batch_size, and the predictions then outnumber the labels.
test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict on the test set
vgg19_custom_y_pred = model_vgg19.predict_generator(test_generator,
                                                    test_steps, verbose=1)

In [None]:
# Test-set metrics for the fine-tuned VGG19 model. The one-hot labels and the
# softmax outputs are reduced to class indices once and shared by all metrics.
true_cls = Y_test1.argmax(axis=-1)
pred_cls = vgg19_custom_y_pred.argmax(axis=-1)

#evaluate accuracy
accuracy = accuracy_score(true_cls, pred_cls)
print('The test accuracy of the fine-tuned VGG19 model is: ', accuracy)

#evaluate mean squared error
custom_mse = mean_squared_error(true_cls, pred_cls)
print('The Mean Squared Error of the fine-tuned VGG19 model is: ', custom_mse)

#evaluate mean squared log error
custom_msle = mean_squared_log_error(true_cls, pred_cls)
print('The Mean Squared Log Error of the fine-tuned VGG19 model is: ', custom_msle)

#evaluate matthews correlation coefficient
custom_MCC = matthews_corrcoef(true_cls, pred_cls)
print('The Matthews correlation coefficient value (MCC) for the fine-tuned VGG19 model is: ', custom_MCC)

In [None]:
#%% VGG-19: classification report and confusion matrices

target_names = ['class 0(abnormal)','class 1(normal)']
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = vgg19_custom_y_pred.argmax(axis=-1)

# Per-class precision / recall / F1, four decimal places
_report = classification_report(_true_idx, _pred_idx,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix, drawn further below
cnf_matrix = confusion_matrix(_true_idx, _pred_idx)
np.set_printoptions(precision=4)

# Normalized confusion matrix via scikit-plot
skplt.metrics.plot_confusion_matrix(
    _true_idx, _pred_idx,
    normalize=True, x_tick_rotation=45, figsize=(20,10),
    title_fontsize='large', text_fontsize='medium')
plt.show()

# Non-normalized confusion matrix via the notebook's plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the VGG-19 class probabilities
_roc_style = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, vgg19_custom_y_pred, **_roc_style)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Inception-v3: restore the stored custom-model weights and cut the network at 'mixed3'
inceptionv3_custom_model.load_weights('weights/inceptionv3_custom.10-0.9179.h5')
inceptionv3_custom_model.summary()
base_model_inceptionv3 = Model(inputs=inceptionv3_custom_model.input,
                               outputs=inceptionv3_custom_model.get_layer('mixed3').output)

#new top: zero-pad -> 3x3 conv (1024 filters, ReLU) -> global average pool -> dropout
x = base_model_inceptionv3.output
for _head_layer in (ZeroPadding2D(padding=(1, 1)),
                    Conv2D(1024, (3, 3), activation='relu', name='extra_conv_inceptionv3'),
                    GlobalAveragePooling2D(),
                    Dropout(0.5)):
    x = _head_layer(x)

#softmax classifier over num_classes outputs
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_inceptionv3 = Model(inputs=base_model_inceptionv3.input, outputs=predictions, name = 'inceptionv3_finetuned')
model_inceptionv3.summary()

In [None]:
#%% compile and fine-tune the truncated Inception-V3 model

#SGD with Nesterov momentum
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_inceptionv3.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#checkpoint only when validation accuracy improves; epoch and val_acc go into the file name
filepath = 'weights/' + model_inceptionv3.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
#halve the learning rate after 5 epochs without val_acc improvement, floor at 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

#train the model. Step counts use ceiling division: `n // batch_size + 1` asks
#for one batch too many whenever n is an exact multiple of batch_size, which
#re-draws a batch and double-counts it in the validation metrics.
history = model_inceptionv3.fit_generator(train_generator,
                                  steps_per_epoch=(nb_train_samples + batch_size - 1) // batch_size,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=(nb_validation_samples + batch_size - 1) // batch_size,
                                  verbose=1)

In [None]:
# Plot the Inception-V3 fine-tuning curves (train/validation loss and accuracy).
# The x-axis is sized from the number of epochs actually recorded in `history`
# rather than from the configured `epochs`, so the plot cannot fail with a
# length mismatch when the two differ (e.g. `epochs` was edited after training).
N = len(history.history["loss"])
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
epoch_axis = np.arange(1, N+1)
for _key, _color, _label in (("loss", 'orange', "train_loss"),
                             ("val_loss", 'red', "val_loss"),
                             ("acc", 'blue', "train_acc"),
                             ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[_key], _color, label=_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/inceptionv3_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned Inception-V3 model by loading its best checkpoint
model_inceptionv3.load_weights('weights/inceptionv3_finetuned.03-0.8821.h5')
model_inceptionv3.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_inceptionv3.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#rewind the test generator so prediction starts from its first batch;
#otherwise the predictions are offset relative to the labels
test_generator.reset()

#predict class probabilities for the test set. The step count is a ceiling
#division: `n // batch_size + 1` requests one batch too many whenever n is an
#exact multiple of batch_size, which makes the generator wrap around and
#return more predictions than there are test labels.
inceptionv3_custom_y_pred = model_inceptionv3.predict_generator(test_generator,
                                                                (nb_test_samples + batch_size - 1) // batch_size,
                                                                verbose=1)

In [None]:
#score the Inception-V3 predictions: collapse the per-class columns of the labels
#and of the model output to class indices once, then reuse them for every metric
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = inceptionv3_custom_y_pred.argmax(axis=-1)

#test accuracy
accuracy = accuracy_score(_true_idx, _pred_idx)
print('The test accuracy of the Custom model is: ', accuracy)

#mean squared error between true and predicted class indices
custom_mse = mean_squared_error(_true_idx, _pred_idx)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

#mean squared log error between true and predicted class indices
custom_msle = mean_squared_log_error(_true_idx, _pred_idx)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

#Matthews correlation coefficient
custom_MCC = matthews_corrcoef(_true_idx, _pred_idx)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% Inception-V3: classification report and confusion matrices

target_names = ['class 0(abnormal)','class 1(normal)']
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = inceptionv3_custom_y_pred.argmax(axis=-1)

# Per-class precision / recall / F1, four decimal places
_report = classification_report(_true_idx, _pred_idx,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix, drawn further below
cnf_matrix = confusion_matrix(_true_idx, _pred_idx)
np.set_printoptions(precision=4)

# Normalized confusion matrix via scikit-plot
skplt.metrics.plot_confusion_matrix(
    _true_idx, _pred_idx,
    normalize=True, x_tick_rotation=45, figsize=(20,10),
    title_fontsize='large', text_fontsize='medium')
plt.show()

# Non-normalized confusion matrix via the notebook's plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the Inception-V3 class probabilities
_roc_style = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, inceptionv3_custom_y_pred, **_roc_style)
plt.legend(loc="lower right")
plt.show()

In [None]:
#Xception model: restore the stored custom-model weights and cut the network at 'add_7'
#Activation is used below but is not part of the notebook's visible keras.layers
#import list, so import it here to keep this cell runnable on its own.
from keras.layers import Activation

xception_custom_model.load_weights('weights/xception_custom.02-0.9167.h5')
xception_custom_model.summary()
base_model_xception=Model(inputs=xception_custom_model.input,
                             outputs=xception_custom_model.get_layer('add_7').output)
#adding the top layers: ReLU on the 'add_7' output, then
#zero-pad -> 3x3 conv (1024 filters, ReLU) -> global average pool -> dropout -> softmax
x = base_model_xception.output
x = Activation('relu') (x)
x = ZeroPadding2D(padding=(1, 1))(x)
x = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_xception')(x)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_xception = Model(inputs=base_model_xception.input, outputs=predictions, name = 'xception_finetuned')
model_xception.summary()

In [None]:
#%% compile and fine-tune the truncated Xception model

#SGD with Nesterov momentum
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_xception.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#checkpoint only when validation accuracy improves; epoch and val_acc go into the file name
filepath = 'weights/' + model_xception.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
#halve the learning rate after 5 epochs without val_acc improvement, floor at 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

#train the model. Step counts use ceiling division: `n // batch_size + 1` asks
#for one batch too many whenever n is an exact multiple of batch_size, which
#re-draws a batch and double-counts it in the validation metrics.
history = model_xception.fit_generator(train_generator,
                                  steps_per_epoch=(nb_train_samples + batch_size - 1) // batch_size,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=(nb_validation_samples + batch_size - 1) // batch_size,
                                  verbose=1)

In [None]:
# Plot the Xception fine-tuning curves (train/validation loss and accuracy).
# The x-axis is sized from the number of epochs actually recorded in `history`
# rather than from the configured `epochs`, so the plot cannot fail with a
# length mismatch when the two differ (e.g. `epochs` was edited after training).
N = len(history.history["loss"])
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
epoch_axis = np.arange(1, N+1)
for _key, _color, _label in (("loss", 'orange', "train_loss"),
                             ("val_loss", 'red', "val_loss"),
                             ("acc", 'blue', "train_acc"),
                             ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[_key], _color, label=_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/xception_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned Xception model by loading its best checkpoint
model_xception.load_weights('weights/xception_finetuned.08-0.8791.h5')
model_xception.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_xception.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#rewind the test generator so prediction starts from its first batch;
#otherwise the predictions are offset relative to the labels
test_generator.reset()

#predict class probabilities for the test set. The step count is a ceiling
#division: `n // batch_size + 1` requests one batch too many whenever n is an
#exact multiple of batch_size, which makes the generator wrap around and
#return more predictions than there are test labels.
xception_custom_y_pred = model_xception.predict_generator(test_generator,
                                                          (nb_test_samples + batch_size - 1) // batch_size,
                                                          verbose=1)

In [None]:
#score the Xception predictions: collapse the per-class columns of the labels
#and of the model output to class indices once, then reuse them for every metric
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = xception_custom_y_pred.argmax(axis=-1)

#test accuracy
accuracy = accuracy_score(_true_idx, _pred_idx)
print('The test accuracy of the Custom model is: ', accuracy)

#mean squared error between true and predicted class indices
custom_mse = mean_squared_error(_true_idx, _pred_idx)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

#mean squared log error between true and predicted class indices
custom_msle = mean_squared_log_error(_true_idx, _pred_idx)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

#Matthews correlation coefficient
custom_MCC = matthews_corrcoef(_true_idx, _pred_idx)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% Xception: classification report and confusion matrices

target_names = ['class 0(abnormal)','class 1(normal)']
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = xception_custom_y_pred.argmax(axis=-1)

# Per-class precision / recall / F1, four decimal places
_report = classification_report(_true_idx, _pred_idx,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix, drawn further below
cnf_matrix = confusion_matrix(_true_idx, _pred_idx)
np.set_printoptions(precision=4)

# Normalized confusion matrix via scikit-plot
skplt.metrics.plot_confusion_matrix(
    _true_idx, _pred_idx,
    normalize=True, x_tick_rotation=45, figsize=(20,10),
    title_fontsize='large', text_fontsize='medium')
plt.show()

# Non-normalized confusion matrix via the notebook's plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the Xception class probabilities
_roc_style = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, xception_custom_y_pred, **_roc_style)
plt.legend(loc="lower right")
plt.show()

In [None]:
#load the best model weights: DenseNet121, then cut the network at 'pool3_conv'
#(the original line ended with a doubled quote, which is a SyntaxError)
densenet_custom_model.load_weights('weights/densenet121_custom.01-0.9155.h5')
densenet_custom_model.summary()
base_model_densenet=Model(inputs=densenet_custom_model.input,outputs=densenet_custom_model.get_layer('pool3_conv').output)


#adding the top layers: 2x2 max-pool -> zero-pad -> 3x3 conv (1024 filters, ReLU)
#-> global average pool -> dropout -> softmax over num_classes
x = base_model_densenet.output
x = MaxPooling2D(pool_size=(2, 2))(x)
x = ZeroPadding2D(padding=(1, 1))(x)
x = Conv2D(1024, (3, 3), activation='relu', name='extra_conv_densenet')(x)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_densenet = Model(inputs=base_model_densenet.input, outputs=predictions, name = 'densenet_finetuned')
model_densenet.summary()

In [None]:
#%% compile and fine-tune the truncated DenseNet model

#SGD with Nesterov momentum
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_densenet.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#checkpoint only when validation accuracy improves; epoch and val_acc go into the file name
filepath = 'weights/' + model_densenet.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
#halve the learning rate after 5 epochs without val_acc improvement, floor at 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

#train the model. Step counts use ceiling division: `n // batch_size + 1` asks
#for one batch too many whenever n is an exact multiple of batch_size, which
#re-draws a batch and double-counts it in the validation metrics.
history = model_densenet.fit_generator(train_generator,
                                  steps_per_epoch=(nb_train_samples + batch_size - 1) // batch_size,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=(nb_validation_samples + batch_size - 1) // batch_size,
                                  verbose=1)

In [None]:
# Plot the DenseNet fine-tuning curves (train/validation loss and accuracy).
# The x-axis is sized from the number of epochs actually recorded in `history`
# rather than from the configured `epochs`, so the plot cannot fail with a
# length mismatch when the two differ (e.g. `epochs` was edited after training).
N = len(history.history["loss"])
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
epoch_axis = np.arange(1, N+1)
for _key, _color, _label in (("loss", 'orange', "train_loss"),
                             ("val_loss", 'red', "val_loss"),
                             ("acc", 'blue', "train_acc"),
                             ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[_key], _color, label=_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/densenet_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned DenseNet model by loading its best checkpoint
model_densenet.load_weights('weights/densenet_finetuned.06-0.8873.h5')
model_densenet.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_densenet.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#rewind the test generator so prediction starts from its first batch;
#otherwise the predictions are offset relative to the labels
test_generator.reset()

#predict class probabilities for the test set. The step count is a ceiling
#division: `n // batch_size + 1` requests one batch too many whenever n is an
#exact multiple of batch_size, which makes the generator wrap around and
#return more predictions than there are test labels.
densenet_custom_y_pred = model_densenet.predict_generator(test_generator,
                                                          (nb_test_samples + batch_size - 1) // batch_size,
                                                          verbose=1)

In [None]:
#%% DenseNet: classification report and confusion matrices

target_names = ['class 0(abnormal)','class 1(normal)']
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = densenet_custom_y_pred.argmax(axis=-1)

# Per-class precision / recall / F1, four decimal places
_report = classification_report(_true_idx, _pred_idx,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix
cnf_matrix = confusion_matrix(_true_idx, _pred_idx)
np.set_printoptions(precision=4)

# Non-normalized confusion matrix via the notebook's plot_confusion_matrix helper
plt.figure(figsize=(20,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

# Normalized confusion matrix via scikit-plot
skplt.metrics.plot_confusion_matrix(
    _true_idx, _pred_idx,
    normalize=True, x_tick_rotation=45, figsize=(20,10),
    title_fontsize='large', text_fontsize='medium')
plt.show()

In [None]:
# score the DenseNet predictions: collapse the per-class columns of the labels
# and of the model output to class indices once, then reuse them for every metric
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = densenet_custom_y_pred.argmax(axis=-1)

#test accuracy
accuracy = accuracy_score(_true_idx, _pred_idx)
print('The test accuracy of the Custom model is: ', accuracy)

#mean squared error between true and predicted class indices
custom_mse = mean_squared_error(_true_idx, _pred_idx)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

#mean squared log error between true and predicted class indices
custom_msle = mean_squared_log_error(_true_idx, _pred_idx)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

#Matthews correlation coefficient
custom_MCC = matthews_corrcoef(_true_idx, _pred_idx)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% plot the ROC curves for the DenseNet class probabilities
_roc_style = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, densenet_custom_y_pred, **_roc_style)
plt.legend(loc="lower right")
plt.show()

In [None]:
#MobileNet: restore the stored custom-model weights and cut the network at 'conv_pw_6_relu'
mobile_custom_model.load_weights('weights/mobile_custom.03-0.9172.h5')
mobile_custom_model.summary()
base_model_mobile = Model(inputs=mobile_custom_model.input,
                          outputs=mobile_custom_model.get_layer('conv_pw_6_relu').output)

#new top: zero-pad -> 3x3 conv (1024 filters, ReLU) -> global average pool -> dropout
x = base_model_mobile.output
for _head_layer in (ZeroPadding2D(padding=(1, 1)),
                    Conv2D(1024, (3, 3), activation='relu', name='extra_conv_mobile'),
                    GlobalAveragePooling2D(),
                    Dropout(0.5)):
    x = _head_layer(x)

#softmax classifier over num_classes outputs
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_mobile = Model(inputs=base_model_mobile.input, outputs=predictions, name = 'mobilenet_finetuned')
model_mobile.summary()


In [None]:
#%% compile and fine-tune the truncated MobileNet model

#SGD with Nesterov momentum
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_mobile.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#checkpoint only when validation accuracy improves; epoch and val_acc go into the file name
filepath = 'weights/' + model_mobile.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
#halve the learning rate after 5 epochs without val_acc improvement, floor at 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

#train the model. Step counts use ceiling division: `n // batch_size + 1` asks
#for one batch too many whenever n is an exact multiple of batch_size, which
#re-draws a batch and double-counts it in the validation metrics.
history = model_mobile.fit_generator(train_generator,
                                  steps_per_epoch=(nb_train_samples + batch_size - 1) // batch_size,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=(nb_validation_samples + batch_size - 1) // batch_size,
                                  verbose=1)

In [None]:
# Plot the MobileNet fine-tuning curves (train/validation loss and accuracy).
# The x-axis is sized from the number of epochs actually recorded in `history`
# rather than from the configured `epochs`, so the plot cannot fail with a
# length mismatch when the two differ (e.g. `epochs` was edited after training).
N = len(history.history["loss"])
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
epoch_axis = np.arange(1, N+1)
for _key, _color, _label in (("loss", 'orange', "train_loss"),
                             ("val_loss", 'red', "val_loss"),
                             ("acc", 'blue', "train_acc"),
                             ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[_key], _color, label=_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/mobilenet_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned MobileNet model by loading its best checkpoint
model_mobile.load_weights('weights/mobilenet_finetuned.07-0.8801.h5')
model_mobile.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_mobile.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#rewind the test generator so prediction starts from its first batch;
#otherwise the predictions are offset relative to the labels
test_generator.reset()

#predict class probabilities for the test set. The step count is a ceiling
#division: `n // batch_size + 1` requests one batch too many whenever n is an
#exact multiple of batch_size, which makes the generator wrap around and
#return more predictions than there are test labels.
mobile_custom_y_pred = model_mobile.predict_generator(test_generator,
                                                      (nb_test_samples + batch_size - 1) // batch_size,
                                                      verbose=1)

In [None]:
#score the MobileNet predictions: collapse the per-class columns of the labels
#and of the model output to class indices once, then reuse them for every metric
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = mobile_custom_y_pred.argmax(axis=-1)

#test accuracy
accuracy = accuracy_score(_true_idx, _pred_idx)
print('The test accuracy of the Custom model is: ', accuracy)

#mean squared error between true and predicted class indices
custom_mse = mean_squared_error(_true_idx, _pred_idx)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

#mean squared log error between true and predicted class indices
custom_msle = mean_squared_log_error(_true_idx, _pred_idx)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

#Matthews correlation coefficient
custom_MCC = matthews_corrcoef(_true_idx, _pred_idx)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% MobileNet: classification report and confusion matrices

target_names = ['class 0(abnormal)','class 1(normal)']
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = mobile_custom_y_pred.argmax(axis=-1)

# Per-class precision / recall / F1, four decimal places
_report = classification_report(_true_idx, _pred_idx,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix, drawn further below
cnf_matrix = confusion_matrix(_true_idx, _pred_idx)
np.set_printoptions(precision=4)

# Normalized confusion matrix via scikit-plot
skplt.metrics.plot_confusion_matrix(
    _true_idx, _pred_idx,
    normalize=True, x_tick_rotation=45, figsize=(20,10),
    title_fontsize='large', text_fontsize='medium')
plt.show()

# Non-normalized confusion matrix via the notebook's plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the MobileNet class probabilities
_roc_style = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, mobile_custom_y_pred, **_roc_style)
plt.legend(loc="lower right")
plt.show()

In [None]:
#NASNet Mobile: restore the stored custom-model weights and cut the network at 'activation_129'
nasnet_custom_model.load_weights('weights/NASNET_custom.05-0.9178.h5')
nasnet_custom_model.summary()
base_model_nasnet = Model(inputs=nasnet_custom_model.input,
                          outputs=nasnet_custom_model.get_layer('activation_129').output)

#new top: zero-pad -> 3x3 conv (1024 filters, ReLU) -> global average pool -> dropout
x = base_model_nasnet.output
for _head_layer in (ZeroPadding2D(padding=(1, 1)),
                    Conv2D(1024, (3, 3), activation='relu', name='extra_conv_nasnet'),
                    GlobalAveragePooling2D(),
                    Dropout(0.5)):
    x = _head_layer(x)

#softmax classifier over num_classes outputs
predictions = Dense(num_classes, activation='softmax', name='predictions')(x)
model_nasnet = Model(inputs=base_model_nasnet.input, outputs=predictions, name = 'nasnet_finetuned')
model_nasnet.summary()

In [None]:
#%% compile and fine-tune the truncated NASNet model

#SGD with Nesterov momentum
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_nasnet.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

#checkpoint only when validation accuracy improves; epoch and val_acc go into the file name
filepath = 'weights/' + model_nasnet.name + '.{epoch:02d}-{val_acc:.4f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                             save_weights_only=False, save_best_only=True, mode='max', period=1)
#halve the learning rate after 5 epochs without val_acc improvement, floor at 1e-5
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=5,
                              verbose=1, mode='max', min_lr=0.00001)
tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

#reset generators
train_generator.reset()
validation_generator.reset()

#train the model. Step counts use ceiling division: `n // batch_size + 1` asks
#for one batch too many whenever n is an exact multiple of batch_size, which
#re-draws a batch and double-counts it in the validation metrics.
history = model_nasnet.fit_generator(train_generator,
                                  steps_per_epoch=(nb_train_samples + batch_size - 1) // batch_size,
                                  epochs=epochs, validation_data=validation_generator,
                                  class_weight = class_weights,
                                  callbacks=callbacks_list,
                                  validation_steps=(nb_validation_samples + batch_size - 1) // batch_size,
                                  verbose=1)

In [None]:
# Plot the NASNet fine-tuning curves (train/validation loss and accuracy).
# The x-axis is sized from the number of epochs actually recorded in `history`
# rather than from the configured `epochs`, so the plot cannot fail with a
# length mismatch when the two differ (e.g. `epochs` was edited after training).
N = len(history.history["loss"])
plt.style.use("ggplot")
plt.figure(figsize=(20,10), dpi=300)
epoch_axis = np.arange(1, N+1)
for _key, _color, _label in (("loss", 'orange', "train_loss"),
                             ("val_loss", 'red', "val_loss"),
                             ("acc", 'blue', "train_acc"),
                             ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[_key], _color, label=_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/nasnet_coarse_to_fine.png")

In [None]:
#Evaluate the fine-tuned NASNet model by loading its best checkpoint
model_nasnet.load_weights('weights/nasnet_finetuned.04-0.8740.h5')
model_nasnet.summary()
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model_nasnet.compile(optimizer=sgd,loss='categorical_crossentropy', metrics=['accuracy'])

#rewind the test generator so prediction starts from its first batch;
#otherwise the predictions are offset relative to the labels
test_generator.reset()

#predict class probabilities for the test set. The step count is a ceiling
#division: `n // batch_size + 1` requests one batch too many whenever n is an
#exact multiple of batch_size, which makes the generator wrap around and
#return more predictions than there are test labels.
nasnet_custom_y_pred = model_nasnet.predict_generator(test_generator,
                                                      (nb_test_samples + batch_size - 1) // batch_size,
                                                      verbose=1)

In [None]:
#score the NASNet predictions: collapse the per-class columns of the labels
#and of the model output to class indices once, then reuse them for every metric
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = nasnet_custom_y_pred.argmax(axis=-1)

#test accuracy
accuracy = accuracy_score(_true_idx, _pred_idx)
print('The test accuracy of the Custom model is: ', accuracy)

#mean squared error between true and predicted class indices
custom_mse = mean_squared_error(_true_idx, _pred_idx)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

#mean squared log error between true and predicted class indices
custom_msle = mean_squared_log_error(_true_idx, _pred_idx)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

#Matthews correlation coefficient
custom_MCC = matthews_corrcoef(_true_idx, _pred_idx)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% NASNet: classification report and confusion matrices

target_names = ['class 0(abnormal)','class 1(normal)']
_true_idx = Y_test1.argmax(axis=-1)
_pred_idx = nasnet_custom_y_pred.argmax(axis=-1)

# Per-class precision / recall / F1, four decimal places
_report = classification_report(_true_idx, _pred_idx,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix, drawn further below
cnf_matrix = confusion_matrix(_true_idx, _pred_idx)
np.set_printoptions(precision=4)

# Normalized confusion matrix via scikit-plot
skplt.metrics.plot_confusion_matrix(
    _true_idx, _pred_idx,
    normalize=True, x_tick_rotation=45, figsize=(20,10),
    title_fontsize='large', text_fontsize='medium')
plt.show()

# Non-normalized confusion matrix via the notebook's plot_confusion_matrix helper
plt.figure(figsize=(10,10), dpi=300)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% plot the ROC curves for the NASNet class probabilities
_roc_style = dict(figsize=(20,10), title_fontsize='large', text_fontsize='large')
skplt.metrics.plot_roc(Y_test, nasnet_custom_y_pred, **_roc_style)
plt.legend(loc="lower right")
plt.show()

Part C: Now that the models have been fine-tuned on the Kaggle pneumonia data and the best models are stored, we use ensembles to improve performance beyond that of any individual constituent model. The best models trained on the abnormality data are combined using several ensemble learning strategies, including majority voting, simple averaging, weighted averaging, and stacked generalization. We kept the sequential CNN as the baseline and did not use it in the ensembles.

In [None]:
#bind a second name to each model's prediction array (same object, no copy);
#the *_pred1 names are what the max-voting step reduces to class labels

vgg16_custom_y_pred1 = vgg16_custom_y_pred
vgg19_custom_y_pred1 = vgg19_custom_y_pred
xception_custom_y_pred1 = xception_custom_y_pred
inceptionv3_custom_y_pred1 = inceptionv3_custom_y_pred
densenet_custom_y_pred1 = densenet_custom_y_pred
nasnet_custom_y_pred1 = nasnet_custom_y_pred
mobile_custom_y_pred1 = mobile_custom_y_pred

#report the shape of every prediction array
for _message, _pred in (
        ("The shape of VGG16 custom model prediction vgg16_custom_y_pred is  = ", vgg16_custom_y_pred1),
        ("The shape of VGG19 custom model prediction vgg19_custom_y_pred is  = ", vgg19_custom_y_pred1),
        ("The shape of inceptionv3 custom model prediction inceptionv3_custom_y_pred is = ", inceptionv3_custom_y_pred1),
        ("The shape of densenet custom model prediction densenet_custom_y_pred is  = ", densenet_custom_y_pred1),
        ("The shape of Xception custom model prediction xception_custom_y_pred is = ", xception_custom_y_pred1),
        ("The shape of NASNET custom model prediction nasnet_custom_y_pred is  = ", nasnet_custom_y_pred1),
        ("The shape of MobileNet custom model prediction mobile_custom_y_pred is  = ", mobile_custom_y_pred1)):
    print(_message, _pred.shape)

## Max-Voting

In [None]:
#reduce every model's class probabilities to hard class labels. The labels are
#derived from the original probability arrays (the objects the *_pred1 names
#alias) so this cell can be re-run safely; taking argmax of the already-reduced
#*_pred1 arrays a second time would collapse each one to a single scalar.
vgg16_custom_y_pred1 = vgg16_custom_y_pred.argmax(axis=-1)
print(vgg16_custom_y_pred1)
vgg19_custom_y_pred1 = vgg19_custom_y_pred.argmax(axis=-1)
print(vgg19_custom_y_pred1)
inceptionv3_custom_y_pred1 = inceptionv3_custom_y_pred.argmax(axis=-1)
print(inceptionv3_custom_y_pred1)
densenet_custom_y_pred1 = densenet_custom_y_pred.argmax(axis=-1)
print(densenet_custom_y_pred1)
xception_custom_y_pred1 = xception_custom_y_pred.argmax(axis=-1)
print(xception_custom_y_pred1)
nasnet_custom_y_pred1 = nasnet_custom_y_pred.argmax(axis=-1)
print(nasnet_custom_y_pred1)
mobile_custom_y_pred1 = mobile_custom_y_pred.argmax(axis=-1)
print(mobile_custom_y_pred1)

#max voting begins: one row per test image, one column per model
member_votes = np.column_stack([vgg16_custom_y_pred1, vgg19_custom_y_pred1,
                                inceptionv3_custom_y_pred1, densenet_custom_y_pred1,
                                xception_custom_y_pred1,
                                nasnet_custom_y_pred1, mobile_custom_y_pred1])
num_test_images = len(test_generator.filenames)
#take the most common label per image. Built in one pass instead of growing the
#array with np.append, which copies the whole array on every iteration.
#dtype=float keeps the result type of the original np.append-based loop.
max_voting_pred = np.array([statistics.mode(row.tolist())
                            for row in member_votes[:num_test_images]], dtype=float)

ensemble_model_max_voting_accuracy = accuracy_score(Y_test,max_voting_pred)
print("The max voting accuracy of the ensemble model is  = ", ensemble_model_max_voting_accuracy)

#save the predictions
np.savetxt('weights/max_voting_y_pred.csv',max_voting_pred,fmt='%i',delimiter = ",")

In [None]:
#classification report and confusion matrix for the max-voting ensemble

target_names = ['class 0(abnormal)', 'class 1(normal)']

#per-class precision / recall / F1, four decimal places
_report = classification_report(Y_test, max_voting_pred,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix
cnf_matrix = confusion_matrix(Y_test, max_voting_pred)
np.set_printoptions(precision=4)

# Draw it with the notebook's plot_confusion_matrix helper
plt.figure(figsize=(20,10), dpi=100)
_cm_title = 'Confusion matrix for Max Voting ensemble without normalization'
plot_confusion_matrix(cnf_matrix, classes=target_names, title=_cm_title)

plt.show()

In [None]:
#%% error metrics for the max-voting ensemble labels

ensemble_model_maxvoting_mean_squared_error = mean_squared_error(Y_test, max_voting_pred)
ensemble_model_maxvoting_mean_squared_log_error = mean_squared_log_error(Y_test, max_voting_pred)

#both values are computed first, then reported in the same order
for _message, _value in (
        ("The max voting mean squared error of the ensemble model is  = ",
         ensemble_model_maxvoting_mean_squared_error),
        ("The max voting mean squared log error of the ensemble model is  = ",
         ensemble_model_maxvoting_mean_squared_log_error)):
    print(_message, _value)

## SIMPLE AVERAGING

In [None]:
#simple averaging: element-wise mean of the seven models' class probabilities

#summed left to right in this order, then divided by the number of models
_member_probs = [inceptionv3_custom_y_pred, vgg19_custom_y_pred,
                 densenet_custom_y_pred, xception_custom_y_pred,
                 vgg16_custom_y_pred, nasnet_custom_y_pred,
                 mobile_custom_y_pred]
average_pred = sum(_member_probs) / 7

#accuracy of the averaged probabilities after taking the most probable class
_avg_labels = average_pred.argmax(axis=-1)
ensemble_model_averaging_accuracy = accuracy_score(Y_test, _avg_labels)
print("The averaging accuracy of the ensemble model is  = ", ensemble_model_averaging_accuracy)

In [None]:
#classification report and confusion matrix for the simple-averaging ensemble

target_names = ['class 0(abnormal)', 'class 1(normal)']

#most probable class per sample, computed once and reused below
_avg_labels = average_pred.argmax(axis=-1)

#per-class precision / recall / F1, four decimal places
_report = classification_report(Y_test, _avg_labels,
                                target_names=target_names, digits=4)
print(_report)

# Raw-count confusion matrix
cnf_matrix = confusion_matrix(Y_test, _avg_labels)
np.set_printoptions(precision=4)

# Draw it with the notebook's plot_confusion_matrix helper
plt.figure(figsize=(20,10), dpi=100)
_cm_title = 'Confusion matrix for Average Ensemble without normalization'
plot_confusion_matrix(cnf_matrix, classes=target_names, title=_cm_title)
plt.show()

#save the predicted labels as integers, one per line
np.savetxt('weights/averaging_y_pred.csv', _avg_labels, fmt='%i', delimiter = ",")

In [None]:
# ROC curves for the simple-averaging ensemble

roc_plot_options = dict(figsize=(20, 10),
                        title_fontsize='large',
                        text_fontsize='large')
skplt.metrics.plot_roc(Y_test, average_pred, **roc_plot_options)
plt.legend(loc="lower right")
plt.show()

In [None]:
# Precision-recall curves for the simple-averaging ensemble

plt.figure(figsize=(15, 10), dpi=100)

# Faint iso-F1 contours drawn as a visual reference
f_scores = np.linspace(0.2, 0.8, num=4)
for f_score in f_scores:
    x = np.linspace(0.01, 1)
    y = f_score * x / (2 * x - f_score)
    non_negative = y >= 0
    iso_line, = plt.plot(x[non_negative], y[non_negative], color='gray', alpha=0.2)
    plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

# Per-class curves and average precision, keyed by class index
precision, recall, average_precision = {}, {}, {}
for i in range(num_classes):
    class_truth = Y_test1[:, i]
    class_score = average_pred[:, i]
    precision[i], recall[i], _ = precision_recall_curve(class_truth, class_score)
    average_precision[i] = average_precision_score(class_truth, class_score)

# "micro" entry: all classes scored jointly on the flattened arrays
precision["micro"], recall["micro"], _ = precision_recall_curve(
    Y_test1.ravel(), average_pred.ravel())
average_precision["micro"] = average_precision_score(
    Y_test1, average_pred, average="micro")
print('Average precision score, micro-averaged over all classes: {0:0.4f}'
      .format(average_precision["micro"]))

# Legend entries: one handle for the iso-F1 family, then micro, then each class
lines = [iso_line]
labels = ['iso-f1 curves']

micro_line, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
lines.append(micro_line)
labels.append('micro-average Precision-recall (area = {0:0.4f})'
              .format(average_precision["micro"]))

colors = cycle(['red', 'blue', 'green', 'cyan', 'teal'])
for i, color in zip(range(num_classes), colors):
    class_line, = plt.plot(recall[i], precision[i], color=color, lw=2)
    lines.append(class_line)
    labels.append('Precision-recall for class {0} (area = {1:0.4f})'
                  .format(i, average_precision[i]))

# Axes, title and legend
fig = plt.gcf()
fig.subplots_adjust(bottom=0.05)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Extension of Precision-Recall curve to multi-class')
plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14))
plt.show()

In [None]:
# Error measures for the simple-averaging ensemble (computed on hard labels)

averaging_error_labels = average_pred.argmax(axis=-1)

ensemble_model_averaging_mean_squared_error = mean_squared_error(
    Y_test, averaging_error_labels)
ensemble_model_averaging_mean_squared_log_error = mean_squared_log_error(
    Y_test, averaging_error_labels)

print("The averaging mean squared error of the ensemble model is  = ",
      ensemble_model_averaging_mean_squared_error)
print("The averaging mean squared log error of the ensemble model is  = ",
      ensemble_model_averaging_mean_squared_log_error)

## Weighted Averaging

In [None]:
# Weighted averaging. VGG16 performed best on this task, followed by VGG19,
# so each of those two gets weight 0.25 and each of the other five gets 0.1;
# this split gave the best results.

weighted_members = [
    (vgg16_custom_y_pred, 0.25),
    (vgg19_custom_y_pred, 0.25),
    (inceptionv3_custom_y_pred, 0.1),
    (densenet_custom_y_pred, 0.1),
    (xception_custom_y_pred, 0.1),
    (nasnet_custom_y_pred, 0.1),
    (mobile_custom_y_pred, 0.1),
]

# Accumulate the weighted terms left to right.
first_pred, first_weight = weighted_members[0]
weighted_average_pred = first_pred * first_weight
for member_pred, member_weight in weighted_members[1:]:
    weighted_average_pred = weighted_average_pred + member_pred * member_weight

# Accuracy of the arg-max class of the weighted probabilities
ensemble_model_weighted_averaging_accuracy = accuracy_score(
    Y_test, weighted_average_pred.argmax(axis=-1))
print("The weighted averaging accuracy of the ensemble model is  = ",
      ensemble_model_weighted_averaging_accuracy)

In [None]:
# Classification report, confusion matrix and saved labels for weighted averaging

target_names = ['class 0(abnormal)', 'class 1(normal)']  # modify according to tasks

# Hard labels: arg-max over the class axis of the weighted probabilities
weighted_labels = weighted_average_pred.argmax(axis=-1)

print(classification_report(Y_test, weighted_labels,
                            target_names=target_names, digits=4))

# Raw (non-normalized) confusion counts
cnf_matrix = confusion_matrix(Y_test, weighted_labels)
np.set_printoptions(precision=4)

plt.figure(figsize=(20, 10), dpi=100)
plot_confusion_matrix(
    cnf_matrix,
    classes=target_names,
    title='Confusion matrix for Weighted Average Ensemble without normalization',
)
plt.show()

# Persist the predicted labels as integers
np.savetxt('weights/weighted_averaging_y_pred.csv',
           weighted_average_pred.argmax(axis=-1), fmt='%i', delimiter=",")

In [None]:
# ROC curves for the weighted-averaging ensemble

roc_plot_options = dict(figsize=(20, 10),
                        title_fontsize='large',
                        text_fontsize='large')
skplt.metrics.plot_roc(Y_test, weighted_average_pred, **roc_plot_options)
plt.legend(loc="lower right")
plt.show()

In [None]:
# Precision-recall curves for the weighted-averaging ensemble

plt.figure(figsize=(15, 10), dpi=100)

# Faint iso-F1 contours drawn as a visual reference
f_scores = np.linspace(0.2, 0.8, num=4)
for f_score in f_scores:
    x = np.linspace(0.01, 1)
    y = f_score * x / (2 * x - f_score)
    non_negative = y >= 0
    iso_line, = plt.plot(x[non_negative], y[non_negative], color='gray', alpha=0.2)
    plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

# Per-class curves and average precision, keyed by class index
precision, recall, average_precision = {}, {}, {}
for i in range(num_classes):
    class_truth = Y_test1[:, i]
    class_score = weighted_average_pred[:, i]
    precision[i], recall[i], _ = precision_recall_curve(class_truth, class_score)
    average_precision[i] = average_precision_score(class_truth, class_score)

# "micro" entry: all classes scored jointly on the flattened arrays
precision["micro"], recall["micro"], _ = precision_recall_curve(
    Y_test1.ravel(), weighted_average_pred.ravel())
average_precision["micro"] = average_precision_score(
    Y_test1, weighted_average_pred, average="micro")
print('Average precision score, micro-averaged over all classes: {0:0.4f}'
      .format(average_precision["micro"]))

# Legend entries: one handle for the iso-F1 family, then micro, then each class
lines = [iso_line]
labels = ['iso-f1 curves']

micro_line, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
lines.append(micro_line)
labels.append('micro-average Precision-recall (area = {0:0.4f})'
              .format(average_precision["micro"]))

colors = cycle(['red', 'blue', 'green', 'cyan', 'teal'])
for i, color in zip(range(num_classes), colors):
    class_line, = plt.plot(recall[i], precision[i], color=color, lw=2)
    lines.append(class_line)
    labels.append('Precision-recall for class {0} (area = {1:0.4f})'
                  .format(i, average_precision[i]))

# Axes, title and legend
fig = plt.gcf()
fig.subplots_adjust(bottom=0.05)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Extension of Precision-Recall curve to multi-class')
plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14))
plt.show()

In [None]:
#%% error measures for the weighted-averaging ensemble (computed on hard labels)
weighted_error_labels = weighted_average_pred.argmax(axis=-1)

ensemble_model_weighted_average_mean_squared_error = mean_squared_error(
    Y_test, weighted_error_labels)
ensemble_model_weighted_average_mean_squared_log_error = mean_squared_log_error(
    Y_test, weighted_error_labels)

print("The weighted averaging mean squared error of the ensemble model is  = ",
      ensemble_model_weighted_average_mean_squared_error)
print("The weighted averaging mean squared log error of the ensemble model is  = ",
      ensemble_model_weighted_average_mean_squared_log_error)

## Stacked Generalization

We performed a stacking ensemble by training a neural-network-based meta-learner that learns how best to combine the predictions from the sub-models and, ideally, performs better than any single sub-model. The first step is to load the saved models. Since the model architectures are already defined above, we restore each model's fine-tuned weights with the Keras load_weights() method and collect the models in a Python list.

In [None]:
# load models from file
n_models = 7 #we have seven models

def load_all_models(n_models):
    """Restore fine-tuned weights into the ensemble members and return them.

    The member models themselves are the module-level ``model_*`` objects;
    this function only loads each one's checkpoint from ``weights/``.

    Args:
        n_models: Number of members to load, taken from the front of the
            fixed order below. Values larger than the number of available
            members load all of them; values below zero load none.

    Returns:
        A list of the loaded models, in the order densenet, inceptionv3,
        vgg19, vgg16, xception, nasnet, mobilenet.
    """
    # (model, checkpoint) pairs in the order the ensemble expects them.
    member_checkpoints = [
        (model_densenet, 'weights/densenet_finetuned.06-0.8873.h5'),
        (model_inceptionv3, 'weights/inceptionv3_finetuned.03-0.8821.h5'),
        (model_vgg19, 'weights/vgg19_finetuned.02-0.8921.h5'),
        (model_vgg16, 'weights/vgg16_finetuned.06-0.8946.h5'),
        (model_xception, 'weights/xception_finetuned.08-0.8791.h5'),
        (model_nasnet, 'weights/nasnet_finetuned.04-0.8740.h5'),
        (model_mobile, 'weights/mobilenet_finetuned.07-0.8801.h5'),
    ]

    all_models = []
    # Honour n_models (it used to be ignored and all seven were always loaded).
    for member, checkpoint_path in member_checkpoints[:max(0, n_models)]:
        member.load_weights(checkpoint_path)
        all_models.append(member)
    return all_models

# Call the function to restore all seven fine-tuned models from the
# 'weights/' sub-directory.

n_members = 7
members = load_all_models(n_members)
print('Loaded %d models' % len(members))

It would be useful to know how well the single models perform on the test dataset, as we would expect a stacking model to perform better. We can easily evaluate each single model on the test dataset and establish a baseline of performance.

In [None]:
# evaluate standalone models on test dataset

# Ceiling division: exactly enough steps to cover every test sample once.
# `n // batch_size + 1` runs one surplus batch whenever the sample count is
# an exact multiple of the batch size.
standalone_test_steps = (nb_test_samples + batch_size - 1) // batch_size

for member_model in members:
    # A fresh optimizer per model; compile is required before evaluation.
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    member_model.compile(optimizer=sgd, loss='categorical_crossentropy',
                         metrics=['accuracy'])
    # Start every model from the first test batch, matching the reset done
    # before predicting with the stacked model.
    test_generator.reset()
    _, acc = member_model.evaluate_generator(test_generator,
                                             standalone_test_steps, verbose=1)
    print('Model Accuracy: %.3f' % acc)

Integrated Stacking Model: It may be desirable to use a neural network as a meta-learner. Specifically, the sub-networks can be embedded in a larger multi-headed neural network that then learns how to best combine the predictions from each input sub-model. It allows the stacking ensemble to be treated as a single large model. The benefit of this approach is that the outputs of the sub-models are provided directly to the meta-learner. Further, it is also possible to update the weights of the sub-models in conjunction with the meta-learner model, if this is desirable. This can be achieved using the Keras functional interface for developing models.

After the models are loaded as a list, a larger stacking ensemble model can be defined where each of the loaded models is used as a separate input-head to the model. All of the layers in each of the loaded models must be marked as not trainable so the weights cannot be updated when the new larger model is being trained. Keras also requires that each layer has a unique name, therefore the names of each layer in each of the loaded models will have to be updated to indicate to which ensemble member they belong. Once the sub-models have been prepared, we can define the stacking ensemble model. The input layer for each of the sub-models will be used as a separate input head to this new model. This means that k copies of any input data will have to be provided to the model, where k is the number of input models, in this case, 7. The outputs of each of the models can then be merged. In this case, we will use a simple concatenation merge, where a single 14-element vector will be created from the two class-probabilities predicted by each of the 7 models.

We will then define a hidden layer to interpret this “input” to the meta-learner and an output layer that will make its own probabilistic prediction. A plot of the network graph is created when this function is called to give an idea of how the ensemble model fits together.

In [None]:
def define_stacked_model(members):
    """Build and compile the stacking ensemble on top of frozen sub-models.

    Every layer of every member is marked non-trainable and has its name
    prefixed with ``ensemble_<k>_`` (k is the 1-based member position) so
    layer names stay unique inside the combined model. The members' outputs
    are concatenated and passed through a 14-unit ReLU layer and a 2-unit
    softmax layer.

    Args:
        members: Non-empty list of Keras models to combine. The members are
            modified in place (frozen and renamed).

    Returns:
        The compiled stacked model (categorical cross-entropy, Adam,
        accuracy metric).

    Raises:
        ValueError: If ``members`` is empty.
    """
    # Local import so the function does not rely on `concatenate` having
    # been imported at module level.
    from keras.layers import concatenate

    if not members:
        raise ValueError("define_stacked_model needs at least one member model")

    # update all layers in all models to not be trainable
    for position, member in enumerate(members, start=1):
        for layer in member.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer.name = 'ensemble_' + str(position) + '_' + layer.name

    # Collect each distinct input tensor once, in member order. Previously
    # only the input of the last member was used, via the loop variable left
    # over from the loop above. When all members share one input tensor this
    # yields the same single-input model; otherwise each distinct input
    # becomes its own head.
    # NOTE(review): a multi-headed result needs one input array per head
    # when fitting/predicting - confirm how the members were built.
    ensemble_visible = []
    for member in members:
        if not any(member.input is seen for seen in ensemble_visible):
            ensemble_visible.append(member.input)

    # concatenate merge output from each model
    ensemble_outputs = [member.output for member in members]
    if len(ensemble_outputs) == 1:
        # Nothing to merge with a single member.
        merge = ensemble_outputs[0]
    else:
        merge = concatenate(ensemble_outputs)
    hidden = Dense(14, activation='relu')(merge) # two outputs for 7 models, so 14 hidden neurons
    output = Dense(2, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)

    # compile
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# define ensemble model
stacked_model = define_stacked_model(members)
stacked_model.summary()

#plot model
plot_model(stacked_model, to_file='stacked_model.png',show_shapes=True, show_layer_names=False)

Once the model is defined, it can be fit. We fit it on the training data and monitor its performance on the holdout validation dataset. Because the sub-models are not trainable, their weights will not be updated during training and only the weights of the new hidden and output layers will be updated. The stacking neural network model will be fit on the training data for 30 epochs.

In [None]:
# Train the stacked ensemble

# Checkpoint name embeds the epoch number and validation accuracy.
filepath = 'weights/' + stacked_model.name + '.{epoch:02d}-{val_acc:.4f}.h5'

# Keep only checkpoints that improve validation accuracy (full model saved).
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    verbose=1,
    save_weights_only=False,
    save_best_only=True,
    mode='max',
    period=1,
)

# Halve the learning rate after 5 epochs without val_acc improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=5,
    verbose=1,
    mode='max',
    min_lr=0.00001,
)

tensor_board = TensorBoard(log_dir='logs/', histogram_freq=0, batch_size=batch_size)
callbacks_list = [checkpoint, tensor_board, reduce_lr]

# Rewind both generators before training starts.
train_generator.reset()
validation_generator.reset()

train_steps = nb_train_samples // batch_size + 1
val_steps = nb_validation_samples // batch_size + 1

history = stacked_model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=30,
    validation_data=validation_generator,
    class_weight=class_weights,
    callbacks=callbacks_list,
    validation_steps=val_steps,
    verbose=1,
)

In [None]:
#plot performance of the ensemble model

# Take the epoch count from the recorded history instead of hard-coding 30,
# so the plot still works if the number of training epochs changes.
N = len(history.history["loss"])
epoch_axis = np.arange(1, N + 1)

plt.figure(figsize=(20,10), dpi=100)
# (history key, line colour, legend label) for each curve, in plotting order
for history_key, curve_color, curve_label in (
        ("loss", 'orange', "train_loss"),
        ("val_loss", 'red', "val_loss"),
        ("acc", 'blue', "train_acc"),
        ("val_acc", 'green', "val_acc")):
    plt.plot(epoch_axis, history.history[history_key], curve_color, label=curve_label)
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower right")
plt.savefig("weights/stacking_ensemble_plot.png")

In [None]:
# Once fit, we can use the new stacked model to make a prediction on new data.
# This is as simple as calling the predict_generator() function on the model.

#load the best model
stacked_model.load_weights('weights/stacking_ensemble.03-0.8907.h5')
stacked_model.summary()

#first reset the test generator otherwise it gives weird results
test_generator.reset()

# Ceiling division: exactly enough steps to cover every test sample once.
# `n // batch_size + 1` yields a surplus batch (and surplus prediction rows)
# whenever the sample count is an exact multiple of the batch size.
stacked_test_steps = (nb_test_samples + batch_size - 1) // batch_size

#predict class probabilities for the test set
ensemble_y_pred = stacked_model.predict_generator(test_generator, stacked_test_steps, verbose=1)

#print prediction shapes
print(ensemble_y_pred.shape)

#save the predictions
# These are softmax probabilities, so write them as floats; the integer
# format '%i' truncated nearly every value to 0.
np.savetxt('weights/stacking_y_pred.csv', ensemble_y_pred, fmt='%.6f', delimiter=",")

In [None]:
# Performance metrics of the stacked ensemble, all computed on hard labels

# Arg-max over the class axis for both the one-hot truth and the predictions
stacked_true_labels = Y_test1.argmax(axis=-1)
stacked_pred_labels = ensemble_y_pred.argmax(axis=-1)

# accuracy
accuracy = accuracy_score(stacked_true_labels, stacked_pred_labels)
print('The test accuracy of the Custom model is: ', accuracy)

# mean squared error
custom_mse = mean_squared_error(stacked_true_labels, stacked_pred_labels)
print('The Mean Squared Error of the Custom model is: ', custom_mse)

# mean squared log error
custom_msle = mean_squared_log_error(stacked_true_labels, stacked_pred_labels)
print('The Mean Squared Log Error of the Custom model is: ', custom_msle)

# Matthews correlation coefficient
custom_MCC = matthews_corrcoef(stacked_true_labels, stacked_pred_labels)
print('The Matthews correlation coefficient value (MCC) for the Custom model is: ', custom_MCC)

In [None]:
#%% classification report and confusion matrix for the stacked ensemble

target_names = ['class 0(abnormal)','class 1(normal)']

# Hard labels from the one-hot truth and the predicted probabilities
report_true_labels = Y_test1.argmax(axis=-1)
report_pred_labels = ensemble_y_pred.argmax(axis=-1)

print(classification_report(report_true_labels, report_pred_labels,
                            target_names=target_names, digits=4))

# Raw (non-normalized) confusion counts
cnf_matrix = confusion_matrix(report_true_labels, report_pred_labels)
np.set_printoptions(precision=4)

# Draw the matrix on a fresh square figure
plt.figure(figsize=(10, 10), dpi=100)
plot_confusion_matrix(cnf_matrix, classes=target_names)
plt.show()

In [None]:
#%% ROC curves (with AUC values) for the stacked ensemble

roc_plot_options = dict(figsize=(20, 10),
                        title_fontsize='large',
                        text_fontsize='large')
skplt.metrics.plot_roc(Y_test, ensemble_y_pred, **roc_plot_options)
plt.legend(loc="lower right")
plt.show()

In [None]:
# Precision-recall curves for the stacked ensemble

plt.figure(figsize=(15, 10), dpi=100)

# Faint iso-F1 contours drawn as a visual reference
f_scores = np.linspace(0.2, 0.8, num=4)
for f_score in f_scores:
    x = np.linspace(0.01, 1)
    y = f_score * x / (2 * x - f_score)
    non_negative = y >= 0
    iso_line, = plt.plot(x[non_negative], y[non_negative], color='gray', alpha=0.2)
    plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

# Per-class curves and average precision, keyed by class index
precision, recall, average_precision = {}, {}, {}
for i in range(num_classes):
    class_truth = Y_test1[:, i]
    class_score = ensemble_y_pred[:, i]
    precision[i], recall[i], _ = precision_recall_curve(class_truth, class_score)
    average_precision[i] = average_precision_score(class_truth, class_score)

# "micro" entry: all classes scored jointly on the flattened arrays
precision["micro"], recall["micro"], _ = precision_recall_curve(
    Y_test1.ravel(), ensemble_y_pred.ravel())
average_precision["micro"] = average_precision_score(
    Y_test1, ensemble_y_pred, average="micro")
print('Average precision score, micro-averaged over all classes: {0:0.4f}'
      .format(average_precision["micro"]))

# Legend entries: one handle for the iso-F1 family, then micro, then each class
lines = [iso_line]
labels = ['iso-f1 curves']

micro_line, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
lines.append(micro_line)
labels.append('micro-average Precision-recall (area = {0:0.4f})'
              .format(average_precision["micro"]))

colors = cycle(['red', 'blue', 'green', 'cyan', 'teal'])
for i, color in zip(range(num_classes), colors):
    class_line, = plt.plot(recall[i], precision[i], color=color, lw=2)
    lines.append(class_line)
    labels.append('Precision-recall for class {0} (area = {1:0.4f})'
                  .format(i, average_precision[i]))

# Axes, title and legend
fig = plt.gcf()
fig.subplots_adjust(bottom=0.05)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Extension of Precision-Recall curve to multi-class')
plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14))
plt.show()

In [None]:
#%% KS statistic plot for the stacked ensemble

ks_plot_options = dict(figsize=(20, 10),
                       title_fontsize='large',
                       text_fontsize='large')
skplt.metrics.plot_ks_statistic(Y_test, ensemble_y_pred, **ks_plot_options)
plt.legend(loc="lower right")
plt.show()

Every model has its own weaknesses. The reasoning behind using an ensemble is that by stacking different models representing different hypotheses about the data, we can find a better hypothesis that is not in the hypothesis space of the models from which the ensemble is built. By using a very basic ensemble, a much lower error rate was achieved than when a single model was used, which demonstrates the effectiveness of ensembling. Of course, there are some practical considerations to keep in mind when using an ensemble for your machine learning task. Since ensembling means stacking multiple models together, it also means that the input data needs to be forward-propagated through each model. This increases the amount of compute that needs to be performed and, consequently, the evaluation (prediction) time, which is a very critical factor when designing a commercial product. Another consideration is the increased size of the final model which, again, might be a limiting factor for ensemble use in a commercial product.