In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator

# Set path for datasets.
trainPath = 'balanced_chest_xray/Train'
valPath = 'balanced_chest_xray/Val'
testPath = 'balanced_chest_xray/Test'
originaltestPath = 'chest_xray/test'

# Creating a generator for data augmentation for the training set.
print('Data in balanced data set')
trainGen = ImageDataGenerator(rotation_range=10 ,height_shift_range=0.2, width_shift_range=0.2, horizontal_flip=True)

# Preprosses the images in 128x128 grayscale in to batches.
trainBatch = trainGen.flow_from_directory(trainPath, target_size=(64,64), classes=['Pneumonia', 'Normal'], color_mode="grayscale", batch_size=16, shuffle=True)
vaildBatch = ImageDataGenerator().flow_from_directory(valPath, target_size=(64,64), classes=['Pneumonia', 'Normal'], color_mode="grayscale", batch_size=1, shuffle=False)
testBatch = ImageDataGenerator().flow_from_directory(testPath, target_size=(64,64), classes=['Pneumonia', 'Normal'], color_mode="grayscale", batch_size=1, shuffle=False)

# Preprosses the original test set in 128x128 grayscale in to batches.
print('\nOriginal test set')
testBatch_OG = ImageDataGenerator().flow_from_directory(originaltestPath, target_size=(64,64), classes=['Pneumonia', 'Normal'], color_mode="grayscale", batch_size=1, shuffle=False)


# Show the new labels incoding.
testBatch.class_indices

Using TensorFlow backend.


Data in balanced data set
Found 2232 images belonging to 2 classes.
Found 466 images belonging to 2 classes.
Found 468 images belonging to 2 classes.

Original test set
Found 624 images belonging to 2 classes.


{'Normal': 1, 'Pneumonia': 0}

In [2]:
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten, Dense, BatchNormalization
from keras.models import Sequential

# Defining the model structure.
model = Sequential()
model.add(Conv2D(32, (3,3), strides=(1,1), padding='same', input_shape=(64,64,1), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(32, (3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64, (3,3), strides=(1,1), padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(128, (3,3), strides=(1,1), padding='same', activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(128, (3,3), strides=(1,1), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(2, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 64, 64, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 64)        36928     
__________

In [5]:
# Compiling the model with Adam optimizer and cross entropy for loss measurement.
model.compile('Adam',loss="binary_crossentropy", metrics=["accuracy"])

In [6]:
from keras.callbacks import ModelCheckpoint

# fitting the model to the training and validation.
epoch=100
checkPoint = ModelCheckpoint(filepath='saved_model/weights.best.from_Benmark.hdf5', verbose=1, save_best_only=True, monitor='val_loss')
model.fit_generator(trainBatch, steps_per_epoch=155, validation_data=vaildBatch, validation_steps=len(vaildBatch), callbacks=[checkPoint], epochs=epoch)
    

Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.59223, saving model to saved_model/weights.best.from_Benmark.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.59223 to 0.52681, saving model to saved_model/weights.best.from_Benmark.hdf5
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.52681
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.52681
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.52681
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.52681
Epoch 7/100

Epoch 00007: val_loss improved from 0.52681 to 0.43499, saving model to saved_model/weights.best.from_Benmark.hdf5
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.43499
Epoch 9/100

Epoch 00009: val_loss improved from 0.43499 to 0.21477, saving model to saved_model/weights.best.from_Benmark.hdf5
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.21477
Epoch 11/100

Epoch 00011: val_loss did not improve from 0.21477
Epoch 12/100

Epoch 00012: val_loss did no


Epoch 00042: val_loss did not improve from 0.09440
Epoch 43/100

Epoch 00043: val_loss did not improve from 0.09440
Epoch 44/100

Epoch 00044: val_loss did not improve from 0.09440
Epoch 45/100

Epoch 00045: val_loss improved from 0.09440 to 0.09249, saving model to saved_model/weights.best.from_Benmark.hdf5
Epoch 46/100

Epoch 00046: val_loss did not improve from 0.09249
Epoch 47/100

Epoch 00047: val_loss improved from 0.09249 to 0.06619, saving model to saved_model/weights.best.from_Benmark.hdf5
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.06619
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.06619
Epoch 50/100

Epoch 00050: val_loss did not improve from 0.06619
Epoch 51/100

Epoch 00051: val_loss did not improve from 0.06619
Epoch 52/100

Epoch 00052: val_loss did not improve from 0.06619
Epoch 53/100

Epoch 00053: val_loss did not improve from 0.06619
Epoch 54/100

Epoch 00054: val_loss did not improve from 0.06619
Epoch 55/100

Epoch 00055: val_loss did 

<keras.callbacks.History at 0x130a99f6160>

In [8]:
# Evalutate the model and show loss and accuracy.
loss, acc = model.evaluate_generator(testBatch, steps=len(testBatch))

print('model performancens on balance data')
print('loss:{}'.format(loss) )
print('accuracy:{}'.format(acc))

model performancens on balance data
loss:1.8760706973381105
accuracy:0.7457264957264957


In [6]:
model.load_weights('saved_model/weights.best.from_benchmark.hdf5')

In [7]:
# Evalutate the model and show loss and accuracy.
loss, acc = model.evaluate_generator(testBatch, steps=len(testBatch))

print('model performancens on balance data')
print('loss:{}'.format(loss) )
print('accuracy:{}'.format(acc))

model performancens on balance data
loss:0.9820309939163135
accuracy:0.8141025641025641


In [24]:
# Making prediction on balanced test set and the original
pred = model.predict_generator(testBatch, steps=len(testBatch), verbose=True)
pred_OG = model.predict_generator(testBatch_OG, steps=len(testBatch_OG), verbose=True)



In [25]:
# Applaying preprocess function to balance test set and predict.
y_true, y_pred = Preprocess_yTrue_yPred(pred, testBatch)
# Applaying preprocess function to original test set and predcit.
y_true_OG, y_pred_OG = Preprocess_yTrue_yPred(pred_OG, testBatch_OG)

# Print metrics results for balance test set.
print('Performs metrics for balance test set')
Metrics_results(y_true, y_pred)

# Print metrics results for original test set.
print('\nPerforms metrics for original test set')
Metrics_results(y_true_OG, y_pred_OG)

Performs metrics for balance test set
Accuracy:0.8141025641025641
Precision:0.9622641509433962
Recall:0.6538461538461539
Fbeta score:0.8071463937943555

Performs metrics for original test set
Accuracy:0.8573717948717948
Precision:0.9503105590062112
Recall:0.6538461538461539
Fbeta score:0.8207493947227666
