In [4]:
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator

# Directory locations of the balanced splits and of the original test split.
trainPath = 'balanced_chest_xray/Train'
valPath = 'balanced_chest_xray/Val'
testPath = 'balanced_chest_xray/Test'
originaltestPath = 'chest_xray/test'


def _flow_grayscale_64(generator, directory, batch_size, shuffle):
    """Stream `directory` through `generator` as 64x64 grayscale batches.

    The class list is passed explicitly, which fixes the label indices to
    Pneumonia -> 0 and Normal -> 1 (see the class_indices displayed below).
    """
    return generator.flow_from_directory(
        directory,
        target_size=(64,64),
        classes=['Pneumonia', 'Normal'],
        color_mode="grayscale",
        batch_size=batch_size,
        shuffle=shuffle,
    )


print('Data in balanced data set')

# Only the training images are augmented (small rotations, shifts, horizontal flips).
trainGen = ImageDataGenerator(rotation_range=10 ,height_shift_range=0.2, width_shift_range=0.2, horizontal_flip=True)
trainBatch = _flow_grayscale_64(trainGen, trainPath, batch_size=16, shuffle=True)

# Validation and test images are read one at a time, unshuffled and unaugmented.
vaildBatch = _flow_grayscale_64(ImageDataGenerator(), valPath, batch_size=1, shuffle=False)
testBatch = _flow_grayscale_64(ImageDataGenerator(), testPath, batch_size=1, shuffle=False)

# The original (unbalanced) test split goes through the same 64x64 grayscale pipeline.
print('\nOriginal test set')
testBatch_OG = _flow_grayscale_64(ImageDataGenerator(), originaltestPath, batch_size=1, shuffle=False)


# Display the label encoding used by the generators.
testBatch.class_indices

Data in balanced data set
Found 2232 images belonging to 2 classes.
Found 466 images belonging to 2 classes.
Found 468 images belonging to 2 classes.

Original test set
Found 624 images belonging to 2 classes.


{'Normal': 1, 'Pneumonia': 0}

In [2]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense, BatchNormalization
from keras.models import Sequential

# Build the CNN: four stages of (conv -> batch norm -> conv -> 2x2 max-pool)
# with 32, 64, 128 and 256 filters, followed by a 2-way softmax classifier.
model = Sequential()

# Every convolution is a same-padded, stride-1, ReLU-activated 3x3 layer.
convOptions = dict(strides=(1,1), padding='same', activation='relu')

for stage, filters in enumerate((32, 64, 128, 256)):
    if stage == 0:
        # Only the very first layer declares the 64x64 single-channel input.
        model.add(Conv2D(filters, (3,3), input_shape=(64,64,1), **convOptions))
    else:
        model.add(Conv2D(filters, (3,3), **convOptions))
    model.add(BatchNormalization())
    model.add(Conv2D(filters, (3,3), **convOptions))
    model.add(MaxPooling2D((2,2)))

# Flatten the final feature maps and emit one probability per class.
model.add(Flatten())
model.add(Dense(2, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 64, 64, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 64)        36928     
__________

In [3]:
# Compile the model with the Adam optimizer and track accuracy.
# The network ends in a 2-unit softmax, and the generators are created without
# a class_mode (presumably the default one-hot "categorical" labels - confirm),
# so categorical cross-entropy is the loss that matches this output layout.
# For two softmax outputs it yields the same value as the element-wise binary
# cross-entropy used previously, so checkpoint selection on val_loss is unaffected.
model.compile('Adam',loss="categorical_crossentropy", metrics=["accuracy"])

In [5]:
import os

from keras.callbacks import ModelCheckpoint, TensorBoard

# Fit the model on the training batches, validating after every epoch, and keep
# only the weights with the lowest validation loss on disk.
epoch=100
checkpointPath = 'saved_models/weights.best.from_scratch.hdf5'

# Make sure the target folder exists up front so the first checkpoint save
# cannot fail on a missing directory after an epoch of training.
os.makedirs(os.path.dirname(checkpointPath), exist_ok=True)
checkPoint = ModelCheckpoint(filepath=checkpointPath, verbose=1, save_best_only=True, monitor='val_loss')

# NOTE(review): 155 steps of 16 images is more than the 2232 training images the
# loader reports (about 140 batches per pass) - confirm this value is intended.
model.fit_generator(trainBatch, steps_per_epoch=155, validation_data=vaildBatch, validation_steps=len(vaildBatch), callbacks=[checkPoint], epochs=epoch)
    

Epoch 1/100

Epoch 00001: val_loss improved from inf to 6.87422, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 6.87422 to 0.38052, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.38052
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.38052
Epoch 5/100

Epoch 00005: val_loss improved from 0.38052 to 0.19318, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.19318
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.19318
Epoch 8/100

Epoch 00008: val_loss improved from 0.19318 to 0.16555, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 9/100

Epoch 00009: val_loss did not improve from 0.16555
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.16555
Epoch 11/100

Epoch 00011: val_loss did not improve from 0.16555
Epoch 12/100

Epoch 00012: val_loss di


Epoch 00084: val_loss did not improve from 0.03662
Epoch 85/100

Epoch 00085: val_loss did not improve from 0.03662
Epoch 86/100

Epoch 00086: val_loss did not improve from 0.03662
Epoch 87/100

Epoch 00087: val_loss did not improve from 0.03662
Epoch 88/100

Epoch 00088: val_loss did not improve from 0.03662
Epoch 89/100

Epoch 00089: val_loss did not improve from 0.03662
Epoch 90/100

Epoch 00090: val_loss did not improve from 0.03662
Epoch 91/100

Epoch 00091: val_loss did not improve from 0.03662
Epoch 92/100

Epoch 00092: val_loss did not improve from 0.03662
Epoch 93/100

Epoch 00093: val_loss did not improve from 0.03662
Epoch 94/100

Epoch 00094: val_loss did not improve from 0.03662
Epoch 95/100

Epoch 00095: val_loss did not improve from 0.03662
Epoch 96/100

Epoch 00096: val_loss did not improve from 0.03662
Epoch 97/100

Epoch 00097: val_loss did not improve from 0.03662
Epoch 98/100

Epoch 00098: val_loss did not improve from 0.03662
Epoch 99/100

Epoch 00099: val_loss di

<keras.callbacks.History at 0x2b483845668>

In [6]:
# Evaluate the model currently held in memory on the balanced test set
# (one step per test batch) and report its loss and accuracy.
testScores = model.evaluate_generator(testBatch, steps=len(testBatch))
loss, acc = testScores

print('model performancens on balance data')
for template, value in (('loss:{}', loss), ('accuracy:{}', acc)):
    print(template.format(value))

model performancens on balance data
loss:2.079278032891692
accuracy:0.6901709401709402


In [17]:
# Restore the weights written by the checkpoint callback, i.e. the ones that
# achieved the lowest validation loss during training.
bestWeightsPath = 'saved_models/weights.best.from_scratch.hdf5'
model.load_weights(bestWeightsPath)
print('Best weights are loaded')

Best weights are loaded


In [18]:
# Re-evaluate on the balanced test set now that the best checkpointed weights
# are loaded, and report loss and accuracy.
bestScores = model.evaluate_generator(testBatch, steps=len(testBatch))
loss, acc = bestScores

print('model performancens on balance data with best weights')
for template, value in (('loss:{}', loss), ('accuracy:{}', acc)):
    print(template.format(value))

model performancens on balance data with best weights
loss:0.4747115498066599
accuracy:0.8803418803418803


In [19]:
# Predict class probabilities for every image of the balanced test set and of
# the original test set (one step per batch, with a progress bar).
pred, pred_OG = (
    model.predict_generator(batches, steps=len(batches), verbose=True)
    for batches in (testBatch, testBatch_OG)
)



In [20]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import fbeta_score
import numpy as np

# Turn raw model predictions and a test iterator into binary y_true / y_pred.
def Preprocess_yTrue_yPred(predict, testset, positive_index=1):
    """Build binary ground-truth and predicted labels for one class.

    Parameters
    ----------
    predict : array-like of shape (n_samples, n_classes)
        Per-class scores produced by the model, one row per sample.
    testset : object with a ``classes`` attribute
        Iterator the predictions were made on; ``classes`` holds the integer
        class index of every sample, in prediction order.
    positive_index : int, default 1
        Class index treated as the positive class. The generators in this
        notebook encode Pneumonia as 0 and Normal as 1, so the default scores
        "Normal" as positive; pass ``positive_index=0`` to score Pneumonia.

    Returns
    -------
    y_true : numpy.ndarray of int
        1 where the sample's class equals ``positive_index``, else 0.
    y_pred : list of float
        The score of the positive class rounded to 0.0 or 1.0.
    """
    # Binarise the ground truth against the chosen class; for 0/1 labels and
    # the default index this is identical to testset.classes.
    y_true = (np.asarray(testset.classes) == positive_index).astype(int)

    scores = np.asarray(predict)
    if scores.size == 0:
        # Nothing was predicted: avoid indexing a column of an empty array.
        return y_true, []

    # Round the positive-class column to a hard 0/1 decision.
    y_pred = np.rint(scores[:, positive_index]).tolist()
    return y_true, y_pred

# Print accuracy, precision, recall and F-beta for a pair of label sequences.
def Metrics_results(y_true, y_pred):
    """Print four benchmark scores comparing `y_pred` against `y_true`.

    Parameters
    ----------
    y_true : sequence of ground-truth labels.
    y_pred : sequence of predicted labels, aligned with `y_true`.

    Nothing is returned; the scores are only printed.

    NOTE(review): precision and recall are called without an `average`
    argument, whereas the F-beta score (beta=2) is macro-averaged, so the
    three are not computed on the same basis - confirm this is intended.
    """
    # Compute every score first so nothing is printed if one of them fails.
    report = [
        ('Accuracy:{}', accuracy_score(y_true, y_pred)),
        ('Precision:{}', precision_score(y_true, y_pred)),
        ('Recall:{}', recall_score(y_true, y_pred)),
        ('Fbeta score:{}', fbeta_score(y_true, y_pred, average='macro', beta=2)),
    ]
    for template, score in report:
        print(template.format(score))

In [21]:
# Convert the predictions for the balanced and the original test sets into
# aligned (y_true, y_pred) label pairs.
y_true, y_pred = Preprocess_yTrue_yPred(pred, testBatch)
y_true_OG, y_pred_OG = Preprocess_yTrue_yPred(pred_OG, testBatch_OG)

# Print the benchmark scores for each test set under its own heading.
for heading, truth, guess in (
    ('Benmark for balance test set', y_true, y_pred),
    ('\nBenmark for original test set', y_true_OG, y_pred_OG),
):
    print(heading)
    Metrics_results(truth, guess)

Benmark for balance test set
Accuracy:0.8803418803418803
Precision:0.8739495798319328
Recall:0.8888888888888888
Fbeta score:0.8803229492053382

Benmark for original test set
Accuracy:0.8717948717948718
Precision:0.7938931297709924
Recall:0.8888888888888888
Fbeta score:0.8711015063260132
