# This notebook is to build and train another version of the model for binomial classification.

In [1]:
#Imports
from wrangle import *
import numpy as np
import pandas as pd
import os, shutil, glob, itertools, random
import matplotlib.pyplot as plt
import warnings
import tensorflow as tf
from keras import backend as K
from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras_tuner
from keras_tuner import RandomSearch, GridSearch, BayesianOptimization, Objective
from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image

In [2]:
#Preferences
warnings.simplefilter(action="ignore", category=FutureWarning)
%matplotlib inline

In [3]:
#Path variables
train_path = "data/chest_xray/train/"
val_path = "data/chest_xray/val/"
test_path = "data/chest_xray/test/"

In [4]:
save_model = True
save_weights = True

In [5]:
path_list = [train_path, val_path, test_path]

In [6]:
datagen = ImageDataGenerator(rescale=1./255)

In [7]:
train_generator = datagen.flow_from_directory(directory=train_path,
                                              target_size=(256,256),
                                              batch_size=16,
                                              color_mode="grayscale",
                                              classes=["NORMAL", "PNEUMONIA"],
                                              save_format="jpeg")

Found 4332 images belonging to 2 classes.


In [8]:
val_generator = datagen.flow_from_directory(directory=val_path,
                                            target_size=(256,256),
                                            batch_size=16,
                                            color_mode="grayscale",
                                            classes=["NORMAL", "PNEUMONIA"],
                                            save_format="jpeg")

Found 900 images belonging to 2 classes.


In [9]:
test_generator = datagen.flow_from_directory(directory=test_path,
                                             target_size=(256,256),
                                             batch_size=16,
                                             color_mode="grayscale",
                                             classes=["NORMAL", "PNEUMONIA"],
                                             save_format="jpeg",
                                             shuffle=False)

Found 624 images belonging to 2 classes.


In [10]:
def f1_score(y_true, y_pred):
    """
    #TN = Predicted normal and were normal - 113
    #TP = Predicted pneumonia and were pneumonia - 382
    #FN = Predicted normal and were pneumonia - 8
    #FP = Predicted pneumonia and were normal - 121

    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 Score = 2 * (Precision * Recall) / (Precision + Recall)
    """
    
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return f1_val

In [11]:
def build_model(hp):
    """
    Building a model to optimize hypter parameters.
    """
    model = Sequential()
    model.add(Conv2D(filters=hp.Int("conv_1_filter",
                                    min_value=8,
                                    max_value=256,
                                    step=16),
                     kernel_size=hp.Choice("conv_1_kernel", values = [3,5]),
                     activation="relu",
                     input_shape=(256,256,1)))
    model.add(MaxPool2D(pool_size=2))
    model.add(Dropout(0.25))
    model.add(Conv2D(filters=hp.Int("conv_2_filter",
                                    min_value=16,
                                    max_value=256,
                                    step=16),
                     kernel_size=hp.Choice("conv_2_kernel", values = [3,5]),
                     activation="relu"))
    model.add(MaxPool2D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(units=hp.Int("dense_1_units",
                                 min_value=16,
                                 max_value=256,
                                 step=16),
                    activation="relu", kernel_regularizer=l2(hp.Float("l2_value", min_value=1e-5, max_value=1e-2, sampling="LOG"))))
    model.add(Dropout(0.5))
    model.add(Dense(units=2, activation="softmax"))
    model.compile(optimizer=Adam(hp.Choice("learning_rate", values=[1e-2, 1e-3])),
                  loss="binary_crossentropy",
                  metrics=["accuracy", f1_score])

    return model

In [12]:
earlystop = EarlyStopping(monitor="val_loss", patience = 4)

In [13]:
tuner_search = BayesianOptimization(build_model,
                                    objective=Objective("val_f1_score", direction="max"),
                                    max_trials=20,
                                    executions_per_trial=2,
                                    directory="output",
                                    project_name="ChestXrayPneumoniaF1Score")

INFO:tensorflow:Reloading Tuner from output/ChestXrayPneumoniaF1Score/tuner0.json


In [14]:
tuner_search.search(train_generator, epochs=3, validation_data=val_generator)

INFO:tensorflow:Oracle triggered exit


In [15]:
tuner_search.search_space_summary()

Search space summary
Default search space size: 7
conv_1_filter (Int)
{'default': None, 'conditions': [], 'min_value': 8, 'max_value': 256, 'step': 16, 'sampling': 'linear'}
conv_1_kernel (Choice)
{'default': 3, 'conditions': [], 'values': [3, 5], 'ordered': True}
conv_2_filter (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 256, 'step': 16, 'sampling': 'linear'}
conv_2_kernel (Choice)
{'default': 3, 'conditions': [], 'values': [3, 5], 'ordered': True}
dense_1_units (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 256, 'step': 16, 'sampling': 'linear'}
l2_value (Float)
{'default': 1e-05, 'conditions': [], 'min_value': 1e-05, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001], 'ordered': True}


In [16]:
tuner_search.results_summary()

Results summary
Results in output/ChestXrayPneumoniaF1Score
Showing 10 best trials
Objective(name="val_f1_score", direction="max")

Trial 19 summary
Hyperparameters:
conv_1_filter: 216
conv_1_kernel: 3
conv_2_filter: 96
conv_2_kernel: 5
dense_1_units: 240
l2_value: 0.0006804908996363804
learning_rate: 0.001
Score: 0.9742324352264404

Trial 02 summary
Hyperparameters:
conv_1_filter: 72
conv_1_kernel: 3
conv_2_filter: 224
conv_2_kernel: 5
dense_1_units: 96
l2_value: 0.0001478315858641764
learning_rate: 0.001
Score: 0.9709429740905762

Trial 18 summary
Hyperparameters:
conv_1_filter: 152
conv_1_kernel: 3
conv_2_filter: 256
conv_2_kernel: 3
dense_1_units: 176
l2_value: 1.290685390365463e-05
learning_rate: 0.001
Score: 0.9665570259094238

Trial 12 summary
Hyperparameters:
conv_1_filter: 88
conv_1_kernel: 3
conv_2_filter: 96
conv_2_kernel: 5
dense_1_units: 64
l2_value: 0.004114565089723973
learning_rate: 0.001
Score: 0.9621710479259491

Trial 13 summary
Hyperparameters:
conv_1_filter: 8
conv

In [18]:
tuner_search.get_best_models(num_models=5)[4]



<keras.engine.sequential.Sequential at 0x7f0a78313b50>

In [19]:
model = tuner_search.get_best_models(num_models=5)[4]



In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 252, 252, 8)       208       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 126, 126, 8)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 124, 124, 208)     15184     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 62, 62, 208)      0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 799552)            0         
                                                                 
 dense (Dense)               (None, 256)               2

In [None]:
history = model.fit(train_generator, epochs=10, validation_data=val_generator, callbacks=[earlystop])

Epoch 1/10
 41/271 [===>..........................] - ETA: 6:50 - loss: 0.1048 - accuracy: 0.9893 - f1_score: 0.9893

### Make a prediction

In [None]:
#Plot function
def plotImages(images_arr):
    """
    Plots images in a gird.
    """
    fig, axes, = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img, cmap='gray')
        ax.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
test_imgs, test_labels = next(test_generator)
plotImages(test_imgs)
print(test_labels)

In [None]:
predictions = model.predict(x=test_generator, verbose=0)

In [None]:
cm = confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1))

In [None]:
#Confusion matrix plot function from TensorFlows website
def plot_confusion_matrix(cm, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues):
    """
    A function to plot results in a confusion matrix.
    """
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print("Confusion matrix, without normalization")

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i,j], horizontalalignment="center", color = "white" if cm[i,j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

In [None]:
cm_plot_labels = ["NORMAL","PNEUMONIA"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title = "Confusion Matrix")

In [None]:
#Saving the model
if save_model == True:
    model.save("models/pneumonia_detection00.h5")
    
if save_weights == True:
    model.save_weights("models/pneumonia_detection_weights00.h5")

In [None]:
confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), normalize="all")

In [None]:
print(classification_report(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), target_names = cm_plot_labels))

In [None]:
# Plot Training and Validation Accuracy
plt.figure(figsize=(14,4))
plt.subplot(1,2,1)
plt.plot(history.history["accuracy"], label="Training Accuracy")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
plt.title("Model Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Plot Training and Validation Loss
plt.subplot(1,2,2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.title("Model Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()