# This notebook is to build and train another version of the model for binomial classification.

In [1]:
#Imports
from wrangle import *
import numpy as np
import pandas as pd
import os, shutil, glob, itertools, random
import matplotlib.pyplot as plt
import warnings
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.constraints import MaxNorm
import keras_tuner
from keras_tuner import RandomSearch, GridSearch, BayesianOptimization, Objective
from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])






  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#Preferences
warnings.simplefilter(action="ignore", category=FutureWarning)
%matplotlib inline

In [3]:
if not os.path.exists("data/chest_xray"):
    acquire_data()

In [4]:
if not os.path.exists("data/chest_xray/val"):
    prepare_data()

In [5]:
#Path variables
train_path = "data/chest_xray/train/"
val_path = "data/chest_xray/val/"
test_path = "data/chest_xray/test/"

In [6]:
#print(tf.config.list_physical_devices("GPU"))

In [7]:
save_model = True
save_weights = True
version_num = "05"

In [8]:
path_list = [train_path, val_path, test_path]

In [9]:
datagen = ImageDataGenerator(rescale=1./255)

In [10]:
train_generator = datagen.flow_from_directory(directory=train_path,
                                              target_size=(256,256),
                                              batch_size=16,
                                              color_mode="grayscale",
                                              classes=["NORMAL", "PNEUMONIA"],
                                              save_format="jpeg")

Found 4332 images belonging to 2 classes.


In [11]:
val_generator = datagen.flow_from_directory(directory=val_path,
                                            target_size=(256,256),
                                            batch_size=16,
                                            color_mode="grayscale",
                                            classes=["NORMAL", "PNEUMONIA"],
                                            save_format="jpeg")

Found 900 images belonging to 2 classes.


In [12]:
test_generator = datagen.flow_from_directory(directory=test_path,
                                             target_size=(256,256),
                                             batch_size=16,
                                             color_mode="grayscale",
                                             classes=["NORMAL", "PNEUMONIA"],
                                             save_format="jpeg",
                                             shuffle=False)

Found 624 images belonging to 2 classes.


In [13]:
def f1_score(y_true, y_pred):
    """
    #TN = Predicted normal and were normal - 113
    #TP = Predicted pneumonia and were pneumonia - 382
    #FN = Predicted normal and were pneumonia - 8
    #FP = Predicted pneumonia and were normal - 121

    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 Score = 2 * (Precision * Recall) / (Precision + Recall)
    """
    
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return f1_val

In [14]:
def build_model(hp):
    """
    Building a model to optimize hypter parameters.
    """
    model = Sequential()
    model.add(Dropout(0.1,
                      input_shape=(256, 256, 1)))
    model.add(Conv2D(filters=hp.Int("conv_1_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_1_kernel", values = [3,5]),
                     kernel_constraint=MaxNorm(3),
                     activation="relu"))
    model.add(MaxPool2D(pool_size=2))
    model.add(Dropout(0.5))
    model.add(Conv2D(filters=hp.Int("conv_1_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_1_kernel", values = [3,5]),
                     kernel_constraint=MaxNorm(3),
                     activation="relu"))
    model.add(MaxPool2D(pool_size=2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(units=hp.Int("dense_2_units",
                                 min_value=8,
                                 max_value=256,
                                 step=32),
                    activation="relu",
                    kernel_regularizer=l2(hp.Float("l2_value_2", min_value=1e-5, max_value=1e-2, sampling="LOG")),
                    kernel_constraint=MaxNorm(3)))
    model.add(Dense(units=2, activation="softmax"))
    model.compile(optimizer=Adam(hp.Choice("learning_rate", values=[1e-2, 1e-3])),
                  loss="binary_crossentropy",
                  metrics=[tf.keras.metrics.Accuracy()])

    return model

In [15]:
earlystop = EarlyStopping(monitor="val_loss", patience = 4)

In [16]:
tuner_search = BayesianOptimization(build_model,
                                    objective=[Objective(tf.keras.metrics.Accuracy(), direction="max")],
                                    max_trials=10,
                                    executions_per_trial=5,
                                    directory="output",
                                    project_name=f"ChestXrayPneumoniaF1Score{version_num}")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [17]:
tuner_search.search(train_generator, epochs=5, validation_data=val_generator)

Trial 2 Complete [00h 01m 52s]

Best multi_objective So Far: None
Total elapsed time: 00h 14m 34s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
120               |88                |conv_1_filter
5                 |5                 |conv_1_kernel
168               |136               |dense_2_units
0.00053924        |0.001824          |l2_value_2
0.001             |0.01              |learning_rate

Epoch 1/5
 38/271 [===>..........................] - ETA: 16:24 - loss: 4.0948 - accuracy: 0.7451

KeyboardInterrupt: 

In [None]:
tuner_search.search_space_summary()

In [None]:
tuner_search.results_summary()

In [None]:
tuner_search.get_best_models()[0]

In [None]:
model = tuner_search.get_best_models()[0]

In [None]:
model.summary()

In [None]:
history = model.fit(train_generator, epochs=10, validation_data=val_generator, callbacks=[earlystop])

### Make a prediction

In [None]:
#Plot function
def plotImages(images_arr):
    """
    Plots images in a gird.
    """
    fig, axes, = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img, cmap='gray')
        ax.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
test_imgs, test_labels = next(test_generator)
plotImages(test_imgs)
print(test_labels)

In [None]:
predictions = model.predict(x=test_generator, verbose=0)

In [None]:
cm = confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1))

In [None]:
#Confusion matrix plot function from TensorFlows website
def plot_confusion_matrix(cm, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues):
    """
    A function to plot results in a confusion matrix.
    """
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print("Confusion matrix, without normalization")

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i,j], horizontalalignment="center", color = "white" if cm[i,j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

In [None]:
cm_plot_labels = ["NORMAL","PNEUMONIA"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title = "Confusion Matrix")

In [None]:
#Saving the model
if save_model == True:
    model.save(f"models/pneumonia_detection{version_num}.keras")
    
if save_weights == True:
    model.save_weights(f"models/pneumonia_detection_weights{version_num}.keras")

In [None]:
confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), normalize="all")

In [None]:
print(classification_report(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), target_names = cm_plot_labels))

In [None]:
# Plot Training and Validation Accuracy
plt.figure(figsize=(14,4))
plt.subplot(1,2,1)
plt.plot(history.history["accuracy"], label="Training Accuracy")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
plt.title("Model Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Plot Training and Validation Loss
plt.subplot(1,2,2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.title("Model Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()