This notebook does not prep the data. Train/val/test split is very unbalanced. 

Checking origional data from: https://data.mendeley.com/datasets/rscbjbr9sj/2

In [1]:
#Imports
import numpy as np
import pandas as pd
import os
import shutil
import glob
import itertools
import random
import matplotlib.pyplot as plt
import warnings
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras_tuner
from keras_tuner import RandomSearch, GridSearch
from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image

In [2]:
#Preferences
warnings.simplefilter(action="ignore", category=FutureWarning)
%matplotlib inline

In [3]:
save_model = True
save_weights = True

In [4]:
#Path variables
train_path = "data/train/"
val_path = "data/val/"
test_path = "data/test/"

In [5]:
path_list = [train_path, val_path, test_path]

In [6]:
datagen = ImageDataGenerator(rescale=1./255)

In [7]:
train_generator = datagen.flow_from_directory(directory=train_path,
                                              target_size=(256,256),
                                              batch_size=16,
                                              color_mode="grayscale",
                                              classes=["NORMAL", "viral_pneumonia", "bacterial_pneumonia"],
                                              save_format="jpeg")

Found 4877 images belonging to 3 classes.


In [8]:
val_generator = datagen.flow_from_directory(directory=val_path,
                                            target_size=(256,256),
                                            batch_size=16,
                                            color_mode="grayscale",
                                            classes=["NORMAL", "viral_pneumonia", "bacterial_pneumonia"],
                                            save_format="jpeg")

Found 355 images belonging to 3 classes.


In [9]:
test_generator = datagen.flow_from_directory(directory=test_path,
                                             target_size=(256,256),
                                             batch_size=16,
                                             color_mode="grayscale",
                                             classes=["NORMAL", "viral_pneumonia", "bacterial_pneumonia"],
                                             save_format="jpeg",
                                             shuffle=False)

Found 624 images belonging to 3 classes.


In [10]:
img, labels = next(train_generator)

In [11]:
def build_model(hp):
    """
    Building a model to optimize hypter parameters.
    """
    model = Sequential()
    model.add(Conv2D(filters=hp.Int("conv_1_filter",
                                    min_value=8,
                                    max_value=256,
                                    step=16),
                     kernel_size=hp.Choice("conv_1_kernel", values = [3,5]),
                     activation="relu",
                     input_shape=(256,256,1)))
    model.add(MaxPool2D(pool_size=2))
    model.add(Dropout(hp.Float("dropout_1", min_value=0.0,max_value=0.8, step=0.1)))
    model.add(Conv2D(filters=hp.Int("conv_2_filter",
                                    min_value=16,
                                    max_value=256,
                                    step=16),
                     kernel_size=hp.Choice("conv_2_kernel", values = [3,5]),
                     activation="relu"))
    model.add(MaxPool2D(pool_size=2))
    model.add(Dropout(hp.Float("dropout_2", min_value=0.0,max_value=0.8, step=0.1)))
    model.add(Flatten())
    model.add(Dense(units=hp.Int("dense_1_units",
                                 min_value=16,
                                 max_value=256,
                                 step=16),
                    activation="relu", kernel_regularizer=l2(hp.Float("l2_value", min_value=1e-5, max_value=1e-2, sampling="LOG"))))
    model.add(Dropout(hp.Float("dropout_1", min_value=0.0,max_value=0.8, step=0.1)))
    model.add(Dense(units=3, activation="softmax"))
    model.compile(optimizer=Adam(hp.Choice("learning_rate", values=[1e-2, 1e-3])),
                  loss="categorical_crossentropy",
                  metrics=["accuracy", tf.keras.metrics.Recall()])

    return model

SyntaxError: '(' was never closed (1961154190.py, line 24)

In [None]:
earlystop = EarlyStopping(monitor="val_loss", patience = 3)

In [None]:
tuner_search = GridSearch(build_model,
                          objective="val_accuracy",
                          max_trials=20,
                          executions_per_trial=3,
                          directory="output",
                          project_name="ChestXrayPneumoniaAccuracy6")

In [None]:
tuner_search.search(train_generator, epochs=3, validation_data=val_generator)

In [None]:
tuner_search.search_space_summary()

In [None]:
tuner_search.results_summary()

In [None]:
model = tuner_search.get_best_models(num_models=1)[0]

In [None]:
model.summary()

In [None]:
model.fit(train_generator, epochs=10, validation_data=val_generator, callbacks=[earlystop])

### Make a prediction

In [None]:
#Plot function
def plotImages(images_arr):
    """
    Plots images in a gird.
    """
    fig, axes, = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img, cmap='gray')
        ax.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
test_imgs, test_labels = next(test_generator)
plotImages(test_imgs)
print(test_labels)

In [None]:
predictions = model.predict(x=test_generator, verbose=0)

In [None]:
cm = confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1))

In [None]:
#Confusion matrix plot function from TensorFlows website
def plot_confusion_matrix(cm, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues):
    """
    A function to plot results in a confusion matrix.
    """
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print("Confusion matrix, without normalization")

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i,j], horizontalalignment="center", color = "white" if cm[i,j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

In [None]:
cm_plot_labels = ["Normal","viral_pneumonia", "bacterial_pneumonia"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title = "Confusion Matrix")

In [None]:
#Saving the model
if save_model == True:
    model.save("models/pneumonia_detection.h5")
    
if save_weights == True:
    model.save_weights("models/pneumonia_detection_weights.h5")

In [None]:
confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), normalize="all")

In [None]:
from sklearn.metrics import classification_report

In [None]:
print(classification_report(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), target_names = cm_plot_labels))