Checking origional data from: https://data.mendeley.com/datasets/rscbjbr9sj/2

In [1]:
#Imports
from wrangle import *
import numpy as np
import pandas as pd
import os, shutil, glob, itertools, random
import matplotlib.pyplot as plt
import warnings
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras_tuner
from keras_tuner import RandomSearch, GridSearch
from sklearn.metrics import confusion_matrix, classification_report
from PIL import Image

In [2]:
#Preferences
warnings.simplefilter(action="ignore", category=FutureWarning)
%matplotlib inline

In [3]:
if not os.path.exists("data/chest_xray"):
    acquire_data()

In [4]:
if not os.path.exists("data/chest_xray/val"):
    prepare_data()

In [5]:
save_model = True
save_weights = True

In [6]:
#Path variables
train_path = "data/train/"
val_path = "data/val/"
test_path = "data/test/"

In [7]:
path_list = [train_path, val_path, test_path]

In [8]:
datagen = ImageDataGenerator(rescale=1./255)

In [9]:
train_generator = datagen.flow_from_directory(directory= train_path,
                                              target_size=(256,256),
                                              batch_size=16,
                                              color_mode="grayscale",
                                              classes=["normal", "viral_pneumonia", "bacterial_pneumonia"],
                                              save_format="jpeg")

Found 4332 images belonging to 3 classes.


In [10]:
val_generator = datagen.flow_from_directory(directory=val_path,
                                            target_size=(256,256),
                                            batch_size=16,
                                            color_mode="grayscale",
                                            classes=["normal", "viral_pneumonia", "bacterial_pneumonia"],
                                            save_format="jpeg")

Found 900 images belonging to 3 classes.


In [11]:
test_generator = datagen.flow_from_directory(directory=test_path,
                                             target_size=(256,256),
                                             batch_size=16,
                                             color_mode="grayscale",
                                             classes=["normal", "viral_pneumonia", "bacterial_pneumonia"],
                                             save_format="jpeg",
                                             shuffle=False)

Found 624 images belonging to 3 classes.


In [12]:
img, labels = next(train_generator)

In [13]:
def build_model(hp):
    """
    Building a model to optimize hypter parameters.
    """
    model = Sequential()
    model.add(Conv2D(filters=hp.Int("conv_1_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_1_kernel", values = [3,5]),
                     activation="relu",
                     input_shape=(256,256,1)))
    model.add(MaxPool2D(pool_size=2))
    model.add(Conv2D(filters=hp.Int("conv_2_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_2_kernel", values = [3,5]),
                     activation="relu"))
    model.add(MaxPool2D(pool_size=2))
    model.add(Conv2D(filters=hp.Int("conv_3_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_3_kernel", values = [3,5]),
                     activation = "relu"))
    model.add(MaxPool2D(pool_size=2))
    model.add(Conv2D(filters=hp.Int("conv_4_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_4_kernel", values = [3,5]),
                     activation="relu",
                     input_shape=(256,256,1)))
    model.add(MaxPool2D(pool_size=2))
    model.add(Conv2D(filters=hp.Int("conv_5_filter",
                                    min_value=8,
                                    max_value=128,
                                    step=16),
                     kernel_size=hp.Choice("conv_5_kernel", values = [3,5]),
                     activation="relu",
                     input_shape=(256,256,1)))
    model.add(MaxPool2D(pool_size=2))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(units=3, activation="softmax"))
    model.compile(optimizer=Adam(hp.Choice("learning_rate", values=[1e-2, 1e-3])),
                  loss="categorical_crossentropy",
                  metrics=["accuracy", tf.keras.metrics.Recall()])

    return model

In [14]:
earlystop = EarlyStopping(monitor="val_loss", patience = 4)

In [15]:
tuner_search = GridSearch(build_model,
                          objective="val_accuracy",
                          max_trials=20,
                          executions_per_trial=3,
                          directory="TriclassOutput",
                          project_name="ChestXrayTriClass0")

2023-08-10 09:45:57.922492: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
tuner_search.search(train_generator, epochs=3, validation_data=val_generator)

Trial 4 Complete [00h 32m 24s]
val_accuracy: 0.550000011920929

Best val_accuracy So Far: 0.5777777830759684
Total elapsed time: 02h 09m 12s

Search: Running Trial #5

Value             |Best Value So Far |Hyperparameter
8                 |8                 |conv_1_filter
3                 |3                 |conv_1_kernel
8                 |8                 |conv_2_filter
3                 |3                 |conv_2_kernel
8                 |8                 |conv_3_filter
3                 |3                 |conv_3_kernel
8                 |8                 |conv_4_filter
3                 |3                 |conv_4_kernel
24                |8                 |conv_5_filter
3                 |3                 |conv_5_kernel
0.01              |0.001             |learning_rate

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3

KeyboardInterrupt: 

In [None]:
tuner_search.search_space_summary()

In [None]:
tuner_search.results_summary()

In [None]:
model = tuner_search.get_best_models(num_models=1)[0]

In [None]:
model.summary()

In [None]:
history = model.fit(train_generator, epochs=10, validation_data=val_generator, callbacks=[earlystop])

### Make a prediction

In [None]:
#Plot function
def plotImages(images_arr):
    """
    Plots images in a gird.
    """
    fig, axes, = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img, cmap='gray')
        ax.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
test_imgs, test_labels = next(test_generator)
plotImages(test_imgs)
print(test_labels)

In [None]:
predictions = model.predict(x=test_generator, verbose=0)

In [None]:
cm = confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1))

In [None]:
#Confusion matrix plot function from TensorFlows website
def plot_confusion_matrix(cm, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues):
    """
    A function to plot results in a confusion matrix.
    """
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print("Confusion matrix, without normalization")

    print(cm)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i,j], horizontalalignment="center", color = "white" if cm[i,j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

In [None]:
cm_plot_labels = ["Normal","viral_pneumonia", "bacterial_pneumonia"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title = "Confusion Matrix")

In [None]:
#Saving the model
if save_model == True:
    model.save("models/tri_pneumonia_detection.h5")
    
if save_weights == True:
    model.save_weights("models/tri_pneumonia_detection_weights.h5")

In [None]:
confusion_matrix(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), normalize="all")

In [None]:
print(classification_report(y_true=test_generator.classes, y_pred=np.argmax(predictions, axis=-1), target_names = cm_plot_labels))

In [None]:
# Plot Training and Validation Accuracy
plt.figure(figsize=(14,4))
plt.subplot(1,2,1)
plt.plot(history.history["accuracy"], label="Training Accuracy")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
plt.title("Model Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Plot Training and Validation Loss
plt.subplot(1,2,2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.title("Model Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()