In [35]:
# Imports (Non exhaustive)
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Tensorflow API new structure for Tensorflow v2.15.0
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.layers import RandomFlip, RandomZoom, RandomRotation, Rescaling
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [36]:
# Directories definition
# All three splits live under one dataset root. The root defaults to the original
# local location and can be overridden, without editing the notebook, by setting
# the CHEST_XRAY_DATASET_DIR environment variable before starting the kernel.
dataset_dir = os.environ.get(
    "CHEST_XRAY_DATASET_DIR",
    "C:/Users/user/Desktop/Chest Xray Classifier/dataset",
).rstrip("/\\")  # drop a trailing separator so the joins below stay clean
train_path = f"{dataset_dir}/train"
test_path = f"{dataset_dir}/test"
val_path = f"{dataset_dir}/val"

# Basic parameters (image dimension and batch size)
batch_size = 16   # images per batch fed to the model
img_height = 500  # every image is resized to img_height x img_width on load
img_width = 500

In [37]:
# Data Preparation

# Data Augmentation
# Training-only generator: scales pixels by 1/255 and applies random geometric
# transforms (rotation, shifts, shear, zoom, horizontal flip) to each batch.
data_augmentation = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load data
# flow_from_directory takes `target_size` (it has no `image_size` argument), and
# class_mode='binary' yields a single 0/1 label per image, which is what the
# one-unit sigmoid output trained with binary_crossentropy expects
# (the default, 'categorical', would produce one-hot labels).
train = data_augmentation.flow_from_directory(train_path,
                                              target_size=(img_height, img_width),
                                              color_mode='grayscale',
                                              class_mode='binary',
                                              batch_size=batch_size)

# Test/validation images are not augmented, but they must receive the same 1/255
# scaling as the training images; otherwise the model is evaluated on 0-255 inputs
# it never saw during training.
rescale_pixels = Rescaling(1./255)

# shuffle=False keeps the test samples in a fixed order so predictions can be
# matched against labels afterwards.
test = tf.keras.utils.image_dataset_from_directory(test_path,
                                                   image_size=(img_height, img_width),
                                                   color_mode='grayscale',
                                                   shuffle=False,
                                                   label_mode='binary',
                                                   batch_size=batch_size)
test = test.map(lambda images, labels: (rescale_pixels(images), labels))

valid = tf.keras.utils.image_dataset_from_directory(val_path,
                                                    image_size=(img_height, img_width),
                                                    color_mode='grayscale',
                                                    label_mode='binary',
                                                    batch_size=batch_size)
valid = valid.map(lambda images, labels: (rescale_pixels(images), labels))


Found 4192 files belonging to 2 classes.
Found 624 files belonging to 2 classes.
Found 1040 files belonging to 2 classes.


In [38]:
# Model Architecture (Input->CNN->Flat->ANN->Output)
cnn = Sequential()

# Convolution and Pooling Layers
# Keras image tensors are ordered (height, width, channels); 1 channel = grayscale.
cnn.add(Conv2D(32, (3, 3), activation="relu", input_shape=(img_height, img_width, 1)))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Conv2D(32, (3, 3), activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Conv2D(64, (3, 3), activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Conv2D(64, (3, 3), activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten
cnn.add(Flatten())

# Fully-connected Neural Network Layers
cnn.add(Dense(activation='relu', units = 128))
cnn.add(Dense(activation='relu', units = 64))
# Single sigmoid unit: the output is a score in (0, 1) for the positive class.
cnn.add(Dense(activation='sigmoid', units = 1))

cnn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Regularization callbacks
# Patience = number of epochs without val_loss improvement that a callback tolerates.
# EarlyStopping must wait longer than ReduceLROnPlateau (5 > 3); with a shorter
# patience, training would stop before the learning rate could ever be reduced.
# restore_best_weights=True puts back the weights of the best val_loss epoch when
# training is stopped early, instead of keeping those of the last (worse) epoch.
early = EarlyStopping(monitor="val_loss", mode="min", patience=5, restore_best_weights=True)
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.3, min_lr=0.00001)
callbacks_ls = [early, learning_rate_reduction]

# Class weights for unbalanced datasets (optional; requires scikit-learn)
# Assign higher weights to the minority class, reduce bias towards majority class
# Calculate proportion, invert it as a counter-bias
# The weights must be computed from the per-sample labels (`train.classes` on a
# flow_from_directory iterator), not from the list of class names.
# from sklearn.utils.class_weight import compute_class_weight
# weights = compute_class_weight('balanced', classes=np.unique(train.classes), y=train.classes)
# class_weight_val = dict(zip(np.unique(train.classes), weights))
# #print(class_weight_val)

In [40]:
# Train model (run this cell to train)
# Variant with class weighting and a longer schedule:
# cnn.fit(train, epochs=30, validation_data=valid, class_weight=class_weight_val, callbacks=callbacks_ls)
cnn.fit(
    train,
    epochs=6,
    validation_data=valid,
    callbacks=callbacks_ls,
)

# Training metric visualization
# cnn.history is the History object recorded by the fit() call above; its
# .history dict holds one list of per-epoch values for each tracked metric.
pd.DataFrame(data=cnn.history.history).plot()

Epoch 1/6


Epoch 2/6

KeyboardInterrupt: 

In [34]:
# Model evaluation
# evaluate() returns the loss followed by the compiled metrics, so index 1 is accuracy.
test_model = cnn.evaluate(test)
test_accu = 100 * test_model[1]
print('Test Accuracy: ', test_accu, '%')

# Assign binary classes to prediction output (Sigmoid: 0 < output < 1)
# Scores above 0.5 become class 1, scores at or below 0.5 become class 0.
# Both masks are taken from the untouched `prediction` array, so the two
# assignments cannot interfere with each other.
prediction = cnn.predict(test, verbose=1)
prediction_bin = prediction.copy()
prediction_bin[prediction > 0.5] = 1
prediction_bin[prediction <= 0.5] = 0

# Confusion matrix (optional; requires scikit-learn)
# y_true has to be the per-sample labels collected from the dataset; `class_names`
# only lists the class-name strings. The test set is loaded with shuffle=False,
# so label order matches prediction order.
# from sklearn.metrics import classification_report, confusion_matrix
# y_true = np.concatenate([labels.numpy() for _, labels in test]).ravel()
# con_mat = pd.DataFrame(data=confusion_matrix(y_true, prediction_bin.ravel(), labels=[0, 1]),
#                        index=["Actual Normal", "Actual Pneumonia"],
#                        columns=["Predicted Normal", "Predicted Pneumonia"])
# print(classification_report(y_true=y_true, y_pred=prediction_bin.ravel(), target_names=['NORMAL','PNEUMONIA']))

Test Accuracy:  85.89743375778198 %
