# CNN for automatic detection of pneumonia from chest X-ray images.

Pneumonia is a respiratory infection caused by bacteria or viruses; it affects many people, especially in developing and underdeveloped countries with high levels of pollution. Pneumonia can cause pleural effusion, a build-up of fluid around the lungs that leads to breathing difficulties. Early diagnosis of pneumonia is crucial to ensure curative treatment and increase survival rates. Chest X-rays are the most common method used to diagnose pneumonia. However, examining chest radiographs is a difficult task, and readings vary from one observer to another. In this brief, you will develop a computer-aided diagnostic system for automatic detection of pneumonia from chest X-ray images.

# CNN Keras

- https://victorzhou.com/blog/intro-to-cnns-part-1/
- https://victorzhou.com/blog/keras-cnn-tutorial/
- https://www.tensorflow.org/tutorials/images/cnn


---

## Importing libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Dropout, BatchNormalization, Flatten, Dense
from keras.models import Sequential, load_model
from keras.metrics import Precision, Recall
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# from keras.utils import plot_model

from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

---

## Defining paths

In [None]:
# Directories of the three dataset splits (train, validation, test).
# The paths are relative, so they resolve against the current working directory.
train_path, val_path, test_path = (
    "chest_xray/train/",
    "chest_xray/val/",
    "chest_xray/test/",
)

---

## Defining image size and batch size

In [None]:
# Size (in pixels) every image is resized to before entering the network
img_width = 192
img_height = 192

# Number of images delivered per batch by the data generators
batch_size = 32

---

## Defining ImageDataGenerator

In [None]:
# Generator for the test and validation sets: pixel rescaling only, no augmentation
test_data_gen = ImageDataGenerator(rescale = 1./255)

# Generator for the train set: same rescaling plus random shear, zoom and
# horizontal flips as data augmentation
image_gen = ImageDataGenerator(
  horizontal_flip = True,
  zoom_range = 0.2,
  shear_range = 0.2,
  rescale = 1./255
)

---

## TRAIN

In [None]:
# Training batches, read from train_path through the augmenting generator.
# `shuffle` is not set here, so the library default applies.
train = image_gen.flow_from_directory(
    directory = train_path,
    batch_size = batch_size,
    class_mode = 'binary',
    color_mode = 'grayscale',
    target_size = (img_height, img_width)
)

---

## TEST

In [None]:
# Test batches, read from test_path with rescaling only.
# shuffle = False keeps the batch order fixed, so predictions can be compared
# element by element with `test.classes`.
test = test_data_gen.flow_from_directory(
    directory = test_path,
    batch_size = batch_size,
    class_mode = 'binary',
    color_mode = 'grayscale',
    target_size = (img_height, img_width),
    shuffle = False,
)

---

## VAL

In [None]:
# Validation batches, read from val_path with rescaling only and a fixed
# (unshuffled) order.
val = test_data_gen.flow_from_directory(
    directory = val_path,
    batch_size = batch_size,
    class_mode = 'binary',
    color_mode = 'grayscale',
    target_size = (img_height, img_width),
    shuffle = False,
)

---

## Viewing some images

In [None]:
# Show a 2 x 5 grid with the first image of ten consecutive training batches,
# each titled with its class name.
labels = {0 : "NORMAL", 1 : "PNEUMONIA"}
plt.figure(figsize = (12, 12))

for i in range(10):
    plt.subplot(2, 5, i + 1)

    # The directory iterator never raises StopIteration, so looping over it
    # with `for ... in train` would hang; pull exactly one batch per subplot.
    X_batch, Y_batch = next(train)

    image = X_batch[0]
    # Labels arrive as floats (0.0 / 1.0); cast to int for the lookup.
    plt.title(labels.get(int(Y_batch[0])))
    plt.axis("off")
    plt.imshow(np.squeeze(image), cmap = "gray", interpolation = "nearest")

plt.tight_layout()
plt.show()

---

## Creating Model

In [None]:
# CNN model
#
# Feature extractor: five 3x3 'same'-padded convolutions (32, 32, 32, 64, 64
# filters). The first four are each followed by max pooling, and every one is
# followed by 10% dropout.
# Classifier head: Flatten -> Dense(128) -> Dense(64), each with batch
# normalisation and 20% dropout, then a single sigmoid unit for the binary
# NORMAL / PNEUMONIA output.

model = Sequential()


# Keras expects input_shape as (height, width, channels). Keep the order in
# line with the generators' target_size = (img_height, img_width); the single
# channel matches color_mode = 'grayscale'.
model.add(Conv2D(32, (3, 3), activation = "relu", padding = 'same', input_shape = (img_height, img_width, 1)))
model.add(MaxPooling2D(padding = 'same'))
model.add(Dropout(0.1))

model.add(Conv2D(32, (3, 3), activation = "relu", padding = 'same'))
model.add(MaxPooling2D(padding = 'same'))
model.add(Dropout(0.1))

model.add(Conv2D(32, (3, 3), activation = "relu", padding = 'same'))
model.add(MaxPooling2D(padding = 'same'))
model.add(Dropout(0.1))

model.add(Conv2D(64, (3, 3), activation = "relu", padding = 'same'))
model.add(MaxPooling2D(padding = 'same'))
model.add(Dropout(0.1))

# Last convolution stage: no pooling before flattening
model.add(Conv2D(64, (3, 3), activation = "relu", padding = 'same'))
model.add(Dropout(0.1))


model.add(Flatten())

model.add(Dense(128, activation = 'relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Dense(64, activation = 'relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# One sigmoid output; it is thresholded at 0.5 later in the notebook
model.add(Dense(1, activation = 'sigmoid'))


# Besides accuracy, track precision and recall under explicit names so they
# appear as 'precision' / 'recall' in the training history.
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy', Precision(name = 'precision'), Recall(name = 'recall')])

---

## Model Summary

In [None]:
# Print the layer-by-layer overview of the network
model.summary()

# Optional graphical view of the architecture; it needs extra libraries to be
# installed and the `plot_model` import to be enabled first.
# plot_model(model,show_shapes=True, show_layer_names=True, rankdir='TB', expand_nested=True)

---

## Creating callbacks

In [None]:
# Creating callbacks

# Stop training once validation accuracy has not improved for 10 epochs.
# Accuracy improves upwards, so the mode must be 'max'; with 'min' the
# callback would treat a falling accuracy as progress.
early = EarlyStopping(
    verbose = 1,
    monitor = 'val_accuracy',
    mode = 'max',
    patience = 10
)

# Multiply the learning rate by 0.2 when validation loss has not decreased
# for 3 epochs, never going below 1e-6.
LR_reduction = ReduceLROnPlateau(
    verbose = 1,
    monitor = 'val_loss',
    mode = 'min',
    factor = 0.2,
    min_lr = 0.000001,
    patience = 3
)

# Keep only the model with the highest validation accuracy seen so far
# ('max' for the same reason as above; 'min' would keep the worst one).
checkpoint = ModelCheckpoint(
    '/content/drive/MyDrive/Colab Notebooks/Models/THE_BEST_OF_THE_BEST.hdf5',
    verbose = 1,
    monitor = 'val_accuracy',
    mode = 'max',
    save_best_only = True
)


callbacks_list = [early, LR_reduction, checkpoint]

---

## Setting weights

In [None]:
# Per-class loss weights computed with scikit-learn's 'balanced' scheme from
# the training labels, stored as a {class id: weight} dictionary.
class_ids = np.unique(train.classes)

weights = compute_class_weight(
    class_weight = 'balanced',
    classes = class_ids,
    y = train.classes
)

cw = {class_id: weight for class_id, weight in zip(class_ids, weights)}

print(cw)

---

## Model training

In [None]:
# Fit the network for at most 50 epochs on the training generator, validating
# on `val` after each epoch. The class weights and the callbacks defined
# earlier are applied during training.
history = model.fit(
    x = train,
    validation_data = val,
    epochs = 50,
    callbacks = callbacks_list,
    class_weight = cw
)

---

## Plotting results

In [None]:
# One figure per metric: training curve against validation curve over epochs.
for metric in ('accuracy', 'loss'):
    train_curve = history.history[metric]
    val_curve = history.history[f'val_{metric}']

    plt.plot(train_curve)
    plt.plot(val_curve)
    plt.title(f'model {metric}')
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

---

## Evaluation

In [None]:
# Replace the in-memory model with one loaded from disk, then score it on the
# test generator.
# NOTE(review): this path is not the file written by the ModelCheckpoint
# callback above - confirm that this is the model you intend to evaluate.
model = load_model("C:/Users/antoi/Downloads/model.plk")

test_accu = model.evaluate(test)

# Index 1 is presumably accuracy (loss first, then the compiled metrics in
# order) - verify for the loaded model.
accuracy_percent = round(test_accu[1]*100, 2)
print(f'The testing accuracy is : {accuracy_percent}%')

In [None]:
# Predict on the test set, threshold the probabilities at 0.5 and draw the
# confusion matrix as an annotated heatmap.
preds = model.predict(test, verbose = 1)

# Work on a copy so the raw probabilities stay available in `preds`.
predictions = preds.copy()
is_normal = predictions <= 0.5
is_pneumonia = predictions > 0.5
predictions[is_normal] = 0
predictions[is_pneumonia] = 1

# Rows are the true classes, columns the predicted ones.
counts = confusion_matrix(test.classes, predictions, labels = [0, 1])

cm = pd.DataFrame(
    data = counts,
    index = ["Actual Normal", "Actual Pneumonia"],
    columns = ["Predicted Normal", "Predicted Pneumonia"]
)

sns.heatmap(cm, annot = True, fmt = "d")

In [None]:
# Per-class precision / recall / F1 for the thresholded test predictions
report = classification_report(
    y_true = test.classes,
    y_pred = predictions,
    target_names = ['NORMAL','PNEUMONIA']
)
print(report)

---

## Visualizing some of the predicted images with their predicted probability (%)

In [None]:
# Show nine test images with the predicted class probability and the actual
# class in the title.

# Pull every batch from the test iterator exactly once and split it into
# images and labels. The iterator is not shuffled, so the order matches
# `preds`. The built-in next() is used rather than the iterator's `.next()`
# method, which newer Keras iterators do not expose.
test.reset()
batches = [next(test) for _ in range(len(test))]
x = np.concatenate([images for images, _ in batches])
y = np.concatenate([targets for _, targets in batches])

# x holds the image arrays; y holds the labels, decoded with this mapping
labels = {0 : "NORMAL", 1 : "PNEUMONIA"}

# Index of the first image shown.
# NOTE(review): 228 was presumably picked so that both classes appear in the
# grid - confirm against the test set layout.
first_shown = 228
# Never index past the end of the test set.
last_shown = min(first_shown + 9, len(x))

plt.figure(figsize = (20, 20))

for position, i in enumerate(range(first_shown, last_shown), start = 1):
    plt.subplot(3, 3, position)

    pneumonia_prob = preds[i][0]

    # Same rule as the confusion matrix: strictly above 0.5 means pneumonia.
    if pneumonia_prob > 0.5:
        out = (f'{round(pneumonia_prob * 100, 2)}% probability of being Pneumonia case')
    else:
        out = (f'{round((1 - pneumonia_prob) * 100, 2)}% probability of being Normal case')

    plt.title(f"{out}\nActual case : {labels.get(int(y[i]))}")
    # Grayscale colormap, so the X-rays are not drawn in false colour.
    plt.imshow(np.squeeze(x[i]), cmap = "gray")
    plt.axis('off')

plt.show()