<a href="https://colab.research.google.com/github/LucasDamascenoS/DeepLearning/blob/main/Covid19_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Implementation of the paper **CoroNet: A deep neural network for detection and diagnosis of COVID-19 from chest x-ray images**

The dataset used and CNN models can be found at: https://nextcloud.lasseufpa.org/s/JrxKko4NG5mFg2x

- If running on Google Colab, run the 2 cells below

In [None]:
# mount Google Drive (Colab only)
# The import is guarded so that running every cell on a local kernel, where
# the google.colab package does not exist, skips this step instead of failing.
try:
    from google.colab import drive
except ImportError:
    print('google.colab is not available - skipping Google Drive mount.')
else:
    drive.mount('/content/drive')

In [None]:
# unzip dataset and models into /content, the root used by the Colab paths of this notebook
# specify the path on your Drive where dataset.zip and models.zip are saved
#   -q  quiet: do not list every extracted file in the cell output
#   -n  never overwrite: re-running the cell does not stop on an overwrite prompt
!unzip -q -n "/content/drive/MyDrive/Colab Notebooks/Master's/Computational Intelligence/dataset.zip" -d /content
!unzip -q -n "/content/drive/MyDrive/Colab Notebooks/Master's/Computational Intelligence/models.zip" -d /content

In [None]:
# import libraries and packages
import cv2, os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.font_manager

from os import listdir
from numpy.random import seed
from skimage import transform

from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Fix the random seeds early so runs are repeatable:
# NumPy's global generator first, then TensorFlow's global seed.
np.random.seed(8)
tf.random.set_seed(7)

**Create Data Generators to preprocess and prepare training and validation**

In [None]:
# Select the dataset folder and the .h5 model file that correspond to the
# number of classes to classify and to where the notebook is running.

# 0 - 2 classes: covid and normal
# 1 - 3 classes: covid, normal and pneumonia
# 2 - 4 classes: covid, normal, pneumonia bacterial and pneumonia viral
numberClasses = 0

# parameter to specify if running on Google Colab or Local
# 0 - running Local
# 1 - running on Google Colab
runColab = 0

# numberClasses -> (folder / model base name, display labels)
_CLASS_SETUPS = {
    0: ('two_classes', ["covid", "normal"]),
    1: ('three_classes', ["covid", "normal", "pneumonia"]),
    2: ('four_classes', ["covid", "normal", "pneumonia_bac", "pneumonia_vir"]),
}

# runColab -> directory that contains the 'dataset' and 'models' folders
_ROOT_DIRS = {0: '.', 1: '/content'}

# Fail here with a clear message instead of leaving every constant undefined
# (which would only surface later as a NameError in another cell).
if numberClasses not in _CLASS_SETUPS:
    raise ValueError('numberClasses must be 0, 1 or 2, got ' + repr(numberClasses))
if runColab not in _ROOT_DIRS:
    raise ValueError('runColab must be 0 or 1, got ' + repr(runColab))

_setup_name, _setup_labels = _CLASS_SETUPS[numberClasses]
_root_dir = _ROOT_DIRS[runColab]

DATASET_PATH = _root_dir + '/dataset/' + _setup_name
TRAIN_PATH = DATASET_PATH + '/train'
TEST_PATH = DATASET_PATH + '/test'
MODEL_PATH = _root_dir + '/models/' + _setup_name + '.h5'
MODEL_NAME = _setup_name + '.h5'

# These three settings are the same for every class setup.
CLASS_MODE = 'categorical'
LOSS = 'categorical_crossentropy'
ACTIVATION = 'softmax'

# Copy so that later changes to LABELS never alter the lookup table.
LABELS = list(_setup_labels)

In [None]:
IMAGE_SIZE       = (150, 150)
# flow_from_directory treats each sub-directory of TRAIN_PATH as one class,
# so count directories only; stray files (e.g. .DS_Store) must not be counted.
NUM_CLASSES      = sum(os.path.isdir(os.path.join(TRAIN_PATH, entry))
                       for entry in listdir(TRAIN_PATH))
BATCH_SIZE       = 10
NUM_EPOCHS       = 10
LEARNING_RATE    = 0.0001
VALIDATION_SPLIT = 0.2

# Training preprocessor: rescale to [0, 1] plus random augmentation.
# featurewise_center / featurewise_std_normalization / zca_whitening are
# intentionally not set: they only take effect after train_datagen.fit(data)
# has computed dataset statistics, which this notebook never does, so Keras
# would just emit a warning per option and skip them.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=50,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.25,
                                   zoom_range=0.1,
                                   channel_shift_range=20,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   validation_split=VALIDATION_SPLIT,
                                   fill_mode='constant')

# Validation preprocessor: rescale only, so validation metrics are measured on
# un-augmented images, the same way the test images are prepared later on.
# Using the same validation_split keeps the training/validation partitions
# disjoint, because the split is taken from the sorted file list per class.
valid_datagen = ImageDataGenerator(rescale=1./255,
                                   validation_split=VALIDATION_SPLIT)

train_batches = train_datagen.flow_from_directory(TRAIN_PATH,
                                                  target_size=IMAGE_SIZE,
                                                  shuffle=True,
                                                  batch_size=BATCH_SIZE,
                                                  subset="training",
                                                  seed=42,
                                                  class_mode=CLASS_MODE
                                                  )

# No shuffling needed for validation; a fixed order keeps it deterministic.
valid_batches = valid_datagen.flow_from_directory(TRAIN_PATH,
                                                  target_size=IMAGE_SIZE,
                                                  shuffle=False,
                                                  batch_size=BATCH_SIZE,
                                                  subset="validation",
                                                  seed=42,
                                                  class_mode=CLASS_MODE
                                                  )

**CNN Model**

In [None]:
# this cell will create a CNN model
# if you already have a trained .h5 model, skip this cell and the three cells
# after it (training, plotting, saving) and continue at the cell that loads the model

# Xception convolutional base with ImageNet weights and without its classifier
# head; the input shape follows IMAGE_SIZE so it always matches the generators.
conv_base = Xception(weights='imagenet',
                     include_top=False,
                     input_shape=IMAGE_SIZE + (3,)
                     )

# Fine-tune the whole base rather than freezing it.
conv_base.trainable = True

# Classification head stacked on top of the convolutional base.
model = Sequential([
    conv_base,
    Flatten(),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dense(NUM_CLASSES, activation=ACTIVATION),
])

# The metric is registered as 'acc'; the plotting cell reads the history keys
# 'acc' and 'val_acc', so keep this name in sync with it.
model.compile(loss=LOSS,
              optimizer=Adam(learning_rate=LEARNING_RATE),
              metrics=['acc']
              )

model.summary()

In [None]:
# training the model
# number of batches per epoch produced by each generator
print(len(train_batches))
print(len(valid_batches))

# len(generator) is ceil(samples / batch_size). Using it instead of
# samples // batch_size keeps the last partial batch in every epoch and never
# yields 0 steps when a subset is smaller than one batch.
STEP_SIZE_TRAIN = len(train_batches)
STEP_SIZE_VALID = len(valid_batches)

result = model.fit(train_batches,
                   steps_per_epoch=STEP_SIZE_TRAIN,
                   validation_data=valid_batches,
                   validation_steps=STEP_SIZE_VALID,
                   epochs=NUM_EPOCHS,
                   )

In [None]:
# plot the accuracy and loss graphs
def plot_acc_loss(result, epochs, skip=1):
    """Plot training/validation accuracy and loss side by side.

    Args:
        result: object returned by ``model.fit``; its ``history`` dict must
            contain the keys 'acc', 'val_acc', 'loss' and 'val_loss'.
        epochs: number of epochs shown in the plot titles.
        skip: number of leading epochs left out of the curves. The default of
            1 keeps the previous behaviour of hiding the first epoch (index 0).

    The x axis shows zero-based epoch indices. Its range is derived from the
    recorded history rather than from ``epochs``, so a run that recorded a
    different number of epochs still plots instead of failing on a length
    mismatch.
    """
    history = result.history
    recorded = len(history['loss'])
    epoch_axis = range(skip, recorded)

    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(15, 5))

    ax_acc.plot(epoch_axis, history['acc'][skip:], label='Train_acc')
    ax_acc.plot(epoch_axis, history['val_acc'][skip:], label='Val_acc')
    ax_acc.set_title('Accuracy over ' + str(epochs) + ' Epochs', size=15)
    ax_acc.legend()
    ax_acc.grid(True)

    ax_loss.plot(epoch_axis, history['loss'][skip:], label='Train_loss')
    ax_loss.plot(epoch_axis, history['val_loss'][skip:], label='Val_loss')
    ax_loss.set_title('Loss over ' + str(epochs) + ' Epochs', size=15)
    ax_loss.legend()
    ax_loss.grid(True)

    plt.show()

plot_acc_loss(result, NUM_EPOCHS)

In [None]:
# Persist the trained network under MODEL_NAME. This is a bare file name, so
# the file lands in the current working directory - not at MODEL_PATH, which
# is the location the loading cell reads from.
model.save(filepath=MODEL_NAME)

In [None]:
# load saved model from the .h5 file at MODEL_PATH
# (this replaces any model currently held in `model`)
model = load_model(MODEL_PATH)
# summary() prints the table itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
model.summary()

**Create Data Generators to test and evaluate**

In [None]:
# Test images are only rescaled to [0, 1]; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# One image per batch in a fixed (unshuffled) order, so predictions line up
# with eval_generator.filenames and eval_generator.classes.
eval_flow_options = dict(target_size=IMAGE_SIZE,
                         batch_size=1,
                         shuffle=False,
                         seed=42,
                         class_mode=CLASS_MODE)

eval_generator = test_datagen.flow_from_directory(TEST_PATH, **eval_flow_options)

# Start from the first batch.
eval_generator.reset()

In [None]:
# evaluate the trained model on the evaluation generator
eval_generator.reset()

# Model.evaluate accepts generators directly (evaluate_generator is deprecated).
# len(eval_generator) is already an integer number of batches, so no np.ceil
# is needed and `steps` is not passed as a float.
test_loss, test_acc = model.evaluate(eval_generator,
                                     steps=len(eval_generator),
                                     verbose=1,
                                     )

print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

**Create Data Generators on single folder/class from test and predict**

In [None]:
# Predict every test image one at a time and count the predictions per class.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# batch_size=1 and shuffle=False: the i-th batch is exactly the i-th entry of
# pred_generator.filenames.
pred_generator = test_datagen.flow_from_directory(TEST_PATH,
                                                  target_size=IMAGE_SIZE,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  seed=42,
                                                  class_mode=CLASS_MODE
                                                  )

pred_generator.reset()

count = [0] * NUM_CLASSES          # count[k] = number of images predicted as class k
files = pred_generator.filenames

for i, filename in enumerate(files):
    # Built-in next() works on any Python iterator. The file name comes from
    # the loop index instead of pred_generator.batch_index - 1, which is -1
    # after the final batch and only hit the right file through negative
    # indexing.
    img, _ = next(pred_generator)
    predict = model.predict(img, verbose=0)   # verbose=0: no progress bar per image

    p = np.argmax(predict, axis=-1)
    print(str(p[0]) + " " + filename)
    count[p[0]] += 1

print(count)

In [None]:
# Confusion matrix and classification report on the test set.
filenames = eval_generator.filenames
nb_samples = len(filenames)
eval_generator.reset()

# Model.predict accepts generators directly (predict_generator is deprecated);
# len(eval_generator) is already an integer number of batches.
pp = model.predict(eval_generator, steps=len(eval_generator))   # class probabilities
predict = np.argmax(pp, axis=-1)                                # predicted class index

# eval_generator is created with shuffle=False, so its `classes` array is in
# the same order as the predictions.
classes = eval_generator.classes
acc = np.mean(predict == classes)
names = LABELS
class_ids = list(range(len(names)))

font = {
    'family': 'DeJavu Serif',
    'serif': 'Times New Roman',
    'size': 12
    }

plt.rc('font', **font)

# Passing the label ids keeps the matrix square and aligned with `names`
# even if some class is absent from the test data or never predicted.
cm = confusion_matrix(classes, predict, labels=class_ids)

print(cm)
print(classification_report(classes, predict, labels=class_ids, target_names=names))

plt.imshow(cm, cmap=plt.cm.Blues)
plt.xlabel('Predicted labels \nAccuracy: {:0.2f}'.format(acc*100))
plt.ylabel("True labels")
# One tick per class, labelled with the class name.
plt.xticks(class_ids, names)
plt.yticks(class_ids, names)
plt.title('Confusion matrix ')
plt.colorbar()
plt.show()

**Evaluate the model on a single image**

In [None]:
# use one of the following img_path to evaluate the model on a single image
# you can use any of the images on the dataset, these are only examples

# for 2 classes: covid and normal
img_path = TEST_PATH + '/covid/covid_13.jpeg'
#img_path = TEST_PATH + '/normal/normal_192.jpeg'

# for 3 classes: covid, normal and pneumonia
#img_path = TEST_PATH + '/covid/covid_13.jpeg'
#img_path = TEST_PATH + '/normal/normal_192.jpeg'
#img_path = TEST_PATH + '/pneumonia/pneumonia_196.jpeg'

# for 4 classes: covid, normal, pneumonia bacterial and pneumonia viral
#img_path = TEST_PATH + '/covid/covid_13.jpeg'
#img_path = TEST_PATH + '/normal/normal_192.jpeg'
#img_path = TEST_PATH + '/pneumonia_bacterial/pneumonia_bacterial_104.jpeg'
#img_path = TEST_PATH + '/pneumonia_viral/pneumonia_viral_79.jpeg'

# cv2.imread returns None (it does not raise) when the file cannot be read.
img_bgr = cv2.imread(img_path)
if img_bgr is None:
    raise FileNotFoundError('Could not read image: ' + img_path)

# OpenCV loads channels as BGR; convert to RGB, the order matplotlib's imshow
# expects and the order the Keras generators feed to the model.
img_r = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
img1 = img_r.astype('float32') / 255                  # same 1/255 rescale as the generators
img2 = transform.resize(img1, IMAGE_SIZE + (3,))      # same size as the generators' target_size
img = np.expand_dims(img2, axis=0)                    # add the batch dimension

scores = model.predict(img)
print(scores)

font = {
    'family': 'DeJavu Serif',
    'serif': 'Times New Roman',
    'size': 9
    }

plt.rc('font', **font)

# One "<label> <score>%" line per class. Built from LABELS, so the cell works
# for 2, 3 or 4 classes without relying on a variable defined in another
# cell, and every line consistently ends with "%".
title_lines = [label + " " + str(round(score * 100, 1)) + "%"
               for label, score in zip(LABELS, scores[0])]
plt.title("\n".join(title_lines))
plt.imshow(img2)