<h1>Initializations</h1>

In [None]:
# Base directory marker; the dataset paths below are plain relative paths
# and are not derived from it.
BASEDIR = '.'

# Each split (train / test / valid) is a sub-folder of one dataset directory.
_DATASET_DIR = 'aug_dataset_covid'
TRAINDATAPATH = _DATASET_DIR + '/train'
TESTDATAPATH = _DATASET_DIR + '/test'
VALIDDATAPATH = _DATASET_DIR + '/valid'

Make necessary imports

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten,Conv2D,\
                                    MaxPooling2D,BatchNormalization,\
                                    MaxPooling2D,AveragePooling2D,\
                                    GlobalMaxPooling2D,GlobalAveragePooling2D,\
                                    Concatenate,Input,ZeroPadding2D,Reshape
                                    
from keras.layers.merge import concatenate
import numpy as np
import time
import os

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.callbacks import TensorBoard

from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.applications import VGG16, VGG19
from tensorflow.keras import optimizers

<b>Load for the first time the dataset</b>


We begin by defining the preprocessing that our data will go through: in this case the preprocessing recommended for VGG, as well as the possibility of introducing small alterations to the original image (small rotations, shears, zooming and mirroring) so that our model generalizes better during training.

We make use of the flow_from_directory method available to the ImageDataGenerator object we created. This function will look at the given directory and separate the images in 2 classes based on our subdirectories.

The images are resized to 224 by 224, since it's the recommended input size for a VGG Net model.

Since the generator is an iterator, the batch size decides how many elements are to be "returned" on each next() call.

We also shuffle the data to avoid situations where the model receives too much of the same class consecutively and ends up memorizing instead of generalizing.

Note: the validation is composed of 20% of the original training dataset

In [None]:
# The only preprocessing at this stage is multiplying every pixel by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the three directory iterators: 224x224 images, one image
# per batch, binary labels and a fixed seed.
_flow_settings = dict(target_size=(224, 224),
                      batch_size=1,
                      class_mode="binary",
                      seed=42)

# Training and validation images are served in shuffled order.
train_generator = train_datagen.flow_from_directory(TRAINDATAPATH,
                                                    shuffle=True,
                                                    **_flow_settings)

valid_generator = train_datagen.flow_from_directory(VALIDDATAPATH,
                                                    shuffle=True,
                                                    **_flow_settings)

# The test iterator is not shuffled, so its output order follows the
# iterator's own file listing.
eval_generator = train_datagen.flow_from_directory(TESTDATAPATH,
                                                   shuffle=False,
                                                   **_flow_settings)

In [None]:
# Show the class-name -> label-index mapping used by the test iterator.
print(eval_generator.class_indices)

Function whose purpose is to plot the values of the train and test accuracies according to the epoch

In [None]:
import matplotlib.pyplot as plt


def plot_acc_loss(result, epochs):
    """Plot accuracy and loss curves of a training run side by side.

    Args:
        result: object with a ``history`` mapping that contains the
            'accuracy', 'val_accuracy', 'loss' and 'val_loss' series
            (as returned by the Keras fit call used in this file).
        epochs: number of epochs, used in the panel titles.

    The first recorded epoch is skipped in every curve. The x-axis is built
    from the number of values actually recorded, so a history shorter than
    `epochs` is still plotted instead of failing on mismatched x/y lengths.
    Curves labelled "Test_*" show the validation ('val_*') metrics.
    """
    history = result.history
    # (subplot position, train key, validation key, train label,
    #  validation label, title prefix)
    panels = (
        (121, 'accuracy', 'val_accuracy', 'Train_acc', 'Test_acc',
         'Accuracy over '),
        (122, 'loss', 'val_loss', 'Train_loss', 'Test_loss',
         'Loss over '),
    )
    # Read every series up front so a missing key fails before any figure
    # is created.
    series = {key: history[key][1:]
              for _, train_key, val_key, _, _, _ in panels
              for key in (train_key, val_key)}

    plt.figure(figsize=(15, 5))
    for position, train_key, val_key, train_label, val_label, title in panels:
        plt.subplot(position)
        for key, label in ((train_key, train_label), (val_key, val_label)):
            values = series[key]
            plt.plot(range(1, len(values) + 1), values, label=label)
        plt.title(title + str(epochs) + ' Epochs', size=15)
        plt.legend()
        plt.grid(True)
    plt.show()

Now that we verified that some results are obtained from the last train, we proceed to find the best hyper parameters:

The following parameters were tested with the following values:

Number of dense layers : 1, 2, 3, 4

Number of nodes per dense layer: 16, 32, 64, 128, 256

Value of dropout: 0, 0.2, 0.5

Batch Size: 1, 2, 3

Number of epochs trained: 10, 20, 30

In [None]:
from tensorflow.keras.applications.resnet import preprocess_input
import os


import itertools

# Candidate values explored by the grid search below.
NUMBER_DENSE = [1, 2]
DENSE_NODES = [128, 256, 512]
DROPOUT = [0, 0.25, 0.5]
BATCH_SIZE = [2]
EPOCHS = [10]

# The module-level train/validation generators are created with batch_size=1,
# so a searched batch size would never reach training. Dedicated iterators are
# therefore built here, once per candidate batch size (rescale-only
# preprocessing, same settings as the module-level ones otherwise).
search_datagen = ImageDataGenerator(rescale=1./255)


def _search_flow(directory, batch_size):
    """Return a shuffled, binary-label directory iterator for the search."""
    return search_datagen.flow_from_directory(directory,
                                              target_size=(224, 224),
                                              batch_size=batch_size,
                                              class_mode="binary",
                                              shuffle=True,
                                              seed=42)


search_generators = {
    size: (_search_flow(TRAINDATAPATH, size),
           _search_flow(VALIDDATAPATH, size))
    for size in BATCH_SIZE
}

# itertools.product iterates in the same order as the equivalent nested loops.
for denseNumber, denseNodes, dropoutNumber, batchSize, epochNumber in \
        itertools.product(NUMBER_DENSE, DENSE_NODES, DROPOUT,
                          BATCH_SIZE, EPOCHS):

    print("denseNumber:\t", denseNumber)
    print("denseNodes:\t", denseNodes)
    print("dropoutNumber:\t", dropoutNumber)
    print("batchSize:\t", batchSize)
    print("epochNumber:\t", epochNumber)

    # Release the Keras state left over from the previous trial so memory
    # does not keep growing while many models are built in one process.
    tf.keras.backend.clear_session()

    # Frozen VGG16 convolutional base with ImageNet weights.
    base_model = VGG16(weights='imagenet',
                       include_top=False,
                       input_shape=(224, 224, 3))
    base_model.trainable = False

    # Classification head: `denseNumber` Dense+Dropout pairs, then a single
    # sigmoid unit for the binary decision.
    features = Flatten()(base_model.output)
    for _ in range(denseNumber):
        print(denseNodes)
        features = Dense(denseNodes, activation='relu')(features)
        features = Dropout(dropoutNumber)(features)
    preds = Dense(1, activation='sigmoid')(features)

    model = Model(inputs=base_model.input, outputs=preds)
    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.Adam(learning_rate=0.0005),
                  metrics=['accuracy'])

    trial_train, trial_valid = search_generators[batchSize]
    step_size_train = trial_train.n // trial_train.batch_size
    step_size_valid = trial_valid.n // trial_valid.batch_size
    # Model.fit accepts generators directly; fit_generator is deprecated.
    history = model.fit(trial_train,
                        steps_per_epoch=step_size_train,
                        validation_data=trial_valid,
                        validation_steps=step_size_valid,
                        epochs=epochNumber)

    plot_acc_loss(history, epochNumber)

    # Evaluate on the held-out test iterator, one pass over all its batches.
    eval_generator.reset()
    test_scores = model.evaluate(eval_generator,
                                 steps=len(eval_generator),
                                 verbose=1)
    print('Test loss:', test_scores[0])
    print('Test accuracy:', test_scores[1])



denseNumber:	 1<br>
denseNodes:	 128<br>
dropoutNumber:	 0.25<br>
batchSize:	 2<br>
epochNumber:	 10<br>
<br>
<b>Test loss:</b> 0.6569514274597168<br>
<b>Test accuracy:</b> 0.8125<br>

In [None]:
# Random augmentation (rotation, shear, zoom, horizontal flip) is meant to
# help generalisation and is therefore applied to the training images only.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   shear_range=0.25,
                                   zoom_range=0.1,
                                   horizontal_flip=True)

# Validation and test images are only rescaled: feeding them through the
# augmenting generator would randomly distort them, making the reported
# metrics noisy and not representative of the real images.
eval_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(TRAINDATAPATH,
                                                    target_size=(224, 224),
                                                    batch_size=1,
                                                    class_mode="binary",
                                                    shuffle=True,
                                                    seed=42)


valid_generator = eval_datagen.flow_from_directory(VALIDDATAPATH,
                                                   target_size=(224, 224),
                                                   batch_size=1,
                                                   class_mode="binary",
                                                   shuffle=True,
                                                   seed=42)


# shuffle=False keeps the predictions aligned with eval_generator.filenames.
eval_generator = eval_datagen.flow_from_directory(TESTDATAPATH,
                                                  target_size=(224, 224),
                                                  batch_size=1,
                                                  shuffle=False,
                                                  seed=42,
                                                  class_mode="binary")

In [None]:
# Hyper-parameters of the final model.
denseNumber = 1
denseNodes = 128
dropoutNumber = 0.3
# NOTE: batchSize is not passed to anything in this cell; the effective batch
# size is whatever train_generator / valid_generator were created with.
batchSize = 2
epochNumber = 20

# Frozen VGG16 convolutional base with ImageNet weights.
base_model = VGG16(weights='imagenet',
                   include_top=False,
                   input_shape=(224, 224, 3))
base_model.trainable = False

# Classification head: `denseNumber` Dense+Dropout pairs followed by a single
# sigmoid unit for the binary decision.
x = base_model.output
x = Flatten()(x)

for _ in range(denseNumber):
    print(denseNodes)
    x = Dense(denseNodes, activation='relu')(x)
    x = Dropout(dropoutNumber)(x)

preds = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=preds)

# `learning_rate` replaces the deprecated `lr` keyword. The metric order
# (accuracy, precision, recall) fixes the order of the values returned when
# the model is evaluated.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.Adam(learning_rate=0.0003),
              metrics=['accuracy',
                       tf.keras.metrics.Precision(),
                       tf.keras.metrics.Recall()])

step_size_train = train_generator.n // train_generator.batch_size
step_size_valid = valid_generator.n // valid_generator.batch_size
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator,
                    steps_per_epoch=step_size_train,
                    validation_data=valid_generator,
                    validation_steps=step_size_valid,
                    epochs=epochNumber)

In [None]:
   # Plot the accuracy and loss curves recorded in `history`.
   plot_acc_loss(history, epochNumber)

We proceed to test the model against the test set (20% of the original dataset)

In [None]:
# Restart the test iterator so evaluation covers it from the first batch.
eval_generator.reset()
# Model.evaluate accepts generators directly (evaluate_generator is
# deprecated); len(eval_generator) is the integer number of batches.
# The result is indexed below as [loss, accuracy, precision, recall].
x = model.evaluate(eval_generator,
                   steps=len(eval_generator),
                   verbose=1)
test_precision = x[2]
test_recall = x[3]
print('Test loss:', x[0])
print('Test accuracy:', x[1])
print('Test precision:', test_precision)
print('Test Recall:', test_recall)
# F1 is undefined when precision and recall are both zero; report 0.0 in that
# case instead of raising ZeroDivisionError.
precision_recall_sum = test_precision + test_recall
if precision_recall_sum > 0:
    test_f1 = 2 * (test_precision * test_recall) / precision_recall_sum
else:
    test_f1 = 0.0
print('Test F1 Score: ', test_f1)

In [None]:
# Restart the test iterator so predictions line up with
# eval_generator.filenames.
eval_generator.reset()
# Model.predict accepts generators directly (predict_generator is deprecated);
# len(eval_generator) is the integer number of batches in one full pass.
pred = model.predict(eval_generator,
                     steps=len(eval_generator),
                     verbose=1)
print("Predictions finished")

Display Confusion Matrix

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
# The evaluation result only holds loss, accuracy, precision and recall, so
# the confusion-matrix counts are computed here from the predictions and the
# true labels of the test iterator instead of being read from it.

# Class names ordered by their label index, so position i names class i.
class_names = sorted(eval_generator.class_indices,
                     key=eval_generator.class_indices.get)

# Threshold the sigmoid outputs at 0.5 to get the predicted label index.
predicted_labels = (np.asarray(pred).ravel() > 0.5).astype(int)
# The test iterator is not shuffled, so its labels are in prediction order;
# trim in case fewer predictions than samples were produced.
true_labels = np.asarray(eval_generator.classes)[:len(predicted_labels)]

# counts[p, t] = number of samples predicted as class p whose true class is t
# (rows: predicted class, columns: actual class).
counts = np.zeros((len(class_names), len(class_names)), dtype=int)
np.add.at(counts, (predicted_labels, true_labels), 1)

df_cm = pd.DataFrame(counts, index=class_names, columns=class_names)
sn.set(font_scale=1.4)

ax = sn.heatmap(df_cm, annot=True, fmt="d", annot_kws={"size": 16}, cmap="Blues")
ax.set_xlabel("Actual class")
ax.set_ylabel("Predicted class")

We finalize by visualizing the predictions of the model

In [None]:
import cv2

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Show every test image with the predicted class and its confidence.
for index, probability in enumerate(pred):
    relative_name = eval_generator.filenames[index]
    image = mpimg.imread(os.path.join(TESTDATAPATH, relative_name))

    # mpimg.imread already returns colour images in RGB(A) order, so no
    # BGR->RGB conversion is applied (it would swap the red and blue
    # channels). 2-D arrays are single-channel images and are drawn with a
    # gray colormap.
    if np.ndim(image) == 2:
        plt.imshow(image, cmap='gray')
    else:
        plt.imshow(image)

    print(relative_name)
    # Each prediction row holds one sigmoid output; above 0.5 it is reported
    # as Non-COVID19, otherwise as COVID19 with the complementary confidence.
    score = probability[0]
    if score > 0.5:
        plt.title("%.2f" % (score*100) + "% Non-COVID19")
    else:
        plt.title("%.2f" % ((1-score)*100) + "% COVID19")
    plt.show()