## 1. Import Required Packages <a class="anchor" id="1"></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import itertools

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip, RandomRotation

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## 2. Data Preparation <a class="anchor" id="2"></a>

In [None]:
def import_image_to_array(directory_path, size):
    """Load every readable image in a directory and resize it.

    Args:
        directory_path: Directory that holds the image files.
        size: Target size, forwarded unchanged to ``cv2.resize``.

    Returns:
        A list with one resized image array per readable file, in
        ``os.listdir`` order. Files OpenCV cannot decode are skipped.
    """
    image_arrays = []

    for image_name in os.listdir(directory_path):
        # os.path.join works whether or not directory_path ends with a separator
        image = cv2.imread(os.path.join(directory_path, image_name))

        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; passing that None on to cv2.resize would crash the whole load
        if image is None:
            warnings.warn("Skipping unreadable image file: {}".format(image_name))
            continue

        image_arrays.append(cv2.resize(image, size))

    return image_arrays

In [None]:
# Source folders of the three chest X-ray (CXR) categories
NORMAL_DIR = "/kaggle/input/covid-cxr-image-dataset-research/COVID_IEEE/normal/"  # Normal CXR Image Dataset
VIRAL_DIR = "/kaggle/input/covid-cxr-image-dataset-research/COVID_IEEE/virus/"    # Viral Pneumonia CXR Image Dataset
COVID_DIR = "/kaggle/input/covid-cxr-image-dataset-research/COVID_IEEE/covid/"    # COVID-19 CXR Image Dataset

# Load each category, resizing every image to 224x224
normal_dataset, viral_dataset, covid_dataset = [
    import_image_to_array(category_dir, (224, 224))
    for category_dir in (NORMAL_DIR, VIRAL_DIR, COVID_DIR)
]


In [None]:
# Report the overall number of images and the size of each category
total_image_count = len(normal_dataset) + len(viral_dataset) + len(covid_dataset)
print("The Dataset which is made up of {} Image Arrays has: ".format(total_image_count))
print('{} Normal CXR Image Arrays'.format(len(normal_dataset)))
print('{} Viral Pneumonia CXR Image Arrays'.format(len(viral_dataset)))
print('{} COVID-19 CXR Image Arrays'.format(len(covid_dataset)))

In [None]:
# Display name of every label id
class_names = {0: "Normal CXR", 1 : "Viral Pneumonia CXR", 2: "COVID-19 CXR"}

# One figure per category (normal, viral pneumonia, COVID-19), each showing
# the first three images of that category
for class_id, category_images in enumerate((normal_dataset, viral_dataset, covid_dataset)):
    plt.figure(figsize=(10, 10))
    for images in range(3):
        ax = plt.subplot(3, 3, images + 1)
        plt.imshow(category_images[images], cmap = "gray")
        plt.title(class_names[class_id])
        plt.axis("off")

## Data Enhancement

In [None]:
def white_balance(channel, perc = 0.05):
    """Stretch one image channel so its percentile range fills 0-255.

    Args:
        channel: Array holding a single image channel.
        perc: Percentile cut at both ends; values at or below the ``perc``
            percentile map to 0 and values at or above the ``100 - perc``
            percentile map to 255.

    Returns:
        The rescaled channel as a ``uint8`` array.
    """
    mi, ma = (np.percentile(channel, perc), np.percentile(channel, 100.0 - perc))

    # A constant channel has no range to stretch; dividing by (ma - mi) == 0
    # would fill the result with NaN, so return the values unchanged instead
    if ma == mi:
        return np.uint8(np.clip(channel, 0, 255))

    channel = np.uint8(np.clip((channel - mi) * 255.0 / (ma - mi), 0, 255))
    return channel

In [None]:
def clahe():
    """Return a new OpenCV CLAHE object (clip limit 2.0, 16x16 tile grid)."""
    return cv2.createCLAHE(clipLimit=2.0, tileGridSize=(16, 16))

In [None]:
def image_enhancer(image_arrays):
    """Enhance images with per-channel white balance followed by CLAHE.

    Args:
        image_arrays: Iterable of 3-channel images. In this notebook they come
            from ``cv2.imread``, i.e. in BGR channel order.

    Returns:
        A list of enhanced images. Each one is the CLAHE-equalised grayscale
        image expanded back to three channels.
    """
    # One CLAHE object serves every image; building it inside the loop only
    # repeated the same construction
    clahe_function = clahe()
    enhanced_images = []

    for image in image_arrays:

        # White Balance, applied to each channel independently
        image_wb = np.dstack([white_balance(channel, 0.05) for channel in cv2.split(image)])

        # The images are BGR (cv2.imread), so convert with the BGR code;
        # COLOR_RGB2GRAY would swap the red and blue weights
        gray_image = cv2.cvtColor(image_wb, cv2.COLOR_BGR2GRAY)

        # CLAHE works on a single channel; expand back to three afterwards
        image_clahe = clahe_function.apply(gray_image)
        enhanced_images.append(cv2.cvtColor(image_clahe, cv2.COLOR_GRAY2RGB))

    return enhanced_images

In [None]:
# Enhance the raw images of every category
normal_dataset_enhanced, viral_dataset_enhanced, covid_dataset_enhanced = [
    image_enhancer(raw_images)
    for raw_images in (normal_dataset, viral_dataset, covid_dataset)
]

In [None]:
# For every category, show image #100 before (left) and after (right) enhancement
for title_prefix, raw_images, enhanced_images in (
        ("Normal CXR", normal_dataset, normal_dataset_enhanced),
        ("Viral CXR", viral_dataset, viral_dataset_enhanced),
        ("COVID-19 CXR", covid_dataset, covid_dataset_enhanced)):
    plt.figure(figsize=(10, 10))

    ax = plt.subplot(2, 2, 1)
    plt.imshow(raw_images[100], cmap = "gray")
    plt.title(title_prefix + " - Before Image Enhancement")
    plt.axis("off")

    ax = plt.subplot(2, 2, 2)
    plt.imshow(enhanced_images[100], cmap = "gray")
    plt.title(title_prefix + " - After Image Enhancement")
    plt.axis("off")

In [None]:
def normalizer(image_arrays):
    """Divide every image array by 255.0.

    Args:
        image_arrays: Iterable of image arrays.

    Returns:
        A list with one scaled array per input array, in the same order.
    """
    return [image_array / 255.0 for image_array in image_arrays]

In [None]:
# Normalize the raw (not enhanced) images of every category
normal_dataset_normalized, viral_dataset_normalized, covid_dataset_normalized = [
    normalizer(raw_images)
    for raw_images in (normal_dataset, viral_dataset, covid_dataset)
]

In [None]:
def split_and_merge_function(image_arrays, split_factor = (0.7, 0.15, 0.15)):
    """Split every category into train/validation/test parts and merge them.

    Each category is split by position (train first, then validation, then
    test). The label of an image is the index of its category in
    ``image_arrays``.

    Args:
        image_arrays: List of categories, each a sequence of image arrays
            (assumed to share one shape so they can be stacked).
        split_factor: Train, validation and test fractions. Only the first two
            are read; the test part is whatever remains.

    Returns:
        Dictionary with the keys ``train_dataset``, ``validation_dataset`` and
        ``test_dataset`` (stacked image arrays) plus ``train_labels``,
        ``validation_labels`` and ``test_labels`` (one-hot encoded labels).
    """
    train_dataset, validation_dataset, test_dataset = [], [], []

    for image_array_id, image_array in enumerate(image_arrays):

        # Pair every image with its label
        dataset = [[image, [image_array_id]] for image in image_array]

        # Positional split points for this category
        train_end = int(np.around(len(dataset) * split_factor[0]))
        validation_end = int(np.around(len(dataset) * (split_factor[0] + split_factor[1])))

        # The pairs stay plain lists: wrapping them in np.array builds a ragged
        # array, which NumPy >= 1.24 rejects with a ValueError
        train_dataset.extend(dataset[:train_end])
        validation_dataset.extend(dataset[train_end:validation_end])
        test_dataset.extend(dataset[validation_end:])

        # Re-seeding and reshuffling the accumulated lists after every category
        # is kept on purpose so the final sample order matches earlier runs
        random.seed(42)
        random.shuffle(train_dataset)
        random.shuffle(validation_dataset)
        random.shuffle(test_dataset)

    datasets = {}

    # Separate the images from the labels once, after all categories are merged
    datasets['train_dataset'] = np.array([pair[0] for pair in train_dataset])
    datasets['validation_dataset'] = np.array([pair[0] for pair in validation_dataset])
    datasets['test_dataset'] = np.array([pair[0] for pair in test_dataset])

    # Convert labels from label-encoding to one-hot encoding
    datasets['train_labels'] = to_categorical(np.array([pair[1] for pair in train_dataset]))
    datasets['validation_labels'] = to_categorical(np.array([pair[1] for pair in validation_dataset]))
    datasets['test_labels'] = to_categorical(np.array([pair[1] for pair in test_dataset]))

    return datasets

In [None]:
# Dataset variant 1: normalization only (no enhancement)
image_arrays_normalized = [normal_dataset_normalized, viral_dataset_normalized, covid_dataset_normalized]
datasets_normalized = split_and_merge_function(image_arrays_normalized, split_factor = [0.7, 0.15, 0.15])

# Pull the three image splits and their labels out of the returned dictionary
train_dataset1, validation_dataset1, test_dataset1 = [
    datasets_normalized[key] for key in ('train_dataset', 'validation_dataset', 'test_dataset')
]
train_labels1, validation_labels1, test_labels1 = [
    datasets_normalized[key] for key in ('train_labels', 'validation_labels', 'test_labels')
]

split_total1 = len(train_dataset1) + len(validation_dataset1) + len(test_dataset1)
print("The Dataset which is made up of {} Image Arrays has been splitted into:".format(split_total1))
print('{} Training Image Arrays'.format(len(train_dataset1)))
print('{} Validation Image Arrays'.format(len(validation_dataset1)))
print('{} Test Image Arrays'.format(len(test_dataset1)))

In [None]:
# Dataset variant 2: enhancement only (no normalization)
image_arrays_enhanced = [normal_dataset_enhanced, viral_dataset_enhanced, covid_dataset_enhanced]
datasets_enhanced = split_and_merge_function(image_arrays_enhanced, split_factor = [0.7, 0.15, 0.15])

# Pull the three image splits and their labels out of the returned dictionary
train_dataset2, validation_dataset2, test_dataset2 = [
    datasets_enhanced[key] for key in ('train_dataset', 'validation_dataset', 'test_dataset')
]
train_labels2, validation_labels2, test_labels2 = [
    datasets_enhanced[key] for key in ('train_labels', 'validation_labels', 'test_labels')
]

split_total2 = len(train_dataset2) + len(validation_dataset2) + len(test_dataset2)
print("The Dataset which is made up of {} Image Arrays has been splitted into:".format(split_total2))
print('{} Training Image Arrays'.format(len(train_dataset2)))
print('{} Validation Image Arrays'.format(len(validation_dataset2)))
print('{} Test Image Arrays'.format(len(test_dataset2)))

In [None]:
def data_augmenter():
    """Build the augmentation block: a horizontal RandomFlip, then RandomRotation(0.1)."""
    return tf.keras.Sequential([
        RandomFlip('horizontal'),
        RandomRotation(0.1),
    ])

## 3. Transfer Learning Using NASNetMobile <a class="anchor" id="3"></a>

### 3.1 Classify 3 classes with data normalization 

### Training

In [None]:
def model(image_size, num_classes, data_augmentation = None):
    """Build a NASNetMobile transfer-learning classifier.

    The layer order is: input, augmentation block, frozen NASNetMobile base,
    global average pooling, Dropout(0.2), Dropout(0.5), dense prediction layer.

    Args:
        image_size: ``(height, width)`` tuple; a channel dimension of 3 is
            appended to form the input shape.
        num_classes: Number of target classes. Exactly 2 gives a single
            sigmoid unit, anything else gives ``num_classes`` softmax units.
        data_augmentation: Optional augmentation layer/model applied to the
            inputs. When omitted, a fresh ``data_augmenter()`` is built for
            this model.

    Returns:
        The uncompiled Keras model.

    NOTE(review): no NASNet ``preprocess_input`` is applied here; callers in
    this notebook feed either 0-1 scaled or raw enhanced images - confirm that
    this is intended.
    """
    # Build the default per call. A default of `data_augmenter()` in the
    # signature is evaluated once at definition time, which made every model
    # share the same augmentation block.
    if data_augmentation is None:
        data_augmentation = data_augmenter()

    input_shape = image_size + (3,)

    base_model = tf.keras.applications.NASNetMobile(input_shape=input_shape,
                                                   include_top=False,
                                                   weights="imagenet")

    # Freeze the base model by making it non trainable
    base_model.trainable = False

    # Input layer matching the base model's input shape
    inputs = tf.keras.Input(shape=input_shape)

    # Apply data augmentation to the inputs
    x = data_augmentation(inputs)

    # training=False keeps the batch-norm layers of the base in inference mode
    x = base_model(x, training=False)

    # New classification head: global average pooling summarizes each channel,
    # followed by the two dropout layers (0.2, then 0.5) against overfitting
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.2)(x)
    x = Dropout(0.5)(x)

    # Binary problems use one sigmoid unit, multi-class ones a softmax layer
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes

    outputs = Dense(units, activation=activation)(x)

    return Model(inputs, outputs)

In [None]:
# Build the 3-class classifier with the model() function for 224x224 inputs
image_size = (224,224)
nasnet_model = model(image_size, num_classes = 3)

# Preview the Model Summary
nasnet_model.summary()

In [None]:
# Settings for the initial training phase (base model frozen in model())
base_learning_rate = 0.001
optimizer = Adam(learning_rate = base_learning_rate)
initial_epochs = 50
batch_size = 64
loss = 'categorical_crossentropy'
metrics = ['accuracy']
# Both callbacks watch the validation accuracy: EarlyStopping with a patience
# of 20 epochs (restoring the best weights), ReduceLROnPlateau with a patience
# of 8 epochs, a factor of 0.1 and a floor of 2e-6
callback = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',factor=1e-1, patience=8, verbose=1, min_lr = 2e-6)

nasnet_model.compile(optimizer = optimizer,
              loss = loss,
              metrics = metrics)

In [None]:
# Initial training phase on the normalized dataset, validated on its validation split
history = nasnet_model.fit(train_dataset1, train_labels1,
                                               batch_size = batch_size, 
                                               epochs = initial_epochs, 
                                               validation_data = (validation_dataset1, validation_labels1), 
                                               callbacks = [callback, reduce_lr], 
                                               shuffle = True)

In [None]:
# Accuracy curves get a leading 0.0 so they start at the origin; the loss
# curves are plotted exactly as recorded
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

fig, (accuracy_axis, loss_axis) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
accuracy_axis.plot(acc, label='Training Accuracy')
accuracy_axis.plot(val_acc, label='Validation Accuracy')
accuracy_axis.legend(loc='lower right')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.set_ylim([min(accuracy_axis.get_ylim()), 1])
accuracy_axis.set_title('Training and Validation Accuracy')

# Bottom panel: loss
loss_axis.plot(loss, label='Training Loss')
loss_axis.plot(val_loss, label='Validation Loss')
loss_axis.legend(loc='upper right')
loss_axis.set_ylabel('Cross Entropy')
loss_axis.set_ylim([0, 1.0])
loss_axis.set_title('Training and Validation Loss')
loss_axis.set_xlabel('epoch')
plt.show()

In [None]:
# layers[2] is the NASNetMobile base: model() puts the input layer at index 0
# and the augmentation block at index 1
base_model = nasnet_model.layers[2] # NASNetMobile base network
base_model.trainable = True


# Fine-tuning settings: same as the initial phase except for a learning rate
# that is 10x smaller
optimizer = Adam(learning_rate = 0.1 * base_learning_rate)
batch_size = 64
loss = 'categorical_crossentropy'
metrics = ['accuracy']
callback = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',factor=1e-1, patience=8, verbose=1, min_lr = 2e-6)

# Recompile with the new optimizer after changing the trainable flag
nasnet_model.compile(optimizer = optimizer,
                        loss = loss,
                        metrics = metrics)

In [None]:
fine_tune_epochs = 30

# Keras treats `epochs` as the index of the final epoch, not as a number of
# additional epochs. Continue right after the epochs that actually ran:
# history.epoch[-1] repeated the last epoch index, and a fixed
# initial_epochs + fine_tune_epochs total stretched the fine-tune phase
# whenever EarlyStopping had ended the first phase early.
initial_epoch = len(history.epoch)
total_epochs = initial_epoch + fine_tune_epochs

history = nasnet_model.fit(train_dataset1, train_labels1,
                           epochs = total_epochs,
                           batch_size = batch_size,
                           initial_epoch = initial_epoch,
                           callbacks = [callback, reduce_lr],
                           validation_data = (validation_dataset1, validation_labels1),
                           shuffle = True)

In [None]:
# Accuracy curves get a leading 0.0 so they start at the origin; the loss
# curves are plotted exactly as recorded
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

fig, (accuracy_axis, loss_axis) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
accuracy_axis.plot(acc, label='Training Accuracy')
accuracy_axis.plot(val_acc, label='Validation Accuracy')
accuracy_axis.legend(loc='lower right')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.set_ylim([min(accuracy_axis.get_ylim()), 1])
accuracy_axis.set_title('Training and Validation Accuracy')

# Bottom panel: loss
loss_axis.plot(loss, label='Training Loss')
loss_axis.plot(val_loss, label='Validation Loss')
loss_axis.legend(loc='upper right')
loss_axis.set_ylabel('Cross Entropy')
loss_axis.set_ylim([0, 1.0])
loss_axis.set_title('Training and Validation Loss')
loss_axis.set_xlabel('epoch')
plt.show()

###  Model Evaluation <a class="anchor" id="4"></a>


In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          target_names = ('Normal','Viral Pneumonia','Covid-19')):
    """Plot a confusion matrix as an annotated heat map.

    Args:
        cm: Confusion matrix (rows are true labels, columns are predictions).
        classes: Tick labels used when ``target_names`` is None.
        normalize: When True, every row is divided by its sum before plotting.
        title: Figure title.
        cmap: Matplotlib colormap of the heat map.
        target_names: Tick labels in label-id order. The default follows the
            notebook's label ids: 0 = normal, 1 = viral pneumonia,
            2 = COVID-19.
    """
    # Normalize before drawing so colours, annotations and the text-colour
    # threshold all refer to the same values
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # target_names takes precedence over classes when it is given
    tick_labels = classes if target_names is None else target_names
    tick_marks = np.arange(len(tick_labels))
    plt.xticks(tick_marks, tick_labels, rotation=45)
    plt.yticks(tick_marks, tick_labels)

    # White text on dark cells, black text on light ones
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_text = format(cm[i, j], '.2f') if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
training_predictions = nasnet_model.predict(train_dataset1, batch_size = 64)

# Convert the predicted probabilities to labels
training_predictions_classes = np.argmax(training_predictions, axis=1)
training_labels_classes = np.argmax(train_labels1, axis=1) # Position of the actual label

# Label ids follow the category order given to split_and_merge_function
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19), so the report names must
# use the same order
print(classification_report(training_labels_classes, training_predictions_classes, target_names=['normal','virus','covid']))

In [None]:
# compute the confusion matrix
confusion_mtx = confusion_matrix(training_labels_classes, training_predictions_classes)
# plot the confusion matrix; tick names are passed in label-id order
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19)
plot_confusion_matrix(confusion_mtx, classes = range(3),
                      target_names = ['Normal', 'Viral Pneumonia', 'Covid-19'])

In [None]:
validation_predictions = nasnet_model.predict(validation_dataset1, batch_size = 64)

# Convert the predicted probabilities to labels
validation_predictions_classes = np.argmax(validation_predictions, axis=1)
validation_labels_classes = np.argmax(validation_labels1, axis=1) # Position of the actual label

# Label ids follow the category order given to split_and_merge_function
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19), so the report names must
# use the same order
print(classification_report(validation_labels_classes, validation_predictions_classes, target_names=['normal','virus','covid']))

In [None]:
# compute the confusion matrix
confusion_mtx_2 = confusion_matrix(validation_labels_classes, validation_predictions_classes)
# plot the confusion matrix; tick names are passed in label-id order
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19)
plot_confusion_matrix(confusion_mtx_2, classes = range(3),
                      target_names = ['Normal', 'Viral Pneumonia', 'Covid-19'])

In [None]:
test_predictions = nasnet_model.predict(test_dataset1, batch_size = 64)

# Convert the predicted probabilities to labels
test_predictions_classes = np.argmax(test_predictions, axis=1)
test_labels_classes = np.argmax(test_labels1, axis=1) # Position of the actual label

# Label ids follow the category order given to split_and_merge_function
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19), so the report names must
# use the same order
print(classification_report(test_labels_classes, test_predictions_classes, target_names=['normal','virus','covid']))

In [None]:
# compute the confusion matrix
confusion_mtx_3 = confusion_matrix(test_labels_classes, test_predictions_classes)
# plot the confusion matrix; tick names are passed in label-id order
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19)
plot_confusion_matrix(confusion_mtx_3, classes = range(3),
                      target_names = ['Normal', 'Viral Pneumonia', 'Covid-19'])

In [None]:
# Show the first nine test images with their predicted and actual class names
plt.figure(figsize=(12, 12))
for image in range(9):
    ax = plt.subplot(3, 3, image + 1)
    plt.imshow(test_dataset1[image], cmap = "gray")
    predicted_name = str(class_names[test_predictions_classes[image]])
    actual_name = str(class_names[test_labels_classes[image]])
    plt.title("Predicted Class: " + predicted_name + "\n" + "Actual Class: " + actual_name)
    plt.axis("off")


### 3.2 Classify 3 classes with data enhancement 

### Training

In [None]:
# Build a fresh 3-class classifier with the model() function for the enhanced dataset
image_size = (224,224)
nasnet_model = model(image_size, num_classes = 3)

# Preview the Model Summary
nasnet_model.summary()

In [None]:
# Settings for the initial training phase (base model frozen in model())
base_learning_rate = 0.001
optimizer = Adam(learning_rate = base_learning_rate)
initial_epochs = 50
batch_size = 64
loss = 'categorical_crossentropy'
metrics = ['accuracy']
# Both callbacks watch the validation accuracy: EarlyStopping with a patience
# of 20 epochs (restoring the best weights), ReduceLROnPlateau with a patience
# of 8 epochs, a factor of 0.1 and a floor of 2e-6
callback = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',factor=1e-1, patience=8, verbose=1, min_lr = 2e-6)

nasnet_model.compile(optimizer = optimizer,
              loss = loss,
              metrics = metrics)

In [None]:
# Initial training phase on the enhanced dataset, validated on its validation split
history2 = nasnet_model.fit(train_dataset2, train_labels2,
                                               batch_size = batch_size, 
                                               epochs = initial_epochs, 
                                               validation_data = (validation_dataset2, validation_labels2), 
                                               callbacks = [callback, reduce_lr], 
                                               shuffle = True)

In [None]:
# Accuracy curves get a leading 0.0 so they start at the origin; the loss
# curves are plotted exactly as recorded
acc = [0.] + history2.history['accuracy']
val_acc = [0.] + history2.history['val_accuracy']
loss = history2.history['loss']
val_loss = history2.history['val_loss']

fig, (accuracy_axis, loss_axis) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
accuracy_axis.plot(acc, label='Training Accuracy')
accuracy_axis.plot(val_acc, label='Validation Accuracy')
accuracy_axis.legend(loc='lower right')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.set_ylim([min(accuracy_axis.get_ylim()), 1])
accuracy_axis.set_title('Training and Validation Accuracy')

# Bottom panel: loss (wide 0-500 range for this un-normalized dataset)
loss_axis.plot(loss, label='Training Loss')
loss_axis.plot(val_loss, label='Validation Loss')
loss_axis.legend(loc='upper right')
loss_axis.set_ylabel('Cross Entropy')
loss_axis.set_ylim([0, 500])
loss_axis.set_title('Training and Validation Loss')
loss_axis.set_xlabel('epoch')
plt.show()

In [None]:
# layers[2] is the NASNetMobile base: model() puts the input layer at index 0
# and the augmentation block at index 1
base_model = nasnet_model.layers[2] # NASNetMobile base network
base_model.trainable = True


# Fine-tuning settings: same as the initial phase except for a learning rate
# that is 10x smaller
optimizer = Adam(learning_rate = 0.1 * base_learning_rate)
batch_size = 64
loss = 'categorical_crossentropy'
metrics = ['accuracy']
callback = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy',factor=1e-1, patience=8, verbose=1, min_lr = 2e-6)

# Recompile with the new optimizer after changing the trainable flag
nasnet_model.compile(optimizer = optimizer,
                        loss = loss,
                        metrics = metrics)

In [None]:
fine_tune_epochs = 30

# Keras treats `epochs` as the index of the final epoch, not as a number of
# additional epochs. Continue right after the epochs that actually ran:
# history2.epoch[-1] repeated the last epoch index, and a fixed
# initial_epochs + fine_tune_epochs total stretched the fine-tune phase
# whenever EarlyStopping had ended the first phase early.
initial_epoch = len(history2.epoch)
total_epochs = initial_epoch + fine_tune_epochs

history2 = nasnet_model.fit(train_dataset2, train_labels2,
                            epochs = total_epochs,
                            batch_size = batch_size,
                            initial_epoch = initial_epoch,
                            callbacks = [callback, reduce_lr],
                            validation_data = (validation_dataset2, validation_labels2),
                            shuffle = True)

In [None]:
# Accuracy curves get a leading 0.0 so they start at the origin; the loss
# curves are plotted exactly as recorded
acc = [0.] + history2.history['accuracy']
val_acc = [0.] + history2.history['val_accuracy']
loss = history2.history['loss']
val_loss = history2.history['val_loss']

fig, (accuracy_axis, loss_axis) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
accuracy_axis.plot(acc, label='Training Accuracy')
accuracy_axis.plot(val_acc, label='Validation Accuracy')
accuracy_axis.legend(loc='lower right')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.set_ylim([min(accuracy_axis.get_ylim()), 1])
accuracy_axis.set_title('Training and Validation Accuracy')

# Bottom panel: loss
loss_axis.plot(loss, label='Training Loss')
loss_axis.plot(val_loss, label='Validation Loss')
loss_axis.legend(loc='upper right')
loss_axis.set_ylabel('Cross Entropy')
loss_axis.set_ylim([0, 2])
loss_axis.set_title('Training and Validation Loss')
loss_axis.set_xlabel('epoch')
plt.show()

###  Model Evaluation <a class="anchor" id="4"></a>


In [None]:
training_predictions = nasnet_model.predict(train_dataset2, batch_size = 64)
# Convert the predicted probabilities to labels
training_predictions_classes = np.argmax(training_predictions, axis=1)
training_labels_classes = np.argmax(train_labels2, axis=1) # Position of the actual label

# Label ids follow the category order given to split_and_merge_function
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19), so the report names must
# use the same order
print(classification_report(training_labels_classes, training_predictions_classes, target_names=['normal','virus','covid']))

In [None]:
# compute the confusion matrix
confusion_mtx = confusion_matrix(training_labels_classes, training_predictions_classes)
# plot the confusion matrix; tick names are passed in label-id order
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19)
plot_confusion_matrix(confusion_mtx, classes = range(3),
                      target_names = ['Normal', 'Viral Pneumonia', 'Covid-19'])

In [None]:
validation_predictions = nasnet_model.predict(validation_dataset2, batch_size = 64)
# Convert the predicted probabilities to labels
validation_predictions_classes = np.argmax(validation_predictions, axis=1)
validation_labels_classes = np.argmax(validation_labels2, axis=1) # Position of the actual label

# Label ids follow the category order given to split_and_merge_function
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19), so the report names must
# use the same order
print(classification_report(validation_labels_classes, validation_predictions_classes, target_names=['normal','virus','covid']))

In [None]:
# compute the confusion matrix
confusion_mtx_2 = confusion_matrix(validation_labels_classes, validation_predictions_classes)
# plot the confusion matrix; tick names are passed in label-id order
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19)
plot_confusion_matrix(confusion_mtx_2, classes = range(3),
                      target_names = ['Normal', 'Viral Pneumonia', 'Covid-19'])

In [None]:
test_predictions = nasnet_model.predict(test_dataset2, batch_size = 64)
# Convert the predicted probabilities to labels
test_predictions_classes = np.argmax(test_predictions, axis=1)
test_labels_classes = np.argmax(test_labels2, axis=1) # Position of the actual label

# Label ids follow the category order given to split_and_merge_function
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19), so the report names must
# use the same order
print(classification_report(test_labels_classes, test_predictions_classes, target_names=['normal','virus','covid']))

In [None]:
# compute the confusion matrix
confusion_mtx_3 = confusion_matrix(test_labels_classes, test_predictions_classes)
# plot the confusion matrix; tick names are passed in label-id order
# (0 = normal, 1 = viral pneumonia, 2 = COVID-19)
plot_confusion_matrix(confusion_mtx_3, classes = range(3),
                      target_names = ['Normal', 'Viral Pneumonia', 'Covid-19'])

In [None]:
# Show the first nine test images with their predicted and actual class names
plt.figure(figsize=(12, 12))
for image in range(9):
    ax = plt.subplot(3, 3, image + 1)
    plt.imshow(test_dataset2[image], cmap = "gray")
    predicted_name = str(class_names[test_predictions_classes[image]])
    actual_name = str(class_names[test_labels_classes[image]])
    plt.title("Predicted Class: " + predicted_name + "\n" + "Actual Class: " + actual_name)
    plt.axis("off")


### 3.3 Binary classification with data normalization

In [None]:
def split_and_merge_function2(image_arrays, split_factor = (0.7, 0.15, 0.15)):
    """Split every category into train/validation/test parts and merge them.

    Same as ``split_and_merge_function`` except that the labels stay
    label-encoded (no one-hot conversion). Each category is split by position
    (train first, then validation, then test). The label of an image is the
    index of its category in ``image_arrays``.

    Args:
        image_arrays: List of categories, each a sequence of image arrays
            (assumed to share one shape so they can be stacked).
        split_factor: Train, validation and test fractions. Only the first two
            are read; the test part is whatever remains.

    Returns:
        Dictionary with the keys ``train_dataset``, ``validation_dataset`` and
        ``test_dataset`` (stacked image arrays) plus ``train_labels``,
        ``validation_labels`` and ``test_labels`` (one ``[label]`` row per
        image).
    """
    train_dataset, validation_dataset, test_dataset = [], [], []

    for image_array_id, image_array in enumerate(image_arrays):

        # Pair every image with its label
        dataset = [[image, [image_array_id]] for image in image_array]

        # Positional split points for this category
        train_end = int(np.around(len(dataset) * split_factor[0]))
        validation_end = int(np.around(len(dataset) * (split_factor[0] + split_factor[1])))

        # The pairs stay plain lists: wrapping them in np.array builds a ragged
        # array, which NumPy >= 1.24 rejects with a ValueError
        train_dataset.extend(dataset[:train_end])
        validation_dataset.extend(dataset[train_end:validation_end])
        test_dataset.extend(dataset[validation_end:])

        # Re-seeding and reshuffling the accumulated lists after every category
        # is kept on purpose so the final sample order matches earlier runs
        random.seed(42)
        random.shuffle(train_dataset)
        random.shuffle(validation_dataset)
        random.shuffle(test_dataset)

    datasets = {}

    # Separate the images from the labels once, after all categories are merged
    datasets['train_dataset'] = np.array([pair[0] for pair in train_dataset])
    datasets['validation_dataset'] = np.array([pair[0] for pair in validation_dataset])
    datasets['test_dataset'] = np.array([pair[0] for pair in test_dataset])

    # Labels are deliberately left label-encoded (not one-hot)
    datasets['train_labels'] = np.array([pair[1] for pair in train_dataset])
    datasets['validation_labels'] = np.array([pair[1] for pair in validation_dataset])
    datasets['test_labels'] = np.array([pair[1] for pair in test_dataset])

    return datasets

In [None]:
# Stack the normal and viral images (normal first) into one non-COVID dataset,
# then normalize it
non_covid_dataset = np.concatenate([normal_dataset, viral_dataset], axis=0)
non_covid_dataset_normalized = normalizer(non_covid_dataset)

In [None]:
# Dataset variant 3: normalization only, binary labels (0 = COVID-19, 1 = non-COVID)
image_arrays_normalized = [covid_dataset_normalized, non_covid_dataset_normalized]
datasets_normalized = split_and_merge_function2(image_arrays_normalized, split_factor = [0.7, 0.15, 0.15])

# Pull the three image splits and their labels out of the returned dictionary
train_dataset3, validation_dataset3, test_dataset3 = [
    datasets_normalized[key] for key in ('train_dataset', 'validation_dataset', 'test_dataset')
]
train_labels3, validation_labels3, test_labels3 = [
    datasets_normalized[key] for key in ('train_labels', 'validation_labels', 'test_labels')
]

split_total3 = len(train_dataset3) + len(validation_dataset3) + len(test_dataset3)
print("The Dataset which is made up of {} Image Arrays has been splitted into:".format(split_total3))
print('{} Training Image Arrays'.format(len(train_dataset3)))
print('{} Validation Image Arrays'.format(len(validation_dataset3)))
print('{} Test Image Arrays'.format(len(test_dataset3)))

In [None]:
# Build the binary classifier with the model() function; num_classes = 2 selects
# the single sigmoid output unit
image_size = (224,224)
nasnet_model = model(image_size, num_classes = 2)

# Preview the Model Summary
nasnet_model.summary()

In [None]:
# Settings for the initial training phase of the binary model (base model frozen in model())
base_learning_rate = 0.001
optimizer = Adam(learning_rate = base_learning_rate)
initial_epochs = 50
batch_size = 64
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
# Both callbacks watch the validation binary accuracy: EarlyStopping with a
# patience of 20 epochs (restoring the best weights), ReduceLROnPlateau with a
# patience of 8 epochs, a factor of 0.1 and a floor of 2e-6
callback = EarlyStopping(monitor='val_binary_accuracy', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_binary_accuracy',factor=1e-1, patience=8, verbose=1, min_lr = 2e-6)

nasnet_model.compile(optimizer = optimizer,
              loss = loss,
              metrics = metrics)

In [None]:
# Initial training phase on the binary dataset, validated on its validation split
history3 = nasnet_model.fit(train_dataset3, train_labels3,
                                               batch_size = batch_size, 
                                               epochs = initial_epochs, 
                                               validation_data = (validation_dataset3, validation_labels3), 
                                               callbacks = [callback, reduce_lr], 
                                               shuffle = True)

In [None]:
# Accuracy curves get a leading 0.0 so they start at the origin; the loss
# curves are plotted exactly as recorded
acc = [0.] + history3.history['binary_accuracy']
val_acc = [0.] + history3.history['val_binary_accuracy']
loss = history3.history['loss']
val_loss = history3.history['val_loss']

fig, (accuracy_axis, loss_axis) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
accuracy_axis.plot(acc, label='Training Accuracy')
accuracy_axis.plot(val_acc, label='Validation Accuracy')
accuracy_axis.legend(loc='lower right')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.set_ylim([min(accuracy_axis.get_ylim()), 1])
accuracy_axis.set_title('Training and Validation Accuracy')

# Bottom panel: loss
loss_axis.plot(loss, label='Training Loss')
loss_axis.plot(val_loss, label='Validation Loss')
loss_axis.legend(loc='upper right')
loss_axis.set_ylabel('Cross Entropy')
loss_axis.set_ylim([0, 1.0])
loss_axis.set_title('Training and Validation Loss')
loss_axis.set_xlabel('epoch')
plt.show()

In [None]:
# layers[2] is the NASNetMobile base: model() puts the input layer at index 0
# and the augmentation block at index 1
base_model = nasnet_model.layers[2] # NASNetMobile base network
base_model.trainable = True


# Fine-tuning settings: same as the initial phase except for a learning rate
# that is 10x smaller
optimizer = Adam(learning_rate = 0.1 * base_learning_rate)
batch_size = 64
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
callback = EarlyStopping(monitor='val_binary_accuracy', patience=20, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_binary_accuracy',factor=1e-1, patience=8, verbose=1, min_lr = 2e-6)

# Recompile with the new optimizer after changing the trainable flag
nasnet_model.compile(optimizer = optimizer,
                        loss = loss,
                        metrics = metrics)

In [None]:
fine_tune_epochs = 30

# Keras treats `epochs` as the index of the final epoch, not as a number of
# additional epochs. Continue right after the epochs that actually ran:
# history3.epoch[-1] repeated the last epoch index, and a fixed
# initial_epochs + fine_tune_epochs total stretched the fine-tune phase
# whenever EarlyStopping had ended the first phase early.
initial_epoch = len(history3.epoch)
total_epochs = initial_epoch + fine_tune_epochs

history3 = nasnet_model.fit(train_dataset3, train_labels3,
                            epochs = total_epochs,
                            batch_size = batch_size,
                            initial_epoch = initial_epoch,
                            callbacks = [callback, reduce_lr],
                            validation_data = (validation_dataset3, validation_labels3),
                            shuffle = True)

In [None]:
# Accuracy curves get a leading 0.0 so they start at the origin; the loss
# curves are plotted exactly as recorded
acc = [0.] + history3.history['binary_accuracy']
val_acc = [0.] + history3.history['val_binary_accuracy']
loss = history3.history['loss']
val_loss = history3.history['val_loss']

fig, (accuracy_axis, loss_axis) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
accuracy_axis.plot(acc, label='Training Accuracy')
accuracy_axis.plot(val_acc, label='Validation Accuracy')
accuracy_axis.legend(loc='lower right')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.set_ylim([min(accuracy_axis.get_ylim()), 1.1])
accuracy_axis.set_title('Training and Validation Accuracy')

# Bottom panel: loss
loss_axis.plot(loss, label='Training Loss')
loss_axis.plot(val_loss, label='Validation Loss')
loss_axis.legend(loc='upper right')
loss_axis.set_ylabel('Cross Entropy')
loss_axis.set_ylim([0, 0.5])
loss_axis.set_title('Training and Validation Loss')
loss_axis.set_xlabel('epoch')
plt.show()

###  Model Evaluation <a class="anchor" id="4"></a>

In [None]:
def plot_confusion_matrix2(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          target_names = ['Covid-19','None-Covid-19']):
    """
    Draw a confusion matrix as an annotated heat map with pyplot.

    Args:
        cm: 2-D array of counts; rows are labelled "True label" and columns
            "Predicted label".
        classes: tick labels used when `target_names` is None.
        normalize: when True, every row is divided by its sum first, so both
            the heat map and the cell annotations show row fractions.
        title: title placed above the plot.
        cmap: matplotlib colormap used for the heat map.
        target_names: tick labels that take precedence over `classes`; pass
            None to fall back to `classes`. The default list is never mutated.
            NOTE(review): 'None-Covid-19' is presumably meant to read
            'Non-Covid-19'; the default is left unchanged here.

    Returns:
        None. The plot is drawn on the current pyplot figure.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so colours and annotations agree. Rows without
    # any sample stay at 0 instead of turning into NaN.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # target_names wins over classes when it is provided
    labels = classes if target_names is None else target_names
    tick_marks = np.arange(len(labels))
    plt.xticks(tick_marks, labels, rotation=45)
    plt.yticks(tick_marks, labels)

    # Cells above half of the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        plt.text(j, i, format(value, '.2f') if normalize else value,
                 horizontalalignment="center",
                 color="white" if value > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout last so the axis labels are taken into account
    plt.tight_layout()

In [None]:
# Model outputs for every training sample
training_predictions = nasnet_model.predict(train_dataset3, batch_size=64)

# Threshold each output at 0.5: below -> class 0, otherwise -> class 1
training_predictions_classes = [
    0 if score < 0.5 else 1
    for score in training_predictions
]

# Ground-truth labels are used as-is (presumably already 0/1 encoded)
training_labels_classes = train_labels3

# Per-class precision / recall / F1 on the training split
print(classification_report(training_labels_classes, training_predictions_classes))

In [None]:
# Confusion matrix of the training split, then its heat-map plot
confusion_mtx = confusion_matrix(
    y_true=training_labels_classes,
    y_pred=training_predictions_classes,
)
plot_confusion_matrix2(cm=confusion_mtx, classes=range(2))

In [None]:
# Model outputs for every validation sample
validation_predictions = nasnet_model.predict(validation_dataset3, batch_size=64)

# Threshold each output at 0.5: below -> class 0, otherwise -> class 1
validation_predictions_classes = [
    0 if score < 0.5 else 1
    for score in validation_predictions
]

# Ground-truth labels are used as-is (presumably already 0/1 encoded)
validation_labels_classes = validation_labels3

# Per-class precision / recall / F1 on the validation split
print(classification_report(validation_labels_classes, validation_predictions_classes))

In [None]:
# Confusion matrix of the validation split, then its heat-map plot
confusion_mtx_2 = confusion_matrix(
    y_true=validation_labels_classes,
    y_pred=validation_predictions_classes,
)
plot_confusion_matrix2(cm=confusion_mtx_2, classes=range(2))

In [None]:
# Predict on the held-out TEST split, so that this report and the sample-image
# cell below (which displays test_dataset3) refer to the same images instead
# of repeating the validation results.
test_predictions = nasnet_model.predict(test_dataset3, batch_size = 64)

# Threshold each output at 0.5: below -> class 0, otherwise -> class 1
test_predictions_classes = [0 if score < 0.5 else 1 for score in test_predictions]

# NOTE(review): test_labels3 is assumed to be produced next to test_dataset3
# by the split cell -- confirm.
test_labels_classes = test_labels3

# Per-class precision / recall / F1 on the test split
print(classification_report(test_labels_classes, test_predictions_classes))

In [None]:
# Confusion matrix built from test_labels_classes / test_predictions_classes,
# then its heat-map plot
confusion_mtx_3 = confusion_matrix(
    y_true=test_labels_classes,
    y_pred=test_predictions_classes,
)
plot_confusion_matrix2(cm=confusion_mtx_3, classes=range(2))

In [None]:
# Display names keyed by the numeric class label (0 and 1)
class_names2 = dict(enumerate(("Covid-19", "Non-Covid-19")))

In [None]:
# 3x3 grid of the first nine test images, titled with predicted vs. actual class
plt.figure(figsize=(12, 12))
for idx in range(9):
    plt.subplot(3, 3, idx + 1)
    plt.imshow(test_dataset3[idx], cmap="gray")
    predicted_label = 1 if test_predictions_classes[idx] >= 0.5 else 0
    # Each label entry is indexed with [0] to get the scalar class id
    actual_label = test_labels_classes[idx][0]
    plt.title("Predicted Class: " + str(class_names2[predicted_label]) + "\n" + "Actual Class: " + str(class_names2[actual_label]))
    plt.axis("off")


### 3.4 Binary classification with data enhancement

In [None]:
# Non-COVID class = normal images followed by viral-pneumonia images,
# stacked along the first axis and then passed through image_enhancer
non_covid_dataset = np.concatenate([normal_dataset, viral_dataset])
non_covid_dataset_enhanced = image_enhancer(non_covid_dataset)

In [None]:
# Dataset 4 is built from the enhanced images only.
# split_factor presumably holds the train / validation / test fractions.
image_arrays_nenhanced = [covid_dataset_enhanced, non_covid_dataset_enhanced]
datasets_enhanced = split_and_merge_function2(
    image_arrays_nenhanced,
    split_factor=[0.7, 0.15, 0.15],
)

# Unpack the image arrays and their labels from the returned dictionary
train_dataset4, validation_dataset4, test_dataset4 = (
    datasets_enhanced[key]
    for key in ('train_dataset', 'validation_dataset', 'test_dataset')
)
train_labels4, validation_labels4, test_labels4 = (
    datasets_enhanced[key]
    for key in ('train_labels', 'validation_labels', 'test_labels')
)

# Report the size of every split
total_images = len(train_dataset4) + len(validation_dataset4) + len(test_dataset4)
print("The Dataset which is made up of {} Image Arrays has been splitted into:".format(total_images))
print('{} Training Image Arrays'.format(len(train_dataset4)))
print('{} Validation Image Arrays'.format(len(validation_dataset4)))
print('{} Test Image Arrays'.format(len(test_dataset4)))

In [None]:
# Build a fresh two-class model through the `model` helper;
# image_size is passed as the expected input size
image_size = (224, 224)
nasnet_model = model(image_size, num_classes=2)

# Print the layer-by-layer summary
nasnet_model.summary()

In [None]:
# Hyper-parameters of the first training phase
base_learning_rate = 0.001
initial_epochs = 50
batch_size = 64
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
optimizer = Adam(learning_rate=base_learning_rate)

# Both callbacks track the validation binary accuracy
callback = EarlyStopping(
    monitor='val_binary_accuracy',
    patience=20,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_binary_accuracy',
    factor=1e-1,
    patience=8,
    min_lr=2e-6,
    verbose=1,
)

nasnet_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# First training phase on dataset 4; the validation split feeds the
# early-stopping and learning-rate callbacks
history4 = nasnet_model.fit(
    x=train_dataset4,
    y=train_labels4,
    validation_data=(validation_dataset4, validation_labels4),
    epochs=initial_epochs,
    batch_size=batch_size,
    shuffle=True,
    callbacks=[callback, reduce_lr],
)

In [None]:
# Accuracy curves get a leading 0 so they start from the origin
acc = [0.] + history4.history['binary_accuracy']
val_acc = [0.] + history4.history['val_binary_accuracy']

loss = history4.history['loss']
val_loss = history4.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath
curves_fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower bound, cap the upper bound at 1
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 100])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')

plt.show()

In [None]:
# Unfreeze the backbone (third layer of the model) so fine-tuning updates it.
# NOTE(review): the variable is named nasnet_model -- confirm which
# architecture actually sits at layers[2].
base_model = nasnet_model.layers[2]
base_model.trainable = True

# Fine-tuning hyper-parameters: learning rate is a tenth of the base rate
batch_size = 64
loss = 'binary_crossentropy'
metrics = ['binary_accuracy']
optimizer = Adam(learning_rate=0.1 * base_learning_rate)

# Both callbacks track the validation binary accuracy
callback = EarlyStopping(
    monitor='val_binary_accuracy',
    patience=20,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_binary_accuracy',
    factor=1e-1,
    patience=8,
    min_lr=2e-6,
    verbose=1,
)

# Compile again after toggling `trainable`, with the new optimizer
nasnet_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
fine_tune_epochs = 30

# `initial_epoch` is the zero-based index of the first epoch to run, so after
# k completed epochs training must resume at k == len(history.epoch).
# history.epoch[-1] is k - 1 and would re-use the last epoch number.
initial_epoch = len(history4.epoch)

# `epochs` is the index of the final epoch, not a count. It is anchored to the
# epochs that actually ran, so that an early stop in the first phase does not
# stretch fine-tuning beyond `fine_tune_epochs`.
total_epochs = initial_epoch + fine_tune_epochs

# Continue training the (now unfrozen) model; history4 is rebound to the
# fine-tuning history, which is what the plotting cell below reads.
history4 = nasnet_model.fit(train_dataset4, train_labels4,
                            epochs = total_epochs,
                            batch_size = batch_size,
                            initial_epoch = initial_epoch,
                            callbacks = [callback, reduce_lr],
                            validation_data = (validation_dataset4, validation_labels4),
                            shuffle = True)

In [None]:
# Accuracy curves get a leading 0 so they start from the origin
acc = [0.] + history4.history['binary_accuracy']
val_acc = [0.] + history4.history['val_binary_accuracy']

loss = history4.history['loss']
val_loss = history4.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath
curves_fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower bound, cap the upper bound at 1
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 1])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')

plt.show()

###  Model Evaluation <a class="anchor" id="4"></a>


In [None]:
# Model outputs for every training sample
training_predictions = nasnet_model.predict(train_dataset4, batch_size=64)

# Threshold each output at 0.5: below -> class 0, otherwise -> class 1
training_predictions_classes = [
    0 if score < 0.5 else 1
    for score in training_predictions
]

# Ground-truth labels are used as-is (presumably already 0/1 encoded)
training_labels_classes = train_labels4

# Per-class precision / recall / F1 on the training split
print(classification_report(training_labels_classes, training_predictions_classes))

In [None]:
# Confusion matrix of the training split, then its heat-map plot
confusion_mtx = confusion_matrix(
    y_true=training_labels_classes,
    y_pred=training_predictions_classes,
)
plot_confusion_matrix2(cm=confusion_mtx, classes=range(2))

In [None]:
# Model outputs for every validation sample
validation_predictions = nasnet_model.predict(validation_dataset4, batch_size=64)

# Threshold each output at 0.5: below -> class 0, otherwise -> class 1
validation_predictions_classes = [
    0 if score < 0.5 else 1
    for score in validation_predictions
]

# Ground-truth labels are used as-is (presumably already 0/1 encoded)
validation_labels_classes = validation_labels4

# Per-class precision / recall / F1 on the validation split
print(classification_report(validation_labels_classes, validation_predictions_classes))

In [None]:
# Confusion matrix of the validation split, then its heat-map plot
confusion_mtx_2 = confusion_matrix(
    y_true=validation_labels_classes,
    y_pred=validation_predictions_classes,
)
plot_confusion_matrix2(cm=confusion_mtx_2, classes=range(2))

In [None]:
# Predict on the held-out TEST split, so that this report and the sample-image
# cell below (which displays test_dataset4) refer to the same images instead
# of repeating the validation results.
test_predictions = nasnet_model.predict(test_dataset4, batch_size = 64)

# Threshold each output at 0.5: below -> class 0, otherwise -> class 1
test_predictions_classes = [0 if score < 0.5 else 1 for score in test_predictions]

# Ground-truth labels of the test split, as returned by the split cell
test_labels_classes = test_labels4

# Per-class precision / recall / F1 on the test split
print(classification_report(test_labels_classes, test_predictions_classes))

In [None]:
# Confusion matrix built from test_labels_classes / test_predictions_classes,
# then its heat-map plot
confusion_mtx_3 = confusion_matrix(
    y_true=test_labels_classes,
    y_pred=test_predictions_classes,
)
plot_confusion_matrix2(cm=confusion_mtx_3, classes=range(2))

In [None]:
# Display names keyed by the numeric class label (0 and 1)
class_names2 = dict(enumerate(("Covid-19", "Non-Covid-19")))

# 3x3 grid of the first nine test images, titled with predicted vs. actual class
plt.figure(figsize=(12, 12))
for idx in range(9):
    plt.subplot(3, 3, idx + 1)
    plt.imshow(test_dataset4[idx], cmap="gray")
    predicted_label = 1 if test_predictions_classes[idx] >= 0.5 else 0
    # Each label entry is indexed with [0] to get the scalar class id
    actual_label = test_labels_classes[idx][0]
    plt.title("Predicted Class: " + str(class_names2[predicted_label]) + "\n" + "Actual Class: " + str(class_names2[actual_label]))
    plt.axis("off")
