In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Data Reading 

import os
from glob import glob
from PIL import Image

# Data Processing 

import numpy as np
import pandas as pd
import cv2
import random
import albumentations as A

# Data Analysis

import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

# Data Modeling & Model Evaluation

from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from tensorflow.keras import layers, models
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report, recall_score, accuracy_score, precision_score, f1_score

# Grad-CAM

import keras
import matplotlib.cm as cm

In [2]:
levels = ['Normal', 'COVID']
path = "./COVID-19_Radiography_Dataset"
data_dir = os.path.join(path)

data = []
for id, level in enumerate(levels):
    for file in os.listdir(os.path.join(data_dir, level)):
        data.append(['{}/{}'.format(level, file), level])

data = pd.DataFrame(data, columns = ['image_file', 'corona_result'])

data['path'] = path + '/' + data['image_file']
data['corona_result'] = data['corona_result'].map({'Normal': 'Negative', 'COVID': 'Positive'})
samples = 13808


print('Number of Total Samples: %d'%(data.isnull().value_counts()))
data.head()

Number of Total Samples: 13808


Unnamed: 0,image_file,corona_result,path
0,Normal/Normal-6196.png,Negative,./COVID-19_Radiography_Dataset/Normal/Normal-6...
1,Normal/Normal-7288.png,Negative,./COVID-19_Radiography_Dataset/Normal/Normal-7...
2,Normal/Normal-1821.png,Negative,./COVID-19_Radiography_Dataset/Normal/Normal-1...
3,Normal/Normal-4781.png,Negative,./COVID-19_Radiography_Dataset/Normal/Normal-4...
4,Normal/Normal-4959.png,Negative,./COVID-19_Radiography_Dataset/Normal/Normal-4...


In [3]:
all_data = []

# Storing images and their labels into a list for further Train Test split

for i in range(len(data)):
    image = cv2.imread(data['path'][i])
    image = cv2.resize(image, (70, 70)) / 255.0
    label = 1 if data['corona_result'][i] == "Positive" else 0
    all_data.append([image, label])

In [4]:
all_data = pd.DataFrame(all_data, columns=['image','label'])
master = all_data.sample(frac=0.2)
node1 = all_data.sample(frac=0.2)
node2 = all_data.sample(frac=0.2)
node3 = all_data.sample(frac=0.2)
val = all_data.sample(frac=0.3) #evaluation data
print(master)

                                                   image  label
3475   [[[0.6588235294117647, 0.6588235294117647, 0.6...      0
1714   [[[0.00392156862745098, 0.00392156862745098, 0...      0
5351   [[[0.050980392156862744, 0.050980392156862744,...      0
2591   [[[0.0, 0.0, 0.0], [0.07058823529411765, 0.070...      0
12585  [[[0.2, 0.2, 0.2], [0.09803921568627451, 0.098...      1
...                                                  ...    ...
13632  [[[0.00392156862745098, 0.00392156862745098, 0...      1
779    [[[0.6823529411764706, 0.6823529411764706, 0.6...      0
10131  [[[0.00392156862745098, 0.00392156862745098, 0...      0
3807   [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...      0
2639   [[[0.011764705882352941, 0.011764705882352941,...      0

[2762 rows x 2 columns]


In [5]:
valx = val['image']
valy = val['label']
valx = np.array(valx)
valy = np.array(valy)
valx = np.stack(valx,axis=0)

In [6]:
def get_model(data): #return cnn model
    x = data['image']
    y = data['label'] 
    x = np.array(x)
    y = np.array(y)
    x = np.stack(x,axis=0)
    cnn_model = models.Sequential()
    cnn_model.add(layers.Conv2D(filters = 128, kernel_size = (3, 3), activation = 'relu', input_shape = (70, 70, 3)))
    cnn_model.add(layers.MaxPooling2D((2, 2)))
    cnn_model.add(layers.Dropout(0.3))

    cnn_model.add(layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu'))
    cnn_model.add(layers.MaxPooling2D((2, 2)))
    cnn_model.add(layers.Dropout(0.5))

    cnn_model.add(layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = 'relu'))
    cnn_model.add(layers.Flatten())
    cnn_model.add(layers.Dense(units = 16, activation = 'relu'))
    cnn_model.add(layers.Dropout(0.2))

    cnn_model.add(layers.Dense(units = 2))

    cnn_model.compile(optimizer = 'adam', 
               loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True), 
               metrics = ['accuracy'])
    es = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', mode = 'min', verbose = 1, patience = 4)

    #tf.random.set_seed(42)
    history = cnn_model.fit(x, y, 
                            epochs = 3, batch_size = 256,  
                            validation_data = (valx, valy), 
                            callbacks = [es])
    return cnn_model

In [7]:
def model_weight_ensemble(members, weights):
    # determine how many layers need to be averaged
    n_layers = len(members[0].get_weights())
    # create an set of average model weights
    avg_model_weights = list()
    for layer in range(n_layers):
        # collect this layer from each model
        layer_weights = np.array([model.get_weights()[layer] for model in members])
        # weighted average of weights for this layer
        avg_layer_weights = np.average(layer_weights, axis=0, weights=weights)
        # store average layer weights
        avg_model_weights.append(avg_layer_weights)

    model = keras.models.clone_model(members[0])
    # set the weights in the new
    model.set_weights(avg_model_weights)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [8]:
master_model = get_model(master)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [9]:
members1 = list()
members1.append(master_model)
node1_model = get_model(node1)
members1.append(node1_model)
n_models = len(members1)
weights = [1/n_models for i in range(1, n_models+1)]
# create a new model with the weighted average of all model weights
merge_model1 = model_weight_ensemble(members1, weights)
# summarize the created model
merge_model1.summary()
yp_val = merge_model1.predict(valx)
yp_val = np.argmax(yp_val, axis = 1)
print("Accuracy on Val Data: ", round(accuracy_score(valy, yp_val),4))

Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 68, 68, 128)       3584      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 34, 34, 128)      0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 34, 34, 128)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 64)        73792     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 64)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)      

In [10]:
members2=list()
members2.append(master_model)
node2_model = get_model(node2)
members2.append(node2_model) #append master and node2
n_models = len(members2)
weights = [1/n_models for i in range(1, n_models+1)]
merge_model2 = model_weight_ensemble(members2, weights)
yp_val = merge_model2.predict(valx)
yp_val = np.argmax(yp_val, axis = 1)
print("Accuracy on Val Data: ", round(accuracy_score(valy, yp_val),4))

Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy on Val Data:  0.7356


In [11]:
members3=list()
members3.append(master_model)
node3_model = get_model(node3)
members3.append(node3_model) #append master and node3
n_models = len(members3)
weights = [1/n_models for i in range(1, n_models+1)]
merge_model3 = model_weight_ensemble(members3, weights)
yp_val = merge_model3.predict(valx)
yp_val = np.argmax(yp_val, axis = 1)
print("Accuracy on Val Data: ", round(accuracy_score(valy, yp_val),4))

Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy on Val Data:  0.7356


In [13]:
members_final=list() #Master merge
members_final.append(merge_model1)
members_final.append(merge_model2) 
members_final.append(merge_model3) 
n_models = len(members_final)
weights = [1/n_models for i in range(1, n_models+1)]
merge_mastermodel = model_weight_ensemble(members_final, weights)
yp_val = merge_mastermodel.predict(valx)
yp_val = np.argmax(yp_val, axis = 1)
print("Accuracy on Val Data: ", round(accuracy_score(valy, yp_val),4))

Accuracy on Val Data:  0.7356
