# Dataset

The next two cells are only needed when running on Google `Colab`; uncomment them there.

In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

# # set the env var for the json file path
# import os

# os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive/kaggle'

In [2]:
# !kaggle datasets download -d tawsifurrahman/covid19-radiography-database
# !unzip -q covid19-radiography-database.zip

# Packages

In [3]:
!pip install -q mlflow

# Libraries

In [4]:
import numpy as np
import pandas as pd
import shutil
import random
import os
from tqdm import tqdm

import tensorflow
import tensorflow.keras
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalMaxPooling2D, Conv1D, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications import EfficientNetB0
import cv2
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder , OneHotEncoder
from sklearn.utils import shuffle
from tensorflow.keras import *
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix, confusion_matrix
import itertools
from tensorflow.keras.applications.efficientnet import preprocess_input

import mlflow
import mlflow.tensorflow

In [5]:
# Enable MLflow autologging up front, before any model is built or trained,
# so the training runs below are tracked without explicit logging calls.
mlflow.autolog()

In [6]:
# Class sub-directories of the dataset; each directory name doubles as the label.
folders = [
    'COVID',
    'Lung_Opacity',
    'Normal',
    'Viral Pneumonia',
]

# Dataset root: swap the two assignments below when running on Colab.
# base_folder = '/content/COVID-19_Radiography_Dataset'    ## IN Colab
base_folder = '../input/covid19-radiography-database/COVID-19_Radiography_Dataset'  ## IN Kaggle

# Accumulators filled by the loading loop: one image and one label per file.
data, labels = [], []

In [7]:
# Load every file of every class directory as a 112x112 grayscale image.
for folder in folders:
    folder_path = os.path.join(base_folder, folder)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    files = sorted(os.listdir(folder_path))
    for file in tqdm(files):
        file_path = os.path.join(folder_path, file)
        # read in gray
        img = cv2.imread(file_path, 0)
        # cv2.imread reports failure by returning None (sub-directory,
        # non-image or corrupt file); skip it instead of crashing in resize.
        if img is None:
            tqdm.write(f"Skipping unreadable entry: {file_path}")
            continue
        # resize to smaller images
        img = cv2.resize(img, (112, 112))
        data.append(img)
        labels.append(folder)


In [8]:
# Sanity check: show the second loaded image, titled with its class label.
plt.imshow(data[1], cmap='gray')
plt.title(labels[1])

In [9]:
# Convert the Python lists collected by the loading loop into NumPy arrays.
data = np.array(data)
labels = np.array(labels)

In [10]:
# Display the distinct class labels that were loaded.
np.unique(labels)

In [11]:
# Print both shapes; the leading dimensions should match (one label per image).
print(data.shape)
print(labels.shape)

In [12]:
# Bar chart of how many images each class contributes.
CLASSES = np.unique(labels)
count = [np.sum(labels == cl) for cl in CLASSES]

x = list(CLASSES)
plt.bar(x, count, color="purple")
plt.xlabel("Classes")
plt.ylabel("Distribution")
plt.show()


In [13]:
# extra dim for training
data = data.reshape(-1, 112, 112, 1)

In [14]:
# Scale pixel intensities by 1/255.
# NOTE: this rebinds `data`, so running the cell a second time divides again.
data = data / 255.0

In [15]:
from sklearn.model_selection import train_test_split
# Stratified, seeded split so both parts keep the class proportions of the
# whole dataset. No test_size is given, so scikit-learn's default applies.
X_train, X_test, Y_train, Y_test = train_test_split(data, 
                                                    labels,
                                                    stratify=labels,
                                                    random_state=42,
                                                    shuffle=True)
print(X_train.shape)
print(X_test.shape)

In [16]:
# Map the string class names to integer ids. The encoder is fitted on the
# training labels only and then reused for the test labels.
encode = LabelEncoder()
onehotencoder = OneHotEncoder()

Y_train = encode.fit_transform(Y_train)
Y_test = encode.transform(Y_test)

# Column-vector views of the integer ids, the 2-D layout OneHotEncoder takes.
train_labels = Y_train.reshape(-1, 1)
test_labels = Y_test.reshape(-1, 1)
test_labels

In [17]:
# One-hot encode the integer ids; the sparse encoder output is densified
# because the arrays are fed straight to Keras.
Y_train = onehotencoder.fit_transform(train_labels).toarray()
Y_test = onehotencoder.transform(test_labels).toarray()
Y_test

In [18]:
# The images here are single-channel grayscale, but the ImageNet-pretrained
# backbone is built for 3-channel input. To keep the pretrained weights, the
# three channels are created in front of the backbone by a 1x1 convolution
# with 3 filters (see the model definition below).
# NOTE(review): confirm which ImageNet weights Keras loads for a 112x112 input.

mobile = MobileNetV2(input_shape= (112,112,3),  include_top=False , weights="imagenet", pooling='max')

In [19]:
# Freeze the pretrained backbone so only the layers added around it are trained.
mobile.trainable = False

In [20]:
# Classifier: learnable 1 -> 3 channel adapter, the MobileNetV2 backbone,
# then a small dense head.
model = Sequential()

# Kernel size 1 mixes only the channel axis, turning the single grayscale
# channel into the 3 channels the pretrained backbone takes as input.
model.add(Conv1D(3, 1, activation='relu', padding='same', input_shape=(112, 112, 1)))
model.add(mobile)
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
# The four classes are mutually exclusive and the model is trained with
# categorical_crossentropy, so the output has to be a softmax distribution
# (independent sigmoid units do not sum to 1).
model.add(Dense(4, activation='softmax'))


In [21]:
# Show the layer stack and the parameter counts of the first model.
model.summary()

In [22]:
# # model.get_layer("mobilenetv2_1.00_224")
# print([layer.name for layer in model.get_layer('mobilenetv2_1.00_224').layers])

In [23]:
# Multi-class setup: Adam optimizer, categorical cross-entropy on the one-hot
# targets. The metric is named 'acc', which is the history key plotted later.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [24]:
from sklearn.utils import class_weight

# Balance the loss by inverse class frequency. The weights are derived from the
# TRAINING labels (the test split must not influence training), and the
# arguments are passed by keyword because recent scikit-learn releases no
# longer accept them positionally.
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=np.unique(train_labels),
                                                  y=train_labels[:, 0])

# Keras takes the weights as a {class_index: weight} mapping.
class_weights = dict(enumerate(class_weights))
print(class_weights)

In [25]:
# Train the first model for 20 epochs with class-weighted loss; the test split
# doubles as validation data. `res` keeps the per-epoch history for plotting.
res = model.fit(X_train, Y_train, batch_size=512 , epochs=20 , validation_data = (X_test, Y_test), class_weight= class_weights )

In [26]:
def print_details(train_data,validation_data,tr_label,val_label,history, test_predictions):
    """Report how a trained classifier performs on the validation split.

    Prints a classification report, plots the loss and accuracy curves recorded
    during training, and draws an annotated confusion matrix.

    Args:
        train_data: Unused; kept so existing calls keep working.
        validation_data: Unused; kept so existing calls keep working.
        tr_label: Unused; kept so existing calls keep working.
        val_label: True labels as per-class rows (one-hot); the class id is
            taken as the argmax over the last axis.
        history: Object whose `history` dict holds the 'loss', 'val_loss',
            'acc' and 'val_acc' series.
        test_predictions: Predicted per-class scores, rows aligned with
            `val_label`.
    """
    classes = ['COVID','Lung_Opacity','Normal','Viral Pneumonia']

    # Collapse the per-class rows to class ids once and reuse them for both the
    # report and the confusion matrix.
    y_true = np.argmax(val_label, axis=-1)
    y_pred = np.argmax(test_predictions, axis=-1)

    print(classification_report(y_true, y_pred))

    plt.plot(history.history['loss'], 'r-o')
    plt.plot(history.history['val_loss'], 'b--o')
    plt.title('Loss')
    plt.show()

    plt.plot(history.history['acc'], 'r-o')
    plt.plot(history.history['val_acc'], 'b--o')
    plt.title('Accuracy')
    plt.show()

    # Passing the label ids explicitly keeps the matrix len(classes) square
    # even when a class is neither present nor predicted, so ticks stay aligned.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(classes)))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion matrix')
    plt.colorbar()
    tick_marks = np.arange(len(classes))

    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [27]:
# Evaluate the first model: report, training curves and confusion matrix,
# using fresh predictions on the held-out split.
print_details(X_train, X_test, Y_train, Y_test, res, model.predict(X_test))

In [44]:
# Archive the local ./mlruns directory quietly and recursively so it can be downloaded.
# NOTE(review): the execution count shows this cell ran out of order — confirm
# it is run after all training cells so every run is included.
!zip mlruns_covid.zip -r -q ./mlruns

In [46]:
from IPython.display import FileLink
# Render a clickable link to the archive created by the zip cell.
FileLink(r'./mlruns_covid.zip')

In [30]:
# !rm -r ./mlruns


# mlflow.autolog()

In [31]:
# `preprocess_input` is the EfficientNet variant here (the last import of that
# name wins). It is a pass-through: the Keras EfficientNet models rescale
# internally and expect raw pixel values in [0, 255]. `data` was divided by 255
# earlier, so that range is restored first. This allocates a new array instead
# of aliasing `data`.
images = preprocess_input(data * 255.0)

In [32]:
# Same stratified, seeded split as for the first model, now over `images`.
# This rebinds X_train / X_test / Y_train / Y_test for the second model.
X_train, X_test, Y_train, Y_test = train_test_split(images, 
                                                    labels,
                                                    stratify=labels,
                                                    random_state=42,
                                                    shuffle=True)
print(X_train.shape)
print(X_test.shape)

In [33]:
# Fresh encoders for the second experiment: string class names -> integer ids,
# fitted on the training labels and reused for the test labels.
encode = LabelEncoder()
onehotencoder = OneHotEncoder()

Y_train = encode.fit_transform(Y_train)
Y_test = encode.transform(Y_test)

# Column-vector views of the integer ids, the 2-D layout OneHotEncoder takes.
train_labels = Y_train.reshape(-1, 1)
test_labels = Y_test.reshape(-1, 1)
test_labels

In [34]:
# One-hot encode the integer ids and densify the sparse result for Keras.
Y_train = onehotencoder.fit_transform(train_labels).toarray()
Y_test = onehotencoder.transform(test_labels).toarray()
Y_test

In [35]:
# Second backbone: EfficientNetB0 with ImageNet weights, no classification top,
# and global max pooling so it outputs one feature vector per image.
efficientNet = EfficientNetB0(input_shape=(112,112,3), include_top=False, weights='imagenet', pooling='max')

In [36]:
# Freeze the pretrained backbone so only the layers added around it are trained.
efficientNet.trainable=False

In [37]:
# Same architecture as the first model, with EfficientNetB0 as the backbone.
second_model = Sequential()

# Kernel size 1 mixes only the channel axis, turning the single grayscale
# channel into the 3 channels the pretrained backbone takes as input.
second_model.add(Conv1D(3, 1, activation='relu', padding='same', input_shape=(112, 112, 1)))
second_model.add(efficientNet)
second_model.add(Flatten())
second_model.add(Dense(128, activation='relu'))
second_model.add(Dropout(0.2))
# The four classes are mutually exclusive and the model is trained with
# categorical_crossentropy, so the output has to be a softmax distribution
# (independent sigmoid units do not sum to 1).
second_model.add(Dense(4, activation='softmax'))


In [38]:
# Show the layer stack and the parameter counts of the second model.
second_model.summary()

In [39]:
# Same training setup as the first model, so the two runs are comparable.
second_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

In [40]:
# Train the second model with the same batch size and epoch count as the first.
# `class_weights` is the mapping computed earlier in the notebook, not
# recomputed for this split.
history = second_model.fit(X_train, Y_train, batch_size=512 , epochs=20 , validation_data = (X_test, Y_test), class_weight= class_weights )

In [41]:
# Evaluate the second model with the same report used for the first one.
print_details(X_train, X_test, Y_train, Y_test, history, second_model.predict(X_test))

In [42]:
# Persist the MobileNetV2-based model to the working directory.
model.save('first_model.h5')

In [43]:
# Persist the EfficientNetB0-based model to the working directory.
second_model.save('second_model.h5')