# Installing, Cloning & Importing

In [None]:
# Install TensorFlow, pinned to the 2.1 release used throughout this notebook.
!pip install tensorflow==2.1

In [None]:
 # Pin keras-tuner too: an unpinned install next to tensorflow==2.1 can pull a
 # newer release that no longer matches this TensorFlow version or the
 # `kerastuner` module name imported by this notebook.
 !pip install keras-tuner==1.0.1

In [None]:
# Clone the dataset repository into the local `dataset/` folder, which the
# CSV and image paths used later in this notebook point into.
# Update dataset: !git pull
!git clone https://github.com/casperbh96/COVID-19-Detection.git dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import cv2, time
import tensorflow as tf


tf.__version__

In [None]:
import kerastuner

kerastuner.__version__

# Loading The Datasets

In [None]:
# Folders that hold the image files, one per class.
covid_image_path = 'dataset/covid_adjusted/'
normal_image_path = 'dataset/normal_dataset/'

# CSV tables listing each image's file name and its finding (the label).
covid_path = 'dataset/covid_dataset.csv'
normal_path = 'dataset/normal_xray_dataset.csv'

# Only the two columns used by the loading loops are read.
covid_df = pd.read_csv(covid_path, usecols=['filename', 'finding'])
# The normal table is cut down to its first 99 rows.
normal_df = pd.read_csv(normal_path, usecols=['filename', 'finding']).head(99)

covid_df.head()

In [None]:
def load_rgb_image(path):
    """Read the image file at ``path`` with OpenCV and return it in RGB order.

    Args:
        path: location of the image file on disk.

    Returns:
        The decoded image converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError(f"Could not read image file: {path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


covid_images = []
covid_labels = []

for filename, label in zip(covid_df['filename'], covid_df['finding']):
    covid_images.append(load_rgb_image(covid_image_path + filename))
    covid_labels.append(label)

normal_images = []
normal_labels = []

for filename, label in zip(normal_df['filename'], normal_df['finding']):
    # temporary fix while we preprocess ALL the images:
    # loading stops at this file, so it and every later row are left out
    if filename == '4c268764-b5e5-4417-85a3-da52916984d8.jpg':
        break

    normal_images.append(load_rgb_image(normal_image_path + filename))
    normal_labels.append(label)

# normalize to interval of [0,1]
covid_images = np.array(covid_images) / 255

# normalize to interval of [0,1]
normal_images = np.array(normal_images) / 255

# Splitting Datasets Into Training, Validation and Testing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical
from keras.callbacks import EarlyStopping

# Fixed seed so the same images land in the same split on every run.
SPLIT_SEED = 42

# split into training(0.7) and testing(0.15) and validation(0.15)
# Each class is split on its own so both classes appear in every subset.
# The first call holds out 30%; the second halves that hold-out into test and validation.
covid_x_train, covid_x_val, covid_y_train, covid_y_val = train_test_split(
    covid_images, covid_labels, test_size=0.3, random_state=SPLIT_SEED)
covid_x_test, covid_x_val, covid_y_test, covid_y_val = train_test_split(
    covid_x_val, covid_y_val, test_size=0.5, random_state=SPLIT_SEED)

normal_x_train, normal_x_val, normal_y_train, normal_y_val = train_test_split(
    normal_images, normal_labels, test_size=0.3, random_state=SPLIT_SEED)
normal_x_test, normal_x_val, normal_y_test, normal_y_val = train_test_split(
    normal_x_val, normal_y_val, test_size=0.5, random_state=SPLIT_SEED)

# Stack the per-class subsets (normal first, then covid).
X_train = np.concatenate((normal_x_train, covid_x_train), axis=0)
X_val = np.concatenate((normal_x_val, covid_x_val), axis=0)
X_test = np.concatenate((normal_x_test, covid_x_test), axis=0)
y_train = np.concatenate((normal_y_train, covid_y_train), axis=0)
y_val = np.concatenate((normal_y_val, covid_y_val), axis=0)
y_test = np.concatenate((normal_y_test, covid_y_test), axis=0)

# make labels into categories - either 0 or 1
# The binarizer is fitted once on the training labels and reused, so every
# subset shares one label -> index mapping. num_classes=2 keeps the one-hot
# width equal to the 2-unit softmax output of the model.
label_binarizer = LabelBinarizer().fit(y_train)

y_train = to_categorical(label_binarizer.transform(y_train), num_classes=2)
y_val = to_categorical(label_binarizer.transform(y_val), num_classes=2)
y_test = to_categorical(label_binarizer.transform(y_test), num_classes=2)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Generator that augments training batches with random flips, shifts and rotations.
train_aug = ImageDataGenerator(
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
)


# Modeling With Keras

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Activation
from tensorflow.keras.layers import BatchNormalization,Dropout
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

In [None]:
def build_model(hp):  # random search passes this hyperparameter() object
    """Build and compile a VGG19-based two-class classifier for one tuner run.

    A VGG19 base with ImageNet weights and no top is created for inputs shaped
    like one sample of the global ``X_train``. Its layers are frozen, and a
    Flatten -> Dropout(0.5) -> Dense(2, softmax) head is attached.

    Args:
        hp: hyperparameter object supplied by the tuner. It provides the Adam
            ``learning_rate``, sampled on a log scale between 1e-4 and 1e-2
            (default 1e-3).

    Returns:
        The compiled model (categorical cross-entropy loss, accuracy metric).
    """
    vggModel = VGG19(weights="imagenet", include_top=False,
                     input_tensor=Input(shape=X_train.shape[1:]))

    # Freeze the pretrained base before compiling so only the new head trains.
    for layer in vggModel.layers:
        layer.trainable = False

    outputs = vggModel.output
    outputs = Flatten(name="flatten")(outputs)
    outputs = Dropout(0.5)(outputs)
    outputs = Dense(2, activation="softmax")(outputs)

    model = Model(inputs=vggModel.input, outputs=outputs)
    # No model.summary() here: this function runs once per tuner execution, so
    # printing would flood the output with identical summaries.

    model.compile(optimizer=keras.optimizers.Adam(
                      hp.Float(
                          'learning_rate',
                          min_value=1e-4,
                          max_value=1e-2,
                          sampling='LOG',
                          default=1e-3
                      )
                  ),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
   
    

In [None]:
import math
import time

# A fresh, timestamp-named directory per run keeps tuner results separate.
LOG_DIR = f"{int(time.time())}"
SEARCH_BATCH_SIZE = 32

tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=2,  # how many model variations to test?
    executions_per_trial=20,  # how many trials per variation? (same model could perform differently)
    directory=LOG_DIR)
# earlystop = EarlyStopping(monitor = 'loss',patience = 5, baseline = None)
# Step counts must be whole numbers; round up so the final partial batch is
# still used each epoch.
tuner.search(train_aug.flow(X_train, y_train, batch_size=SEARCH_BATCH_SIZE),
             validation_data=(X_val, y_val),
             validation_steps=math.ceil(len(X_val) / SEARCH_BATCH_SIZE),
             steps_per_epoch=math.ceil(len(X_train) / SEARCH_BATCH_SIZE),
             epochs=100)

In [None]:
# Show the values of the first entry returned by the tuner's best-hyperparameters list.
tuner.get_best_hyperparameters()[0].values

In [None]:
# Print the layer summary of the first model returned by the tuner's best-models list.
tuner.get_best_models()[0].summary()

In [None]:
# Keep the first of the tuner's best models; the training, evaluation and
# prediction cells below all use `modelBest`.
modelBest = tuner.get_best_models()[0]

In [None]:

import math

# Continue training the selected model on augmented batches; `history` feeds
# the accuracy/loss plots further down.
# Step counts must be whole numbers; round up so the final partial batch is
# still used each epoch.
history = modelBest.fit(train_aug.flow(X_train, y_train, batch_size=32),
                        validation_data=(X_val, y_val),
                        validation_steps=math.ceil(len(X_val) / 32),
                        steps_per_epoch=math.ceil(len(X_train) / 32),
                        epochs=200)



In [None]:
# evaluate() returns the loss followed by the compiled metrics, so index 1 is
# the accuracy on the held-out test set.
acc = modelBest.evaluate(X_test, y_test, verbose=0)
acc_scores = [acc[1]]
print('Accuracy scores: ', acc_scores)

In [None]:
# Class scores for the full test set ...
y_pred = modelBest.predict(
    X_test,
    batch_size=32,
)
# ... and for the covid-only part of it.
y_pred_covid = modelBest.predict(
    covid_x_test,
    batch_size=32,
)

In [None]:
# Fit the encoder on training labels from BOTH classes. Fitting on
# covid_y_test alone (presumably a single label value) would give a
# one-column encoding that no longer matches the model's two outputs.
covid_label_encoder = LabelBinarizer().fit(
    np.concatenate((normal_y_train, covid_y_train), axis=0))
y_test_covid = to_categorical(
    covid_label_encoder.transform(covid_y_test), num_classes=2)

In [None]:
from sklearn.metrics import classification_report

# Collapse one-hot targets and predicted scores to class indices before scoring.
true_classes = np.argmax(y_test, axis=1)
predicted_classes = np.argmax(y_pred, axis=1)
print(classification_report(true_classes, predicted_classes))

In [None]:
# Select the style BEFORE creating the figure: a figure picks up its colours
# when it is created, so styling afterwards leaves a mismatched background
# behind the light-coloured text of the dark theme.
plt.style.use('dark_background')
plt.figure(figsize=(10,10))

# Per-epoch training and validation accuracy recorded by modelBest.fit.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])

plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')

plt.legend(['Training', 'Validation'])

plt.show()

In [None]:
# Select the style BEFORE creating the figure: a figure picks up its colours
# when it is created, so styling afterwards leaves a mismatched background
# behind the light-coloured text of the dark theme.
plt.style.use('dark_background')
plt.figure(figsize=(10,10))

# Per-epoch training and validation loss recorded by modelBest.fit.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])

plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')

plt.legend(['Training', 'Validation'])

plt.show()

In [None]:
# Use the Keras bundled with TensorFlow, like the rest of the notebook, instead
# of the standalone `keras` package.
from tensorflow.keras.preprocessing import image

# Resize to the height/width the trained model was built for rather than a
# hard-coded 224x224.
input_height, input_width = modelBest.input_shape[1:3]

test_image = image.load_img('dataset/normal_dataset/0b421aea-6e0f-4faf-a4bf-4a82445d0e35.jpg', target_size=(input_height, input_width))
#test_image = image.load_img('dataset/covid_adjusted/covid-19-pneumonia-rapidly-progressive-12-hours.jpg.jpg', target_size=(input_height, input_width))


test_image = image.img_to_array(test_image)
# Same [0,1] scaling that was applied to the training images.
test_image = test_image *1./255

# Add the leading batch dimension expected by predict().
test_image = np.expand_dims(test_image, axis = 0)
result = modelBest.predict(test_image,batch_size=32)
print(result)

In [None]:
import matplotlib.image as mpimg

img=mpimg.imread('dataset/normal_dataset/0b421aea-6e0f-4faf-a4bf-4a82445d0e35.jpg')
#img=mpimg.imread('dataset/covid_adjusted/covid-19-pneumonia-rapidly-progressive-12-hours.jpg.jpg')
imgplot = plt.imshow(img)
# Do not assign the result back to `plt`: that replaces the pyplot module with
# the title's Text object and breaks every later plt.* call.
plt.title('Chest X-ray ')
plt.show()

In [None]:
# `result` holds one row of two class scores; the image is called 'normal'
# only when the second score is strictly larger than the first.
first_score, second_score = result[0][0], result[0][1]
prediction = 'normal' if first_score < second_score else 'covid'

print("AI's prediction is: "+ prediction)