In [1]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import cv2
import torchxrayvision as xrv
from sklearn.model_selection import train_test_split

  from tqdm.autonotebook import tqdm


In [2]:
# Open the COVID-19 chest X-ray dataset from the local image folder and its metadata CSV.
d = xrv.datasets.COVID19_Dataset(
    imgpath="data/images/",
    csvpath="data/csv/metadata.csv",
)


In [3]:
# Number of samples in the dataset; as the cell's only expression it is shown as the cell output.
len(d) # Dataset size: 535 entries; 342 (64%) are COVID-19 positive and 193 (36%) are negative.
# To inspect the label names exposed by the dataset, uncomment the two lines below:
# lab_test_attributes = d.pathologies # Column names
# lab_test_attributes

535

In [4]:
# Collect one row per sample: the image plane, its shape, and two COVID-19 labels
# (one taken from the dataset's label vector, one derived from the metadata text).
data = []
headers = ['image', 'shape', 'covid-19', 'findings']
# Column 5 of the metadata table is searched for the text 'COVID-19'. Select it
# once with .iloc rather than positionally indexing every row Series: integer
# keys on a label-indexed Series are deprecated in recent pandas.
finding_text = d.csv.iloc[:, 5]
for i in range(len(d)):
    sample = d[i]   # evaluate d[i] once per sample instead of once per field
    img = sample['img'][0]
    lab = sample['lab']

    row = [img, img.shape]
    row.append(lab[3])   # entry for covid-19 (position 3 — TODO confirm against d.pathologies)
    row.append(1.0 if re.search('COVID-19', finding_text.iloc[i]) else 0.0)
    data.append(row)
df = pd.DataFrame(data, columns = headers)

In [5]:
# Features are the first DataFrame column (images); the target is the last
# column (the label derived from the metadata text).
X, y, low, high = df.iloc[:,0], df.iloc[:,-1], -1024, 1024
data, target_height, target_width = [], 512, 512
for image in X:
    # Map the interval [low, high] linearly onto [0, 255].
    # NOTE(review): inputs therefore stay on a 0-255 scale; the very large
    # first-epoch losses recorded further down suggest rescaling to [0, 1]
    # may be worth trying — confirm before changing.
    image = 255 * (image - low) / (high - low)
    # cv2.resize takes its target size as (width, height) ...
    image = cv2.resize(image, (target_width, target_height))
    data.append(image)
# ... but the arrays it returns are laid out (height, width), so the stacked
# batch must be reshaped rows-first. The original order (width, height) was only
# harmless because both targets are 512.
X = np.array(data).reshape(-1, target_height, target_width, 1)

In [6]:
# Sanity check: 535 greyscale samples of 512 x 512 pixels, and one label per sample.
print(X.shape, y.shape, sep=' ')

(535, 512, 512, 1) (535,)


##### Initial Run

In [7]:
import tensorflow
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from sklearn.metrics import confusion_matrix, classification_report
# from keras_tuner import HyperModel

In [13]:
# Hold out 40% of the samples for testing; the fixed seed keeps the split repeatable.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=3244,
)

In [8]:
# Baseline fully connected network: flatten each 512 x 512 x 1 image and pass it
# through two hidden layers to a single sigmoid unit.
# The input shape is declared with an explicit Input layer instead of the legacy
# `input_shape=` layer argument, which current Keras releases warn about.
ann = models.Sequential([
    layers.Input(shape = (512, 512, 1)),
    layers.Flatten(),
    layers.Dense(64, activation = 'relu'),
    layers.Dense(32, activation = 'relu'),
    layers.Dense(1, activation = 'sigmoid')  # For binary classification
])

In [26]:
# Configure training for the dense baseline: Adam optimiser, binary
# cross-entropy loss (single sigmoid output), accuracy as the reported metric.
ann.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [27]:
# Small convolutional network: two Conv2D + MaxPooling2D stages followed by a
# dense head ending in a single sigmoid unit.
# The input shape is declared with an explicit Input layer instead of the legacy
# `input_shape=` layer argument, which current Keras releases warn about.
cnn = models.Sequential([
    layers.Input(shape = (512, 512, 1)),

    layers.Conv2D(16, (3, 3), activation = 'relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(32, (3, 3), activation = 'relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(64, activation = 'relu'),
    # layers.Dropout(0.5),  # Helps prevent overfitting

    layers.Dense(1, activation = 'sigmoid')  # For binary classification
])

In [28]:
# Configure training for the CNN with the same settings as the dense baseline:
# Adam optimiser, binary cross-entropy loss, accuracy as the reported metric.
cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [29]:
# Train the dense baseline on the training split for five epochs.
ann.fit(x=Xtrain, y=ytrain, epochs=5)

Epoch 1/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 119ms/step - accuracy: 0.6499 - loss: 6211.8687
Epoch 2/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 121ms/step - accuracy: 0.4816 - loss: 5555.7017
Epoch 3/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step - accuracy: 0.5968 - loss: 1906.2186
Epoch 4/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 127ms/step - accuracy: 0.6256 - loss: 709.0164
Epoch 5/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 120ms/step - accuracy: 0.7068 - loss: 502.0346


<keras.src.callbacks.history.History at 0x2b7b2aff9b0>

In [30]:
# Train the CNN on the training split for five epochs.
cnn.fit(x=Xtrain, y=ytrain, epochs=5)

Epoch 1/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 645ms/step - accuracy: 0.5792 - loss: 481.0723
Epoch 2/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 649ms/step - accuracy: 0.5445 - loss: 41.9315
Epoch 3/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 643ms/step - accuracy: 0.5326 - loss: 2.5553
Epoch 4/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 650ms/step - accuracy: 0.5151 - loss: 0.6968
Epoch 5/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 651ms/step - accuracy: 0.6295 - loss: 0.5803


<keras.src.callbacks.history.History at 0x2b7b2b03740>

In [43]:
# Score the held-out split with the dense baseline.
ann_pred = ann.predict(Xtest)
# Round the sigmoid outputs to the nearest class in one vectorised call and
# flatten the result to 1-D, instead of building a Python list of 1-element arrays.
ann_pred_classes = np.round(ann_pred).ravel()
print("Classification report: \n", classification_report(ytest, ann_pred_classes))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Classification report: 
               precision    recall  f1-score   support

         0.0       0.42      0.89      0.57        83
         1.0       0.76      0.21      0.33       131

    accuracy                           0.48       214
   macro avg       0.59      0.55      0.45       214
weighted avg       0.63      0.48      0.42       214



In [44]:
# Score the held-out split with the CNN.
cnn_pred = cnn.predict(Xtest)
# Round the sigmoid outputs to the nearest class in one vectorised call and
# flatten the result to 1-D, instead of building a Python list of 1-element arrays.
cnn_pred_classes = np.round(cnn_pred).ravel()
print("Classification report: \n", classification_report(ytest, cnn_pred_classes))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 175ms/step
Classification report: 
               precision    recall  f1-score   support

         0.0       0.33      0.04      0.07        83
         1.0       0.61      0.95      0.74       131

    accuracy                           0.60       214
   macro avg       0.47      0.50      0.40       214
weighted avg       0.50      0.60      0.48       214



##### Augmentation

In [61]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [62]:
# Random augmentation applied to training images as batches are generated.
datagen = ImageDataGenerator(
    # Geometric jitter: rotation, horizontal/vertical shifts and zoom.
    rotation_range=60,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    # Random left-right mirroring.
    horizontal_flip=True,
    # Pixels exposed by a transform are filled using the 'nearest' mode.
    fill_mode='nearest',
)

In [50]:
# Fit the CNN on the un-augmented training split for five epochs, as a
# reference point before training with augmentation.
# NOTE(review): this is the same call as the earlier CNN training cell; unless
# `cnn` is rebuilt first it presumably continues from the already-trained
# weights — confirm the intended execution order.
cnn.fit(x=Xtrain, y=ytrain, epochs=5)

Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 931ms/step - accuracy: 0.5333 - loss: 1762.8212
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 932ms/step - accuracy: 0.5644 - loss: 3.2285
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 928ms/step - accuracy: 0.6792 - loss: 0.6939
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 932ms/step - accuracy: 0.6662 - loss: 0.6916
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 880ms/step - accuracy: 0.6426 - loss: 0.6904


<keras.src.callbacks.history.History at 0x25f8312c950>

In [51]:
# Loss and accuracy of the CNN on the held-out split, before augmented training.
cnn.evaluate(x=Xtest, y=ytest)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step - accuracy: 0.6385 - loss: 0.6897


[0.6899442076683044, 0.6296296119689941]

In [63]:
# Train the CNN for five epochs on augmented mini-batches of 32 images that
# datagen generates from the training split.
cnn.fit(
    datagen.flow(Xtrain, ytrain, batch_size=32),
    epochs=5,
)

Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 1s/step - accuracy: 0.6425 - loss: 0.6892
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 1s/step - accuracy: 0.6455 - loss: 2.3894
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 1s/step - accuracy: 0.6183 - loss: 0.6877
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 1s/step - accuracy: 0.6279 - loss: 0.7932
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.6478 - loss: 0.6837


<keras.src.callbacks.history.History at 0x25f83112480>

In [64]:
# Loss and accuracy of the CNN on the held-out split, after augmented training.
cnn.evaluate(x=Xtest, y=ytest)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 217ms/step - accuracy: 0.6385 - loss: 0.6799


[0.6821554899215698, 0.6296296119689941]