In [3]:
#Load and Pickle the dataset
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import keras.backend as K
import plotly.express as px
import os
import cv2
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
IMAGE_SIZE = (224,224)
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [5]:
files = tf.io.gfile.glob(r"/content/drive/Shareddrives/Covid Recognition Practicum/COVID-19 Radiography Database/COVID/*")
files.extend(tf.io.gfile.glob(r"/content/drive/Shareddrives/Covid Recognition Practicum/COVID-19 Radiography Database/NORMAL/*"))
files.extend(tf.io.gfile.glob(r"/content/drive/Shareddrives/Covid Recognition Practicum/COVID-19 Radiography Database/Viral Pneumonia/*"))

In [6]:
len(files)

3886

In [7]:
normal_count = len([file for file in files if 'NORMAL' in file])
covid_count = len([file for file in files if 'COVID' in file])
pneumonia_count = len([file for file in files if 'Viral Pneumonia' in file])

In [8]:
class_distribution = pd.DataFrame({'class':['normal','viral pneumonia','covid'],
                                   'count':[normal_count, pneumonia_count, covid_count]})

In [None]:
fig = px.bar(class_distribution, x="class", y="count", title="Distribution of images")
fig.update_layout(width=800, height=300)
fig.show()

In [9]:
train_files, test_files = train_test_split(files, test_size=0.1, random_state=42)
train_files, val_files = train_test_split(train_files, test_size=0.2, random_state=42)

In [10]:

train_normal_count = len([file for file in train_files if 'NORMAL' in file])
train_covid_count = len([file for file in train_files if 'COVID' in file])
train_pneumonia_count = len([file for file in train_files if 'Viral Pneumonia' in file])

print(train_normal_count, train_covid_count, train_pneumonia_count)

957 2797 966


In [11]:
train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
val_dataset = tf.data.Dataset.from_tensor_slices(val_files)
test_dataset = tf.data.Dataset.from_tensor_slices(test_files)

In [12]:
TRAIN_SIZE = tf.data.experimental.cardinality(train_dataset).numpy()
VAL_SIZE = tf.data.experimental.cardinality(val_dataset).numpy()
TEST_SIZE = tf.data.experimental.cardinality(test_dataset).numpy()

In [13]:
TRAIN_SIZE, VAL_SIZE, TEST_SIZE

(2797, 700, 389)

In [None]:
"""
train_files, test_files = train_test_split(files, test_size=0.1, random_state=42)
train_files, val_files = train_test_split(train_files, test_size=0.05, random_state=42)
"""
"""
train_normal_count = len([file for file in train_files if 'NORMAL' in file])
train_covid_count = len([file for file in train_files if 'COVID' in file])
train_pneumonia_count = len([file for file in train_files if 'Viral Pneumonia' in file])

print(train_normal_count, train_covid_count, train_pneumonia_count)
"""
"""
train_dataset = tf.data.Dataset.from_tensor_slices(train_files)
val_dataset = tf.data.Dataset.from_tensor_slices(val_files)
test_dataset = tf.data.Dataset.from_tensor_slices(test_files)
"""
"""
TRAIN_SIZE = tf.data.experimental.cardinality(train_dataset).numpy()
VAL_SIZE = tf.data.experimental.cardinality(val_dataset).numpy()
TEST_SIZE = tf.data.experimental.cardinality(test_dataset).numpy()
"""
#TRAIN_SIZE, VAL_SIZE, TEST_SIZE

In [14]:
def get_targets(file):
    split = tf.strings.split(file, os.path.sep)
    if split[-2] == 'COVID 19':
        return 0
    elif split[-2] == 'Viral Pneumonia':
        return 2
    else: return 1

In [15]:
def preprocess_img(file):
    target = get_targets(file)
    img = tf.io.read_file(file)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, IMAGE_SIZE)
    return img, target

In [16]:
train_data = train_dataset.map(preprocess_img)
val_data = val_dataset.map(preprocess_img)

In [17]:
BATCH_SIZE = 64
test_data = test_dataset.map(preprocess_img)
test_data = test_data.batch(BATCH_SIZE)

In [18]:
def preprocessing(dataset, buffer_size=650):
    dataset = dataset.shuffle(buffer_size=buffer_size).repeat()
    dataset = dataset.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    return dataset

In [19]:
train_set = preprocessing(train_data)
val_set = preprocessing(val_data)

In [20]:
def visualize_samples(images, targets):
    plt.figure(figsize=(12,8))
    for n in range(16):
        sub = plt.subplot(4,4,n+1)
        plt.imshow(images[n])
        if targets[n] == 0:
            plt.title("COVID")
        elif targets[n] == 1:
            plt.title("PNEUMONIA")
        else:plt.title("NORMAL")
        plt.axis("off")

In [21]:
def conv_block(filters):
    block = tf.keras.Sequential([
        tf.keras.layers.SeparableConv2D(filters, 3, activation='relu', padding='same'),
        tf.keras.layers.SeparableConv2D(filters, 3, activation='relu', padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPool2D()
    ]
    )
    return block

In [22]:
def dense_block(units, dropout_rate):
    block = tf.keras.Sequential([
        tf.keras.layers.Dense(units, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate)
    ])
    return block

In [44]:
def new_dense_block(units, dropout_rate, active_fun):
    block = tf.keras.Sequential([
        tf.keras.layers.Dense(units, activation=active_fun),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate)
    ])
    return block
def create_dense_model():
    model = tf.keras.Sequential([
    tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)),
    new_dense_block(128, 0.5, 'relu'),
    tf.keras.layers.Dense(32, activation='sigmoid')
    ])
    return model 

In [45]:
dense_model = create_dense_model()

METRICS = ['accuracy']

opt = tf.keras.optimizers.Adam(learning_rate=0.0006)
    
dense_model.compile(optimizer= opt, loss='sparse_categorical_crossentropy', metrics=METRICS)

In [46]:
dense_model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_11 (Sequential)   (None, 224, 224, 128)     1024      
_________________________________________________________________
dense_10 (Dense)             (None, 224, 224, 32)      4128      
Total params: 5,152
Trainable params: 4,896
Non-trainable params: 256
_________________________________________________________________


In [47]:
path = r"/content/drive/Shareddrives/Covid Recognition Practicum/dense_covid_model.h5"
dense_checkpoint = tf.keras.callbacks.ModelCheckpoint(path,save_best_only=True)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(factor=0.2, patience=10, mode='min', min_lr=1E-5)

In [48]:
EPOCHS = 30
history = dense_model.fit(
    train_set,
    steps_per_epoch = TRAIN_SIZE // BATCH_SIZE,
    epochs = EPOCHS,
    validation_data = val_set,
    validation_steps = VAL_SIZE // BATCH_SIZE,
    callbacks=[dense_checkpoint]
)

Epoch 1/30


InvalidArgumentError: ignored

In [55]:
def build_model():
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)),
        
        tf.keras.layers.Conv2D(64, 7, activation='relu', padding='same'),
        tf.keras.layers.MaxPool2D(),
        
        conv_block(128),
        conv_block(256),
        tf.keras.layers.Dropout(rate=0.5),
        
        conv_block(256),
        tf.keras.layers.Dropout(0.4),
        
        tf.keras.layers.Flatten(),
        dense_block(128, 0.5),
        dense_block(64, 0.5),
       
        tf.keras.layers.Dense(3, activation='softmax')
    ])
    
    return model

In [56]:
model = build_model()

METRICS = ['accuracy']

opt = tf.keras.optimizers.Adam(learning_rate=0.0006)
    
model.compile(optimizer= opt, loss='sparse_categorical_crossentropy', metrics=METRICS)


In [57]:
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 64)      9472      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 64)      0         
_________________________________________________________________
sequential_10 (Sequential)   (None, 56, 56, 128)       27072     
_________________________________________________________________
sequential_11 (Sequential)   (None, 28, 28, 256)       103296    
_________________________________________________________________
dropout_6 (Dropout)          (None, 28, 28, 256)       0         
_________________________________________________________________
sequential_12 (Sequential)   (None, 14, 14, 256)       137216    
_________________________________________________________________
dropout_7 (Dropout)          (None, 14, 14, 256)     

In [58]:
checkpoint = tf.keras.callbacks.ModelCheckpoint("/content/drive/Shareddrives/Covid Recognition Practicum/covid_model.h5",save_best_only=True)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(factor=0.2, patience=10, mode='min', min_lr=1E-5)

In [None]:
EPOCHS = 30
history = model.fit(
    train_set,
    steps_per_epoch = TRAIN_SIZE // BATCH_SIZE,
    epochs = EPOCHS,
    validation_data = val_set,
    validation_steps = VAL_SIZE // BATCH_SIZE,
    callbacks=[checkpoint]
)

Epoch 1/30
 2/43 [>.............................] - ETA: 10:46 - loss: 1.2354 - accuracy: 0.2930