### Notebook from : https://www.kaggle.com/code/jiaowoguanren/sars-cov-2-ct-scan-dataset-classification-tf-0-99

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
%pip install split-folders

In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import pathlib, splitfolders
import os

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import *

from keras.utils.np_utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import  ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16

# USE OF SARSCOV2-CTSCAN-DATASET

In [50]:
epochs = 30
batch_size = 32
img_height, img_width = 300, 300
input_shape = (img_height, img_width, 3)

def create_data_binary(data_bs):
    data_bs = pathlib.Path(data_bs)
    splitfolders.ratio(data_bs, output='../sarscov2-ctscan-dataset-splitted/', seed=1234, ratio=(0.7, 0.15, 0.15), group_prefix=None)
    data_gen = ImageDataGenerator(rescale=1.0 / 255)
    train_ds = data_gen.flow_from_directory('../sarscov2-ctscan-dataset-splitted/train/', target_size=(img_height, img_width),
                                            class_mode='binary', batch_size=batch_size, subset='training')
    val_ds = data_gen.flow_from_directory('../sarscov2-ctscan-dataset-splitted/val/', target_size=(img_height, img_width),
                                          class_mode='binary', batch_size=batch_size, shuffle=False)

    return train_ds, val_ds


train_data, val_data = create_data_binary('../sarscov2-ctscan-dataset/')

Copying files: 2481 files [00:01, 2033.41 files/s]

Found 1736 images belonging to 2 classes.
Found 371 images belonging to 2 classes.





# USE OF COVID-CT-master DATASET

In [None]:
epochs = 30
batch_size = 32
img_height, img_width = 150, 150
input_shape = (img_height, img_width, 3)

data_gen = ImageDataGenerator(rescale=1.0 / 255)

"""
train_datagen = ImageDataGenerator(rescale=1.0/255,
                                  zoom_range=0.2,
                                  width_shift_range=0.2,
                                  height_shift_range=0.2,
                                  fill_mode='nearest')
"""
                                  
train_data = data_gen.flow_from_directory("../COVID-CT-master/Dataset/train/",
                                          target_size=(img_height, img_width),
                                          batch_size = batch_size,
                                          class_mode = 'binary',
                                          subset='training')
                                         
test_data = data_gen.flow_from_directory("../COVID-CT-master/Dataset/test/",
                                          target_size=(img_height, img_width),
                                          batch_size = batch_size,
                                          class_mode = 'binary')

val_data = data_gen.flow_from_directory("../COVID-CT-master/Dataset/val/",
                                          target_size=(img_height, img_width),
                                          batch_size = batch_size,
                                          class_mode = 'binary',
                                          shuffle=False)

### Base Model CNN

In [None]:
class BaseModel(tf.keras.Model):
    def __init__(self):
        super(BaseModel, self).__init__()

        self.C1 = Conv2D(32, (3 * 3), padding='same', input_shape = input_shape)
        self.B1 = BatchNormalization()
        self.A1 = Activation('relu')
        self.P1 = MaxPooling2D(2, padding='same')
        
        self.C2 = Conv2D(32, (3 * 3), padding='same')
        self.B2 = BatchNormalization()
        self.A2 = Activation('relu')
        self.P2 = MaxPooling2D(2, padding='same')
        self.Dr1 = Dropout(0.3)
        
        self.C3 = Conv2D(32, (3 * 3), padding='same')
        self.B3 = BatchNormalization()
        self.A3 = Activation('relu')
        self.P3 = MaxPooling2D(2, padding='same')
        self.Dr2 = Dropout(0.3)
        
        self.F1 = Flatten()
        self.D1 = Dense(256, activation='relu')
        self.B4 = BatchNormalization()
        self.D2 = Dense(256, activation='relu')
        self.D3 = Dense(256, activation='relu')
        self.D4 = Dense(256, activation='relu')
        self.Dr3 = Dropout(0.3)
        self.D5 = Dense(1, activation='sigmoid')
        
        
    def call(self, x):
        x = self.C1(x)
        x = self.B1(x)
        x = self.A1(x)
        x = self.P1(x)
        
        x = self.C2(x)
        x = self.B2(x)
        x = self.A2(x)
        x = self.P2(x)
        x = self.Dr1(x)
        
        x = self.C3(x)
        x = self.B3(x)
        x = self.A3(x)
        x = self.P3(x)
        x = self.Dr2(x)
        
        x = self.F1(x)
        x = self.D1(x)
        x = self.B4(x)
        x = self.D2(x)
        x = self.D3(x)
        x = self.D4(x)
        x = self.Dr3(x)
        y = self.D5(x)
        
        return y
    
    def __repr__(self):
        name = 'Huang_Model'
        return name
    
    
net = BaseModel()

net.compile(optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'])

checkpoint_save_path = './Model.ckpt'
if os.path.exists(checkpoint_save_path + '.index'):
    net.load_weights(checkpoint_save_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_save_path, save_weights_only=True,
                                                 save_best_only=True)

#history = net.fit(train_data, epochs=30, batch_size=batch_size, callbacks=[cp_callback])
history = net.fit(train_data, epochs=30, batch_size=batch_size)

net.summary()

file = open('./weights.txt', 'w')
for v in net.trainable_variables:
    file.write(str(v.name) + '\n')
    file.write(str(v.shape) + '\n')
    file.write(str(v.numpy()) + '\n')

file.close()

### VGG16 CNN

In [52]:
disease_types = ['COVID', 'non-COVID']

data_dir = '../sarscov2-ctscan-dataset-splitted/'

# resize images from data_dir train, test and val and create new dataset resized
def resize_images(data_dir):
    # resize images from data_dir train, test and val and create new dataset resized
    for folder in ['train', 'test', 'val']:
        for file in os.listdir(os.path.join(data_dir, folder)):
            if file.endswith('.jpg'):

                img = cv2.imread(os.path.join(data_dir, folder, file))
                img = cv2.resize(img, (224, 224))
                cv2.imwrite(os.path.join(data_dir, 'resized', folder, file), img)

# resize_images(data_dir)

# create data generator for train, test and val
def create_data_binary(data_dir):
    data_dir = pathlib.Path(data_dir)
    # create data generator for train, test and val
    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
        os.path.join(data_dir, 'train'),
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=(224, 224),
        batch_size=32,
        label_mode='binary')
    test_ds = tf.keras.preprocessing.image_dataset_from_directory(
        os.path.join(data_dir, 'test'),
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=(224, 224),
        batch_size=32,
        label_mode='binary')
    return train_ds, test_ds

train_ds, test_ds = create_data_binary('../sarscov2-ctscan-dataset-splitted/')


Found 1736 files belonging to 2 classes.
Using 1389 files for training.
Found 374 files belonging to 2 classes.
Using 74 files for validation.


In [None]:
class VGG16Model(tf.keras.Model):
    def __init__(self):
        super(VGG16Model, self).__init__()
        
        self.C1 = Conv2D(32, (3 * 3), padding='same', input_shape = input_shape)

        #self.VGG = VGG16(weights='imagenet', include_top = False, input_shape= input_shape)
        self.VGG = VGG16(weights='imagenet', include_top = False)

        self.GAP = GlobalAveragePooling2D()
        self.B1 = BatchNormalization()

        self.D1 = Dense(32, activation='relu')
        self.B2 = BatchNormalization()
        
        self.D2 = Dense(32, activation='softmax')
    
    def call(self, x):
        x = self.C1(x)
        x = self.VGG(x)
        x = self.GAP(x)
        x = self.B1(x)
        x = self.D1(x)
        x = self.B2(x)
        y = self.D2(x)
        
        return y

    def __repr__(self):
        name = 'VGG16Model'
        return name
        
net = VGG16Model()

optimizer = Adam(learning_rate= 0.003, beta_1 = 0.9, beta_2 = 0.999, epsilon = 0.1, decay = 0.0)
net.compile(loss = 'categorical_crossentropy', optimizer =optimizer, metrics = ['accuracy'])

annealer = ReduceLROnPlateau(monitor = 'val_accuracy', factor = 0.70, patience = 5, verbose = 1, min_lr = 1e-4)

# Fits the model on batches with real-time data augmentation
#history = net.fit(X_Train, epochs=epochs, batch_size = batch_size, callbacks = [annealer])
history = net.fit(train_ds, epochs=epochs, batch_size = batch_size)

model.summary()

In [None]:
acc = history.history['accuracy']
loss = history.history['loss']

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(acc)
plt.title('Training Acc')

plt.subplot(1, 2, 2)
plt.plot(loss)
plt.title('Training Loss')
plt.show()