In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Lambda, Input, AveragePooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing import image
from tensorflow.keras import backend as K
import tensorflow as tf 
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd
import random
#import cv2
import os

In [38]:
# Check filenames 
image_names=list(os.listdir("Database"))
image_names.sort()
print(image_names)

['.DS_Store', 'COVID19', 'NORMAL', 'Viral_Pneumonia']


In [6]:
covid_dir=os.path.join("Database/COVID19")
norm_dir=os.path.join("Database/NORMAL")
pneu_dir=os.path.join("Database/Viral_Pneumonia")


print('total Covid images:', len(os.listdir(covid_dir)))
print('total Normal images:', len(os.listdir(norm_dir)))
print('total Viral_Pneumonia:', len(os.listdir(pneu_dir)))


total Covid images: 1327
total Normal images: 1341
total Viral_Pneumonia: 1463


In [10]:
df =pd.DataFrame(os.listdir(covid_dir))
df["condition"]= "Covid19"
df.head()

Unnamed: 0,0,condition
0,COVID-19 (979).png,Covid19
1,COVID-19 (580).png,Covid19
2,COVID-19 (996).png,Covid19
3,COVID-19 (646).png,Covid19
4,COVID-19 (216).png,Covid19


In [12]:
df =pd.DataFrame.from_records({"file_name":os.listdir(covid_dir),"condition": "Covid19"})

for f in image_names[1:]:
    folder_path="Database/" + f
    temp_df= pd.DataFrame.from_records({"file_name":os.listdir(folder_path),"condition": f })
    df=df.append(temp_df)
    
df["condition"].value_counts()

Viral_Pneumonia    1345
NORMAL             1341
Covid19            1143
Name: condition, dtype: int64

In [59]:
# # import csv to match correct condition
# data_csv= pd.read_csv("https://raw.githubusercontent.com/Coachnmomof3/UCB_COVID_Prediction_Model/james_demott/connect_core_data_set_csv.csv")
# data_csv.head()

In [21]:
img_width, img_height=200,200
batch_size=128

data_dir = "Database/"

# Rescale images 
datagen= ImageDataGenerator(rescale=1/255,validation_split=.3,rotation_range=20,
                           shear_range=.2,width_shift_range=0.1,height_shift_range=0.1,zoom_range=0.2)

# Flow training images in batches of 128 using train_data 
train_generator= datagen.flow_from_directory(
                data_dir,
                target_size=(img_width,img_height),
                batch_size=batch_size,
                subset="training",
                class_mode="categorical", 
                classes= ["COVID19", "NORMAL","Viral_Pneumonia"],
                shuffle=True, seed=30)


datagen2=ImageDataGenerator(rescale=1/255,validation_split=.3)

test_generator=datagen2.flow_from_directory(
                data_dir,
                target_size=(img_width,img_height),
                batch_size=batch_size,
                classes= ["COVID19", "NORMAL","Viral_Pneumonia"],
                class_mode="categorical", subset="validation", shuffle=True, seed=30)

# train_ds = tf.keras.preprocessing.image_dataset_from_directory(
#   data_dir,
#   validation_split=0.2,
#   subset="training",
#   seed=123,
#   image_size=(img_height, img_width),
#   batch_size=batch_size)

# # Flow test images in 


# val_ds = tf.keras.preprocessing.image_dataset_from_directory(
#   data_dir,
#   validation_split=0.2,
#   subset="validation",
#   seed=123,
#   image_size=(img_height, img_width),
#   batch_size=batch_size)

Found 2682 images belonging to 3 classes.
Found 1147 images belonging to 3 classes.


## Model 1
### Using new dataset and with 3 categories

In [22]:
# CNN MODEL- conv-batch-maxpool-dropout

classifier=Sequential()
classifier.add(Conv2D(32, kernel_size=3, activation="relu", input_shape=(200,200,3)))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(strides=(2,2)))
classifier.add(Dropout(.3))

classifier.add(Conv2D(32, kernel_size=3, activation="relu"))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(strides=(2,2)))
classifier.add(Dropout(.5))

classifier.add(Conv2D(64, kernel_size=3, activation="relu"))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(strides=(2,2)))
classifier.add(Dropout(.4))

classifier.add(Conv2D(64, kernel_size=3, activation="relu"))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(strides=(2,2)))
classifier.add(Dropout(.3))

classifier.add(Flatten())
classifier.add(Dense(512,activation="relu"))
classifier.add(Dense(128,activation="relu"))
classifier.add(Dropout(.4))

classifier.add(Dense(3, activation="softmax"))

In [23]:
classifier.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 198, 198, 32)      896       
_________________________________________________________________
batch_normalization_8 (Batch (None, 198, 198, 32)      128       
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 99, 99, 32)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 99, 99, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 97, 97, 32)        9248      
_________________________________________________________________
batch_normalization_9 (Batch (None, 97, 97, 32)        128       
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 48, 48, 32)       

In [25]:
classifier.compile(loss="categorical_crossentropy", 
              optimizer="adam", 
              metrics=["categorical_accuracy"])

total_sample=train_generator.n
batch_size=128

history= classifier.fit_generator(
            train_generator, steps_per_epoch=int(total_sample/batch_size),
            epochs=3,
            validation_data=test_generator, 
            validation_steps=2)

Epoch 1/3
Epoch 2/3
Epoch 3/3


## Model 2

In [27]:
## Model 2
classifier= Sequential()

# First convolution layer
classifier.add(Conv2D(32, (3,3), input_shape=(200,200,3),activation='relu'))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Second convolution layer
classifier.add(Conv2D(64,(3,3),activation="relu"))
classifier.add(Dropout(0.1))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Third convolution layer
classifier.add(Conv2D(64, (3,3), activation="relu"))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Fourth convolution layer
classifier.add(Conv2D(128, (3,3), activation="relu"))
classifier.add(Dropout(0.2))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

#Fifth convolution layer
classifier.add(Conv2D(128, (3,3), activation="relu"))
classifier.add(Dropout(0.2))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Flatten the results to feed into a dense layer
classifier.add(Flatten())

# 128 neuron in the fully-connected layer
classifier.add(Dense(units = 128 , activation = 'relu'))
classifier.add(Dropout(0.2))

# 14 output neurons for 14 classes with the softmax activation
classifier.add(Dense(units = 3 , activation = 'softmax'))

In [28]:
# Model 2, with augmentation
classifier.compile(
  optimizer='adam',
  loss="categorical_crossentropy",
  metrics=['accuracy'])


total_sample= train_generator.n
batch_size=128


history= classifier.fit_generator(
            train_generator, steps_per_epoch=int(total_sample/batch_size),
            epochs=3,
            validation_data=test_generator)

Epoch 1/3
Epoch 2/3
Epoch 3/3


# Model 3

In [29]:
# Model 3
# GENERATE MODEL
classifier= Sequential()

# First convolution layer
classifier.add(Conv2D(32, (3,3), input_shape=(200,200,3),activation='relu'))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Second convolution layer
classifier.add(Conv2D(32,(3,3),activation="relu"))
classifier.add(Dropout(0.1))
classifier.add(BatchNormalization())
classifier.add(MaxPooling2D(pool_size=(2,2)))

# Flatten the results to feed into a dense layer
classifier.add(Flatten())

# 128 neuron in the fully-connected layer
classifier.add(Dense(128 , activation = 'relu'))
classifier.add(Dropout(0.2))

# 14 output neurons for 14 classes with the softmax activation
classifier.add(Dense(3 , activation = 'softmax'))

In [30]:
# Model 3, with augmentation
classifier.compile(
  optimizer='adam',
  loss="categorical_crossentropy",
  metrics=['accuracy'])

# total_valsample=test_generator.n
total_sample= train_generator.n
batch_size=128

history= classifier.fit_generator(
            train_generator, steps_per_epoch=int(total_sample/batch_size),
            epochs=3,
            validation_data=test_generator)

Epoch 1/3
Epoch 2/3
Epoch 3/3


# Model 4- Transfer Learning

In [2]:
img_width, img_height=200,200
batch_size=128

data_dir = "Database/"

# Rescale images 
datagen= ImageDataGenerator(rescale=1/255,validation_split=.3,rotation_range=20,
                           shear_range=.2,width_shift_range=0.1,height_shift_range=0.1,zoom_range=0.2,
                           preprocessing_function=preprocess_input)

# Flow training images in batches of 128 using train_data 
train_generator= datagen.flow_from_directory(
                data_dir,
                target_size=(64,64),
                batch_size=batch_size,
                subset="training",
                class_mode="categorical", 
                classes= ["COVID19", "NORMAL","Viral_Pneumonia"],
                shuffle=True, seed=30)


datagen2=ImageDataGenerator(rescale=1/255,validation_split=.3,
                            preprocessing_function=preprocess_input)

test_generator=datagen2.flow_from_directory(
                data_dir,
                target_size=(64,64),
                batch_size=batch_size,
                classes= ["COVID19", "NORMAL","Viral_Pneumonia"],
                class_mode="categorical", subset="validation", shuffle=True, seed=30)

Found 2893 images belonging to 3 classes.
Found 1238 images belonging to 3 classes.


In [3]:
# Model 4

datagen= ImageDataGenerator(rescale=1/255,validation_split=.3)

classifier=VGG16(weights="imagenet", include_top=False, input_shape=(64,64,3))

for layer in classifier.layers:
    layer.trainable=False

x=Flatten()(classifier.output)
x=Dense(3, activation="softmax")(x)

model=Model(inputs=classifier.input, outputs=x)
model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=["accuracy"])

In [5]:
total_sample= train_generator.n
batch_size=128


history= model.fit_generator(
            train_generator, steps_per_epoch=int(total_sample/batch_size),
            epochs=50,
            validation_data=test_generator)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
