In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Activation, Dense, Dropout, Flatten, BatchNormalization, Conv2D, MaxPooling2D, Lambda, Input, AveragePooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing import image
from tensorflow.keras import backend as K
import tensorflow as tf 
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd
import random
#import cv2
import os

In [6]:
# List every entry in the Images/ folder in alphabetical order.
# os.listdir returns hidden files as well as the per-condition sub-folders.
image_names = sorted(os.listdir("Images"))
print(image_names)

['.DS_Store', 'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Covid', 'Edema', 'Enlarged_Cardiomediastinum', 'Fracture', 'Lung_Lesion', 'Lung_Opacity', 'No_Finding', 'Pleural_Other', 'Pneumonia', 'Pneumothorax', 'Support_Devices']


In [7]:
# Build one (file_name, condition) row per image file; every sub-directory of
# Images/ is treated as one condition.
# Only real directories are kept, so stray files such as '.DS_Store' can neither
# be listed as a class nor shift positions in the list (a fixed `[2:]` slice
# silently drops a real class whenever the hidden file is absent).
condition_dirs = [
    name for name in image_names
    if os.path.isdir(os.path.join("Images", name))
]

# One frame per condition, concatenated once (DataFrame.append was removed in
# pandas 2.0 and is quadratic when called in a loop).
condition_frames = [
    pd.DataFrame({
        "file_name": os.listdir(os.path.join("Images", name)),
        "condition": name,  # scalar is broadcast to every file of the folder
    })
    for name in condition_dirs
]
df = pd.concat(condition_frames, ignore_index=True)

df["condition"].value_counts()

No_Finding                    198
Covid                         184
Pneumothorax                  164
Lung_Opacity                  156
Edema                         155
Support_Devices               154
Consolidation                 145
Enlarged_Cardiomediastinum    143
Atelectasis                   143
Fracture                      142
Cardiomegaly                  136
Lung_Lesion                   127
Pneumonia                     118
Pleural_Other                  99
Name: condition, dtype: int64

In [4]:
# Balance classes: randomly under-sample the largest class (No_Finding) to 150 rows.
# random_state pins the sample so re-running the cell selects the same rows.
# NOTE(review): the image generators visible in this notebook read straight from
# the directory, so df_balanced does not appear to influence training - confirm.
is_no_finding = df["condition"] == "No_Finding"
df_no_finding = df[is_no_finding].sample(n=150, random_state=42)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_balanced = pd.concat([df[~is_no_finding], df_no_finding])
df_balanced["condition"].value_counts()

Covid                         184
Pneumothorax                  164
Lung_Opacity                  156
Edema                         155
Support_Devices               154
No_Finding                    150
Consolidation                 145
Atelectasis                   143
Enlarged_Cardiomediastinum    143
Fracture                      142
Lung_Lesion                   127
Pneumonia                     118
Pleural_Other                  99
Name: condition, dtype: int64

In [8]:
img_width, img_height=200,200
batch_size=128

data_dir = "Images/"

# Condition folders to load from data_dir. The list order fixes the class
# indices (Covid -> 0, Atelectasis -> 1, ...); both generators share this one
# list so training and validation labels cannot drift apart.
condition_classes = [
    "Covid", "Atelectasis", "Cardiomegaly", "Consolidation", "Edema",
    "Enlarged_Cardiomediastinum", "Fracture", "Lung_Lesion", "Lung_Opacity",
    "No_Finding", "Pleural_Other", "Pneumonia", "Pneumothorax", "Support_Devices",
]

# Training pipeline: multiply pixel values by 1/255, hold out 30% of the files
# for validation, and apply light augmentation (rotation, shear, shifts, zoom).
datagen= ImageDataGenerator(rescale=1/255,validation_split=.3,rotation_range=20,
                           shear_range=.2,width_shift_range=0.1,height_shift_range=0.1,zoom_range=0.2)

# Flow augmented training images from the "training" part of the split in
# batches of `batch_size`.
train_generator= datagen.flow_from_directory(
                data_dir,
                target_size=(img_height,img_width),  # Keras expects (height, width)
                batch_size=batch_size,
                subset="training",
                class_mode="categorical",
                classes=condition_classes,
                shuffle=True, seed=30)


# Alternative (unused) tf.data loading path, kept for reference:
# train_ds = tf.keras.preprocessing.image_dataset_from_directory(
#   data_dir,
#   validation_split=0.2,
#   subset="training",
#   seed=123,
#   image_size=(img_height, img_width),
#   batch_size=batch_size)

# val_ds = tf.keras.preprocessing.image_dataset_from_directory(
#   data_dir,
#   validation_split=0.2,
#   subset="validation",
#   seed=123,
#   image_size=(img_height, img_width),
#   batch_size=batch_size)

# Validation pipeline: same rescaling and same 30% split, but no augmentation,
# so the model is evaluated on unmodified images.
datagen2=ImageDataGenerator(rescale=1/255,validation_split=.3)

test_generator=datagen2.flow_from_directory(
                data_dir,
                target_size=(img_height,img_width),  # Keras expects (height, width)
                batch_size=batch_size,
                classes=condition_classes,
                class_mode="categorical", subset="validation", shuffle=True, seed=30)


Found 2642 images belonging to 14 classes.
Found 1122 images belonging to 14 classes.


## Model #1

In [10]:
# GENERATE MODEL 1
# Baseline CNN: two convolution / max-pooling stages followed by a small dense head.
classifier = Sequential([
    # Stage 1: 32 filters of 3x3 over the 200x200 RGB input, then 2x2 pooling
    Conv2D(32, (3,3), input_shape=(200,200,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),

    # Stage 2: another 32 filters of 3x3, then 2x2 pooling
    Conv2D(32, (3,3), activation="relu"),
    MaxPooling2D(pool_size=(2,2)),

    # Dense head: flatten the feature maps, 128 hidden units
    Flatten(),
    Dense(units=128, activation="relu"),

    # 14 output units (one per class) with sigmoid activation
    Dense(units=14, activation="sigmoid"),
])

In [7]:
# MODEL 1
# Compile with Adam and categorical cross-entropy (the generators yield one-hot
# labels via class_mode="categorical"), tracking accuracy.
classifier.compile(
  optimizer='adam',
  loss="categorical_crossentropy",
  metrics=['accuracy'])


total_sample= train_generator.n
# Read the batch size from the generator itself so the step count always
# matches the batches it actually yields.
batch_size=train_generator.batch_size


# Model.fit accepts generators directly; fit_generator is deprecated.
# steps_per_epoch counts full batches only (at least one step);
# validation_steps=2 evaluates on just two validation batches per epoch.
history= classifier.fit(
            train_generator, steps_per_epoch=max(1, total_sample // batch_size),
            epochs=3,
            validation_data=test_generator,
            validation_steps=2)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/3
Epoch 2/3
Epoch 3/3


## Model #2

In [11]:
# Model 2
# GENERATE MODEL
# Deeper CNN: five convolution stages with batch normalisation (and dropout on
# stages 2, 4 and 5), followed by a 128-unit dense head.
classifier = Sequential([
    # First convolution stage
    Conv2D(32, (3,3), input_shape=(200,200,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Second convolution stage
    Conv2D(64, (3,3), activation="relu"),
    Dropout(0.1),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Third convolution stage
    Conv2D(64, (3,3), activation="relu"),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Fourth convolution stage
    Conv2D(128, (3,3), activation="relu"),
    Dropout(0.2),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Fifth convolution stage
    Conv2D(128, (3,3), activation="relu"),
    Dropout(0.2),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Optional sixth stage, currently disabled:
    # Conv2D(256, (3,3) , strides = 1 , padding = 'same' , activation = 'relu'),
    # Dropout(0.2),
    # BatchNormalization(),
    # MaxPooling2D((2,2) , strides = 2 , padding = 'same'),

    # Flatten the feature maps to feed the dense head
    Flatten(),

    # 128 units in the fully-connected layer, with dropout
    Dense(units = 128 , activation = 'relu'),
    Dropout(0.2),

    # 14 output units for the 14 classes; this experiment uses sigmoid
    # activation (Model 3 below switches the output to softmax)
    Dense(units = 14 , activation = 'sigmoid'),
])

# # Flatten the results to feed into a dense layer
# classifier.add(Flatten())

# # 128 neuron in the fully-connected layer
# classifier.add(Dense(128, activation="relu"))

# # 14 output neurons for 14 classes with the softmax activation
# classifier.add(Dense(14, activation="softmax"))

In [12]:
# Model 2, with augmentation
# Compile with Adam and categorical cross-entropy (one-hot labels from the
# generators), tracking accuracy.
classifier.compile(
  optimizer='adam',
  loss="categorical_crossentropy",
  metrics=['accuracy'])


total_sample= train_generator.n
# Read the batch size from the generator itself so the step count always
# matches the batches it actually yields.
batch_size=train_generator.batch_size


# Model.fit accepts generators directly; fit_generator is deprecated.
# steps_per_epoch counts full batches only (at least one step);
# validation_steps=2 evaluates on just two validation batches per epoch.
history= classifier.fit(
            train_generator, steps_per_epoch=max(1, total_sample // batch_size),
            epochs=3,
            validation_data=test_generator,
            validation_steps=2)

Epoch 1/3
Epoch 2/3
Epoch 3/3


## Model #3
### Output activation changed from sigmoid to softmax

In [21]:
# Model 3
# GENERATE MODEL
# Shallower CNN: two convolution stages with batch normalisation, a 128-unit
# dense head and a softmax output.
classifier = Sequential([
    # First convolution stage
    Conv2D(32, (3,3), input_shape=(200,200,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Second convolution stage
    Conv2D(32, (3,3), activation="relu"),
    Dropout(0.1),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),

    # Optional extra stages, currently disabled:
    # Conv2D(64, (3,3), activation="relu"),
    # BatchNormalization(),
    # MaxPooling2D(pool_size=(2,2)),

    # Conv2D(256, (3,3) , strides = 1 , padding = 'same' , activation = 'relu'),
    # Dropout(0.2),
    # BatchNormalization(),
    # MaxPooling2D((2,2) , strides = 2 , padding = 'same'),

    # Flatten the feature maps to feed the dense head
    Flatten(),

    # 128 units in the fully-connected layer, with dropout
    Dense(128 , activation = 'relu'),
    Dropout(0.2),

    # 14 output units for the 14 classes, with softmax activation
    Dense(14 , activation = 'softmax'),
])

In [23]:
# Model 3, with augmentation
# Compile with Adam and categorical cross-entropy (one-hot labels from the
# generators), tracking accuracy.
classifier.compile(
  optimizer='adam',
  loss="categorical_crossentropy",
  metrics=['accuracy'])

total_valsample=test_generator.n
total_sample= train_generator.n
# Read the batch size from the generator itself so the step count always
# matches the batches it actually yields.
batch_size=train_generator.batch_size

# Model.fit accepts generators directly; fit_generator is deprecated.
# No validation_steps is given, so the validation generator is consumed in
# full after every epoch.
history= classifier.fit(
            train_generator, steps_per_epoch=max(1, total_sample // batch_size),
            epochs=30,
            validation_data=test_generator)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Transfer Learning Modeling

In [3]:
# NOTE(review): img_width / img_height are kept as originally assigned, but the
# generators in this cell load images at vgg_target_size instead.
img_width, img_height=200,200
batch_size=128

data_dir = "Database/"

# Image size fed to the VGG16 base, as (height, width).
vgg_target_size = (64, 64)
# Class folders under data_dir; list order fixes the class indices.
covid_classes = ["COVID19", "NORMAL", "Viral_Pneumonia"]

# Training pipeline: VGG16 preprocess_input plus light augmentation.
# No `rescale` here: ImageDataGenerator multiplies by `rescale` AFTER
# preprocessing_function has run, which would shrink the mean-centred values
# that the ImageNet-pretrained VGG16 weights expect.
datagen = ImageDataGenerator(shear_range=0.2,validation_split=.3,
                             zoom_range=0.2,horizontal_flip=True,
                             preprocessing_function=preprocess_input)

# shuffle=True: with shuffle=False flow_from_directory yields files in
# class-by-class order, so each training batch would be dominated by one class.
train_generator= datagen.flow_from_directory(
                data_dir,
                target_size=vgg_target_size,
                batch_size=batch_size,
                subset="training",
                class_mode="categorical",
                classes=covid_classes,
                shuffle=True, seed=30)

# Validation pipeline: same preprocessing and split, no augmentation.
datagen2=ImageDataGenerator(validation_split=.3,
                           preprocessing_function=preprocess_input)

# Validation order is left unshuffled so predictions stay aligned with the
# generator's file order.
test_generator=datagen2.flow_from_directory(
                data_dir,
                target_size=vgg_target_size,
                batch_size=batch_size,
                classes=covid_classes,
                class_mode="categorical", subset="validation", shuffle=False, seed=30)


Found 2893 images belonging to 3 classes.
Found 1238 images belonging to 3 classes.


In [4]:
# Model 3, transfer learning

# Rebinds `datagen`; the generators created earlier are not affected by this.
datagen = ImageDataGenerator(validation_split=.3, rescale=1/255)

# ImageNet-pretrained VGG16 convolutional base without its dense top,
# taking 64x64 RGB inputs.
classifier = VGG16(include_top=False, weights="imagenet", input_shape=(64,64,3))

# Freeze every layer of the base so only the new head is trained.
for base_layer in classifier.layers:
    base_layer.trainable = False

# New head: flatten the base's output and classify into 3 classes with softmax.
flat_features = Flatten()(classifier.output)
class_probs = Dense(3, activation="softmax")(flat_features)

model = Model(inputs=classifier.input, outputs=class_probs)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
total_sample= train_generator.n
# Read the batch size from the generator itself so the step count always
# matches the batches it actually yields.
batch_size=train_generator.batch_size

# (Re)compile so that re-running this cell starts from a fresh optimizer state.
model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=["accuracy"])

# Model.fit accepts generators directly; fit_generator is deprecated.
# The validation generator is consumed in full after every epoch.
history= model.fit(
            train_generator, steps_per_epoch=max(1, total_sample // batch_size),
            epochs=3,
            validation_data=test_generator)

Epoch 1/3
Epoch 2/3
Epoch 3/3