In [None]:
import tensorflow as tf
import os
import json
import numpy as np
import pandas
import matplotlib.pyplot as plt
import re
import random
from shutil import copyfile

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout

#from tensorflow.keras.callbacks import EarlyStopping
#from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input

In [None]:
# Inspect the raw dataset folder before it is re-organised.
# data_dir_list holds the names of the entries directly inside 'dataset'.
data_dir_list = os.listdir('dataset')
print(data_dir_list)

# The first step of os.walk() describes the dataset root itself: its path,
# its immediate sub-directories and the files stored directly in it.
# file_count therefore counts only files at the root, not inside sub-folders.
path, dirs, files = next(os.walk("dataset"))
file_count = len(files)
print(path, dirs, files, file_count)

In [None]:
# Root folder for the re-organised train/test copy of the dataset.
base_dir = 'data/'
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: plain os.mkdir
# raises FileExistsError as soon as the folder exists from an earlier run.
os.makedirs(base_dir, exist_ok=True)

In [None]:
# Build the <base_dir>/<split>/<class> folder tree.
# os.makedirs(..., exist_ok=True) replaces the individual os.mkdir calls so
# the cell can be re-run without raising FileExistsError.

# Two split folders: train and test.
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

# Under the train folder: one folder per class (covid, pneumonia, normal).
train_covid_dir = os.path.join(train_dir, 'covid')
train_pneumonia_dir = os.path.join(train_dir, 'pneumonia')
train_normal_dir = os.path.join(train_dir, 'normal')

# Under the test folder: the same three class folders.
test_covid_dir = os.path.join(test_dir, 'covid')
test_pneumonia_dir = os.path.join(test_dir, 'pneumonia')
test_normal_dir = os.path.join(test_dir, 'normal')

# Creating the leaf folders also creates train_dir and test_dir.
for class_dir in (train_covid_dir, train_pneumonia_dir, train_normal_dir,
                  test_covid_dir, test_pneumonia_dir, test_normal_dir):
    os.makedirs(class_dir, exist_ok=True)

In [None]:
#split dataset

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly copy the files of SOURCE into TRAINING and TESTING.

    Args:
        SOURCE: directory holding the files to split.
        TRAINING: existing directory that receives the training share.
        TESTING: existing directory that receives the remaining files.
        SPLIT_SIZE: fraction of the usable files copied to TRAINING; the
            resulting count is truncated with int().

    Zero-length files are reported and skipped. Entries that are not regular
    files (e.g. sub-directories) are skipped too, because copyfile cannot
    copy them. Paths are built with os.path.join, so the directory arguments
    work with or without a trailing separator.
    """
    usable = []
    # Sorting makes the pre-shuffle order independent of the filesystem, so a
    # seeded `random` module yields the same split on every run.
    for filename in sorted(os.listdir(SOURCE)):
        file = os.path.join(SOURCE, filename)
        if not os.path.isfile(file):
            continue
        if os.path.getsize(file) > 0:
            usable.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(usable) * SPLIT_SIZE)
    shuffled_set = random.sample(usable, len(usable))
    training_set = shuffled_set[:training_length]
    test_set = shuffled_set[training_length:]

    for subset, target_dir in ((training_set, TRAINING), (test_set, TESTING)):
        for filename in subset:
            copyfile(os.path.join(SOURCE, filename),
                     os.path.join(target_dir, filename))

In [None]:
# Source folder of each class in the original dataset, plus the train/test
# destination folders its files are copied into.
COVID_SOURCE_DIR = 'dataset/COVID/'
TRAINING_COVID_DIR = 'data/train/covid/'
TEST_COVID_DIR = 'data/test/covid/'

PNEU_SOURCE_DIR = 'dataset/PNEUMONIA/'
TRAINING_PNEU_DIR = 'data/train/pneumonia/'
TEST_PNEU_DIR = 'data/test/pneumonia/'

NORMAL_SOURCE_DIR = 'dataset/NORMAL/'
TRAINING_NORMAL_DIR = 'data/train/normal/'
TEST_NORMAL_DIR = 'data/test/normal/'
# The test folder was originally named NORMAL_NORMAL_DIR, breaking the
# TEST_<CLASS>_DIR pattern of the other classes; the old name is kept as an
# alias so existing references keep working.
NORMAL_NORMAL_DIR = TEST_NORMAL_DIR

In [None]:
#split dataset calling
split_size = .75

# Seed the shuffle used inside split_data. Without a seed every run draws a
# different partition, and because files are copied into folders that are not
# emptied first, a re-run can leave the same image in both train and test.
# NOTE(review): repeatability also requires the directory listing order to
# stay the same between runs.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_data(COVID_SOURCE_DIR, TRAINING_COVID_DIR, TEST_COVID_DIR, split_size)
split_data(PNEU_SOURCE_DIR, TRAINING_PNEU_DIR, TEST_PNEU_DIR, split_size)
split_data(NORMAL_SOURCE_DIR, TRAINING_NORMAL_DIR, NORMAL_NORMAL_DIR, split_size)

In [None]:
#distribution of training dataset
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.image import imread
import pathlib

# Count the images copied into each training class folder and show the
# counts as a bar chart.
image_folder = ['covid', 'pneumonia', 'normal']
nimgs = {}
for i in image_folder:
    nimages = len(os.listdir('data/train/{}/'.format(i)))
    nimgs[i] = nimages

bar_positions = range(len(nimgs))
plt.figure(figsize=(9, 6))
plt.bar(bar_positions, list(nimgs.values()), align='center')
plt.xticks(bar_positions, list(nimgs.keys()))
plt.title('Distribution of different classes in Training Dataset')
plt.show()

In [None]:
# Number of training images available for each class.
for i in ['covid', 'pneumonia', 'normal']:
    n_train = len(os.listdir('data/train/{}/'.format(i)))
    print('Training {} images are: '.format(i) + str(n_train))

In [None]:
#distribution of testing dataset

# Count the images copied into each test class folder and show the counts
# as a bar chart.
image_folder = ['covid', 'pneumonia', 'normal']
nimgs = {}
for i in image_folder:
    nimages = len(os.listdir('data/test/{}/'.format(i)))
    nimgs[i] = nimages

bar_positions = range(len(nimgs))
plt.figure(figsize=(9, 6))
plt.bar(bar_positions, list(nimgs.values()), align='center')
plt.xticks(bar_positions, list(nimgs.keys()))
plt.title('Distribution of different classes in Testing Dataset')
plt.show()

In [None]:
# Number of images in each class folder of the test split (data/test/).
for i in ['covid', 'pneumonia', 'normal']:
    n_test = len(os.listdir('data/test/{}/'.format(i)))
    print('Valid {} images are: '.format(i) + str(n_test))

In [None]:
# Image size fed to the networks and number of images per batch.
img_width, img_height = 224, 224
batch_size = 64  # samples propagated through the network per step

In [None]:
# Training pipeline: images are read from the class sub-folders of
# TRAINING_DIR, randomly augmented, passed through preprocess_input and
# delivered in batches with one-hot ('categorical') labels.
TRAINING_DIR = 'data/train/'

augmentation_options = dict(
    rotation_range=30,
    zoom_range=0.4,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=0.2,
)
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   **augmentation_options)

train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    batch_size=batch_size,
    class_mode='categorical',
    target_size=(img_height, img_width),
)

In [None]:
# Test pipeline: same preprocessing as training but without augmentation.
TEST_DIR = 'data/test/'

test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    batch_size=batch_size,
    class_mode='categorical',
    target_size=(img_height, img_width),
)

ResNet-50 Architecture

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
from tensorflow.keras.layers import Flatten , Dense, Dropout , MaxPool2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint


In [None]:
resnet_model = Sequential()

# ImageNet-pretrained ResNet-50 backbone without its classification head.
# - `pooling` must be the string 'max'. The original passed the built-in
#   function `max`, which is not one of the option strings Keras checks for,
#   so the requested global max pooling was not applied.
# - `classes` is dropped: per the Keras documentation it is only meaningful
#   when include_top=True.
# - The input size reuses img_height / img_width so it always matches the
#   target_size of the data generators.
res = ResNet50(include_top=False,
               input_shape=(img_height, img_width, 3),
               pooling='max',
               weights='imagenet')


In [None]:
# Freeze every backbone layer so only the layers added on top are trained,
# then report how many layers the backbone has.
for layer in res.layers:
    setattr(layer, 'trainable', False)

backbone_depth = len(res.layers)
print(backbone_depth)

In [None]:
# Classifier: backbone -> flatten -> 256-unit ReLU layer -> 3-way softmax.
# The model is built in a single Sequential([...]) call instead of repeated
# .add() calls, so re-running this cell rebuilds the same network rather than
# stacking a second copy of every layer onto the existing model.
resnet_model = Sequential([
    res,
    Flatten(),
    Dense(256, activation='relu'),
    Dense(3, activation='softmax'),
])


In [None]:
# Print the layer-by-layer summary of the ResNet-based classifier.
resnet_model.summary()


In [None]:
# Adam optimiser with categorical cross-entropy (labels are one-hot encoded
# by the generators); accuracy is tracked as the metric.
resnet_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stop when validation accuracy has not improved by at least 0.01 for 3
# epochs, and save the model to bestmodel.h5 whenever validation accuracy
# reaches a new best.
es = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.01,
    patience=3,
    verbose=1,
)
mc = ModelCheckpoint(
    filepath="bestmodel.h5",
    monitor="val_accuracy",
    verbose=1,
    save_best_only=True,
)

In [None]:
# Train the ResNet-based classifier, validating on the test generator.
# The step counts (35 training / 32 validation) are capped at the number of
# batches each generator provides, so a smaller dataset does not request more
# batches per epoch than exist.
# NOTE(review): how Keras reacts to over-long step counts depends on the
# installed version — confirm.
hist = resnet_model.fit(
    train_generator,
    steps_per_epoch=min(35, len(train_generator)),
    epochs=15,
    validation_data=test_generator,
    validation_steps=min(32, len(test_generator)),
    callbacks=[es, mc],
)

VGG-16 Architecture

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Flatten , Dense, Dropout , MaxPool2D

In [None]:
# VGG-16 backbone. include_top=False leaves out the network's own
# fully-connected classifier layers so a new head can be attached.
# The input size must match the images produced by the data generators
# (target_size=(img_height, img_width)); the original 299x299 input shape did
# not match those 224x224 batches.
vgg = VGG16(input_shape=(img_height, img_width, 3), include_top=False)

In [None]:
# Keep the VGG-16 weights fixed: mark every backbone layer as non-trainable.
for layer in vgg.layers:
    setattr(layer, 'trainable', False)

In [None]:
# New head on top of the frozen backbone: flatten the backbone output and
# feed it to a 3-unit softmax layer named 'predictions'.
flattened_features = Flatten()(vgg.output)
x = Dense(units=3, activation='softmax', name='predictions')(flattened_features)

model = Model(inputs=vgg.input, outputs=x)

In [None]:
# Print the layer-by-layer summary of the VGG-based classifier.
model.summary()

In [None]:
# Same training configuration as the ResNet model: Adam, categorical
# cross-entropy and accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Early stopping and checkpointing for the VGG run.
# Both callbacks monitor "val_accuracy", as the ResNet callbacks do. The
# original monitored training "accuracy", which selects the "best" model on
# data it was fitted to rather than on held-out data.
# NOTE: the checkpoint path is the same "bestmodel.h5" used by the ResNet
# run, so this run overwrites that file.
es = EarlyStopping(monitor="val_accuracy", min_delta=0.01, patience=3, verbose=1)
mc = ModelCheckpoint(filepath="bestmodel.h5", monitor="val_accuracy", verbose=1, save_best_only=True)

In [None]:
# Train the VGG-based classifier, validating on the test generator.
# The step counts (10 training / 16 validation) are capped at the number of
# batches each generator provides, so a smaller dataset does not request more
# batches per epoch than exist.
# NOTE(review): how Keras reacts to over-long step counts depends on the
# installed version — confirm.
# `hist` is reassigned here, so it no longer holds the ResNet history.
hist = model.fit(
    train_generator,
    steps_per_epoch=min(10, len(train_generator)),
    epochs=10,
    validation_data=test_generator,
    validation_steps=min(16, len(test_generator)),
    callbacks=[es, mc],
)

Validation

In [None]:
# Load the checkpoint written by ModelCheckpoint. Both training sections save
# to "bestmodel.h5", so this is the file left by whichever run wrote it last.
# `model` is rebound to the loaded model.
model = load_model("bestmodel.h5")

In [None]:
import numpy
from tensorflow.keras.models import Model


# Predict a class for every image under VALID_DIR (the same folder as the
# test split).
VALID_DIR = 'data/test/'

validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# shuffle=False is essential: flow_from_directory shuffles by default, while
# validation_generator.classes lists the labels in directory order. With
# shuffling on, predictions and true labels do not line up, and the
# classification report / confusion matrix built from them are meaningless.
validation_generator = validation_datagen.flow_from_directory(
    VALID_DIR,
    batch_size=batch_size,
    class_mode='categorical',
    target_size=(img_height, img_width),
    shuffle=False,
)

# Number of batches needed to cover every sample (integer ceiling division).
# This replaces numpy.math.ceil: `numpy.math` was an accidental alias of the
# stdlib math module and is no longer available in recent NumPy releases.
test_steps_per_epoch = (
    (validation_generator.samples + validation_generator.batch_size - 1)
    // validation_generator.batch_size
)

predictions = model.predict(validation_generator, steps=test_steps_per_epoch)
# Get most likely class
predicted_classes = numpy.argmax(predictions, axis=1)

In [None]:
#dont change
# Rebuild the test generator and report the loaded model's accuracy on it.
TEST_DIR = 'data/test/'

test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    batch_size=batch_size,
    class_mode='categorical',
    target_size=(img_height, img_width),
)

# evaluate() returns the loss followed by the compiled metrics; the entry at
# index 1 is printed as a percentage.
evaluation_results = model.evaluate(test_generator)
print("Accuracy : ", str(evaluation_results[1]*100))

# Alternative kept from the original: evaluate on validation_generator.
#print("Accuracy : ", str(model.evaluate(validation_generator)[1]*100))


In [None]:
# Ground-truth class indices and the class names, taken from the generator.
true_classes = validation_generator.classes
class_labels = [label for label in validation_generator.class_indices]

In [None]:
# Print each true class index next to the predicted one.
for i, actual in enumerate(true_classes):
    print(actual, predicted_classes[i])

In [None]:
from sklearn import metrics

# Per-class precision / recall / F1 for the predictions.
print('Classification Report')
report = metrics.classification_report(
    true_classes,
    predicted_classes,
    target_names=class_labels,
)

print(report)

In [None]:
# plot_confusion_matrix was removed from scikit-learn in version 1.2, so an
# unconditional import makes this cell fail on current releases. Nothing in
# this cell uses it; the guarded import keeps the name defined where it still
# exists and falls back to None otherwise.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix = None
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes.
print('Confusion Matrix')
d = confusion_matrix(true_classes, predicted_classes)
print(d)
# The original trailing plt.show() was dropped: this cell draws no figure.

In [None]:
import seaborn as sns

# Confusion matrix of the predictions (rows: true class, columns: predicted).
cf_matrix = confusion_matrix(true_classes, predicted_classes)
print(cf_matrix)

# Class names in label-index order (0: covid, 1: normal, 2: pneumonia).
# The list was previously defined but never used; it now labels both axes.
categories = ['covid', 'normal','pneumonia']
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' format
# shows counts of 100 or more in scientific notation (e.g. 1.2e+02).
sns.heatmap(cf_matrix, annot=True, fmt='d',
            xticklabels=categories, yticklabels=categories)

In [None]:
import numpy as np
# Same matrix expressed as a share of all samples, annotated as percentages.
cf_share = cf_matrix / np.sum(cf_matrix)
sns.heatmap(cf_share, annot=True, fmt='.2%', cmap='Blues')

In [None]:
import seaborn as sns

# Publication-style confusion matrix figure, saved to cf.png.
cf_matrix = confusion_matrix(true_classes, predicted_classes)
# Tick labels in label-index order (0: covid, 1: normal, 2: pneumonia).
labels = ['covid', 'normal','pneumonia']

sns.set(color_codes=True)
plt.figure(1, figsize=(9, 6))

plt.title("Confusion Matrix")

sns.set(font_scale=1.4)
# fmt="d" prints the integer counts as-is; seaborn's default '.2g' format
# shows counts of 100 or more in scientific notation (e.g. 1.2e+02).
ax = sns.heatmap(cf_matrix, annot=True, fmt="d", cmap="YlGnBu", cbar_kws={'label': 'Scale'})

ax.set_xticklabels(labels)
ax.set_yticklabels(labels)

ax.set(ylabel="True Label", xlabel="Predicted Label")

# Save before show() so the written file contains the rendered figure.
plt.savefig("cf.png", bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Per-epoch curves recorded by the most recent fit() call stored in `hist`.
history = hist.history
acc, val_acc = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']
epochs = range(len(acc))  # x-axis: one point per completed epoch

In [None]:
# Training vs. validation accuracy per epoch.
fig = plt.figure(figsize=(14,7))
for series, colour, caption in ((acc, 'r', "Training Accuracy"),
                                (val_acc, 'b', "Validation Accuracy")):
    plt.plot(epochs, series, colour, label=caption)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc='lower right')
plt.show()


In [None]:

# Training vs. validation loss per epoch.
fig2 = plt.figure(figsize=(14,7))
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.legend(loc='upper right')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and validation loss')
# Finish with plt.show(), as the accuracy plot does, so the cell renders the
# figure instead of echoing the Text object returned by plt.title().
plt.show()

In [None]:
from tensorflow.keras.models import model_from_json
import cv2
import numpy as np
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import load_model

# Reload the saved checkpoint for single-image prediction.
model = load_model('bestmodel.h5')

# Class index -> class name.
class_type = dict(enumerate(['covid', 'normal', 'pneumonia']))

def predict_(image_path):
    """Classify one image file and print the predicted label and class index.

    Args:
        image_path: path of the image file to classify.

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from image_path.

    Uses the module-level `model`. Index 0 is printed as Covid, 2 as
    Pneumonia and anything else as Normal.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None; fail
    # here with a clear message instead of an opaque error inside cv2.resize.
    if image is None:
        raise FileNotFoundError("Could not read image file: {}".format(image_path))
    # OpenCV loads images in BGR channel order, whereas the Keras generators
    # used for training load RGB and preprocess_input expects RGB input.
    # Convert so inference sees the same channel order as training.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image,(224,224))
    image = preprocess_input(image)
    image = image.reshape(1,224,224,3)  # add the batch dimension
    preds = model.predict(image)
    preds = np.argmax(preds,axis=1)[0]
    if preds==0:
        print("Predicted Label:Covid")
    elif preds==2:
        print("Predicted Label: Pneumonia")
    else:
        print("Predicted Label: Normal")
    print(preds)

In [None]:
# Run the single-image predictor on one sample image per class.
for path in ('Covid.jpg', 'Normal.jpeg', 'Pneumonia.jpg'):
    predict_(path)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Grouped bar chart comparing training / testing / validation accuracy of
# four architectures.

# set width of bar
barWidth = 0.20
# plt.subplots returns a (figure, axes) pair; unpack it instead of storing
# the whole tuple in `fig`.
fig, accuracy_ax = plt.subplots(figsize=(12, 5))

# set height of bar
# NOTE(review): these values are hard-coded, presumably copied from earlier
# runs — they are not computed by this notebook; confirm their source.
Training_Accuracy = [95.85, 93.93, 93.45, 93.54]
Testing_Accuracy = [93.26, 92.78, 89.42, 90.86]
Teasting_Accuracy = Testing_Accuracy  # original misspelled name, kept as an alias
Validation_Accuracy = [94.14, 93.55, 91.79, 89.94]

# Set position of bar on X axis: each series is shifted by one bar width.
br1 = np.arange(len(Training_Accuracy))
br2 = [x + barWidth for x in br1]
br3 = [x + barWidth for x in br2]

# Make the plot (legend label spelling fixed: 'Teasting' -> 'Testing').
plt.bar(br1, Training_Accuracy, color='r', width=barWidth,
        edgecolor='grey', label='Training')
plt.bar(br2, Testing_Accuracy, color='g', width=barWidth,
        edgecolor='grey', label='Testing')
plt.bar(br3, Validation_Accuracy, color='b', width=barWidth,
        edgecolor='grey', label='Validation')

# Adding Xticks, centred under the middle bar of each group.
plt.xlabel('Architecture', fontweight='bold', fontsize=15)
plt.ylabel('Accuracy', fontweight='bold', fontsize=15)
plt.xticks([r + barWidth for r in range(len(Training_Accuracy))],
           ['ResNet-50', 'ResNet-101', 'VGG-16', 'VGG-19'])

plt.legend()
plt.show()
