# Covid 19 Identification with Chest X Ray


# Dataset

### Total Covid19 Positive images 4100
### Total Covid19 Negative images ( Pneumonia + Normal ) 5362







## Credits and Links to open source dataset

1. [Covid Chest X Ray Dataset](https://github.com/ieee8023/covid-chestxray-dataset)
2. [Pneumonia dataset by Praveen](https://www.kaggle.com/praveengovi/coronahack-chest-xraydataset)
3. [COVID19 chest XRAY analysis by SAIMANASA_C](https://www.kaggle.com/code/saimanasachadalavada/covid19-chest-xray-analysis/data)
4. [COVID19 with Pneumonia and Normal Chest Xray(PA) Dataset by AMANULLAH ASRAF](https://www.kaggle.com/datasets/amanullahasraf/covid19-pneumonia-normal-chest-xray-pa-dataset)
5. [HASH_Directors - Covid19 by AMRUTH AMBRISH K](https://www.kaggle.com/code/amruthambrish/hash-directors-covid19/data)
6. [RICORD COVID-19 X-ray positive tests by RADDAR](https://www.kaggle.com/datasets/raddar/ricord-covid19-xray-positive-tests)



Created By Lalith Kahatapitiya at [PGIS](http://www.pgis.pdn.ac.lk/) - University of Peradeniya


In [45]:
!conda info --envs

# conda environments:
#
base                     /Users/lalithk90/opt/miniconda3
tensorflow            *  /Users/lalithk90/opt/miniconda3/envs/tensorflow
yto                      /Users/lalithk90/opt/miniconda3/envs/yto



In [46]:
!pip list

Package                       Version
----------------------------- -----------
absl-py                       1.2.0
appnope                       0.1.3
argon2-cffi                   21.3.0
argon2-cffi-bindings          21.2.0
asttokens                     2.0.8
astunparse                    1.6.3
attrs                         22.1.0
autopep8                      1.7.0
backcall                      0.2.0
backports.functools-lru-cache 1.6.4
bayesian-optimization         1.2.0
beautifulsoup4                4.11.1
bleach                        5.0.1
boto3                         1.24.88
botocore                      1.27.88
brotlipy                      0.7.0
cached-property               1.5.2
cachetools                    5.2.0
certifi                       2022.9.24
cffi                          1.15.1
charset-normalizer            2.1.1
click                         8.1.3
cloudpickle                   2.2.0
colorama                      0.4.5
contourpy        

   # Required phython packages and libraries 

In [47]:
import sys
import platform
import os
import datetime
import pytz
timezone = pytz.timezone("Asia/Colombo")
import random

import tensorflow.keras
import tensorflow as tf
import pandas as pd
import sklearn as sk
import numpy as np

import cv2

print(f"Python Platform: {platform.platform()}")
print(f"Python {sys.version}")
print()
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tensorflow.keras.__version__}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print(f"Numpy {np.__version__}")
print(f"Open CV {cv2.__version__}")
print()
gpu = len(tf.config.list_physical_devices('GPU')) > 0
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Python Platform: macOS-12.6-arm64-arm-64bit
Python 3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:00:33) 
[Clang 13.0.1 ]

Tensor Flow Version: 2.10.0
Keras Version: 2.10.0
Pandas 1.5.0
Scikit-Learn 1.1.2
Numpy 1.23.2
Open CV 4.6.0

GPU is available


# Data Set Location Verified and Find Image's Folder

In [48]:
# image data set_path mapping check folder names
dataset_path = 'TRAINING'
categories = os.listdir(dataset_path)
print(categories)

['.DS_Store', 'NORMAL', 'COVID']


# Images count in each folder

In [65]:
categories_all = os.listdir(dataset_path)
categories = []
for category in categories_all:
    if (category != ".DS_Store"):
        categories.append(category)

img_path_non_covid_old = []
img_path_covid_old = []


def images_name_array(need_store_array, folder_path):
    for img_name in os.listdir(folder_path):
        img_path = os.path.join(folder_path, img_name)
        need_store_array.append(img_path)


normal_images_names_folder_path = os.path.join(dataset_path, "NORMAL")
images_name_array(img_path_non_covid_old, normal_images_names_folder_path)

covid_images_names_folder_path = os.path.join(dataset_path, "COVID")
images_name_array(img_path_covid_old, covid_images_names_folder_path)


# images array shuffles
def image_path_shuffle(img_path_array):
    img_path_array = random.sample(img_path_array, len(img_path_array))
    return img_path_array


img_path_covid_old = image_path_shuffle(img_path_covid_old)
img_path_non_covid_old = image_path_shuffle(img_path_non_covid_old)

#  images array length
divider = 99.99
middle_img_path_covid = int(len(img_path_covid_old)/divider)
middle_img_path_non_covid = int(len(img_path_non_covid_old)/divider)

img_path_covid = img_path_covid_old[:middle_img_path_covid]
img_path_non_covid = img_path_non_covid_old[:middle_img_path_non_covid]

total_images = len(img_path_non_covid) + len(img_path_covid)

print(f"Covid images count     : {len(img_path_covid)}")
print(f"Non covid images count : {len(img_path_non_covid)}")
print(f"All images count       : {total_images}")
print(
    f"Covid images           : {round((len(img_path_covid)/total_images)*100, 2)} %")
print(
    f"Non covid images       : {round((len(img_path_non_covid)/total_images)*100, 2)} %")

Covid images count     : 40
Non covid images count : 53
All images count       : 93
Covid images           : 43.01 %
Non covid images       : 56.99 %


# Data Labels creation

In [50]:
labels = [i for i in range(len(categories))]

label_dict = dict(zip(categories, labels))  # empty dictionary

print(label_dict)
print(categories)
print(labels)

{'NORMAL': 0, 'COVID': 1}
['NORMAL', 'COVID']
[0, 1]


# Images reading and creating data array

In [80]:
img_size = 224
data = []
target = []


def image_data_reading(img_path):
    img = cv2.imread(img_path)
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Coverting the image into gray scale
        resized = cv2.resize(gray, (img_size, img_size))
        # resizing the gray scale into 255x255, since we need a fixed common size for all the images in the dataset

    except Exception as e:
        print('Exception:', e)
        # if any exception rasied, the exception will be printed here. And pass to the next image
    return resized


for image_path in img_path_covid:
    data.append(image_data_reading(image_path))
    target.append(label_dict["COVID"])

for image_path in img_path_non_covid:
    data.append(image_data_reading(image_path))
    target.append(label_dict["COVID"])


print(target)
print(data)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[array([[  4,   4,   4, ...,   4,   4,   4],
       [  3,   3,   3, ...,   4,   4,   4],
       [  3,   3,   3, ...,   4,   4,   4],
       ...,
       [ 90,  87,  91, ...,  37,  42,  48],
       [ 98,  88,  97, ...,  41,  45,  55],
       [100,  89,  99, ...,  38,  52,  59]], dtype=uint8), array([[0, 0, 1, ..., 0, 0, 2],
       [0, 0, 0, ..., 0, 0, 2],
       [0, 0, 0, ..., 0, 0, 2],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8), array([[245, 246, 245, ...,  54, 103, 131],
       [243, 244, 245, ...,  45,  95, 121],
       [242, 242, 242, ...,  47,  87, 115],
       ...,
       [219, 215, 215, ..., 122, 125, 136],
       [211, 215, 210, ..., 117, 123, 134],
       [254, 255, 252, ..., 252, 255, 254]], dtype=uint8), array([[ 18,  13,  13, ...,  34,  25,  31],
       [ 13,  12,  10, ...,  19, 

# Read images data array reshape and save as np array

In [75]:
data=np.array(data)/255.0
print(data[0])
print("-------------")
data=np.reshape(data,(data.shape[0],img_size,img_size,1))
print(data[0])
target=np.array(target)
print("-------------")
print(target[0])

from keras.utils import np_utils

new_target=np_utils.to_categorical(target)
print(type(new_target))
# standard binary file format in NumPy for persisting a single arbitrary NumPy array on disk.
np.save('data',data)
np.save('target',new_target)

print(f"Data shape : {data.shape} \nTarget shape : {target.shape} \nNew Target shape : {new_target.shape}")



[[[2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]
  ...
  [2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]]

 [[1.67803035e-19]
  [1.67803035e-19]
  [1.67803035e-19]
  ...
  [2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]]

 [[1.67803035e-19]
  [1.67803035e-19]
  [1.67803035e-19]
  ...
  [2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]]

 ...

 [[5.20189408e-18]
  [4.75441932e-18]
  [4.97815670e-18]
  ...
  [2.51704552e-18]
  [2.79671725e-18]
  [2.85265159e-18]]

 [[5.36969712e-18]
  [4.75441932e-18]
  [5.36969712e-18]
  ...
  [2.46111118e-18]
  [2.85265159e-18]
  [2.90858594e-18]]

 [[5.36969712e-18]
  [5.48156581e-18]
  [5.20189408e-18]
  ...
  [2.62891421e-18]
  [2.57297987e-18]
  [3.07638897e-18]]]
-------------
[[[2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]
  ...
  [2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]]

 [[1.67803035e-19]
  [1.67803035e-19]
  [1.67803035e-19]
  ...
  [2.23737380e-19]
  [2.23737380e-19]
  [2.23737380e-19]]

 [[1

# Data description and plot

In [None]:
print(type(data))

# Create model and get summary 

In [None]:

from keras.models import Sequential,Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D,Activation,MaxPooling2D
from keras.utils import normalize
from keras.layers import Concatenate
from keras import Input
from keras.callbacks import ModelCheckpoint

data=np.load('data.npy')
target=np.load('target.npy')

print(data.shape)
print(target.shape)


input_shape=data.shape[1:] #50,50,1
inp=Input(shape=input_shape)
convs=[]

parrallel_kernels=[3,5,7]

for k in range(len(parrallel_kernels)):
    if(k !=0):
        conv = Conv2D(128, kernel_size = k,padding = 'same' ,activation='relu')(inp)

        convs.append(conv)

out = Concatenate()(convs)
conv_model = Model(inp, out)

model = Sequential()
model.add(conv_model)

model.add(Conv2D(64,(3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(32,(3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2,input_dim=128,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

model.summary()

# Plot model architecture 

In [None]:
# plot model as image
tf.keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
    dpi=96,
    layer_range=None,
    show_layer_activations=True
)



# Date set divided to training data set and validation data set 

In [None]:
from sklearn.model_selection import train_test_split

train_data,test_data,train_target,test_target=train_test_split(data,target,test_size=0.4)


print(type (train_data))
# need to print training set length and validation data length

# Model training and validation

In [None]:
print(f"Start checkpoint creation:{datetime.datetime.now()}")
checkpoint = ModelCheckpoint('model-{epoch:03d}.model',
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')


history=model.fit(train_data,
                  train_target,
                  epochs=20,
                  callbacks=[checkpoint],
                  validation_split=0.1)
print(f"End checkpoint creation:{datetime.datetime.now()}")

# Graph Floting area


## Plot values in to graph

In [None]:
# floting values in to graph
from matplotlib import pyplot as plt
import seaborn as sns
sns.set()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(0,len(acc))
fig = plt.gcf()
fig.set_size_inches(16, 8)

plt.plot(epochs, acc, 'r', label='Training accuracy',marker = "o")
plt.plot(epochs, val_acc, 'b', label='Validation accuracy',marker = "o")
plt.title('Training and validation accuracy')
plt.xticks(np.arange(0, len(acc), 10))
plt.legend(loc=0)
plt.figure()

fig = plt.gcf()
fig.set_size_inches(16, 8)
plt.plot(epochs, loss, 'r', label='Training Loss',marker = "o")
plt.plot(epochs, val_loss, 'b', label='Validation Loss',marker = "o")
plt.title('Training and validation Loss')
plt.xticks(np.arange(0, len(acc), 10))
plt.legend(loc=0)
#plt.savefig('Multiclass Model .png')
plt.figure()
plt.show()

In [None]:
print(model.evaluate(test_data,test_target))

In [None]:
model.save("covid.h5")

## Model Validation
### Confusion matrix , Precision, Recall, F1 Score, ROC Curve  

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
accuracy_score(test_data,test_target)


In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(actual,predict)

In [None]:
from sklearn.metrics import classification_report
import pandas as pd
report = pd.DataFrame(classification_report(actual,predicted,output_dict =True))

In [None]:
import seaborn as sns
sns.pairplot(data,hue="") #hue = required label for draf

In [None]:
sns.countplot(x="",data=report) #counplot x = x axis values