# Covid-19 Detection From X-Ray

### Problem Description

In this project I have tried to predict whether the person have covid19 or not using CNN on x-ray images of patients lungs.
Advantages of x-ray detetction over blood test : 
- Blood tests are costly.
- Blood tests take time to conduct ~5 hours per patient.
- Extend of spread can not be detected using blood test.

### Code

In [5]:
# Importing all required libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.layers import *
from keras.models import *
from keras.preprocessing import image
import os
import shutil
import glob

In [6]:
# Paths for different directory in which training and testing datasets are prepared.
FILE_PATH = "DatasetsRaw/Covid19.csv" # Csv file in which label of each image is present.
IMAGES_PATH = "DatasetsRaw/Covid19" # Path for x-ray images having covid19.
TARGET_PATH = "DatasetProcessed/Covid" # Path for x-ray images having covid19 after first processing.
IMAGES_NORMAL_PATH = "DatasetsRaw/Normal" # Path for x-ray images not having covid19.
TARGET_NORMAL_PATH = "DatasetProcessed/Normal" # Path for x-ray images not having covid19 after first processing.
COVID_TRAIN_PATH = "Datasets/Train/Covid" # Path for training datasets having covid.
NORMAL_TRAIN_PATH = "Datasets/Train/Normal" # Path for training datasets not having covid.
COVID_TEST_PATH = "Datasets/Test/Covid" # Path for testing datasets having covid.
NORMAL_TEST_PATH = "Datasets/Test/Normal" # Path for testing datasets not having covid.

In [7]:
# Reading dataset in csv form using pandas.
df = pd.read_csv(FILE_PATH)
df.head()

Unnamed: 0,patientid,offset,sex,age,finding,RT_PCR_positive,survival,intubated,intubation_present,went_icu,...,date,location,folder,filename,doi,url,license,clinical_notes,other_notes,Unnamed: 29
0,2,0.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 22, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-a-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
1,2,3.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 25, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-b-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
2,2,5.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 27, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-c-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
3,2,6.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 28, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-d-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
4,4,0.0,F,52.0,Pneumonia/Viral/COVID-19,Y,,N,N,N,...,"January 25, 2020","Changhua Christian Hospital, Changhua City, Ta...",images,nejmc2001573_f1a.jpeg,10.1056/NEJMc2001573,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,diffuse infiltrates in the bilateral lower lungs,,


In [8]:
# Data cleaning for x-ray images having covid19.
if not os.path.exists(TARGET_PATH):
    os.mkdir(TARGET_PATH)

for (i,row) in df.iterrows():
    if row["finding"]=="Pneumonia/Viral/COVID-19" and row["view"]=="PA": # Filtering out x-ray images having covid19 and having frontal view (PA is medical term for frontal view).  
        filename = row["filename"]
        image_path = os.path.join(IMAGES_PATH,filename)
        image_copy_path = os.path.join(TARGET_PATH,filename)
        shutil.copy2(image_path,image_copy_path)

In [9]:
# Data cleaning for x-ray images not having covid19.
if not os.path.exists(TARGET_NORMAL_PATH):
    os.mkdir(TARGET_NORMAL_PATH)

image_names = os.listdir(IMAGES_NORMAL_PATH) # This will create a list of all image file present in IMAGES_NORMAL_PATH.
np.random.seed(1)
np.random.shuffle(image_names,) # Shuffling images.
x = len(os.listdir(TARGET_PATH))

for i in range(x):
    image_name = image_names[i]
    image_path = os.path.join(IMAGES_NORMAL_PATH,image_name)
    target_path = os.path.join(TARGET_NORMAL_PATH,image_name)
    shutil.copy2(image_path,target_path)

In [10]:
# Making training and testing datasets.
# Making directories.
if os.path.exists("Datasets"):
    shutil. rmtree("Datasets")
os.makedirs(COVID_TRAIN_PATH)
os.makedirs(NORMAL_TRAIN_PATH)
os.makedirs(COVID_TEST_PATH)
os.makedirs(NORMAL_TEST_PATH)

# Making training and testing dataset for x-rays having covid19.
for filename in os.listdir(TARGET_PATH):
    if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"): 
        fileparts = filename.split('.')
        if np.random.uniform(0, 1) <= 0.8:
            shutil.copy2(os.path.join(TARGET_PATH, filename), os.path.join(COVID_TRAIN_PATH,filename))
        else:
            shutil.copy2(os.path.join(TARGET_PATH, filename), os.path.join(COVID_TEST_PATH,filename))

# Making training and testing dataset for x-rays not having covid19.
for filename in os.listdir(TARGET_NORMAL_PATH):
    if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"): 
        fileparts = filename.split('.')
        if np.random.uniform(0, 1) <= 0.8:
            shutil.copy2(os.path.join(TARGET_NORMAL_PATH, filename), os.path.join(NORMAL_TRAIN_PATH,filename))
        else:
            shutil.copy2(os.path.join(TARGET_NORMAL_PATH, filename), os.path.join(NORMAL_TEST_PATH,filename))

In [11]:
# Training CNN based model using Keras.
model = Sequential()
# Layer-1
model.add(Conv2D(32,kernel_size=(3,3),activation='relu',input_shape=(224,224,3)))
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
# Layer-2
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
# Layer-3
model.add(Conv2D(128,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
# Layer-4
model.add(Flatten())
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1,activation='sigmoid'))
# Compiling model.
model.compile(loss=keras.losses.binary_crossentropy,optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 conv2d_1 (Conv2D)           (None, 220, 220, 64)      18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 110, 110, 64)     0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 110, 110, 64)      0         
                                                                 
 conv2d_2 (Conv2D)           (None, 108, 108, 64)      36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                    

In [12]:
# Augmentation
train_datagen = image.ImageDataGenerator(
    rescale = 1.0/255,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True
)
test_datagen = image.ImageDataGenerator(
    rescale = 1.0/255
)

In [13]:
# Train generator.
train_generator = train_datagen.flow_from_directory(
    'Datasets/Train',
    target_size=(224,224),
    batch_size=32,
    class_mode='binary'
)
train_generator.class_indices

Found 311 images belonging to 2 classes.


{'Covid': 0, 'Normal': 1}

In [14]:
# Testing generator.
validation_generator = test_datagen.flow_from_directory(
    'Datasets/Test',
    target_size=(224,224),
    batch_size=32,
    class_mode='binary'
)
validation_generator.class_indices

Found 80 images belonging to 2 classes.


{'Covid': 0, 'Normal': 1}

In [15]:
# Fitiing (or training) model.
hist = model.fit_generator(
    train_generator,
    steps_per_epoch=8,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=2
)

  hist = model.fit_generator(


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Saving model
model.save("model.h5")

In [17]:
# Finding accuracy on training dataset.
model.evaluate_generator(train_generator)

  model.evaluate_generator(train_generator)


[0.10925129801034927, 0.9774919748306274]

In [18]:
# Finding accuracy on validation(testing) dataset.
model.evaluate_generator(validation_generator)

  model.evaluate_generator(validation_generator)


[0.07846243679523468, 0.9624999761581421]