In [1]:
# import the necessary packages
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as mpatches
import pandas as pd
import seaborn as sns
import ast
import pywt

In [2]:

covid_images_length = len(os.listdir('./COVID-19_Radiography_Dataset/COVID/sharpen_images/'))
normal_images_length = len(os.listdir('./COVID-19_Radiography_Dataset/Normal/sharpen_images/'))
viral_pneumonia_images_length = len(os.listdir('./COVID-19_Radiography_Dataset/Viral Pneumonia/sharpen_images/'))

#! read all images of normal
normal_images = []
for i in range(1, normal_images_length+1):
    normal_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/Normal/sharpen_images/Normal-'+str(i)+'.png'))

#! read all images of viral pneumonia
viral_pneumonia_images = []
for i in range(1, viral_pneumonia_images_length+1):
    viral_pneumonia_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/Viral Pneumonia/sharpen_images/Viral Pneumonia-'+str(i)+'.png'))

#! read all images of covid
covid_images = []
for i in range(1, covid_images_length+1):
    covid_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/COVID/sharpen_images/COVID-'+str(i)+'.png'))

In [3]:
print('No. of Normal images ', len(normal_images))
print('No. of Viral Pneumonia images ', len(viral_pneumonia_images))
print('No. of COVID images ', len(covid_images))

No. of Normal images  10192
No. of Viral Pneumonia images  1345
No. of COVID images  3616


In [4]:
# divide the data set into train and test in the ratio of 80:20
train_normal_images = normal_images[:int(0.8*len(normal_images))]
train_viral_pneumonia_images = viral_pneumonia_images[:int(0.8*len(viral_pneumonia_images))]
train_covid_images = covid_images[:int(0.8*len(covid_images))]
test_normal_images = normal_images[int(0.8*len(normal_images)):]
test_viral_pneumonia_images = viral_pneumonia_images[int(0.8*len(viral_pneumonia_images)):]
test_covid_images = covid_images[int(0.8*len(covid_images)):]

print('Normal images        => \t Total:', normal_images_length ,'\tTrain: ', len(train_normal_images),"\tTest: ", len(test_normal_images))
print('Pneumonia images     => \t Total:', viral_pneumonia_images_length ,'\tTrain: ', len(train_viral_pneumonia_images),"\tTest: ", len(test_viral_pneumonia_images))
print('COVID images         => \t Total:', covid_images_length ,'\tTrain: ', len(train_covid_images),"\tTest: ", len(test_covid_images))



Normal images        => 	 Total: 10192 	Train:  8153 	Test:  2039
Pneumonia images     => 	 Total: 1345 	Train:  1076 	Test:  269
COVID images         => 	 Total: 3616 	Train:  2892 	Test:  724


In [5]:
# save the train and test images in the respective folders
for i in range(len(test_normal_images)):
    cv2.imwrite('./COVID-19_Radiography_Dataset/Normal/normal_test_images/Normal-' +
                str(i+1)+'.png', test_normal_images[i])
for i in range(len(test_viral_pneumonia_images)):
    cv2.imwrite('./COVID-19_Radiography_Dataset/Viral Pneumonia/pneumonia_test_images/Viral Pneumonia-' +
                str(i+1)+'.png', test_viral_pneumonia_images[i])
for i in range(len(test_covid_images)):
    cv2.imwrite('./COVID-19_Radiography_Dataset/COVID/covid_test_images/COVID-' +
                str(i+1)+'.png', test_covid_images[i])


In [7]:
for i in range(len(train_normal_images)):
    cv2.imwrite('./COVID-19_Radiography_Dataset/Normal/normal_train_images/Normal-' +
                str(i+1)+'.png', train_normal_images[i])
for i in range(len(train_viral_pneumonia_images)):
    cv2.imwrite('./COVID-19_Radiography_Dataset/Viral Pneumonia/pneumonia_train_images/Viral Pneumonia-' +
                str(i+1)+'.png', train_viral_pneumonia_images[i])
for i in range(len(train_covid_images)):
    cv2.imwrite('./COVID-19_Radiography_Dataset/COVID/covid_train_images/COVID-' +
                str(i+1)+'.png', train_covid_images[i])


In [8]:

#! function to return a vector of frequency of all intensities from 0 to 255

def get_img_vector(img):
    img_vector = np.zeros(256)
    for i in range(0, img.shape[0]):
        for j in range(0, img.shape[1]):
            img_vector[img[i][j]] += 1
    return img_vector


#! function to calculate euclidean distance between two vectors
def euclidean_distance(vector1, vector2):
    distance = 0.0
    for i in range(len(vector1)):
        distance += (vector1[i] - vector2[i])**2
    return np.sqrt(distance)


#! function to calculate manhattan distance between two vectors
def manhattan_distance(vector1, vector2):
    return np.sum(np.abs(vector1 - vector2))

In [9]:
# read the train images of covid
covid_train_images = []
for i in range(1, len(os.listdir('./COVID-19_Radiography_Dataset/COVID/covid_train_images/'))+1):
    covid_train_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/COVID/covid_train_images/COVID-'+str(i)+'.png'))

# read the train images of pneumonia
pneumonia_train_images = []
for i in range(1, len(os.listdir('./COVID-19_Radiography_Dataset/Viral Pneumonia/pneumonia_train_images/'))+1):
    pneumonia_train_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/Viral Pneumonia/pneumonia_train_images/Viral Pneumonia-'+str(i)+'.png'))

# read the train images of normal
normal_train_images = []
for i in range(1, len(os.listdir('./COVID-19_Radiography_Dataset/Normal/normal_train_images/'))+1):
    normal_train_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/Normal/normal_train_images/Normal-'+str(i)+'.png'))

# read the test images of covid
covid_test_images = []
for i in range(1, len(os.listdir('./COVID-19_Radiography_Dataset/COVID/covid_test_images/'))+1):
    covid_test_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/COVID/covid_test_images/COVID-'+str(i)+'.png'))

# read the test images of pneumonia
pneumonia_test_images = []
for i in range(1, len(os.listdir('./COVID-19_Radiography_Dataset/Viral Pneumonia/pneumonia_test_images/'))+1):
    pneumonia_test_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/Viral Pneumonia/pneumonia_test_images/Viral Pneumonia-'+str(i)+'.png'))

# read the test images of normal
normal_test_images = []
for i in range(1, len(os.listdir('./COVID-19_Radiography_Dataset/Normal/normal_test_images/'))+1):
    normal_test_images.append(cv2.imread(
        './COVID-19_Radiography_Dataset/Normal/normal_test_images/Normal-'+str(i)+'.png'))

# print the length of the train and test images
print('Train images => \t COVID:', len(covid_train_images), '\tPneumonia:', len( pneumonia_train_images), '\tNormal:', len(normal_train_images))
print('Test images => \t COVID:', len(covid_test_images), '\tPneumonia:', len( pneumonia_test_images), '\tNormal:', len(normal_test_images))

Train images => 	 COVID: 2892 	Pneumonia: 1076 	Normal: 8153
Test images => 	 COVID: 724 	Pneumonia: 269 	Normal: 2039


In [11]:
def formatString(string):
    string = string.replace("array([", "[")
    string = string.replace("]),array([", "],[")
    string = string.replace("])", "]")
    return string


def convertToArray(string):
    listArr = ast.literal_eval(string)
    return listArr


def tupleToList(listArr):
    array = []
    for i in range(len(listArr)):
        array.append(listArr[i])
    return array

def loadFile(path):
    with open(path, 'r') as f:
        vector = f.read()
        vector = formatString(vector)
        vector = convertToArray(vector)
        vector = tupleToList(vector)
        f.close()
    return vector

In [14]:
# load image vectors from file normal_images_vector.txt
normal_images_vector = loadFile('./COVID-19_Radiography_Dataset/Normal/normal_images_vector.txt')
# load image vectors from file pneumonia_images_vector.txt
pneumonia_images_vector = loadFile('./COVID-19_Radiography_Dataset/Viral Pneumonia/viral_pneumonia_images_vector.txt')
# load image vectors from file covid_images_vector.txt
covid_images_vector = loadFile('./COVID-19_Radiography_Dataset/COVID/covid_images_vector.txt')

In [15]:
# divide the image vectors into train and test vectors
normal_train_images_vector = normal_images_vector[:len(normal_train_images)]
normal_test_images_vector = normal_images_vector[len(normal_train_images):]
pneumonia_train_images_vector = pneumonia_images_vector[:len(pneumonia_train_images)]
pneumonia_test_images_vector = pneumonia_images_vector[len(pneumonia_train_images):]
covid_train_images_vector = covid_images_vector[:len(covid_train_images)]
covid_test_images_vector = covid_images_vector[len(covid_train_images):]

# print the length of the train and test image vectors
print('Train image vectors => \t COVID:', len(covid_train_images_vector), '\tPneumonia:', len( pneumonia_train_images_vector), '\tNormal:', len(normal_train_images_vector))
print('Test image vectors => \t COVID:', len(covid_test_images_vector), '\tPneumonia:', len( pneumonia_test_images_vector), '\tNormal:', len(normal_test_images_vector))



Train image vectors => 	 COVID: 2892 	Pneumonia: 1076 	Normal: 8153
Test image vectors => 	 COVID: 724 	Pneumonia: 269 	Normal: 2039


In [16]:
# save the vectors to txt file

with open('./COVID-19_Radiography_Dataset/COVID/features/covid_test_vectors.txt', 'w') as f:
    f.write(str(covid_test_images_vector))

with open('./COVID-19_Radiography_Dataset/COVID/features/covid_train_vectors.txt', 'w') as f:
    f.write(str(covid_train_images_vector))

with open('./COVID-19_Radiography_Dataset/Viral Pneumonia/features/pneumonia_test_vectors.txt', 'w') as f:
    f.write(str(pneumonia_test_images_vector))

with open('./COVID-19_Radiography_Dataset/Viral Pneumonia/features/pneumonia_train_vectors.txt', 'w') as f:
    f.write(str(pneumonia_train_images_vector))

with open('./COVID-19_Radiography_Dataset/Normal/features/normal_test_vectors.txt', 'w') as f:
    f.write(str(normal_test_images_vector))

with open('./COVID-19_Radiography_Dataset/Normal/features/normal_train_vectors.txt', 'w') as f:
    f.write(str(normal_train_images_vector))

# load the vectors from the txt file

RUN BELOW CODE

In [17]:
covid_test_vectors = loadFile('./COVID-19_Radiography_Dataset/COVID/features/covid_test_vectors.txt')
covid_train_vectors = loadFile('./COVID-19_Radiography_Dataset/COVID/features/covid_train_vectors.txt')
pneumonia_test_vectors = loadFile('./COVID-19_Radiography_Dataset/Viral Pneumonia/features/pneumonia_test_vectors.txt')
pneumonia_train_vectors = loadFile('./COVID-19_Radiography_Dataset/Viral Pneumonia/features/pneumonia_train_vectors.txt')
normal_test_vectors = loadFile('./COVID-19_Radiography_Dataset/Normal/features/normal_test_vectors.txt')
normal_train_vectors = loadFile('./COVID-19_Radiography_Dataset/Normal/features/normal_train_vectors.txt')

# print the length of the train and test image vectors
print('Train image vectors => \t COVID:', len(covid_train_vectors), '\tPneumonia:', len( pneumonia_train_vectors), '\tNormal:', len(normal_train_vectors))
print('Test image vectors => \t COVID:', len(covid_test_vectors), '\tPneumonia:', len( pneumonia_test_vectors), '\tNormal:', len(normal_test_vectors))



Train image vectors => 	 COVID: 2892 	Pneumonia: 1076 	Normal: 8153
Test image vectors => 	 COVID: 724 	Pneumonia: 269 	Normal: 2039


In [18]:
r = 10000

covid_train_clusters = []  # list of clusters

# cluster to store vectors that lie within a threshold distance(r) from a vector
cluster = []
check = []
for i in range(0, len(covid_train_vectors)-1):
    cluster = []
    if covid_train_vectors[i] not in check:
        cluster.append(covid_train_vectors[i])
        check.append(covid_train_vectors[i])
    else:
        continue
    for j in range(i+1, len(covid_train_vectors)):

        if euclidean_distance(covid_train_vectors[i], covid_train_vectors[j]) <= r:
            if covid_train_vectors[j] not in check:
                cluster.append(covid_train_vectors[j])
                check.append(covid_train_vectors[j])

    covid_train_clusters.append(cluster)


In [19]:
# print the clusters
sum=0
for i in range(0, len(covid_train_clusters)):
    print("Cluster ", i+1, " : ", len(covid_train_clusters[i]))
    sum+=len(covid_train_clusters[i])
print("Total number of vectors: ", sum)

Cluster  1  :  2563
Cluster  2  :  151
Cluster  3  :  2
Cluster  4  :  38
Cluster  5  :  24
Cluster  6  :  6
Cluster  7  :  22
Cluster  8  :  28
Cluster  9  :  1
Cluster  10  :  4
Cluster  11  :  3
Cluster  12  :  3
Cluster  13  :  1
Cluster  14  :  5
Cluster  15  :  1
Cluster  16  :  1
Cluster  17  :  5
Cluster  18  :  1
Cluster  19  :  1
Cluster  20  :  1
Cluster  21  :  1
Cluster  22  :  1
Total number of vectors:  2863
