In [1]:
import imageio
import numpy as np
import PIL
import sklearn
from skimage import color
from skimage import io
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from sklearn.metrics import accuracy_score

from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, ComplementNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [2]:
# Display image
nparray = np.array(PIL.Image.open('COVID/COVID-1.png'))
print(nparray.shape)

img = io.imread('COVID/COVID-1.png')
print(img.shape)
print(type(img))

(299, 299)
(299, 299)
<class 'numpy.ndarray'>


In [3]:
def load_dataset(path, n_images):
    """
    Args:
        path: path to the dataset
        n_images: number of images to load
    Return:
        dataset: a list of numpy arrays containing the images
    """
    dataset = []
    # Read all images in the folder
    for filename in os.listdir(path)[:n_images]:
        # Read image with numpy
        image_as_np = io.imread(path + filename)
        if len(image_as_np.shape) == 3:
            image_as_np = color.rgb2gray(image_as_np)
        # Add image to the dataset
        dataset.append(image_as_np)

    return dataset
        


In [4]:
N_IMAGES = 1000
covid_images = load_dataset('COVID/', N_IMAGES)
lung_opacity_images = load_dataset('Lung_Opacity/', N_IMAGES)
normal_images = load_dataset('Normal/', N_IMAGES)
viral_images = load_dataset('Viral Pneumonia/', N_IMAGES)

# Resize images to 10x10
RESOLUTION = 100
covid_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in covid_images]
lung_opacity_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in lung_opacity_images]
normal_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in normal_images]
viral_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in viral_images]

print(covid_images[0].shape, lung_opacity_images[0].shape, normal_images[0].shape, viral_images[0].shape)

# Flatten images
covid_images = [image.flatten() for image in covid_images]
lung_opacity_images = [image.flatten() for image in lung_opacity_images]
normal_images = [image.flatten() for image in normal_images]
viral_images = [image.flatten() for image in viral_images]

print(covid_images[0].shape, lung_opacity_images[0].shape, normal_images[0].shape, viral_images[0].shape)

X = np.concatenate((covid_images, lung_opacity_images, normal_images, viral_images))
print(X.shape)


(100, 100) (100, 100) (100, 100) (100, 100)
(10000,) (10000,) (10000,) (10000,)
(4000, 10000)


In [5]:

COVID_LABEL = 1
LUNG_OPACITY_LABEL = 2
NORMAL_LABEL = 3
VIRAL_LABEL = 4

labels = np.array([COVID_LABEL] * len(covid_images) + [LUNG_OPACITY_LABEL] * len(lung_opacity_images) + [NORMAL_LABEL] * len(normal_images) + [VIRAL_LABEL] * len(viral_images))
print(labels)

[1 1 1 ... 4 4 4]


In [6]:
# Test train split
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.3, shuffle=True)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2800, 10000) (1200, 10000) (2800,) (1200,)


In [7]:
def get_accuracy_for_model(X_train, X_test, y_train, y_test, model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

In [8]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, GaussianNB()))

0.6666666666666666


In [9]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, ComplementNB()))

0.5925


In [10]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, BernoulliNB()))

0.3908333333333333


In [11]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, MultinomialNB()))

0.6333333333333333


In [12]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, MultinomialNB()))

0.6333333333333333


In [13]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, RandomForestClassifier()))

0.8158333333333333


In [14]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, SVC()))

0.7991666666666667


In [15]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, MLPClassifier(learning_rate='adaptive', max_iter=300)))

0.7491666666666666
