In [None]:
import imageio
import numpy as np
import PIL
import sklearn
from skimage import color
from skimage import io
from skimage.util import img_as_int
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from sklearn.metrics import accuracy_score

from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, ComplementNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from google.colab import drive
# drive.mount('')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
os.environ['KAGGLE_USERNAME'] = "kevinssssssssss"
# username from the json file
os.environ['KAGGLE_KEY'] = "bf8e54c32881fe35ab6798a9c0737c1f"


In [None]:
!kaggle datasets download -d tawsifurrahman/covid19-radiography-database

covid19-radiography-database.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
!unzip -q /content/covid19-radiography-database.zip


replace COVID-19_Radiography_Dataset/COVID.metadata.xlsx? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


# Processing

In [None]:
from skimage.util import img_as_int
def load_dataset(path, n_images, resolution):
    """
    Args:
        path: path to the dataset
        n_images: number of images to load
    Return:
        dataset: a list of numpy arrays containing the images
    """
    dataset = []
    # Read all images in the folder
    for filename in os.listdir(path+'images/')[:n_images]:
        # Read image with numpy
        image_as_np = io.imread(path + 'images/' + filename, as_gray=True)
        # print(image_as_np.dtype)
        mask_as_np = io.imread(path + 'masks/' + filename, as_gray=True)
        # print(mask_as_np.dtype)
        # Scale image to 255 to convert it to dtype unit8 from float64
        mask_as_np = 255 * mask_as_np
        mask_as_np = mask_as_np.astype(np.uint8) 
  
        mask_as_np = resize(mask_as_np, (resolution, resolution), anti_aliasing=True)
        image_as_np = resize(image_as_np, (resolution, resolution), anti_aliasing=True)
  
        im_mask_as_np = (image_as_np.astype(np.uint8) & mask_as_np.astype(np.uint8))
        # Add image to the dataset
        dataset.append(im_mask_as_np)

    return dataset
        


In [None]:
root_folder = '/content/COVID-19_Radiography_Dataset/'

N_IMAGES = -1
# Resize images to 10x10
RESOLUTION = 100
covid_images = load_dataset(root_folder + 'COVID/', N_IMAGES, RESOLUTION)
lung_opacity_images = load_dataset(root_folder + 'Lung_Opacity/', N_IMAGES, RESOLUTION)
normal_images = load_dataset(root_folder + 'Normal/', N_IMAGES, RESOLUTION)
viral_images = load_dataset(root_folder + 'Viral Pneumonia/', N_IMAGES, RESOLUTION)


# covid_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in covid_images]
# lung_opacity_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in lung_opacity_images]
# normal_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in normal_images]
# viral_images = [resize(image, (RESOLUTION, RESOLUTION), anti_aliasing=True) for image in viral_images]

print(covid_images[0].shape, lung_opacity_images[0].shape, normal_images[0].shape, viral_images[0].shape)

# Flatten images
covid_images = [image.flatten() for image in covid_images]
lung_opacity_images = [image.flatten() for image in lung_opacity_images]
normal_images = [image.flatten() for image in normal_images]
viral_images = [image.flatten() for image in viral_images]

print(covid_images[0].shape, lung_opacity_images[0].shape, normal_images[0].shape, viral_images[0].shape)

X = np.concatenate((covid_images, lung_opacity_images, normal_images, viral_images))
print(X.shape)


(100, 100) (100, 100) (100, 100) (100, 100)
(10000,) (10000,) (10000,) (10000,)
(21161, 10000)


In [None]:

COVID_LABEL = 1
LUNG_OPACITY_LABEL = 2
NORMAL_LABEL = 3
VIRAL_LABEL = 4

labels = np.array([COVID_LABEL] * len(covid_images) + [LUNG_OPACITY_LABEL] * len(lung_opacity_images) + [NORMAL_LABEL] * len(normal_images) + [VIRAL_LABEL] * len(viral_images))
print(labels)

[1 1 1 ... 4 4 4]


In [None]:
# Test train split
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.3, shuffle=True)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(14812, 10000) (6349, 10000) (14812,) (6349,)


In [None]:
def get_accuracy_for_model(X_train, X_test, y_train, y_test, model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, GaussianNB()))

  n_ij = -0.5 * np.sum(np.log(2.0 * np.pi * self.var_[i, :]))
  n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.var_[i, :]), 1)


0.1655378799810994


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, ComplementNB()))

0.1655378799810994


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, BernoulliNB()))

0.47692550007875256


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, MultinomialNB()))

0.47692550007875256


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, MultinomialNB()))

0.47692550007875256


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, RandomForestClassifier()))

0.47692550007875256


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, SVC()))


0.47692550007875256


In [None]:
print(get_accuracy_for_model(X_train, X_test, y_train, y_test, MLPClassifier(learning_rate='adaptive', max_iter=300)))

0.47692550007875256
