In [4]:
import cv2
import glob
import matplotlib.pyplot as plt
import imutils
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA, KernelPCA
from sklearn.ensemble import RandomForestClassifier



In [5]:
data_no = 'data-covid/no/'
ext = ['png', 'jpg', 'gif']    # Add image formats here

files_no = []
[files_no.extend(glob.glob(data_no + '*.' + e)) for e in ext]

images_no = [cv2.imread(file) for file in files_no]


In [6]:
data_yes = 'data-covid/yes/'
ext = ['png', 'jpg', 'gif']    # Add image formats here

files_yes = []
[files_yes.extend(glob.glob(data_yes + '*.' + e)) for e in ext]

images_yes = [cv2.imread(file) for file in files_yes]


In [7]:
print(len(images_no)+ len(images_yes))

7655


In [8]:
def crop_brain(image):
    
    # Convert the image to grayscale, and blur it slightly
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    # Find contours in thresholded image, then grab the largest one
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key=cv2.contourArea)
    # extreme points
    extLeft = tuple(c[c[:, :, 0].argmin()][0])
    extRight = tuple(c[c[:, :, 0].argmax()][0])
    extTop = tuple(c[c[:, :, 1].argmin()][0])
    extBot = tuple(c[c[:, :, 1].argmax()][0])
    
    # crop new image out of the original image using the four extreme points (left, right, top, bottom)
    new_image = image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]            
    
    return new_image


In [9]:
covid_imgs_croped_yes = []
covid_imgs_croped_no = []


for image in images_yes:
    x = crop_brain(image)
    x_resize = cv2.resize(x, (128, 128))
    gray = cv2.cvtColor(x_resize, cv2.COLOR_BGR2GRAY)
    covid_imgs_croped_yes.append(gray)


for image in images_no:
    x = crop_brain(image)
    x_resize = cv2.resize(x, (128, 128))
    gray = cv2.cvtColor(x_resize, cv2.COLOR_BGR2GRAY)
    covid_imgs_croped_no.append(gray)




In [10]:

y_yes = np.ones(len(covid_imgs_croped_yes), dtype="int8")
y_no = np.zeros(len(covid_imgs_croped_no), dtype="int8")



In [11]:
covid_imgs_croped_no

[array([[  3,   4,   3, ...,  19,  32,  40],
        [ 14,  23,  20, ...,  18,  29,  38],
        [ 13,  22,  19, ...,  17,  27,  37],
        ...,
        [ 26,  43,  37, ...,  29,  38,  65],
        [ 34,  58,  53, ...,  41,  49,  80],
        [ 39,  72,  68, ...,  65,  71, 103]], dtype=uint8),
 array([[44, 72, 34, ..., 24, 24, 24],
        [44, 72, 36, ..., 24, 24, 24],
        [44, 71, 34, ..., 23, 23, 24],
        ...,
        [10, 23, 20, ..., 18, 18, 18],
        [12, 29, 26, ..., 19, 19, 20],
        [16, 38, 36, ..., 23, 23, 22]], dtype=uint8),
 array([[149,  92,  58, ...,  68,  68,  71],
        [145,  91,  55, ...,  23,  24,  25],
        [142,  88,  54, ...,   4,   6,   6],
        ...,
        [ 30,   9,   3, ...,   5,   4,   3],
        [ 30,  11,   5, ...,   6,   2,   3],
        [ 33,  15,  11, ...,   5,   2,   4]], dtype=uint8),
 array([[ 7,  7,  6, ...,  4,  4,  3],
        [ 6,  6,  6, ...,  0,  0,  0],
        [ 5,  5,  5, ...,  0,  0,  0],
        ...,
        [ 0,

In [13]:
X = np.concatenate((covid_imgs_croped_yes, covid_imgs_croped_no), axis=0)
y = np.concatenate((y_yes, y_no), axis=0)
d1, d2, d3 = X.shape


In [14]:
X = X.reshape((d1, d2 * d3))


In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)


## scale data before train model


In [16]:
scaler_ = StandardScaler()
X_train_sc = scaler_.fit_transform(X_train)
X_test_sc = scaler_.transform(X_test)


## random forest without pca


In [17]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train_sc, y_train)
y_predict_rf = rf_model.predict(X_test_sc)



## SVC without pca


In [19]:
svc_model = SVC(kernel="linear")
svc_model.fit(X_train_sc, y_train)
y_predict_svc = svc_model.predict(X_test_sc)


## PCA


In [20]:
data_pca = PCA(n_components=12)
pca_components = data_pca.fit(X_train_sc)
X_train_pca = pca_components.fit_transform(X_train_sc)
X_test_pca = pca_components.transform(X_test_sc)


## KernelPCA


In [21]:
data_kpca = KernelPCA()
kpca_components = data_kpca.fit(X_train_sc)
X_train_kpca = kpca_components.fit_transform(X_train_sc)
X_test_kpca = kpca_components.transform(X_test_sc)


## RandomForest With PCA


In [22]:
rf_model_pca = RandomForestClassifier()
rf_model_pca.fit(X_train_pca, y_train)
y_predict_rf_pca = rf_model_pca.predict(X_test_pca)


## SVC With PCA


In [23]:
svc_model_pca = SVC(kernel="linear")
svc_model.fit(X_train_pca, y_train)
y_predict_pca = svc_model.predict(X_test_pca)



## RandomForest With KernelPCA


In [24]:
rf_model_kpca = RandomForestClassifier()
rf_model_kpca.fit(X_train_kpca, y_train)
y_predict_rf_kpca = rf_model_kpca.predict(X_test_kpca)


## SVC With KernelPCA


In [25]:
svc_model_kpca = SVC(kernel="linear")
svc_model.fit(X_train_kpca, y_train)
y_predict_kpca = svc_model.predict(X_test_kpca)



In [26]:

print("without PCA: ", accuracy_score(y_test, y_predict_svc))
print("with PCA: ", accuracy_score(y_test, y_predict_pca))




without PCA:  0.8007838014369693
with PCA:  0.6962769431743958


In [32]:

print("without PCA:")
print(classification_report(y_test, y_predict_svc))


without PCA:
              precision    recall  f1-score   support

           0       0.82      0.78      0.80       777
           1       0.78      0.82      0.80       754

    accuracy                           0.80      1531
   macro avg       0.80      0.80      0.80      1531
weighted avg       0.80      0.80      0.80      1531



In [33]:
print("with PCA:")
print(classification_report(y_test, y_predict_pca))



with PCA:
              precision    recall  f1-score   support

           0       0.69      0.73      0.71       777
           1       0.70      0.67      0.68       754

    accuracy                           0.70      1531
   macro avg       0.70      0.70      0.70      1531
weighted avg       0.70      0.70      0.70      1531



In [28]:

print("RandomForest without PCA:")
print(classification_report(y_test, y_predict_rf))


RandomForest without PCA:
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       777
           1       0.92      0.90      0.91       754

    accuracy                           0.91      1531
   macro avg       0.91      0.91      0.91      1531
weighted avg       0.91      0.91      0.91      1531



In [29]:

print("RandomForest with PCA:")
print(classification_report(y_test, y_predict_rf_pca))


RandomForest with PCA:
              precision    recall  f1-score   support

           0       0.88      0.86      0.87       777
           1       0.86      0.88      0.87       754

    accuracy                           0.87      1531
   macro avg       0.87      0.87      0.87      1531
weighted avg       0.87      0.87      0.87      1531



In [30]:

print("RandomForest without KPCA:")
print(classification_report(y_test, y_predict_rf_kpca))


RandomForest without KPCA:
              precision    recall  f1-score   support

           0       0.77      0.11      0.20       777
           1       0.51      0.97      0.67       754

    accuracy                           0.53      1531
   macro avg       0.64      0.54      0.43      1531
weighted avg       0.64      0.53      0.43      1531



In [31]:

print("SVC with KPCA:")
print(classification_report(y_test, y_predict_kpca))


SVC with KPCA:
              precision    recall  f1-score   support

           0       0.82      0.78      0.80       777
           1       0.78      0.82      0.80       754

    accuracy                           0.80      1531
   macro avg       0.80      0.80      0.80      1531
weighted avg       0.80      0.80      0.80      1531

