Dataset: Chest X-Ray Images (Pneumonia) — https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia

In [1]:
import os
import glob
import cv2
from sklearn.neighbors import KNeighborsClassifier
from skimage.feature import greycomatrix, greycoprops
import numpy as np
from tqdm import tqdm

In [2]:
# Root folder of the extracted Kaggle "chest_xray" dataset. Set the
# CHEST_XRAY_DIR environment variable to point at a different location;
# the default is the original author's local path.
DATA_DIR = os.environ.get('CHEST_XRAY_DIR', 'D:\\BSU\\Datasets\\chest_xray')
PATH_TRAIN = os.path.join(DATA_DIR, 'train')
PATH_TEST = os.path.join(DATA_DIR, 'test')
# Sub-folder names inside each split; they double as the class labels.
CLASSES = ['NORMAL', 'PNEUMONIA']
# Pixel-offset tables; not referenced by any cell visible in this notebook.
dx = [-1, 0, 1, 1]
dy = [1, 1, 1, 0]

In [3]:
def get_feature_vector(filepath):
    """Return the flattened grey-level co-occurrence matrix of an image.

    The image is read as single-channel greyscale, a normalised symmetric
    GLCM is computed for one pixel offset (distance 1, angle 0) over 256
    grey levels, and the matrix is flattened to a 1-D vector
    (256 * 256 = 65536 values for this configuration).

    Args:
        filepath: Path to the image file.

    Returns:
        1-D numpy array holding the flattened co-occurrence matrix.

    Raises:
        IOError: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising;
    # fail here with the offending path instead of deep inside skimage.
    if img is None:
        raise IOError('Could not read image: ' + str(filepath))
    # NOTE(review): newer scikit-image releases rename this function to
    # `graycomatrix` — confirm against the installed version.
    glcm = greycomatrix(img, distances=[1], angles=[0], levels=256,
                        symmetric=True, normed=True)
    return glcm.flatten()

In [4]:
def get_set(path):
    """Build feature vectors and labels for every ``.jpeg`` image under ``path``.

    ``path`` is expected to contain one sub-folder per entry of ``CLASSES``;
    the sub-folder name is used as the label of every image inside it.
    Prints each class name as it is processed, and a summary line with the
    number of images loaded at the end.

    Args:
        path: Directory of one dataset split (e.g. the train or test folder).

    Returns:
        Tuple ``(features, labels)`` of two equally long lists: the feature
        vector returned by ``get_feature_vector`` for each image, and the
        matching class name.
    """
    features = []
    labels = []
    for img_class in CLASSES:
        class_path = os.path.join(path, img_class)
        print(img_class)
        # os.listdir order is arbitrary; sort so the sample order is
        # reproducible across runs and filesystems.
        for filename in tqdm(sorted(os.listdir(class_path))):
            # Test the suffix, not a substring, so names such as
            # "scan.jpeg.bak" are not handed to the image reader.
            if not filename.endswith('.jpeg'):
                continue
            # Join directly: passing a known path through glob would treat
            # characters like "[" or "*" in the name as wildcards and could
            # yield no match (IndexError on the [0] lookup).
            filepath = os.path.join(class_path, filename)
            features.append(get_feature_vector(filepath))
            labels.append(img_class)
    print('Set from ' + path + ' uploaded. \nFiles ' + str(len(features)))
    return features, labels

In [5]:
# Extract features and labels for the held-out test split.
test_set, test_labels = get_set(path=PATH_TEST)

NORMAL


100%|████████████████████████████████████████████████████████████████████████████████| 234/234 [00:07<00:00, 38.08it/s]


PNEUMONIA


100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:05<00:00, 70.30it/s]


Set from D:\BSU\Datasets\chest_xray\test uploaded. 
Files 624


In [6]:
# Extract features and labels for the training split.
train_set, train_labels = get_set(path=PATH_TRAIN)

NORMAL


100%|██████████████████████████████████████████████████████████████████████████████| 1341/1341 [00:49<00:00, 27.36it/s]


PNEUMONIA


100%|██████████████████████████████████████████████████████████████████████████████| 3875/3875 [01:02<00:00, 62.05it/s]


Set from D:\BSU\Datasets\chest_xray\train uploaded. 
Files 5216


In [7]:
# Fit a 5-nearest-neighbour classifier on the training features (all CPU
# cores) and report its accuracy on the test split.
model = KNeighborsClassifier(n_jobs=-1, n_neighbors=5).fit(train_set, train_labels)
acc = model.score(test_set, test_labels)
print("Accuracy: {:.2f}% k = 5".format(100 * acc))

Accuracy: 71.96% k = 5


In [8]:
# Same experiment with a single nearest neighbour (k = 1) for comparison.
model = KNeighborsClassifier(n_jobs=-1, n_neighbors=1).fit(train_set, train_labels)
acc = model.score(test_set, test_labels)
print("Accuracy: {:.2f}% k = 1".format(100 * acc))

Accuracy: 70.03% k = 1
