In [0]:
import os
import numpy as np
import pandas as pd 
import random
import cv2
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
import warnings

from random import shuffle 
from tqdm import tqdm 
from PIL import Image
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization, Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn import svm
from google.colab import drive
drive.mount('/content/gdrive')

%matplotlib inline

in_path = '/content/gdrive/My Drive/demo/chest_xray/'

Using TensorFlow backend.


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
def extract_data(dimensions, batch_length):
    tgen = ImageDataGenerator(rescale=1./255, zoom_range=0.3, vertical_flip=True)
    test_val_datagen = ImageDataGenerator(rescale=1./255)
    tgen_final = tgen.flow_from_directory(directory=in_path+'train', target_size=(dimensions, dimensions), batch_size=batch_length, class_mode='binary', shuffle=True)
    test_gen = test_val_datagen.flow_from_directory(directory=in_path+'test', target_size=(dimensions, dimensions), batch_size=batch_length, class_mode='binary', shuffle=True)

    test_data = []
    test_labels = []
    
    train_data = []
    train_labels = []

    for normal_image in (os.listdir(in_path + 'test' + '/NORMAL/')):
        normal_image = plt.imread(in_path+'test'+'/NORMAL/'+normal_image)
        normal_image = cv2.resize(normal_image, (dimensions, dimensions))
        normal_image = normal_image.astype('float32') / 255
        label = 0
        test_data.append(normal_image)
        test_labels.append(label)

    for pneumonia_image in (os.listdir(in_path + 'test' + '/PNEUMONIA/')):
        pneumonia_image = plt.imread(in_path+'test'+'/PNEUMONIA/'+pneumonia_image)
        pneumonia_image = cv2.resize(pneumonia_image, (dimensions, dimensions))
        pneumonia_image = pneumonia_image.astype('float32') / 255
        label = 1
        test_data.append(pneumonia_image)
        test_labels.append(label)

    for normal_image in (os.listdir(in_path + 'train' + '/NORMAL/')):
        if normal_image == '.DS_Store':
            continue
        normal_image = plt.imread(in_path+'train'+'/NORMAL/'+normal_image)
        normal_image = cv2.resize(normal_image, (dimensions, dimensions))
        normal_image = normal_image.astype('float32') / 255
        label = 0
        train_data.append(normal_image)
        train_labels.append(label)

    for pneumonia_image in (os.listdir(in_path + 'train' + '/PNEUMONIA/')):
        if pneumonia_image == '.DS_Store':
            continue
        pneumonia_image = plt.imread(in_path+'train'+'/PNEUMONIA/'+pneumonia_image)
        pneumonia_image = cv2.resize(pneumonia_image, (dimensions, dimensions))
        pneumonia_image = pneumonia_image.astype('float32') / 255
        label = 1
        train_data.append(pneumonia_image)
        train_labels.append(label)
    
    return tgen_final, test_gen, test_data, test_labels, train_data, train_labels

In [0]:
img_dims = 150
epochs = 1
batch_size = 10

train_gen, test_gen, test_data, test_labels, train_data, train_labels = extract_data(img_dims, batch_size)


Found 5276 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [0]:
flat_train_data = []
flat_test_data = []

for img in train_data: 
    flat_train_data.append(img.flatten())
    
for img in test_data: 
    flat_test_data.append(img.flatten())

In [0]:
listed_train_data = []
listed_test_data = []

for d in flat_train_data:
    listed_train_data.append(d.tolist()[:150*150])
    
for d in flat_test_data:
    listed_test_data.append(d.tolist()[:150*150])

In [0]:
train_labels_mini = train_labels
test_labels_mini = test_labels

In [0]:
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(listed_train_data, train_labels_mini)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [0]:
correctlyPredicted = 0
fp = 0
tp = 0
fn = 0
tn = 0

for ind in range(len(listed_test_data)): 
    pred = random_forest.predict([listed_test_data[ind]])
    real = test_labels_mini[ind]
    if real == 1 and pred == 1:
        tp += 1
    if real == 1 and pred == 0:
        fn += 1
    if real == 0 and pred == 1:
        fp += 1
    if real == 0 and pred == 0:
        tn += 1
    if pred == real:
        correctlyPredicted += 1

acc = correctlyPredicted/float(len(listed_test_data))
print("TEST DATA________________________")
print("Accuracy: {}".format(acc))
print("Confusion matrix: f_p = {}, t_p = {}, f_n = {}, t_n = {}".format(fp, tp, fn, tn))

prec = tp/(tp+fp)*100
rec = tp/(tp+fn)*100

print("Precision: {}, recall: {}".format(prec, rec))

TEST DATA________________________
Accuracy: 0.7596153846153846
Confusion matrix: f_p = 147, t_p = 387, f_n = 3, t_n = 87
Precision: 72.47191011235955, recall: 99.23076923076923


In [0]:
F1  = 2*prec*rec/(prec+rec)
print ("f1 score:",F1)


f1 score: 83.76623376623377


In [0]:
correctlyPredicted = 0
fp = 0
tp = 0
fn = 0
tn = 0

for ind in range(len(listed_train_data)): 
    pred = random_forest.predict([listed_train_data[ind]])
    real = train_labels_mini[ind]
    if real == 1 and pred == 1:
        tp += 1
    if real == 1 and pred == 0:
        fn += 1
    if real == 0 and pred == 1:
        fp += 1
    if real == 0 and pred == 0:
        tn += 1
    if pred == real:
        correctlyPredicted += 1

acc = correctlyPredicted/float(len(listed_train_data))
print("TRAIN DATA________________________")
print("Accuracy: {}".format(acc))
print("Confusion matrix: f_p = {}, t_p = {}, f_n = {}, t_n = {}".format(fp, tp, fn, tn))

prec = tp/(tp+fp)*100
rec = tp/(tp+fn)*100
print("Precision: {}, recall: {}".format(prec, rec))


In [0]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB

In [0]:
from sklearn.ensemble import VotingClassifier
LogClf = LogisticRegression()
RndClf = RandomForestClassifier()
DecClf = DecisionTreeClassifier()
voting_clf = VotingClassifier(estimators = [('rf',RndClf) , ('dt' , DecClf)],voting='soft')
voting_clf.fit(listed_train_data, train_labels_mini)

VotingClassifier(estimators=[('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
        

In [0]:
correctlyPredicted = 0
fp = 0
tp = 0
fn = 0
tn = 0

for ind in range(len(listed_test_data)): 
    pred = voting_clf.predict([listed_test_data[ind]])
    real = test_labels_mini[ind]
    if real == 1 and pred == 1:
        tp += 1
    if real == 1 and pred == 0:
        fn += 1
    if real == 0 and pred == 1:
        fp += 1
    if real == 0 and pred == 0:
        tn += 1
    if pred == real:
        correctlyPredicted += 1

acc = correctlyPredicted/float(len(listed_test_data))
print("TEST DATA________________________")
print("Accuracy: {}".format(acc))


prec = tp/(tp+fp)*100
rec = tp/(tp+fn)*100

print("Precision: {}, recall: {}".format(prec, rec))

TEST DATA________________________
Accuracy: 0.7035256410256411
Precision: 69.1588785046729, recall: 94.87179487179486


In [0]:
F1  = 2*prec*rec/(prec+rec)
print ("f1 score:",F1)


f1 score: 80.0
