In [23]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras import layers,models
import os
from PIL import Image
import cv2

In [4]:
meta_data=pd.read_csv('Chest_xray_Corona_Metadata.csv')

In [5]:
import tensorflow as tf
import keras.backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping


In [7]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [4]:
#meta_data.columnsContains 165 grayscale images in GIF format of 15 individuals. There are 11 images per subject, one per different facial expression or configuration: center-light, w/glasses, happy, left-light, w/no glasses, normal, right-light, sad, sleepy, surprised, and wink.

In [8]:
meta_data=meta_data.drop(['Unnamed: 0'],axis=1)


In [9]:
meta_data = meta_data.drop(['Label_2_Virus_category','Label_1_Virus_category'],axis=1)
meta_data.head(10)

Unnamed: 0,X_ray_image_name,Label,Dataset_type
0,IM-0128-0001.jpeg,Normal,TRAIN
1,IM-0127-0001.jpeg,Normal,TRAIN
2,IM-0125-0001.jpeg,Normal,TRAIN
3,IM-0122-0001.jpeg,Normal,TRAIN
4,IM-0119-0001.jpeg,Normal,TRAIN
5,IM-0117-0001.jpeg,Normal,TRAIN
6,IM-0115-0001.jpeg,Normal,TRAIN
7,IM-0189-0001.jpeg,Normal,TRAIN
8,IM-0187-0001.jpeg,Normal,TRAIN
9,IM-0185-0001.jpeg,Normal,TRAIN


In [10]:
train=[]
test=[]
for i in range(len(meta_data)):
    if meta_data.iloc[i][2]=='TRAIN':
        train.append(meta_data.iloc[i][0])
    else :
        test.append(meta_data.iloc[i][0])
print(len(train))
print(len(test))

5286
624


In [11]:
for i in range(len(meta_data)):
    if meta_data.iloc[i]['Label']=='Normal':
        meta_data.iloc[i]['Label'] = 0
    else:
        meta_data.iloc[i]['Label'] = 1
        
print(meta_data.tail(10))

                X_ray_image_name Label Dataset_type
5900  person1644_virus_2844.jpeg     1         TEST
5901  person1643_virus_2843.jpeg     1         TEST
5902  person1642_virus_2842.jpeg     1         TEST
5903  person1641_virus_2840.jpeg     1         TEST
5904  person1640_virus_2839.jpeg     1         TEST
5905  person1637_virus_2834.jpeg     1         TEST
5906  person1635_virus_2831.jpeg     1         TEST
5907  person1634_virus_2830.jpeg     1         TEST
5908  person1633_virus_2829.jpeg     1         TEST
5909  person1632_virus_2827.jpeg     1         TEST


In [12]:
train_data = meta_data[meta_data['Dataset_type']=='TRAIN']
test_data = meta_data[meta_data['Dataset_type']=='TEST']

In [24]:
def process_data(img_dims, batch_size):
    # Data generation objects
    train_datagen = ImageDataGenerator(rescale=1./255, zoom_range=0.3, vertical_flip=True)
    test_val_datagen = ImageDataGenerator(rescale=1./255)
    
    train_gen = train_datagen.flow_from_directory(
    directory='train', 
    target_size=(img_dims, img_dims), 
    batch_size=batch_size, 
    class_mode='binary', 
    shuffle=True)
    
    test_gen = test_val_datagen.flow_from_directory(
    directory='test', 
    target_size=(img_dims, img_dims), 
    batch_size=batch_size, 
    class_mode='binary', 
    shuffle=True)
    
    test_data = []
    test_labels = []

    for cond in ['/NORMAL/', '/PNEUMONIA/']:
        for img in (os.listdir('test' + cond)):
            img = plt.imread('test'+cond+img)
            img = cv2.resize(img, (img_dims, img_dims))
            img = np.dstack([img, img, img])
            img = img.astype('float32') / 255
            if cond=='/NORMAL/':
                label = 0
            elif cond=='/PNEUMONIA/':
                label = 1
            test_data.append(img)
            test_labels.append(label)
        
    test_data = np.array(test_data)
    test_labels = np.array(test_labels)
    
    return train_gen, test_gen, test_data, test_labels

In [25]:
img_dims = 150
epochs = 20
batch_size = 64

train_gen,test_gen,test_data,test_labels = process_data(img_dims,batch_size) 

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [15]:
inputs = Input(shape=(img_dims, img_dims, 3))

# First conv block
x = Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same')(inputs)
x = Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = MaxPool2D(pool_size=(2, 2))(x)

# Second conv block
x = SeparableConv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)

# Third conv block
x = SeparableConv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)

# Fourth conv block
x = SeparableConv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Dropout(rate=0.2)(x)

# Fifth conv block
x = SeparableConv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = SeparableConv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same')(x)
x = BatchNormalization()(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Dropout(rate=0.2)(x)

# FC layer
x = Flatten()(x)
x = Dense(units=512, activation='relu')(x)
x = Dropout(rate=0.7)(x)
x = Dense(units=128, activation='relu')(x)
x = Dropout(rate=0.5)(x)
x = Dense(units=64, activation='relu')(x)
x = Dropout(rate=0.3)(x)

# Output layer
output = Dense(units=1, activation='sigmoid')(x)

# Creating model and compiling
model = Model(inputs=inputs, outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
checkpoint = ModelCheckpoint(filepath='best_weights.hdf5', save_best_only=True, save_weights_only=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, verbose=2, mode='max')
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=1, mode='min')

In [9]:
train_imgs=pd.Series(trainimg)
test_imgs=pd.Series(testimg)

In [16]:
hist = model.fit(
           train_gen, steps_per_epoch=train_gen.samples // batch_size, 
           epochs=epochs, validation_data=test_gen, 
           validation_steps=test_gen.samples // batch_size, callbacks=[checkpoint, lr_reduce])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 00009: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 10/20
Epoch 11/20
Epoch 00011: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 12/20
Epoch 13/20
Epoch 00013: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 14/20
Epoch 15/20
Epoch 00015: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 16/20
Epoch 17/20
Epoch 00017: ReduceLROnPlateau reducing learning rate to 7.289999985005124e-07.
Epoch 18/20
Epoch 19/20
Epoch 00019: ReduceLROnPlateau reducing learning rate to 2.1870000637136398e-07.
Epoch 20/20


In [17]:
model.save('Pneumonia_model.hdf5')

In [26]:
from sklearn.metrics import accuracy_score, confusion_matrix
preds = model.predict(test_data)

In [28]:
acc_score = accuracy_score(test_labels,np.round(preds))
conf_mat = confusion_matrix(test_labels, np.round(preds))
tn,fp,fn,tp =  conf_mat.ravel()

print(acc_score)
print(conf_mat)

0.8942307692307693
[[182  52]
 [ 14 376]]
