In [15]:
import pandas as pd
import numpy as np
import keras
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report,confusion_matrix
from keras.callbacks import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical, plot_model
import cv2
import os

In [16]:
# Attach Google Drive to the Colab runtime; the dataset paths used when
# loading the train/test/val splits live under this mount point.
from google.colab import drive

gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [17]:
# Read the images.
# get_training_data loads one dataset split and returns an array whose rows
# have the format [scan, target].
targets = ['PNEUMONIA', 'NORMAL']  # class label == index into this list
img_size = 150  # every scan is resized to img_size x img_size pixels


def get_training_data(scans):
    """Load every image under ``scans/<target>`` as a resized grayscale array.

    Parameters
    ----------
    scans : str
        Directory that contains one sub-directory per entry of ``targets``.

    Returns
    -------
    numpy.ndarray
        Object array with one ``[resized_scan, class_label]`` row per image
        that could be read and resized. ``resized_scan`` is the
        ``img_size`` x ``img_size`` grayscale array and ``class_label`` is
        the index of the sub-directory name in ``targets``.

    Notes
    -----
    Loading is best-effort: a file that cannot be read or resized is reported
    on stdout and skipped. A missing class directory still raises from
    ``os.listdir``.
    """
    data = []
    for class_label, target in enumerate(targets):
        path = os.path.join(scans, target)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            try:
                arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if arr is None:
                    # cv2.imread returns None (it does not raise) when the
                    # file is missing, unreadable, or not an image.
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                resized_arr = cv2.resize(arr, (img_size, img_size))
                data.append([resized_arr, class_label])
            except Exception as e:
                print(f"Skipping {img_path}: {e}")
    # Each row mixes a 2-D array with an int, so the nested list is ragged.
    # NumPy >= 1.24 raises ValueError for ragged input unless dtype=object
    # is requested explicitly.
    return np.array(data, dtype=object)

In [18]:
# Load the three dataset splits from Google Drive (train, then test, then val).
train, test, val = (
    get_training_data(split_dir)
    for split_dir in (
        '/content/gdrive/MyDrive/PythonProjectWebApp/chest_xray/train',
        '/content/gdrive/MyDrive/PythonProjectWebApp/chest_xray/test',
        '/content/gdrive/MyDrive/PythonProjectWebApp/chest_xray/val',
    )
)




In [19]:
# Separate every split into two parallel lists: the image arrays (X_*) and
# their integer class labels (y_*). Each row of a split is [scan, target].
X_train = [scan for scan, _ in train]
y_train = [label for _, label in train]

X_val = [scan for scan, _ in val]
y_val = [label for _, label in val]

X_test = [scan for scan, _ in test]
y_test = [label for _, label in test]

In [20]:
# Stack each list of scans into one array and divide the pixel values by 255.
X_train, X_val, X_test = (
    np.array(scans) / 255 for scans in (X_train, X_val, X_test)
)

In [21]:
# Reshape the scans to (n_images, img_size, img_size, 1), adding the trailing
# single-channel axis, and turn the label lists into arrays.
scan_shape = (-1, img_size, img_size, 1)

X_train, y_train = X_train.reshape(scan_shape), np.array(y_train)
X_val, y_val = X_val.reshape(scan_shape), np.array(y_val)
X_test, y_test = X_test.reshape(scan_shape), np.array(y_test)

In [22]:
# Random image augmentation, intended to reduce overfitting: every batch drawn
# from this generator is a randomly transformed copy of the input images.
augmentation_options = {
    'rotation_range': 30,        # random rotation range (30)
    'zoom_range': 0.2,           # random zoom range (0.2)
    'width_shift_range': 0.1,    # random horizontal shift (0.1)
    'height_shift_range': 0.1,   # random vertical shift (0.1)
    'horizontal_flip': True,     # random left/right flips
    'vertical_flip': False,      # never flip upside down
}
data_aug = ImageDataGenerator(**augmentation_options)

# NOTE(review): per the Keras docs, fit() only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which are enabled above - presumably a no-op here; kept for parity.
data_aug.fit(X_train)

In [None]:
# Build the model: five Conv2D + MaxPool2D stages (32, 64, 64, 128, 256
# filters) followed by a single sigmoid unit for binary classification.
model = Sequential()

# The input shape is derived from img_size so it always matches the arrays
# produced by the reshape step (n_images, img_size, img_size, 1) instead of
# duplicating the literal 150.
model.add(Conv2D(32 , (3,3) , padding = 'same' , activation = 'relu' , input_shape = (img_size, img_size, 1)))
model.add(MaxPool2D((2,2) , strides = 2))

model.add(Conv2D(64 , (3,3) , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(MaxPool2D((2,2) , strides = 2))

model.add(Conv2D(64 , (3,3) , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(MaxPool2D((2,2) , strides = 2 ))

model.add(Conv2D(128 , (3,3) , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(MaxPool2D((2,2) , strides = 2))

model.add(Conv2D(256 , (3,3)  , padding = 'same' , activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2))

# Flatten the final feature maps and map them to one sigmoid output.
model.add(Flatten())

model.add(Dense(units = 1 , activation = 'sigmoid'))

# The metric is registered as 'acc', so its validation counterpart is logged
# as 'val_acc'.
model.compile(optimizer = "rmsprop" , loss = 'binary_crossentropy' , metrics = ['acc'])
model.summary()
plot_model(model, to_file = 'model_plot.png', show_shapes = True, show_layer_names = True)

In [24]:
# Learning-rate schedule: when 'val_acc' has not improved for 2 epochs,
# multiply the learning rate by 0.3, never going below 1e-6.
LRR = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.3,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

In [25]:
# Train on randomly augmented batches, but validate on the untouched
# validation scans. Feeding the validation set through data_aug.flow() would
# apply random rotations/zooms/shifts/flips to it, so val_loss / val_acc would
# differ from epoch to epoch for reasons unrelated to the model and would no
# longer measure performance on real images.
history = model.fit(
    data_aug.flow(X_train, y_train, batch_size = 44),
    epochs = 16,
    validation_data = (X_val, y_val),
    callbacks = [LRR],
)

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 7: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 8/16
Epoch 9/16
Epoch 9: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 10/16
Epoch 11/16
Epoch 11: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 12/16
Epoch 13/16
Epoch 13: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 14/16
Epoch 15/16
Epoch 15: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 16/16


In [26]:
# Write the trained model to an HDF5 file in the current working directory.
model.save(filepath='PNmodel.h5')

In [27]:
# Evaluate on the test set once. evaluate() returns [loss, acc] because the
# model was compiled with a single 'acc' metric, so both values come from the
# same pass instead of running inference over the test set twice.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Loss:" , test_loss)
print("Accuracy:" , test_accuracy)

Loss: 0.2633186876773834
Accuracy: 0.9166666865348816
