In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Preview the input directory: print the paths of at most the first ten files found.
count = 0
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Only take as many files from this directory as are still needed to reach ten.
    still_needed = 10 - count
    for filename in filenames[:still_needed]:
        print(os.path.join(dirname, filename))
        count += 1
    if count >= 10:
        break

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout , BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ReduceLROnPlateau
import cv2
import os

In [None]:
labels = ['PNEUMONIA', 'NORMAL']


def get_training_data(data_dir, img_size=150):
    """Load every image under ``data_dir/<label>`` as a resized grayscale array.

    Parameters
    ----------
    data_dir : str
        Directory containing one sub-directory per entry of ``labels``.
    img_size : int, optional
        Side length, in pixels, of the square each image is resized to
        (default 150, the size used by the rest of this notebook).

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_images, 2)``. Column 0 holds the resized
        image array, column 1 the class index (the label's position in
        ``labels``). Iterating it yields ``(image, class_index)`` pairs.

    Notes
    -----
    Files that cannot be read or resized are reported with ``print`` and
    skipped, so one corrupt file does not abort the whole load.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        # os.listdir order is arbitrary; sort so repeated runs load the same order.
        for img in sorted(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                # imread signals failure by returning None instead of raising.
                if img_arr is None:
                    print(f"Skipping unreadable image: {img_path}")
                    continue
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
            except Exception as e:
                print(e)
                continue
            data.append((resized_arr, class_num))

    # Each row mixes a 2-D image with a scalar label. Letting np.array() infer a
    # shape from such ragged input raises ValueError on recent NumPy releases,
    # so the object array is filled explicitly instead.
    out = np.empty((len(data), 2), dtype=object)
    for row, (image, class_index) in enumerate(data):
        out[row, 0] = image
        out[row, 1] = class_index
    return out

In [None]:
# Load the training split first, then the test split, from the Kaggle chest X-ray dataset.
train, test = (
    get_training_data(split_dir)
    for split_dir in (
        '/kaggle/input/chest-xray-pneumonia/chest_xray/train',
        '/kaggle/input/chest-xray-pneumonia/chest_xray/test',
    )
)

  


In [None]:
# Each entry of `train` / `test` is an (image, class_index) pair; split the
# pairs into parallel feature and label lists.
x_train = [image for image, _ in train]
y_train = [class_index for _, class_index in train]

x_test = [image for image, _ in test]
y_test = [class_index for _, class_index in test]

In [None]:
# Stack the image lists into arrays and divide by 255 so that 8-bit pixel
# intensities land in the [0, 1] range.
x_train, x_test = (np.array(images) / 255 for images in (x_train, x_test))

In [None]:
img_size = 150

# Append a trailing single-channel axis so the images have the
# (n_samples, height, width, 1) layout that the Conv2D input below declares.
image_shape = (-1, img_size, img_size, 1)
x_train = np.reshape(x_train, image_shape)
x_test = np.reshape(x_test, image_shape)

# Labels become plain NumPy arrays.
y_train = np.array(y_train)
y_test = np.array(y_test)

In [None]:
# Data augmentation: random rotations, zooms, shifts and horizontal flips are
# applied to the training images. This helps to prevent overfitting and to
# cope with the imbalanced classes.
data_generator = ImageDataGenerator(
    # Random geometric transforms.
    rotation_range=30,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    # Every statistical normalisation option is explicitly switched off.
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# NOTE(review): fit() computes dataset statistics for the featurewise / ZCA
# options, all of which are disabled above -- presumably redundant; confirm.
data_generator.fit(x_train)

In [None]:

# CNN for binary classification: five Conv2D -> BatchNormalization -> MaxPool2D
# stages (Dropout inserted after the 2nd, 4th and 5th convolutions), followed by
# a small dense head ending in a single sigmoid unit.
model = Sequential([
    # Stage 1: 32 filters on the 150x150 single-channel input.
    Conv2D(32, (3, 3), strides=1, padding='same', activation='relu', input_shape=(150, 150, 1)),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 2: 64 filters with light dropout.
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.1),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 3: 64 filters, no dropout.
    Conv2D(64, (3, 3), strides=1, padding='same', activation='relu'),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 4: 128 filters.
    Conv2D(128, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Stage 5: 256 filters.
    Conv2D(256, (3, 3), strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    MaxPool2D((2, 2), strides=2, padding='same'),

    # Classifier head; the dropout layers are there to limit overfitting.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer="rmsprop",
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 150, 150, 32)      320       
_________________________________________________________________
batch_normalization_15 (Batc (None, 150, 150, 32)      128       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 75, 75, 32)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 75, 75, 64)        18496     
_________________________________________________________________
dropout_12 (Dropout)         (None, 75, 75, 64)        0         
_________________________________________________________________
batch_normalization_16 (Batc (None, 75, 75, 64)        256       
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 38, 38, 64)       

In [None]:
# Multiply the learning rate by 0.3 whenever the training accuracy has not
# improved for 2 epochs, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='accuracy',
    factor=0.3,
    patience=2,
    min_lr=1e-6,
    verbose=1,
)

# Train on augmented batches. The per-epoch log shows the training accuracy
# the model reaches and whether the loss is decreasing.
augmented_batches = data_generator.flow(x_train, y_train, batch_size=32)
history = model.fit(
    augmented_batches,
    epochs=10,
    callbacks=[learning_rate_reduction],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate once on the held-out test set. evaluate() returns the loss followed
# by the compiled metrics (here: accuracy), so a single pass yields both values
# instead of running inference over the whole test set twice.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print("Loss of the model is - " , test_loss)
print("Accuracy of the model is - " , test_accuracy*100 , "%")

Loss of the model is -  0.35083287954330444
Accuracy of the model is -  91.98718070983887 %


# Therefore, the CNN reaches an accuracy of about 92 percent on the unseen testing data

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Sequential.predict_classes() no longer exists in current TensorFlow releases.
# For a single sigmoid output the equivalent is to threshold the predicted
# probability at 0.5.
predictions = (model.predict(x_test) > 0.5).astype("int32")
# Flatten the (n_samples, 1) column into a 1-D vector aligned with y_test.
predictions = predictions.reshape(1,-1)[0]
# Kept as the last expression so the notebook actually displays the preview.
predictions[:30]

In [None]:
# Per-class precision / recall / F1 on the test set. The display names follow
# the class-index order used when loading the data (0 = pneumonia, 1 = normal).
print("THE CLASSIFICATION REPORT OF CNN IS AS FOLLOWS:")
cnn_report = classification_report(
    y_test,
    predictions,
    target_names=['Pneumonia ','Normal '],
)
print(cnn_report)

THE CLASSIFICATION REPORT OF CNN IS AS FOLLOWS:
              precision    recall  f1-score   support

  Pneumonia        0.91      0.97      0.94       390
     Normal        0.94      0.84      0.89       234

    accuracy                           0.92       624
   macro avg       0.93      0.90      0.91       624
weighted avg       0.92      0.92      0.92       624

