In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Importing Libraries**

In [2]:
# importing all the libraries
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense , Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# **Loading Data**

In [3]:
# Root folder of the chest X-ray dataset inside the read-only input directory
allimage = Path('../input/chest-xray-pneumonia/chest_xray/chest_xray')

# One sub-folder per data split
training_path, testing_path, validation_path = (
    allimage / split_name for split_name in ('train', 'test', 'val')
)

# Per-class image folders for the training and testing splits
train_normal, train_pneumonia = (
    training_path / class_name for class_name in ('NORMAL', 'PNEUMONIA')
)
testing_normal, testing_pneumonia = (
    testing_path / class_name for class_name in ('NORMAL', 'PNEUMONIA')
)


# **Image Preprocessing**

In [4]:
# Size every image is resized to when it is read from disk
IMAGE_SIZE = (64, 64)

# Training images: pixel values scaled by 1/255 plus random augmentation
# (shear, zoom, horizontal flip) applied on the fly.
train_datagen = ImageDataGenerator(rescale = 1/255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)

# Validation images are only rescaled. Random augmentation here would make
# the validation metrics differ from run to run and measure performance on
# distorted images instead of the real ones.
validation_datagen = ImageDataGenerator(rescale = 1/255)

# Test images are only rescaled as well.
test_datagen = ImageDataGenerator(rescale = 1/255)

# class_mode='binary' yields one scalar label per image.
training_set = train_datagen.flow_from_directory(training_path,
                                                 target_size = IMAGE_SIZE,
                                                 batch_size = 32,
                                                 class_mode = 'binary')

# shuffle=False keeps evaluation batches in a fixed order.
validation_set = validation_datagen.flow_from_directory(validation_path,
                                                 target_size = IMAGE_SIZE,
                                                 batch_size = 8,
                                                 class_mode = 'binary',
                                                 shuffle = False)

testing_set = test_datagen.flow_from_directory(testing_path,
                                                 target_size = IMAGE_SIZE,
                                                 batch_size = 32,
                                                 class_mode = 'binary',
                                                 shuffle = False)
                                                 

# **Data Visualization**

In [5]:
# Preview 12 images from one training batch, each titled with its class
plt.figure(figsize=(15,15))

# One batch from the training generator: element 0 holds the images,
# element 1 the matching labels
data = next(training_set)
imag, label = data[0], data[1]

for i in range(12):
    # 6x6 grid of which only the first 12 cells are used
    ax = plt.subplot(6, 6, i + 1)
    ax.imshow(imag[i])
    ax.axis('off')
    # label 0 is titled "Normal", any other value "Pneumonia"
    ax.set_title("Normal" if label[i] == 0 else "Pneumonia")

plt.suptitle('Images from Training Dataset',fontsize=22)
plt.show()

In [6]:
# Visualize the number of images per class in the training data

# Collect the *.jpeg files found in each training class folder
normal = list(train_normal.glob('*.jpeg'))
pneumonia = list(train_pneumonia.glob('*.jpeg'))

# One label per image: '0' for normal images and '1' for pneumonia images
train_labels = [0] * len(normal) + [1] * len(pneumonia)

# Single-column dataframe of labels, used as the plotting source
train_data = pd.DataFrame(train_labels, columns=['label'])

plt.figure(figsize=(8,6))
# Pass the column by keyword: the first positional parameter of countplot is
# `data` (not `x`) in recent seaborn releases, so a positional Series is not
# interpreted consistently across versions.
sns.countplot(x='label', data=train_data, palette='GnBu')
plt.title("Normal (0) and Pneumonia (1) XRay Images in training dataset")
plt.show()

# **Model Building - CNN**

In [7]:
# CNN binary classifier: three Conv2D + MaxPool2D stages, a Flatten, three
# dense hidden layers with dropout, and a single sigmoid output unit.
model = Sequential([
    # 1st convnet layer
    Conv2D(32, 3, activation='relu', kernel_initializer='he_uniform'),
    MaxPool2D(),

    # 2nd convnet layer
    Conv2D(64, 3, activation='relu', kernel_initializer='he_uniform'),
    MaxPool2D(),

    # 3rd convnet layer
    Conv2D(128, 3, activation='relu', kernel_initializer='he_uniform'),
    MaxPool2D(),

    Flatten(),

    # hidden layer 1
    Dense(128, activation='relu'),
    Dropout(0.4),

    # hidden layer 2
    # NOTE(review): this is the only hidden layer using sigmoid instead of
    # relu -- confirm this is intentional.
    Dense(64, activation='sigmoid'),
    Dropout(0.5),

    # hidden layer 3
    Dense(32, activation='relu'),
    Dropout(0.2),

    # output layer
    Dense(1, activation='sigmoid'),
])

# Metrics tracked during training and evaluation; the explicit names fix the
# keys under which precision and recall appear in the training history.
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# Compile with the Adam optimizer and binary cross-entropy loss
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Train for 5 epochs, validating on the validation generator
history = model.fit(training_set, validation_data=validation_set, epochs=5)


# **Plotting training and validation accuracy over epochs**


In [8]:
# Training vs. validation accuracy, one point per entry in the fit history
plt.figure(figsize=(10,6))

# (history key, line color, legend label) for each curve
accuracy_curves = (
    ('accuracy', 'lightgreen', 'Training accuracy'),
    ('val_accuracy', 'darkcyan', 'Validation accuracy'),
)
for metric_key, line_color, line_label in accuracy_curves:
    plt.plot(history.history[metric_key], color=line_color, label=line_label)

plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# **Plotting training and validation loss over epochs**

In [9]:
# Training vs. validation loss, read from the fit history as a dataframe
history_df = pd.DataFrame(history.history)

plt.figure(figsize=(10,6))

# (history column, line format, legend label) for each curve
loss_curves = (
    ('loss', "blue", 'Training loss'),
    ('val_loss', "purple", 'Validation loss'),
)
for column, line_fmt, line_label in loss_curves:
    plt.plot(history_df[[column]], line_fmt, label=line_label)

plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc="best")

plt.show()

# **Model Evaluation**

In [10]:
# Model performance on the testing data. The result is unpacked as the loss
# followed by the three metrics passed to compile (accuracy, precision, recall).
test_scores = model.evaluate(testing_set)
loss, accuracy, precision, recall = test_scores

In [11]:
# Print the final training metrics next to the testing metrics.
print("Training Dataset Performance :")

# Read the last recorded epoch (index -1) instead of a fixed epoch number, so
# the summary stays valid whatever number of epochs the model was trained for.
final_train = {
    metric_name: history.history[metric_name][-1]
    for metric_name in ('accuracy', 'precision', 'recall')
}

print("Accuracy: ",round(final_train['accuracy']*100,2)," Precision:", round(final_train['precision']*100,2),
       "Recall:", round(final_train['recall']*100,2))

print("========================================")
print("========================================")

print("Testing Dataset Performance :")
print(f'Accuracy: {accuracy*100:.2f}, Precision: {precision*100:.2f}, Recall: {recall*100:.2f}')
