<a href="https://colab.research.google.com/github/Rahulraj31/Chest_Xray_Pneumonia/blob/main/Chest_X_ray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import PIL.Image as Image
import cv2
import numpy as np
import pandas as pd

import matplotlib.pylab as plt

import tensorflow as tf
import tensorflow_hub as hub

from tensorflow import keras
from tensorflow.keras import layers,losses
from tensorflow.keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

In [1]:
import zipfile

# Unpack the dataset archive into /content.
local_zip = '/content/drive/MyDrive/Colab Notebooks/chest X ray - Sorted.zip'
# The context manager closes the archive even if extraction raises part-way,
# which the manual open/close pair did not guarantee.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/content')

In [3]:
from pathlib import Path 
# Root folder created by extracting the archive.
dir_alldata = Path('/content/chest_xray')

# One sub-directory per split: train / val / test.
train_data_dir, validation_data_dir, test_data_dir = (
    dir_alldata / split_name for split_name in ('train', 'val', 'test')
)

# Per-class folders inside the training split.
normal_cases_train, pneumonia_cases_train = (
    train_data_dir / class_folder for class_folder in ('NORMAL', 'PNEUMONIA')
)

In [14]:
# Map each class name to the list of image paths found for it, per split.
# glob() makes no ordering promise, so every listing is sorted: this keeps
# index-based lookups (e.g. [0]) and the dataframe row order reproducible
# across runs and machines.
train_images_dict = {
    'normal': sorted(normal_cases_train.glob('*')),
    'pneumonia': sorted(pneumonia_cases_train.glob('*')),
}

test_images_dict = {
    'normal': sorted(test_data_dir.glob('NORMAL/*')),
    'pneumonia': sorted(test_data_dir.glob('PNEUMONIA/*')),
}

val_images_dict = {
    'normal': sorted(validation_data_dir.glob('NORMAL/*')),
    'pneumonia': sorted(validation_data_dir.glob('PNEUMONIA/*')),
}

# Dictionary key -> label string written into the dataframes.
labels_dict = {
    'normal': "normal",
    'pneumonia': "pneumonia",
}

In [19]:
# Peek at the array shape of the first pneumonia image in the training split.
sample_path = train_images_dict['pneumonia'][0]
img = cv2.imread(str(sample_path))
img.shape

(1249, 1758, 3)

In [20]:
# Peek at the array shape of the 11th normal image in the test split.
sample_path = test_images_dict['normal'][10]
img = cv2.imread(str(sample_path))
img.shape

(1576, 2143, 3)

In [17]:
# Peek at the array shape of the first normal image in the validation split.
sample_path = val_images_dict['normal'][0]
img = cv2.imread(str(sample_path))
img.shape

(1416, 1776, 3)

In [26]:
# Flatten {class name -> image paths} into one (filepath, label) pair per file.
train_records = [
    (str(image_path), labels_dict[class_name])
    for class_name, image_paths in train_images_dict.items()
    for image_path in image_paths
]
Xtrain = [filepath for filepath, _ in train_records]
ytrain = [label for _, label in train_records]

# Two-column frame (filepaths, labels) describing the training split.
Fseries = pd.Series(Xtrain, name="filepaths", dtype=str)
Lseries = pd.Series(ytrain, name="labels", dtype=str)
train_data = pd.concat((Fseries, Lseries), axis="columns")
train_df = pd.DataFrame(train_data)

In [30]:
# Number of training images per class label.
print(train_df["labels"].value_counts())

pneumonia    3875
normal       1341
Name: labels, dtype: int64


In [39]:
# Flatten {class name -> image paths} into one (filepath, label) pair per file.
test_records = [
    (str(image_path), labels_dict[class_name])
    for class_name, image_paths in test_images_dict.items()
    for image_path in image_paths
]
Xtest = [filepath for filepath, _ in test_records]
ytest = [label for _, label in test_records]

# Two-column frame (filepaths, labels) describing the test split.
Fseries = pd.Series(Xtest, name="filepaths", dtype=str)
Lseries = pd.Series(ytest, name="labels", dtype=str)
test_data = pd.concat((Fseries, Lseries), axis="columns")
test_df = pd.DataFrame(test_data)

In [40]:
# Number of test images per class label.
test_df["labels"].value_counts()

pneumonia    390
normal       234
Name: labels, dtype: int64

In [42]:
# Flatten {class name -> image paths} into one (filepath, label) pair per file.
val_records = [
    (str(image_path), labels_dict[class_name])
    for class_name, image_paths in val_images_dict.items()
    for image_path in image_paths
]
Xval = [filepath for filepath, _ in val_records]
yval = [label for _, label in val_records]

# Two-column frame (filepaths, labels) describing the validation split.
Fseries = pd.Series(Xval, name="filepaths", dtype=str)
Lseries = pd.Series(yval, name="labels", dtype=str)
val_data = pd.concat((Fseries, Lseries), axis="columns")
val_df = pd.DataFrame(val_data)

# Number of validation images per class label.
val_df["labels"].value_counts()

normal       8
pneumonia    8
Name: labels, dtype: int64

In [43]:

# (rows, columns) of the train, test and validation frames, one per line.
print(train_df.shape, test_df.shape, val_df.shape, sep="\n")

(5216, 2)
(624, 2)
(16, 2)


In [45]:
# Build batch iterators over the three dataframes.
# No augmentation is configured: the only per-image transform is
# MobileNetV2's preprocess_input.
image_gen = ImageDataGenerator(preprocessing_function= tf.keras.applications.mobilenet_v2.preprocess_input)

# train_df is assembled class by class, so its rows are grouped by label.
# Reading it in order would feed the optimizer single-class batches; shuffle
# the training stream (with a fixed seed so runs are repeatable).
train = image_gen.flow_from_dataframe(dataframe= train_df,x_col="filepaths",y_col="labels",
                                      target_size=(244,244),
                                      color_mode='rgb',
                                      class_mode="categorical", # one-hot labels, paired with categorical_crossentropy
                                      batch_size=32,
                                      shuffle=True,
                                      seed=42
                                     )
# Evaluation streams stay in dataframe order so predictions line up with rows.
test = image_gen.flow_from_dataframe(dataframe= test_df,x_col="filepaths", y_col="labels",
                                     target_size=(244,244),
                                     color_mode='rgb',
                                     class_mode="categorical",
                                     batch_size=32,
                                     shuffle= False
                                    )
val = image_gen.flow_from_dataframe(dataframe= val_df,x_col="filepaths", y_col="labels",
                                    target_size=(244,244),
                                    color_mode= 'rgb',
                                    class_mode="categorical",
                                    batch_size=32,
                                    shuffle=False
                                   )

Found 5216 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.
Found 16 validated image filenames belonging to 2 classes.


In [46]:

# Class names in the order the training generator indexed them.
classes = list(train.class_indices)
print(classes)

['normal', 'pneumonia']


In [47]:
# Small CNN: two (conv, conv, max-pool) stages followed by a dense classifier.
cnn = Sequential([
    # Stage 1: two 3x3 convolutions (32 filters, 'same' padding), then 2x2 pooling.
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(244, 244, 3)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=(2, 2)),

    # Stage 2: same layout as stage 1.
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=(2, 2)),

    # Classifier head.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(rate=0.3),
    layers.Dense(64, activation="relu"),
    # The two classes are mutually exclusive and the targets are one-hot
    # (class_mode="categorical", categorical_crossentropy loss), so the output
    # must be a probability distribution: softmax rather than two independent
    # sigmoids.
    layers.Dense(2, activation="softmax"),
])

In [48]:
# Adam optimizer, categorical cross-entropy loss, accuracy as the reported metric.
cnn.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [49]:
# Train for 10 epochs on the training generator, using the validation
# generator as validation data; the returned history object is kept.
History = cnn.fit(
    train,
    validation_data=val,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
