**load dataset**

In [1]:
# prompt: unzip /content/drive/MyDrive/DataSets_Term3/PneumoniaDetection/pneumonia.zip  to /content/drive/MyDrive/DataSets_Term3/PneumoniaDetection/Pneumonia_data

# !unzip /content/drive/MyDrive/DataSets_Term3/PneumoniaDetection/pneumonia.zip -d /content/drive/MyDrive/DataSets_Term3/PneumoniaDetection/Pneumonia_data


**EDA**

In [None]:
# root_dir = 'chest_xray'

In [15]:
# prompt: explore and print 5 images in each subfolder of  the data in val, test, train of root_dir make he image size printed very small like 10x12 display

import matplotlib.pyplot as plt
import os
import cv2

def explore_images(root_dir):
  for subdir, dirs, files in os.walk(root_dir):
    if subdir != root_dir:
      print(f"Exploring subdirectory: {subdir}")
      image_count = 0
      for file in files:
          if file.endswith(('.png', '.jpg', '.jpeg')):
              if image_count < 5:
                  image_path = os.path.join(subdir, file)
                  img = cv2.imread(image_path)
                  img = cv2.resize(img,(150,100))
                  plt.figure(figsize=(1,1))  # Set figure size
                  plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) # Convert to RGB
                  plt.axis('off')
                  plt.show()
                  image_count += 1
              else:
                break
explore_images('chest_xray')


In [16]:
# checking for different file formats

import os

def check_file_formats(root_dir):
  for subdir, _, files in os.walk(root_dir):
    if subdir != root_dir:
      formats = set()
      for file in files:
        formats.add(file.split('.')[-1].lower())
      print(f"Subdirectory: {subdir}, Formats: {formats}") # if you want to print
check_file_formats('chest_xray')

In [17]:
#total number of images in each of the sub folders

import os

def count_images_in_subfolders(root_dir):
  for subdir, _, files in os.walk(root_dir):
    if subdir != root_dir:
      image_count = 0
      for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg')):
          image_count += 1
      print(f"Subdirectory: {subdir}, Image count: {image_count}")

count_images_in_subfolders('chest_xray')

In [65]:
#checking for missing images

import os
import cv2
import numpy as np
import pandas as pd

# Define paths
# data_dir = root_dir
# normal_dir = "C:\Users\Joel\Downloads\CNN\chest_xray\train\NORMAL"
# pneumonia_dir = "C:\Users\Joel\Downloads\CNN\chest_xray\train\PNEUMONIA"

# Check for missing images
def check_missing_images(directory):
    for filename in os.listdir(directory):
        if not os.path.isfile(os.path.join(directory, filename)):
            print(f'Missing image: {filename}')

check_missing_images('train/NORMAL')
check_missing_images('train/PNEUMONIA')

In [67]:
# Exploring data

import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2
import numpy as np

# Define paths
# data_dir = "chest_xray/"  # Use the root_dir from previous cells
normal_dir = "train/NORMAL"
pneumonia_dir = "train/PNEUMONIA"

# Load image data and labels
X = []
y = []

# Function to load images and labels from a directory
def load_images_and_labels(directory, label):
    for filename in os.listdir(directory):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            img_path = os.path.join(directory, filename)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (150, 100))  # Resize images for consistency
            X.append(img)
            y.append(label)

# Load normal images and assign label 0
load_images_and_labels(normal_dir, 0)

# Load pneumonia images and assign label 1
load_images_and_labels(pneumonia_dir, 1)

# Convert X and y to NumPy arrays
X = np.array(X)
y = np.array(y)

In [68]:
# # Feature Engineering

# from sklearn.model_selection import train_test_split
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

# # Split the dataset
# X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# # Data augmentation
# train_datagen = ImageDataGenerator(
#     rescale=1./255,
#     rotation_range=20,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True
# )

# val_datagen = ImageDataGenerator(rescale=1./255)

# train_generator = train_datagen.flow(X_train, y_train, batch_size=32)
# val_generator = val_datagen.flow(X_val, y_val, batch_size=32)

###############################################################################################################

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define paths
train_data_dir = 'train/'
test_data_dir = 'val/'
val_data_dir = 'test/'

# Data augmentation for training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Normalization for validation and test data
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create generators
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'  # Assuming binary classification (normal vs pneumonia)
)

val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False  # Important for evaluation
)

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


In [69]:
# model selection

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Build a simple CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(


In [70]:
model.summary()

In [None]:
# hyper parameter tuning

from keras.callbacks import EarlyStopping

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
history = model.fit(train_generator, validation_data=val_generator, epochs=20, callbacks=[early_stopping])

Epoch 1/20


  self._warn_if_super_not_called()


[1m115/163[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m27s[0m 565ms/step - accuracy: 0.7067 - loss: 0.8709

In [None]:
# saving the model

model.save('pneumonia_detection_model.h5')

In [None]:
# prompt: how to load the save modle

from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model('pneumonia_detection_model.h5')

# Now you can use the loaded_model for predictions or further training
# For example:
# predictions = loaded_model.predict(new_data)