<a href="https://colab.research.google.com/github/EricSiq/Understanding-DeepLearning/blob/main/Experiment_No6_XRay_Classification_Pneumonia_using_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import os


In [None]:

# Install Kaggle API client and download the dataset
# This part is specific to Google Colab and Kaggle
!pip install kaggle
from google.colab import files
files.upload() # Upload your kaggle.json file here
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download and unzip the pneumonia dataset
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
!unzip chest-xray-pneumonia.zip



TypeError: 'NoneType' object is not subscriptable

In [3]:
# Locations of the three dataset splits
train_dir = '/content/chest_xray/train'
val_dir = '/content/chest_xray/val'
test_dir = '/content/chest_xray/test'

# Validation and test images are only rescaled to [0, 1] (no augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training images are rescaled to [0, 1] and additionally augmented
# (shear, zoom, horizontal flip) to prevent overfitting
train_datagen = ImageDataGenerator(
    rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
)

# Settings shared by every directory iterator:
#   - all images are resized to 150x150 pixels
#   - batches hold 32 images
#   - 'binary' labels for our two classes (normal/pneumonia),
#     inferred automatically from the subfolder names
flow_options = dict(target_size=(150, 150), batch_size=32, class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

validation_generator = test_datagen.flow_from_directory(val_dir, **flow_options)

# shuffle=False is important for evaluation: it keeps the test samples in a
# fixed order
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, **flow_options
)

FileNotFoundError: [Errno 2] No such file or directory: '/content/chest_xray/train'

In [None]:
# Initialize a sequential model
model = Sequential()

# Declare the input explicitly: 150x150 images with 3 channels, matching the
# target_size used by the data generators.
# Passing `input_shape=` to the first layer is deprecated in Keras 3 (it emits
# a UserWarning); an explicit Input object works on both old and new versions.
model.add(tf.keras.Input(shape=(150, 150, 3)))

# Add a series of Convolutional and Pooling layers.
# These layers automatically learn features from the images; each 2x2 pooling
# step halves the spatial resolution while the filter count grows 32 -> 128.
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))


In [None]:

# Classification head, appended to the existing model in this order:
#   Flatten -> turns the incoming feature maps into a vector for the dense layers
#   Dropout -> rate 0.5, to reduce overfitting
#   Dense   -> 512 units with ReLU activation
#   Dense   -> a single sigmoid unit for binary classification
head_layers = [
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid'),
]
for head_layer in head_layers:
    model.add(head_layer)

# Compile with the Adam optimizer and binary crossentropy loss,
# tracking accuracy during training and evaluation
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Show the resulting architecture
model.summary()

In [None]:
# Train the model using the fit method with generators.
# We train for 10 epochs, a good starting point.
#
# steps_per_epoch / validation_steps are intentionally not passed: the
# directory iterators have a known length, so Keras runs every batch of each
# split, including the final partial one. Computing the step counts as
# `samples // batch_size` drops that last partial batch and, worse, evaluates
# to 0 whenever a split holds fewer images than one batch.
# NOTE(review): the Kaggle `val` folder appears to be very small - check
# `validation_generator.samples` to confirm it is below the batch size of 32.
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)

# Save the trained model
model.save('pneumonia_classifier.h5')

In [None]:
# Score the trained model on the held-out test generator; evaluate() yields
# the loss followed by the accuracy metric the model was compiled with.
loss, accuracy = model.evaluate(test_generator)

# Report both numbers, one per line, with four decimal places
print(f"Test Loss: {loss:.4f}", f"Test Accuracy: {accuracy:.4f}", sep="\n")


In [None]:

# Visualize the training history: accuracy (left) and loss (right).
#
# Both panels are drawn in ONE cell on purpose. The inline backend renders and
# closes the current figure at the end of every cell, so splitting the two
# subplots across cells puts the second panel on a brand-new, default-sized
# figure and the side-by-side layout is lost.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (key in history.history, human-readable name) for each panel
panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))

for ax, (metric_key, metric_name) in zip(axes, panels):
    # Training curve and the matching validation curve (Keras prefixes it
    # with 'val_')
    ax.plot(history.history[metric_key], label=f'Training {metric_name}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    ax.set_title(f'Training and Validation {metric_name}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric_name)
    ax.legend()

fig.tight_layout()  # keep axis labels of the two panels from overlapping
plt.show()