In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt

In [None]:
# Data Augmentation and Preprocessing
# Options for the training-side generator. Every iterator created from
# train_datagen, whichever subset it selects, receives these random transforms.
augmentation_settings = dict(
    rescale=1./255,            # Multiply every pixel value by 1/255
    rotation_range=20,         # Random rotation, up to 20 degrees
    width_shift_range=0.2,     # Random horizontal shift, up to 20% of the width
    height_shift_range=0.2,    # Random vertical shift, up to 20% of the height
    shear_range=0.2,           # Random shear
    zoom_range=0.2,            # Random zoom
    horizontal_flip=True,      # Random left/right mirroring
    fill_mode='nearest',       # Newly exposed pixels copy their nearest neighbour
    validation_split=0.2,      # Reserve 20% of the directory for the 'validation' subset
)
train_datagen = ImageDataGenerator(**augmentation_settings)

# Test-side generator: pixel rescaling only, no random transforms.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Load Data
# Locations of the training and test image folders (paths relative to the
# notebook's working directory).
train_dir, test_dir = (
    '../input/chest-xray-pneumonia/chest_xray/chest_xray/train',
    '../input/chest-xray-pneumonia/chest_xray/chest_xray/test',
)



In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
import os
import random

def display_sample_images(directory, class_name, num_images=5):
    """Show a row of randomly chosen images from one class folder.

    Args:
        directory: Folder that contains one sub-folder per class.
        class_name: Name of the class sub-folder to sample from; also used
            as the title of every panel.
        num_images: Maximum number of images to show (default 5). Fewer are
            shown when the folder holds fewer image files.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    class_dir = os.path.join(directory, class_name)

    # Keep only files that look like images: os.listdir also returns hidden
    # or metadata entries (e.g. ".DS_Store") that imread cannot decode.
    image_extensions = ('.jpeg', '.jpg', '.png', '.bmp', '.gif')
    candidates = sorted(
        name for name in os.listdir(class_dir)
        if name.lower().endswith(image_extensions)
    )

    # random.sample raises ValueError when asked for more items than exist,
    # so never request more than the folder provides.
    sample_size = min(num_images, len(candidates))
    if sample_size < 1:
        print(f'No images to display in {class_dir}')
        return

    sample_images = random.sample(candidates, sample_size)
    plt.figure(figsize=(15, 5))
    for i, img_name in enumerate(sample_images):
        img_path = os.path.join(class_dir, img_name)
        img = mpimg.imread(img_path)
        plt.subplot(1, sample_size, i + 1)
        plt.imshow(img, cmap='gray')
        plt.title(class_name)
        plt.axis('off')
    plt.show()

# Preview a few random training images from each class folder.
for class_name in ('NORMAL', 'PNEUMONIA'):
    display_sample_images(train_dir, class_name)

In [None]:

# Settings shared by every directory iterator in this notebook.
flow_options = dict(
    target_size=(224, 224),        # Resize all images to 224x224 pixels
    batch_size=32,                 # Number of images to yield in one batch
    class_mode='binary',           # Binary classification (NORMAL or PNEUMONIA)
)

# Training subset: augmented by train_datagen and shuffled (the default).
train_generator = train_datagen.flow_from_directory(
    train_dir,
    subset='training',
    **flow_options
)

# Validation images must not be randomly augmented, otherwise val_loss (which
# drives EarlyStopping / ModelCheckpoint) is measured on distorted inputs.
# A rescale-only generator with the SAME validation_split as train_datagen
# selects the held-out portion of train_dir without applying transforms.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
validation_generator = val_datagen.flow_from_directory(
    train_dir,
    subset='validation',
    shuffle=False,                 # Evaluation only; keep file order
    **flow_options
)

# shuffle=False keeps batches in file order so that predictions made from this
# iterator line up with `test_generator.classes` (which is in file order).
test_generator = test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **flow_options
)


In [None]:
# Model Architecture
# Small CNN built layer by layer: three conv + max-pool stages, then a dense
# classifier head that ends in a single sigmoid unit.
model = Sequential()

# Stage 1: 32 filters, 3x3 kernel; this layer also declares the input shape.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))

# Stages 2 and 3: 64 then 128 filters, each followed by 2x2 max-pooling.
for filters in (64, 128):
    model.add(Conv2D(filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

# Classifier head.
model.add(Flatten())                        # Collapse feature maps to a 1D vector
model.add(Dense(512, activation='relu'))    # Fully connected layer with 512 units
model.add(Dropout(0.5))                     # Drop half of the units during training
model.add(Dense(1, activation='sigmoid'))   # Single output for the binary label

# Compile Model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Callbacks
# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# Write the model to disk only when val_loss improves.
model_checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_loss',
    save_best_only=True,
)
training_callbacks = [early_stopping, model_checkpoint]

# Train Model
# `history` is kept for the accuracy/loss plots further down.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=training_callbacks,
)


In [None]:
# Evaluate on Validation Data
# evaluate() yields the loss followed by the compiled metric (accuracy).
validation_metrics = model.evaluate(validation_generator)
val_loss, val_accuracy = validation_metrics
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')

# Evaluate on Test Data
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


In [None]:
# Plot Training & Validation Accuracy, then Training & Validation Loss.
# Each row: (train key, validation key, train legend, validation legend,
#            figure title, y-axis label).
curve_specs = (
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Model Loss', 'Loss'),
)

for train_key, val_key, train_legend, val_legend, figure_title, y_label in curve_specs:
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(figure_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(loc='upper left')
    plt.show()


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# `.classes` lists the true labels in file order. Predictions only line up
# with it when batches are drawn in that same order, and flow_from_directory
# shuffles by default. Predict from a dedicated unshuffled iterator so that
# labels and predictions are paired image by image.
ordered_test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Get true labels (file order)
test_labels = ordered_test_generator.classes

# Get predictions (same file order), thresholded at 0.5
test_pred_prob = model.predict(ordered_test_generator)
test_pred_classes = (test_pred_prob > 0.5).astype("int32").flatten()

# Check distribution of true labels
unique, counts = np.unique(test_labels, return_counts=True)
print("True label distribution:", dict(zip(unique, counts)))

# Check distribution of predicted labels
unique, counts = np.unique(test_pred_classes, return_counts=True)
print("Predicted label distribution:", dict(zip(unique, counts)))

# Plot confusion matrix; class names are ordered by their integer index so the
# axis labels always match the label encoding used by the generator.
class_indices = ordered_test_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)
cm = confusion_matrix(test_labels, test_pred_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.show()



In [None]:
# Evaluate on Training Data
# evaluate() yields the loss followed by the compiled metric (accuracy).
train_metrics = model.evaluate(train_generator)
train_loss, train_accuracy = train_metrics
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')

# Evaluate on Validation Data
validation_metrics = model.evaluate(validation_generator)
val_loss, val_accuracy = validation_metrics
print(f'Validation Accuracy: {val_accuracy * 100:.2f}%')



In [None]:
# Rebuild the test iterator without shuffling so batches follow file order
# and evaluation results are consistent between runs.
ordered_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False,
)
test_generator = test_datagen.flow_from_directory(test_dir, **ordered_flow_options)

# Report the class-name -> index mapping and the number of files discovered.
print(test_generator.class_indices)
test_image_count = len(test_generator.filenames)
print(f'Found {test_image_count} test images.')


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Pull the first batch of test images together with their labels.
test_generator.reset()  # Start again from the first batch
x_test, y_test = next(test_generator)

# Threshold the predicted probabilities at 0.5 to obtain 0/1 class ids.
batch_probabilities = model.predict(x_test)
y_pred = (batch_probabilities > 0.5).astype("int32").flatten()


def _label_name(class_id):
    """Translate a 0/1 class id into its display name."""
    if class_id == 1:
        return 'PNEUMONIA'
    return 'NORMAL'


# Show the first ten images of the batch in a 2x5 grid, each captioned with
# its true and predicted label.
plt.figure(figsize=(15, 5))
for i in range(10):
    true_label = _label_name(y_test[i])
    pred_label = _label_name(y_pred[i])
    plt.subplot(2, 5, i + 1)
    plt.imshow(x_test[i])
    plt.title(f'True: {true_label}\nPred: {pred_label}')
    plt.axis('off')
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Rewind the iterator, then collect true labels and thresholded predictions.
test_generator.reset()
test_labels = test_generator.classes
test_pred_prob = model.predict(test_generator)
test_pred_classes = (test_pred_prob > 0.5).astype("int32").flatten()

# Print how many samples fall into each class, first for the true labels and
# then for the predicted ones.
for caption, label_values in (
    ("True label distribution:", test_labels),
    ("Predicted label distribution:", test_pred_classes),
):
    unique, counts = np.unique(label_values, return_counts=True)
    print(caption, dict(zip(unique, counts)))

# Confusion Matrix
cm = confusion_matrix(test_labels, test_pred_classes)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=['NORMAL', 'PNEUMONIA'],
)
disp.plot(cmap=plt.cm.Blues)
plt.show()
