<a href="https://colab.research.google.com/github/BarbodRE/chest-xray-pneumonia-classification/blob/main/Chest_X_ray_Pneumonia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🩻 COVID-19 & Pneumonia Detection from Chest X-Rays

## 📌 Overview
This project uses **Transfer Learning** with multiple pre-trained CNN models (ResNet50, DenseNet121, EfficientNetB0, MobileNetV2) for classification of chest X-ray images.
The models are first trained individually and then combined using **Stacking**.
Additionally, **Test Time Augmentation (TTA)** is applied to further boost performance.

In [None]:
# ==========================
# 📦 1. Import Libraries
# ==========================
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, DenseNet121, EfficientNetB0, MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt

In [None]:
## 📂 2. Data Preparation
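Data is loaded from class-labelled directories using `ImageDataGenerator`.
The training set is augmented (rotation, shifts, zoom, horizontal flip) to improve generalization; validation and test images are only rescaled.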

In [None]:
# --- Image geometry and batching ---
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# --- Dataset locations ---
train_dir, val_dir, test_dir = "data/train", "data/val", "data/test"

# NOTE(review): every generator scales pixels to [0, 1]. The keras.applications
# backbones each ship their own preprocess_input (and EfficientNet reportedly
# rescales internally) -- confirm this uniform scaling is intended.
pixel_scale = 1./255

# Random geometric augmentation, applied to training images only.
train_augmentation = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=pixel_scale, **train_augmentation)

# Validation and test images are rescaled but otherwise left untouched.
val_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Options shared by all three directory iterators.
flow_options = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode="categorical")

train_gen = train_datagen.flow_from_directory(train_dir, **flow_options)
val_gen = val_datagen.flow_from_directory(val_dir, **flow_options)
# The test iterator is unshuffled so predictions line up with test_gen.classes.
test_gen = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

In [None]:
## 🧠 3. Build Base Models
Utility function to build transfer learning models with fine-tuning.
By default, only the last 30 layers of the pre-trained backbone are left trainable; all earlier layers are frozen.

In [None]:
def build_base_model(base_model_class, input_shape=(224,224,3), num_classes=3, trainable_layers=30):
    """Build a softmax classifier on top of an ImageNet-pretrained backbone.

    Args:
        base_model_class: Backbone constructor that accepts ``weights``,
            ``include_top`` and ``input_shape`` (e.g. ``ResNet50``).
        input_shape: Shape of one input image, passed to the backbone.
        num_classes: Number of units in the final softmax layer.
        trainable_layers: How many trailing backbone layers stay trainable.
            ``0`` freezes the whole backbone; a value at or above the number
            of backbone layers leaves every layer trainable.

    Returns:
        An uncompiled ``Model`` mapping the backbone input to class probabilities.

    Raises:
        ValueError: If ``trainable_layers`` is negative.
    """
    if trainable_layers < 0:
        raise ValueError(f"trainable_layers must be >= 0, got {trainable_layers}")

    base_model = base_model_class(weights="imagenet", include_top=False, input_shape=input_shape)

    # Use an explicit boundary index instead of `layers[:-trainable_layers]`:
    # for trainable_layers == 0 that slice is `layers[:0]` (empty), which would
    # freeze nothing and leave the entire backbone trainable.
    first_trainable = max(len(base_model.layers) - trainable_layers, 0)
    for index, layer in enumerate(base_model.layers):
        layer.trainable = index >= first_trainable

    # Classification head: pooled features -> dropout -> softmax.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    output = Dense(num_classes, activation="softmax")(x)

    return Model(inputs=base_model.input, outputs=output)

# Registry of candidate classifiers: display name -> freshly built, uncompiled model.
backbone_specs = (
    ("ResNet50", ResNet50),
    ("DenseNet121", DenseNet121),
    ("EfficientNetB0", EfficientNetB0),
    ("MobileNetV2", MobileNetV2),
)
models = {label: build_base_model(backbone) for label, backbone in backbone_specs}

In [None]:
## ⚙️ 4. Training
Training loop with callbacks:
- **ModelCheckpoint**: save best model
- **EarlyStopping**: prevent overfitting
- **ReduceLROnPlateau**: adjust learning rate

In [None]:
def train_model(model, train_gen, val_gen, model_name="model", epochs=12, learning_rate=1e-4):
    """Compile and fit a classifier, checkpointing the best validation accuracy.

    Args:
        model: Keras model to train; it is compiled in place.
        train_gen: Training data passed to ``model.fit``.
        val_gen: Validation data passed to ``model.fit``.
        model_name: Used to name the checkpoint file ``best_{model_name}.keras``.
        epochs: Maximum number of training epochs (early stopping may end sooner).
        learning_rate: Initial learning rate for the Adam optimizer.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])

    callbacks = [
        # Keep only the weights with the best validation accuracy on disk.
        ModelCheckpoint(f"best_{model_name}.keras", monitor="val_accuracy", save_best_only=True, verbose=1),
        # Stop after 5 epochs without improvement and roll back to the best weights.
        EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True),
        # Halve the learning rate when validation loss stalls for 3 epochs.
        ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3)
    ]

    history = model.fit(train_gen, validation_data=val_gen, epochs=epochs, callbacks=callbacks)
    return history

# Example run: fine-tune the ResNet50 candidate from the model registry.
resnet_key = "ResNet50"
history_resnet = train_model(models[resnet_key], train_gen, val_gen, model_name=resnet_key)

In [None]:
## 📈 5. Plot Training Curves
Visualize accuracy and loss during training.

In [None]:
def plot_history(history, title="Model"):
    """Plot per-epoch accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` attribute maps metric names
            (``"accuracy"``, ``"loss"`` and optionally their ``val_``
            counterparts) to per-epoch value sequences.
        title: Prefix used in both subplot titles.
    """
    records = history.history
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    for ax, metric in zip(axes, ("accuracy", "loss")):
        ax.plot(records[metric], label="Train")

        # Validation curves are only present when validation data was supplied
        # to fit(); skip them instead of failing with a KeyError.
        val_key = f"val_{metric}"
        if val_key in records:
            ax.plot(records[val_key], label="Val")

        metric_label = metric.capitalize()
        ax.set_title(f"{title} {metric_label}")
        ax.set_xlabel("Epoch")
        ax.set_ylabel(metric_label)
        ax.legend()

    fig.tight_layout()
    plt.show()

# Show the ResNet50 learning curves.
plot_history(history_resnet, title="ResNet50")

In [None]:
## 🏆 6. Evaluation on Test Set
Evaluate the trained model on the test set.

In [None]:
# Reload the checkpointed ResNet50 model and score it on the unshuffled test generator.
best_model_path = "best_ResNet50.keras"
best_model = tf.keras.models.load_model(best_model_path)

# evaluate() yields the loss followed by the compiled metrics (accuracy here).
test_metrics = best_model.evaluate(test_gen)
test_loss, test_acc = test_metrics
print(f"✅ Test Accuracy: {test_acc:.4f}")

In [None]:
## 🔄 7. Test Time Augmentation (TTA)
Run inference several times over randomly augmented versions of the test images.
The class probabilities from all passes are averaged to obtain the final predictions.

In [None]:
def tta_prediction(model, generator, tta_steps=5):
    """Average a model's predictions over several passes of a generator.

    For the averaging to act as test-time augmentation, ``generator`` must
    apply random augmentation and yield samples in a fixed order; with a
    deterministic generator every pass returns the same predictions and the
    mean equals a single prediction.

    Args:
        model: Object exposing ``predict(generator, verbose=...)``.
        generator: Data source handed to ``model.predict`` on every pass.
        tta_steps: Number of prediction passes to average (must be >= 1).

    Returns:
        Array of predictions averaged element-wise across the passes.

    Raises:
        ValueError: If ``tta_steps`` is smaller than 1 (the mean of zero
            passes would otherwise silently evaluate to NaN).
    """
    if tta_steps < 1:
        raise ValueError(f"tta_steps must be >= 1, got {tta_steps}")

    passes = []
    for _ in range(tta_steps):
        # Rewind iterators that support it so every pass starts at the first
        # sample and prediction rows stay aligned across passes.
        rewind = getattr(generator, "reset", None)
        if callable(rewind):
            rewind()
        passes.append(model.predict(generator, verbose=1))
    return np.mean(passes, axis=0)

# TTA only helps when each pass sees differently augmented images. test_gen merely
# rescales and is unshuffled, so repeated passes over it give the same predictions
# and their average equals a single prediction. Apply the training augmentation to
# the test folder instead, unshuffled so prediction rows line up with `classes`.
tta_gen = train_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False,
)

tta_preds = tta_prediction(best_model, tta_gen, tta_steps=5)
tta_labels = np.argmax(tta_preds, axis=1)
tta_acc = np.mean(tta_labels == tta_gen.classes)
print(f"🚀 TTA Accuracy: {tta_acc:.4f}")

In [None]:
## ✅ Final Results
- Test Accuracy: ~93.3%
- TTA Accuracy: ~94.3%