In [None]:
# Install the third-party packages used by this notebook; -q keeps pip's output quiet.
# NOTE(review): versions are unpinned, and seaborn is not imported in any cell visible here — confirm it is needed.
!pip install -q kagglehub tensorflow matplotlib seaborn scikit-learn


In [None]:
import os
import shutil
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
import kagglehub

# Kaggle dataset slugs, listed in the order the rest of the notebook numbers them.
dataset_slugs = (
    "paultimothymooney/chest-xray-pneumonia",
    "umitka/chest-x-ray-balanced",
    "pcbreviglieri/pneumonia-xray-images",
)

# Download (or reuse the local cache of) each dataset and keep its root folder.
path1, path2, path3 = [kagglehub.dataset_download(slug) for slug in dataset_slugs]

for dataset_path in (path1, path2, path3):
    print(dataset_path)


Using Colab cache for faster access to the 'chest-xray-pneumonia' dataset.
Downloading from https://www.kaggle.com/api/v1/datasets/download/umitka/chest-x-ray-balanced?dataset_version_number=1...


100%|██████████| 6.90G/6.90G [01:15<00:00, 98.4MB/s]

Extracting files...





Downloading from https://www.kaggle.com/api/v1/datasets/download/pcbreviglieri/pneumonia-xray-images?dataset_version_number=1...


100%|██████████| 1.14G/1.14G [00:12<00:00, 99.1MB/s]

Extracting files...





/kaggle/input/chest-xray-pneumonia
/root/.cache/kagglehub/datasets/umitka/chest-x-ray-balanced/versions/1
/root/.cache/kagglehub/datasets/pcbreviglieri/pneumonia-xray-images/versions/1


In [None]:
def show_tree(path, level=2):
    """Print the directory tree rooted at ``path``, down to ``level`` levels.

    Only directories are printed; files are ignored. The root itself is
    depth 0, its children depth 1, and so on.

    Args:
        path: Directory to start from. A trailing path separator is tolerated.
        level: Deepest directory depth to print (inclusive).
    """
    top = os.path.normpath(path)
    for root, dirs, _files in os.walk(top):
        # Depth from the relative path, so a trailing separator on ``path`` or a
        # repeated path fragment cannot skew the count.
        rel = os.path.relpath(root, top)
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1

        if depth <= level:
            print("│   " * depth + "├── " + os.path.basename(root))

        # Deterministic sibling order across filesystems.
        dirs.sort()

        # Stop os.walk from descending further: nothing below ``level`` is ever
        # printed, so walking it would only waste time on large datasets.
        if depth >= level:
            dirs[:] = []

# Print the folder layout of each downloaded dataset under its own heading.
for heading, dataset_root in (
    ("Dataset 1 structure:", path1),
    ("\nDataset 2 structure:", path2),
    ("\nDataset 3 structure:", path3),
):
    print(heading)
    show_tree(dataset_root)


Dataset 1 structure:
├── chest-xray-pneumonia
│   ├── chest_xray
│   │   ├── chest_xray
│   │   ├── __MACOSX
│   │   ├── val
│   │   ├── test
│   │   ├── train

Dataset 2 structure:
├── 1
│   ├── chest_xray_balanced
│   │   ├── test
│   │   ├── train
│   │   ├── val

Dataset 3 structure:
├── 1
│   ├── test
│   │   ├── opacity
│   │   ├── normal
│   ├── train
│   │   ├── opacity
│   │   ├── normal
│   ├── val
│   │   ├── opacity
│   │   ├── normal


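Merging datasets 1 and 2 into one training directory (dataset 3 is downloaded above but is not merged in the cells that follow)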


In [None]:
import os, shutil, random

# Root of the merged dataset and its train / val sub-folders.
BASE_DIR = "/content/pneumonia_dataset"
TRAIN_DIR, VAL_DIR = (os.path.join(BASE_DIR, split) for split in ("train", "val"))

# Make sure every <split>/<class> folder exists before any files are copied.
for cls in ("NORMAL", "PNEUMONIA"):
    for split_dir in (TRAIN_DIR, VAL_DIR):
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)


In [None]:
def copy_images_recursive(src_dir, target_class, dest_root=None):
    """Copy every image found under ``src_dir`` into ``<dest_root>/<target_class>``.

    The source tree is walked recursively and flattened into a single folder.
    A file is treated as "already copied" only when a destination file with the
    same name AND identical content exists. A different image that merely shares
    a basename (common when merging several datasets) is stored under a
    numbered name (``name_1.ext``, ``name_2.ext``, ...) instead of being dropped.

    Args:
        src_dir: Directory to search for images.
        target_class: Name of the class sub-folder to copy into.
        dest_root: Parent folder of the class sub-folders. Defaults to the
            notebook-level ``TRAIN_DIR``.

    Returns:
        Number of files actually copied by this call.
    """
    import filecmp

    if dest_root is None:
        dest_root = TRAIN_DIR
    class_dir = os.path.join(dest_root, target_class)
    os.makedirs(class_dir, exist_ok=True)

    # Dotted suffixes so that names such as "fakejpg" are not mistaken for images.
    image_exts = (".jpg", ".jpeg", ".png")

    count = 0
    for root, _, files in os.walk(src_dir):
        for file in files:
            if not file.lower().endswith(image_exts):
                continue

            src = os.path.join(root, file)
            stem, ext = os.path.splitext(file)

            suffix = 0
            while True:
                name = file if suffix == 0 else f"{stem}_{suffix}{ext}"
                dst = os.path.join(class_dir, name)

                if not os.path.exists(dst):
                    shutil.copy(src, dst)
                    count += 1
                    break

                # Byte-for-byte identical file already present: nothing to do.
                if filecmp.cmp(src, dst, shallow=False):
                    break

                # Same name, different content: try the next numbered name.
                suffix += 1
    return count


In [None]:
# Build the training folder of dataset 1 from the path kagglehub returned
# instead of a hard-coded absolute location, so the cell keeps working when the
# download lands somewhere else (a different cache folder or dataset version).
ds1 = os.path.join(path1, "chest_xray", "train")

n1 = copy_images_recursive(os.path.join(ds1, "NORMAL"), "NORMAL")
p1 = copy_images_recursive(os.path.join(ds1, "PNEUMONIA"), "PNEUMONIA")

print("Dataset 1 → NORMAL:", n1, "PNEUMONIA:", p1)


Dataset 1 → NORMAL: 1341 PNEUMONIA: 3875


In [None]:
# Build the training folder of dataset 2 from the path kagglehub returned
# instead of a hard-coded cache location that embeds the dataset version.
ds2 = os.path.join(path2, "chest_xray_balanced", "train")

n2 = copy_images_recursive(os.path.join(ds2, "NORMAL"), "NORMAL")
p2 = copy_images_recursive(os.path.join(ds2, "PNEUMONIA"), "PNEUMONIA")

print("Dataset 2 → NORMAL:", n2, "PNEUMONIA:", p2)


Dataset 2 → NORMAL: 3459 PNEUMONIA: 922


In [None]:
# Folders holding the merged training images and the validation split.
TRAIN_DIR = "/content/pneumonia_dataset/train"
VAL_DIR   = "/content/pneumonia_dataset/val"

# Seed Python, NumPy and TensorFlow RNGs in one call so that shuffling,
# augmentation and weight initialisation are repeatable between runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

IMG_SIZE = 380    # side length (pixels) of the square model input
BATCH_SIZE = 16   # images per generator batch
EPOCHS_1 = 10     # max epochs for the first fit (base model frozen)
EPOCHS_2 = 15     # max epochs for the second fit (top of base model unfrozen)


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input

# Random geometric augmentations applied to training images only.
augmentation_kwargs = dict(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=True,
)

# Both generators run the EfficientNet preprocess_input; only the training
# generator additionally augments.
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation_kwargs)
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)


In [None]:
import shutil
from sklearn.model_selection import train_test_split

# Source (train) and destination (val) roots used by create_val_split below.
# These repeat the values already assigned to TRAIN_DIR / VAL_DIR earlier in the notebook.
TRAIN_DIR = "/content/pneumonia_dataset/train"
VAL_DIR   = "/content/pneumonia_dataset/val"

def create_val_split(class_name, val_ratio=0.2):
    """Move a fixed fraction of one class's training images into the val folder.

    Files are moved (not copied) from ``TRAIN_DIR/<class_name>`` to
    ``VAL_DIR/<class_name>``.

    The call is a no-op when the validation folder already contains files, so
    re-running the cell does not keep draining the training set. The file list
    is sorted before splitting because ``os.listdir`` order is arbitrary; without
    that, ``random_state=42`` would not select the same files on every machine.

    Args:
        class_name: Class sub-folder to split (e.g. "NORMAL").
        val_ratio: Fraction of the class's files to move to validation.
    """
    src = os.path.join(TRAIN_DIR, class_name)
    dst = os.path.join(VAL_DIR, class_name)

    os.makedirs(dst, exist_ok=True)

    # Idempotence guard: a populated val folder means the split was already made.
    if os.listdir(dst):
        print(f"Validation split for {class_name} already exists; skipping.")
        return

    files = sorted(
        f for f in os.listdir(src)
        if os.path.isfile(os.path.join(src, f))
    )

    # train_test_split raises when it cannot leave at least one training sample.
    if len(files) < 2:
        print(f"Not enough files in {src} to create a validation split; skipping.")
        return

    _, val_files = train_test_split(
        files,
        test_size=val_ratio,
        random_state=42
    )

    for f in val_files:
        shutil.move(
            os.path.join(src, f),
            os.path.join(dst, f)
        )

# Carve out the validation split for each class in turn.
for class_name in ("NORMAL", "PNEUMONIA"):
    create_val_split(class_name)


In [None]:
# Settings shared by the training and validation iterators.
flow_kwargs = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="binary",
)

# Training batches are shuffled; validation keeps directory order so that
# predictions can later be compared against val_data.classes.
train_data = train_gen.flow_from_directory(TRAIN_DIR, shuffle=True, **flow_kwargs)
val_data = val_gen.flow_from_directory(VAL_DIR, shuffle=False, **flow_kwargs)

print("Class indices:", train_data.class_indices)


Found 7677 images belonging to 2 classes.
Found 1920 images belonging to 2 classes.
Class indices: {'NORMAL': 0, 'PNEUMONIA': 1}


In [None]:
# ImageNet-pretrained EfficientNetB4 without its classification top.
input_shape = (IMG_SIZE, IMG_SIZE, 3)
base_model = EfficientNetB4(
    input_shape=input_shape,
    include_top=False,
    weights="imagenet",
)

# Freeze the whole base model for the first training phase.
base_model.trainable = False


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
[1m71686520/71686520[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Classification head on top of the base model:
# global average pool -> batch norm -> dropout -> single sigmoid unit.
features = GlobalAveragePooling2D()(base_model.output)
features = BatchNormalization()(features)
features = Dropout(0.5)(features)
output = Dense(1, activation="sigmoid")(features)

model = Model(inputs=base_model.input, outputs=output)


In [None]:
# Metrics tracked during the first training phase.
phase1_metrics = [
    "accuracy",
    tf.keras.metrics.AUC(name="auc"),
    tf.keras.metrics.Recall(name="recall"),
]

# Binary cross-entropy with Adam at a learning rate of 1e-4.
model.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=phase1_metrics,
)

model.summary()


In [None]:
# Stop once val_loss has not improved for 4 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=4,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.3 after 2 epochs without val_loss improvement,
# never going below 1e-6.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.3,
    patience=2,
    min_lr=1e-6,
)

callbacks = [early_stopping, lr_on_plateau]


In [None]:
# First training run: up to EPOCHS_1 epochs, validated on val_data each epoch.
phase1_fit_args = dict(
    validation_data=val_data,
    epochs=EPOCHS_1,
    callbacks=callbacks,
)
history_1 = model.fit(train_data, **phase1_fit_args)


  self._warn_if_super_not_called()


Epoch 1/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m563s[0m 1s/step - accuracy: 0.6996 - auc: 0.7652 - loss: 0.6376 - recall: 0.7006 - val_accuracy: 0.8911 - val_auc: 0.9623 - val_loss: 0.2861 - val_recall: 0.9167 - learning_rate: 1.0000e-04
Epoch 2/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m435s[0m 906ms/step - accuracy: 0.8433 - auc: 0.9272 - loss: 0.3450 - recall: 0.8338 - val_accuracy: 0.9224 - val_auc: 0.9720 - val_loss: 0.2206 - val_recall: 0.9177 - learning_rate: 1.0000e-04
Epoch 3/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m434s[0m 905ms/step - accuracy: 0.8665 - auc: 0.9402 - loss: 0.3195 - recall: 0.8637 - val_accuracy: 0.9323 - val_auc: 0.9788 - val_loss: 0.1930 - val_recall: 0.9271 - learning_rate: 1.0000e-04
Epoch 4/10
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m444s[0m 926ms/step - accuracy: 0.8831 - auc: 0.9514 - loss: 0.2840 - recall: 0.8720 - val_accuracy: 0.9349 - val_auc: 0.9793 - val_loss: 0.1

In [None]:
# Fine-tuning: unfreeze only the last 40 layers of the base model.
base_model.trainable = True

for layer in base_model.layers[:-40]:
    layer.trainable = False

# Keep BatchNormalization layers frozen even inside the unfrozen slice. A frozen
# BatchNormalization layer runs in inference mode, so its moving mean/variance
# learned on ImageNet are not overwritten by small fine-tuning batches, which
# would otherwise degrade the pretrained features.
for layer in base_model.layers[-40:]:
    if isinstance(layer, BatchNormalization):
        layer.trainable = False


In [None]:
# Re-compile so the changed trainable flags take effect, this time with Adam
# at a learning rate of 1e-5.
phase2_metrics = [
    "accuracy",
    tf.keras.metrics.AUC(name="auc"),
    tf.keras.metrics.Recall(name="recall"),
]

model.compile(
    loss="binary_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    metrics=phase2_metrics,
)


In [None]:
# Second training run: up to EPOCHS_2 epochs with the same callbacks list.
phase2_fit_args = dict(
    validation_data=val_data,
    epochs=EPOCHS_2,
    callbacks=callbacks,
)
history_2 = model.fit(train_data, **phase2_fit_args)


Epoch 1/15
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m545s[0m 1s/step - accuracy: 0.8882 - auc: 0.9578 - loss: 0.2638 - recall: 0.8741 - val_accuracy: 0.9443 - val_auc: 0.9823 - val_loss: 0.1643 - val_recall: 0.9187 - learning_rate: 1.0000e-05
Epoch 2/15
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m442s[0m 920ms/step - accuracy: 0.9108 - auc: 0.9677 - loss: 0.2318 - recall: 0.9040 - val_accuracy: 0.9521 - val_auc: 0.9872 - val_loss: 0.1405 - val_recall: 0.9281 - learning_rate: 1.0000e-05
Epoch 3/15
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m437s[0m 911ms/step - accuracy: 0.9329 - auc: 0.9805 - loss: 0.1785 - recall: 0.9256 - val_accuracy: 0.9542 - val_auc: 0.9912 - val_loss: 0.1216 - val_recall: 0.9271 - learning_rate: 1.0000e-05
Epoch 4/15
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m451s[0m 940ms/step - accuracy: 0.9338 - auc: 0.9821 - loss: 0.1703 - recall: 0.9327 - val_accuracy: 0.9599 - val_auc: 0.9909 - val_loss: 0.1

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Sigmoid scores for every validation image, thresholded at 0.5 into 0/1 labels.
val_scores = model.predict(val_data)
val_preds = (val_scores > 0.5).astype(int)

# val_data was built with shuffle=False, so val_data.classes is in the same
# order as the predictions.
# NOTE(review): val_data is also the validation_data passed to both fit calls,
# so these numbers are not from an untouched test set.
y_true = val_data.classes
print(confusion_matrix(y_true, val_preds))
print(classification_report(y_true, val_preds))


[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 308ms/step
[[944  16]
 [ 51 909]]
              precision    recall  f1-score   support

           0       0.95      0.98      0.97       960
           1       0.98      0.95      0.96       960

    accuracy                           0.97      1920
   macro avg       0.97      0.97      0.97      1920
weighted avg       0.97      0.97      0.97      1920



In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from google.colab import drive
drive.mount("/content/drive")

# Destination of the trained model inside the mounted Drive.
MODEL_PATH = "/content/drive/MyDrive/Pneumonia_Detection/pneumonia_efficientnet_b4_FINAL.keras"

# Create the target folder first so the save does not fail on a Drive that
# does not yet contain it.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

model.save(MODEL_PATH)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Loading the model from Google Drive

In [None]:
# Mount Google Drive at /content/drive before the saved model file is read.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Location of the saved model on the mounted Drive.
MODEL_PATH = "/content/drive/MyDrive/Pneumonia_Detection/pneumonia_efficientnet_b4_FINAL.keras"

# Report whether the file is actually there before trying to load it.
model_file_found = os.path.exists(MODEL_PATH)
print("Model file exists:", model_file_found)


Model file exists: True


In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model

model = load_model(MODEL_PATH)

# Report the device that is really available instead of always claiming CPU.
device = "GPU" if tf.config.list_physical_devices("GPU") else "CPU"
print(f"Model loaded successfully on {device}")


Model loaded successfully on CPU


In [None]:
from google.colab import files

uploaded = files.upload()  # select ONE chest X-ray image

# A cancelled upload leaves nothing to index; fail with a clear message
# instead of a bare IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; select one chest X-ray image and re-run this cell.")

# Use the first uploaded file name if several were selected.
image_path = next(iter(uploaded))
print("Testing image:", image_path)


Saving NORMAL2-IM-1332-0001.jpeg to NORMAL2-IM-1332-0001.jpeg
Testing image: NORMAL2-IM-1332-0001.jpeg


Image preprocessing: resize to 380×380



In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input
from PIL import Image
import numpy as np

IMG_SIZE = 380

# Decode as 3-channel RGB and resize to the square model input size.
img = Image.open(image_path).convert("RGB").resize((IMG_SIZE, IMG_SIZE))

# IMPORTANT: run the same preprocess_input used by the training generators,
# then add a leading batch axis.
img_array = preprocess_input(np.array(img))[np.newaxis, ...]


In [None]:
# The model returns one sigmoid score per image; pull out the single scalar.
raw_scores = model.predict(img_array)
pred = raw_scores[0][0]
print("Raw prediction:", pred)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
Raw prediction: 0.00021644848


In [None]:
# Map the score to one of three bands: below 0.4 -> NORMAL,
# 0.4 up to (not including) 0.6 -> UNCERTAIN, 0.6 and above -> PNEUMONIA.
if pred < 0.4:
    verdict = "🟩 NORMAL (High confidence)"
    detail = f"Confidence: {(1-pred)*100:.2f}%"
elif pred < 0.6:
    verdict = "🟨 UNCERTAIN – Needs clinical review"
    detail = f"Score: {pred*100:.2f}%"
else:
    verdict = "🟥 PNEUMONIA (High confidence)"
    detail = f"Confidence: {pred*100:.2f}%"

print(verdict)
print(detail)


🟩 NORMAL (High confidence)
Confidence: 99.98%
