In [51]:
import cv2
import glob
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    roc_curve,
    classification_report,
    confusion_matrix,
    accuracy_score,
)
import tensorflow as tf
from tensorflow import keras
from keras import layers, models
from keras.applications import ResNet50
from keras.applications.resnet import preprocess_input
import pandas as pd

In [52]:
# 1. Load & preprocess images
def load_and_preprocess(image_paths, label, img_size=(224, 224)):
    """Read images from disk and convert them into model-ready arrays.

    Each path is read with ``cv2.imread``, resized to ``img_size``, converted
    from BGR to RGB, cast to float32 and passed through the ResNet
    ``preprocess_input``. Paths that OpenCV cannot decode are skipped, and a
    single warning reports how many were dropped.

    Args:
        image_paths: Iterable of image file paths.
        label: Label assigned to every successfully loaded image.
        img_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        Tuple ``(X, y)``. ``X`` stacks the preprocessed images along axis 0 and
        ``y`` holds one copy of ``label`` per loaded image. If nothing could be
        loaded, ``X`` has shape ``(0, height, width, 3)`` and ``y`` has shape
        ``(0,)`` so both can still be concatenated with non-empty results.
    """
    import warnings  # local import: only needed for the skipped-file report

    images, labels, skipped = [], [], []
    for path in image_paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            skipped.append(path)
            continue
        img = cv2.resize(img, img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images.append(preprocess_input(img.astype("float32")))  # use ResNet preprocessor
        labels.append(label)

    if skipped:
        warnings.warn(
            f"load_and_preprocess: skipped {len(skipped)} unreadable image(s), "
            f"e.g. {skipped[0]!r}",
            stacklevel=2,
        )

    if not images:
        # np.array([]) would have shape (0,), which cannot be concatenated with
        # 4-D image batches; build an explicitly shaped empty result instead.
        width, height = img_size
        empty_X = np.empty((0, height, width, 3), dtype="float32")
        empty_y = np.empty((0,), dtype=np.asarray(label).dtype)
        return empty_X, empty_y

    return np.array(images), np.array(labels)

In [None]:
# 2. Load & split training data
# glob returns paths in filesystem-dependent order; sort so that every seeded
# step below selects the same images on every machine.
norm_paths = sorted(glob.glob("./chest_xray/train/NORMAL/*.jpeg"))
pneu_paths = sorted(glob.glob("./chest_xray/train/PNEUMONIA/*.jpeg"))
if not norm_paths or not pneu_paths:
    raise FileNotFoundError(
        "No training images found under ./chest_xray/train/NORMAL or "
        "./chest_xray/train/PNEUMONIA (*.jpeg)"
    )

# Shuffle the PNEUMONIA paths with a fixed seed so the undersample taken below
# is a reproducible random draw rather than "the first 2500 files listed".
np.random.default_rng(42).shuffle(pneu_paths)

X_norm, y_norm = load_and_preprocess(norm_paths, label=0)
X_pneu, y_pneu = load_and_preprocess(pneu_paths, label=1)

# Undersample PNEUMONIA to 2500 (slicing keeps this a view, no extra copy)
X_pneu_under = X_pneu[:2500]
y_pneu_under = y_pneu[:2500]

# Train/validation split, done per class so both classes keep an 80/20 ratio.
# NOTE(review): this is an image-level split; if the dataset contains several
# images per patient, patients may appear in both sets -- confirm.
X_train_n, X_val_n, y_train_n, y_val_n = train_test_split(
    X_norm, y_norm, test_size=0.2, random_state=42
)

X_train_p, X_val_p, y_train_p, y_val_p = train_test_split(
    X_pneu_under, y_pneu_under, test_size=0.2, random_state=42
)

# Combine validation sets
X_val = np.concatenate([X_val_n, X_val_p], axis=0)
y_val = np.concatenate([y_val_n, y_val_p], axis=0)

In [54]:
# 3. Generators (balanced)
# Each class iterator contributes half of a training batch.
batch_size = 32
half_bs = batch_size // 2

# Random geometric augmentation applied on the fly to the training images.
# NOTE(review): horizontal flips mirror left/right anatomy -- confirm this is
# acceptable for chest X-rays.
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest",
)
aug = keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# One shuffling iterator per class (NORMAL first, then PNEUMONIA).
gen_norm, gen_pneu = (
    aug.flow(images, labels, batch_size=half_bs, shuffle=True)
    for images, labels in ((X_train_n, y_train_n), (X_train_p, y_train_p))
)


def balanced_generator(gen0, gen1):
    """Yield shuffled batches holding equally many samples from two iterators.

    Args:
        gen0: Iterator yielding ``(X, y)`` array pairs for the first class.
        gen1: Iterator yielding ``(X, y)`` array pairs for the second class.

    Yields:
        ``(Xb, yb)``: both half-batches stacked along axis 0 and reordered with
        one shared random permutation, so rows of ``Xb`` stay aligned with
        ``yb``. If one iterator returns a shorter batch than the other, both
        halves are trimmed to the shorter length so the batch stays balanced.
        The generator ends when either input iterator is exhausted.
    """
    while True:
        try:
            X0, y0 = next(gen0)
            X1, y1 = next(gen1)
        except StopIteration:
            # Under PEP 479 a StopIteration escaping a generator body is turned
            # into RuntimeError; finish cleanly for finite inputs instead.
            return
        # Iterators may emit a short final batch; keep the classes 1:1 anyway.
        n = min(len(y0), len(y1))
        Xb = np.vstack([X0[:n], X1[:n]])
        yb = np.concatenate([y0[:n], y1[:n]])
        idx = np.random.permutation(len(yb))
        yield Xb[idx], yb[idx]


# Training stream that combines the NORMAL and PNEUMONIA iterators.
train_gen = balanced_generator(gen0=gen_norm, gen1=gen_pneu)

In [55]:
# 4. Build ResNet50 model
input_shape = (224, 224, 3)

# ImageNet-initialised ResNet50 without its top classifier; pooling="avg"
# selects average pooling for the backbone output.
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    pooling="avg",
    input_shape=input_shape,
)
base_model.trainable = False  # freeze for faster training

# Small binary-classification head stacked on the frozen backbone.
head_layers = [
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid"),
]
model = models.Sequential([base_model, *head_layers])

tracked_metrics = ["accuracy", tf.keras.metrics.AUC(name="auc")]
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=tracked_metrics,
)

In [None]:
# 5. Train
# train_gen is a plain Python generator, so the length of an epoch has to be
# given explicitly: one pass over the combined training images.
n_train_images = len(X_train_n) + len(X_train_p)
steps_per_epoch = n_train_images // batch_size

# Neither callback names a monitored quantity, so both use the Keras default.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=3, restore_best_weights=True
)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "resnet_model_3.h5", save_best_only=True
)
callbacks = [early_stopping, checkpoint]

history = model.fit(
    x=train_gen,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
)

Epoch 1/20
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8590 - auc: 0.9282 - loss: 0.3518



[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 2s/step - accuracy: 0.8596 - auc: 0.9286 - loss: 0.3506 - val_accuracy: 0.9441 - val_auc: 0.9908 - val_loss: 0.1621
Epoch 2/20
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 2s/step - accuracy: 0.9360 - auc: 0.9774 - loss: 0.1826 - val_accuracy: 0.9337 - val_auc: 0.9932 - val_loss: 0.1929
Epoch 3/20
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 2s/step - accuracy: 0.9525 - auc: 0.9896 - loss: 0.1221 - val_accuracy: 0.9376 - val_auc: 0.9920 - val_loss: 0.1937
Epoch 4/20
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 2s/step - accuracy: 0.9486 - auc: 0.9863 - loss: 0.1432 - val_accuracy: 0.9051 - val_auc: 0.9952 - val_loss: 0.2453


In [None]:
# 6. Threshold tuning
# Choose the sigmoid cut-off that maximises Youden's J (TPR - FPR) on the
# validation set.
val_probs = model.predict(X_val).reshape(-1)

fpr, tpr, thresholds = roc_curve(y_val, val_probs)
youden_j = tpr - fpr
best_idx = youden_j.argmax()
best_thresh = thresholds[best_idx]

print(f"Optimal sigmoid threshold = {best_thresh:.3f}")

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step
Optimal sigmoid threshold = 0.523


In [58]:
# 7. Test data
test_n = glob.glob("./chest_xray/test/NORMAL/*.jpeg")
test_p = glob.glob("./chest_xray/test/PNEUMONIA/*.jpeg")

# Same label convention as the training data: 0 = NORMAL, 1 = PNEUMONIA.
X_test_n, y_test_n = load_and_preprocess(test_n, 0)
X_test_p, y_test_p = load_and_preprocess(test_p, 1)

# Stack both classes along the sample axis (axis 0 is np.concatenate's default).
X_test = np.concatenate((X_test_n, X_test_p))
y_test = np.concatenate((y_test_n, y_test_p))

In [59]:
# 8. Evaluate using tuned threshold
# Sigmoid output per test image, flattened to a 1-D array.
test_probs = model.predict(X_test).reshape(-1)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 2s/step


In [60]:
# Predict class 1 whenever the score reaches the tuned threshold, else class 0.
y_pred = np.where(test_probs >= best_thresh, 1, 0)

In [61]:
# NOTE: these numbers come from the metrics compiled into the model, not from
# y_pred / best_thresh, so "Acc" can differ from the thresholded accuracy.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
loss, acc, auc = test_metrics
print(f"Test Loss: {loss:.4f}, Acc: {acc:.4f}, AUC: {auc:.4f}")

Test Loss: 0.3065, Acc: 0.8766, AUC: 0.9434


In [62]:
# Per-class precision / recall / F1 for the thresholded predictions.
print("Classification Report:")
class_names = ["Normal", "Pneumonia"]
report = classification_report(y_test, y_pred, target_names=class_names)
print(report)

Classification Report:
              precision    recall  f1-score   support

      Normal       0.86      0.81      0.83       234
   Pneumonia       0.89      0.92      0.91       390

    accuracy                           0.88       624
   macro avg       0.88      0.87      0.87       624
weighted avg       0.88      0.88      0.88       624



In [None]:
# Rows are true classes, columns are predicted classes (label order 0, 1).
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Display with readable labels
row_labels = ["Normal (true)", "Pneumonia (true)"]
col_labels = ["Normal (pred)", "Pneumonia (pred)"]
cm_df = pd.DataFrame(data=cm, index=row_labels, columns=col_labels)

print("Confusion Matrix:")
print(cm_df)

Confusion Matrix:
                  Normal (pred)  Pneumonia (pred)
Normal (true)               189                45
Pneumonia (true)             30               360


In [64]:
# Accuracy of the thresholded predictions (y_pred) on the test set.
print("Accuracy Score:")
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

Accuracy Score:
0.8798076923076923
