In [None]:
# Install the third-party packages this notebook imports below.
# `%pip` is an IPython/Jupyter magic, so this line only works inside a notebook
# kernel (it is not valid plain-Python syntax). As the pip output notes, the
# kernel may need a restart before newly installed packages can be imported.
%pip install kagglehub opencv-python matplotlib seaborn tensorflow scikit-learn

import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import kagglehub
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

# ========================== STEP 1: DOWNLOAD DATASET ==========================
# Fetch the chest X-ray dataset through kagglehub; `path` is the local
# directory the download call returns.
path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")
print("Dataset downloaded at:", path)

# One directory per split, each located under the "chest_xray" folder.
train_path, val_path, test_path = (
    os.path.join(path, split_dir)
    for split_dir in ("chest_xray/train", "chest_xray/val", "chest_xray/test")
)

# Class sub-folder names; the loader maps "PNEUMONIA" to label 1, anything else to 0.
categories = ["NORMAL", "PNEUMONIA"]

# ========================== STEP 2: PREPROCESSING WITH OPENCV ==========================
def load_images_from_folder(folder_path):
    """Load, contrast-enhance and label every readable image of one split.

    For each name in the module-level ``categories`` list, the sub-folder
    ``folder_path/<category>`` is scanned. Each file is read as grayscale,
    resized to 224x224, passed through CLAHE, scaled to [0, 1] and replicated
    into three identical channels.

    Args:
        folder_path: Directory containing one sub-folder per category.

    Returns:
        A tuple ``(images, labels)``:
        ``images`` is a float32 array of shape (N, 224, 224, 3);
        ``labels`` is an integer array of shape (N,), 1 for "PNEUMONIA"
        and 0 for any other category.

    Raises:
        OSError: If a category sub-folder cannot be listed (from ``os.listdir``).
    """
    # The CLAHE settings never change, so build the operator once instead of
    # once per image.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    images = []
    labels = []

    for category in categories:
        category_path = os.path.join(folder_path, category)
        label = 1 if category == "PNEUMONIA" else 0

        # os.listdir order is arbitrary; sorting keeps sample order reproducible.
        for filename in sorted(os.listdir(category_path)):
            img_path = os.path.join(category_path, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load grayscale

            # cv2.imread returns None for anything it cannot decode; skip those.
            if img is None:
                continue

            img = cv2.resize(img, (224, 224))
            img = clahe.apply(img)  # Contrast enhancement on the uint8 image

            # float32 halves the memory of the default float64 division result.
            img = img.astype(np.float32) / 255.0

            # Replicate the single channel: (224, 224) -> (224, 224, 3)
            images.append(np.stack([img] * 3, axis=-1))
            labels.append(label)

    # The reshape keeps a well-formed (0, 224, 224, 3) array when nothing was loaded.
    image_array = np.array(images, dtype=np.float32).reshape(-1, 224, 224, 3)
    label_array = np.array(labels, dtype=int)
    return image_array, label_array

# Run the loader over the three split directories, in train / val / test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    load_images_from_folder(split_dir)
    for split_dir in (train_path, val_path, test_path)
)

# Report the array shapes of each split as a quick sanity check.
print("Dataset shapes:")
for split_title, split_images, split_labels in (
    ("Train:", X_train, y_train),
    ("Validation:", X_val, y_val),
    ("Test:", X_test, y_test),
):
    print(split_title, split_images.shape, split_labels.shape)

# ========================== STEP 3: BUILD MODEL (TRANSFER LEARNING) ==========================
# ImageNet-pretrained MobileNetV2 backbone without its classification top.
# NOTE(review): the loader above scales pixels to [0, 1]; the Keras docs describe
# MobileNetV2 ImageNet weights as expecting inputs in [-1, 1] — confirm this is intended.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Keep the pre-trained weights fixed for the first phase

# Classification head: pooled features -> 128-unit ReLU -> dropout -> single sigmoid unit.
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden_features = Dense(128, activation='relu')(pooled_features)
regularized_features = Dropout(0.5)(hidden_features)
output_layer = Dense(1, activation='sigmoid')(regularized_features)  # Binary classification

# Join backbone input and head output into one trainable graph.
model = Model(inputs=base_model.input, outputs=output_layer)

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Print the layer-by-layer summary.
model.summary()

# ========================== STEP 4: TRAIN THE MODEL ==========================
# Phase 1: the backbone is frozen, so only the new classification head learns.
head_history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=5, batch_size=32)

# Snapshot the phase-1 metrics now so they cannot be lost when fit() runs again.
head_metrics = {name: list(values) for name, values in head_history.history.items()}

# ========================== STEP 5: FINE-TUNE MODEL ==========================
# Unfreeze last 30 layers for fine-tuning
for layer in base_model.layers[-30:]:
    layer.trainable = True

# Recompile so the trainable changes take effect, with a 10x lower learning
# rate than phase 1 to avoid wrecking the pre-trained weights.
model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])

# Phase 2: continue training with the unfrozen layers.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=5, batch_size=32)

# Previously `history` held only the fine-tuning epochs, so the curves plotted
# later silently dropped phase 1. Prepend the phase-1 values so each metric
# covers the full training run in chronological order.
for metric_name, fine_tune_values in list(history.history.items()):
    history.history[metric_name] = head_metrics.get(metric_name, []) + list(fine_tune_values)

# ========================== STEP 6: EVALUATE MODEL ==========================
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_results = model.evaluate(X_test, y_test)
test_loss, test_acc = test_results
print(f"Test Accuracy: {test_acc:.2f}")

# ========================== STEP 7: PLOT EVALUATION METRICS ==========================
# Each entry describes one side-by-side panel:
# (train key, validation key, train legend, validation legend, y-axis label, title)
curve_panels = (
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', "Accuracy", "Model Accuracy"),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', "Loss", "Model Loss"),
)

plt.figure(figsize=(12, 4))

for panel_index, panel in enumerate(curve_panels, start=1):
    train_key, val_key, train_legend, val_legend, y_axis_label, panel_title = panel

    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.xlabel("Epochs")
    plt.ylabel(y_axis_label)
    plt.title(panel_title)
    plt.legend()

plt.show()

# ========================== STEP 8: CONFUSION MATRIX ==========================
# Keep the raw sigmoid outputs: the ROC curve below needs continuous scores.
# ravel() flattens the (N, 1) prediction column to match the 1-D y_test.
y_prob = model.predict(X_test).ravel()

# Hard 0/1 decisions at the conventional 0.5 threshold.
y_pred = (y_prob > 0.5).astype(int)

# labels=[0, 1] guarantees a 2x2 matrix even if one class is never seen or predicted,
# which the fixed tick labels below rely on.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=['Normal', 'Pneumonia'], yticklabels=['Normal', 'Pneumonia'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

# Print classification report (labels pinned so target_names always line up)
print(classification_report(y_test, y_pred, labels=[0, 1], target_names=["Normal", "Pneumonia"]))

# ========================== STEP 9: ROC CURVE ==========================
# Use the probabilities, not the thresholded predictions: with 0/1 inputs the
# "curve" collapses to a single operating point and the AUC is misleading.
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')  # Chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

# ========================== STEP 10: SAVE MODEL ==========================
# Persist the fine-tuned model to a file in the current working directory.
# NOTE(review): the ".h5" suffix presumably selects Keras' HDF5 format — confirm
# this is still the desired format for the installed Keras version.
saved_model_file = "pneumonia_mobilenet_model.h5"
model.save(saved_model_file)
print("Model saved successfully!")

# ========================== STEP 11: Metrics ==========================

# 1. Compute confusion matrix
# labels=[0, 1] forces a 2x2 result; without it the matrix shrinks to 1x1 when
# only one class occurs and the four-way unpacking below raises ValueError.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

# 2. Calculate metrics (each guarded against a zero denominator)
total = tp + tn + fp + fn
accuracy = (tp + tn) / total if total != 0 else 0
precision = tp / (tp + fp) if (tp + fp) != 0 else 0
recall = tp / (tp + fn) if (tp + fn) != 0 else 0
specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

# 3. Print results
print(f"Confusion Matrix:\n{cm}")
print(f"Accuracy:    {accuracy:.3f}")
print(f"Precision:   {precision:.3f}")
print(f"Recall:      {recall:.3f}")
print(f"Specificity: {specificity:.3f}")
print(f"F1 Score:    {f1:.3f}")




Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Dataset downloaded at: /Users/fatemehsoltanzade/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2
