EfficientNetB0 implementation with:

Train-validation-test split

Metrics plots: accuracy, precision, recall, loss

Confusion matrix visualization

Saving the trained model for future use (e.g., explainable-AI (XAI) techniques such as Grad-CAM)

Model architecture display

Line-by-line comments for clarity

Make sure the dataset directory contains only two class folders, COVID and NORMAL, because the code assumes binary classification.

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils import shuffle
import cv2

# Set dataset path (expected layout: <path>/<class name>/<image files>)
path = "A:\\GVSU Semester 5\\PSM Internship\\Covid-19\\COVID-19_Radiography_Dataset"

# Build (label, image path) pairs, one per entry found in each class folder.
# Directory listings are sorted because os.listdir() returns entries in
# arbitrary order, and the per-class cap applied when the images are loaded
# keeps only the first rows of each class -- sorting makes that subset
# reproducible between runs and machines.
class_labels = []
for item in sorted(os.listdir(path)):
    class_path = os.path.join(path, item)
    # Skip stray files in the dataset root (e.g. a README or metadata file):
    # calling os.listdir() on a regular file raises NotADirectoryError.
    if not os.path.isdir(class_path):
        continue
    class_labels.extend(
        (item, os.path.join(class_path, img))
        for img in sorted(os.listdir(class_path))
    )

# Create a DataFrame from the collected paths and labels
df = pd.DataFrame(class_labels, columns=['Labels', 'image'])

# Display sample entries and class distribution
print(df.head())
print(df.tail())
print("Total number of images in the dataset: ", len(df))
print(df['Labels'].value_counts())

# Set image size and initialize containers
im_size = 224
images = []
labels = []

# Limit images per class for balance
max_images_per_class = 3616

# Load and preprocess images: read each file with OpenCV, resize it to
# im_size x im_size, and remember its class label. Unreadable files are
# counted and skipped instead of aborting the whole run.
skipped = 0
for label in df['Labels'].unique():
    label_df = df[df['Labels'] == label].head(max_images_per_class)
    for img_path in label_df['image']:
        img = cv2.imread(img_path)  # returns None when the file cannot be decoded
        if img is None:
            skipped += 1
            continue
        images.append(cv2.resize(img, (im_size, im_size)))
        labels.append(label)

if skipped:
    print(f"Skipped {skipped} unreadable file(s)")
if not images:
    # Fail early with a clear message instead of an obscure shape error later.
    raise RuntimeError("No readable images were loaded; check the dataset path and folder layout.")

# Normalize image pixels to range [0, 1].
# Convert straight to float32 and divide in place so that only one full-size
# float copy of the dataset is held in memory at a time.
images = np.asarray(images, dtype='float32')
images /= 255.0

# Encode labels to one-hot
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels).reshape(-1, 1)

# sparse_threshold=0 forces a dense ndarray. With the default threshold (0.3)
# the result silently becomes a SciPy sparse matrix as soon as there are more
# than three classes, which the later tf.data / np.argmax steps cannot consume.
ct = ColumnTransformer(
    [("onehot", OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
Y = ct.fit_transform(encoded_labels)

# Shuffle data
images, Y = shuffle(images, Y, random_state=1)

# Split into train, validation, and test sets (80%, 10%, 10%)
train_x, temp_x, train_y, temp_y = train_test_split(images, Y, test_size=0.2, random_state=42)
val_x, test_x, val_y, test_y = train_test_split(temp_x, temp_y, test_size=0.5, random_state=42)

# Print dataset shapes
print(f"Train: {train_x.shape}, {train_y.shape}")
print(f"Validation: {val_x.shape}, {val_y.shape}")
print(f"Test: {test_x.shape}, {test_y.shape}")

# Model definition using EfficientNetB0
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0

# Derive the input size and class count from the prepared arrays so the model
# always matches the data (224 and 2 for the binary COVID/NORMAL setup).
IMG_SIZE = train_x.shape[1]
NUM_CLASSES = train_y.shape[1]

inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
# Using EfficientNetB0 without pre-trained weights (trained from scratch).
# NOTE(review): Keras documents EfficientNet inputs as pixel values in
# [0, 255] because the network rescales internally, while the arrays fed here
# were already divided by 255 -- confirm the double scaling is intended.
outputs = EfficientNetB0(include_top=True, weights=None, classes=NUM_CLASSES)(inputs)
model = tf.keras.Model(inputs, outputs)

# Compile the model.
# The metrics are named explicitly: Keras otherwise appends a counter
# ("precision_1", "recall_1", ...) when this cell is re-run in the same
# session, which breaks lookups of the 'precision' / 'recall' history keys.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
)
model.summary()  # Display model architecture

# Create training and validation datasets using tf.data
BATCH_SIZE = 32
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_x, train_y))
    .shuffle(1024)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((val_x, val_y))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Train the model
history = model.fit(train_dataset, validation_data=val_dataset, epochs=6, verbose=2)

# Save the model (for reuse, XAI, or deployment)
model.save("efficientnet_covid_classifier.h5")  # Saves model + weights

# Plot training metrics
def plot_training_history(history):
    """Plot the training curves recorded by ``model.fit``.

    Draws two figures: train/validation accuracy next to train/validation
    loss, followed by train precision and recall.

    Args:
        history: Object with a ``history`` dict that maps metric names to
            per-epoch value lists (as returned by ``model.fit``).

    Raises:
        KeyError: If a required metric is missing from ``history.history``.
    """
    logs = history.history

    def metric_series(base):
        # Prefer the exact key; otherwise accept a numerically suffixed name
        # such as "precision_1", which Keras generates when the same metric
        # class is instantiated more than once in a session.
        if base in logs:
            return logs[base]
        prefix = base + "_"
        for key, values in logs.items():
            if key.startswith(prefix) and key[len(prefix):].isdigit():
                return values
        raise KeyError(f"Metric '{base}' not found in history; available: {sorted(logs)}")

    plt.figure(figsize=(12, 6))

    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(logs['accuracy'], label='Train Accuracy')
    plt.plot(logs['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(logs['loss'], label='Train Loss')
    plt.plot(logs['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Additional metrics: precision and recall
    plt.figure(figsize=(12, 4))
    plt.plot(metric_series('precision'), label='Train Precision')
    plt.plot(metric_series('recall'), label='Train Recall')
    plt.title("Train Precision & Recall")
    plt.xlabel('Epoch')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

plot_training_history(history)

# Evaluate on test set. The values come back in the order: loss, then the
# metrics in the order they were passed to model.compile().
test_loss, test_acc, test_prec, test_rec = model.evaluate(test_x, test_y, verbose=0)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Precision: {test_prec:.4f}")
print(f"Test Recall: {test_rec:.4f}")

# Predict and convert probabilities / one-hot rows to integer class ids
y_pred = model.predict(test_x)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(test_y, axis=1)

# Pin the label set to every class known to the encoder. Without it,
# scikit-learn infers the classes from the data, and a class that happens to
# be absent from the test split makes target_names / display_labels mismatch
# and raise ValueError.
class_ids = np.arange(len(label_encoder.classes_))

# Print classification report
print("\nClassification Report:")
print(classification_report(
    y_true,
    y_pred_classes,
    labels=class_ids,
    target_names=label_encoder.classes_,
))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()


In [None]:
import tensorflow.keras.backend as K
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Grad-CAM function
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one image.

    The layer is looked up on ``model`` first. If it is not found there, the
    directly nested sub-models are searched, which covers a backbone that was
    called as a single layer (``Backbone(...)(inputs)``).

    Args:
        img_array: Batch containing the image to explain; the first sample is
            used for the heatmap.
        model: Keras model producing the class scores.
        last_conv_layer_name: Name of the convolutional layer whose feature
            maps are weighted.
        pred_index: Class index to explain. Defaults to the top predicted
            class of the first sample.

    Returns:
        2-D ``numpy`` array with values in [0, 1] at the spatial resolution
        of the chosen layer. All zeros when the class has no positive
        evidence in that layer.

    Raises:
        ValueError: If the layer cannot be found, or if it lives in a nested
            sub-model that is surrounded by additional layers.
    """
    # Locate the (sub)model that directly owns the requested layer.
    owner = model
    try:
        target_layer = model.get_layer(last_conv_layer_name)
    except ValueError:
        target_layer = None
        for layer in model.layers:
            if not isinstance(layer, tf.keras.Model):
                continue
            try:
                target_layer = layer.get_layer(last_conv_layer_name)
            except ValueError:
                continue
            owner = layer
            break
        if target_layer is None:
            raise ValueError(
                f"Layer '{last_conv_layer_name}' not found in the model or its nested sub-models."
            )
        # The nested model is evaluated directly on img_array below, which is
        # only equivalent to the outer model when the outer model is a pure
        # wrapper (input layer + nested model, nothing else).
        extra_layers = [
            layer for layer in model.layers
            if layer is not owner and not isinstance(layer, tf.keras.layers.InputLayer)
        ]
        if extra_layers:
            raise ValueError(
                f"Layer '{last_conv_layer_name}' is inside nested model '{owner.name}', "
                "but the outer model has additional layers; pass the nested model instead."
            )

    # `inputs` / `outputs` are the owning model's own symbolic tensors, so the
    # sub-graph stays connected even when that model is nested.
    grad_model = tf.keras.models.Model(
        owner.inputs, [target_layer.output, owner.outputs[0]]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(predictions[0])
        class_output = predictions[:, pred_index]

    # Get gradients of the selected class score w.r.t. the conv layer output
    grads = tape.gradient(class_output, conv_outputs)

    # Average the gradients over batch and spatial axes: one weight per channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight each feature map by its channel importance and sum over channels
    conv_outputs = conv_outputs[0]
    heatmap = conv_outputs @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap).numpy()

    # Keep positive evidence only, then normalize to [0, 1]. The division is
    # skipped when the peak is 0 so the result is zeros rather than NaN.
    heatmap = np.maximum(heatmap, 0)
    peak = heatmap.max()
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap

# Helper function to superimpose heatmap
def superimpose_heatmap(heatmap, image, alpha=0.4):
    """Overlay a colour-mapped heatmap on an image.

    Args:
        heatmap: 2-D array of importance values, expected in [0, 1]; it is
            resized to the image's height and width.
        image: H x W x 3 image with values in [0, 255].
        alpha: Weight applied to the coloured heatmap before it is added to
            the image.

    Returns:
        ``uint8`` array with the same shape as ``image``. The heatmap colours
        are in RGB channel order, suitable for ``matplotlib``.
    """
    heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
    # Guard against NaN / out-of-range values before the uint8 conversion.
    heatmap = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)
    heatmap_colored = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
    # OpenCV returns BGR; flip to RGB so that "hot" regions show up red (not
    # blue) when the result is displayed with matplotlib.
    heatmap_colored = heatmap_colored[..., ::-1]
    superimposed_img = heatmap_colored * alpha + image
    # Clip before casting: a bare uint8 cast wraps values above 255 around
    # and produces speckle artefacts in bright regions.
    return np.uint8(np.clip(superimposed_img, 0, 255))

# Select a test image and add a batch axis for prediction
idx = 0  # Change to visualize other test samples
input_img = test_x[idx]
img_array = np.expand_dims(input_img, axis=0)

# Generate heatmap
last_conv_layer = "top_conv"  # Last conv layer in EfficientNetB0
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer)

# Build a display copy of the image in [0, 255].
# Round before casting (a bare uint8 cast truncates, so 253.99998 would
# become 253) and flip the channel order: the images were loaded with
# cv2.imread, i.e. BGR, while matplotlib expects RGB. The array fed to the
# model above is left untouched.
input_img_rgb = np.clip(np.rint(input_img * 255), 0, 255).astype(np.uint8)
input_img_rgb = np.ascontiguousarray(input_img_rgb[..., ::-1])
superimposed_img = superimpose_heatmap(heatmap, input_img_rgb)

# Plot original, heatmap, and superimposed image side by side
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.title("Original")
plt.imshow(input_img_rgb)
plt.axis('off')

plt.subplot(1, 3, 2)
plt.title("Grad-CAM Heatmap")
plt.imshow(heatmap, cmap='jet')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.title("Superimposed")
plt.imshow(superimposed_img)
plt.axis('off')
plt.tight_layout()
plt.show()
