<a href="https://colab.research.google.com/github/NoeDebrois/Artificial-NN-and-Deep-Learning/blob/main/Object_Localisation_and_Class_Activation_Maps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Artificial Neural Networks and Deep Learning

---

## Lecture 5a: Object Localisation and Class Activation Maps

<img src="https://drive.google.com/uc?export=view&id=14qXmXmQHVwDxXJ3DiVhNmMOcnpA6QMiq" width="500"/>

### 🌐 Connect Colab to Google Drive

In [None]:
# Mount Google Drive at /gdrive so files stored there are reachable from this runtime
from google.colab import drive
drive.mount('/gdrive')
# Move into the lecture folder: later cells read and write files through relative paths
%cd /gdrive/My Drive/[2024-2025] AN2DL/Lecture 5

### ⚙️ Import Libraries

In [None]:
# Set seed for reproducibility
seed = 42

# Import necessary libraries
import os

# Set environment variables before importing modules
# (TensorFlow and Matplotlib are imported further down, after these assignments)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so assigning it here
# presumably only affects child processes, not this kernel's own hashing — confirm
os.environ['PYTHONHASHSEED'] = str(seed)
# Matplotlib config directory: ./configs/ under the current working directory
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Warning is the base class of every warning category, so this filter silences all
# warnings and already covers the FutureWarning filter above
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np

# Set seeds for random number generators in NumPy and Python
np.random.seed(seed)
random.seed(seed)

# Import TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl

# Set seed for TensorFlow (both the current API and the compat.v1 one)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Reduce TensorFlow verbosity: only errors are logged
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Print TensorFlow version
print(tf.__version__)

# Import additional libraries
import cv2
import csv
import scipy
from PIL import Image
from xml.dom import minidom
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor
import seaborn as sns
from tensorflow.keras.applications.mobilenet import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
)

# Configure plot display settings
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline

### ⏳ Load and Process Data

In [None]:
# Download the three dataset files unless they are already present in the working directory.
# File names and download identifiers are stored in environment variables because the `!`
# shell commands below refer to them as ${...}.

# Set environment variables for training dataset
os.environ["TRAINING_DATASET_NAME"] = "cats_dogs_images_train.zip"
# Value passed to gdown (presumably a Google Drive file ID — confirm)
os.environ["TRAINING_DATASET_URL"] = "1_fGNrYZxs0yzIJQfUmUWHrWnRisVEYaY"

# Check if training dataset exists, download if not
# (the check looks at the zip archive, not at the extracted folder)
if not os.path.exists(os.environ["TRAINING_DATASET_NAME"]):
    print("Training data downloading...")
    ! gdown -q ${TRAINING_DATASET_URL}
    ! unzip ${TRAINING_DATASET_NAME}
    print("Training data downloaded!")
else:
    print("Training data already downloaded, using cached data.")

# Set environment variables for bounding boxes of training dataset
os.environ["TRAINING_DATASET_BOUNDING_BOXES_NAME"] = "cats_dogs_images_boxes.csv"
os.environ["TRAINING_DATASET_BOUNDING_BOXES_URL"] = "1visBcJA_F9oUOAOTNq6R-MTzkFBXa2LY"

# Check if bounding boxes file exists, download if not
# (a plain CSV file, so there is no unzip step)
if not os.path.exists(os.environ["TRAINING_DATASET_BOUNDING_BOXES_NAME"]):
    print("Training data bounding boxes downloading...")
    ! gdown -q ${TRAINING_DATASET_BOUNDING_BOXES_URL}
    print("Training data bounding boxes downloaded!")
else:
    print("Training data bounding boxes already downloaded, using cached data.")

# Set environment variables for test dataset
os.environ["TEST_DATASET_NAME"] = "cats_dogs_images_test.zip"
os.environ["TEST_DATASET_URL"] = "1RFJwHLkLdj3RVq-xkYtP_8uLkj5K-obn"

# Check if test dataset exists, download if not
if not os.path.exists(os.environ["TEST_DATASET_NAME"]):
    print("Test data downloading...")
    ! gdown -q ${TEST_DATASET_URL}
    ! unzip ${TEST_DATASET_NAME}
    print("Test data downloaded!")
else:
    print("Test data already downloaded, using cached data.")

In [None]:
def load_images_from_folder(folder, img_dim):
    """
    Load every readable image in a folder as a centre-cropped, resized RGB array.

    Args:
        folder (str): Directory whose entries are read with OpenCV.
        img_dim (int): Side length, in pixels, of the square output images.

    Returns:
        np.ndarray: The processed images stacked in sorted file-name order.
            Entries that OpenCV cannot decode are skipped.
    """
    images = []

    # os.listdir gives no ordering guarantee; sorting keeps the result reproducible
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))

        # cv2.imread returns None for unreadable files: skip them before touching img.shape
        if img is None:
            continue

        # Make the image squared: centre crop with the shorter side as crop size
        height, width = img.shape[:2]
        dim = min(height, width)
        top = (height - dim) // 2
        left = (width - dim) // 2
        img = img[top:top + dim, left:left + dim, :]

        # Resize all images to a fixed size
        img = cv2.resize(img, (img_dim, img_dim))

        # OpenCV loads images as BGR; convert to RGB, the channel order used by the
        # rest of the notebook (plotting and the MobileNetV2-based models)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        images.append(img)

    return np.array(images)

In [None]:
def preprocess_dataset(image_dir='cats_dogs_images', image_size=(256, 256),
                       csv_path='cats_dogs_images_boxes.csv'):
    """
    Load the annotated images listed in a CSV file and normalise their bounding boxes.

    CSV rows are read by position: column 0 is the image file name, column 3 the
    integer class label and columns 4-7 the box corners (x1, y1, x2, y2) expressed in
    pixels of the original image. Each box is divided by the original width/height, so
    the returned coordinates are fractions of the image size.

    Args:
        image_dir (str): The directory containing the images in the dataset.
        image_size (tuple): Target size passed to cv2.resize, i.e. (width, height).
        csv_path (str): Path of the CSV file holding labels and bounding boxes.

    Returns:
        tuple: Three lists (labels, boxes, images) of equal length, in CSV row order.
            Rows whose image cannot be read are dropped; all three lists are empty
            when no row yields an image.
    """
    # Read the CSV file with the dataset
    df = pd.read_csv(csv_path)

    # Define a function to process each row
    def process_row(row):
        img_path = row[0]
        label = int(row[3])
        bbox_coords = list(map(float, row[4:8]))

        # Read the image using OpenCV
        img = cv2.imread(os.path.join(image_dir, img_path))
        if img is None:
            return None  # Skip if image is not found

        # Get original image dimensions
        orig_height, orig_width = img.shape[:2]

        # Scale the bounding box coordinates using original dimensions
        x1, y1, x2, y2 = bbox_coords
        bbox = [x1 / orig_width, y1 / orig_height, x2 / orig_width, y2 / orig_height]

        # Resize the image, then convert BGR (OpenCV order) to RGB
        img_resized = cv2.resize(img, image_size)
        img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

        return label, bbox, img_rgb

    # Use ThreadPoolExecutor to process images in parallel (map keeps the row order)
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_row, df.itertuples(index=False)))

    # Filter out any None results
    results = [r for r in results if r is not None]

    # zip(*[]) yields nothing to unpack, so handle the empty case explicitly
    if not results:
        return [], [], []

    # Unzip the results
    labels, boxes, img_list = zip(*results)

    # Convert to lists
    return list(labels), list(boxes), list(img_list)

In [None]:
# Load the resized images with their normalised bounding boxes and labels
# (preprocess_dataset performs no augmentation)
labels, boxes, img_list = preprocess_dataset()

# Shuffle the data by zipping the lists and shuffling the combined list,
# so that each image stays paired with its own box and label
combined_list = list(zip(img_list, boxes, labels))
random.shuffle(combined_list)

# Unpack the shuffled lists
img_list, boxes, labels = zip(*combined_list)

# Convert the lists to numpy arrays
img_list, boxes, labels = np.array(img_list), np.array(boxes), np.array(labels)

# Mapping from integer class index to class name, used for plot labels
num_to_labels = {0: 'cat', 1: 'dog'}

# Print the shape of each list
print(f"Image list shape: {img_list.shape}")
print(f"Bounding boxes shape: {boxes.shape}")
print(f"Labels shape: {labels.shape}")

In [None]:
# Side length (in pixels) of the resized images; also reused by later cells
img_size = 256

# Grid layout and number of samples to show (fewer if the dataset is smaller than the grid)
n_rows, n_cols = 4, 5
n_samples = min(n_rows * n_cols, len(img_list))

# Create the whole grid up front: mixing a single-Axes plt.subplots() with later
# plt.subplot() calls leaves a stray full-figure Axes behind the panels on Matplotlib
# versions that no longer delete overlapping Axes
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 20), facecolor='white')

# Generate a random sample of indices from the image list
random_indices = random.sample(range(len(img_list)), n_samples)

# Iterate over the random indices and plot the corresponding images
for ax, index in zip(axes.flat, random_indices):

    # Bounding boxes are stored as fractions of the image size: rescale them to pixels
    x1, y1, x2, y2 = boxes[index]
    x1, y1, x2, y2 = x1 * img_size, y1 * img_size, x2 * img_size, y2 * img_size

    # np.clip returns a new array, so the rectangle is not drawn on the dataset image
    image = np.clip(img_list[index], 0, 255)

    # Draw a green bounding box on the image
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 3)

    ax.imshow(image)

# Turn off the axis decorations on every panel, including unused ones
for ax in axes.flat:
    ax.axis("off")

# Adjust the spacing between the subplots to improve readability
plt.subplots_adjust(hspace=0.2, wspace=0.2)

# Display the figure
plt.show()

In [None]:
# Split images, labels and bounding boxes together (80/20, stratified on the class label)
split_arrays = train_test_split(
    img_list, labels, boxes,
    test_size=0.2, random_state=seed, stratify=labels,
)
(train_images, val_images,
 train_labels, val_labels,
 train_boxes, val_boxes) = split_arrays

# Report the size of every array in both subsets for verification
for subset_name, subset_images, subset_labels, subset_boxes in (
    ('training', train_images, train_labels, train_boxes),
    ('validation', val_images, val_labels, val_boxes),
):
    print(f'Number of {subset_name} images: ', subset_images.shape[0])
    print(f'Number of {subset_name} labels: ', subset_labels.shape[0])
    print(f'Number of {subset_name} boxes: ', subset_boxes.shape[0])

# One-hot encode the labels (two classes) for training with categorical cross-entropy
train_labels, val_labels = (
    tfk.utils.to_categorical(y, num_classes=2) for y in (train_labels, val_labels)
)

<img src="https://drive.google.com/uc?export=view&id=15T4O0D_r2AF3M1FzHaqf1z2y5NXV43t2" width="900"/>


### 🛠️ Models and Experiments

In [None]:
def spearman_rho(box_predictions, val_boxes):
    """
    Compute Spearman's rank correlation between two sets of bounding-box values.

    Both inputs are flattened to 1-D, every value is replaced by its rank (tied values
    share the average of the ranks they span, as in the standard definition), and the
    Pearson correlation of the two rank vectors is returned. The measure is symmetric,
    so the order of the two arguments does not matter.

    Args:
        box_predictions: Array or tensor of predicted box coordinates.
        val_boxes: Array or tensor of reference box coordinates with the same number
            of elements.

    Returns:
        Scalar tf.float32 tensor with the correlation coefficient.
    """
    # Reshape predictions and validation boxes into 1-D tensors
    box_predictions = tf.reshape(box_predictions, [-1])
    val_boxes = tf.reshape(val_boxes, [-1])

    # Function to compute (tie-averaged) ranks of elements
    def rank(x):
        # Ordinal ranks 1..n: position of each element in the sorted order
        sorted_indices = tf.argsort(x, direction='ASCENDING', stable=True)
        ordinal = tf.cast(tf.argsort(sorted_indices, direction='ASCENDING') + 1, tf.float32)

        # Give tied values the mean of their ordinal ranks; without this, equal values
        # receive different, arbitrary ranks and distort the coefficient
        _, group_ids = tf.unique(x)
        num_groups = tf.reduce_max(group_ids) + 1
        mean_ranks = tf.math.unsorted_segment_mean(ordinal, group_ids, num_groups)

        return tf.gather(mean_ranks, group_ids)

    # Compute ranks for the predicted and actual boxes
    rank_pred = rank(box_predictions)
    rank_val = rank(val_boxes)

    # Centre the ranks around their means
    diff_pred = rank_pred - tf.reduce_mean(rank_pred)
    diff_val = rank_val - tf.reduce_mean(rank_val)

    # Covariance and standard deviations of the ranks
    cov = tf.reduce_mean(diff_pred * diff_val)
    std_pred = tf.sqrt(tf.reduce_mean(tf.square(diff_pred)))
    std_val = tf.sqrt(tf.reduce_mean(tf.square(diff_val)))

    # Correlation of the ranks, with epsilon to avoid division by zero
    rho = cov / (std_pred * std_val + 1e-8)

    return rho

#### 🧠 Multitask Learning

In [None]:
# Load a pre-trained MobileNetV2 model without the top layer,
# using 'imagenet' weights, and configure it for transfer learning.
# The input shape is taken from the training images; pooling='avg' is requested so the
# backbone output can be fed directly to the Dropout/Dense heads built in the next cell
mobile = tfk.applications.MobileNetV2(
    input_shape = train_images.shape[1:],
    include_top = False,
    weights = 'imagenet',
    pooling = 'avg',
)

# Plot the architecture of the MobileNetV2 model, displaying trainable parameters and the shapes of each layer
tfk.utils.plot_model(mobile, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Freeze MobileNet layers so only the two new heads are trained
mobile.trainable = False

# Define inputs and add MobileNet as a feature extractor
inputs = tfk.Input(shape=train_images.shape[1:])
x = mobile(inputs)

# Add dropout layer and classifier head for object classification
# (both heads below share the same dropped-out feature vector x)
x = tfkl.Dropout(0.5, seed=seed)(x)
class_outputs = tfkl.Dense(2, activation='softmax', name='classifier')(x)

# Add localisation head for bounding box prediction: 4 unbounded values (x1, y1, x2, y2)
box_outputs = tfkl.Dense(4, activation='linear', name='localizer')(x)

# Create the model connecting inputs to classification and localisation outputs
object_localization_model = tfk.Model(inputs=inputs, outputs=[class_outputs, box_outputs], name='object_localization_model')

# Compile the model with categorical crossentropy and mean squared error losses, using Adam optimiser.
# The losses are listed in the same order as the model outputs: classifier first, localizer second
object_localization_model.compile(loss=[tfk.losses.CategoricalCrossentropy(), tfk.losses.MeanSquaredError()], optimizer=tfk.optimizers.Adam())

# Display model summary and plot the model architecture
object_localization_model.summary()
tfk.utils.plot_model(object_localization_model, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Model training with input images and associated labels and bounding boxes.
# Only the History.history dict (per-epoch metrics) is kept in object_localization_history
object_localization_history = object_localization_model.fit(
    x = preprocess_input(train_images),
    # Targets follow the order of the model outputs: class labels, then boxes
    y = [train_labels, train_boxes],
    batch_size = 64,
    epochs = 200,

    # Validation data provided for performance monitoring
    validation_data = (preprocess_input(val_images), [val_labels, val_boxes]),

    # Callbacks for early stopping and learning rate reduction based on validation loss.
    # The LR patience (15) is shorter than the early-stopping patience (20), so the learning
    # rate is reduced before training is stopped
    callbacks = [
        tfk.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=15, min_delta=1e-5)
    ]
).history

In [None]:
# Save the best model (early stopping was configured with restore_best_weights=True)
object_localization_model.save('MultitaskCatDogLocalizer.keras')
# Drop the in-memory model; the next cell reloads it from disk
del object_localization_model

In [None]:
# Re-load the model after transfer learning from the file saved in the previous cell
object_localization_model = tfk.models.load_model('MultitaskCatDogLocalizer.keras')
object_localization_model.summary()

In [None]:
# Generate predictions on the validation images.
# The model has two outputs: index 0 holds class probabilities, index 1 the box coordinates
val_predictions = object_localization_model.predict(preprocess_input(val_images), verbose=0)
classification_predictions = np.argmax(val_predictions[0], axis=1)
box_predictions = val_predictions[1]

# Retrieve true labels from the validation set (val_labels is one-hot encoded)
val_gt = np.argmax(val_labels, axis=1)

# Compute and display confusion matrix.
# The matrix is transposed for plotting so that columns are true labels and rows are predictions
cm = confusion_matrix(val_gt, classification_predictions)
plt.figure(figsize=(5, 4))
sns.heatmap(cm.T, fmt='d', xticklabels=list(num_to_labels.values()),
            yticklabels=list(num_to_labels.values()), cmap='Blues', annot=True)
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.title('Confusion Matrix')
plt.show()

# Compute and display accuracy on the validation set
val_accuracy = accuracy_score(val_gt, classification_predictions)
print(f'Accuracy score on validation set: {round(val_accuracy*100, 2)}')

# Compute and display precision on the validation set (macro average over the classes)
val_precision = precision_score(val_gt, classification_predictions, average='macro')
print(f'Precision score on validation set: {round(val_precision*100, 2)}')

# Compute and display recall on the validation set (macro average over the classes)
val_recall = recall_score(val_gt, classification_predictions, average='macro')
print(f'Recall score on validation set: {round(val_recall*100, 2)}')

# Compute and display F1 score on the validation set (macro average over the classes)
val_f1 = f1_score(val_gt, classification_predictions, average='macro')
print(f'F1 score on validation set: {round(val_f1*100, 2)}')

# Compute and display Spearman's Rho correlation for bounding box predictions
# (computed over all box coordinates flattened together)
spearman = spearman_rho(val_boxes, box_predictions)
print(f'Spearman\'s Rho Correlation on validation set: {round(float(spearman),4)}')

#### 🕹️ Use the Model - Make Inference

In [None]:
# Load test images from the specified folder path with a target size of 256
# (centre-cropped to a square, resized to 256x256 and converted to RGB by the helper)
test_path = 'localization_test/'
X_test = load_images_from_folder(test_path, 256)

In [None]:
# Display a sample of (up to) 10 images from the test data
num_img = 10
n_cols = num_img // 2
fig, axes = plt.subplots(2, n_cols, figsize=(20, 9))

# Fill the grid row by row (image i -> row i // n_cols, column i % n_cols), the same
# placement the prediction figures use, so that panels can be compared one to one
n_shown = min(num_img, len(X_test))
for i, ax in enumerate(axes.flat):
    if i < n_shown:
        ax.imshow(np.clip(X_test[i], 0, 255))
    ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Generate predictions on the test images.
# The result has two entries: class probabilities (index 0) and box coordinates (index 1)
test_predictions = object_localization_model.predict(preprocess_input(X_test), verbose=0)

In [None]:
# Show the first 10 test images with the predicted box drawn and the predicted class as title
num_img = 10
fig, axes = plt.subplots(2, num_img // 2, figsize=(20, 9))

# Side length of the test images: turns fractional box coordinates into pixels
img_size = 256

for i in range(num_img):
    # Row-major position of image i in the 2 x (num_img // 2) grid
    row, col = divmod(i, num_img // 2)
    ax = axes[row, col]

    # Predicted box (second model output), rescaled to pixel coordinates
    a1, b1, a2, b2 = test_predictions[1][i]
    x1, y1, x2, y2 = (coord * img_size for coord in (a1, b1, a2, b2))

    # Draw on a copy so the test set itself stays untouched
    img = X_test[i].copy()
    top_left = (int(x1), int(y1))
    bottom_right = (int(x2), int(y2))
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)

    # Predicted class (first model output) and its probability as a percentage
    probabilities = test_predictions[0][i]
    predicted_class = np.argmax(probabilities)
    confidence = round(probabilities[predicted_class] * 100, 1)
    label = 'cat' if predicted_class != 1 else 'dog'

    # Render the annotated image
    ax.imshow(np.clip(img, 0, 255).astype(np.uint8))
    ax.set_title(f'{label}: {confidence}%')
    ax.axis('off')

plt.tight_layout()
plt.show()

#### 🧠 Double Neural Network

**Image Classifier**

In [None]:
# Fresh pre-trained MobileNetV2 backbone (ImageNet weights, no top layer, average pooling)
# for the stand-alone classifier; this rebinds `mobile`, which previously held the
# multitask model's backbone
mobile = tfk.applications.MobileNetV2(
    input_shape = train_images.shape[1:],
    include_top = False,
    weights = 'imagenet',
    pooling = 'avg',
)

In [None]:
# Use MobileNet as feature extractor (weights frozen, only the new head is trained)
mobile.trainable = False

# Add the classifier to MobileNet: dropout followed by a 2-way softmax layer
inputs = tfk.Input(shape=train_images.shape[1:])
x = mobile(inputs)
x = tfkl.Dropout(0.5, seed=seed)(x)
outputs = tfkl.Dense(2, activation='softmax', name='classifier')(x)

# Connect input and output through the Model class
classifier_model = tfk.Model(inputs=inputs, outputs=outputs, name='classifier_model')

# Compile the model; accuracy is tracked because the training callbacks monitor val_accuracy
classifier_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(), metrics=['accuracy'])
classifier_model.summary()

In [None]:
# Train the classifier on the one-hot labels; only the History.history dict is kept
classifier_history = classifier_model.fit(
    x = preprocess_input(train_images),
    y = train_labels,
    batch_size = 64,
    epochs = 200,
    validation_data = (preprocess_input(val_images), val_labels),
    # Both callbacks watch validation accuracy (unlike the multitask model, which watches val_loss)
    callbacks = [
        tfk.callbacks.EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True),
        tfk.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=15, min_delta=1e-5)
    ]
).history

In [None]:
# Save the best model (early stopping was configured with restore_best_weights=True)
classifier_model.save('CatDogClassifier.keras')
# Drop the in-memory model; the next cell reloads it from disk
del classifier_model

In [None]:
# Re-load the model after transfer learning from the file saved in the previous cell
classifier_model = tfk.models.load_model('CatDogClassifier.keras')
classifier_model.summary()

In [None]:
# Predict class probabilities for the validation images and take the most likely class
val_predictions = classifier_model.predict(preprocess_input(val_images), verbose=0)
classification_predictions = np.argmax(val_predictions, axis=1)

# Ground-truth class indices (val_labels is one-hot encoded)
val_gt = np.argmax(val_labels, axis=1)

# Confusion matrix, transposed for plotting: columns are true labels, rows are predictions
class_names = list(num_to_labels.values())
cm = confusion_matrix(val_gt, classification_predictions)
plt.figure(figsize=(5, 4))
sns.heatmap(cm.T, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.title('Confusion Matrix')
plt.show()

# Accuracy plus macro-averaged precision, recall and F1 on the validation set
val_accuracy = accuracy_score(val_gt, classification_predictions)
val_precision, val_recall, val_f1 = (
    metric(val_gt, classification_predictions, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Report every score as a percentage rounded to two decimals
for metric_name, metric_value in (
    ('Accuracy', val_accuracy),
    ('Precision', val_precision),
    ('Recall', val_recall),
    ('F1', val_f1),
):
    print(f'{metric_name} score on validation set: {round(metric_value*100, 2)}')

**Box Regressor**

In [None]:
# Fresh pre-trained MobileNetV2 backbone (ImageNet weights, no top layer, average pooling)
# for the stand-alone box regressor; this rebinds `mobile` once more
mobile = tfk.applications.MobileNetV2(
    input_shape = train_images.shape[1:],
    include_top = False,
    weights = 'imagenet',
    pooling = 'avg',
)

In [None]:
# Use MobileNet as feature extractor (weights frozen, only the new head is trained)
mobile.trainable = False

# Add the box regression head to MobileNet: dropout followed by 4 linear outputs
# (x1, y1, x2, y2, matching the order of the target boxes)
inputs = tfk.Input(shape=train_images.shape[1:])
x = mobile(inputs)
x = tfkl.Dropout(0.5, seed=seed)(x)
outputs = tfkl.Dense(4, activation='linear', name='box_regressor')(x)

# Connect input and output through the Model class
box_regressor_model = tfk.Model(inputs=inputs, outputs=outputs, name='box_regressor_model')

# Compile the model with mean squared error, the same loss used for the multitask box head
box_regressor_model.compile(loss=tfk.losses.MeanSquaredError(), optimizer=tfk.optimizers.Adam())
box_regressor_model.summary()

In [None]:
# Train the box regressor on the normalised boxes; only the History.history dict is kept
regressor_history = box_regressor_model.fit(
    x = preprocess_input(train_images),
    y = train_boxes,
    batch_size = 64,
    epochs = 200,
    validation_data = (preprocess_input(val_images), val_boxes),
    # Both callbacks watch the validation loss (no accuracy metric exists for regression)
    callbacks = [
            tfk.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
            tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=15, min_delta=1e-5)
    ]
).history

In [None]:
# Save the best model (early stopping was configured with restore_best_weights=True)
box_regressor_model.save('CatDogBoxRegressor.keras')
# Drop the in-memory model; the next cell reloads it from disk
del box_regressor_model

In [None]:
# Re-load the model after transfer learning from the file saved in the previous cell
box_regressor_model = tfk.models.load_model('CatDogBoxRegressor.keras')
box_regressor_model.summary()

In [None]:
# Generate predictions for bounding box regression on validation images
predictions = box_regressor_model.predict(preprocess_input(val_images), verbose=0)

# Compute and display Spearman's Rho correlation between true and predicted bounding boxes
# (computed over all box coordinates flattened together)
spearman = spearman_rho(val_boxes, predictions)
print(f"Spearman's Rho Correlation on validation set: {round(float(spearman), 4)}")

#### 🕹️ Use the Model - Make Inference

In [None]:
# Load test images from the specified folder with the given image size
# (img_size is the global set to 256 in earlier cells)
test_path = 'localization_test/'
X_test = load_images_from_folder(test_path, img_size)

In [None]:
# Display a sample of (up to) 10 test images
num_img = 10
n_cols = num_img // 2
fig, axes = plt.subplots(2, n_cols, figsize=(20, 9))

# Fill the grid row by row (image i -> row i // n_cols, column i % n_cols), the same
# placement the prediction figures use, so that panels can be compared one to one
n_shown = min(num_img, len(X_test))
for i, ax in enumerate(axes.flat):
    if i < n_shown:
        ax.imshow(np.clip(X_test[i], 0, 255))
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Generate class predictions and bounding box predictions on the test images,
# using the two independently trained models on the same preprocessed input
test_class_predictions = classifier_model.predict(preprocess_input(X_test), verbose=0)
test_box_predictions = box_regressor_model.predict(preprocess_input(X_test), verbose=0)

In [None]:
# Inspect the predictions: first 10 test images with the regressed box and the predicted class
num_img = 10
fig, axes = plt.subplots(2, num_img // 2, figsize=(20, 9))

# Side length of the test images: turns fractional box coordinates into pixels
img_size = 256

# axes.ravel() walks the grid row by row, i.e. image i lands at [i // 5, i % 5]
for i, ax in enumerate(axes.ravel()[:num_img]):

    # Box predicted by the regressor, rescaled to match the image size
    a1, b1, a2, b2 = test_box_predictions[i]
    x1, y1 = a1 * img_size, b1 * img_size
    x2, y2 = a2 * img_size, b2 * img_size

    # Draw the box on a copy so the test set itself stays untouched
    img = X_test[i].copy()
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 3)

    # Class predicted by the classifier and its probability as a percentage
    probabilities = test_class_predictions[i]
    predicted_class = np.argmax(probabilities)
    confidence = round(probabilities[predicted_class] * 100, 1)
    if predicted_class == 1:
        label = 'dog'
    else:
        label = 'cat'

    ax.imshow(np.clip(img, 0, 255).astype(np.uint8))
    ax.set_title(f'{label}: {confidence}%')
    ax.axis('off')

plt.tight_layout()
plt.show()

#### 👁️ Class Activation Maps

In [None]:
# Load test images from the specified folder and preprocess them.
# X_test keeps the raw RGB images for display; X_test_preprocessed is what the model receives
test_path = 'localization_test'
X_test = load_images_from_folder(test_path, img_size)
X_test_preprocessed = preprocess_input(X_test)

In [None]:
def compute_CAM(model, img, backbone_name='mobilenetv2_1.00_224', conv_layer_name='Conv_1'):
    """
    Compute the Class Activation Map of the predicted class for a single image.

    The feature maps of `conv_layer_name` inside the nested backbone are combined using
    the weights that connect each channel to the winning class in the model's last
    layer, and the resulting map is upsampled to the spatial size of `img`.

    Args:
        model: Classifier whose last layer is a dense layer over the pooled backbone features.
        img (np.ndarray): One preprocessed image of shape (height, width, channels).
        backbone_name (str): Name of the nested backbone model inside `model`.
        conv_layer_name (str): Name of the backbone layer whose output is used as
            feature maps.
            NOTE(review): in Keras' MobileNetV2 the pooled features appear to come from
            the activation after 'Conv_1' rather than 'Conv_1' itself — confirm which
            layer should be used here.

    Returns:
        tuple: (cam, label_index, predictions) where `cam` is a 2-D array with the same
            height and width as `img`, `label_index` the index of the winning class and
            `predictions` the raw model output for the image.
    """
    # Expand image dimensions to fit the model input shape
    batch = np.expand_dims(img, axis=0)

    # Predict to get the winning class
    predictions = model.predict(batch, verbose=0)
    label_index = np.argmax(predictions)

    # Input weights (one per feature channel) to the output unit of the winning class
    class_weights = model.layers[-1].get_weights()[0]
    class_weights_winner = class_weights[:, label_index]

    # Sub-model mapping the backbone input to the selected convolutional feature maps
    backbone = model.get_layer(backbone_name)
    final_conv_layer = tfk.Model(
        backbone.input,
        backbone.get_layer(conv_layer_name).output
    )

    # Feature maps of the single image: (rows, cols, channels)
    conv_outputs = np.asarray(final_conv_layer(batch))[0]

    # Weighted sum over channels at feature-map resolution. Linear interpolation is a
    # linear operation, so projecting first and upsampling afterwards gives the same map
    # as upsampling every channel first, without building a (height, width, channels) array
    low_res_cam = np.dot(conv_outputs, class_weights_winner)

    # Upsample to the input resolution; factors are derived from the actual shapes
    zoom_factors = (
        img.shape[0] / low_res_cam.shape[0],
        img.shape[1] / low_res_cam.shape[1],
    )
    final_output = scipy.ndimage.zoom(low_res_cam, zoom_factors, order=1)

    return final_output, label_index, predictions

In [None]:
# Compute the Class Activation Map result for every preprocessed test image.
# Each entry is the (heatmap, winning class index, raw predictions) tuple returned by compute_CAM
values = [compute_CAM(classifier_model, img) for img in X_test_preprocessed]

In [None]:
# Inspect the data: heatmap of the winning class with the test image blended on top
num_img = 10
n_cols = num_img // 2
fig, axes = plt.subplots(2, n_cols, figsize=(20,9))

# Fill the grid row by row (image i -> row i // n_cols, column i % n_cols), the same
# placement the prediction figures use. The previous index `i%num_img//2` parsed as
# `(i % num_img) // 2` and filled the grid column by column instead
n_shown = min(num_img, len(values))
for i, ax in enumerate(axes.flat):
    if i < n_shown:
        ax.imshow(values[i][0], cmap='turbo')
        ax.imshow(np.clip(X_test[i], 0, 255), alpha=0.5)
    ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Display a sample of (up to) 10 test images with heatmap-based bounding boxes and class labels
num_img = 10
n_cols = num_img // 2
fig, axes = plt.subplots(2, n_cols, figsize=(20, 9))

n_shown = min(num_img, len(values))
for i in range(n_shown):

    # Extract the maximum value from the heatmap
    heatmap_max = np.max(values[i][0])

    # Define a threshold to filter heatmap values: 30% of the maximum activation
    boundary = heatmap_max * 0.3

    # Apply the threshold to create a binary heatmap (0 below or at the threshold, 255 above)
    bbox_heatmap = np.where(values[i][0] <= boundary, 0, 255)

    # Draw on a copy so the test set itself stays untouched
    bbox_img = X_test[i].copy()

    # Find contours of the heatmap
    cnts = cv2.findContours(bbox_heatmap.astype('uint8'),
                            cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the element that holds the contours in either case
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    # Define an offset for the bounding box
    # NOTE(review): the offset is applied to both coordinates of the top-left corner but
    # only to x of the bottom-right corner — kept as is, confirm whether this is intended
    offset = 10
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(bbox_img, (x + offset, y + offset),
                      (x + offset + w, y + h), (100, 255, 0), 3)

    # Get the predicted label and confidence score
    label = num_to_labels[values[i][1]]
    confidence = round(values[i][2][0][values[i][1]] * 100, 1)

    # Display image with bounding box and label, filling the grid row by row
    # (image i -> row i // n_cols, column i % n_cols) like the other prediction figures
    ax = axes[i // n_cols, i % n_cols]
    ax.imshow(np.clip(bbox_img, 0, 255))
    ax.title.set_text(f'{label}: {confidence}%')

# Hide the axis decorations on every panel, including unused ones
for ax in axes.flat:
    ax.axis('off')

plt.tight_layout()
plt.show()

#  
<img src="https://airlab.deib.polimi.it/wp-content/uploads/2019/07/airlab-logo-new_cropped.png" width="350">

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/95/Instagram_logo_2022.svg/800px-Instagram_logo_2022.svg.png" width="15"> **Instagram:** https://www.instagram.com/airlab_polimi/

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/81/LinkedIn_icon.svg/2048px-LinkedIn_icon.svg.png" width="15"> **LinkedIn:** https://www.linkedin.com/company/airlab-polimi/
___
Credits: Eugenio Lomurno 📧 eugenio.lomurno@polimi.it





```
   Copyright 2024 Eugenio Lomurno

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
```
