# GradCAM Demo

In [1]:
%load_ext autoreload
%autoreload 2

from pytorch_grad_cam import GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from torchvision.io import read_image
from torchvision.utils import save_image
from PIL import Image
from torchvision import transforms
from backbones.mobilenetv3small import MobileNetV3Small
from backbones.resnet50 import ResNet50
from backbones.resnet34 import ResNet34
from backbones.resnet18 import ResNet18
from backbones.swinv2t import SwinV2T
from backbones.swint import SwinT
from backbones.vitb16 import ViTB16
from backbones.detr import DETR
import torch
import cv2
import os
import glob
import matplotlib.pyplot as plt

  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count


In [2]:
# Lookup table from the short CAM name accepted by `create_cam_images`
# (its `cam_name` argument) to the pytorch_grad_cam class imported above
# that is instantiated for that name.
cam_types = {
    "grad": GradCAM,
    "hires": HiResCAM,
    "score": ScoreCAM,
    "gradpp": GradCAMPlusPlus,
    "abl": AblationCAM,
    "xgrad": XGradCAM,
    "eigen": EigenCAM,
    "full": FullGrad
}

In [3]:
def save_image_with_heatmap(original_image_path, output_dir, filename_suffix, visualization, heatmap):
    """Save a CAM overlay as a JPEG named after the original image.

    The file is written to ``<output_dir>/<stem>_<filename_suffix>.jpg``,
    where ``<stem>`` is the base name of ``original_image_path`` without its
    extension.

    Args:
        original_image_path: Path of the source image; only its base name
            (without extension) is used to build the output name.
        output_dir: Directory to write into. It is created when missing.
        filename_suffix: Text appended to the stem, separated by ``_``.
        visualization: Overlay image array. The caller in this file builds it
            with ``use_rgb=True``, so it is treated as RGB here.
        heatmap: Not used; kept so existing call sites keep working.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    stem = os.path.splitext(os.path.basename(original_image_path))[0]

    # cv2.imwrite does not create directories and only signals failure
    # through its return value, so make sure the target exists first.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_file = f"{output_dir}/{stem}_{filename_suffix}.jpg"

    # OpenCV writes channels in BGR order, so swap from RGB before saving.
    bgr_image = cv2.cvtColor(visualization, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(output_file, bgr_image):
        raise OSError(f"Could not write image to {output_file}")
    
def load_images(glob_path: str):
    """Load every image file matching a glob pattern.

    Args:
        glob_path: Pattern passed to ``glob.glob``.

    Returns:
        A ``(paths, images)`` tuple of two lists of equal length. ``paths``
        holds the matched file paths in sorted order and ``images`` the
        corresponding PIL images, so ``images[i]`` was read from ``paths[i]``.
        Both lists are empty when nothing matches.
    """
    # glob returns matches in filesystem order; sort so the batch order (and
    # therefore the path/image pairing seen by callers) is reproducible.
    paths = sorted(glob.glob(glob_path))

    input_images = []
    for path in paths:
        # Image.open is lazy and keeps the file open. Decode the pixels now
        # and let the context manager release the file handle, so loading
        # many images does not accumulate open files.
        with Image.open(path) as image:
            image.load()
        input_images.append(image)
    return paths, input_images

def load_input_batch(glob_path: str, preprocess):
    """Load the images matching *glob_path* and join them into one batch.

    Args:
        glob_path: Pattern forwarded to ``load_images``.
        preprocess: Callable applied to each loaded image. Its result must be
            a tensor, since ``unsqueeze(0)`` is called on it.

    Returns:
        A ``(paths, input_batch)`` tuple: the paths returned by
        ``load_images`` and the preprocessed images concatenated along a new
        leading dimension.
    """
    paths, images = load_images(glob_path)
    # Give every preprocessed image a leading axis of size 1 so that
    # torch.cat can join them along dimension 0.
    batch = torch.cat([preprocess(image).unsqueeze(0) for image in images])
    return paths, batch

def create_cam_images(glob_path: str, backbone, output_path: str, cam_name = "grad", cam_layer_index: int = 0, image_size=224, reshape_transform = None, targets=None):
    """Compute class activation maps for a set of images and save the overlays.

    All images matching ``glob_path`` are loaded into a single batch, the
    selected CAM method is run once on that batch, and one overlay per image
    is written through ``save_image_with_heatmap``.

    Args:
        glob_path: Glob pattern selecting the input images.
        backbone: Model wrapper. Must expose ``gradcam_layers`` (indexable
            collection of candidate target layers) and ``name`` (used in the
            output file name).
        output_path: Directory the overlays are written to.
        cam_name: Key into ``cam_types`` selecting the CAM implementation.
        cam_layer_index: Index into ``backbone.gradcam_layers``.
        image_size: Size passed to both ``Resize`` and ``CenterCrop``.
        reshape_transform: Forwarded unchanged to the CAM constructor.
        targets: Forwarded to the CAM call as ``targets``. When left as
            ``None`` the notebook-level global ``targets`` is used if it
            exists (this function used to read that global directly);
            otherwise ``None`` is passed through.
    """
    if targets is None:
        # Backward compatibility: earlier versions of this function read a
        # global named `targets` defined in a later notebook cell.
        targets = globals().get("targets")

    # Kept separate from `normalize` because the un-normalized batch is the
    # image handed to show_cam_on_image for the overlay.
    preprocess = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
    ])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    paths, input_batch = load_input_batch(glob_path, preprocess)
    target_layers = [backbone.gradcam_layers[cam_layer_index]]
    cam_type = cam_types[cam_name]
    filename_suffix = f"{cam_name}_layer{cam_layer_index}_{backbone.name}"

    # Construct the CAM object once and use it for the whole batch.
    with cam_type(model=backbone, target_layers=target_layers, reshape_transform=reshape_transform) as cam:
        # `targets` selects what the activation maps are generated for.
        # If it is None, the highest scoring category is used for every
        # image in the batch.
        # aug_smooth=True and eigen_smooth=True can also be passed here to
        # apply smoothing.
        grayscale_cam = cam(input_tensor=normalize(input_batch), targets=targets)

        for path, image, heatmap in zip(paths, input_batch, grayscale_cam):
            # Reorder from channels-first to channels-last for the overlay.
            rgb_image = image.detach().cpu().permute(1, 2, 0).numpy()
            visualization = show_cam_on_image(rgb_image, heatmap, use_rgb=True)
            save_image_with_heatmap(path, output_path, filename_suffix, visualization, heatmap)
            
def reshape_transform_swin_transformer(tensor, height=7, width=7):
    """Move axis 3 of *tensor* to position 1, keeping the other axes in order.

    For a 4-D input indexed ``(d0, d1, d2, d3)`` the result is indexed
    ``(d0, d3, d1, d2)``. Presumably the input is a channels-last activation
    ``(batch, height, width, channels)`` -- confirm against the backbone.

    ``height`` and ``width`` are accepted but not used.
    """
    # Same axis order as swapping axes 2<->3 and then 1<->2.
    return tensor.movedim(3, 1)

def reshape_transform_vit(tensor, height=14, width=14):
    """Turn a token sequence into a channels-first 2-D feature map.

    The first entry along dimension 1 is dropped (presumably the class
    token -- confirm against the backbone), the remaining entries are laid
    out on a ``height`` x ``width`` grid, and the last dimension is moved to
    position 1.

    Args:
        tensor: Input whose dimension 1 holds ``1 + height * width`` entries.
        height: Number of grid rows.
        width: Number of grid columns.

    Returns:
        Tensor of shape ``(tensor.size(0), tensor.size(2), height, width)``.
    """
    tokens = tensor[:, 1:, :]
    grid = tokens.reshape(tensor.size(0), height, width, tensor.size(2))

    # Bring the channels to the first dimension, like in CNNs.
    return grid.permute(0, 3, 1, 2)

In [4]:
# Alternative without a local checkpoint:
#backbone = DETR()
# Raw string so the Windows backslashes need no escaping (the previous
# literal contained the invalid escape sequence "\d").
backbone = DETR(filepath=r"C:\Users\tilof\PycharmProjects\DeepLearningProjects\DETR\results\spine\detr_r50\checkpoint.pth")

# Class index to generate the activation maps for. Any value below 0 leaves
# `targets` as None, in which case the highest scoring category is used.
classifier_output_target = -1
if classifier_output_target > -1:
    # NOTE(review): this list has a single entry; confirm how pytorch_grad_cam
    # pairs targets with the images of a batch when more than one image matches.
    targets = [ClassifierOutputTarget(classifier_output_target)]
else:
    targets = None

# `create_cam_images` reads the `targets` variable defined above.
for i, _ in enumerate(backbone.gradcam_layers):
    create_cam_images(
        "images/dog/*.png",
        backbone,
        "images/dog/gradcam",
        cam_name="grad",
        image_size=512,
        cam_layer_index=i,
        reshape_transform=backbone.gradcam_reshape_transform
    )
    # Only the first layer is processed; remove the break to run all layers.
    break

Using cache found in C:\Users\tilof/.cache\torch\hub\facebookresearch_detr_main


OPTIONS torch.Size([92, 256])
