In [24]:
import os
os.listdir('/kaggle/input/val-mini/val_mini')

['n01440764', 'n01494475', 'n01484850', 'n01491361', 'n01443537']

In [25]:
"""
UTILS
"""
#!/usr/bin/env python
"""flashtorch.utils

This module provides utility functions for image handling and tensor
transformation.

"""
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import torch.nn.functional as ff
import cv2
import torch

def load_image(image_path):
    """Loads image as a PIL RGB image.

        Args:
            - **image_path (str) - **: A path to the image

        Returns:
            An instance of PIL.Image.Image in RGB

    """

    return Image.open(image_path).convert('RGB')


def apply_transforms(image, size=224):
    """Transforms a PIL image to torch.Tensor.

    Applies a series of tranformations on PIL image including a conversion
    to a tensor. The returned tensor has a shape of :math:`(N, C, H, W)` and
    is ready to be used as an input to neural networks.

    First the image is resized to 256, then cropped to 224. The `means` and
    `stds` for normalisation are taken from numbers used in ImageNet, as
    currently developing the package for visualizing pre-trained models.

    The plan is to to expand this to handle custom size/mean/std.

    Args:
        image (PIL.Image.Image or numpy array)
        size (int, optional, default=224): Desired size (width/height) of the
            output tensor

    Shape:
        Input: :math:`(C, H, W)` for numpy array
        Output: :math:`(N, C, H, W)`

    Returns:
        torch.Tensor (torch.float32): Transformed image tensor

    Note:
        Symbols used to describe dimensions:
            - N: number of images in a batch
            - C: number of channels
            - H: height of the image
            - W: width of the image

    """

    if not isinstance(image, Image.Image):
        image = F.to_pil_image(image)

    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]

    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])

    tensor = transform(image).unsqueeze(0)

    tensor.requires_grad = True

    return tensor

def apply_transforms_v0(image, size=224):
    """Transforms a PIL image to torch.Tensor.

    Applies a series of tranformations on PIL image including a conversion
    to a tensor. The returned tensor has a shape of :math:`(N, C, H, W)` and
    is ready to be used as an input to neural networks.

    First the image is resized to 256, then cropped to 224. The `means` and
    `stds` for normalisation are taken from numbers used in ImageNet, as
    currently developing the package for visualizing pre-trained models.

    The plan is to to expand this to handle custom size/mean/std.

    Args:
        image (PIL.Image.Image or numpy array)
        size (int, optional, default=224): Desired size (width/height) of the
            output tensor

    Shape:
        Input: :math:`(C, H, W)` for numpy array
        Output: :math:`(N, C, H, W)`

    Returns:
        torch.Tensor (torch.float32): Transformed image tensor

    Note:
        Symbols used to describe dimensions:
            - N: number of images in a batch
            - C: number of channels
            - H: height of the image
            - W: width of the image

    """

    if not isinstance(image, Image.Image):
        image = F.to_pil_image(image)

    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]

    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor()
    ])

    tensor = transform(image).unsqueeze(0)

    tensor.requires_grad = True

    return tensor


def denormalize(tensor):
    """Reverses the normalisation on a tensor.

    Performs a reverse operation on a tensor, so the pixel value range is
    between 0 and 1. Useful for when plotting a tensor into an image.

    Normalisation: (image - mean) / std
    Denormalisation: image * std + mean

    Args:
        tensor (torch.Tensor, dtype=torch.float32): Normalized image tensor

    Shape:
        Input: :math:`(N, C, H, W)`
        Output: :math:`(N, C, H, W)` (same shape as input)

    Return:
        torch.Tensor (torch.float32): Demornalised image tensor with pixel
            values between [0, 1]

    Note:
        Symbols used to describe dimensions:
            - N: number of images in a batch
            - C: number of channels
            - H: height of the image
            - W: width of the image

    """

    means = [0.485, 0.456, 0.406]
    stds = [0.229, 0.224, 0.225]

    denormalized = tensor.clone()

    for channel, mean, std in zip(denormalized[0], means, stds):
        channel = channel.mul(std).add(mean)


    return denormalized


def standardize_and_clip(tensor, min_value=0.0, max_value=1.0):
    """Standardizes and clips input tensor.

    Standardize the input tensor (mean = 0.0, std = 1.0), ensures std is 0.1
    and clips it to values between min/max (default: 0.0/1.0).

    Args:
        tensor (torch.Tensor):
        min_value (float, optional, default=0.0)
        max_value (float, optional, default=1.0)

    Shape:
        Input: :math:`(C, H, W)`
        Output: Same as the input

    Return:
        torch.Tensor (torch.float32): Normalised tensor with values between
            [min_value, max_value]

    """

    tensor = tensor.detach().cpu()

    mean = tensor.mean()
    std = tensor.std()
    if std == 0:
        std += 1e-7

    standardized = tensor.sub(mean).div(std).mul(0.1)
    clipped = standardized.add(0.5).clamp(min_value, max_value)

    return clipped


def format_for_plotting(tensor):
    """Formats the shape of tensor for plotting.

    Tensors typically have a shape of :math:`(N, C, H, W)` or :math:`(C, H, W)`
    which is not suitable for plotting as images. This function formats an
    input tensor :math:`(H, W, C)` for RGB and :math:`(H, W)` for mono-channel
    data.

    Args:
        tensor (torch.Tensor, torch.float32): Image tensor

    Shape:
        Input: :math:`(N, C, H, W)` or :math:`(C, H, W)`
        Output: :math:`(H, W, C)` or :math:`(H, W)`, respectively

    Return:
        torch.Tensor (torch.float32): Formatted image tensor (detached)

    Note:
        Symbols used to describe dimensions:
            - N: number of images in a batch
            - C: number of channels
            - H: height of the image
            - W: width of the image

    """

    has_batch_dimension = len(tensor.shape) == 4
    formatted = tensor.clone()

    if has_batch_dimension:
        formatted = tensor.squeeze(0)

    if formatted.shape[0] == 1:
        return formatted.squeeze(0).detach()
    else:
        return formatted.permute(1, 2, 0).detach()


def visualize(input_, gradients, save_path=None, cmap='viridis', alpha=0.7):

    """ Method to plot the explanation.

        # Arguments
            input_: Tensor. Original image.
            gradients: Tensor. Saliency map result.
            save_path: String. Defaults to None.
            cmap: Defaults to be 'viridis'.
            alpha: Defaults to be 0.7.

    """

    input_ = format_for_plotting(denormalize(input_))
    gradients = format_for_plotting(standardize_and_clip(gradients))

    subplots = [
        ('Input image', [(input_, None, None)]),
        ('Saliency map across RGB channels', [(gradients, None, None)]),
        ('Overlay', [(input_, None, None), (gradients, cmap, alpha)])
    ]

    num_subplots = len(subplots)

    fig = plt.figure(figsize=(16, 3))

    for i, (title, images) in enumerate(subplots):
        ax = fig.add_subplot(1, num_subplots, i + 1)
        ax.set_axis_off()

        for image, cmap, alpha in images:
            ax.imshow(image, cmap=cmap, alpha=alpha)

        ax.set_title(title)
    if save_path is not None:
        plt.savefig(save_path)


def basic_visualize(input_, gradients, save_path=None, weight=None, cmap='viridis', alpha=0.7):

    """ Method to plot the explanation.

        # Arguments
            input_: Tensor. Original image.
            gradients: Tensor. Saliency map result.
            save_path: String. Defaults to None.
            cmap: Defaults to be 'viridis'.
            alpha: Defaults to be 0.7.

    """
    input_ = format_for_plotting(denormalize(input_))
    gradients = format_for_plotting(standardize_and_clip(gradients))

    subplots = [
        ('Saliency map across RGB channels', [(gradients, None, None)]),
        ('Overlay', [(input_, None, None), (gradients, cmap, alpha)])
    ]

    num_subplots = len(subplots)

    fig = plt.figure(figsize=(4, 4))

    for i, (title, images) in enumerate(subplots):
        ax = fig.add_subplot(1, num_subplots, i + 1)
        ax.set_axis_off()

        for image, cmap, alpha in images:
            # ax.imshow(image, cmap=cmap, alpha=alpha)
            ax.imshow(image,alpha=alpha)
    if save_path is not None:
        plt.savefig(save_path)


def find_resnet_layer(arch, target_layer_name):
    """Find resnet layer to calculate GradCAM and GradCAM++

    Args:
        arch: default torchvision densenet models
        target_layer_name (str): the name of layer with its hierarchical information. please refer to usages below.
            target_layer_name = 'conv1'
            target_layer_name = 'layer1'
            target_layer_name = 'layer1_basicblock0'
            target_layer_name = 'layer1_basicblock0_relu'
            target_layer_name = 'layer1_bottleneck0'
            target_layer_name = 'layer1_bottleneck0_conv1'
            target_layer_name = 'layer1_bottleneck0_downsample'
            target_layer_name = 'layer1_bottleneck0_downsample_0'
            target_layer_name = 'avgpool'
            target_layer_name = 'fc'

    Return:
        target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'layer4'

    if 'layer' in target_layer_name:
        hierarchy = target_layer_name.split('_')
        layer_num = int(hierarchy[0].lstrip('layer'))
        if layer_num == 1:
            target_layer = arch.layer1
        elif layer_num == 2:
            target_layer = arch.layer2
        elif layer_num == 3:
            target_layer = arch.layer3
        elif layer_num == 4:
            target_layer = arch.layer4
        else:
            raise ValueError('unknown layer : {}'.format(target_layer_name))

        if len(hierarchy) >= 2:
            bottleneck_num = int(hierarchy[1].lower().lstrip('bottleneck').lstrip('basicblock'))
            target_layer = target_layer[bottleneck_num]

        if len(hierarchy) >= 3:
            target_layer = target_layer._modules[hierarchy[2]]

        if len(hierarchy) == 4:
            target_layer = target_layer._modules[hierarchy[3]]

    else:
        target_layer = arch._modules[target_layer_name]

    return target_layer


def find_densenet_layer(arch, target_layer_name):
    """Find densenet layer to calculate GradCAM and GradCAM++

    Args:
        arch: default torchvision densenet models
        target_layer_name (str): the name of layer with its hierarchical information. please refer to usages below.
            target_layer_name = 'features'
            target_layer_name = 'features_transition1'
            target_layer_name = 'features_transition1_norm'
            target_layer_name = 'features_denseblock2_denselayer12'
            target_layer_name = 'features_denseblock2_denselayer12_norm1'
            target_layer_name = 'features_denseblock2_denselayer12_norm1'
            target_layer_name = 'classifier'

    Return:
        target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """

    if target_layer_name is None:
        target_layer_name = 'features'

    hierarchy = target_layer_name.split('_')
    target_layer = arch._modules[hierarchy[0]]

    if len(hierarchy) >= 2:
        target_layer = target_layer._modules[hierarchy[1]]

    if len(hierarchy) >= 3:
        target_layer = target_layer._modules[hierarchy[2]]

    if len(hierarchy) == 4:
        target_layer = target_layer._modules[hierarchy[3]]

    return target_layer


def find_vgg_layer(arch, target_layer_name):
    """Find vgg layer to calculate GradCAM and GradCAM++

    Args:
        arch: default torchvision densenet models
        target_layer_name (str): the name of layer with its hierarchical information. please refer to usages below.
            target_layer_name = 'features'
            target_layer_name = 'features_42'
            target_layer_name = 'classifier'
            target_layer_name = 'classifier_0'

    Return:
        target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'features'

    hierarchy = target_layer_name.split('_')

    if len(hierarchy) >= 1:
        target_layer = arch.features

    if len(hierarchy) == 2:
        target_layer = target_layer[int(hierarchy[1])]
    # print(f'The target layer is: {target_layer}')
    return target_layer


def find_alexnet_layer(arch, target_layer_name):
    """Find alexnet layer to calculate GradCAM and GradCAM++

    Args:
        arch: default torchvision densenet models
        target_layer_name (str): the name of layer with its hierarchical information. please refer to usages below.
            target_layer_name = 'features'
            target_layer_name = 'features_0'
            target_layer_name = 'classifier'
            target_layer_name = 'classifier_0'

    Return:
        target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'features_29'

    hierarchy = target_layer_name.split('_')

    if len(hierarchy) >= 1:
        target_layer = arch.features

    if len(hierarchy) == 2:
        target_layer = target_layer[int(hierarchy[1])]

    return target_layer


def find_squeezenet_layer(arch, target_layer_name):
    """Find squeezenet layer to calculate GradCAM and GradCAM++

        Args:
            - **arch - **: default torchvision densenet models
            - **target_layer_name (str) - **: the name of layer with its hierarchical information. please refer to usages below.
                target_layer_name = 'features_12'
                target_layer_name = 'features_12_expand3x3'
                target_layer_name = 'features_12_expand3x3_activation'

        Return:
            target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'features'

    hierarchy = target_layer_name.split('_')
    target_layer = arch._modules[hierarchy[0]]

    if len(hierarchy) >= 2:
        target_layer = target_layer._modules[hierarchy[1]]

    if len(hierarchy) == 3:
        target_layer = target_layer._modules[hierarchy[2]]

    elif len(hierarchy) == 4:
        target_layer = target_layer._modules[hierarchy[2] + '_' + hierarchy[3]]

    return target_layer


def find_googlenet_layer(arch, target_layer_name):
    """Find squeezenet layer to calculate GradCAM and GradCAM++

        Args:
            - **arch - **: default torchvision googlenet models
            - **target_layer_name (str) - **: the name of layer with its hierarchical information. please refer to usages below.
                target_layer_name = 'inception5b'

        Return:
            target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'features'

    hierarchy = target_layer_name.split('_')
    target_layer = arch._modules[hierarchy[0]]

    if len(hierarchy) >= 2:
        target_layer = target_layer._modules[hierarchy[1]]

    if len(hierarchy) == 3:
        target_layer = target_layer._modules[hierarchy[2]]

    elif len(hierarchy) == 4:
        target_layer = target_layer._modules[hierarchy[2] + '_' + hierarchy[3]]

    return target_layer


def find_mobilenet_layer(arch, target_layer_name):
    """Find mobilenet layer to calculate GradCAM and GradCAM++

        Args:
            - **arch - **: default torchvision googlenet models
            - **target_layer_name (str) - **: the name of layer with its hierarchical information. please refer to usages below.
                target_layer_name = 'features'

        Return:
            target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'features'

    hierarchy = target_layer_name.split('_')
    target_layer = arch._modules[hierarchy[0]]

    if len(hierarchy) >= 2:
        target_layer = target_layer._modules[hierarchy[1]]

    if len(hierarchy) == 3:
        target_layer = target_layer._modules[hierarchy[2]]

    elif len(hierarchy) == 4:
        target_layer = target_layer._modules[hierarchy[2] + '_' + hierarchy[3]]

    return target_layer


def find_shufflenet_layer(arch, target_layer_name):
    """Find mobilenet layer to calculate GradCAM and GradCAM++

        Args:
            - **arch - **: default torchvision googlenet models
            - **target_layer_name (str) - **: the name of layer with its hierarchical information. please refer to usages below.
                target_layer_name = 'conv5'

        Return:
            target_layer: found layer. this layer will be hooked to get forward/backward pass information.
    """
    if target_layer_name is None:
        target_layer_name = 'features'

    hierarchy = target_layer_name.split('_')
    target_layer = arch._modules[hierarchy[0]]

    if len(hierarchy) >= 2:
        target_layer = target_layer._modules[hierarchy[1]]

    if len(hierarchy) == 3:
        target_layer = target_layer._modules[hierarchy[2]]

    elif len(hierarchy) == 4:
        target_layer = target_layer._modules[hierarchy[2] + '_' + hierarchy[3]]

    return target_layer


def find_layer(arch, target_layer_name):
    """Find target layer to calculate CAM.

        : Args:
            - **arch - **: Self-defined architecture.
            - **target_layer_name - ** (str): Name of target class.

        : Return:
            - **target_layer - **: Found layer. This layer will be hooked to get forward/backward pass information.
    """

    if target_layer_name.split('_') not in arch._modules.keys():
        raise Exception("Invalid target layer name.")
    target_layer = arch._modules[target_layer_name]
    return target_layer




def visualize(img, cam):
    """
    Synthesize an image with CAM to make a result image.
    Args:
        img: (Tensor) shape => (1, 3, H, W)
        cam: (Tensor) shape => (1, 1, H', W')
    Return:
        synthesized image (Tensor): shape =>(1, 3, H, W)
    """

    _, _, H, W = img.shape
    img = img.detach().cpu()
    cam = cam.detach().cpu()
    cam = ff.interpolate(cam, size=(H, W), mode='bilinear', align_corners=False)
    cam = 255 * cam.squeeze()
    heatmap = cv2.applyColorMap(np.uint8(cam), cv2.COLORMAP_JET)
    heatmap = torch.from_numpy(heatmap.transpose(2, 0, 1))
    heatmap = heatmap.float() / 255
    b, g, r = heatmap.split(1)
    heatmap = torch.cat([r, g, b])

    result = heatmap + img.cpu()
    result = result.div(result.max())
    result = torch.squeeze(result)
    # print(f'Shape of result is: {result.shape}')
    result = result.permute(1,2,0)
    plt.imshow(result)
    plt.show()
    plt.imsave('tower_red.png', result)
    return result

# **ScoreCAM**

In [26]:
'''
Part of code borrows from https://github.com/1Konny/gradcam_plus_plus-pytorch
'''

import torch
import torch.nn.functional as f

class BaseCAM(object):
    """ Base class for Class activation mapping.

        : Args
            - **model_dict -** : Dict. Has format as dict(type='vgg', arch=torchvision.models.vgg16(pretrained=True),
            layer_name='features',input_size=(224, 224)).

    """

    def __init__(self, model_dict):
        model_type = model_dict['type']
        layer_name = model_dict['layer_name']

        self.model_arch = model_dict['arch']
        self.model_arch.eval()
        if torch.cuda.is_available():
          self.model_arch.cuda()
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            if torch.cuda.is_available():
              self.gradients['value'] = grad_output[0].cuda()
            else:
              self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            if torch.cuda.is_available():
              self.activations['value'] = output.cuda()
            else:
              self.activations['value'] = output
            return None

        if 'vgg' in model_type.lower():
            self.target_layer = find_vgg_layer(self.model_arch, layer_name)
        elif 'resnet' in model_type.lower():
            self.target_layer = find_resnet_layer(self.model_arch, layer_name)
        elif 'densenet' in model_type.lower():
            self.target_layer = find_densenet_layer(self.model_arch, layer_name)
        elif 'alexnet' in model_type.lower():
            self.target_layer = find_alexnet_layer(self.model_arch, layer_name)
        elif 'squeezenet' in model_type.lower():
            self.target_layer = find_squeezenet_layer(self.model_arch, layer_name)
        elif 'googlenet' in model_type.lower():
            self.target_layer = find_googlenet_layer(self.model_arch, layer_name)
        elif 'shufflenet' in model_type.lower():
            self.target_layer = find_shufflenet_layer(self.model_arch, layer_name)
        elif 'mobilenet' in model_type.lower():
            self.target_layer = find_mobilenet_layer(self.model_arch, layer_name)
        else:
            self.target_layer = find_layer(self.model_arch, layer_name)

        self.target_layer.register_forward_hook(forward_hook)
        self.target_layer.register_backward_hook(backward_hook)

    def forward(self, input, class_idx=None, retain_graph=False):
        return None

    def __call__(self, input, class_idx=None, retain_graph=False):
        return self.forward(input, class_idx, retain_graph)


# ScoreCAM

class ScoreCAM(BaseCAM):

    """
        ScoreCAM, inherit from BaseCAM

    """

    def __init__(self, model_dict):
        super().__init__(model_dict)
        self.predicted_confidence_cam = 0
        self.predicted_confidence_cam_list = []
        self.avg_drop = 0
        self.avg_increase = 0
    def forward(self, input, class_idx=None, retain_graph=False):
        b, c, h, w = input.size()

        # predication on raw input
        logit = self.model_arch(input).cuda()

        if class_idx is None:
            predicted_class = logit.max(1)[-1]
            score = logit[:, logit.max(1)[-1]].squeeze()
        else:
            predicted_class = torch.LongTensor([class_idx])
            score = logit[:, class_idx].squeeze()

        logit = f.softmax(logit)

        if torch.cuda.is_available():
          predicted_class= predicted_class.cuda()
          score = score.cuda()
          logit = logit.cuda()

        self.model_arch.zero_grad()
        score.backward(retain_graph=retain_graph)
        activations = self.activations['value']
        b, k, u, v = activations.size()
        
        score_saliency_map = torch.zeros((1, 1, h, w))

        if torch.cuda.is_available():
          activations = activations.cuda()
          score_saliency_map = score_saliency_map.cuda()

        with torch.no_grad():
          for i in range(k):

            # upsampling
            saliency_map = torch.unsqueeze(activations[:, i, :, :], 1)
            saliency_map = f.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)

            if saliency_map.max() == saliency_map.min():
              continue

            # normalize to 0-1
            norm_saliency_map = (saliency_map - saliency_map.min()) / (saliency_map.max() - saliency_map.min())

            output = self.model_arch(input * norm_saliency_map)
            output = f.softmax(output)
            score = output[0][predicted_class]

            score_saliency_map +=  score * saliency_map

        score_saliency_map = f.relu(score_saliency_map)
        score_saliency_map_min, score_saliency_map_max = score_saliency_map.min(), score_saliency_map.max()

        if score_saliency_map_min == score_saliency_map_max:
            return None

        score_saliency_map = (score_saliency_map - score_saliency_map_min).div(score_saliency_map_max - score_saliency_map_min).data

        return score_saliency_map

    def __call__(self, input, class_idx=None, retain_graph=False):
        return self.forward(input, class_idx, retain_graph)

    def metrics(self,model,input_):
        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count

#ScoreCAM_d
# class ScoreCAM_d(BaseCAM):

#     """
#         ScoreCAM, inherit from BaseCAM

#     """

#     def __init__(self, model_dict):
#         super().__init__(model_dict)
#         self.predicted_confidence_cam = 0
#         self.predicted_confidence_cam_list = []
#         self.avg_drop = 0
#         self.avg_increase = 0
#     def forward(self, input, class_idx=None, retain_graph=False):
#         b, c, h, w = input.size()

#         # predication on raw input
#         logit = self.model_arch(input).cuda()

#         if class_idx is None:
#             predicted_class = logit.max(1)[-1]
#             score = logit[:, logit.max(1)[-1]].squeeze()
#         else:
#             predicted_class = torch.LongTensor([class_idx])
#             score = logit[:, class_idx].squeeze()

#         logit = f.softmax(logit)

#         if torch.cuda.is_available():
#           predicted_class= predicted_class.cuda()
#           score = score.cuda()
#           logit = logit.cuda()

#         self.model_arch.zero_grad()
#         score.backward(retain_graph=retain_graph)
#         activations = self.activations['value']
#         gradients = self.gradients['value']
#         b, k, u, v = activations.size()
        
#         score_saliency_map = torch.zeros((1, 1, h, w))

#         if torch.cuda.is_available():
#           activations = activations.cuda()
#           score_saliency_map = score_saliency_map.cuda()

#         with torch.no_grad():
#           for i in range(k):

#             # upsampling
#             saliency_map = torch.unsqueeze(activations[:, i, :, :], 1)
#             if (activations[:, i, :, :]*gradients[:, i, :, :]).max() < 0:
#                 continue
                
#             saliency_map = f.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
            
#             if saliency_map.max() == saliency_map.min():
#               continue

#             # normalize to 0-1
#             norm_saliency_map = (saliency_map - saliency_map.min()) / (saliency_map.max() - saliency_map.min())

#             output = self.model_arch(input * norm_saliency_map)
#             output = f.softmax(output)
#             score = output[0][predicted_class]

#             score_saliency_map +=  score * saliency_map

#         score_saliency_map = f.relu(score_saliency_map)
#         score_saliency_map_min, score_saliency_map_max = score_saliency_map.min(), score_saliency_map.max()

#         if score_saliency_map_min == score_saliency_map_max:
#             return None

#         score_saliency_map = (score_saliency_map - score_saliency_map_min).div(score_saliency_map_max - score_saliency_map_min).data

#         return score_saliency_map

#     def __call__(self, input, class_idx=None, retain_graph=False):
#         return self.forward(input, class_idx, retain_graph)

#     def metrics(self,model,input_):
#         predicted_confidence, predicted_class = model(input_).max(1)
#         predicted_class = predicted_class.item()
#         predicted_confidence = predicted_confidence.item()

#         scorecam_map = self.forward(input_)
#         if scorecam_map == None:
#             self.predicted_confidence_cam = 0
#         else:
#             scorecam_map_  = scorecam_map * input_
#             scorecam_map_output = model(scorecam_map_)
#             predicted_confidence_cam = scorecam_map_output[0][predicted_class]
#             self.predicted_confidence_cam = predicted_confidence_cam.item()

#         self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
#         if predicted_confidence > self.predicted_confidence_cam:
#           self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
#         else:
#           self.avg_increase = self.avg_increase + 1

#     def percentize(self, count):
#         self.avg_drop = self.avg_drop*100/count
#         self.avg_increase = self.avg_increase*100/count

#ScoreCAM_x
class ScoreCAM_x(BaseCAM):

    """
    ScoreCAM, inherit from BaseCAM

    """

    def __init__(self, model_dict, threshold=0.5, isthreshold=True):
        super().__init__(model_dict)
        self.threshold = threshold
        self.predicted_confidence_cam = 0
        self.predicted_confidence_cam_list = []
        self.avg_drop = 0
        self.avg_increase = 0
        self.isthreshold = isthreshold
    def forward(self, input, class_idx=None, retain_graph=False):
        b, c, h, w = input.size()

        # predication on raw input
        logit = self.model_arch(input).cuda()

        if class_idx is None:
            predicted_class = logit.max(1)[-1]
            score = logit[:, logit.max(1)[-1]].squeeze()
        else:
            predicted_class = torch.LongTensor([class_idx])
            score = logit[:, class_idx].squeeze()

        logit = f.softmax(logit)

        if torch.cuda.is_available():
            predicted_class= predicted_class.cuda()
            score = score.cuda()
            logit = logit.cuda()

        self.model_arch.zero_grad()
        score.backward(retain_graph=retain_graph)
        activations = self.activations['value']
        b, k, u, v = activations.size()

        score_saliency_map = torch.zeros((1, 1, h, w))

        if torch.cuda.is_available():
            activations = activations.cuda()
            score_saliency_map = score_saliency_map.cuda()

        with torch.no_grad():
            for i in range(k):

                # upsampling
                saliency_map = torch.unsqueeze(activations[:, i, :, :], 1)
                saliency_map = f.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)

                saliency_map = f.tanh(saliency_map)
                norm_saliency_map = saliency_map.clone()


                output = self.model_arch(input * norm_saliency_map)
                output = f.softmax(output)
                score = output[0][predicted_class]
                # print(f'Score is: {score}')   # Score is: tensor([0.0057], device='cuda:0')
                score_saliency_map +=  score * saliency_map
                # print(f'Score_saliency map: {score_saliency_map}')
                score_saliency_map = f.relu(score_saliency_map)
            score_saliency_map_min, score_saliency_map_max = score_saliency_map.min(), score_saliency_map.max()

        if score_saliency_map_min == score_saliency_map_max:
            return None

        score_saliency_map = (score_saliency_map - score_saliency_map_min).div(score_saliency_map_max - score_saliency_map_min).data

        return score_saliency_map

    def __call__(self, input, class_idx=None, retain_graph=False):
        return self.forward(input, class_idx, retain_graph)

    def metrics(self,model,input_):
        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

            self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
            self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
            self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count

#xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
class LayerCAM(BaseCAM):

    def __init__(self, model_dict):
        super().__init__(model_dict)
        self.predicted_confidence_cam_list = []
        self.avg_drop = 0
        self.avg_increase = 0
    def forward(self, input, class_idx=None, retain_graph=False):
        b, c, h, w = input.size()

        # predication on raw input
        logit = self.model_arch(input).cuda()

        if class_idx is None:
            predicted_class = logit.max(1)[-1]
            score = logit[:, logit.max(1)[-1]].squeeze()
        else:
            predicted_class = torch.LongTensor([class_idx])
            score = logit[:, class_idx].squeeze()

        #logit = F.softmax(logit)

        if torch.cuda.is_available():
          predicted_class = predicted_class.cuda()
          score = score.cuda()
          logit = logit.cuda()

        one_hot_output = torch.FloatTensor(1, logit.size()[-1]).zero_()
        one_hot_output[0][predicted_class] = 1
        one_hot_output = one_hot_output.cuda(non_blocking=True)
        # Zero grads
        self.model_arch.zero_grad()
        # Backward pass with specified target
        logit.backward(gradient=one_hot_output, retain_graph=True)
        activations = self.activations['value'].clone().detach()
        gradients = self.gradients['value'].clone().detach()
        b, k, u, v = activations.size()

        with torch.no_grad():
            activation_maps = activations * f.relu(gradients)
            cam = torch.sum(activation_maps, dim=1).unsqueeze(0)
            cam = f.interpolate(cam, size=(h, w), mode='bilinear', align_corners=False)
            cam_min, cam_max = cam.min(), cam.max()
            if cam_min == cam_max:
              return None
            norm_cam = (cam - cam_min).div(cam_max - cam_min + 1e-8).data
#
        return norm_cam

    def __call__(self, input, class_idx=None, retain_graph=False):
        return self.forward(input, class_idx, retain_graph)

    def metrics(self,model,input_):
        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count


In [27]:
import torch
import torch.nn.functional as f

from statistics import mode, mean


class SaveValues():
    def __init__(self, m):
        # register a hook to save values of activations and gradients
        self.activations = None
        self.gradients = None
        self.forward_hook = m.register_forward_hook(self.hook_fn_act)
        self.backward_hook = m.register_backward_hook(self.hook_fn_grad)

    def hook_fn_act(self, module, input, output):
        self.activations = output

    def hook_fn_grad(self, module, grad_input, grad_output):
        self.gradients = grad_output[0]

    def remove(self):
        self.forward_hook.remove()
        self.backward_hook.remove()


class CAM(object):
    """ Class Activation Mapping """

    def __init__(self, model, target_layer):
        """
        Args:
            model: a base model to get CAM which have global pooling and fully connected layer.
            target_layer: conv_layer before Global Average Pooling
        """

        self.model = model
        self.target_layer = target_layer

        # save values of activations and gradients in target_layer
        self.values = SaveValues(self.target_layer)

    def forward(self, x, idx=None):
        """
        Args:
            x: input image. shape =>(1, 3, H, W)
        Return:
            heatmap: class activation mappings of the predicted class
        """

        # object classification
        score = self.model(x)

        prob = f.softmax(score, dim=1)

        if idx is None:
            prob, idx = torch.max(prob, dim=1)
            idx = idx.item()
            prob = prob.item()
            # print("predicted class ids {}\t probability {}".format(idx, prob))

        # cam can be calculated from the weights of linear layer and activations
        weight_fc = list(
            self.model._modules.get('fc').parameters())[0].to('cpu').data

        cam = self.getCAM(self.values, weight_fc, idx)

        return cam, idx

    def __call__(self, x):
        return self.forward(x)

    def getCAM(self, values, weight_fc, idx):
        '''
        values: the activations and gradients of target_layer
            activations: feature map before GAP.  shape => (1, C, H, W)
        weight_fc: the weight of fully connected layer.  shape => (num_classes, C)
        idx: predicted class id
        cam: class activation map.  shape => (1, num_classes, H, W)
        '''

        cam = f.conv2d(values.activations, weight=weight_fc[:, :, None, None])
        _, _, h, w = cam.shape

        # class activation mapping only for the predicted class
        # cam is normalized with min-max.
        cam = cam[:, idx, :, :]
        cam -= torch.min(cam)
        cam /= torch.max(cam)
        cam = cam.view(1, 1, h, w)

        return cam.data



class GradCAM(CAM):
    """ Grad CAM """

    def __init__(self, model_dict):
        model = model_dict['arch'].cuda()
        model_type = model_dict['type']
        self.model_arch = model_dict['arch']
        self.predicted_confidence_cam = 0
        self.predicted_confidence_cam_list = []
        self.avg_drop = 0
        self.avg_increase = 0

        layer_name = model_dict['layer_name']
        if 'vgg' in model_type.lower():
            target_layer = find_vgg_layer(self.model_arch, layer_name)
        elif 'resnet' in model_type.lower():
            target_layer = find_resnet_layer(self.model_arch, layer_name)
        elif 'densenet' in model_type.lower():
            target_layer = find_densenet_layer(self.model_arch, layer_name)
        elif 'alexnet' in model_type.lower():
            target_layer = find_alexnet_layer(self.model_arch, layer_name)
        elif 'squeezenet' in model_type.lower():
            target_layer = find_squeezenet_layer(self.model_arch, layer_name)
        elif 'googlenet' in model_type.lower():
            target_layer = find_googlenet_layer(self.model_arch, layer_name)
        elif 'shufflenet' in model_type.lower():
            target_layer = find_shufflenet_layer(self.model_arch, layer_name)
        elif 'mobilenet' in model_type.lower():
            target_layer = find_mobilenet_layer(self.model_arch, layer_name)
        else:
            self.target_layer = find_layer(self.model_arch, layer_name)
        super().__init__(model, target_layer)

        """
        Args:
            model: a base model to get CAM, which need not have global pooling and fully connected layer.
            target_layer: conv_layer you want to visualize
        """

    def forward(self, x, idx=None):
        """
        Args:
            x: input image. shape =>(1, 3, H, W)
            idx: ground truth index => (1, C)
        Return:
            heatmap: class activation mappings of the predicted class
        """

        # anomaly detection
        _,_,h,w = x.size()

        score = self.model(x)
#         print(f'Input Size: {x.size()}')
        prob = f.softmax(score, dim=1)

        if idx is None:
            prob, idx = torch.max(prob, dim=1)
            idx = idx.item()
            prob = prob.item()
#             print("predicted class ids {}\t probability {}".format(idx, prob))

        # calculate cam of the predicted class
        cam = self.getGradCAM(self.values, score, idx) # Size of CAM: torch.Size([1, 1, 12, 16])
        """
        saliency_map = torch.unsqueeze(activations[:, i, :, :], 1)
        saliency_map = f.interpolate(saliency_map, size=(h, w), mode='bilinear', align_corners=False)
        """
        if cam == None:
          return None
        saliency_map = f.interpolate(cam,size=(h,w), mode='bilinear', align_corners=False)
        return saliency_map

    def __call__(self, x):
        return self.forward(x)

    def getGradCAM(self, values, score, idx):
        '''
        values: the activations and gradients of target_layer
            activations: feature map before GAP.  shape => (1, C, H, W)
        score: the output of the model before softmax
        idx: predicted class id
        cam: class activation map.  shape=> (1, 1, H, W)
        '''

        self.model.zero_grad()

        # score[0, idx].backward(retain_graph=True)
        score[0, idx].backward(retain_graph=False)

        activations = values.activations
        gradients = values.gradients
        n, c, _, _ = gradients.shape
        alpha = gradients.view(n, c, -1).mean(2)
        alpha = alpha.view(n, c, 1, 1)

        # shape => (1, 1, H', W')
        cam = (alpha * activations).sum(dim=1, keepdim=True)
        cam = f.relu(cam)
        if cam.any() == None:
          return None

        if torch.min(cam) == torch.max(cam):
          return None
        cam -= torch.min(cam)
        cam /= torch.max(cam)
        return cam.data

    def metrics(self,model,input_):

        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count

class GradCAMpp(CAM):
    """ Grad CAM plus plus """

    def __init__(self, model_dict):
        model = model_dict['arch'].cuda()
        model_type = model_dict['type']
        self.model_arch = model_dict['arch']
        self.predicted_confidence_cam_list = []
        self.predicted_confidence_cam = 0
        self.avg_drop = 0
        self.avg_increase = 0
        layer_name = model_dict['layer_name']
        if 'vgg' in model_type.lower():
            target_layer = find_vgg_layer(self.model_arch, layer_name)
        elif 'resnet' in model_type.lower():
            target_layer = find_resnet_layer(self.model_arch, layer_name)
        elif 'densenet' in model_type.lower():
            target_layer = find_densenet_layer(self.model_arch, layer_name)
        elif 'alexnet' in model_type.lower():
            target_layer = find_alexnet_layer(self.model_arch, layer_name)
        elif 'squeezenet' in model_type.lower():
            target_layer = find_squeezenet_layer(self.model_arch, layer_name)
        elif 'googlenet' in model_type.lower():
            target_layer = find_googlenet_layer(self.model_arch, layer_name)
        elif 'shufflenet' in model_type.lower():
            target_layer = find_shufflenet_layer(self.model_arch, layer_name)
        elif 'mobilenet' in model_type.lower():
            target_layer = find_mobilenet_layer(self.model_arch, layer_name)
        else:
            self.target_layer = find_layer(self.model_arch, layer_name)
        super().__init__(model, target_layer)
        """
        Args:
            model: a base model
            target_layer: conv_layer you want to visualize
        """

    def forward(self, x, idx=None):
        """
        Args:
            x: input image. shape =>(1, 3, H, W)
        Return:
            heatmap: class activation mappings of predicted classes
        """

        _, _, h, w = x.size()

        # object classification
        score = self.model(x)

        prob = f.softmax(score, dim=1)

        if idx is None:
            prob, idx = torch.max(prob, dim=1)
            idx = idx.item()
            prob = prob.item()
#             print("predicted class ids {}\t probability {}".format(idx, prob))

        # caluculate cam of the predicted class
        cam = self.getGradCAMpp(self.values, score, idx)
        if cam == None:
          return None
        saliency_map = f.interpolate(cam,size=(h,w), mode='bilinear', align_corners=False)
        return saliency_map
#         return cam, idx

    def __call__(self, x):
        return self.forward(x)

    def getGradCAMpp(self, values, score, idx):
        '''
        values: the activations and gradients of target_layer
            activations: feature map before GAP.  shape => (1, C, H, W)
        score: the output of the model before softmax. shape => (1, n_classes)
        idx: predicted class id
        cam: class activation map.  shape=> (1, 1, H, W)
        '''
        # print(("Printing from GradCAMpp->getGradCAMpp function:").upper())
        self.model.zero_grad()

        # score[0, idx].backward(retain_graph=True)
        score[0, idx].backward(retain_graph=False)

        activations = values.activations
        gradients = values.gradients
        n, c, _, _ = gradients.shape

        # calculate alpha
        numerator = gradients.pow(2)
        denominator = 2 * gradients.pow(2)
        ag = activations * gradients.pow(3)
        denominator += ag.view(n, c, -1).sum(-1, keepdim=True).view(n, c, 1, 1)
        denominator = torch.where(
            denominator != 0.0, denominator, torch.ones_like(denominator))
        alpha = numerator / (denominator + 1e-7)

        relu_grad = f.relu(score[0, idx].exp() * gradients)
        # print(f'Alpha is: {alpha}')
        # print(f'Gradients are: {gradients}')
        # print(f'Relu_grad are: {relu_grad}')
        # print(f'Score exp is: {score[0,idx].exp()}')
        weights = (alpha * relu_grad).view(n, c, -1).sum(-1).view(n, c, 1, 1)
        # print(f"Weights are: {weights}")
        # shape => (1, 1, H', W')
        cam = (weights * activations).sum(1, keepdim=True)
        cam = f.relu(cam)
        # print(cam)
        # print(f'Max and min of cam from GradCAMpp are: {torch.max(cam)} and {torch.min(cam)}')
        if cam.any() == None:
          return None
        if torch.min(cam) == torch.max(cam):
          return None
        cam -= torch.min(cam)
        cam /= torch.max(cam)

        return cam.data

    def metrics(self,model,input_):

        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count

        
class aug_GradCAMpp(CAM):
    """ Grad CAM plus plus """

    def __init__(self, model_dict):
        model = model_dict['arch'].cuda()
        model_type = model_dict['type']
        self.model_arch = model_dict['arch']
        self.predicted_confidence_cam_list = []
        self.predicted_confidence_cam = 0
        self.avg_drop = 0
        self.avg_increase = 0
        layer_name = model_dict['layer_name']
        if 'vgg' in model_type.lower():
            target_layer = find_vgg_layer(self.model_arch, layer_name)
        elif 'resnet' in model_type.lower():
            target_layer = find_resnet_layer(self.model_arch, layer_name)
        elif 'densenet' in model_type.lower():
            target_layer = find_densenet_layer(self.model_arch, layer_name)
        elif 'alexnet' in model_type.lower():
            target_layer = find_alexnet_layer(self.model_arch, layer_name)
        elif 'squeezenet' in model_type.lower():
            target_layer = find_squeezenet_layer(self.model_arch, layer_name)
        elif 'googlenet' in model_type.lower():
            target_layer = find_googlenet_layer(self.model_arch, layer_name)
        elif 'shufflenet' in model_type.lower():
            target_layer = find_shufflenet_layer(self.model_arch, layer_name)
        elif 'mobilenet' in model_type.lower():
            target_layer = find_mobilenet_layer(self.model_arch, layer_name)
        else:
            self.target_layer = find_layer(self.model_arch, layer_name)
        super().__init__(model, target_layer)
        """
        Args:
            model: a base model
            target_layer: conv_layer you want to visualize
        """

    def forward(self, x, r=100,idx=None):
        """
        Args:
            x: input image. shape =>(1, 3, H, W)
        Return:
            heatmap: class activation mappings of predicted classes
        """

        _, _, h, w = x.size()
        score = self.model(x)

        prob = f.softmax(score, dim=1)

        if idx is None:
            prob, idx = torch.max(prob, dim=1)
            idx = idx.item()
            prob = prob.item()
        # caluculate cam of the predicted class
        transform = transforms.Compose([
            transforms.RandomAffine(degrees=45, translate=(0.2, 0.2)),
        ])
        
        cam = self.getGradCAMpp(self.values, score, idx)
        # object classification
        for idx in range(r):
            x_ = transform(x)
            score = self.model(x_)
            prob = f.softmax(score, dim=1)
            if idx is None:
                prob, idx = torch.max(prob, dim=1)
                idx = idx.item()
                prob = prob.item()
            # caluculate cam of the predicted class
            cam_ = self.getGradCAMpp(self.values, score, idx)
            if cam_ is None:
                continue
            else:
                cam = cam + cam_
                
        if cam == None:
          return None
    
        saliency_map = f.interpolate(cam,size=(h,w), mode='bilinear', align_corners=False)
        saliency_map = (saliency_map - saliency_map.min())/(saliency_map.max()-saliency_map.min())
        return saliency_map


    def __call__(self, x):
        return self.forward(x)

    def getGradCAMpp(self, values, score, idx):
        '''
        values: the activations and gradients of target_layer
            activations: feature map before GAP.  shape => (1, C, H, W)
        score: the output of the model before softmax. shape => (1, n_classes)
        idx: predicted class id
        cam: class activation map.  shape=> (1, 1, H, W)
        '''
        # print(("Printing from GradCAMpp->getGradCAMpp function:").upper())
        self.model.zero_grad()

        # score[0, idx].backward(retain_graph=True)
        score[0, idx].backward(retain_graph=False)

        activations = values.activations
        gradients = values.gradients
        n, c, _, _ = gradients.shape

        # calculate alpha
        numerator = gradients.pow(2)
        denominator = 2 * gradients.pow(2)
        ag = activations * gradients.pow(3)
        denominator += ag.view(n, c, -1).sum(-1, keepdim=True).view(n, c, 1, 1)
        denominator = torch.where(
            denominator != 0.0, denominator, torch.ones_like(denominator))
        alpha = numerator / (denominator + 1e-7)
        relu_grad = f.relu(score[0, idx].exp() * gradients)  
        weights = (alpha * relu_grad).view(n, c, -1).sum(-1).view(n, c, 1, 1)
        cam = (weights * activations).sum(1, keepdim=True)
        cam = f.relu(cam)
        if cam.any() == None:
          return None
        if torch.min(cam) == torch.max(cam):
          return None
        cam -= torch.min(cam)
        cam /= torch.max(cam)

        return cam.data

    def metrics(self,model,input_):

        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count
        
        
class SmoothGradCAMpp(CAM):
    """ Smooth Grad CAM plus plus """

    def __init__(self, model_dict, n_samples=5, stdev_spread=0.15):
        model = model_dict['arch'].cuda()
        model_type = model_dict['type']
        self.model_arch = model_dict['arch']
        self.predicted_confidence_cam_list = []
        self.predicted_confidence_cam = 0
        self.avg_drop = 0
        self.avg_increase = 0

        layer_name = model_dict['layer_name']
        if 'vgg' in model_type.lower():
            target_layer = find_vgg_layer(self.model_arch, layer_name)
        elif 'resnet' in model_type.lower():
            target_layer = find_resnet_layer(self.model_arch, layer_name)
        elif 'densenet' in model_type.lower():
            target_layer = find_densenet_layer(self.model_arch, layer_name)
        elif 'alexnet' in model_type.lower():
            target_layer = find_alexnet_layer(self.model_arch, layer_name)
        elif 'squeezenet' in model_type.lower():
            target_layer = find_squeezenet_layer(self.model_arch, layer_name)
        elif 'googlenet' in model_type.lower():
            target_layer = find_googlenet_layer(self.model_arch, layer_name)
        elif 'shufflenet' in model_type.lower():
            target_layer = find_shufflenet_layer(self.model_arch, layer_name)
        elif 'mobilenet' in model_type.lower():
            target_layer = find_mobilenet_layer(self.model_arch, layer_name)
        else:
            self.target_layer = find_layer(self.model_arch, layer_name)
        super().__init__(model, target_layer)
        """
        Args:
            model: a base model
            target_layer: conv_layer you want to visualize
            n_sample: the number of samples
            stdev_spread: standard deviationß
        """

        self.n_samples = n_samples
        self.stdev_spread = stdev_spread

    def forward(self, x, idx=None):
        """
        Args:
            x: input image. shape =>(1, 3, H, W)
        Return:
            heatmap: class activation mappings of predicted classes
        """

        _, _, h, w = x.size()
        stdev = self.stdev_spread / (x.max() - x.min())
        std_tensor = torch.ones_like(x) * stdev

        indices = []
        probs = []

        for i in range(self.n_samples):
            self.model.zero_grad()

            x_with_noise = torch.normal(mean=x, std=std_tensor)
            x_with_noise.requires_grad_()

            score = self.model(x_with_noise)

            prob = f.softmax(score, dim=1)

            if idx is None:
                prob, idx = torch.max(prob, dim=1)
                idx = idx.item()
                probs.append(prob.item())

            indices.append(idx)

            score[0, idx].backward(retain_graph=True)
            if i == self.n_samples-1:
#                 print('Limit Accessed')
                score[0, idx].backward(retain_graph=False)

            activations = self.values.activations
            gradients = self.values.gradients
            n, c, _, _ = gradients.shape

            # calculate alpha
            numerator = gradients.pow(2)
            denominator = 2 * gradients.pow(2)
            ag = activations * gradients.pow(3)
            denominator += \
                ag.view(n, c, -1).sum(-1, keepdim=True).view(n, c, 1, 1)
            denominator = torch.where(
                denominator != 0.0, denominator, torch.ones_like(denominator))
            alpha = numerator / (denominator + 1e-7)

            relu_grad = f.relu(score[0, idx].exp() * gradients)
            weights = \
                (alpha * relu_grad).view(n, c, -1).sum(-1).view(n, c, 1, 1)

            # shape => (1, 1, H', W')
            cam = (weights * activations).sum(1, keepdim=True)
            cam = f.relu(cam)
            cam -= torch.min(cam)
            cam /= torch.max(cam)

            if i == 0:
                total_cams = cam.clone()
            else:
                total_cams += cam

        total_cams /= self.n_samples
        idx = mode(indices)
        prob = mean(probs)
        if total_cams.any() == None:
          return None
        if total_cams.max() == total_cams.min():
          return None
#         print("predicted class ids {}\t probability {}".format(idx, prob))
        saliency_map = f.interpolate(total_cams,size=(h,w), mode='bilinear', align_corners=False)
        return saliency_map
#         return total_cams.data, idx

    def __call__(self, x):
        return self.forward(x)

    def metrics(self,model,input_):

        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self.forward(input_)
        if scorecam_map == None:
            self.predicted_confidence_cam = 0
        else:
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count


# X GradCAM Implementation:

In [28]:
import argparse
import cv2
import numpy as np
import torch
from torch.autograd import Function
from torchvision import models

class _BaseCAM(object):
    """ Base class for Class activation mapping.

        : Args
            - **model_dict -** : Dict. Has format as dict(type='vgg', arch=torchvision.models.vgg16(pretrained=True),
            layer_name='features',input_size=(224, 224)).

    """

    def __init__(self, model_dict):
        model_type = model_dict['type']
        layer_name = model_dict['layer_name']

        self.model_arch = model_dict['arch']
        self.model_arch.eval()
        if torch.cuda.is_available():
          self.model_arch.cuda()
        self.gradients = dict()
        self.activations = dict()

        def backward_hook(module, grad_input, grad_output):
            if torch.cuda.is_available():
              self.gradients['value'] = grad_output[0].cuda()
            else:
              self.gradients['value'] = grad_output[0]
            return None

        def forward_hook(module, input, output):
            if torch.cuda.is_available():
              self.activations['value'] = output.cuda()
            else:
              self.activations['value'] = output
            return None

        if 'vgg' in model_type.lower():
            self.target_layer = find_vgg_layer(self.model_arch, layer_name)
        elif 'resnet' in model_type.lower():
            self.target_layer = find_resnet_layer(self.model_arch, layer_name)
        elif 'densenet' in model_type.lower():
            self.target_layer = find_densenet_layer(self.model_arch, layer_name)
        elif 'alexnet' in model_type.lower():
            self.target_layer = find_alexnet_layer(self.model_arch, layer_name)
        elif 'squeezenet' in model_type.lower():
            self.target_layer = find_squeezenet_layer(self.model_arch, layer_name)
        elif 'googlenet' in model_type.lower():
            self.target_layer = find_googlenet_layer(self.model_arch, layer_name)
        elif 'shufflenet' in model_type.lower():
            self.target_layer = find_shufflenet_layer(self.model_arch, layer_name)
        elif 'mobilenet' in model_type.lower():
            self.target_layer = find_mobilenet_layer(self.model_arch, layer_name)
        else:
            self.target_layer = find_layer(self.model_arch, layer_name)

        self.target_layer.register_forward_hook(forward_hook)
        self.target_layer.register_backward_hook(backward_hook)

    def forward(self, input, class_idx=None, retain_graph=True):
        return None

    def __call__(self, input, class_idx=None, retain_graph=True):
        return self.forward(input, class_idx, retain_graph)

    def get_output(self, x):
      for name, module in self.model_arch._modules.items():
            x = module(x)
      return x

# class ModelOutputs(_BaseCAM):
#     """ Class for making a forward pass, and getting:
#     1. The network output.
#     2. Activations from intermeddiate targetted layers.
#     3. Gradients from intermeddiate targetted layers. """

#     def __init__(self, model_dict):
#         super().__init__(model_dict)
#         self.model = model_dict['arch']


#         # self.feature_extractor = _BaseCAM(model_dict)

#     def get_gradients(self):
#         return self.gradients['value']

#     def __call__(self, x):
#         print(f'Self activations: {self.activations}')

#         target_activations = self.activations['value']
#         output = self.get_output(x)
#         output = output.view(output.size(0), -1)
#         output = self.model.classifier(output)
#         return target_activations, output



class XGradCam(_BaseCAM):
    def __init__(self, model_dict,use_cuda=True):
        super().__init__(model_dict)
        self.model = model_dict['arch']
        self.model.eval()
        self.cuda = use_cuda
        self.predicted_confidence_cam_list = []
        self.predicted_confidence_cam = 0
        self.avg_drop = 0
        self.avg_increase = 0
        if self.cuda:
            self.model = self.model.cuda()

        # self.extractor = ModelOutputs(model_dict)

    def forward(self, input):
        return self.model(input)

    def __call__(self, input, index=-1):
        output = self.model(input)
        # for name, module in self.model._modules.items():
        #     output = module(output)

        # output = output.view(output.size(0), -1)
        # output = self.model.classifier(output)
        # output.backward()
        features = self.activations['value']
        if self.cuda:
            features = features.cuda()
            output = output.cuda()


        if index == -1:
            index = np.argmax(output.cpu().data.numpy())

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][index] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        if self.cuda:
            one_hot = torch.sum(one_hot.cuda() * output)
        else:
            one_hot = torch.sum(one_hot * output)

        # self.model.features.zero_grad()
        # self.model.classifier.zero_grad()
        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        grads_val = self.gradients['value'].cpu().data.numpy()

        target = features
        target = target.cpu().data.numpy()[0, :]

        # XGrad_CAM
        X_weights = np.sum(grads_val[0, :] * target, axis=(1, 2))
        X_weights = X_weights / (np.sum(target, axis=(1, 2)) + 1e-6)
        # Grad_CAM
        weights = np.mean(grads_val, axis=(2, 3))[0, :]

        X_cam = np.zeros(target.shape[1:], dtype=np.float32)
        cam = np.zeros(target.shape[1:], dtype=np.float32)

        for i, w in enumerate(weights):
            cam += w * target[i, :, :]
            X_cam += X_weights[i] * target[i, :, :]

        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, (224, 224))
        cam = cam - np.min(cam)
        cam = cam / np.max(cam)
        X_cam = np.maximum(X_cam, 0)
        X_cam = cv2.resize(X_cam, (224, 224))
        if X_cam.any() == None:
          return None
        if np.min(X_cam) == np.max(X_cam):
          return None
        X_cam = X_cam - np.min(X_cam)
        X_cam = X_cam / np.max(X_cam)
        return  X_cam

    def metrics(self,model,input_):

        predicted_confidence, predicted_class = model(input_).max(1)
        predicted_class = predicted_class.item()
        predicted_confidence = predicted_confidence.item()

        scorecam_map = self(input_)
        # print(f'Saliency map size: {scorecam_map.size()}')
        if scorecam_map.any() == None:
            self.predicted_confidence_cam = 0
        else:
            # print(f'Input size: {input_.size()}')
            scorecam_map =  torch.from_numpy(scorecam_map).cuda()
            scorecam_map_  = scorecam_map * input_
            scorecam_map_output = model(scorecam_map_)
            predicted_confidence_cam = scorecam_map_output[0][predicted_class]
            self.predicted_confidence_cam = predicted_confidence_cam.item()

        self.predicted_confidence_cam_list.append(self.predicted_confidence_cam)
        if predicted_confidence > self.predicted_confidence_cam:
          self.avg_drop = self.avg_drop + (predicted_confidence - self.predicted_confidence_cam)/predicted_confidence
        else:
          self.avg_increase = self.avg_increase + 1

    def percentize(self, count):
        self.avg_drop = self.avg_drop*100/count
        self.avg_increase = self.avg_increase*100/count


In [29]:
print('ll')

import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
import torchvision.transforms.functional as F
import torchvision.transforms as transforms


class Data(Dataset):
    #image_dir will contain the path to imagenet-mini->val directory->The directory with folders for different classes and images in them
    def __init__(self, image_dir = '/kaggle/input/imagenetmini-1000/imagenet-mini/val'):
        self.image_dir = image_dir
        self.images = []
        for item in os.listdir(image_dir):
            path = os.path.join(image_dir+'/'+item)
            self.images_ = os.listdir(path)
            for images in self.images_:
                path_ = os.path.join(path+'/'+images)
                self.images.append(path_)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        img_path = os.path.join(self.image_dir, self.images[index])
        image = load_image(img_path)
        # print(f'From Data: {image.shape}')
        image = apply_transforms(image)
        return image

ll


**Testing the CAMs**

In [30]:
# from torch.utils.data import DataLoader
# import torchvision.models as models
# from tqdm import tqdm
# import pandas as pd
# import warnings

# warnings.filterwarnings('ignore')

# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# #VGG
# vgg = models.vgg19(pretrained=True).eval()
# vgg_model_dict = dict(type='vgg16', arch=vgg, layer_name='features_29',input_size=(224, 224))
# vgg_xgradcam = XGradCam(vgg_model_dict)
# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# def metrics(dataloader):

#   loop = tqdm(dataloader)
# #   loop = dataloader   #for when I don't need tqdm
#   count = 0

#   for idx, data in enumerate(loop):
#     input_ = data
#     input_ = input_.squeeze()
#     input_ = input_.unsqueeze(0)

#     if torch.cuda.is_available():
#       input_ = input_.cuda()
#     count += 1
#     print(f'Input Size:{input_.size()}')
#     saliency_map = vgg_xgradcam(input_)
#     saliency_map =  torch.from_numpy(saliency_map)
#     print(f'Saliency Map: {saliency_map.shape}')
#     e_map = saliency_map * input_.cpu()
#     print(f'E-Map Size: {e_map.size()}')
#     if count == 1:
#         break




# val_ds = Data()
# val_loader = DataLoader(
#       val_ds,
#       batch_size=1,
#       shuffle=True,
#   )

# metrics(val_loader)

In [31]:
# from torch.utils.data import DataLoader
# import torchvision.models as models
# from tqdm import tqdm
# import pandas as pd
# import warnings

# warnings.filterwarnings('ignore')

# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# #VGG
# vgg = models.vgg19(pretrained=True).eval()
# vgg_model_dict = dict(type='vgg16', arch=vgg, layer_name='features_29',input_size=(224, 224))


# vgg_scorecam_5 = ScoreCAM_x(vgg_model_dict, threshold=0.5)

# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# #ResNet
# resnet = models.resnet18(pretrained=True).eval()
# resnet_model_dict = dict(type='resnet18', arch=resnet, layer_name='layer4',input_size=(224, 224))


# resnet_scorecam_5 = ScoreCAM_x(resnet_model_dict, threshold=0.5)

# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# vgg_d = {'scorecam_0': 0,'scorecam_5': 0,'scorecam_6': 0,'scorecam_7': 0, 'scorecam_8': 0 }
# resnet_d = {'scorecam_0':0,'scorecam_5': 0,'scorecam_6': 0,'scorecam_7': 0, 'scorecam_8': 0 }
# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# def metrics(dataloader):

#   loop = tqdm(dataloader)
# #   loop = dataloader   #for when I don't need tqdm
#   count = 0

#   for idx, data in enumerate(loop):
#     input_ = data
#     input_ = input_.squeeze()
#     input_ = input_.unsqueeze(0)

#     if torch.cuda.is_available():
#       input_ = input_.cuda()
#     count += 1

#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

#     vgg_scorecam_5.metrics(vgg,input_)

#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     vgg_d['scorecam_0'] = vgg_scorecam_0.predicted_confidence_cam
#     vgg_d['scorecam_5'] = vgg_scorecam_5.predicted_confidence_cam
#     vgg_d['scorecam_6'] = vgg_scorecam_6.predicted_confidence_cam
#     vgg_d['scorecam_7'] = vgg_scorecam_7.predicted_confidence_cam
#     vgg_d['scorecam_8'] = vgg_scorecam_8.predicted_confidence_cam
#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     v = list(vgg_d.values())
#     k = list(vgg_d.keys())
#     n = k[v.index(max(v))]
#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     if n == 'scorecam_0':
#        vgg_scorecam_0.win += 1
#     elif n == 'scorecam_5':
#        vgg_scorecam_5.win += 1
#     elif n == 'scorecam_6':
#        vgg_scorecam_6.win += 1
#     elif n == 'scorecam_7':
#        vgg_scorecam_7.win += 1
#     else:
#        vgg_scorecam_8.win += 1
#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     resnet_scorecam_0.metrics(resnet, input_)
#     resnet_scorecam_5.metrics(resnet, input_)
#     resnet_scorecam_6.metrics(resnet, input_)
#     resnet_scorecam_7.metrics(resnet, input_)
#     resnet_scorecam_8.metrics(resnet, input_)
#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     resnet_d['scorecam_0'] = vgg_scorecam_0.predicted_confidence_cam
# #     resnet_d['scorecam_3'] = vgg_scorecam_3.predicted_confidence_cam
# #     resnet_d['scorecam_4'] = vgg_scorecam_4.predicted_confidence_cam
#     resnet_d['scorecam_5'] = vgg_scorecam_5.predicted_confidence_cam
#     resnet_d['scorecam_6'] = vgg_scorecam_6.predicted_confidence_cam
#     resnet_d['scorecam_7'] = vgg_scorecam_7.predicted_confidence_cam
#     resnet_d['scorecam_8'] = vgg_scorecam_8.predicted_confidence_cam
#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     v = list(resnet_d.values())
#     k = list(resnet_d.keys())
#     n = k[v.index(max(v))]
#     #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#     if n == 'scorecam_0':
#        resnet_scorecam_0.win += 1
#     elif n == 'scorecam_5':
#        resnet_scorecam_5.win += 1
#     elif n == 'scorecam_6':
#        resnet_scorecam_6.win += 1
#     elif n == 'scorecam_7':
#        resnet_scorecam_7.win += 1
#     else:
#        resnet_scorecam_8.win += 1
#     if count == 10:
#         break



#   vgg_scorecam_0.percentize(count)
#   vgg_scorecam_5.percentize(count)
#   vgg_scorecam_6.percentize(count)
#   vgg_scorecam_7.percentize(count)
#   vgg_scorecam_8.percentize(count)

#   resnet_scorecam_0.percentize(count)
#   resnet_scorecam_5.percentize(count)
#   resnet_scorecam_6.percentize(count)
#   resnet_scorecam_7.percentize(count)
#   resnet_scorecam_8.percentize(count)

# val_ds = Data()
# val_loader = DataLoader(
#       val_ds,
#       batch_size=1,
#       shuffle=True,
#   )

# metrics(val_loader)


# data = {'Model':['scorecam0','scorecam_5','scorecam_6','scorecam_7','scorecam_8'],
#       'Average Drop':[vgg_scorecam_0.avg_drop,vgg_scorecam_5.avg_drop,vgg_scorecam_6.avg_drop,
#                       vgg_scorecam_7.avg_drop,vgg_scorecam_8.avg_drop],
#       'Average Increase':[vgg_scorecam_0.avg_increase,vgg_scorecam_5.avg_increase,vgg_scorecam_6.avg_increase,
#                           vgg_scorecam_7.avg_increase,vgg_scorecam_8.avg_increase],
#         'Win':[vgg_scorecam_0.win,vgg_scorecam_5.win,vgg_scorecam_6.win,vgg_scorecam_7.win,vgg_scorecam_8.win]}

# df = pd.DataFrame(data)
# print('Vgg details\n')
# print(df)
# # saving the dataframe
# df.to_csv('vgg_scorecam_ablation_study.csv')


# data = {'Model':['scorecam_0','scorecam_5','scorecam_6','scorecam_7','scorecam_8'],
#       'Average Drop':[resnet_scorecam_0.avg_drop,resnet_scorecam_5.avg_drop,
#                       resnet_scorecam_6.avg_drop,resnet_scorecam_7.avg_drop,resnet_scorecam_8.avg_drop],
#       'Average Increase':[resnet_scorecam_0.avg_increase,
#                           resnet_scorecam_5.avg_increase,resnet_scorecam_6.avg_increase,
#                           resnet_scorecam_7.avg_increase,resnet_scorecam_8.avg_increase],
#        'Win':[resnet_scorecam_0.win,resnet_scorecam_5.win,resnet_scorecam_6.win,
#                resnet_scorecam_7.win,resnet_scorecam_8.win]}
# df = pd.DataFrame(data)
# print('\n\nResnet Details\n')
# print(df)
# # saving the dataframe
# df.to_csv('resnet_scorecam_ablation_study.csv')

In [32]:
print('ll')

from torch.utils.data import DataLoader
import torchvision.models as models
from tqdm import tqdm
import pandas as pd
import warnings



warnings.filterwarnings('ignore')
val_ds = Data()
val_loader = DataLoader(
      val_ds,
      batch_size=1,
      shuffle=True,
  )

def metrics(model,cam_model_list=[],dataloader=val_loader):

  loop = tqdm(dataloader)
  # loop = dataloader   #for when I don't need tqdm
  count = 0

  for idx, data in enumerate(loop):
    input_ = data
    input_ = input_.squeeze()
    input_ = input_.unsqueeze(0)
    input = input_.type(torch.float16)
    if torch.cuda.is_available():
      input_ = input_.cuda()
    count += 1
    #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    for cam_model in cam_model_list:
        cam_model.metrics(model, input_)
    if count == 50:
        break
  
  for cam_model in cam_model_list:
      cam_model.percentize(count)

ll


In [33]:
print('ll')


vgg_avg_drop = {'gradcam':0,'gradcampp':0,'xgradcam':0,'layercam':0,'scorecam':0,'scorecam_x':0,'augpp':0}
vgg_avg_increase = {'gradcam':0,'gradcampp':0,'xgradcam':0,'layercam':0,'scorecam':0,'scorecam_x':0,'augpp':0}
vgg_avg_win = {'gradcam':0,'gradcampp':0,'xgradcam':0,'layercam':0,'scorecam':0,'scorecam_x':0,'augpp':0}
vgg_predicted_confidence = {'gradcam':[],'gradcampp':[],'xgradcam':[],'layercam':[],
                            'scorecam':[],'scorecam_x':[],'augpp':[]}

# #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
#VGG
vgg = models.vgg19(pretrained=True).eval()
vgg_model_dict = dict(type='vgg16', arch=vgg, layer_name='features_36',input_size=(224, 224))

vgg_gradcam = GradCAM(vgg_model_dict)
vgg_gradcampp = GradCAMpp(vgg_model_dict)
vgg_xgradcam = XGradCam(vgg_model_dict)
vgg_layercam = LayerCAM(vgg_model_dict)
vgg_scorecam = ScoreCAM(vgg_model_dict)
vgg_scorecam_x = ScoreCAM_x(vgg_model_dict, isthreshold=True)
# vgg_scorecam_d = ScoreCAM_d(vgg_model_dict)
vgg_augpp = aug_GradCAMpp(vgg_model_dict)

model_list = [vgg_gradcam, vgg_gradcampp, vgg_xgradcam, vgg_layercam, vgg_scorecam, vgg_scorecam_x, vgg_augpp]
# model_list = [vgg_scorecam, vgg_scorecam_x]

metrics(vgg, model_list)

vgg_predicted_confidence['gradcam'] = vgg_gradcam.predicted_confidence_cam_list[:]
vgg_avg_drop['gradcam'] = vgg_gradcam.avg_drop
vgg_avg_increase['gradcam'] = vgg_gradcam.avg_increase
del vgg_gradcam

vgg_predicted_confidence['gradcampp'] = vgg_gradcampp.predicted_confidence_cam_list[:]
vgg_avg_drop['gradcampp'] = vgg_gradcampp.avg_drop
vgg_avg_increase['gradcampp'] = vgg_gradcampp.avg_increase
del vgg_gradcampp

vgg_predicted_confidence['xgradcam'] = vgg_xgradcam.predicted_confidence_cam_list[:]
vgg_avg_drop['xgradcam'] = vgg_xgradcam.avg_drop
vgg_avg_increase['xgradcam'] = vgg_xgradcam.avg_increase
del vgg_xgradcam

vgg_predicted_confidence['layercam'] = vgg_layercam.predicted_confidence_cam_list[:]
vgg_avg_drop['layercam'] = vgg_layercam.avg_drop
vgg_avg_increase['layercam'] = vgg_layercam.avg_increase
del vgg_layercam

vgg_predicted_confidence['scorecam'] = vgg_scorecam.predicted_confidence_cam_list[:]
vgg_avg_drop['scorecam'] = vgg_scorecam.avg_drop
vgg_avg_increase['scorecam'] = vgg_scorecam.avg_increase
del vgg_scorecam

vgg_predicted_confidence['scorecam_x'] = vgg_scorecam_x.predicted_confidence_cam_list[:]
vgg_avg_drop['scorecam_x'] = vgg_scorecam_x.avg_drop
vgg_avg_increase['scorecam_x'] = vgg_scorecam_x.avg_increase
del vgg_scorecam_x

# vgg_predicted_confidence['scorecam_d'] = vgg_scorecam_d.predicted_confidence_cam_list[:]
# vgg_avg_drop['scorecam_d'] = vgg_scorecam_d.avg_drop
# vgg_avg_increase['scorecam_d'] = vgg_scorecam_d.avg_increase
# del vgg_scorecam_d

vgg_predicted_confidence['augpp'] = vgg_augpp.predicted_confidence_cam_list[:]
vgg_avg_drop['augpp'] = vgg_augpp.avg_drop
vgg_avg_increase['augpp'] = vgg_augpp.avg_increase
del vgg_augpp
    
print('Completed Successfully')


#xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# ResNet
resnet = models.resnet18(pretrained=True).eval()
resnet_model_dict = dict(type='resnet18', arch=resnet, layer_name='layer4',input_size=(224, 224))

resnet_avg_drop = {'gradcam':0,'gradcampp':0,'xgradcam':0,'layercam':0,
                   'scorecam':0,'scorecam_x':0,'augpp':0}
resnet_avg_increase = {'gradcam':0,'gradcampp':0,'xgradcam':0,'layercam':0,
                       'scorecam':0,'scorecam_x':0,'augpp':0}
resnet_avg_win = {'gradcam':0,'gradcampp':0,'xgradcam':0,'layercam':0,
                  'scorecam':0,'scorecam_x':0,'augpp':0}
resnet_predicted_confidence = {'gradcam':[],'gradcampp':[],'xgradcam':[],'layercam':[],
                               'scorecam':[],'scorecam_x':[],'augpp':[]}



resnet_gradcam = GradCAM(resnet_model_dict)
resnet_gradcampp = GradCAMpp(resnet_model_dict)
resnet_xgradcam = XGradCam(resnet_model_dict)
resnet_layercam = LayerCAM(resnet_model_dict)
resnet_scorecam = ScoreCAM(resnet_model_dict)
resnet_scorecam_x = ScoreCAM_x(resnet_model_dict, isthreshold=True)
# resnet_scorecam_d = ScoreCAM_d(resnet_model_dict)
resnet_augpp = aug_GradCAMpp(resnet_model_dict)

model_list = [resnet_gradcam,resnet_gradcampp,resnet_xgradcam ,resnet_layercam, resnet_scorecam ,resnet_scorecam_x,
             resnet_augpp]
# model_list = [resnet_scorecam ,resnet_scorecam_x]

metrics(resnet,model_list)

resnet_predicted_confidence['gradcam'] = resnet_gradcam.predicted_confidence_cam_list[:]
resnet_avg_drop['gradcam'] = resnet_gradcam.avg_drop
resnet_avg_increase['gradcam'] = resnet_gradcam.avg_increase
del resnet_gradcam

resnet_predicted_confidence['gradcampp'] = resnet_gradcampp.predicted_confidence_cam_list[:]
resnet_avg_drop['gradcampp'] = resnet_gradcampp.avg_drop
resnet_avg_increase['gradcampp'] = resnet_gradcampp.avg_increase
del resnet_gradcampp

resnet_predicted_confidence['xgradcam'] = resnet_xgradcam.predicted_confidence_cam_list[:]
resnet_avg_drop['xgradcam'] = resnet_xgradcam.avg_drop
resnet_avg_increase['xgradcam'] = resnet_xgradcam.avg_increase
del resnet_xgradcam

resnet_predicted_confidence['layercam'] = resnet_layercam.predicted_confidence_cam_list[:]
resnet_avg_drop['layercam'] = resnet_layercam.avg_drop
resnet_avg_increase['layercam'] = resnet_layercam.avg_increase
del resnet_layercam

resnet_predicted_confidence['scorecam'] = resnet_scorecam.predicted_confidence_cam_list[:]
resnet_avg_drop['scorecam'] = resnet_scorecam.avg_drop
resnet_avg_increase['scorecam'] = resnet_scorecam.avg_increase
del resnet_scorecam

resnet_predicted_confidence['scorecam_x'] = resnet_scorecam_x.predicted_confidence_cam_list[:]
resnet_avg_drop['scorecam_x'] = resnet_scorecam_x.avg_drop
resnet_avg_increase['scorecam_x'] = resnet_scorecam_x.avg_increase
del resnet_scorecam_x

# resnet_predicted_confidence['scorecam_d'] = resnet_scorecam_d.predicted_confidence_cam_list[:]
# resnet_avg_drop['scorecam_d'] = resnet_scorecam_d.avg_drop
# resnet_avg_increase['scorecam_d'] = resnet_scorecam_d.avg_increase
# del resnet_scorecam_d

resnet_predicted_confidence['augpp'] = resnet_augpp.predicted_confidence_cam_list[:]
resnet_avg_drop['augpp'] = resnet_augpp.avg_drop
resnet_avg_increase['augpp'] = resnet_augpp.avg_increase
del resnet_augpp

ll


  1%|          | 49/3923 [06:15<8:14:32,  7.66s/it]


Completed Successfully


  1%|          | 49/3923 [03:11<4:12:47,  3.92s/it]


In [34]:
import pandas as pd

def vgg_win_metrics(names_exclude=[],predicted_confidence_dict=vgg_predicted_confidence, filename='vgg_metrics_scorecam.csv'):
  keys = list(predicted_confidence_dict.keys())
  vgg_avg_win_keys = list(vgg_avg_win.keys())
  num_total = len(predicted_confidence_dict[keys[0]])
#   num_total = 20
  names = ['gradcam','gradcampp','xgradcam','layercam','scorecam',
          'scorecam_x', 'augpp']
#   names = ['scorecam','scorecam_x']
  max_conf = -1000
  max_conf_item = 'k'
    
  for item in vgg_avg_win:
    vgg_avg_win[item] = 0
    
  for idx in range(num_total):
    for index, item in enumerate(names):
       if (item in names_exclude):
          continue
#        print(item)
       if predicted_confidence_dict[item][idx] > max_conf:
          max_conf = predicted_confidence_dict[item][idx]
          max_conf_item = item

    vgg_avg_win[max_conf_item] += 1
    max_conf = -1000
    max_conf_item = ''

  for item in vgg_avg_win:
    vgg_avg_win[item] = vgg_avg_win[item]*100/num_total
    
  average_drop = []
  average_increase = []
  average_win = []
  names_ = []
  for item in names:
    if (item in names_exclude):
        continue
    names_.append(item)
    average_drop.append(vgg_avg_drop[item])
    average_increase.append(vgg_avg_increase[item])
    average_win.append(vgg_avg_win[item])
  
  data = {'name':names_,'Average_drop':average_drop, 'Average_increase':average_increase,'Average_win':average_win}
  df = pd.DataFrame(data)
  df.to_csv(filename)
  print(filename)
  print(df)

def resnet_win_metrics(names_exclude=[],predicted_confidence_dict = resnet_predicted_confidence, filename='resnet_metrics_scorecam.csv'):
  keys = list(predicted_confidence_dict.keys())
  num_keys = len(keys)
  num_total = len(predicted_confidence_dict[keys[0]])
#   num_total = 20
  max_conf = -1000
  max_conf_item  = 'k'
  names = ['gradcam','gradcampp','xgradcam','layercam','scorecam','scorecam_x','augpp']

#   names = ['scorecam','scorecam_x']
    
  for item in resnet_avg_win:
    resnet_avg_win[item] = 0
    
  for idx in range(num_total):
    for index, item in enumerate(names):
         if (item in names_exclude):
            continue
         if predicted_confidence_dict[item][idx] > max_conf:
            max_conf = predicted_confidence_dict[item][idx]
            max_conf_item = item

    resnet_avg_win[max_conf_item] += 1
    max_conf = -1000
    max_conf_item = 'k' 
    
  for item in resnet_avg_win:   
    resnet_avg_win[item] = resnet_avg_win[item]*100/num_total
  
  
  

  names_ = []
  average_drop = []
  average_increase = []
  average_win = []
  for item in names:
    if (item in names_exclude):
        continue
    names_.append(item)
    average_drop.append(resnet_avg_drop[item])
    average_increase.append(resnet_avg_increase[item])
    average_win.append(resnet_avg_win[item])
  
  data = {'name':names_,'Average_drop':average_drop, 'Average_increase':average_increase,'Average_win':average_win}
  df = pd.DataFrame(data)
  df.to_csv(filename)
  print(filename)
  print(df)



In [35]:
# print(vgg_avg_win)
vgg_win_metrics(names_exclude=[])

#xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# print('\n\n\n')

resnet_win_metrics(names_exclude=['sccorecam_true','scorecam_true_x'])


vgg_metrics_scorecam.csv
         name  Average_drop  Average_increase  Average_win
0     gradcam     10.920070              42.0         22.0
1   gradcampp     13.173542              26.0          2.0
2    xgradcam     10.189994              40.0         26.0
3    layercam     12.980695              24.0          0.0
4    scorecam     11.678690              38.0         12.0
5  scorecam_x      8.267951              38.0         30.0
6       augpp     12.158997              30.0          8.0
resnet_metrics_scorecam.csv
         name  Average_drop  Average_increase  Average_win
0     gradcam     10.197578              18.0          6.0
1   gradcampp     10.247979              18.0          0.0
2    xgradcam     10.168956              18.0          8.0
3    layercam     10.223560              18.0          0.0
4    scorecam      7.845512              24.0         10.0
5  scorecam_x      4.358351              46.0         66.0
6       augpp     10.511366              20.0         10.0


In [36]:
# import torchvision.models as models
# resnet = models.resnet18(pretrained=True).eval()
# resnet_model_dict = dict(type='resnet18', arch=resnet, layer_name='layer4',input_size=(224, 224))
# print(resnet)
vgg = models.vgg19(pretrained=True).eval()
print(vgg)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd