## Visualization of CNN: Grad-CAM
* **Objective**: Convolutional Neural Networks are widely used in computer vision and are powerful for processing grid-like data. However, we hardly know how and why they work, because they cannot be decomposed into individually intuitive components. In this assignment, we use Grad-CAM, which highlights the regions of the input image that were important for the neural network's prediction.

* **To be submitted by next session**: this notebook, **cleaned** (i.e. without results, for file size reasons: `menu > kernel > restart and clean`), in a state ready to be executed (if one just presses 'Enter' till the end, one should obtain all the results for all images) with a few comments at the end. No additional report, just the notebook!

* NB: if `PIL` is not installed, try `conda install pillow`.


In [22]:
import torch
from torchvision.utils import  save_image
import torch.nn as nn
from torchvision import models, datasets, transforms
import matplotlib.pyplot as plt

import numpy as np
from PIL import Image

%matplotlib inline

![vgg_16.png](https://www.researchgate.net/profile/Bibo_Shi/publication/323440752/figure/fig1/AS:739814685032448@1553396974148/The-architecture-of-VGG-16-model-To-represent-different-depth-levels-convolutional.jpg)

In [120]:
# Load the network from a local checkpoint file; this may take a while.
# torch.load restores the whole pickled object and map_location remaps its
# tensors onto the CPU.  The original cell describes the checkpoint as a
# VGG-16 trained on the Animals10 dataset using transfer learning.
# Alternative checkpoint used during experiments: 'animals10_resnet18_V1.pth'.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only open
# checkpoints that come from a trusted source.
net = torch.load('animals10vgg_V0.pth', map_location='cpu')



### Grad-CAM 
* **Overview:** Given an image, and a category (‘tiger cat’) as input, we forward-propagate the image through the model to obtain the `raw class scores` before softmax. The gradients are set to zero for all classes except the desired class (tiger cat), which is set to 1. This signal is then backpropagated to the `rectified convolutional feature map` of interest, where we can compute the coarse Grad-CAM localization (blue heatmap).

In [64]:
class FeatureExtractor():
    """Apply a stack of layers in order and capture selected activations.

    The child modules of ``model`` are applied one after another.  For each
    child whose name is one of ``target_layers`` the output tensor is kept and
    a backward hook is registered on it, so that its gradient is appended to
    ``self.gradients`` when ``backward()`` is run afterwards.

    Args:
        model: Container whose ``_modules`` mapping holds the layers to apply
            sequentially (e.g. the ``features`` part of a network).
        target_layers: Names of the child modules to capture.  A bare string
            is treated as a single layer name.
    """

    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Hook callback: record the gradient of a captured activation."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Run ``x`` through every child module of the wrapped model.

        Args:
            x: Input tensor fed to the first child module.

        Returns:
            A tuple ``(activations, x)`` where ``activations`` lists the
            outputs of the targeted layers in forward order and ``x`` is the
            output of the last child module.

        Note:
            ``self.gradients`` is reset on every call and is filled only once
            ``backward()`` is called, in the order in which autograd reaches
            the hooked tensors.
        """
        targets = self.target_layers
        if isinstance(targets, str):
            # ``name in "25"`` would be a substring test and would also match
            # the layers "2" and "5"; wrap a lone name so membership is exact.
            targets = (targets,)

        activations = []
        self.gradients = []
        for name, module in self.model._modules.items():
            x = module(x)
            if name in targets:
                x.register_hook(self.save_gradient)
                activations.append(x)
        return activations, x

In [65]:
class ModelOutputs():
    """Forward-pass helper exposing three things.

    Calling the instance returns:
      1. the activations of the targeted intermediate layers, and
      2. the network output.
    ``get_gradients()`` returns:
      3. the gradients recorded for the targeted intermediate layers.

    The wrapped model must provide ``features`` and ``classifier``
    sub-modules; only those two are used here.
    """

    def __init__(self, model, target_layers):
        self.model = model
        # The extractor walks model.features layer by layer.
        self.feature_extractor = FeatureExtractor(self.model.features, target_layers)

    def get_gradients(self):
        """Return the gradient list collected by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Return ``(target_activations, output)`` for the input ``x``."""
        target_activations, feature_map = self.feature_extractor(x)
        # Collapse everything except the first dimension before the classifier.
        batch_size = feature_map.size(0)
        flattened = feature_map.view(batch_size, -1)
        return target_activations, self.model.classifier(flattened)


In [66]:
class GradCam:
    """Compute Grad-CAM heat maps for a model with ``features``/``classifier``.

    Args:
        model: Network to explain.  It is switched to evaluation mode and
            wrapped in ``ModelOutputs``.
        target_layer_names: Names of the layers inside ``model.features``
            whose activations are captured.  The heat map is built from the
            last captured layer.
    """

    def __init__(self, model, target_layer_names):
        self.model = model
        self.model.eval()
        self.extractor = ModelOutputs(self.model, target_layer_names)

    def forward(self, input):
        """Plain forward pass through the wrapped model."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return the Grad-CAM map of the first sample in ``input``.

        Args:
            input: Batch fed to the network.  Only the first sample is used
                to build the map.
            index: Class index to explain.  When ``None`` the highest-scoring
                class of the first sample is used.

        Returns:
            A 224x224 array with values scaled to ``[0, 1]``.  It is all
            zeros when no location has a positive contribution.
        """
        # Imported here so the class does not rely on a later notebook cell
        # having imported cv2 first.
        import cv2

        features, output = self.extractor(input)

        if index is None:
            index = int(np.argmax(output[0].detach().cpu().numpy()))

        # Score of the target class only: every other class contributes a
        # zero gradient.  Taking it straight from `output` keeps the
        # computation on the same device as the model.
        score = output[:, index].sum()

        # Reset the gradient and perform backpropagation
        self.model.features.zero_grad()
        self.model.classifier.zero_grad()
        score.backward(retain_graph=True)

        # Hooks fire in backward order, so the first recorded gradient
        # belongs to the last captured activation (features[-1]).
        grads_val = self.extractor.get_gradients()[0].detach().cpu().numpy()
        target = features[-1].detach().cpu().numpy()[0]

        # Channel weights = spatial average of the gradient; the map is the
        # weighted sum of the channels of the activation.
        weights = np.mean(grads_val, axis=(2, 3))[0]
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)

        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, (224, 224))
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            # Skip the division for an all-zero map to avoid NaN values.
            cam = cam / peak
        return cam



In [67]:
import cv2
def show_cam_on_image(img, mask, file_name):
    """Overlay a JET-coloured heat map on an image and write it to a file.

    Args:
        img: Image array that is added to the colour-mapped mask; it is
            converted to float32 before blending.
            # assumes values in [0, 1] and the same HxWx3 shape as the
            # colour-mapped mask -- TODO confirm against callers
        mask: Heat-map array; it is multiplied by 255 and cast to uint8
            before the colour map is applied.
        file_name: Destination path handed to ``cv2.imwrite``.

    NOTE(review): OpenCV colour maps and ``imwrite`` work in BGR channel
    order, so an RGB ``img`` would come out with red and blue swapped --
    confirm what callers pass.
    """
    mask_u8 = np.uint8(255 * mask)
    jet = np.float32(cv2.applyColorMap(mask_u8, cv2.COLORMAP_JET)) / 255
    overlay = jet + np.float32(img)
    # Rescale so the brightest value maps to 255 in the written file.
    overlay = overlay / np.max(overlay)
    cv2.imwrite(file_name, np.uint8(255 * overlay))


### Input Images and apply Grad-CAM 

In [89]:
# Per-image class indices, paired by position with directory listings in the
# cells that follow.
# right_label: zipped with the listing of './data/vgg/' and passed to Grad-CAM
# as the class to explain for each image.
right_label = [0, 0, 1, 2, 1, 5, 5, 9]
# wrong_label: zipped with the listings of './data/vgg noise/' and
# './data/ori/'; the label value itself is not used in those loops.
# NOTE(review): these values coincide with the labels printed in the recorded
# output of the Grad-CAM cell -- presumably the network's (incorrect)
# predictions; confirm.
wrong_label = [5, 6, 6, 1, 0, 3, 9, 5]

In [124]:
import os
# Folder with the images to explain.  Alternatives used during experiments:
# './data/ori/' and './data/resnet noise/'.
path = './data/vgg/'
# os.listdir returns entries in arbitrary order; sort them so every file is
# paired with the intended entry of right_label regardless of the platform.
listing = sorted(os.listdir(path))

grad_cam = GradCam(net, target_layer_names=["25"])
preprocess = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

for image_name, target_label in zip(listing, right_label):
    print(image_name)
    # The context manager closes the file even if preprocessing raises;
    # convert('RGB') guarantees three channels for palette/grey/RGBA files.
    with Image.open(os.path.join(path, image_name)) as im_orig_ori:
        im = preprocess(im_orig_ori.convert('RGB'))
    im = im[None, :, :, :]  # add the batch dimension

    # Predicted class; no gradients are needed for this pass.
    with torch.no_grad():
        label = torch.argmax(net(im)).item()
    print (label)
    im = im.requires_grad_(True)
    # calculating the masks
    mask = grad_cam(im, target_label)
    # show cam on the image
    rgb = np.transpose(im.detach().squeeze().numpy(), (1, 2, 0))
    # The heat map and cv2.imwrite use BGR channel order, so flip the RGB
    # image before blending to keep the photo's colours correct.
    show_cam_on_image(rgb[:, :, ::-1], mask, 'GC_'+image_name)

vgg25_0 (100).bmp
5
vgg25_0 (93).bmp
6
vgg25_1 (47).bmp
6
vgg25_2 (64).bmp
1
vgg25_4 (90).bmp
0
vgg25_5 (20).bmp
3
vgg25_5 (51).bmp
9
vgg25_9 (2).bmp
5


### Reinforce the noise for a better visualization

In [118]:
import os
from PIL import ImageOps, ImageEnhance
# Folder with the images to process.  Alternatives used during experiments:
# './data/ori/' and './data/resnet noise/'.
path = './data/vgg noise/'
listing = os.listdir(path)
resize_to_tensor = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()])

# wrong_label only bounds how many files are processed (zip stops at the
# shorter sequence); the label value itself is not used.
for image_name, target_label in zip(listing, wrong_label):
    print(image_name)
    with Image.open(path + image_name) as im_orig_ori:
        # Invert the colours, then resize to 224x224 and convert to a tensor.
        inv = ImageOps.invert(im_orig_ori)
        im = resize_to_tensor(inv)
        # Round every value down to a multiple of the top-left value of
        # channel 0 (x - x % step).
        # NOTE(review): if that reference value is 0 the remainder is not
        # well defined -- confirm the inputs never hit this case.
        im = im - im.remainder(im[0, 0, 0])
    # Written to the working directory under the original file name.
    save_image(im, image_name)

vggdiff_25_1 (47).bmp
vggdiff_25_4 (90).bmp
vggdiff_25_5 (51).bmp


### Resize original image

In [122]:
# Write a 224x224 copy of each original image to the working directory,
# keeping the original file name.
path = './data/ori/'
listing = os.listdir(path)
resize_to_tensor = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()])

# wrong_label only bounds how many files are processed (zip stops at the
# shorter sequence); the label value itself is not used.
for image_name, target_label in zip(listing, wrong_label):
    with Image.open(path + image_name) as im_orig_ori:
        im = resize_to_tensor(im_orig_ori)
    save_image(im, image_name)