In [1]:
import io
import json
import os

import cv2
import numpy as np
import torch
from PIL import Image
from torch.autograd import Variable
from torch.nn import functional as F
from torchvision import models, transforms

Code adapted from the reference CAM implementation: https://github.com/zhoubolei/CAM/blob/master/pytorch_CAM.py

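For a class $c$, let $f_k(x,y)$ be the activation of unit $k$ of the last convolutional layer at spatial location $(x,y)$, let $w_k$ be the weight connecting unit $k$ to class $c$ (written $w_k^c$ in the CAM paper), and let $S_c$ be the score of class $c$ before the softmax. Then: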

$$\text{cam at $(x,y)$} = M_c(x,y) = \sum_k w_k f_k(x,y)$$

$$\text{result of GAP for unit $k$} = F_k = \sum_{x,y} f_k(x,y)$$

\begin{align}
S_c & = \sum_k w_k F_k\\
& = \sum_k w_k \sum_{x,y} f_k(x,y)\\
& = \sum_k \sum_{x,y} w_k f_k(x,y)\\
& = \sum_{x,y} \sum_k w_k f_k(x,y)\\
& = \sum_{x,y} M_c(x,y)
\end{align}

In [7]:
# Pretrained ResNet-18. The module named by `finalconv_name` is the one whose
# output is captured by the forward hook below and turned into the CAM.
net = models.resnet18(pretrained=True)
finalconv_name = 'layer4'

# Inference only: put the network in evaluation mode.
net.eval()

# hook the feature extractor
features_blobs = []
def hook_feature(module, input, output):
    """Forward hook that stores the hooked layer's latest output.

    The list is overwritten in place (instead of appended to) so that it
    never grows with the number of forward passes and so that
    ``features_blobs[0]`` always refers to the most recent forward pass
    rather than to the first image that was ever processed.

    Parameters
    ----------
    module, input : supplied by PyTorch's hook mechanism; unused here.
    output : the tensor produced by the hooked module.
    """
    # detach() replaces the deprecated `.data`; cpu() makes the numpy
    # conversion work even if the model has been moved to another device.
    features_blobs[:] = [output.detach().cpu().numpy()]

getattr(net, finalconv_name).register_forward_hook(hook_feature)

# get the softmax weight
# The second-to-last parameter is used as the classifier weight matrix
# (one row per class); np.squeeze drops any singleton dimensions.
params = list(net.parameters())
weight_softmax = np.squeeze(params[-2].detach().cpu().numpy())

def returnCAM(feature_conv, weight_softmax, class_idx, size_upsample=(256, 256)):
    """Build class activation maps (CAMs) for the requested classes.

    For each class the map is the weighted sum over channels of the
    convolutional feature maps, min-max normalised to 0..255 and upsampled.

    Parameters
    ----------
    feature_conv : np.ndarray of shape (1, nc, h, w)
        Feature maps of a single image (batch size must be 1).
    weight_softmax : np.ndarray
        Classifier weights; ``weight_softmax[idx]`` must be a vector of
        length ``nc`` (one weight per feature channel).
    class_idx : iterable of int
        Class indices for which a CAM is generated.
    size_upsample : tuple of int, optional
        Target size handed to ``cv2.resize`` (OpenCV ``dsize`` order, i.e.
        ``(width, height)``). Defaults to ``(256, 256)``.

    Returns
    -------
    list of np.ndarray
        One ``uint8`` map per entry of ``class_idx``, in the same order.

    Raises
    ------
    ValueError
        If ``feature_conv`` holds more (or fewer) than one image.
    """
    # nc = n in paper with unit k = 0, ..., n-1
    # h = height with x = 0, ..., h-1
    # w = width with y = 0, ..., w-1
    bz, nc, h, w = feature_conv.shape
    if bz != 1:
        raise ValueError(
            'returnCAM expects the features of exactly one image, '
            'got a batch of size {}'.format(bz)
        )

    # The flattened feature matrix is the same for every class: build it once.
    features = feature_conv.reshape((nc, h * w))

    output_cam = []
    for idx in class_idx:
        # (nc,) @ (nc, h*w) -> (h*w,), then back to the spatial grid (h, w)
        cam = weight_softmax[idx].dot(features).reshape(h, w)

        # Min-max normalization
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam_img = cam / peak
        else:
            # Completely flat map: dividing by zero would yield NaN, whose
            # cast to uint8 is undefined. An all-zero map is the sane result.
            cam_img = np.zeros_like(cam)

        cam_img = np.uint8(255 * cam_img)
        output_cam.append(cv2.resize(cam_img, size_upsample))
    return output_cam


# Per-channel standardisation applied after the image has been converted to a
# tensor: each channel has its mean subtracted and is divided by its std.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Input pipeline, applied in order: resize to 224x224, convert to a tensor,
# then standardise the channels.
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
]
preprocess = transforms.Compose(_preprocess_steps)

In [8]:
# load the imagenet category list
with open('imagenet-simple-labels.json') as f:
    classes = json.load(f)

# Input / output folders, relative to the notebook's working directory.
# The same folder is used for listing and for opening the images, so the
# notebook no longer depends on one user's absolute path.
images_dir = 'images'
results_dir = 'camResults'
# cv2.imwrite only returns False when the target folder is missing, so make
# sure the folder exists before the loop starts.
os.makedirs(results_dir, exist_ok=True)

# Maps image file name -> label of the top-1 prediction.
images_classe = {}

# sorted() gives a deterministic processing order (os.listdir order is arbitrary).
for image_file in sorted(os.listdir(images_dir)):
    if not image_file.lower().endswith(('.jpg', '.jpeg')):
        continue

    image_path = os.path.join(images_dir, image_file)

    # load test image; convert('RGB') guarantees the three channels that the
    # normalisation step expects, even for grayscale or CMYK JPEGs
    img_pil = Image.open(image_path).convert('RGB')
    img_tensor = preprocess(img_pil)
    img_variable = img_tensor.unsqueeze(0)

    # The forward hook on the network fills `features_blobs` during the
    # forward pass. Start from an empty list and read the newest entry below
    # so that the CAM is always computed from THIS image's activations.
    features_blobs.clear()
    with torch.no_grad():  # inference only: no autograd graph needed
        logit = net(img_variable)
        h_x = F.softmax(logit, dim=1).squeeze()

    probs, idx = h_x.sort(0, True)
    probs = probs.numpy()
    idx = idx.numpy()

    # output the prediction
    for i in range(5):
        print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

    # generate class activation mapping for the top1 prediction
    CAMs = returnCAM(features_blobs[-1], weight_softmax, [idx[0]])

    # render the CAM and output
    print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
    print('')
    images_classe[image_file] = classes[idx[0]]

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        print('could not read {} with OpenCV, CAM overlay skipped'.format(image_path))
        continue

    height, width, _ = img.shape
    heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
    result = heatmap * 0.3 + img * 0.5

    # splitext handles both '.jpg' and '.jpeg' (a fixed [:-4] slice would not).
    stem = os.path.splitext(image_file)[0]
    output_path = os.path.join(results_dir, f'{stem}_CAM.jpg')
    if not cv2.imwrite(output_path, result):
        print('could not write {}'.format(output_path))

1.000 -> container ship
0.000 -> dock
0.000 -> fireboat
0.000 -> drilling rig
0.000 -> ocean liner
(512,)
(512, 49)
(49,)
output CAM.jpg for the top1 prediction: container ship

0.372 -> pirate ship
0.367 -> paddle wheel
0.051 -> lifeboat
0.036 -> drilling rig
0.030 -> dock
(512,)
(512, 49)
(49,)
output CAM.jpg for the top1 prediction: pirate ship

0.164 -> station wagon
0.108 -> tow truck
0.106 -> race car
0.096 -> minibus
0.091 -> minivan
(512,)
(512, 49)
(49,)
output CAM.jpg for the top1 prediction: station wagon

0.673 -> semi-trailer truck
0.218 -> garbage truck
0.066 -> moving van
0.008 -> tow truck
0.005 -> minibus
(512,)
(512, 49)
(49,)
output CAM.jpg for the top1 prediction: semi-trailer truck

0.233 -> tiger cat
0.188 -> ring-tailed lemur
0.159 -> lynx
0.112 -> tabby cat
0.092 -> koala
(512,)
(512, 49)
(49,)
output CAM.jpg for the top1 prediction: tiger cat

0.386 -> tricycle
0.264 -> unicycle
0.200 -> mountain bike
0.126 -> tandem bicycle
0.007 -> moped
(512,)
(512, 49)
(49,

In [4]:
# Persist the {image file name: top-1 class label} mapping as JSON.
# The folder is created if needed, and the `with` block closes the file even
# if json.dump raises.
os.makedirs("camResults", exist_ok=True)
with open("camResults/images_classe.json", "w") as images_classe_file:
    json.dump(images_classe, images_classe_file)