Author: Thrupthi Ann John https://github.com/ThrupthiAnn

# Demo of calculating the metrics in our paper
This is the last demo notebook. Before running this notebook, please run <b>demo1_maps.ipynb</b> and <b>demo2_explanation.ipynb</b>. In this notebook, we will find the confidence of the images before and after creating explanation maps and run the metrics given in our paper. 

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import models, transforms
from os.path import join
import torch
from pathlib import Path

datafolder = '../data'
imagefolder = join(datafolder, 'SampleData/SampleImages')
modelfolder = join(datafolder, 'Models')
interfolder = '../results/IntermediateResults'
explcmsfolder = join(interfolder, 'Explanation_CMS')
explgradcamfolder = join(interfolder, 'Explanation_GradCAM')
explgradcamplusfolder = join(interfolder, 'Explanation_GradCAMPlus')
explscorecamfolder = join(interfolder, 'Explanation_ScoreCAM')

device = torch.device('cuda')

p = Path(imagefolder)
filenames = [i.stem + '.jpg' for i in p.glob('**/*')]

# Step 1: Initialize.

## Initialize the deep model
Here, we provide code for VGG-Face trained on CelebA. Please write your own model loading function 

In [2]:
def loadVGGModel( filename):
	dat2 = torch.load(filename)
	# copy dictionary
	if str.split(list(dat2.keys())[0],'.')[0] == 'module':
		dat = {}
		for key in dat2.keys():
			k = '.'.join(str.split(key,'.')[1:])
			dat[k] = dat2[key]
	else:
		dat = dat2
		
	n_classes = dat['classifier.6.bias'].shape[0]
	model = models.vgg16(pretrained = False)
	lastlayer = torch.nn.Linear(in_features = model.classifier[-1].in_features, \
							   out_features = n_classes, \
							   bias = True)
	model.classifier[-1] = lastlayer
	model.load_state_dict(dat)
	return model

#model = loadVGGModel('VGG16_CelebA_Gender.pth')
model = loadVGGModel(join(modelfolder,'VGG16_CelebA_Recognition.pth')) # here is the recognition model
model.to(device)
model.eval()


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [3]:
def preprocess_image(pil_im, resize_im=True):
    """
        Processes image for CNNs

    Args:
        PIL_img (PIL_img): Image to process
        resize_im (bool): Resize to 224 or not
    returns:
        im_as_var (torch variable): Variable that contains processed float tensor
    """
    # mean and std list for channels (Imagenet)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    # Resize image
    if resize_im:
        pil_im.thumbnail((512, 512))
    im_as_arr = np.float32(pil_im)
    im_as_arr = im_as_arr.transpose(2, 0, 1)  # Convert array to D,W,H
    # Normalize the channels
    for channel, _ in enumerate(im_as_arr):
        im_as_arr[channel] /= 255
        im_as_arr[channel] -= mean[channel]
        im_as_arr[channel] /= std[channel]
    # Convert to float tensor
    im_as_ten = torch.from_numpy(im_as_arr).float()
    # Add one more channel to the beginning. Tensor shape = 1,3,224,224
    # Convert to Pytorch variable
   # im_as_var = Variable(im_as_ten, requires_grad=True)
    im_as_ten = im_as_ten.to(device);
    im_as_ten.requires_grad=True;
    return im_as_ten
	
def get_image(filename):
    img = Image.open(filename)
    orig_size = img.size
    img = img.resize((224,224))
    pimg= preprocess_image(img)
    return pimg, orig_size

## Create a dataset

In [4]:
class Dataset(torch.utils.data.Dataset):
	def __init__(self, datafolder, filelist):
		self.datafolder = datafolder
		self.filelist = filelist
		self.length = len(self.filelist)
		
	def __len__(self):
		return self.length
	
	def __getitem__(self, index):
		return get_image(join(self.datafolder, self.filelist[index]))[0], self.filelist[index]
		

# Step 2: Get the Confidence

In [5]:
def confidence(dataset):
	dataloader = torch.utils.data.dataloader.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=0)
	# create variables to store score and class
	score = np.arange(len(dataset)).astype(np.float32)
	index = np.arange(len(dataset))
	filelist = []
	lastind = 0
	for ii, data in enumerate(dataloader):
		img, file = data
		filelist.extend(file)
		print('\r%s'%ii,end='           ')
		outputs = model(img.cuda()).detach()
		val, ind = torch.max(outputs,1)
		score[lastind:lastind+len(val)] = val.cpu().numpy()
		index[lastind:lastind+len(ind)] = ind.cpu().numpy()
		lastind = lastind+len(val)
		
	return score, index, filelist

def confidence_for_class(dataset, classes):
	dataloader = torch.utils.data.dataloader.DataLoader(dataset, batch_size=32, shuffle=False)
	# create variables to store score and class
	score = np.arange(len(dataset)).astype(np.float32)
	classes = classes.flatten()
	lastind = 0
	for ii, data in enumerate(dataloader):
		print('\r%s'%ii,end='           ')
		img, filename = data
		outputs = model(img.cuda()).detach().cpu().numpy()
		for jj in range(outputs.shape[0]):
			score[lastind:lastind+jj]=outputs[jj][classes[lastind+jj]]
		lastind = lastind+outputs.shape[0]
		
	return score

In [6]:
# score on unaltered images
images = Dataset(imagefolder, filenames)
imagescore, index, filelist = confidence(images)

# create other datasets with the same file order
gradcam = Dataset(explgradcamfolder, filelist)
gradcamplus = Dataset(explgradcamplusfolder, filelist)
scorecam = Dataset(explscorecamfolder, filelist)
cms = Dataset(explcmsfolder, filelist)

# get scores for the original classes
gradcamscore = confidence_for_class(gradcam, index)
gradcamplusscore = confidence_for_class(gradcamplus, index)
scorecamscore = confidence_for_class(scorecam, index)
cmsscore = confidence_for_class(cms, index)

3           

## Step 3: Calculate the metrics

In [7]:
def AverageDrop(fullscore, explscore):
	return  np.sum(np.maximum(0, fullscore-explscore)/fullscore)*100/len(fullscore)

def IncreaseConfidence(fullscore, explscore):
	return np.sum(fullscore<explscore)/len(fullscore)*100

def Win(gcscore, gcpscore, scscore, trscore):
	# for each, check which one is the hightest
	
	maxscores = np.argmin(np.vstack((gcscore,gcpscore, scscore, trscore)),axis=0)
	gc = np.sum(maxscores==0)
	gcp = np.sum(maxscores==1)
	sc = np.sum(maxscores==2)
	tr = np.sum(maxscores==3)
	length = len(gcscore)
	
	print('GradCAM\t\t:\t', gc/length*100)
	print('GradCAM++\t:\t', gcp/length*100)
	print('ScoreCAM\t:\t', sc/length*100)
	print('CMS\t\t:\t', tr/length*100)

In [8]:
print('Average Drop: (Higher is better)')
print('GradCAM\t\t:\t', AverageDrop(imagescore, gradcamscore))
print('GradCAM++\t:\t', AverageDrop(imagescore, gradcamplusscore))
print('ScoreCAM\t:\t', AverageDrop(imagescore, scorecamscore))
print('CMS\t\t:\t', AverageDrop(imagescore, cmsscore))
print('\n% Increase in confidence: (Lower is better)')
print('GradCAM\t\t:\t', IncreaseConfidence(imagescore, gradcamscore))
print('GradCAM++\t:\t', IncreaseConfidence(imagescore, gradcamplusscore))
print('ScoreCAM\t:\t', IncreaseConfidence(imagescore, scorecamscore))
print('CMS\t\t:\t', IncreaseConfidence(imagescore, cmsscore))
print('\nWin %: (Higher is better)')
Win(gradcamscore, gradcamplusscore, scorecamscore, cmsscore)


Average Drop: (Higher is better)
GradCAM		:	 37.9015998840332
GradCAM++	:	 42.49958419799805
ScoreCAM	:	 39.13896942138672
CMS		:	 65.07044219970703

% Increase in confidence: (Lower is better)
GradCAM		:	 8.0
GradCAM++	:	 7.000000000000001
ScoreCAM	:	 10.0
CMS		:	 4.0

Win %: (Higher is better)
GradCAM		:	 4.0
GradCAM++	:	 0.0
ScoreCAM	:	 0.0
CMS		:	 96.0
