In [2]:
import torch
import torch.nn as nn
import torchvision
from torch.utils import data
#import torchvision.models as models
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import os
import argparse
import matplotlib.pyplot as plt
from matplotlib import image
import math
import numpy as np
import random
import cv2
import pandas as pd
import csv
import zipfile
from torch.utils.data import Dataset
from natsort import natsorted
from PIL import Image
import sklearn
%matplotlib inline
import gdown

In [3]:
# Mount Google Drive so the CSV/XLSX files and the model checkpoint read by
# later cells are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
import sys
# Use the absolute mount path (the same root the later read_csv/torch.load
# calls use) instead of a cwd-relative one, and avoid appending duplicates
# when this cell is re-run.
# Presumably this is where the `models_celeb` package imported later lives.
drive_root = '/content/drive/My Drive'
if drive_root not in sys.path:
    sys.path.append(drive_root)

Mounted at /content/drive


In [4]:
# Seed every RNG this notebook imports so repeated runs are reproducible.
manual_seed = 999
random.seed(manual_seed)
# NumPy keeps its own global RNG; seed it as well.
np.random.seed(manual_seed)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(manual_seed)

<torch._C.Generator at 0x7f0a95297910>

#                              Download dataset

In [5]:
# Root directory for the celebset dataset
data_root = 'data/celebset'
# Path to folder with individual images
img_folder1 = f'{data_root}/img_celebset'
# URL for the Celebset dataset
url = "https://drive.google.com/uc?id=1EGVVqy9ZUVg4ErPlgEGQuezDkcMPJBSh"
# Path to download the dataset to
download_path = f'{data_root}/img_celebset.zip'

# Create required directories. img_folder1 is nested inside data_root, so a
# single makedirs call creates both; exist_ok keeps the cell re-runnable and
# also covers the case where data_root exists but img_folder1 does not.
os.makedirs(img_folder1, exist_ok=True)

# Download the dataset from google drive unless a valid archive is already
# cached. is_zipfile() returns False for a missing or corrupt/partial file,
# so a broken earlier download is fetched again.
if not zipfile.is_zipfile(download_path):
  gdown.download(url, download_path, quiet=False)

# Unzip the downloaded file
with zipfile.ZipFile(download_path, 'r') as ziphandler:
  ziphandler.extractall(img_folder1)

Downloading...
From: https://drive.google.com/uc?id=1EGVVqy9ZUVg4ErPlgEGQuezDkcMPJBSh
To: /content/data/celebset/img_celebset.zip
11.0MB [00:00, 20.2MB/s]


In [6]:
class CelebADataset(Dataset):
  """Image-only dataset over every entry of a single directory.

  Entries are ordered with natsorted so that an integer index maps to the
  same file on every run. Samples carry no label; __getitem__ returns only
  the (optionally transformed) image.
  """

  def __init__(self, root_dir, transform=None):
    """
    Args:
      root_dir (string): Directory with all the images
      transform (callable, optional): transform to be applied to each image sample
    """
    self.root_dir = root_dir
    self.transform = transform
    # Natural sort of the directory listing fixes the index -> file mapping.
    self.image_names = natsorted(os.listdir(root_dir))

  def __len__(self):
    """Return the number of directory entries found in root_dir."""
    return len(self.image_names)

  def __getitem__(self, idx):
    """Load the idx-th image as RGB and apply the transform, if any.

    Args:
      idx (int): position in the naturally sorted file list.

    Returns:
      The RGB PIL image, or whatever self.transform returns for it.
    """
    # Get the path to the image
    img_path = os.path.join(self.root_dir, self.image_names[idx])
    # Open inside a context manager so the underlying file is closed
    # deterministically; convert() returns a new, fully loaded RGB image
    # that stays valid after the file is closed.
    with Image.open(img_path) as img_file:
      img = img_file.convert('RGB')
    # Explicit None check: a callable that happens to be falsy
    # (e.g. an empty container module) must still be applied.
    if self.transform is not None:
      img = self.transform(img)

    return img

In [8]:
# Side length (in pixels) every image is resized and cropped to.
image_size = 32

# Per-sample preprocessing: resize, centre-crop to a square, convert to a
# tensor, then normalise each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

# Inverse of the Normalize step above: (x - (-1)) / 2 undoes (x - 0.5) / 0.5.
inv_normalize = transforms.Normalize(mean=[-1] * 3, std=[1 / 0.5] * 3)

# Build the dataset over the extracted image folder with the pipeline above.
celebset_dataset = CelebADataset(f'{img_folder1}/img_celebset', transform)

In [9]:
# Attribute table stored on Drive; its 'male' column is used as the label.
attributes1 = pd.read_csv('/content/drive/My Drive/list_attr_celebset.csv')
y1 = attributes1['male']

# Materialise the first 1600 samples and their labels as parallel lists.
# NOTE(review): assumes row i of the CSV describes the i-th image in the
# dataset's natural-sort order — confirm against the attribute file.
num_samples = 1600
dataset_X = [celebset_dataset[sample_idx] for sample_idx in range(num_samples)]
dataset_y = [y1[sample_idx] for sample_idx in range(num_samples)]

In [10]:
# Class index -> printable label (labels are read from the 'male' column).
encoder = {1: "male", 0: "female"}

In [13]:
# Rebuild the training set of the fold that gave the best accuracy.
# Row 7 of the spreadsheet holds that fold's sample indices; the first
# cell of the row is skipped and the rest index into dataset_X / dataset_y.
df = pd.read_excel ('/content/drive/MyDrive/test_train_valid.xlsx')
train_idx = np.array(df.iloc[7])[1:]
train_x = [dataset_X[int(sample_idx)] for sample_idx in train_idx]
train_y = [dataset_y[int(sample_idx)] for sample_idx in train_idx]

#              **Get indices and values of data points**


In [14]:
# Column 0 of the CSV holds positions into train_x / train_y.
# The first 52 rows form the "low" group; the remaining rows form the
# "high" group, taken in reverse file order.
# NOTE(review): presumably the file is sorted by ascending Shapley value, so
# the reversed tail starts with the highest-valued point — confirm.
indices_df = pd.read_csv("/content/drive/MyDrive/ShapleyIndicesandValuesFinal.csv", header=None)
low_indices = list(indices_df.iloc[:52, 0])
high_indices = list(indices_df.iloc[52:, 0])[::-1]

# Gather the samples and labels for each group.
trainx_low = [train_x[pos] for pos in low_indices]
trainy_low = [train_y[pos] for pos in low_indices]
trainx_high = [train_x[pos] for pos in high_indices]
trainy_high = [train_y[pos] for pos in high_indices]


# **Load Model**

In [15]:
# Project-local model definition; this import has to stay below the Drive
# mount cell because that cell extends sys.path (presumably where the
# models_celeb package lives).
from models_celeb.preact_resnet import PreActResNet18
net = PreActResNet18()
# map_location='cpu': the rest of the notebook runs everything on CPU, and a
# checkpoint saved from a GPU would otherwise fail to load on a CPU runtime.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
model_dict = torch.load('/content/drive/My Drive/genderclassifier_new_3.pth',
                        map_location='cpu')
# The weights are stored under the 'net' key of the checkpoint dict.
net.load_state_dict(model_dict['net'])

<All keys matched successfully>

# **GradCAM**

In [16]:
class ResNet(nn.Module):
    """Grad-CAM wrapper around a PreActResNet-style classifier.

    The wrapped model must expose ``conv1``, ``layer1`` .. ``layer4`` and
    ``linear``. The forward pass runs the convolutional stack, registers a
    hook that stores the gradient flowing back into the last feature map,
    then average-pools (kernel 4), flattens and applies the linear layer.

    Args:
        model (nn.Module, optional): network to wrap. When omitted, the
            notebook-level ``net`` is used, matching the original behaviour.
    """

    def __init__(self, model=None):
        super(ResNet, self).__init__()

        # Fall back to the module-level network so ResNet() keeps working.
        self.resnet = net if model is None else model

        # Convolutional feature extractor: everything before the pooling.
        self.features = nn.Sequential(self.resnet.conv1,
                                      self.resnet.layer1,
                                      self.resnet.layer2,
                                      self.resnet.layer3,
                                      self.resnet.layer4)

        # Final linear classifier of the wrapped model.
        self.classifier = self.resnet.linear

        # Filled by activations_hook during backward(); None until then.
        self.gradient = None

    def activations_hook(self, grad):
        """Tensor hook: remember the gradient w.r.t. the last feature map."""
        self.gradient = grad

    def get_gradient(self):
        """Return the gradient stored by the most recent backward pass."""
        return self.gradient

    def get_activations(self, x):
        """Return the last feature map for ``x`` without registering a hook."""
        return self.features(x)

    def forward(self, x):
        """Return class scores for ``x`` and arm the gradient hook.

        The hook is registered only when the feature map takes part in
        autograd; registering on a tensor that does not require grad raises,
        so this keeps plain inference (e.g. under torch.no_grad()) working.
        """
        # extract the features
        x = self.features(x)

        # register the hook (skipped when no graph is being recorded)
        if x.requires_grad:
            x.register_hook(self.activations_hook)

        x = F.avg_pool2d(x, 4)
        # complete the forward pass
        x = x.view((x.size(0), -1))
        x = self.classifier(x)

        return x

In [17]:
# Wrap the loaded network for Grad-CAM and switch it to evaluation mode.
# Assigning the result to `_` keeps the cell from echoing the module repr.
resnet = ResNet()
_ = resnet.eval()

# **Store saliency maps of low-valued points**

In [None]:
# Grad-CAM for every low-valued training point: show the input image, then
# save the colour-mapped heatmap and the heatmap/image overlay as JPEGs in
# the working directory ("correct"/"wrong" in the file name reflects whether
# the prediction matches the label).

# NOTE(review): these lists are initialised here but nothing in this cell
# fills them; kept so any code that references the names still finds them.
low_correct_indices=[]
low_correct_y=[]
low_wrong_idices=[]
low_wrong_pred=[]
low_wrong_actual=[]
for i, (sample, label) in enumerate(zip(trainx_low, trainy_low)):
    inp = sample.float().reshape(1, 3, 32, 32)

    # Drop parameter gradients left over from the previous sample; they are
    # not used here and would otherwise keep accumulating.
    resnet.zero_grad()
    pred = resnet(inp)
    index = int(pred.argmax(dim=1))
    # Back-propagate the score of the predicted class; the hook inside
    # resnet stores the gradient w.r.t. the last feature map.
    pred[0, index].backward()
    gradients = resnet.gradient
    # One weight per channel: gradient averaged over batch and space.
    pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
    with torch.no_grad():
        activations = resnet.get_activations(inp)
    # Weight each channel by its pooled gradient (broadcast; no hard-coded
    # channel count).
    activations = activations * pooled_gradients.view(1, -1, 1, 1)
    heatmap = torch.mean(activations, dim=1).squeeze()

    # ReLU, then scale to [0, 1]. Skip the division for an all-zero map,
    # which would otherwise turn the heatmap into NaNs.
    heatmap = torch.clamp(heatmap, min=0).cpu()
    peak = torch.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    heatmap = heatmap.numpy()

    # Undo the input normalisation and build an HxWx3 uint8 RGB image.
    img = inv_normalize(inp).cpu().reshape(3, 32, 32).numpy().transpose(1, 2, 0)
    img = np.uint8(255 * np.clip(img, 0, 1))
    img = cv2.resize(img, (224, 224))
    print(i," predicted:",encoder[index]," actual:",encoder[label])
    plt.imshow(img)
    plt.show()

    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    if index == label:
        f2='./low_hmap_correct'+str(i)+'.jpg'
        file_name='./low_superimposed_correct'+str(i)+'.jpg'
    else:
        f2='./low_hmap_wrong'+str(i)+'.jpg'
        file_name='./low_superimposed_wrong'+str(i)+'.jpg'
    cv2.imwrite(f2, heatmap)

    # applyColorMap returns BGR and imwrite expects BGR, while `img` is RGB;
    # convert before blending so the saved face keeps its real colours.
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # The blend can exceed 255; clip and cast explicitly before writing.
    superimposed_img = np.clip(heatmap * 0.4 + img_bgr, 0, 255).astype(np.uint8)
    cv2.imwrite(file_name, superimposed_img)



# **Store saliency maps of high-valued points**

In [None]:

# Grad-CAM for the first 80 high-valued training points: show the input
# image, then save the colour-mapped heatmap and the heatmap/image overlay
# as JPEGs in the working directory ("correct"/"wrong" in the file name
# reflects whether the prediction matches the label).

# Never index past the end when fewer than 80 high-valued points exist.
num_high = min(80, len(trainx_high))
for i in range(num_high):
    label = trainy_high[i]
    inp = trainx_high[i].float().reshape(1, 3, 32, 32)

    # Drop parameter gradients left over from the previous sample; they are
    # not used here and would otherwise keep accumulating.
    resnet.zero_grad()
    pred = resnet(inp)
    index = int(pred.argmax(dim=1))
    # Back-propagate the score of the predicted class; the hook inside
    # resnet stores the gradient w.r.t. the last feature map.
    pred[0, index].backward()
    gradients = resnet.gradient
    # One weight per channel: gradient averaged over batch and space.
    pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
    with torch.no_grad():
        activations = resnet.get_activations(inp)
    # Weight each channel by its pooled gradient (broadcast; no hard-coded
    # channel count).
    activations = activations * pooled_gradients.view(1, -1, 1, 1)
    heatmap = torch.mean(activations, dim=1).squeeze()

    # ReLU, then scale to [0, 1]. Skip the division for an all-zero map,
    # which would otherwise turn the heatmap into NaNs.
    heatmap = torch.clamp(heatmap, min=0).cpu()
    peak = torch.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    heatmap = heatmap.numpy()

    # Undo the input normalisation and build an HxWx3 uint8 RGB image.
    img = inv_normalize(inp).cpu().reshape(3, 32, 32).numpy().transpose(1, 2, 0)
    img = np.uint8(255 * np.clip(img, 0, 1))
    img = cv2.resize(img, (224, 224))
    print(i," predicted:",encoder[index]," actual:",encoder[label])
    plt.imshow(img)
    plt.show()

    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    if index == label:
        f2='./high_hmap_correct'+str(i)+'.jpg'
        file_name='./high_superimposed_correct'+str(i)+'.jpg'
    else:
        f2='./high_hmap_wrong'+str(i)+'.jpg'
        file_name='./high_superimposed_wrong'+str(i)+'.jpg'
    cv2.imwrite(f2, heatmap)

    # applyColorMap returns BGR and imwrite expects BGR, while `img` is RGB;
    # convert before blending so the saved face keeps its real colours.
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # The blend can exceed 255; clip and cast explicitly before writing.
    superimposed_img = np.clip(heatmap * 0.4 + img_bgr, 0, 255).astype(np.uint8)
    cv2.imwrite(file_name, superimposed_img)