In [1]:
import torchvision.models as models                   

import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
import json
import requests
import matplotlib.pyplot as plt
import warnings
import math
import torchvision.transforms as transforms
import random
warnings.filterwarnings('ignore')
%matplotlib inline

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# device = torch.device("cpu")
print(f'Using {device} for inference')

Using cuda for inference


In [2]:
from numpy import asarray, percentile, tile
import torch.nn as nn
from scipy.ndimage import gaussian_filter
from torchvision import transforms
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
denormalize = transforms.Normalize(mean = [-0.485/0.229, -0.456/0.224, -0.406/0.225], std = [1/0.229, 1/0.224, 1/0.225] )
def image_converter(im):
    im_copy = im.cpu()
    
    im_copy = denormalize(im_copy.clone().detach()).numpy()
    im_copy = im_copy.transpose(1,2,0)
    im_copy = im_copy.clip(0, 1) 
    return im_copy

def layer_hook(act_dict, layer_name):
    def hook(module, input, output):
        act_dict[layer_name] = output
    return hook

In [3]:
def jitter(model, img, jitter_t, layer_activation, layer_name, unit):
    sum = 0
    for i in range(10):
        temp_img = img
        tao = random.randint(0, jitter_t)
        temp_img = torch.add(temp_img, tao)
        model(temp_img)
        layer_out = layer_activation[layer_name]
        sum = torch.add(sum, layer_out[0][unit])
    jitter_loss = sum / 10
    jitter_loss.requires_grad_(True)
    jitter_loss.retain_grad() 
    jitter_loss.backward(retain_graph=True)
    jitter_grad = jitter_loss.grad.detach()
    return jitter_loss.detach(), jitter_grad

In [4]:
def tv(img, img_grad):
    bs_img, c_img, h_img, w_img = img.size()
    w = torch.sum(torch.pow(img[:,:,:,:-1] - img[:,:,:,1:], 2))
    h = torch.sum(torch.pow(img[:,:,:-1,:] - img[:,:,1:,:], 2))
    tv_loss = (1/(torch.norm(img_grad) * h_img * w_img) * (h + w))
    tv_loss.requires_grad_(True)
    tv_loss.retain_grad() 
    tv_loss.backward(retain_graph=True)
    tv_grad = tv_loss.grad.detach()
    return tv_loss.detach(), tv_grad

In [5]:
def act_max(model, 
    inp_img, 
    layer_activation, 
    layer_name, 
    unit, 
    steps=100, 
    alpha=torch.tensor(1),
    TV = False,
    Jitter = False,
    Regular = False,
    jitter_t = 20,
    jitter_alpha = 0.05,
    tv_alpha = 0.05,
    show_img = False
    ):

    best_activation = -float('inf')
    min_loss = float('inf')
    best_img = inp_img
    for k in range(steps):
        inp_img.requires_grad_(True)
        inp_img.retain_grad() 
        inp_img = inp_img.to(device)
        old_norm = torch.norm(inp_img)
        # Propagate image
        model(inp_img)
        layer_out = layer_activation[layer_name]
        # Compute gradients
        layer_out[0][unit].backward(retain_graph=True)
        img_grad = inp_img.grad
            
        # Gradient Step
        inp_img = torch.add(inp_img, torch.mul(img_grad.detach(), alpha))

        act_loss = layer_out[0][unit]
        #Jitter
        jitter_loss = torch.tensor(0)
        jitter_grad = 0
        if Jitter and k % 10 == 0:
            jitter_loss, jitter_grad = jitter(model, inp_img, jitter_t, layer_activation, layer_name, unit)
        if TV:
            tv_loss, tv_grad = tv(inp_img, img_grad)

        # Keep highest activation
        loss = -1 * act_loss
        if Jitter:
            loss -= jitter_alpha * jitter_loss
        if TV:
            loss += tv_alpha * tv_loss
        if Regular:
            if Jitter:
              inp_img = torch.add(inp_img, torch.mul(jitter_grad, alpha*jitter_alpha))
            if TV:
              inp_img = torch.add(inp_img, torch.mul(tv_grad, -alpha*tv_alpha))
        
        new_norm = torch.norm(inp_img)
        inp_img = torch.mul(inp_img, old_norm/new_norm)
        if loss < min_loss:
            if not Jitter or k % 10 != 10:
                jitter_loss, jitter_grad = jitter(model, inp_img, jitter_t, layer_activation, layer_name, unit)
            if not TV:
                tv_loss, tv_grad = tv(inp_img, img_grad)
            best_activation = act_loss, jitter_loss, tv_loss, loss
            min_loss = loss
            best_img = inp_img

        if show_img and k == steps-1:
            final_image = image_converter(inp_img.squeeze(0))
            plt.imshow(final_image)
            plt.show()        
            print('step: ', k, 'activation: ', layer_out[0][unit])
        
    return (best_activation, best_img)

In [6]:
resnet50 = models.resnet50(pretrained = True)
utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_convnets_processing_utils')

resnet50.eval().to(device)

alexnet = models.alexnet(weights='IMAGENET1K_V1')
alexnet.eval().to(device)

Using cache found in C:\Users\phili/.cache\torch\hub\NVIDIA_DeepLearningExamples_torchhub


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [7]:
def reset_img():
  inp = torch.rand((1, 3, 227, 227))
  inp.requires_grad_(True)
  return inp.to(device)
# convert (1, 3, 227, 227) Torch tensor into 227*227 element numpy array, averaging across RGB channels
def np_data(img):
    img = denormalize(img.squeeze().detach().cpu())
    img = torch.mean(img, 0)
    img = torch.flatten(img)
    img = img.numpy()
    return img
def get_hist(arr, title):
    hist, bin = np.histogram(arr)
    plt.hist(arr, bins=bin)
    plt.title(title)
    plt.show()

In [8]:
unit = 130
steps = 200
alpha = torch.tensor(1.5)
def experiment(model, TV, Jitter, jitter_t=0, jitter_alpha=0, tv_alpha=0, trials=10):
    # In order: activation, jitter, tv, and total losses
    losses = [], [], [], []
    for t in range(trials):
        # starting image
        orig_img = reset_img()
        inp = orig_img
        # outputs of image through both neural nets
        results = model(inp)
        value = results.detach().cpu().numpy()
        # max outputs of image through both neural nets
        k = max(value[0])
        act_dict = {}
        layer_name = 'classifier_final'
        list(model.children())[-1].register_forward_hook(layer_hook(act_dict, layer_name))
        
        activation, output = act_max(model=model,
                    inp_img=inp,
                    layer_activation=act_dict,
                    layer_name=layer_name,
                    unit=unit,
                    steps=steps,
                    alpha=alpha,
                    TV=TV,
                    Jitter=Jitter,
                    Regular=True,
                    jitter_t=jitter_t,
                    jitter_alpha=jitter_alpha,
                    tv_alpha=tv_alpha,
                    show_img=False,
                    )
        for i in range(4):
            if isinstance(activation[i], int):
                print(i)
            losses[i].append(activation[i].detach().cpu().numpy().item())
        out = np_data(output)
        torch.cuda.empty_cache()
        print("Completed trial:", t)
    names = ["Activations:", "Jitter losses:", "TV losses:", "Total losses:"], ["Average activation:", "Average jitter loss:", "Average TV loss:", "Average total loss:"]
    for i in range(4):
        print(names[0][i], str(losses[i]))
        print(names[1][i], sum(losses[i])/trials)

In [32]:
experiment(alexnet, TV=True, Jitter=True, jitter_t=10, jitter_alpha=0.01, tv_alpha=0.001, trials=10)

Completed trial: 0
Completed trial: 1


resnet50, TV=False, Jitter=True, jitter_t=10, jitter_alpha=0.1, tv_alpha=0, trials=10  
 - Activations: [163.57022094726562, 171.38491821289062, 177.36441040039062, 184.45797729492188, 174.5690460205078, 162.1881103515625, 173.3818359375, 167.202392578125, 176.84043884277344, 178.08633422851562]
 - Average activation: 172.90456848144532
 - Jitter losses: [22.103370666503906, 34.036346435546875, 8.708541870117188, 19.32915496826172, 38.63886260986328, 27.92671775817871, 14.243896484375, 23.848962783813477, 12.266249656677246, 39.02633285522461]
 - Average jitter loss: 24.0128436088562
 - TV losses: [0.02318560890853405, 0.027121976017951965, 0.028241293504834175, 0.04551585391163826, 0.025176187977194786, 0.026284033432602882, 0.026428097859025, 0.02534600719809532, 0.028803151100873947, 0.029583383351564407]
 - Average TV loss: 0.02856855932623148
 - Total losses: [-167.22222900390625, -171.38491821289062, -180.71128845214844, -184.45797729492188, -174.5690460205078, -164.54434204101562, -178.4657440185547, -170.62872314453125, -180.0749053955078, -181.38262939453125]
 - Average total loss: -175.34418029785155

In [10]:
# import gc
# gc.collect()
# torch.cuda.empty_cache()