 ## Set up the environment

In [None]:
from torchvision.datasets.folder import default_loader
import json
import torch
from torch.autograd import Variable as V
import torchvision.models as tmodels
import torchvision.transforms as transforms
from torch.nn import functional as F
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import numpy as np
# Filesystem configuration for this notebook. All paths are machine-specific.
# Cache directory where torch stores downloaded pretrained models.
os.environ['TORCH_HOME'] = '/mnt/raid/ni/agnessa/RSA/.cache/torch/' #directory where pretrained models are saved
# Root directory of the images; joined with each filename from the meta file.
imagenet_validation_path = '/mnt/raid/data/ni/dnn/ILSVRC2012_img_val'
# Directory that contains 'meta.json' (the list of selected samples).
meta_file_path = '/mnt/raid/ni/agnessa/RSA/'
# Output root; the activations are saved under '<ROOT_PATH>/activations/'.
ROOT_PATH = '/mnt/raid/ni/agnessa/RSA/Scenes/ImageNet'
# Directory that contains the '<model_name>_selected_layers.json' files.
layers_path = '/mnt/raid/ni/agnessa/RSA/layer_names'

## Select data and metadata

In [None]:
class ILSVRCSubDataset(Dataset):
    """Subset of the ILSVRC 2012 validation set described by a JSON meta file.

    The meta file is indexed by sample position; every entry provides the
    image filename under key "0" and the class identifier under key "1".
    """

    def __init__(self, json_file, root, transform=None):
        """
        Args:
            json_file (string): Path to the JSON meta file.
            root (string): Directory that contains all the images.
            transform (callable, optional): Transform applied to every
                loaded image before it is returned.
        """
        # Read the whole meta file up front; it is small compared to the images.
        with open(json_file, "r") as meta_fd:
            entries = json.load(meta_fd)

        self.root = root
        self.transform = transform
        self.meta = entries
        print(self.meta)

    def __len__(self):
        """Return the number of samples listed in the meta file."""
        return len(self.meta)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class identifier)``."""
        entry = self.meta[idx]

        # The meta file stores bare filenames, so prepend the image directory.
        image = default_loader(os.path.join(self.root, entry["0"]))
        if self.transform is not None:
            image = self.transform(image)

        return image, entry["1"]


## Transform the images

In [None]:
# Image preprocessing: resize to 256x256, take the central 224x224 crop,
# convert to a tensor and normalize each channel with the given (mean, std).
data_transforms = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

# Dataset over the samples listed in meta.json, read from the image directory.
dataset_val = ILSVRCSubDataset(
    os.path.join(meta_file_path, 'meta.json'),
    imagenet_validation_path,
    data_transforms,
)

# Iterate in fixed order (no shuffling), 20 samples per batch, so the order of
# the batches follows the order of the entries in the meta file.
dataloaders = DataLoader(dataset_val, batch_size=20, shuffle=False)


## Define functions to create filenames

In [None]:
def getFileName(n_samples, name, model=None, layer=None):
    """Build the ``.npy`` filename used for saved results.

    The result has the form ``<name>_<n_samples>__<model>__<layer>.npy``
    (the double underscores are part of the established naming scheme).

    Args:
        n_samples: Number of samples the file covers.
        name: Prefix describing the content, e.g. ``"activations"``.
        model: Model name; defaults to the global ``model_name``.
        layer: Layer name; defaults to the global ``layer_name``.

    Returns:
        The filename as a string (no directory component).
    """
    # Fall back to the notebook-level globals so existing two-argument calls
    # keep producing exactly the same filenames as before.
    if model is None:
        model = model_name
    if layer is None:
        layer = layer_name
    return "{}_{}__{}__{}.npy".format(name, n_samples, model, layer)

## Load the pretrained model weights

In [None]:
# Load the JSON file listing the layers of interest for this model.
model_name = 'densenet161'
json_file_layers = os.path.join(layers_path, model_name + '_selected_layers.json')
with open(json_file_layers, "r") as fd:
    selected_layers = json.load(fd)
layer_name = selected_layers[15].get('layer') #change the index at each iteration

# Build the architecture with a 365-way classifier and fetch the weights file
# if it is not present in the working directory.
model_file = '%s_places365.pth.tar' % model_name
model = tmodels.__dict__[model_name](num_classes=365)
if not os.path.isfile(model_file):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    # Fail loudly: continuing without the file would leave the model untrained.
    if os.system('wget ' + weight_url) != 0:
        raise RuntimeError('Could not download pretrained weights from ' + weight_url)

# IMPORTANT: never torch.save(model.state_dict(), model_file) here -- that
# replaces the downloaded checkpoint with the randomly initialised weights.
# A weights file written by the earlier version of this cell holds untrained
# weights; delete it so that it is downloaded again.
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)

# Accept both a raw state dict and a checkpoint that wraps it in 'state_dict'.
if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
    state_dict = checkpoint['state_dict']
else:
    state_dict = checkpoint

# Drop the 'module.' component that DataParallel adds to parameter names.
state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}

# NOTE(review): checkpoints saved with old PyTorch versions may use legacy
# DenseNet key names (e.g. 'norm.1' instead of 'norm1'); if load_state_dict
# reports missing/unexpected keys, rename them before loading -- confirm.
model.load_state_dict(state_dict)
model.eval()

## Get the activations from a layer for all samples and save them
Using the subset with 10 images from each of the 1000 classes, run torchvision's pretrained models, extract the activations of specific layers, and calculate the input RDM by correlating the activations.

In [None]:
NR_OF_SAMPLES = len(dataset_val) #num classes*num samples per class
batch_size = dataloaders.batch_size #kept in sync with the DataLoader instead of a second hard-coded value
model.eval() #important: put model in evaluation mode for consistent results

# Look up the module whose output should be recorded; fail early on a typo in
# the layer name instead of silently producing no output file.
target_module = dict(model.named_modules()).get(layer_name)
if target_module is None:
    raise ValueError('Layer {!r} not found in model {!r}'.format(layer_name, model_name))

print('Getting activations for model->',model_name,'and layer->', layer_name)
activations = list()


def _store_activations(module, inputs, output):
    """Forward hook: keep the layer output as a (batch, features) array."""
    # Flatten everything except the batch axis. Unlike squeeze(), this never
    # collapses the batch axis, so a final smaller batch is handled correctly.
    activations.append(output.detach().cpu().numpy().reshape(output.shape[0], -1))


# The hook is called every time the module computes an output.
handle = target_module.register_forward_hook(_store_activations)
try:
    # No gradients are needed for feature extraction; this saves memory.
    with torch.no_grad():
        # Iterate the loader directly so that a trailing partial batch is
        # included and no manual batch counting is required.
        for images, _ in dataloaders: #batch: images, labels
            print(".", end='')
            model(images)
finally:
    handle.remove() #always remove the hook, even if a forward pass fails

print('Model->',model_name,'and layer->',layer_name,': done.')

# One row per sample, in dataset order.
flattened = np.concatenate(activations, axis=0)
print("Shape of the flattened activations -> ",flattened.shape)

#save activations
activations_dir = os.path.join(ROOT_PATH, 'activations')
os.makedirs(activations_dir, exist_ok=True) #do not lose the results to a missing directory
path = os.path.join(activations_dir, getFileName(NR_OF_SAMPLES,"activations"))
print("Save Activation -> {}".format(path))
np.save(path, flattened)

#clear variables
del(activations)

In [None]:
# #if you need to look up the index of a specific model
# for idx, dictionary in enumerate(selected_layers):
#     if dictionary.get('model') == 'resnet50':
#         print(idx)
#         break

In [None]:
##deleted lines of code that might be needed at some point

## old code
# checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
# state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
# model.load_state_dict(state_dict)

# updated code
# checkpoint = {'state_dict': model.state_dict()}
# path_model = os.path.abspath(model_file)
# model.load_state_dict(torch.load(path_model)['state_dict'])
# model.load_state_dict(torch.load(path_model))