# Setup Environment
Use validation-set images from the ILSVRC 2012 Challenge, stored in a Google Drive folder, with their labels in a `meta.json` file. (The validation set is used because no metadata is available for the test set.)

For more information see: http://image-net.org/challenges/LSVRC/2012/

In [None]:
from __future__ import print_function, division
import os
import json
import io
import datetime
import collections
from skimage import io
from functools import partial
import numpy as np
import scipy
import scipy.stats
import matplotlib.pyplot as plt
import seaborn
import tables
import h5py

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torchvision
import torchvision.transforms as transforms
import torchvision.models as tmodels
from torchvision import datasets
from torchvision.datasets import ImageFolder
from torchvision.datasets.folder import default_loader 
from torchvision.models import *

from scipy.stats.stats import pearsonr #maybe use this instead


In [None]:
#########################################################################################################

# Replace with your own directories.
# Directory with the ILSVRC 2012 validation images; passed as `root` to the dataset below.
imagenet_validation_path = '/mnt/raid/data/ni/dnn/ILSVRC2012_img_val'
# Directory containing 'meta.json' and 'resnets_selected_layers.json'.
meta_file_path = '/mnt/raid/ni/agnessa/RSA/'
# Base output directory; activations are saved into its 'activations/' subfolder.
ROOT_PATH = '/mnt/raid/ni/agnessa/RSA/'

#########################################################################################################

# Select Data and get Metadata
Select 10 images from each of the 1000 classes of the validation data set, together with their labels.

In [None]:
class ILSVRCSubDataset(Dataset):
    """ILSVRC 2012 subset of the original val dataset.

    The samples are described by a JSON meta file. Each meta entry is read
    with the key ``"0"`` (image file name, joined onto ``root``) and the key
    ``"1"`` (the value returned alongside the image as its class).
    """

    def __init__(self, json_file, root, transform=None):
        """
        Args:
            json_file (string): Path to the json file with meta.
            root (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root = root
        self.transform = transform

        # Parse META File
        with open(json_file, "r") as fd:
            self.meta = json.load(fd)
        # Report only the size: printing the full metadata floods the output
        # with one entry per sample.
        print("Loaded meta for {} samples from {}".format(len(self.meta), json_file))

    def __len__(self):
        """Return the number of entries in the meta file."""
        return len(self.meta)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Args:
            idx: Index into the parsed meta data.

        Returns:
            tuple: ``(sample, wnid)`` where ``sample`` is the image loaded by
            ``default_loader`` (after ``transform``, if one was given) and
            ``wnid`` is the value stored under key ``"1"`` of the meta entry.
        """
        entry = self.meta[idx]
        # Merge root and the file name of the sample.
        path = os.path.join(self.root, entry["0"])
        sample = default_loader(path)
        if self.transform is not None:
            sample = self.transform(sample)

        wnid = entry["1"]

        return sample, wnid  # sample, class

In [None]:
# Preprocessing applied to every image: resize, center-crop to 224,
# convert to a tensor and normalize each channel with the given mean/std.
data_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Dataset described by 'meta.json', with images read from the validation folder.
dataset_val = ILSVRCSubDataset(
    json_file=os.path.join(meta_file_path, 'meta.json'),
    root=imagenet_validation_path,
    transform=data_transforms,
)

# Iterable over the dataset: 20 samples per batch, in dataset order (no shuffling).
dataloaders = DataLoader(dataset_val, batch_size=20, shuffle=False)


# Get model and activations
Run the subset (10 images for each of the 1000 classes) through torchvision's pretrained models, get the activations of specific layers, and calculate the input RDM by correlating the activations.

In [None]:
def getFileName(n_samples, name):
    """Build the ``.npy`` file name for the current model and layer.

    Reads the module-level ``model_name`` and ``layer_name`` variables, so
    both must be set before this function is called.

    Args:
        n_samples: Number of samples, embedded in the file name.
        name: Prefix of the file name (e.g. "activations").

    Returns:
        str: ``<name>_<n_samples>__<model_name>__<layer_name>.npy``
    """
    pieces = [
        name,
        "_{}_".format(n_samples),
        "_{}_".format(model_name),
        "_{}".format(layer_name),
        ".npy",
    ]
    return "".join(pieces)
#         + datetime.datetime.now().replace(microsecond=0).isoformat() \
        

In [None]:
def getFileNameh5(n_samples, name):
    """Build the ``.h5`` file name for the current model and layer.

    Reads the module-level ``model_name`` and ``layer_name`` variables, so
    both must be set before this function is called.

    Args:
        n_samples: Number of samples, embedded in the file name.
        name: Prefix of the file name.

    Returns:
        str: ``<name>_<n_samples>__<model_name>__<layer_name>.h5``
    """
    parts = (
        name,
        "_{}_".format(n_samples),
        "_{}_".format(model_name),
        "_{}".format(layer_name),
        ".h5",
    )
    return "".join(parts)
#         + datetime.datetime.now().replace(microsecond=0).isoformat() \
        

# Get the activations from a layer for all samples, save them

In [None]:
# The data iterator must be created once per layer and then advanced with next():
# re-creating it for every batch would always return the first batch again
# (shuffling is disabled), which breaks everything.

# Load the json file with the layers of interest (resnets).
json_file_layers = os.path.join(meta_file_path, 'resnets_selected_layers.json')
with open(json_file_layers, "r") as fd:
    selected_layers = json.load(fd)  # next index 20

# Change this index at each iteration to process another (model, layer) pair.
LAYER_INDEX = 17
model_name, layer_name = selected_layers[LAYER_INDEX].get('model'), selected_layers[LAYER_INDEX].get('layer')
print(model_name+'(pretrained=True)')
# Resolve the constructor by name in torchvision.models instead of eval()-ing
# a string assembled from file contents.
model = getattr(tmodels, model_name)(pretrained=True)

# #if you need to look up the index of a specific model
# for idx, dictionary in enumerate(selected_layers):
#     if dictionary.get('model') == 'resnet50':
#         print(idx)
#         break

NR_OF_SAMPLES = 10000  # num classes*num samples per class;  len(dataset_val)
batch_size = 20  # NOTE(review): must equal the batch_size given to the DataLoader - confirm

# Important: put model in evaluation mode for consistent results.
model.eval()
print('Getting activations for model->',model_name,'and layer->', layer_name)

# Fail early if the requested layer does not exist; otherwise the code below
# would crash later or pick up a stale `activations` list from an earlier run.
target_module = dict(model.named_modules()).get(layer_name)
if target_module is None:
    raise ValueError("Layer {!r} not found in model {!r}".format(layer_name, model_name))

# Create one iterator for this layer and collect the hooked outputs batch by batch.
data_iterator = iter(dataloaders)
activations = list()
# Hook arguments: module, input, output. Every time the module computes an
# output, the lambda stores it as a list of per-sample arrays.
handle = target_module.register_forward_hook(
    lambda m, i, o: activations.append(list(o.detach().cpu().numpy().squeeze())))
try:
    # Inference only: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for i in range(NR_OF_SAMPLES // batch_size):
            print(".", end='')
            cur = next(data_iterator)[0]  # batch is (images, labels); keep the images
            out = model(cur)  # forward pass; the hook records the layer output
finally:
    # Always detach the hook, even if a forward pass failed.
    handle.remove()

# Prepare for flattening over features (build the array once, it is large).
stacked = np.array(activations)
print('size activations->',stacked.shape)

# Flatten into num samples x num features.
flattened = stacked.reshape(NR_OF_SAMPLES,-1)
print(flattened.shape)

# Save activations.
path = os.path.join(ROOT_PATH, 'activations', getFileName(NR_OF_SAMPLES,"activations"))
print("Save Activation -> {}".format(path))
np.save(path, flattened)
# Clear variables.
del(stacked)
del(activations)
del(data_iterator)


## In case you need to run only one layer

In [None]:
# #for layer 1

# # Iterator shouldn't be recreated every time, because it always returns the first element
# # Which breaks everything if shuffling is disabled
# # data_iterator = iter(dataloaders)
# model_name = 'resnet34'
# layer_name = 'layer1.0'
# NR_OF_SAMPLES = 10000 #num classes*num samples/class;  len(dataset_val) 
# batch_size = 20

# path = os.path.join(ROOT_PATH + 'activations/', getFileName(NR_OF_SAMPLES,"activations"))
# # save np.load
# np_load_old = np.load

# # modify the default parameters of np.load
# np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)
# activations = np.load(path)
# np.load = np_load_old

# #correlation matrix = input RDM
# y = activations.shape[2]
# x = activations.shape[3]
# w = activations.shape[4]

# print('size activations->',np.array(activations).shape)
# print('y,x,w->',y,x,w)

# flattened = np.array(activations).reshape(NR_OF_SAMPLES,y*x*w)
# corr_matrix = correlationd_matrix(flattened,NR_OF_SAMPLES) 
# path = os.path.join(ROOT_PATH + 'Input_RDM/', getFileName(NR_OF_SAMPLES, "Input_RDM"))
# print("Save Input RDM ->len {}".format(path))
# np.save(path, np.array(corr_matrix))
