# Calculate the correlations for each layer and create input RDMs

## Set up the environment

In [1]:
import numpy as np
import os
import json
import matplotlib.pyplot as plt
import seaborn

## Define the function to create filenames

In [2]:
def getFileName(n_samples, name):
    """Return the ``.npy`` file name for the currently selected model and layer.

    The module-level ``model_name`` and ``layer_name`` are read at call time.
    The result has the form
    ``<name>_<n_samples>__<model_name>__<layer_name>.npy`` (the doubled
    underscores are part of the naming scheme).

    Args:
        n_samples: Number of samples, embedded in the file name.
        name: Prefix string identifying the kind of file.

    Returns:
        The assembled file name (no directory component).
    """
    suffix = "_{}__{}__{}.npy".format(n_samples, model_name, layer_name)
    return name + suffix

## Select the model and layer

In [4]:
# Base folder; the cells below append 'activations/', 'Input_RDM/' and
# 'Input_RDM_plots' to it.
ROOT_PATH = '/mnt/raid/ni/agnessa/RSA/Objects/'
# Folder holding one '<model>_selected_layers.json' file per model.
layers_path = '/mnt/raid/ni/agnessa/RSA/layer_names'
# Side length of the square RDM built below; also embedded in file names.
NR_OF_SAMPLES = 10000

# Read the list of layers of interest for the chosen model.
model_name = 'densenet161'
json_file_layers = os.path.join(layers_path, '{}_selected_layers.json'.format(model_name))
with open(json_file_layers, "r") as fd:
    selected_layers = json.load(fd)

# Pick one entry of the list; change the index to process another layer.
layer_name = selected_layers[0].get('layer')


## Define the correlation function

In [5]:
# my version
def correlationd_matrix(batch_size):
    """Build the full correlation-distance matrix (1 - Pearson r) in batches.

    The activations file for the currently selected model/layer is opened as
    a memory map, so only the rows of the two batches being compared are read
    at a time. Rows ``0 .. NR_OF_SAMPLES - 1`` are split into consecutive
    batches of ``batch_size`` rows; a shorter trailing batch is used when
    ``NR_OF_SAMPLES`` is not a multiple of ``batch_size``.

    Reads the module-level ``ROOT_PATH``, ``NR_OF_SAMPLES`` and
    ``getFileName``. The file is indexed as a 2-D array with one row per
    sample.

    Args:
        batch_size: Number of rows per batch (positive integer).

    Returns:
        A symmetric ``(NR_OF_SAMPLES, NR_OF_SAMPLES)`` float array holding
        ``1 - np.corrcoef`` for every pair of rows.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    batch_size = int(batch_size)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got {}".format(batch_size))

    file_name = os.path.join(ROOT_PATH + 'activations/', getFileName(NR_OF_SAMPLES, 'activations'))
    act = np.load(file_name, mmap_mode='r')  # mmap: read only the slices we index

    # Start from NaN so that any cell that was never written is easy to spot.
    correlationd = np.zeros((NR_OF_SAMPLES, NR_OF_SAMPLES))
    correlationd[:] = np.nan

    # (start, end) row range of every batch; the last one may be shorter.
    bounds = [(start, min(start + batch_size, NR_OF_SAMPLES))
              for start in range(0, NR_OF_SAMPLES, batch_size)]
    num_batches = len(bounds)
    total = num_batches * (num_batches + 1) // 2  # batch pairs with j >= i
    index = 0

    for i, (start_1, end_1) in enumerate(bounds):
        list_of_activations_1 = act[start_1:end_1, :]
        size_1 = end_1 - start_1

        for j in range(i, num_batches):
            index += 1
            print("New Iteration: i = {0}, j = {1}; {2}/{3}".format(i, j, index, total))

            if j == i:
                # Diagonal block: correlations within a single batch.
                correlationd[start_1:end_1, start_1:end_1] = 1 - np.corrcoef(list_of_activations_1)
                continue

            start_2, end_2 = bounds[j]
            list_of_activations_2 = act[start_2:end_2, :]
            # corrcoef stacks both batches; only the cross block is new here.
            corr_activations = 1 - np.corrcoef(list_of_activations_1, list_of_activations_2)
            cross = corr_activations[:size_1, size_1:]
            correlationd[start_1:end_1, start_2:end_2] = cross
            correlationd[start_2:end_2, start_1:end_1] = cross.T

    return correlationd

In [None]:
#felix's method
def correlationd_matrix(batch_size):
    """Build the full correlation-distance matrix (1 - Pearson r) in batches.

    Every unordered pair of distinct batches is correlated once with
    ``np.corrcoef``. The cross blocks are written on every pair; the two
    diagonal blocks that come out of the same call are written the first
    time each batch appears, so no separate pass over the diagonal is needed.
    When all rows fit into a single batch there are no pairs, and the matrix
    is computed directly.

    Reads the module-level ``ROOT_PATH``, ``NR_OF_SAMPLES`` and
    ``getFileName``. The activations file is opened as a memory map and
    indexed as a 2-D array with one row per sample.

    Args:
        batch_size: Number of rows per batch (positive integer). A shorter
            trailing batch is used when ``NR_OF_SAMPLES`` is not a multiple
            of it.

    Returns:
        A symmetric ``(NR_OF_SAMPLES, NR_OF_SAMPLES)`` float array holding
        ``1 - np.corrcoef`` for every pair of rows.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    batch_size = int(batch_size)
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got {}".format(batch_size))

    file_name = os.path.join(ROOT_PATH + 'activations/', getFileName(NR_OF_SAMPLES, 'activations'))
    act = np.load(file_name, mmap_mode='r')  # mmap: read only the slices we index

    # Start from NaN so that any cell that was never written is easy to spot.
    correlationd = np.zeros((NR_OF_SAMPLES, NR_OF_SAMPLES))
    correlationd[:] = np.nan

    # (start, end) row range of every batch; the last one may be shorter.
    bounds = [(start, min(start + batch_size, NR_OF_SAMPLES))
              for start in range(0, NR_OF_SAMPLES, batch_size)]
    num_batches = len(bounds)

    if num_batches == 1:
        # A single batch has no partner batch, so the pair loop below would
        # never run; correlate it with itself instead.
        start_1, end_1 = bounds[0]
        correlationd[start_1:end_1, start_1:end_1] = 1 - np.corrcoef(act[start_1:end_1, :])
        return correlationd

    total = num_batches * (num_batches - 1) // 2  # batch pairs with j > i
    index = 0
    diagonal_done = set()  # batches whose within-batch block is already stored

    # The last batch never acts as the first element of a pair.
    for i in range(num_batches - 1):
        start_1, end_1 = bounds[i]
        size_1 = end_1 - start_1
        list_of_activations_1 = act[start_1:end_1, :]
        print(f"loaded list of activations 1, shape: {list_of_activations_1.shape}")

        for j in range(i + 1, num_batches):
            index += 1
            print("New Iteration: i = {0}, j = {1}; {2}/{3}".format(i, j, index, total))
            start_2, end_2 = bounds[j]

            list_of_activations_2 = act[start_2:end_2, :]
            print(f"loaded list of activations 2, shape: {list_of_activations_2.shape}")
            # Rows/columns [:size_1] belong to batch i, the rest to batch j.
            corr_activations = 1 - np.corrcoef(list_of_activations_1, list_of_activations_2)

            # X times X correlation
            if i not in diagonal_done:
                block_xx = corr_activations[:size_1, :size_1]
                print(
                    f"Adding X correlation in the {start_1}:{end_1}, {start_1}:{end_1} square with size {block_xx.shape}")
                correlationd[start_1:end_1, start_1:end_1] = block_xx
                diagonal_done.add(i)

            # Y times Y correlation
            if j not in diagonal_done:
                block_yy = corr_activations[size_1:, size_1:]
                print(
                    f"Adding Y correlation in the {start_2}:{end_2}, {start_2}:{end_2} square with size {block_yy.shape}")
                correlationd[start_2:end_2, start_2:end_2] = block_yy
                diagonal_done.add(j)

            # X times Y correlation and vice versa
            block_xy = corr_activations[:size_1, size_1:]
            print(
                f"Adding XY correlation in the {start_1}:{end_1}, {start_2}:{end_2} square with size {block_xy.shape}")
            correlationd[start_1:end_1, start_2:end_2] = block_xy
            print(
                f"Adding YX correlation in the {start_2}:{end_2}, {start_1}:{end_1} square with size {block_xy.T.shape}")
            correlationd[start_2:end_2, start_1:end_1] = block_xy.T

    return correlationd


In [None]:
# Compute the input RDM for the selected model/layer, store it as .npy and
# save a heatmap of it.
print('Calculating the correlations for model: ',model_name,'and layer: ',layer_name)
corr_matrix = correlationd_matrix(1000)

path = os.path.join(ROOT_PATH + 'Input_RDM/', getFileName(NR_OF_SAMPLES, "Input_RDM_"))
# Create the output folder first so the result of the long computation above
# is not lost to a missing directory.
os.makedirs(os.path.dirname(path), exist_ok=True)
print("Save Input RDM -> {}".format(path))
# corr_matrix is already an ndarray; wrapping it in np.array() would only
# make a second full-size copy in memory.
np.save(path, corr_matrix)

fig = plt.figure(figsize=(17,15))
ax = seaborn.heatmap(corr_matrix, cmap='rainbow', vmin=0.5, vmax=1.0)
# getFileName() already ends in '.npy', so the plot is stored as '<...>.npy.png'.
path_fig = os.path.join(ROOT_PATH + 'Input_RDM_plots', getFileName(NR_OF_SAMPLES,"Input_RDM_") + '.png')
os.makedirs(os.path.dirname(path_fig), exist_ok=True)
fig.savefig(path_fig)

Calculating the correlations for model:  densenet161 and layer:  features.denseblock1.denselayer1
New Iteration: i = 0, j = 0; 1/55


## In case the load function does not work

In [None]:
# If the plain np.load(path) call gives an error, load with pickling enabled
# instead (presumably the file holds an object array -- confirm from the
# error message). allow_pickle is passed directly rather than by temporarily
# replacing np.load, so a failing load cannot leave np.load patched.
# NOTE: allow_pickle=True can execute arbitrary code while unpickling; use it
# only on files you created or otherwise trust.
# `path` must already be set by an earlier cell.
activations_shape = np.load(path, allow_pickle=True)