# Setup

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import seaborn
# Root directory that holds the RSA results. The original absolute path stays
# the default; set the RSA_MAIN_PATH environment variable to run the notebook
# on a machine where the data lives elsewhere.
main_path = os.environ.get('RSA_MAIN_PATH', '/mnt/raid/ni/agnessa/RSA/')


# Define the function to create filenames

In [None]:
def getFileName(n_samples,name,model_name,layer_name):
    """Build the ``.npy`` file name under which an RDM is stored.

    The result has the form
    ``<name>_<n_samples>__<model_name>__<layer_name>.npy`` -- note the double
    underscores on either side of the model name.
    """
    pieces = [
        name,
        "_{}_".format(n_samples),
        "_{}_".format(model_name),
        "_{}".format(layer_name),
        ".npy",
    ]
    return "".join(pieces)

# Get the upper triangular

In [None]:
def getUpperTriangular(rdm):
    """Return the entries strictly above the main diagonal of ``rdm``, flattened.

    The diagonal itself is excluded; the values come back as a 1-D array in
    row-major order.
    """
    side = rdm.shape[0]  # number of conditions (rows of the matrix)
    row_idx, col_idx = np.triu_indices(side, k=1)  # k=1 skips the diagonal
    return rdm[row_idx, col_idx]

# Get average correlation for one model RDM - only upper triangular

In [None]:
def getAverageCorrelation(cross_task,model_name,layer_name,trained_on,tested_on,min_layer_idx,max_layer_idx): 
    """Average similarity among a contiguous range of layers of one model RDM.

    The RDM is loaded from
    ``main_path/<trained_on>/<tested_on>/Model_RDM/<file name>``, restricted to
    the square sub-matrix of layers ``min_layer_idx`` .. ``max_layer_idx``
    (both inclusive), and the result is ``1 - mean`` of the entries strictly
    above the diagonal of that sub-matrix.

    Parameters
    ----------
    cross_task : truthy / falsy
        When truthy, the cross-task RDM file is used: the file-name prefix
        becomes ``'Model_RDM_cross_task_'`` and the model name is doubled
        (``<model_name>_<model_name>``).
    model_name, layer_name : str
        Used to build the file name (see ``getFileName``).
    trained_on : str
        Sub-directory of ``main_path`` for the training task.
    tested_on : str
        ``'ImageNet'`` or ``''`` (10000 samples) or ``'Places365'``
        (10220 samples). Also used as a sub-directory of ``trained_on``.
    min_layer_idx, max_layer_idx : int
        First and last layer index (inclusive) of the range to average over.

    Returns
    -------
    float
        ``1 - mean(upper triangular of the selected sub-matrix)``.

    Raises
    ------
    ValueError
        If ``tested_on`` is not one of the recognised datasets (previously
        this surfaced as an UnboundLocalError on ``n_samples``).
    """
    # The number of samples is part of the file name and depends on the test set.
    if tested_on in ('ImageNet', ''):
        n_samples = 10000
    elif tested_on == 'Places365':
        n_samples = 10220
    else:
        raise ValueError(
            "Unknown tested_on {!r}; expected 'ImageNet', '' or 'Places365'".format(tested_on))

    if cross_task:
        # NOTE(review): this prefix ends with an underscore, whereas
        # rsa_model_rdms_cross_task / avg_correlation_cross_task pass
        # 'Model_RDM_cross_task' (no trailing underscore). Kept as-is; confirm
        # against the file names on disk.
        rdm_prefix = 'Model_RDM_cross_task_'
        rdm_model_name = model_name+'_'+model_name
    else:
        rdm_prefix = 'Model_RDM'
        rdm_model_name = model_name

    model_rdm_filename = os.path.join(main_path,trained_on,tested_on,'Model_RDM',
                                      getFileName(n_samples,rdm_prefix,rdm_model_name,layer_name))
    model_rdm = np.load(model_rdm_filename)

    # +1 so that max_layer_idx itself is part of the selection
    layer_range = slice(min_layer_idx, max_layer_idx+1)
    selected_model_rdm = model_rdm[layer_range, layer_range]

    upper_triangular = getUpperTriangular(selected_model_rdm)
    avg_similarity_coefficient = 1-np.mean(upper_triangular)
    return avg_similarity_coefficient

In [None]:
# Average within-network similarity for ResNet-50 trained on 'Objects':
# layers 0-6 form the "early" group and layers 7-15 the "late" group.
# First on the default test set (tested_on=''), without the cross-task RDM ...
resnet50_images_early_layers = getAverageCorrelation(
    cross_task=False, model_name='resnet50', layer_name='all',
    trained_on='Objects', tested_on='', min_layer_idx=0, max_layer_idx=6)
resnet50_images_late_layers = getAverageCorrelation(
    cross_task=False, model_name='resnet50', layer_name='all',
    trained_on='Objects', tested_on='', min_layer_idx=7, max_layer_idx=15)

# ... then on Places365, using the cross-task RDM.
resnet50_images_scenes_early_layers = getAverageCorrelation(
    cross_task=True, model_name='resnet50', layer_name='all',
    trained_on='Objects', tested_on='Places365', min_layer_idx=0, max_layer_idx=6)
resnet50_images_scenes_late_layers = getAverageCorrelation(
    cross_task=True, model_name='resnet50', layer_name='all',
    trained_on='Objects', tested_on='Places365', min_layer_idx=7, max_layer_idx=15)

# Group the results by layer range: [default test set, Places365]
early_layers = [
    resnet50_images_early_layers,
    resnet50_images_scenes_early_layers,
]
late_layers = [
    resnet50_images_late_layers,
    resnet50_images_scenes_late_layers,
]

# Correlate the Model RDMs of two networks (same model, different training tasks) - layer by layer

In [None]:
def rsa_model_rdms_cross_task(model_name,trained_on_1,trained_on_2,tested_on,min_layer_idx,max_layer_idx):
    """Correlate two model RDMs layer by layer over a range of layers.

    RDM 1 is the regular model RDM loaded from
    ``main_path/<trained_on_1>/Model_RDM/`` (the test-set path component is
    always ``''`` for this one). RDM 2 is the cross-task RDM loaded from
    ``main_path/<trained_on_2>/<tested_on>/Model_RDM/`` with the doubled model
    name ``<model_name>_<model_name>``. Both use ``layer_name='all'``.

    Parameters
    ----------
    model_name : str
        Model identifier used in the file names.
    trained_on_1, trained_on_2 : str
        Sub-directories of ``main_path`` for the two training tasks.
    tested_on : str
        ``'ImageNet'`` or ``''`` (10000 samples) or ``'Places365'``
        (10220 samples).
    min_layer_idx, max_layer_idx : int
        First and last layer index (inclusive) of the range to compare.

    Returns
    -------
    tuple
        ``(selected_model_rdm_1, selected_model_rdm_2, correlation_models)``:
        the two square sub-matrices for the chosen layers and an array of
        shape ``(num_layers, 1)`` whose entry ``i`` is the Pearson correlation
        (``np.corrcoef``) between row ``i`` of the two sub-matrices.

    Raises
    ------
    ValueError
        If ``tested_on`` is not one of the recognised datasets (previously
        this surfaced as an UnboundLocalError on ``n_samples``).
    """
    layer_name = 'all'

    # define number of samples depending on the testing dataset
    if tested_on in ('ImageNet', ''):
        n_samples = 10000
    elif tested_on == 'Places365':
        n_samples = 10220
    else:
        raise ValueError(
            "Unknown tested_on {!r}; expected 'ImageNet', '' or 'Places365'".format(tested_on))

    # load the models
    model_rdm_1_filename = os.path.join(main_path,trained_on_1,'','Model_RDM',
                                        getFileName(n_samples,'Model_RDM',model_name,layer_name))
    model_name_2 = model_name+'_'+model_name
    model_rdm_2_filename = os.path.join(main_path,trained_on_2,tested_on,'Model_RDM',
                                        getFileName(n_samples,'Model_RDM_cross_task',model_name_2,layer_name))
    model_rdm_1 = np.load(model_rdm_1_filename)
    model_rdm_2 = np.load(model_rdm_2_filename)

    # select only the desired layers (+1 so that max_layer_idx is included)
    layer_range = slice(min_layer_idx, max_layer_idx+1)
    selected_model_rdm_1 = model_rdm_1[layer_range, layer_range]
    selected_model_rdm_2 = model_rdm_2[layer_range, layer_range]

    # one correlation per layer: shape (num_layers, 1), NaN until filled in
    num_layers = (max_layer_idx+1)-min_layer_idx
    correlation_models = np.full((num_layers,1), np.nan)
    for layer in range(num_layers):
        # np.corrcoef returns a 2x2 matrix; [0,1] is the cross-correlation
        correlation_models[layer] = np.corrcoef(selected_model_rdm_1[layer,:],
                                                selected_model_rdm_2[layer,:])[0,1]
    return selected_model_rdm_1,selected_model_rdm_2,correlation_models

# Average correlation of a subset of layers for two networks (same model, different training tasks) - use the whole matrix (not just upper triangular)

In [None]:
def avg_correlation_cross_task(model_name,trained_on_1,trained_on_2,tested_on,min_layer_idx,max_layer_idx):
    """Average similarity over a range of layers for two model RDMs.

    RDM 1 is the regular model RDM loaded from
    ``main_path/<trained_on_1>/Model_RDM/`` (the test-set path component is
    always ``''`` for this one). RDM 2 is the cross-task RDM loaded from
    ``main_path/<trained_on_2>/<tested_on>/Model_RDM/`` with the doubled model
    name ``<model_name>_<model_name>``. Both use ``layer_name='all'``.

    Unlike ``getAverageCorrelation``, the mean is taken over the WHOLE selected
    sub-matrix (diagonal and both triangles), not just the upper triangular.

    Parameters
    ----------
    model_name : str
        Model identifier used in the file names.
    trained_on_1, trained_on_2 : str
        Sub-directories of ``main_path`` for the two training tasks.
    tested_on : str
        ``'ImageNet'`` or ``''`` (10000 samples) or ``'Places365'``
        (10220 samples).
    min_layer_idx, max_layer_idx : int
        First and last layer index (inclusive) of the range to average over.

    Returns
    -------
    tuple of float
        ``(1 - mean(sub-matrix of RDM 1), 1 - mean(sub-matrix of RDM 2))``.

    Raises
    ------
    ValueError
        If ``tested_on`` is not one of the recognised datasets (previously
        this surfaced as an UnboundLocalError on ``n_samples``).
    """
    layer_name = 'all'

    # define number of samples depending on the testing dataset
    if tested_on in ('ImageNet', ''):
        n_samples = 10000
    elif tested_on == 'Places365':
        n_samples = 10220
    else:
        raise ValueError(
            "Unknown tested_on {!r}; expected 'ImageNet', '' or 'Places365'".format(tested_on))

    # load the models
    model_rdm_1_filename = os.path.join(main_path,trained_on_1,'','Model_RDM',
                                        getFileName(n_samples,'Model_RDM',model_name,layer_name))
    model_name_2 = model_name+'_'+model_name
    model_rdm_2_filename = os.path.join(main_path,trained_on_2,tested_on,'Model_RDM',
                                        getFileName(n_samples,'Model_RDM_cross_task',model_name_2,layer_name))
    model_rdm_1 = np.load(model_rdm_1_filename)
    model_rdm_2 = np.load(model_rdm_2_filename)

    # select only the desired layers (+1 so that max_layer_idx is included)
    layer_range = slice(min_layer_idx, max_layer_idx+1)
    selected_model_rdm_1 = model_rdm_1[layer_range, layer_range]
    selected_model_rdm_2 = model_rdm_2[layer_range, layer_range]

    # turn the mean dissimilarity of each sub-matrix into a similarity
    avg_correlation_1 = 1-np.mean(selected_model_rdm_1)
    avg_correlation_2 = 1-np.mean(selected_model_rdm_2)
    return avg_correlation_1, avg_correlation_2

In [None]:
# Whole-matrix average similarity for the Objects- and Scenes-trained networks,
# tested on ImageNet. Each call returns (value for RDM 1, value for RDM 2).

# ResNet-50: layers 0-6 (early) and 7-15 (late)
r50e1, r50e2 = avg_correlation_cross_task(
    model_name='resnet50', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=0, max_layer_idx=6)
r50l1, r50l2 = avg_correlation_cross_task(
    model_name='resnet50', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=7, max_layer_idx=15)

# AlexNet: layers 0-8 (early) and 9-20 (late)
ane1, ane2 = avg_correlation_cross_task(
    model_name='alexnet', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=0, max_layer_idx=8)
anl1, anl2 = avg_correlation_cross_task(
    model_name='alexnet', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=9, max_layer_idx=20)

# One line per (model, layer group): value for RDM 1, then value for RDM 2
print(r50e1, r50e2)
print(r50l1, r50l2)
print(ane1, ane2)
print(anl1, anl2)

In [None]:
# Layer-by-layer comparison of the Objects- and Scenes-trained networks, tested
# on ImageNet. Each call returns (selected RDM 1, selected RDM 2, per-layer
# correlations); note that r50e1, r50e2, ... now hold the selected RDMs.

# ResNet-50: layers 0-6 (early) and 7-15 (late)
r50e1, r50e2, corr_early_r50 = rsa_model_rdms_cross_task(
    model_name='resnet50', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=0, max_layer_idx=6)
r50l1, r50l2, corr_late_r50 = rsa_model_rdms_cross_task(
    model_name='resnet50', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=7, max_layer_idx=15)

# AlexNet: layers 0-8 (early) and 9-20 (late)
ane1, ane2, corr_early_alexnet = rsa_model_rdms_cross_task(
    model_name='alexnet', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=0, max_layer_idx=8)
anl1, anl2, corr_late_alexnet = rsa_model_rdms_cross_task(
    model_name='alexnet', trained_on_1='Objects', trained_on_2='Scenes',
    tested_on='ImageNet', min_layer_idx=9, max_layer_idx=20)

# 1 - mean over each whole selected RDM, one line per (model, layer group)
print(1 - r50e1.mean(), 1 - r50e2.mean())
print(1 - r50l1.mean(), 1 - r50l2.mean())
print(1 - ane1.mean(), 1 - ane2.mean())
print(1 - anl1.mean(), 1 - anl2.mean())

In [None]:
# rdm1 / rdm2 were never assigned anywhere in this notebook, so this cell
# raised a NameError on a fresh kernel (Restart & Run All). Bind them
# explicitly to a pair of RDMs returned by rsa_model_rdms_cross_task.
# NOTE(review): choosing the ResNet-50 early-layer pair is an assumption --
# swap in whichever pair (e.g. r50l1/r50l2, ane1/ane2, anl1/anl2) should be shown.
rdm1, rdm2 = r50e1, r50e2

# Draw one heatmap per RDM on a shared 0..1 colour scale so they are comparable.
for plot_model in (rdm1, rdm2):
    fig = plt.figure(figsize=(17,13)) #change depending on the size of the model

    ax = seaborn.heatmap(plot_model, cmap='rainbow', vmin=0.0, vmax=1.0)
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.show()

# Plot the correlations (bar plot)

In [None]:
# rsa_model_rdms_cross_task returns one correlation per layer (an array of
# shape (num_layers, 1)), and the layer counts differ between models, so the
# arrays cannot be used as bar heights directly. Reduce each to a single value
# by averaging over layers; nanmean ignores layers whose correlation is NaN.
early_layers = [np.nanmean(corr_early_r50), np.nanmean(corr_early_alexnet)]
late_layers = [np.nanmean(corr_late_r50), np.nanmean(corr_late_alexnet)]
labels = ['ResNet-50','AlexNet']
bar_locations = np.arange(len(labels))  # the label locations (0, 1, ...)
# Groups are 1 unit apart, so each of the two bars per group must be narrower
# than 0.5 to avoid overlapping the neighbouring group (the old width of 3 did).
width = 0.35

fig, ax = plt.subplots()
rects1 = ax.bar(bar_locations - width/2, early_layers, width, label='Early layers')
rects2 = ax.bar(bar_locations + width/2, late_layers, width, label='Late layers')

# Add some text for labels, title and custom x-axis tick labels, etc.
# The values come from np.corrcoef, i.e. Pearson correlations, averaged over
# layers. (The previous literal 'Spearman''s correlation' also silently
# concatenated to "Spearmans correlation".)
ax.set_ylabel('Mean Pearson correlation')
# NOTE(review): the bars now show cross-task correlations between two networks;
# confirm the title still describes what is intended.
ax.set_title('Similarity within early layers and within late layers for different models')
ax.set_xticks(bar_locations)
ax.set_xticklabels(labels)
ax.legend()
plt.show()


In [None]:
# width = 0.2
# num_bars = 2
# x_coords = np.arange(0,width*2,0.1)
# # width_bars/num_bars
# plt.bar(x_coords,[avg_corr_early_layers,avg_corr_late_layers],width)
# plt.xticks(x_coords,['early layers','late layers'])


In [None]:

# Show the current late-layer values. When the notebook is run top to bottom,
# this is the list assigned in the bar-plot cell, which rebinds late_layers.
print(late_layers)