In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import os

In [2]:
# Result location plus the layer chosen for each measure.
results_dir = '../Results/'

# Layer names in order: 'block1_pool' .. 'block5_pool', then 'fc1' and 'fc2'.
layer_list = list('block{}_pool'.format(block_no) for block_no in range(1, 6)) + ['fc1', 'fc2']

# Layer numbers are 1-based positions in layer_list (used as layer_list[n-1]).
dist_layer = 5
RE_layer = 7

In [3]:
# Load the distinctiveness measure for the selected layer.
# dist_layer is 1-based, hence the -1 when indexing layer_list.
dist_csv_path = os.path.join(results_dir, 'Dist', layer_list[dist_layer-1] + '_NN.csv')
# The CSV has no header row, so the three column names are supplied here.
DCNN_df = pd.read_csv(dist_csv_path, header=None, names=['img', 'NN', 'dist'])

In [4]:
# Load the reconstruction error measure for the selected layer.
# RE_layer is 1-based, hence the -1 when indexing layer_list.
RE_data = np.load(os.path.join(results_dir,
                               'RE',
                               '{}'.format(layer_list[RE_layer-1]),
                               'spc_rec_quality.npy'))
RE_df = pd.DataFrame(RE_data, columns=['RE'])
# Number the images 1..N from the loaded data instead of hard-coding the
# dataset size, so a differently sized array does not raise on assignment.
# NOTE(review): assumes row k of the .npy belongs to image k+1, exactly as the
# original fixed 1..2222 numbering did -- confirm against how the file is written.
RE_df['img'] = range(1, len(RE_df) + 1)

In [5]:
# Read the memorability scores from Isola et al.
# 'im_ind' is renamed to 'img' so it shares a column name with the other tables.
img_info = pd.read_csv('../Image_info/target_info_IsolaEtAl.csv')
img_info = img_info.rename(columns={'im_ind': 'img'})

# Combine the three tables with pandas' default merge settings
# (inner join on every column name the two sides have in common).
merge_df = pd.merge(pd.merge(RE_df, DCNN_df), img_info)

In [6]:
# Define a function to run partial correlation
def partial_corr(df, var1, var2, ctr_var):
    """Partial correlation of two columns after regressing out a control.

    Each of ``df[var1]`` and ``df[var2]`` is fitted by ordinary least squares
    on ``df[ctr_var]`` plus an intercept; the Pearson correlation between the
    two residual series is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding all referenced columns.
    var1, var2 : column label
        The two variables whose partial correlation is wanted.
    ctr_var : column label
        The variable to control for.

    Returns
    -------
    float
        Pearson r between the two sets of OLS residuals.
    """
    # Both regressions use the same predictors: the control plus a constant.
    design = sm.add_constant(df[ctr_var].values)

    # Residuals of each target variable once the control is regressed out.
    residuals = [sm.OLS(df[target].values, design).fit().resid
                 for target in (var1, var2)]

    return stats.pearsonr(residuals[0], residuals[1])[0]

In [7]:
# Define functions to run bootstrapping
def bootstrap_corr(df, var1, var2, n_iter=1000, random_state=None):
    """Bootstrap distribution of the Pearson correlation between two columns.

    On every iteration the rows of ``df`` are resampled with replacement
    (same number of rows as ``df``) and the Pearson r between ``var1`` and
    ``var2`` is computed on the resample.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both columns.
    var1, var2 : column label
        Columns to correlate.
    n_iter : int, optional
        Number of bootstrap resamples (default 1000).
    random_state : None, int, or random state object, optional
        Controls reproducibility. ``None`` (default) keeps the previous
        behaviour of drawing from pandas' default random source. An integer
        is used to seed a single ``numpy.random.RandomState`` that is shared
        by all iterations. Any other object is handed to
        ``DataFrame.sample`` unchanged.

    Returns
    -------
    numpy.ndarray
        The ``n_iter`` bootstrap correlations, sorted in ascending order.
    """
    # A bare integer passed straight to df.sample would reseed on every call
    # and yield the identical resample each iteration, so turn it into one
    # stateful generator up front.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    bootstrap_rs = np.empty(n_iter)
    for ii in range(n_iter):
        _df = df.sample(n=len(df), replace=True, random_state=random_state)
        curr_r, _ = stats.pearsonr(_df[var1], _df[var2])
        bootstrap_rs[ii] = curr_r
    # Sorted so callers can read percentiles off by position.
    bootstrap_rs.sort()
    return bootstrap_rs

def bootstrap_partcorr(df, var1, var2, ctr_var, n_iter=1000, random_state=None):
    """Bootstrap distribution of a partial correlation.

    On every iteration the rows of ``df`` are resampled with replacement
    (same number of rows as ``df``) and ``partial_corr`` is evaluated on the
    resample.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding all referenced columns.
    var1, var2 : column label
        Columns whose partial correlation is bootstrapped.
    ctr_var : column label
        Column controlled for; passed through to ``partial_corr``.
    n_iter : int, optional
        Number of bootstrap resamples (default 1000).
    random_state : None, int, or random state object, optional
        Controls reproducibility. ``None`` (default) keeps the previous
        behaviour of drawing from pandas' default random source. An integer
        is used to seed a single ``numpy.random.RandomState`` that is shared
        by all iterations. Any other object is handed to
        ``DataFrame.sample`` unchanged.

    Returns
    -------
    numpy.ndarray
        The ``n_iter`` bootstrap partial correlations, sorted ascending.
    """
    # A bare integer passed straight to df.sample would reseed on every call
    # and yield the identical resample each iteration, so turn it into one
    # stateful generator up front.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    bootstrap_rs = np.empty(n_iter)
    for ii in range(n_iter):
        _df = df.sample(n=len(df), replace=True, random_state=random_state)
        bootstrap_rs[ii] = partial_corr(_df, var1, var2, ctr_var)
    # Sorted so callers can read percentiles off by position.
    bootstrap_rs.sort()
    return bootstrap_rs


In [8]:
# Correlation between distinctiveness and memorability, with a bootstrap
# confidence interval and p value.
# n_iter is set first and passed to the bootstrap so the percentile indices
# below always match the number of resamples actually drawn.
n_iter = 1000
corr_all = stats.pearsonr(merge_df['dist'], merge_df['corrected_hit'])[0]
bootstrap_values = bootstrap_corr(merge_df, 'dist', 'corrected_hit', n_iter=n_iter)
print('Correlation between Layer {} distinctiveness and memorability, {}\n'.format(dist_layer,
                                                                                  corr_all.round(2)))
# bootstrap_values is sorted, so the 2.5% / 97.5% bounds are read by position.
print('Bootstrap 95% CI, [{}, {}]\n'.format(bootstrap_values[int(n_iter*0.025)].round(2),
                                            bootstrap_values[int(n_iter*(1-0.025))].round(2)))
# Two-tailed bootstrap p value: twice the smaller *proportion* of resampled
# correlations falling on one side of zero. (Using raw counts here would
# report values far above 1 whenever any resample crosses zero.)
p_value = 2 * np.min([np.mean(bootstrap_values < 0),
                      np.mean(bootstrap_values > 0)])
if p_value < 0.001:
    print('Uncorrected two-tailed p value, < .001')
else:
    print('Uncorrected two-tailed p value,  {}'.format(p_value.round(3)))

Correlation between Layer 5 distinctiveness and memorability, 0.36

Bootstrap 95% CI, [0.32, 0.4]

Uncorrected two-tailed p value, < .001


In [9]:
# Correlation between reconstruction error and memorability, with a
# bootstrap confidence interval and p value.
# n_iter is set first and passed to the bootstrap so the percentile indices
# below always match the number of resamples actually drawn.
n_iter = 1000
corr_all = stats.pearsonr(merge_df['RE'], merge_df['corrected_hit'])[0]
bootstrap_values = bootstrap_corr(merge_df, 'RE', 'corrected_hit', n_iter=n_iter)
print('Correlation between Layer {} reconsctruction error and memorability, {}\n'.format(RE_layer,
                                                                                  corr_all.round(2)))
# bootstrap_values is sorted, so the 2.5% / 97.5% bounds are read by position.
print('Bootstrap 95% CI, [{}, {}]\n'.format(bootstrap_values[int(n_iter*0.025)].round(2),
                                            bootstrap_values[int(n_iter*(1-0.025))].round(2)))
# Two-tailed bootstrap p value: twice the smaller *proportion* of resampled
# correlations falling on one side of zero. (Using raw counts here would
# report values far above 1 whenever any resample crosses zero.)
p_value = 2 * np.min([np.mean(bootstrap_values < 0),
                      np.mean(bootstrap_values > 0)])
if p_value < 0.001:
    print('Uncorrected two-tailed p value, < .001')
else:
    print('Uncorrected two-tailed p value,  {}'.format(p_value.round(3)))

Correlation between Layer 7 reconsctruction error and memorability, 0.29

Bootstrap 95% CI, [0.26, 0.33]

Uncorrected two-tailed p value, < .001


In [10]:
# Partial correlation between reconstruction error and memorability after
# controlling for distinctiveness, with a bootstrap confidence interval and
# p value.
# n_iter is set first and passed to the bootstrap so the percentile indices
# below always match the number of resamples actually drawn.
n_iter = 1000
partcorr = partial_corr(merge_df, 'RE', 'corrected_hit', 'dist')
bootstrap_values = bootstrap_partcorr(merge_df, 'RE', 'corrected_hit', 'dist', n_iter=n_iter)
print('Partial correlation between Layer {} reconsctruction error and memorability (after controlling for Layer {} distinctiveness), {}\n'.format(RE_layer,
                                                                                                                                                  dist_layer,
                                                                                  partcorr.round(2)))
# bootstrap_values is sorted, so the 2.5% / 97.5% bounds are read by position.
print('Bootstrap 95% CI, [{}, {}]\n'.format(bootstrap_values[int(n_iter*0.025)].round(2),
                                            bootstrap_values[int(n_iter*(1-0.025))].round(2)))
# Two-tailed bootstrap p value: twice the smaller *proportion* of resampled
# correlations falling on one side of zero. (Using raw counts here would
# report values far above 1 whenever any resample crosses zero.)
p_value = 2 * np.min([np.mean(bootstrap_values < 0),
                      np.mean(bootstrap_values > 0)])
if p_value < 0.001:
    print('Uncorrected two-tailed p value, < .001')
else:
    print('Uncorrected two-tailed p value,  {}'.format(p_value.round(3)))

Partial correlation between Layer 7 reconsctruction error and memorability (after controlling for Layer 5 distinctiveness), 0.16

Bootstrap 95% CI, [0.11, 0.2]

Uncorrected two-tailed p value, < .001
