In [22]:
from warnings import filterwarnings
filterwarnings("ignore")

In [23]:
import os, sys, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm as tqdm
from copy import copy
from glob import glob
from PIL import Image

In [3]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [4]:
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [5]:
sys.path.append('../mouseland/model_opts')
from feature_extraction import *
from model_options import *

In [6]:
from processing import *

In [7]:
imageset = 'oasis'

root = 'images/{}/'.format(imageset)
assets = glob(root + '*.jpg')
asset_dictlist = []
for asset in assets:
    imgstr = asset.split('/')[-1]
    row = {'image_name': imgstr}
    asset_dictlist.append(row)
image_df = (pd.DataFrame(asset_dictlist)
            .sort_values(by='image_name', ignore_index=True))

In [8]:
model_string = 'alexnet_imagenet'

model_options = get_model_options()
image_transforms = get_recommended_transforms(model_string)
model_name = model_options[model_string]['model_name']
train_type = model_options[model_string]['train_type']
model_call = model_options[model_string]['call']

model = eval(model_call)
model = model.eval()
if torch.cuda.is_available():
    model = model.cuda()

In [9]:
stimulus_loader = DataLoader(dataset=StimulusSet(image_df, root, image_transforms), batch_size=64)

In [10]:
stimulus_features = get_all_feature_maps(model, stimulus_loader, numpy=False)

Feature Extraction (Batch):   0%|          | 0/15 [00:00<?, ?it/s]

In [11]:
sample_feature_map = stimulus_features['Conv2d-1']
sample_feature_map.shape

torch.Size([900, 193600])

In [12]:
def treves_rolls(x):
    if isinstance(x, np.ndarray):
        return ((np.sum(x / x.shape[0]))**2 / np.sum(x**2 / x.shape[0]))
    if isinstance(x, torch.Tensor):
        return ((torch.sum(x / x.shape[0]))**2 / torch.sum(x**2 / x.shape[0]))

In [24]:
#source: https://tntorch.readthedocs.io/en/latest/_modules/metrics.html

def torch_skewness(x):
    return torch.mean(((x - torch.mean(x))/torch.std(x))**3)

def torch_kurtosis(x, fisher=True):
    return torch.mean(((x-torch.mean(x))/torch.std(x))**4) - fisher*3

def torch_frobnorm(x):
    return torch.sqrt(torch.clamp(torch.dot(x,x), min=0))

In [36]:
metric_dictlist = []
for model_layer_index, model_layer in enumerate(tqdm(stimulus_features)):
    target_map = stimulus_features[map_key]
    for target_i, target_activity in enumerate(target_map):
        image_name = image_df.image_name.iloc[target_i]
        
        mean_activity = target_activity.mean().item()
        mean_absolute = target_activity.abs().mean().item()
        max_activity = target_activity.max().item()
        min_activity = target_activity.min().item()
        var_activity = target_activity.std().item()
        var_absolute = target_activity.abs().std().item()
        sparseness = treves_rolls(target_activity).item()
        skewness = torch_skewness(target_activity.abs()).item()
        kurtosis = torch_kurtosis(target_activity.abs()).item()
        frobenius = torch_frobnorm(target_activity.abs()).item()
        activity_range = max_activity - min_activity
        
        metric_dictlist.append({
            'image': image_name, 
            'model': model_name,
            'train_type': train_type,
            'model_layer': model_layer, 
            'model_layer_index': model_layer_index,
            'mean_absolute': mean_absolute,
            'mean_activity': mean_activity,
            'var_activity': var_activity,
            'var_absolute': var_absolute,
            'max_activity': max_activity,
            'min_activity': min_activity,
            'range': activity_range,
            'sparseness': sparseness,
            'skewness': skewness,
            'kurtosis': kurtosis,
            'frobenius': frobenius,
        })
        
metric_data_raw = pd.DataFrame(metric_dictlist)

  0%|          | 0/18 [00:00<?, ?it/s]

In [37]:
metric_data_raw[['mean_activity','var_activity','max_activity','min_activity',
                 'range','sparseness','kurtosis','frobenius', 'mean_absolute','var_absolute']].corr()

Unnamed: 0,mean_activity,var_activity,max_activity,min_activity,range,sparseness,kurtosis,frobenius,mean_absolute,var_absolute
mean_activity,1.0,-0.891285,-0.18239,0.842686,-0.761544,-0.650767,0.390688,-0.756302,-0.954142,-0.866852
var_activity,-0.891285,1.0,0.410374,-0.918993,0.899084,0.532039,-0.404904,0.875033,0.958957,0.989712
max_activity,-0.18239,0.410374,1.0,-0.370076,0.632769,0.028353,0.27227,0.515168,0.271909,0.469383
min_activity,0.842686,-0.918993,-0.370076,1.0,-0.953536,-0.418291,0.289616,-0.929725,-0.848147,-0.923166
range,-0.761544,0.899084,0.632769,-0.953536,1.0,0.357848,-0.153102,0.942014,0.795127,0.921699
sparseness,-0.650767,0.532039,0.028353,-0.418291,0.357848,1.0,-0.534849,0.381652,0.682394,0.531773
kurtosis,0.390688,-0.404904,0.27227,0.289616,-0.153102,-0.534849,1.0,-0.191801,-0.492281,-0.348619
frobenius,-0.756302,0.875033,0.515168,-0.929725,0.942014,0.381652,-0.191801,1.0,0.783051,0.908207
mean_absolute,-0.954142,0.958957,0.271909,-0.848147,0.795127,0.682394,-0.492281,0.783051,1.0,0.933608
var_absolute,-0.866852,0.989712,0.469383,-0.923166,0.921699,0.531773,-0.348619,0.908207,0.933608,1.0


In [38]:
response_data = {'vessel': load_response_data('vessel'), 'oasis': load_response_data('oasis')}

In [39]:
def process_metric_data(metric_data, orient='wide'):
    metric_data['dataset'] = imageset
    if 'image' in metric_data.columns:
        metric_data = metric_data.rename(columns={'image': 'image_name'})
    
    data_wide = pd.merge(metric_data, response_data[imageset], on = 'image_name')
    data_wide['model_layer_depth'] = (data_wide['model_layer_index'] / 
                                      data_wide['model_layer'].nunique())
    
    id_columns = ['dataset','image_name','image_type','model','train_type',
                  'model_layer','model_layer_index','model_layer_depth']
    measurement_columns = [col for col in data_wide.columns 
                           if col in ['arousal','beauty','valence']]
    
    analysis_columns = [col for col in data_wide.columns 
                        if col not in id_columns + measurement_columns]
    
    data_wide = data_wide[id_columns + measurement_columns + analysis_columns]
    data_wide = pd.melt(data_wide, id_vars=id_columns + analysis_columns, 
                        var_name = 'measurement', value_name='rating')
    
    data_long = pd.melt(data_wide, id_vars=id_columns + ['measurement', 'rating'], 
                        var_name = 'metric', value_name='value')
    
    if orient == 'wide':
        return(data_wide)
    if orient == 'long':
        return(data_long)

In [40]:
metric_data = process_metric_data(metric_data_raw)

In [114]:
corr_data = process_corr_data(metric_data)
corr_data['corr_abs'] = abs(corr_data['corr'])

In [115]:
corr_data.groupby(['metric'])['corr'].mean().reset_index()

Unnamed: 0,metric,corr
0,frobenius,0.075415
1,kurtosis,0.050385
2,max_activity,0.109034
3,mean_absolute,0.058533
4,mean_activity,0.019727
5,min_activity,-0.078457
6,range,0.104694
7,skewness,0.053761
8,sparseness,-0.02908
9,var_absolute,0.083384


In [117]:
(corr_data[(corr_data['measurement'] == 'beauty') & (corr_data['image_type'] == 'Scene')]
 .groupby(['metric'])['corr_abs'].mean().reset_index().sort_values(by='corr_abs'))

Unnamed: 0,metric,corr_abs
2,max_activity,0.102883
6,range,0.112526
5,min_activity,0.11287
1,kurtosis,0.125108
7,skewness,0.133055
9,var_absolute,0.14473
10,var_activity,0.147385
0,frobenius,0.154446
8,sparseness,0.162722
4,mean_activity,0.173468


In [42]:
max_transform(corr_data, group_vars = ['measurement', 'image_type', 'metric'],
              measure_var = 'corr').groupby(['metric'])['corr'].mean().reset_index()

Unnamed: 0,metric,corr
0,frobenius,0.204449
1,kurtosis,0.253909
2,max_activity,0.204471
3,mean_absolute,0.231817
4,mean_activity,0.261281
5,min_activity,0.066089
6,range,0.207561
7,skewness,0.264455
8,sparseness,0.236716
9,var_absolute,0.20715


In [None]:
max_transform(corr_data[(corr_data['measurement'] == 'beauty') & (corr_data['image_type'] == 'lsc')],
              group_vars = ['metric'], measure_var = 'corr').groupby(['metric'])['corr'].mean().reset_index()

In [46]:
import numba

NAN = float("nan")

@numba.njit(nogil=True)
def _any_nans(a):
    for x in a:
        if np.isnan(x): return True
    return False

@numba.jit
def any_nans(a):
    if not a.dtype.kind=='f': return False
    return _any_nans(a.flat)

In [70]:
target_metrics = ['mean_activity', 'mean_absolute', 'var_activity', 'var_absolute', 'max_activity', 'min_activity',
                  'range', 'sparseness', 'skewness', 'kurtosis', 'frobenius']

results_dictlist = []
data_wide = metric_data
model_layers = data_wide['model_layer'].unique()
for measurement in data_wide['measurement'].unique():
        for image_type in data_wide['image_type'].unique():
            for metric in target_metrics:
                data_i = data_wide[(data_wide['image_type'] == image_type) & 
                                   (data_wide['measurement'] == measurement)]
                y = data_i[(data_i['model_layer']==model_layers[0])]['rating'].to_numpy()
                X = np.stack([data_i[(data_i['model_layer']==model_layer)][metric].to_numpy() 
                              for model_layer in model_layers], axis = 1)

                actual_max = max([abs(pearsonr(x, y)[0]) for x in X.transpose()
                                  if not any_nans(x)])

                permuted_max_corrs = []
                for i in range(1000):
                    permuted_corrs = [abs(pearsonr(np.random.permutation(x), y)[0]) 
                                      for x in X.transpose() if not any_nans(x)]
                    permuted_max_corrs.append(max(permuted_corrs))

                permuted_lqt = np.quantile(permuted_max_corrs, 0.025)
                permuted_uqt = np.quantile(permuted_max_corrs, 0.975)
                permuted_pvalue = (len([corr for corr in permuted_max_corrs if corr >= actual_max])) / 1000

                results_dictlist.append({'model': model_name, 'train_type': train_type, 
                                         'dataset': 'oasis', 'image_type': image_type, 
                                         'metric': metric, 'measurement': measurement,
                                         'model_depth': len(model_layers),
                                         'corr_max_score': actual_max,
                                         'corr_lower_ci': permuted_lqt,
                                         'corr_upper_ci': permuted_uqt,
                                         'corr_p_value': permuted_pvalue})


metric_permutations = pd.DataFrame(results_dictlist)

In [101]:
metric_permutations['corr_p_adj'] = pg.multicomp(metric_permutations['corr_p_value'].to_numpy(), 
                                                 alpha = 0.05, method = 'fdr')[1]

In [102]:
metric_permutations[['measurement','image_type']].value_counts()

measurement  image_type
arousal      Animal        11
             Object        11
             Person        11
             Scene         11
beauty       Animal        11
             Object        11
             Person        11
             Scene         11
valence      Animal        11
             Object        11
             Person        11
             Scene         11
dtype: int64

In [103]:
metric_permutations.query('corr_p_value < 0.05')[['measurement','image_type']].value_counts()

measurement  image_type
beauty       Animal        11
             Object        11
valence      Animal        11
             Scene         11
arousal      Object        10
beauty       Scene         10
valence      Object         8
             Person         8
arousal      Scene          5
beauty       Person         3
arousal      Person         1
dtype: int64

In [105]:
metric_permutations[(metric_permutations['measurement'] == 'beauty') & 
                    (metric_permutations['image_type'] == 'Scene')] 

Unnamed: 0,model,train_type,dataset,image_type,metric,measurement,model_depth,corr_max_score,corr_lower_ci,corr_upper_ci,corr_p_value,corr_p_value_bonf,corr_p_adjusted
121,alexnet,imagenet,oasis,Scene,mean_activity,beauty,18,0.403099,0.088099,0.21093,0.0,0.0,0.0
122,alexnet,imagenet,oasis,Scene,mean_absolute,beauty,18,0.401328,0.090819,0.210365,0.0,0.0,0.0
123,alexnet,imagenet,oasis,Scene,var_activity,beauty,18,0.393713,0.089671,0.217553,0.0,0.0,0.0
124,alexnet,imagenet,oasis,Scene,var_absolute,beauty,18,0.406972,0.089243,0.214995,0.0,0.0,0.0
125,alexnet,imagenet,oasis,Scene,max_activity,beauty,18,0.183058,0.090075,0.213882,0.113,1.0,1.0
126,alexnet,imagenet,oasis,Scene,min_activity,beauty,18,0.343282,0.061691,0.197003,0.0,0.0,0.0
127,alexnet,imagenet,oasis,Scene,range,beauty,18,0.318409,0.090187,0.208197,0.0,0.0,0.0
128,alexnet,imagenet,oasis,Scene,sparseness,beauty,18,0.40547,0.090771,0.215394,0.0,0.0,0.0
129,alexnet,imagenet,oasis,Scene,skewness,beauty,18,0.352672,0.088916,0.217816,0.0,0.0,0.0
130,alexnet,imagenet,oasis,Scene,kurtosis,beauty,18,0.307379,0.088088,0.212489,0.001,0.132,0.095
