In [None]:
import urllib
import numpy as np
import tensorflow as tf
from rcnn_sat_2 import preprocess_image, bl_net
import os
import json
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import entropy
from scipy import stats
import scipy.io as sio

# Expose only GPU 0 to CUDA for this process.
# NOTE(review): this is set after `import tensorflow` above; presumably TensorFlow has
# not initialised its GPU devices yet at this point — confirm, or move this line
# above the tensorflow import.
os.environ["CUDA_VISIBLE_DEVICES"]="0"


# Restore fine-tuned model


## Building the model

In [None]:
# Symbolic input for 128x128 images with 3 channels; every image-loading step in this
# notebook resizes to (128, 128) to match it.
input_layer = tf.keras.layers.Input((128, 128, 3))
# Build the network with the bl_net builder from rcnn_sat_2.
# NOTE(review): classes_scenes=2 presumably corresponds to the two labels (0/1) used for
# y_train and to num_classes = 2 in get_RTs; the meaning of cumulative_readout is
# defined in rcnn_sat_2 — confirm there.
model = bl_net(input_layer, classes_scenes=2, cumulative_readout=False)

## Loading the weights

In [None]:
# Name of the fine-tuned model; it is also used as the name of the directory holding
# the weights and, further down, as the output sub-directory for saved results.
model_name = 'model_02.11_2'
# Weights are read from <model_name>/<model_name>_weights.h5; the path is relative,
# so it is resolved against the current working directory.
model.load_weights(os.path.join(model_name,model_name+'_weights.h5')) 

In [None]:
# Print the architecture of the restored model; handy for checking the
# 'ReLU_Layer_<layer>_Time_<timepoint>' layer names that get_activations looks up.
model.summary()

# Define training & testing data for the SVM model 

## Train data

In [None]:
#1) create json files of names+labels of selected training/validation data (5 per class for training, 2 per class for val)
#   -- not all classes are shown in the txt file, but I don't think all of them are needed, so just pick 5 per class from 200 classes
#2) upload the training data onto eltanin
#3) run through 

In [None]:
# Load the selected training images and preprocess them for the network
data_names_path = '/mnt/raid/ni/agnessa/rcnn-sat/'
train_set_path = '/mnt/raid/data/agnessa/data_256'

# the keys of the JSON dictionary are the image paths relative to train_set_path
train_json = os.path.join(data_names_path,'2400_selected_scenes_places365_train_standard.json')
with open(train_json) as json_file:
    subset_scenes_dict_train = json.load(json_file)
train_paths = list(subset_scenes_dict_train.keys())

# full paths of all training images (plain string concatenation, exactly as stored in the JSON keys)
train_images_paths = [train_set_path+image_path for image_path in train_paths]

# start from an all-NaN array so that a slot that was never filled stays recognisable
train_imgs_prep = np.full([len(train_paths),128,128,3], np.nan)
for slot, full_path in enumerate(train_images_paths):
    pixels = img_to_array(load_img(full_path, target_size=(128, 128)))
    train_imgs_prep[slot] = preprocess_image(np.uint8(pixels))

# labels and inputs: the first half of the images gets label 0, the second half label 1
# NOTE(review): this presumes the JSON lists all images of one class before the other — confirm
num_per_class = int(train_imgs_prep.shape[0]/2)
y_train = np.array([0]*num_per_class + [1]*num_per_class)
x_train = train_imgs_prep

## Test data

In [None]:
# Root folders: project directory and the validation images
main_path = '/mnt/raid/ni/agnessa'
data_path = '/mnt/raid/data/agnessa/val_256'

# the keys of this JSON dictionary are the file names of the scenes of interest,
# kept in the order in which they are stored
scenes_json = os.path.join(main_path,'RSA/RSA_EEG/scenes_eeg_ordered.json')
with open(scenes_json) as json_file:
    scenes_60 = json.load(json_file)

selected_scenes = list(scenes_60.keys())
# one full path per selected scene, same order as selected_scenes
test_images_paths = [os.path.join(data_path,scene_name) for scene_name in selected_scenes]

# PCA

In [None]:
# def run_pca(activations,num_components):
#     np.random.seed(0)   
#     scaler = MinMaxScaler(feature_range=[0 ,1]) #normalize
#     activs_scaled = scaler.fit_transform(activations)
#     pca = RandomizedPCA(n_components = num_components)
#     pca_results = pca.fit_transform(activs_scaled)
    
#     return pca_results, pca

# Get activations from all layers & all timepoints (train+test)

In [None]:
def get_activations(img_paths,batch_size,model_directory):
    """Extract, z-score and save the activations of every ReLU layer at every timepoint.

    For each of the 7 layers and 8 timepoints the images are passed through the
    global `model` in batches, the output of the layer named
    'ReLU_Layer_<layer>_Time_<timepoint>' is flattened to
    (num_images x num_features), exact zeros are replaced by 1e-6, every feature
    is z-scored across images, and the result is saved with np.save to
    <main_path>/rcnn-sat/<model_directory>/ReLU_Layer_<layer>_Time_<timepoint>_activations

    Parameters
    ----------
    img_paths : list of str
        Paths of the images to process; row i of every saved array belongs to
        img_paths[i].
    batch_size : int
        Maximum number of images per forward pass. The last batch may be smaller.
    model_directory : str
        Sub-directory (appended to main_path + '/rcnn-sat/') that receives the files.

    Returns
    -------
    None
        Results are only written to disk.

    Raises
    ------
    ValueError
        If img_paths is empty.

    Notes
    -----
    Relies on the notebook globals `model` and `main_path`.
    """
    num_images_all = len(img_paths)
    if num_images_all == 0:
        raise ValueError('img_paths is empty - there is nothing to extract activations from')
    num_layers = 7
    num_timepoints = 8

    ## loop over all layers and timepoints
    for layer_idx in range(num_layers):
        for timepoint in range(num_timepoints):
            layer_time =  'ReLU_Layer_{}_Time_{}'.format(layer_idx,timepoint)
            get_layer_activation = tf.keras.backend.function(
            [model.input],
            [model.get_layer(layer_time).output])

            activ = []
            for batch, img_idx in enumerate(range(0, num_images_all, batch_size)):
                print('Getting activations for layer',layer_idx,', timepoint',timepoint,', batch',batch)
                batch_paths = img_paths[img_idx:img_idx + batch_size]
                # size the buffer by the number of paths actually in this batch, so a
                # shorter final batch is not padded with all-zero images
                batch_images = np.zeros((len(batch_paths),128,128,3))
                #preprocessing images
                for i, image_path in enumerate(batch_paths):
                    image = load_img(image_path, target_size=(128, 128))
                    image = img_to_array(image)
                    image = np.uint8(image)
                    image = preprocess_image(image)
                    batch_images[i,:,:,:] = image

                # the backend function returns a list with one entry per requested output;
                # take that entry explicitly instead of squeeze(), which would also drop
                # the batch axis when a batch holds a single image
                batch_activ = np.asarray(get_layer_activation(batch_images)[0])
                activ.append(batch_activ.reshape(len(batch_paths),-1))

            # stack the batches into (num-all-images x num-all-features)
            flattened_activ = np.concatenate(activ,axis=0)

            #replace zeros
            flattened_activ[flattened_activ==0]=0.000001

            #z-score across images
            zscored_activ = stats.zscore(flattened_activ,axis=0) #normalize over images
            print(np.mean(zscored_activ[:,0])) #get the mean of feature 1 values for all imgs - should be 0

            path = os.path.join(main_path + '/rcnn-sat/'+model_directory, '{}_activations'.format(layer_time))
            np.save(path,zscored_activ)
            print('Saved zscored activations')

            # free the large arrays before the next layer/timepoint
            del flattened_activ
            del activ
            del zscored_activ

    return

In [None]:
# Output sub-directory for get_activations; both alternatives are currently disabled.
# activations_directory = model_name+'/normalized_pca_1000/activations/'
# activations_directory = model_name+'/normalized_nopca/activations/'

# all image paths in one list: training images first, test images last
t = train_images_paths+test_images_paths
# extraction call is disabled; it needs one of the activations_directory lines above
# get_activations(t,20,activations_directory)

# show the total number of images (train + test)
len(t)

# Get RTs

In [None]:
def get_RTs(test_images_path,batch_size,entropy_thresh):
    """Derive a network 'reaction time' for every image from its readout entropy.

    The images are passed through the global `model` in batches. For every image
    and each of the 8 timepoints the entropy (scipy.stats.entropy) of the
    2-class prediction is computed; the reaction time of an image is the first
    timepoint at which that entropy is <= entropy_thresh.

    Parameters
    ----------
    test_images_path : list of str
        Paths of the images to evaluate.
    batch_size : int
        Maximum number of images per forward pass. The last batch may be smaller.
    entropy_thresh : float
        Entropy level that counts as a decision.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per image: the index (0-7) of the first
        timepoint at or below the threshold, or 8 if the threshold is never reached.

    Notes
    -----
    Relies on the notebook global `model`, whose output is expected to be
    num_timepoints x batch_size x classes.
    """
    num_images_all = len(test_images_path)
    num_timepoints = 8
    num_classes = 2

    # predictions of all images: num_images x num_timepoints x num_classes
    pred = np.full([num_images_all,num_timepoints,num_classes],np.nan)

    for img_idx in range(0, num_images_all, batch_size):
        batch_paths = test_images_path[img_idx:img_idx + batch_size]
        # sized by the actual number of paths, so a shorter final batch needs no padding
        batch_images = np.zeros((len(batch_paths),128,128,3))
        for i, image_path in enumerate(batch_paths):
            image = load_img(image_path, target_size=(128, 128))
            image = img_to_array(image)
            image = np.uint8(image)
            image = preprocess_image(image)
            batch_images[i,:,:,:] = image

        #predictions, shape: num_timepoints x images-in-batch x classes
        batch_pred = np.stack([np.asarray(timepoint_pred) for timepoint_pred in model(batch_images)])
        # store them image-first, directly at the rows belonging to this batch
        pred[img_idx:img_idx + len(batch_paths)] = np.transpose(batch_pred,(1,0,2))

    #get entropies for each image & each timepoint
    # entropy() reduces over the first axis, so the class axis is moved to the front;
    # the result has shape num_images x num_timepoints
    entropies_pred = entropy(np.moveaxis(pred,-1,0))

    #for each image, determine the first timepoint at which the entropy reaches the threshold
    reached = entropies_pred <= entropy_thresh
    #if it never reaches the threshold, use num_timepoints (8) instead
    rt_thresh = np.where(reached.any(axis=1),reached.argmax(axis=1),num_timepoints).astype(float)

    return rt_thresh

## Pick an entropy threshold that correlates the most with the EEG RTs

In [None]:
# from scipy import stats

# Leave-one-subject-out selection of the entropy threshold:
# for every fold the threshold is fitted on the mean RTs of all other subjects and the
# resulting RNN RTs are then correlated with the RTs of the left-out subject.

#load RTs (subjects x scenes, NaN where a subject has no RT for a scene)
rts_eeg_dict = sio.loadmat(os.path.join(main_path+'/rcnn-sat/','RT_all_subjects_5_35_categorization.mat'))
rts_eeg = rts_eeg_dict.get('RTs')

#define some variables
num_subjects = rts_eeg.shape[0]
entropies = np.arange(0.01,0.1,0.01)
best_entropy_idx = np.zeros(num_subjects,dtype=int) #index into entropies, one per fold
best_entropy = np.full(num_subjects,np.nan) #same choice as a rounded value, for display
correlation_test = np.full([num_subjects,3],np.nan) #all,artificial,natural
num_scenes = len(test_images_paths)
#scene order: the first 30 scenes are artificial, the last 30 natural
artificial_all = np.arange(30)
natural_all = np.arange(30,60)

#get RNN RTs for every entropy threshold
rts_rnn = np.full([len(entropies),num_scenes],np.nan)
for idx,e in enumerate(entropies):
    rts_rnn[idx,:] = get_RTs(test_images_paths,20,e)

#for each fold, fit the entropy threshold on the remaining subjects
for s in range(num_subjects):
    test_sub = rts_eeg[s,:]
    fit_sub = np.nanmean(rts_eeg[np.arange(num_subjects)!=s,:],0)

    #RNN-human correlation for every threshold, separately for artificial and natural scenes
    correlation_fit = np.full([len(entropies),2],np.nan)
    for idx in range(len(entropies)):
        correlation_fit[idx,0] = stats.pearsonr(rts_rnn[idx,artificial_all],fit_sub[artificial_all])[0] #artificial
        correlation_fit[idx,1] = stats.pearsonr(rts_rnn[idx,natural_all],fit_sub[natural_all])[0] #natural
    corr_diff = np.abs(correlation_fit[:,0]-correlation_fit[:,1])

    #select the entropy with the smallest artificial/natural difference in RNN-human correlation.
    #A threshold that yields constant RNN RTs has a NaN correlation; np.argmin would return
    #exactly that NaN entry, so NaNs are skipped here.
    if np.all(np.isnan(corr_diff)):
        print('No valid correlation in fold',s,'- falling back to the first threshold')
        best_idx = 0
    else:
        best_idx = int(np.nanargmin(corr_diff))
    best_entropy_idx[s] = best_idx
    best_entropy[s] = round(entropies[best_idx],2)
    print(correlation_fit)
    print(corr_diff)

    #remove every scene for which the left-out subject has no RT
    selected_rnn_rts = rts_rnn[best_idx,:]
    has_rt = ~np.isnan(test_sub)
    if not has_rt.all():
        print(s)
    artificial_idx = artificial_all[has_rt[artificial_all]]
    natural_idx = natural_all[has_rt[natural_all]]
    all_idx = np.concatenate((artificial_idx,natural_idx))

    #correlate with leftout subject
    correlation_test[s,0] = stats.pearsonr(selected_rnn_rts[all_idx],test_sub[all_idx])[0]
    correlation_test[s,1] = stats.pearsonr(selected_rnn_rts[artificial_idx],test_sub[artificial_idx])[0]
    correlation_test[s,2] = stats.pearsonr(selected_rnn_rts[natural_idx],test_sub[natural_idx])[0]

print(best_entropy)
print(correlation_test)

#final threshold = the one chosen in most folds (ties go to the smallest threshold).
#The vote is counted on indices: comparing rounded floats against the np.arange values
#can fail to match, and the return shape of stats.mode differs between SciPy versions.
final_idx = int(np.bincount(best_entropy_idx,minlength=len(entropies)).argmax())
RT_entropy = round(entropies[final_idx],2)
RT_RNN_final = rts_rnn[final_idx,:]

corr_path = os.path.join(main_path+'/rcnn-sat/'+model_name,'correlation_RT_human_RNN_cross-validated')
np.save(corr_path,correlation_test)
rt_path = os.path.join(main_path+'/rcnn-sat/'+model_name,'RNN_RTs_entropy_threshold_{}'.format(RT_entropy))
np.save(rt_path,RT_RNN_final)

In [None]:
# # from scipy import stats

# #load RTs
# rts_eeg_dict = sio.loadmat(os.path.join(main_path+'/rcnn-sat/','RT_all_subjects_5_35_categorization.mat'))
# rts_eeg = rts_eeg_dict.get('RTs')

# #fit the entropy threshold on 29 subjects, use remaining one for further analyses
# test_sub = rts_eeg[-1,:]
# fit_sub_artificial = np.nanmedian(rts_eeg[0:-1,0:30],0)
# fit_sub_natural = np.nanmedian(rts_eeg[0:-1,30:],0)

# #calculate RNN-human correlations for each entropy threshold
# entropies = np.arange(0.01,0.2,0.01)
# correlations_art = np.ones([len(entropies)])
# correlations_nat = np.ones([len(entropies)])
# corr_diff = np.ones([len(entropies)])

# correlations_art[:] = np.nan
# correlations_art[:] = np.nan

# rts_e = np.ones([len(entropies),len(test_images_paths)])
# rts_e[:] = np.nan

# for idx,e in enumerate(entropies):
#     rts_e[idx,:] = get_RTs(test_images_paths,20,e)
#     correlations_art[idx] = stats.spearmanr(np.squeeze(rts_e[idx,0:30]),fit_sub_artificial)[0]
#     correlations_nat[idx] = stats.spearmanr(np.squeeze(rts_e[idx,30:]),fit_sub_natural)[0]
#     corr_diff[idx] = np.abs(correlations_art[idx]-correlations_nat[idx])
    
# #get entropy threshold with smallest difference of RNN-human correlations for artificial and natural scenes
# print(correlations_art)
# print(correlations_nat)
# best_entropy = round(entropies[np.argmin(corr_diff)],2)
# rt_path = os.path.join(main_path+'/rcnn-sat/'+model_name,'leave_oo_cross-validated_reaction_time_entropy_th_{}'.format(best_entropy))
# np.save(rt_path,np.squeeze(rts_e[np.argmax(correlations),:]))

In [None]:
## old ##

# # from scipy import stats

# rts_eeg_dict = sio.loadmat(os.path.join(main_path+'/rcnn-sat/','RT_all_subjects_5_35_categorization.mat'))
# rts_eeg = rts_eeg_dict.get('RTs')
# num_subjects = rts_eeg.shape[0]
# np.random.shuffle(rts_eeg) #shuffle subjects order
# # np.save(os.path.join(main_path+'/rcnn-sat/','RTs_shuffled_across_subjects'),rts_eeg)
# # half_subjects = np.nanmedian(rts_eeg[0:int(num_subjects/2),:],0)
# test_sub = rts_eeg[-1,:]
# fit_sub = np.nanmedian(rts_eeg[0:-1,:],0)
# entropies = np.arange(0.01,0.1,0.01)
# correlations = np.ones([len(entropies)])
# correlations[:] = np.nan
# rts_e = np.ones([len(entropies),len(test_images_paths)])
# rts_e[:] = np.nan

# for idx,e in enumerate(entropies):
#     rts_e[idx,:] = get_RTs(test_images_paths,20,e)
#     correlations[idx] = stats.spearmanr(np.squeeze(rts_e[idx,:]),fit_sub)[0]
    
# best_entropy = round(entropies[np.argmax(correlations)],2)
# rt_path = os.path.join(main_path+'/rcnn-sat/'+model_name,'cross-validated_reaction_time_entropy_th_{}'.format(best_entropy))
# # np.save(rt_path,np.squeeze(rts_e[np.argmax(correlations),:]))