In [None]:
from feature_analysis3 import *
import seaborn as sns
from mapping_methods import *

In [None]:
# Image set whose stimuli and ratings are analysed in the first part of the notebook.
target_imageset = 'oasis'

# Model architecture and training regime, kept both as separate names,
# as an option dict, and as a single '<model>_<train_type>' identifier.
model_name, train_type = 'alexnet', 'imagenet'
model_option = dict(model_name = model_name, train_type = train_type)
model_string = f'{model_name}_{train_type}'

In [None]:
from torchvision import models

In [None]:
# Instantiate torchvision's AlexNet, requesting pretrained weights.
# NOTE(review): no later cell visible here reads `model` (features are requested via
# model_string) — confirm whether this cell is still needed.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of `weights=`;
# check against the installed version before changing it.
model = models.alexnet(pretrained = True)

In [None]:
# Image metadata for the target image set (must provide 'image_name'; 'image_path' is read later).
image_data = load_image_data(target_imageset)
# Attach the responses to the image metadata by image name. DataFrame.merge returns a new
# frame and leaves image_data untouched, so no defensive copy of image_data is needed.
response_data = image_data.merge(load_response_data(target_imageset), on = 'image_name')

In [None]:
# Per-channel normalisation statistics passed to transforms.Normalize below.
imagenet_stats = dict(mean = [0.485, 0.456, 0.406],
                      std = [0.229, 0.224, 0.225])

# Preprocessing pipeline: resize to 224 x 224, convert to a tensor, then normalise.
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean = imagenet_stats['mean'], std = imagenet_stats['std']),
])

In [None]:
# Build the stimulus loader from the image paths and the preprocessing pipeline.
# get_stimulus_loader comes from the star imports; presumably it yields the images in the
# order of image_data.image_path — confirm, since later cells index features by row position.
stimulus_loader = get_stimulus_loader(image_data.image_path, image_transforms)

In [None]:
# Extract feature maps for the stimuli; the model is selected by its '<model>_<train_type>' string.
stimulus_features = get_all_feature_maps(model_string, inputs = stimulus_loader)
# Replace the feature maps with their "SRP" versions (presumably sparse random projections —
# confirm in get_feature_map_srps). The name is rebound, so re-running only this cell feeds
# the already-projected features back in. delete_originals = True presumably frees the raw maps.
stimulus_features = get_feature_map_srps(stimulus_features, delete_originals = True)

In [None]:
# Run the (non-bootstrapped) regression of the responses on the stimulus features with a
# single penalty value, alpha = 1000. get_regression_results is defined outside this notebook;
# later cells rely on its output having 'image_type', 'score', 'model', 'model_layer' and
# 'model_layer_index' columns.
reg_results = get_regression_results(model_option, stimulus_features, response_data, alpha_values = [1000])

In [None]:
# Apply max_transform to the rows fitted on all image types together ('Combo'),
# grouped by measurement and image type.
max_transform(reg_results.loc[reg_results['image_type'].eq('Combo')],
              group_vars = ['measurement', 'image_type'])

In [None]:
# Average the scores within each model layer, then apply max_transform per model.
grouping_vars = ['model','model_layer','model_layer_index']
max_transform(reg_results.groupby(grouping_vars, as_index = False)['score'].mean(), ['model'])

### Bootstrapping Procedure

In [None]:
# NOTE(review): this rebinds image_data and response_data, which until here held the
# target_imageset frames; every later cell that reads these names sees the 'vessel' data.
image_data = load_image_data('vessel')
# average = False presumably keeps the individual per-subject ratings (get_bootstrap_sample
# below requires a 'subject' column) — confirm against load_response_data.
response_data = load_response_data('vessel', average = False)

In [None]:
def get_bootstrap_sample(response_data, image_data, image_type, measurement, rng = None):
    """Build one subject-level bootstrap replicate of the mean image ratings.

    Subjects are resampled with replacement (as many draws as there are
    subjects) and, for every image, `measurement` is averaged over the
    resampled subjects.

    Parameters
    ----------
    response_data : pandas.DataFrame
        Per-subject responses with the columns 'subject', 'image_name',
        'image_type' and `measurement`; one row per (subject, image).
    image_data : pandas.DataFrame
        Image metadata with an 'image_name' column.
    image_type : str
        Image type whose responses are resampled, or 'Combo' to use all rows.
    measurement : str
        Name of the rating column to resample.
    rng : optional
        Object exposing a NumPy-style ``choice(a, size=...)`` method, e.g.
        ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``.
        Defaults to the global ``np.random`` state, so ``np.random.seed``
        keeps working as before.

    Returns
    -------
    pandas.DataFrame
        `image_data` merged on 'image_name' with the bootstrapped mean
        ('rating'), the `measurement` name and the image's 'image_type'.
    """
    if rng is None:
        rng = np.random

    # The image -> type lookup is taken from the full response table, not the subset.
    image_type_reference = response_data[['image_name','image_type']].drop_duplicates()

    if image_type == 'Combo':
        response_data_sub = response_data
    else:
        response_data_sub = response_data[response_data['image_type'] == image_type]

    # Wide table: one row per subject, one column per image.
    subject_data = (response_data_sub[['subject', measurement, 'image_name']]
                    .pivot(index = 'subject', columns = 'image_name', values = measurement))

    # Draw subjects with replacement; .loc repeats the rows of subjects drawn more than once.
    sampled_subjects = rng.choice(subject_data.index.to_numpy(), size = subject_data.shape[0])
    bootstrap_sample = subject_data.loc[sampled_subjects]

    bootstrap_response_data = (bootstrap_sample.mean(axis = 0)
                               .rename_axis('image_name')
                               .rename('rating')
                               .reset_index())
    bootstrap_response_data['measurement'] = measurement
    bootstrap_response_data = bootstrap_response_data.merge(image_type_reference, on = 'image_name')

    return image_data.merge(bootstrap_response_data, on = 'image_name')

In [None]:
# Number of subject-level resamples drawn per measurement.
n_bootstraps = 10000

# get_bootstrap_sample draws from NumPy's global random state; seed it so that a
# Restart & Run All regenerates the same bootstrap replicates.
np.random.seed(0)

bootstrap_list = []
for measurement in ['beauty']:
    for i in tqdm(range(n_bootstraps)):
        bootstrap_sample = get_bootstrap_sample(response_data, image_data, 'Combo', measurement)
        # Replicates are numbered from 1.
        bootstrap_sample['bootstrap_id'] = i+1
        bootstrap_list.append(bootstrap_sample)

In [None]:
# Stack all bootstrap replicates into one long frame. Row labels are kept as produced by
# each replicate, so they repeat across replicates; 'bootstrap_id' identifies the replicate.
bootstrap_df = pd.concat(bootstrap_list)

In [None]:
# Display the stacked bootstrap frame as a visual check.
bootstrap_df

In [None]:
# index = False: the row labels of the concatenated frame merely repeat for every replicate
# and carry no information, so they are not written. (For to_parquet, index = None would
# store them in the file, unlike the to_csv export of the pivoted table.)
bootstrap_df.to_parquet('response/bootstrap_vessel.parquet', index = False)

In [None]:
# Reshape to wide form: one row per (image, measurement), one column per bootstrap replicate.
bootstrap_pivot = (bootstrap_df
                   .set_index(['image_name', 'measurement', 'bootstrap_id'])['rating']
                   .unstack('bootstrap_id'))

In [None]:
# Export the wide bootstrap table, rounded to 5 decimals, without writing the row index.
(bootstrap_pivot
 .reset_index()
 .round(5)
 .to_csv('response/vessel_bootstraps.csv', index = None))

### Bootstrapped Regression

In [None]:
# Load precomputed bootstrap ratings in wide form (later cells treat the first two columns
# as identifiers and the remaining columns as bootstrap replicates).
# NOTE(review): the file name is hard-coded to 'oasis' rather than derived from
# target_imageset, and it is not the 'vessel' file written in the section above.
bootstrap_data = pd.read_csv('response/oasis_bootstraps.csv')

In [None]:
# Row labels of the 'Scene' images in response_data.
# NOTE(review): no later top-level cell visible here reads image_indices, and response_data
# was re-bound to the 'vessel' responses in the bootstrapping section — confirm this is intended.
image_indices = response_data[response_data['image_type'] == 'Scene'].index

In [None]:
# Preview the bootstrap rows for the 'beauty' measurement, renumbered from 0.
bootstrap_data.loc[bootstrap_data['measurement'].eq('beauty')].reset_index(drop = True)

In [None]:
# Same 'beauty' preview as the cell directly above (duplicate cell).
bootstrap_data.loc[bootstrap_data['measurement'].eq('beauty')].reset_index(drop = True)

In [None]:
# Targets: every bootstrap replicate of the 'beauty' ratings (all columns after the first two),
# one column per replicate.
y = bootstrap_data.loc[bootstrap_data['measurement'].eq('beauty')].iloc[:, 2:].to_numpy()
# Predictors: scaled features of the 'ReLU-7' layer.
X = scale(stimulus_features['ReLU-7'])

# Ridge regression with a single penalty; the per-sample cross-validation values are stored
# so they can be read back below.
regression = RidgeCV(alphas = [1000], scoring = 'explained_variance', store_cv_values = True)
regression.fit(X, y)

# Stored values for the first (and only) alpha.
y_pred = regression.cv_values_[:, :, 0]

In [None]:
# Score the stored predictions against the bootstrapped targets with the 'pearson_r' metric.
# score_func comes from the star imports; presumably it returns one score per bootstrap
# column — confirm.
scores = score_func(y, y_pred, 'pearson_r')

In [None]:
# Mean score across bootstraps, followed by the distribution of the scores.
print(scores.mean())
# NOTE(review): seaborn deprecates distplot in favour of histplot/displot in recent
# releases — check the installed seaborn version before relying on this call.
sns.distplot(scores);

In [None]:
# Score types computed for every fit; read as a module-level global by
# get_bootstrapped_regression_results below.
scoring_metrics = ['explained_variance', 'pearson_r']

def get_bootstrapped_regression_results(model_option, stimulus_features, response_data, bootstrap_data,
                                        alpha_values = np.logspace(-1,5,25).tolist()):
    """Fit ridge regressions from layer features to bootstrapped ratings and score them.

    For every model layer, measurement and image type (each type present in
    `response_data`, plus 'Combo' for all images) a multi-target RidgeCV is
    fitted with one target column per bootstrap replicate. For every alpha and
    every entry of the global `scoring_metrics`, the stored cross-validation
    values are scored against the targets with `score_func`.

    Parameters
    ----------
    model_option : dict
        Must contain 'model_name' and 'train_type'; both are copied into the output.
    stimulus_features : mapping
        Layer name -> 2-D feature array (or torch.Tensor) with one row per image.
    response_data : pandas.DataFrame
        Must contain 'image_name' and 'image_type'. Only columns named
        'arousal', 'beauty' or 'valence' are treated as measurements.
    bootstrap_data : pandas.DataFrame
        Wide bootstrap table with a 'measurement' column; the first two
        columns are skipped and every remaining column is one replicate.
    alpha_values : sequence of float
        Ridge penalties passed to RidgeCV.

    Returns
    -------
    pandas.DataFrame
        Long table with one row per bootstrap replicate for every combination
        of layer, measurement, image type, score type and alpha.

    Notes
    -----
    Rows are matched by position: the i-th unique image of `response_data`
    is assumed to correspond to the i-th row of each feature array and to the
    i-th row of `bootstrap_data` within a measurement.
    NOTE(review): nothing here checks that ordering — confirm that the
    bootstrap file and the features share the image order of `response_data`.
    """
    # Reset the index so that it holds row positions (0..n_images-1); the label index left
    # by drop_duplicates() is only positional when response_data has a default index with
    # exactly one row per image.
    image_reference = (response_data[['image_name','image_type']]
                       .drop_duplicates()
                       .reset_index(drop = True))

    model_name = model_option['model_name']
    train_type = model_option['train_type']

    # Loop-invariant inputs, computed once.
    alpha_values = list(alpha_values)
    measurements = [column for column in response_data.columns if column in ['arousal','beauty','valence']]
    image_types = image_reference['image_type'].unique().tolist() + ['Combo']

    scoresheets = []
    for model_layer_index, model_layer in enumerate(tqdm(stimulus_features, desc = 'Regression (Layer)')):
        target_features = stimulus_features[model_layer]
        if isinstance(target_features, torch.Tensor):
            target_features = target_features.numpy()

        for measurement in measurements:
            bootstrap_data_sub = bootstrap_data[bootstrap_data['measurement'] == measurement].reset_index(drop=True)
            for image_type in image_types:
                if image_type == 'Combo':
                    image_indices = image_reference.index.to_numpy()
                else:
                    image_indices = image_reference[image_reference['image_type'] == image_type].index.to_numpy()

                # One target column per bootstrap replicate.
                y = bootstrap_data_sub.iloc[image_indices,2:].to_numpy()
                X = scale(target_features[image_indices,:])
                # With a scoring function set, scikit-learn documents cv_values_ as holding
                # per-sample prediction values, with shape (n_samples, n_targets, n_alphas).
                regression = RidgeCV(alphas=alpha_values, store_cv_values=True,
                                     scoring='explained_variance').fit(X,y)

                # Derived from the data instead of a fixed 1000, so any number of
                # replicates works; ids stay 0-based as before.
                bootstrap_ids = list(range(y.shape[1]))

                for alpha_index, alpha_value in enumerate(alpha_values):
                    y_pred = regression.cv_values_[:, :, alpha_index]

                    for score_type in scoring_metrics:
                        # Presumably one score per bootstrap column — confirm in score_func.
                        ridge_gcv_score = score_func(y, y_pred, score_type)

                        scoresheet = pd.DataFrame({'model': model_name, 'train_type': train_type,
                                                   'model_layer_index': model_layer_index+1,
                                                   'model_layer': model_layer,
                                                   'measurement': measurement,
                                                   'image_type': image_type,
                                                   'score_type': score_type,
                                                   'bootstrap_ids': bootstrap_ids,
                                                   'score': ridge_gcv_score,
                                                   'alpha': alpha_value})

                        scoresheets.append(scoresheet)

    return pd.concat(scoresheets)

In [None]:
# response_data is re-bound to the per-subject 'vessel' responses in the bootstrapping
# section above, while stimulus_features were extracted from the target_imageset images.
# Rebuild the target image set's responses here so the regression sees matching images.
target_response_data = load_image_data(target_imageset).merge(load_response_data(target_imageset),
                                                              on = 'image_name')
bootstrap_results = get_bootstrapped_regression_results(model_option, stimulus_features,
                                                        target_response_data, bootstrap_data, [1000])

In [None]:
# Mean Pearson score of the 'Linear-3' layer for each measurement and image type.
(bootstrap_results
 .loc[bootstrap_results['model_layer'].eq('Linear-3')
      & bootstrap_results['score_type'].eq('pearson_r')]
 .groupby(['measurement','image_type'])['score']
 .mean())

In [None]:
# Mean score per measurement, image type and score type from a stored bootstrapping result.
# NOTE(review): the file is for resnet18_imagenet, not the alexnet model configured at the
# top of this notebook — presumably an externally computed run used for comparison; confirm.
(pd.read_parquet('incoming/bootstrapping/oasis/resnet18_imagenet.parquet')
 .groupby(['measurement', 'image_type', 'score_type'])['score'].mean())