# Run ensemble model and save predictions

### Imports

In [1]:
import os
# When the kernel was started from inside the notebooks tree, move two levels up
# to the main directory so that the relative paths used below resolve correctly.
if 'notebooks' in os.getcwd():
    os.chdir('../..')

print('Working directory:', os.getcwd())

Working directory: /scratch/snx3000/bp000429/neurips_presentation/adrian_sensorium


In [2]:
import torch
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from nnfabrik.builder import get_data, get_model, get_trainer

from sensorium.utility import submission
from sensorium.utility.training import read_config

### Load configuration for model

In [3]:
# The five ensemble members share one configuration (only the seed and the
# train/validation split of the dataloader differ), so reading the config of
# ensemble 0 is sufficient.
config_file = 'saved_models/only_ensemble_0/config.yaml'
# Alternative model family:
# config_file = 'saved_models/config_m4_ens0/config.yaml'

config = read_config(config_file)

# Show the parsed configuration for reference
print(config)

ordereddict([('data_sets', ['all']), ('dataset_fn', 'sensorium.datasets.static_loaders'), ('dataset_config', ordereddict([('normalize', True), ('include_behavior', True), ('include_eye_position', True), ('batch_size', 128), ('scale', 0.25), ('preload_from_merged_data', True), ('include_trial_id', True), ('include_rank_id', False), ('include_history', False), ('include_behav_state', False), ('adjusted_normalization', False), ('use_ensemble_tier', True), ('ensemble_nr', 0)])), ('model_fn', 'sensorium.models.modulated_stacked_core_full_gauss_readout'), ('model_seed', 40), ('model_config', ordereddict([('pad_input', False), ('stack', -1), ('layers', 4), ('input_kern', 9), ('gamma_input', 6.3831), ('gamma_readout', 0.0076), ('hidden_kern', 7), ('hidden_channels', 64), ('depth_separable', True), ('grid_mean_predictor', ordereddict([('type', 'cortex'), ('input_dimensions', 2), ('hidden_layers', 1), ('hidden_features', 30), ('nonlinearity', 'ELU'), ('final_tanh', True)])), ('init_sigma', 0.1),

### Prepare dataloader

In [4]:
# A single set of dataloaders is enough: the test and final_test tiers are
# identical for every ensemble member.
basepath = "notebooks/data/"

# Collect every entry of `basepath` whose name contains ".zip", then drop the
# paths that belong to the 'static26872-17-20' recording.
filenames = [
    path
    for path in (
        os.path.join(basepath, name)
        for name in os.listdir(basepath)
        if ".zip" in name
    )
    if 'static26872-17-20' not in path
]

dataset_fn = config['dataset_fn']  # 'sensorium.datasets.static_loaders'

# Start from the data paths and layer the configured dataset options on top
# (options from the config win if a key occurs twice).
dataset_config = {'paths': filenames}
dataset_config.update(config['dataset_config'])

dataloaders = get_data(dataset_fn, dataset_config)

### Load trained models

In [5]:
# Instantiate all five models and load the trained weights of each ensemble member.

# All models have the same parameters, so the builder arguments are read once
# instead of on every loop iteration.
model_fn = config['model_fn']     # e.g. 'sensorium.models.modulated_stacked_core_full_gauss_readout'
model_config = config['model_config']

model_list = list()

for i in range(5):
    model = get_model(model_fn=model_fn,
                      model_config=model_config,
                      dataloaders=dataloaders,
                      seed=config['model_seed'],
                     )

    # Load trained weights from specific ensemble
    save_file = 'saved_models/only_ensemble_{}/saved_model_v1.pth'.format(i)
    # save_file = 'saved_models/config_m4_ens{}/saved_model_v1.pth'.format(i)

    # map_location='cpu': deserialize the checkpoint onto the CPU so that weights
    # saved from a GPU also load on machines without CUDA. load_state_dict then
    # copies the values into the model's existing parameters, which keep their device.
    model.load_state_dict(torch.load(save_file, map_location='cpu'))
    model_list.append( model )

### Combine them into one ensemble model

In [6]:
from sensorium.models.ensemble import EnsemblePrediction

In [7]:
# Wrap the models collected in `model_list` into a single EnsemblePrediction object.
# NOTE(review): mode='mean' presumably averages the member predictions — confirm
# against sensorium.models.ensemble.
ensemble = EnsemblePrediction(model_list, mode='mean')

### Predict responses for all timepoints

In [8]:
# Name of the merged ensemble; used below for the output folder under
# 'saved_models' and for the names of the saved .npy and .csv files.
model_name = 'only_ensemble_merged'
# Alternative model family:
# model_name = 'config_m4_merged'

In [9]:
from sensorium.utility import prediction

# calculate predictions per dataloader
results = prediction.all_predictions_with_trial(ensemble, dataloaders)

# merge predictions, sort in time and add behavioral variables
merged = prediction.merge_predictions(results)
sorted_res = prediction.sort_predictions_by_time(merged)
# no return value is used here: judging by its name the helper updates
# `sorted_res` in place
prediction.inplace_add_behavior_to_sorted_predictions(sorted_res)

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it is free of the
# check-then-create race and also creates 'saved_models' itself when missing.
save_folder = os.path.join('saved_models', model_name)
os.makedirs(save_folder, exist_ok=True)

# NOTE(review): if `sorted_res` is a dict, np.save stores it as a pickled object
# array; reading it back then needs np.load(npy_file, allow_pickle=True).
npy_file = os.path.join(save_folder, model_name + '.npy')
np.save( npy_file, sorted_res)

Iterating datasets: 100%|██████████| 6/6 [01:46<00:00, 17.75s/it]


In [10]:
# save correlation values as csv files separately for easier comparisons

# calculate correlations on splits
dataframe_entries = list()
trial_trans = { 0:'Train', 1:'Val', 2:'Test', 3:'Final Test'}
keys = list( sorted_res.keys() )

for key in keys:
    # per-session arrays: rows are trials/timepoints, columns are neurons
    ses_data = sorted_res[key]
    nr_neurons = ses_data['output'].shape[1]
    trial_type = ses_data['trial_type']

    # The split masks depend only on the session, so build them once here
    # instead of recomputing them for every neuron.
    # NOTE(review): only splits 0-2 are evaluated; 'Final Test' (3) is in the
    # mapping but skipped — presumably because its targets are withheld; confirm.
    split_masks = [trial_type == split for split in range(3)]

    for i in range(nr_neurons):
        true = ses_data['target'][:,i]
        pred = ses_data['output'][:,i]

        for split, mask in enumerate(split_masks):
            # Pearson correlation between target and prediction within the split
            cor = np.corrcoef( true[mask], pred[mask] )[1,0]
            # corrcoef yields NaN e.g. for constant or empty inputs; report 0 instead
            if np.isnan(cor):
                cor=0

            dataframe_entries.append(
                        dict(model=model_name, key=key, neuron=i,
                             split=trial_trans[split], cor=cor)
                        )

# one row per (session key, neuron, split)
df = pd.DataFrame( dataframe_entries )

# save DataFrame as csv; create the results folder first so that to_csv does
# not fail on a fresh checkout where it does not exist yet
csv_folder = os.path.join( 'saved_models', '00_csv_results' )
os.makedirs(csv_folder, exist_ok=True)
path = os.path.join( csv_folder, model_name+'.csv' )
df.to_csv(path)

### For competition: Generate submission file

In [8]:
# Data key of the recording for which the submission files are generated
dataset_name = '27204-5-13'

# Prefer the GPU, but fall back to the CPU so this cell also runs on machines
# without CUDA instead of failing on the hard-coded device.
submission_device = "cuda" if torch.cuda.is_available() else "cpu"

# According to the recorded output, this writes submission_file_live_test.csv
# and submission_file_final_test.csv into `path`.
submission.generate_submission_file(trained_model=ensemble,
                                    dataloaders=dataloaders,
                                    data_key=dataset_name,
                                    path="notebooks/submission_m4/results/",
                                    device=submission_device)

Submission file saved for tier: live_test. Saved in: notebooks/submission_m4/results/submission_file_live_test.csv
Submission file saved for tier: final_test. Saved in: notebooks/submission_m4/results/submission_file_final_test.csv
