# Notebook to generate templates for Transformer-based models

In [6]:
import os
import yaml
import numpy as np

from utils import check_folder, read_yaml, save_yaml, write

In [7]:
# Script that every generated command runs as `python <path_to_main> --yaml_file <template>`.
path_to_main = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/main.py"

In [8]:
# Subject identifiers per language, expressed as a contiguous range minus the
# identifiers that are absent from each cohort.
subject_dict = {
    'english': [
        subject_id for subject_id in range(57, 116)
        if subject_id not in (60, 71, 85, 90, 102, 107, 111, 112)
    ],
    'french': [subject_id for subject_id in range(1, 31) if subject_id != 28],
}

In [9]:
# Available HRF models: each base kernel (the one used in SPM, and the Glover
# one) on its own, with its time derivative (2 regressors), or with both the
# time and dispersion derivatives (3 regressors).
hrf_list = [
    kernel + derivatives
    for kernel in ('spm', 'glover')
    for derivatives in ('', ' + derivative', ' + derivative + dispersion')
]
# HRF model written into the templates generated below.
hrf = 'spm'

In [10]:
# Cohort to generate templates for; used as a key into `subject_dict`.
language = 'english'
# Initial temporal shift; the configuration cells further down assign it again.
temporal_shifting = 0

In [12]:
# Every project path below hangs off this root (note the trailing slash).
_lpp_root = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/"

# Base configuration shared by all generated YAML files. Entries left as None
# ('subject', 'language', 'hrf', 'models', 'model_name', ...) are filled in
# later, either by the configuration cells or by `fill_template_folder`.
template = {
    # --- Shared general parameters ---
    'subject': None,
    'parallel': False,
    'cuda': True,
    'seed': 1111,
    'language': None,
    'path_to_root': _lpp_root,
    'path_to_fmridata': _lpp_root + "data/fMRI",
    'output': _lpp_root + "derivatives/fMRI/maps/",
    'input': _lpp_root + "data/stimuli-representations/",
    'detrend': True,
    'standardize': True,
    'high_pass': None,
    'low_pass': None,
    'mask_strategy': 'background',
    # 'dtype': 'float32',  (kept disabled, as in the original template)
    'memory_level': 0,
    'smoothing_fwhm': None,
    'verbose': 0,

    # --- Shared splitter parameters ---
    'nb_runs': 9,
    'nb_runs_test': 1,

    # --- Shared compression parameters ---
    'manifold_method': None,
    'manifold_args': {
        'n_neighbors': 4,
        'random_state': 1111,
        'min_dist': 0.0,
        'metric': 'cosine',
    },

    # --- Shared transformation parameters (regressor building and scaling) ---
    'tr': 2.0,
    'scaling_mean': True,
    'scaling_var': True,
    'scaling_axis': None,
    'hrf': None,
    'offset_path': _lpp_root + "data/onsets-offsets/",
    'duration_path': _lpp_root,
    'temporal_shifting': 0,

    # --- Shared estimator model parameters ---
    'base': 10.0,
    'voxel_wise': True,
    'alpha_percentile': 99.9,
    'alpha': None,
    'alpha_min_log_scale': 2,
    'alpha_max_log_scale': 5,
    'nb_alphas': 10,
    'optimizing_criteria': 'R2',
    'estimator_model': 'Ridge()',

    # --- Maps creation parameters ---
    # NOTE(review): both masker paths are hard-wired to the english masker,
    # whatever `language` is set to — confirm this is intended for french runs.
    'atlas': 'cort-prob-2mm',
    'masker_path': _lpp_root + "global_masker_english",
    'smoothed_masker_path': _lpp_root + "smoothed_global_masker_english",

    # --- Models ---
    'models': None,
    'model_name': None,
}

In [13]:
def write_commands(command_lines, path_to_sh, queue='Nspin_long'):
    """Write each command to its shell script and record one qsub line per script.

    For every (command, script path) pair, the command is passed to `write`
    for the script path, then a `qsub` submission line for that script is
    passed to `write` for the module-level `job_to_launch_path`.

    Args:
        command_lines: sequence of command strings.
        path_to_sh: sequence of shell-script paths, one per command, in the
            same order.
        queue: name of the queue given to `qsub -q` (e.g. 'Nspin_long',
            'Nspin_bigM').

    Raises:
        ValueError: if the two sequences differ in length. The check runs
            before anything is written, so a mismatch can no longer leave a
            half-written job file or silently drop entries.
    """
    if len(command_lines) != len(path_to_sh):
        raise ValueError(
            "command_lines and path_to_sh must have the same length "
            "(got {} and {})".format(len(command_lines), len(path_to_sh))
        )

    walltime = '99:00:00'
    for index, (command, sh_path) in enumerate(zip(command_lines, path_to_sh)):
        write(sh_path, command)
        output_log = '/home/ap259944/logs/log_o_{}'.format(index)
        error_log = '/home/ap259944/logs/log_e_{}'.format(index)
        # Strip only the final extension: script names may contain dots
        # (e.g. 'norm-np.inf'), and cutting at the first dot would give many
        # different scripts the same truncated job name.
        job_name = os.path.splitext(os.path.basename(sh_path))[0]
        write(job_to_launch_path, f"qsub -q {queue} -N {job_name} -l walltime={walltime} -o {output_log} -e {error_log} {sh_path}")


In [14]:
def get_model_template(
    model_name, 
    layers, 
    hidden_states, 
    attention_heads, 
    heads,
    surname,
    data_compression, 
    ncomponents,
    offset_type='word+punctuation',
    duration_type=None,
    centering=False,
    shift_surprisal=False,
    scaling_type=None,
    order=None,
    input_template='activations',
    hidden_size=768,
    head_size=64):
    """Build the per-model entry stored under the 'models' key of a template.

    Args:
        model_name: name of the model the features come from.
        layers: iterable of layer indices to select columns from.
        hidden_states: if truthy, select the hidden-state columns of `layers`.
        attention_heads: if truthy, select the attention columns of `layers`
            for each head in `heads`.
        heads: iterable of head indices; only iterated when `attention_heads`
            is truthy (it may be None otherwise).
        surname: stored unchanged under 'surname'.
        data_compression, ncomponents, offset_type, duration_type, centering,
        shift_surprisal, scaling_type, order, input_template: stored unchanged
            under the key of the same name.
        hidden_size: number of columns per hidden-state layer (default 768,
            the value that used to be hard-coded).
        head_size: number of columns per attention head (default 64, the
            value that used to be hard-coded).

    Returns:
        dict describing the model. 'columns_to_retrieve' holds the *string*
        representation of the column-name list, not the list itself.
    """
    columns_to_retrieve = []
    if hidden_states:
        # Column suffixes are 1-based: 1 .. hidden_size inclusive.
        columns_to_retrieve = [
            'hidden_state-layer-{}-{}'.format(layer, i)
            for layer in layers
            for i in range(1, hidden_size + 1)
        ]
    if attention_heads:
        # Appended after the hidden-state columns when both flags are set.
        columns_to_retrieve += [
            'attention-layer-{}-head-{}-{}'.format(layer, head, i)
            for layer in layers
            for head in heads
            for i in range(1, head_size + 1)
        ]
    result = { 
        'model_name': model_name,
        'columns_to_retrieve': str(columns_to_retrieve),
        'surname': surname,
        'data_compression': data_compression,
        'ncomponents': ncomponents,
        'offset_type': offset_type, # word / word+punctuation / ...,
        'duration_type': duration_type,
        'shift_surprisal': shift_surprisal,
        'centering': centering,
        'order': order,
        'scaling_type': scaling_type,
        'input_template': input_template # cls / sep / activations
      }
    return result
    

In [15]:
def fill_template_folder(
    model_names, 
    language, 
    template, 
    hidden_layer_list,
    attention_layer_list,
    attention_layer_head_list,
    centering,
    order,
    scaling_type,
    input_template,
    data_compression,
    ncomponents,
    temporal_shifting,
    path_to_main=path_to_main):
    """Save one YAML template per run and collect the matching commands.

    A "run" is one combination of model, subject and feature set. For each
    model (by position `index` in `model_names`) and each subject of
    `subject_dict[language]`, three families of runs are generated:

    * one per entry of `hidden_layer_list` (hidden states of those layers),
    * one per entry of `attention_layer_list` (all heads listed in the
      module-level `heads`, for those layers),
    * one per (layer, head) pair of `attention_layer_head_list`.

    Besides its arguments, the function reads the module-level names
    `subject_dict`, `templates_folder`, `sh_folder` and `heads`, and calls
    `get_model_template` and `save_yaml`.

    Args:
        model_names: model names; the position of a name selects its entry in
            the per-model sequences below.
        language: key into `subject_dict`.
        template: dict saved for every run. It is modified in place: its
            'subject', 'models' and 'model_name' entries are overwritten, and
            keep the values of the last run when the function returns.
        hidden_layer_list: list of layer lists, one run per layer list.
        attention_layer_list: list of layer lists, one run per layer list.
        attention_layer_head_list: iterable of (layer, head) pairs.
        centering, order, scaling_type, data_compression, ncomponents:
            sequences indexed by model position. Compression is not applied
            to the single-head runs.
        input_template: forwarded to `get_model_template`.
        temporal_shifting: only used to build run names.
        path_to_main: script placed in every generated command.

    Returns:
        (path_to_sh, command_lines): shell-script paths and command strings,
        aligned one to one.

    Naming changes with respect to the previous implementation:
        * a hidden-state run over exactly one layer is named
          '..._hidden-layer-<n>' instead of '..._hidden-all-layers', so the
          runs of a layer-wise comparison no longer share (and overwrite)
          the same YAML and script; multi-layer runs keep their old name;
        * the attention-layer run now uses 'attention-layer-<n>' in
          `template['model_name']` as well, matching its file names;
        * the 'np.' prefix of `order` values (e.g. 'np.inf') is removed from
          the names of all three families, not only the hidden-state one.
    """
    command_lines = []
    path_to_sh = []

    def build_model(index, model_name, layers, hidden_states, attention_heads,
                    heads, surname, compression, n_components):
        """Call get_model_template with the settings shared by every family."""
        return get_model_template(model_name=model_name,
                                  layers=layers,
                                  hidden_states=hidden_states,
                                  attention_heads=attention_heads,
                                  heads=heads,
                                  surname=surname,
                                  data_compression=compression,
                                  ncomponents=n_components,
                                  offset_type="word+punctuation",
                                  duration_type=None,
                                  centering=centering[index],
                                  order=order[index],
                                  shift_surprisal=False,
                                  scaling_type=scaling_type[index],
                                  input_template=input_template)

    def register_run(run_name, model):
        """Save the template of one run and record its command and script path."""
        # Keep dots out of the names: 'np.inf' becomes 'inf'.
        run_name = run_name.replace('np.', '')
        template['models'] = [model]
        template['model_name'] = run_name
        yaml_path = os.path.join(templates_folder, run_name + '.yml')
        save_yaml(template, yaml_path)
        command_lines.append("python {} --yaml_file {}".format(path_to_main, yaml_path))
        path_to_sh.append(os.path.join(sh_folder, run_name + '.sh'))

    for index, model_name in enumerate(model_names):
        for subject in subject_dict[language]:
            template['subject'] = subject

            # hidden layers comparison
            for hidden_layers in hidden_layer_list:
                model = build_model(index, model_name,
                                    layers=hidden_layers,
                                    hidden_states=True,
                                    attention_heads=False,
                                    heads=None,
                                    surname="{}_hidden-layer-{}".format(model_name, hidden_layers),
                                    compression=data_compression[index],
                                    n_components=ncomponents[index])
                additional = '_{}_{}'.format(data_compression[index], ncomponents[index]) if data_compression[index] is not None else ''
                # A single-layer run must carry its layer number, otherwise
                # all layers of a layer-wise comparison collide on one name.
                if len(hidden_layers) == 1:
                    layer_tag = 'hidden-layer-{}'.format(hidden_layers[0])
                else:
                    layer_tag = 'hidden-all-layers'
                register_run(
                    '{}_norm-{}_temporal-shifting-{}_{}_{}{}'.format(
                        model_name, order[index], temporal_shifting, subject, layer_tag, additional),
                    model)

            # attention layers comparison
            for attention_layers in attention_layer_list:
                model = build_model(index, model_name,
                                    layers=attention_layers,
                                    hidden_states=False,
                                    attention_heads=True,
                                    heads=heads,  # module-level list of heads
                                    surname="{}_norm-{}_attention-layer-{}".format(model_name, order[index], attention_layers),
                                    compression=data_compression[index],
                                    n_components=ncomponents[index])
                # Only the first layer of the group appears in the name.
                register_run(
                    '{}_norm-{}_{}_attention-layer-{}'.format(
                        model_name, order[index], subject, attention_layers[0]),
                    model)

            # specific attention heads comparison
            for (layer, head) in attention_layer_head_list:
                model = build_model(index, model_name,
                                    layers=[layer],
                                    hidden_states=False,
                                    attention_heads=True,
                                    heads=[head],
                                    surname="{}_norm-{}_attention-layer-{}-head-{}".format(model_name, order[index], layer, head),
                                    compression=None,
                                    n_components=None)
                register_run(
                    '{}_norm-{}_temporal-shifting-{}_{}_attention-layer-{}_head-{}'.format(
                        model_name, order[index], temporal_shifting, subject, layer, head),
                    model)
    return path_to_sh, command_lines


## Here we start the generation

In [16]:
# Everything generated by this notebook goes under one scratch directory:
# the YAML templates, the shell scripts, and the list of qsub lines.
_batch_root = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_transformers_0/"
templates_folder = _batch_root + "templates/"
sh_folder = _batch_root + "shell_commands/"
job_to_launch_path = _batch_root + "jobs.txt"

# check_folder comes from utils; presumably it creates the folder when it is
# missing — TODO confirm.
check_folder(templates_folder)
check_folder(sh_folder)


### Layer-wise

In [124]:
# Models compared layer by layer. The position of a name in this list is the
# index used by fill_template_folder to pick its entry in the per-model lists
# (data_compression, ncomponents, order, centering, scaling_type).
model_names = [
    'bert-base-cased_pre-7_1_post-0_norm-None',
    'gpt2_pre-20_1_norm-inf',
]

In [125]:
#model_names = ['bert-base-cased', 'gpt2_scaled', 'roberta-base']
hidden_layer_list = [[i] for i in range(13)]
attention_layer_list = [] #[[i] for i in range(1, 13)]
heads = np.arange(1, 13)
attention_layer_head_list = [[7, 6], [4, 10], [8, 1], [8,2], [6,7], [8, 10], [8, 11], [9, 6]]
command_lines = []
data_compression = [None, None]
ncomponents = [None, None]
order = ['np.inf'] * 2
centering = ['True'] * 2 
scaling_type = ['normalize'] * 2
input_template = 'activations'
scaling_axis = 1
temporal_shifting = 0

In [126]:
# Copy the run-level settings chosen above into the shared template.
template.update(
    scaling_axis=scaling_axis,
    language=language,
    temporal_shifting=temporal_shifting,
    hrf=hrf,
)

In [127]:
# Generate the YAML templates; keyword arguments keep the long argument list
# from being silently mis-ordered.
path_to_sh, command_lines = fill_template_folder(
    model_names=model_names,
    language=language,
    template=template,
    hidden_layer_list=hidden_layer_list,
    attention_layer_list=attention_layer_list,
    attention_layer_head_list=attention_layer_head_list,
    centering=centering,
    order=order,
    scaling_type=scaling_type,
    input_template=input_template,
    data_compression=data_compression,
    ncomponents=ncomponents,
    temporal_shifting=temporal_shifting,
    path_to_main=path_to_main,
)

In [128]:
# Write each command to its shell script and record the qsub lines in the job file
# (default queue: 'Nspin_long').
write_commands(command_lines, path_to_sh)

In [129]:
# Inspect the shared template: fill_template_folder overwrites its 'subject',
# 'models' and 'model_name' entries in place, so it reflects the last run generated.
template

{'subject': 115,
 'parallel': False,
 'cuda': True,
 'seed': 1111,
 'language': 'english',
 'path_to_root': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/',
 'path_to_fmridata': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI',
 'output': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/',
 'input': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/',
 'detrend': True,
 'standardize': True,
 'high_pass': None,
 'low_pass': None,
 'mask_strategy': 'background',
 'memory_level': 0,
 'smoothing_fwhm': None,
 'verbose': 0,
 'nb_runs': 9,
 'nb_runs_test': 1,
 'tr': 2.0,
 'scaling_mean': True,
 'scaling_var': True,
 'scaling_axis': 1,
 'hrf': 'spm',
 'offset_path': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/onsets-offsets/',
 'duration_path': '/neurospin/unicog/protocols/IRMf/LePetitPrince_

### Full Model

In [17]:
# The same model is listed 30 times so that each position can be paired with
# its own entry of the per-index settings lists defined below
# (data_compression, ncomponents, order, centering, scaling_type).
model_names = [
    'bert-base-cased_pre-7_1_post-0_norm-None',
    #'gpt2_pre-20_1_norm-inf',
    #'bert-base-cased_pre-7_1_post-0_norm-None',
    #'gpt2_pre-20_1_norm-inf',
] *30

In [18]:
# Sanity check: must equal the length of the per-index settings lists.
len(model_names)

30

In [20]:
# --- Feature sets ---
# A single run using all 13 hidden-state layers (0 to 12) together.
hidden_layer_list = [list(range(13))]
# Attention runs are switched off here.
attention_layer_list = list()  # [[i for i in range(1, 13)]]
attention_layer_head_list = list()  # np.arange(1, 13)
command_lines = list()

# --- Per-index settings: 30 entries, one per item of model_names ---
# Entries 0-13 use PCA (7 sizes x 2 norms); entries 14-29 use UMAP (8 sizes x 2 norms).
data_compression = 14 * ['pca'] + 16 * ['umap']
ncomponents = (
    2 * [10, 25, 50, 100, 150, 500, 1000]
    + 2 * [10, 25, 50, 100, 150, 300, 500, 1000]
)
# Earlier sweep, kept for reference:
#order = ['std'] * 8 + ['None'] * 8 + ['np.inf'] * 8 + ['5'] * 8 + ['3'] * 8 + ['2'] * 8 # to replace with only None
order = 7 * ['np.inf'] + 7 * ['2'] + 8 * ['np.inf'] + 8 * ['2']   # to replace with only None
centering = 30 * ['True']
#scaling_type = ['standardize'] * 8 + ['normalize'] * 40
scaling_type = 30 * ['normalize']

# --- Settings shared by all runs ---
input_template = 'activations'
scaling_axis = 1
temporal_shifting = 0

In [26]:
#for index, model_name in enumerate(model_names):
#    additional = '_{}_{}'.format(data_compression[index], ncomponents[index]) if data_compression[index] is not None else ''
#    print('{}_norm-{}_{}_hidden-all-layers{}'.format(model_name, order[index], '{}',additional).replace('np.', ''))

In [22]:
# Copy the run-level settings chosen above into the shared template.
template.update(
    scaling_axis=scaling_axis,
    language=language,
    temporal_shifting=temporal_shifting,
    hrf=hrf,
)

In [24]:
# Generate the YAML templates; keyword arguments keep the long argument list
# from being silently mis-ordered.
path_to_sh, command_lines = fill_template_folder(
    model_names=model_names,
    language=language,
    template=template,
    hidden_layer_list=hidden_layer_list,
    attention_layer_list=attention_layer_list,
    attention_layer_head_list=attention_layer_head_list,
    centering=centering,
    order=order,
    scaling_type=scaling_type,
    input_template=input_template,
    data_compression=data_compression,
    ncomponents=ncomponents,
    temporal_shifting=temporal_shifting,
    path_to_main=path_to_main,
)

In [25]:
# Write each command to its shell script and record the qsub lines in the job file
# (default queue: 'Nspin_long').
write_commands(command_lines, path_to_sh)

In [23]:
# Inspect the current state of the shared template dict.
template

{'subject': None,
 'parallel': False,
 'cuda': True,
 'seed': 1111,
 'language': 'english',
 'path_to_root': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/',
 'path_to_fmridata': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI',
 'output': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/',
 'input': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/',
 'detrend': True,
 'standardize': True,
 'high_pass': None,
 'low_pass': None,
 'mask_strategy': 'background',
 'memory_level': 0,
 'smoothing_fwhm': None,
 'verbose': 0,
 'nb_runs': 9,
 'nb_runs_test': 1,
 'manifold_method': None,
 'manifold_args': {'n_neighbors': 4,
  'random_state': 1111,
  'min_dist': 0.0,
  'metric': 'cosine'},
 'tr': 2.0,
 'scaling_mean': True,
 'scaling_var': True,
 'scaling_axis': 1,
 'hrf': 'spm',
 'offset_path': '/neurospin/unicog/protocols/IRMf/