# Notebook to generate templates for LSTM models

In [1]:
import os
import yaml
import numpy as np
import itertools

from utils import check_folder, read_yaml, save_yaml, write

In [2]:
# Script that every generated job command runs (`python <path_to_main> --yaml_file ...`).
path_to_main = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/main.py'

In [3]:
# Subject numbers to generate templates for, keyed by language.
# The numbering is not contiguous: the gaps are already present in the original lists.
subject_dict = {
    'english': [
        57, 58, 59, 61, 62, 63, 64, 65, 66, 67,
        68, 69, 70, 72, 73, 74, 75, 76, 77, 78,
        79, 80, 81, 82, 83, 84, 86, 87, 88, 89,
        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
        101, 103, 104, 105, 106, 108, 109, 110, 113, 114,
        115,
    ],
    'french': [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 26, 27, 29, 30,
    ],
}

In [4]:
# Candidate HRF model names, kept for reference; the cells visible here only use `hrf`.
hrf_list = [
    # hrf model used in SPM
    'spm',
    # SPM model plus its time derivative (2 regressors)
    'spm + derivative',
    # same, plus the dispersion derivative (3 regressors)
    'spm + derivative + dispersion',
    # the Glover hrf
    'glover',
    # the Glover hrf plus its time derivative (2 regressors)
    'glover + derivative',
    # same, plus the dispersion derivative
    'glover + derivative + dispersion',
]

# HRF model written into every generated template.
hrf = 'spm'

In [5]:
# Selects which entry of `subject_dict` the generation loop iterates over.
language = "english"
# Copied into the template's 'temporal_shifting' field by a later cell.
temporal_shifting = 0

In [6]:
# Base configuration shared by every generated YAML file.
# Entries set to None here ('subject', 'language', 'hrf', 'models', 'model_name')
# are filled in by later cells before the template is saved.
template = {
    # ---- General parameters ----
    'subject': None,
    'parallel': False,
    'cuda': True,
    'seed': 1111,
    'language': None,
    'path_to_root': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/",
    'path_to_fmridata': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI",
    'output': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/",
    'input': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/",
    'detrend': True,
    'standardize': True,
    'high_pass': None,
    'low_pass': None,
    'mask_strategy': 'background',
    # 'dtype': 'float32',  (disabled in the original template)
    'memory_level': 0,
    'smoothing_fwhm': None,
    'verbose': 0,

    # ---- Splitter parameters ----
    'nb_runs': 9,
    'nb_runs_test': 1,

    # ---- Compression parameters ----
    'manifold_method': None,
    'manifold_args': {
        'n_neighbors': 4,
        'random_state': 1111,
        'min_dist': 0.0,
        'metric': 'cosine',
    },

    # ---- Transformation parameters (regressor construction and scaling) ----
    'tr': 2.,
    'scaling_mean': True,
    'scaling_var': True,
    'scaling_axis': 0,
    'hrf': None,
    'offset_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/onsets-offsets/",
    'duration_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/",
    'temporal_shifting': 0,
    'oversampling': 10,
    'add_noise_to_constant': True,

    # ---- Estimator model parameters ----
    'base': 10.0,
    'voxel_wise': True,
    'alpha_percentile': 99.9,
    'alpha': None,
    'alpha_min_log_scale': 1,
    'alpha_max_log_scale': 5,
    'nb_alphas': 10,
    'optimizing_criteria': 'R2',
    # Stored as a string, not as an estimator object.
    'estimator_model': 'Ridge()',
    'save_all_weights': False,

    # ---- Maps creation parameters ----
    'atlas': 'cort-prob-2mm',
    # NOTE(review): both masker paths end in "_english" whatever `language` is set to;
    # confirm this is intended before generating French templates.
    'masker_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/ROI_masks/global_masker_english",
    'smoothed_masker_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/ROI_masks/smoothed_global_masker_english",

    # ---- Models ----
    'models': None,
    'model_name': None,
}

In [8]:
def get_lstm_template(
    model_name,
    layers,
    ninp,
    nhid,
    parameters,
    surname,
    data_compression,
    ncomponents,
    offset_type,
    duration_type,
    shift_surprisal,
    includ_surprisal,
    includ_entropy,
    input_template='activations',
    centering=False,
    order=None,
    scaling_type=None
):
    """Build the description dict of one LSTM model for a YAML template.

    The activation columns are named '<parameter>-layer-<layer>-<unit>' with
    units numbered from 1 to `nhid`, for every combination of the requested
    parameters and layers.

    Args:
        model_name: Name stored under 'model_name'.
        layers: Iterable of layer identifiers used in the column names.
        ninp: Accepted for interface compatibility; not used by this function.
        nhid: Number of units per layer (column indices run from 1 to nhid).
        parameters: Iterable of column-name prefixes (e.g. 'hidden', 'cell').
        surname: Stored under 'surname'.
        data_compression: Stored under 'data_compression'.
        ncomponents: Stored under 'ncomponents'.
        offset_type: Stored under 'offset_type' (word / word+punctuation / ...).
        duration_type: Stored under 'duration_type'.
        shift_surprisal: Stored under 'shift_surprisal'.
        includ_surprisal: If truthy, 'surprisal' is appended to the columns.
        includ_entropy: If truthy, 'entropy' is appended to the columns.
        input_template: Stored under 'input_template'.
        centering: Stored under 'centering'.
        order: Stored under 'order'.
        scaling_type: Stored under 'scaling_type'.

    Returns:
        dict: The model description. 'columns_to_retrieve' holds the string
        representation of the column list (`str(list)`), not the list itself.
    """
    # Accumulate the columns of EVERY requested parameter. The previous loop
    # re-assigned the list on each iteration, so only the last parameter's
    # columns survived when several were passed.
    columns_to_retrieve = [
        '{}-layer-{}-{}'.format(param, layer, unit)
        for param in parameters
        for layer in layers
        for unit in range(1, nhid + 1)
    ]
    if includ_surprisal:
        columns_to_retrieve.append('surprisal')
    if includ_entropy:
        columns_to_retrieve.append('entropy')

    return {
        'model_name': model_name,
        'columns_to_retrieve': str(columns_to_retrieve),
        'surname': surname,
        'data_compression': data_compression,
        'ncomponents': ncomponents,
        'offset_type': offset_type,  # word / word+punctuation / ...
        'duration_type': duration_type,
        'shift_surprisal': shift_surprisal,
        'input_template': input_template,  # activations
        'centering': centering,
        'order': order,
        'scaling_type': scaling_type,
    }
    

## Template generation starts here

In [30]:
# Output locations. `templates_folder` and `sh_folder` are written under the "/neurospin/..."
# root, while `job_to_launch_path` uses the "/Volumes/..." root.
templates_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_lstm/templates/'
sh_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_lstm/shell_commands/'
job_to_launch_path = '/Volumes/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_lstm/jobs.txt'

# For each output folder, swap its first path component for the first component of
# `job_to_launch_path` and hand the resulting path to `check_folder`.
for _folder in (templates_folder, sh_folder):
    check_folder(
        os.path.join(
            '/',
            job_to_launch_path.split('/')[1],
            '/'.join(_folder.split('/')[2:]),
        )
    )


In [31]:
def write_commands(command_lines, path_to_sh, job_to_launch_path, queue='Nspin_long',
                   walltime='99:00:00', ncpus=2):
    """Write one shell script per command and register a `qsub` line for each.

    For every command, the command text is written (through the project's `write`
    helper) to the script path re-rooted on the first path component of
    `job_to_launch_path`; a `qsub` submission line that references the original
    script path is then written to `job_to_launch_path`. Whether `write` appends
    or overwrites is not visible from this notebook.

    Args:
        command_lines: Sequence of command strings, one per job.
        path_to_sh: Sequence of script paths, indexed in parallel with
            `command_lines`.
        job_to_launch_path: Path of the file collecting the `qsub` lines; its
            first path component is also used as the root for the script files.
        queue: Queue name passed to `qsub -q`.
        walltime: Value passed to `-l walltime=` (default matches the previously
            hard-coded value).
        ncpus: Value passed to `-l ncpus=` (default matches the previously
            hard-coded value).

    Raises:
        IndexError: If `path_to_sh` is shorter than `command_lines`.
    """
    # First path component of the job file (e.g. 'Volumes'); constant across jobs.
    local_root = job_to_launch_path.split('/')[1]
    for index, command in enumerate(command_lines):
        sh_path = path_to_sh[index]
        # Same script path with its first component replaced by `local_root`.
        local_sh_path = os.path.join('/', local_root, '/'.join(sh_path.split('/')[2:]))
        write(local_sh_path, command)
        # Job name = script file name up to its first dot.
        job_name = os.path.basename(sh_path).split('.')[0]
        write(job_to_launch_path, f"qsub -q {queue} -N {job_name} -l ncpus={ncpus} -l walltime={walltime} {sh_path}")


In [32]:
# Models to generate templates for. The other trained variants are kept below as
# comments so they can be re-enabled:
#   "LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_gutenberg_big_3_norm-2"
#   "LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_gutenberg_big_1_norm-2"
model_names = [
    "LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_gutenberg_big_2_norm-2",
]

# Per-model settings: each list holds one entry per element of `model_names`.
hidden_layer_list = [[1]] * len(model_names)
data_compression = ['pca'] * len(model_names)
ncomponents = [300] * len(model_names)
params = [{'ninp': 768, 'nhid': 768, 'nlayers': 1}] * len(model_names)
order = [None] * len(model_names)
# NOTE(review): this is the string 'True', whereas get_lstm_template defaults to the
# boolean False; confirm which type the consumer of the YAML expects.
centering = ['True'] * len(model_names)
scaling_type = [None] * len(model_names)

# Column-name prefixes to generate, one template per entry.
# Other values listed in the original: ['in'], ['forget'], ['out'], ['c_tilde'], ['cell']
parameters_list = [['hidden']]

# Settings shared by all models.
shift_surprisal = False
includ_surprisal = False
includ_entropy = False
input_template = 'activations'
scaling_axis = 0
temporal_shifting = 0

In [33]:
# Fill the placeholders of the shared template with the run-level settings chosen above.
template.update(
    language=language,
    scaling_axis=scaling_axis,
    hrf=hrf,
    temporal_shifting=temporal_shifting,
)

In [34]:
# Accumulators filled in parallel by the generation loop:
# one command string and one shell-script path per generated template.
command_lines, path_to_sh = [], []

In [35]:
# Generate one YAML template, one command line and one shell-script path for every
# (model, subject, parameters) combination. `template` is mutated in place and saved
# immediately, so each saved file reflects the current combination.
for index, model_name in enumerate(model_names):
    param = params[index]
    # Suffix describing the compression step; empty when no compression is configured.
    # It only depends on the model, so it is computed once per model.
    additional = '_{}_{}'.format(data_compression[index], ncomponents[index]) if data_compression[index] is not None else ''

    for subject in subject_dict[language]:
        template['hrf'] = hrf
        template['subject'] = subject

        for parameters in parameters_list:

            # hidden layers comparison
            model = get_lstm_template(
                model_name,
                hidden_layer_list[index],
                param['ninp'],
                param['nhid'],
                parameters,
                "{}_all-{}-layers".format(model_name, parameters[0]),
                data_compression[index],
                ncomponents[index],
                "word+punctuation",
                None,
                shift_surprisal,
                includ_surprisal,
                includ_entropy,
                # Use the configured value; this was previously hard-coded to
                # 'activations', so the `input_template` setting was ignored.
                input_template=input_template,
                centering=centering[index],
                order=order[index],
                scaling_type=scaling_type[index]
            )

            # Common stem shared by the template's model name, YAML file and shell script.
            base_name = '{}_{}_all-{}-layers{}'.format(model_name, subject, parameters[0], additional)
            template['models'] = [model]
            template['model_name'] = base_name
            yaml_path = os.path.join(templates_folder, base_name + '.yml')

            # The file is saved under the first path component of `job_to_launch_path`,
            # while the command line keeps referring to `yaml_path` as is.
            # (To save at `yaml_path` directly: save_yaml(template, yaml_path).)
            save_yaml(template, os.path.join('/', job_to_launch_path.split('/')[1], '/'.join(yaml_path.split('/')[2:])))
            command_lines.append("python {} --yaml_file {}".format(path_to_main, yaml_path))
            path_to_sh.append(os.path.join(sh_folder, base_name + '.sh'))


In [36]:
# Write every shell script and register its qsub line in the jobs file.
write_commands(
    command_lines=command_lines,
    path_to_sh=path_to_sh,
    job_to_launch_path=job_to_launch_path,
)

In [37]:
# Display the template as left by the generation loop: it is mutated in place,
# so this shows the values of the last combination that was written.
template

{'subject': 115,
 'parallel': False,
 'cuda': True,
 'seed': 1111,
 'language': 'english',
 'path_to_root': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/',
 'path_to_fmridata': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI',
 'output': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/',
 'input': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/',
 'detrend': True,
 'standardize': True,
 'high_pass': None,
 'low_pass': None,
 'mask_strategy': 'background',
 'memory_level': 0,
 'smoothing_fwhm': None,
 'verbose': 0,
 'nb_runs': 9,
 'nb_runs_test': 1,
 'manifold_method': None,
 'manifold_args': {'n_neighbors': 4,
  'random_state': 1111,
  'min_dist': 0.0,
  'metric': 'cosine'},
 'tr': 2.0,
 'scaling_mean': True,
 'scaling_var': True,
 'scaling_axis': 0,
 'hrf': 'spm',
 'offset_path': '/neurospin/unicog/protocols/IRMf/L