# Notebook to generate the YAML templates and qsub job scripts for GloVe models


In [11]:
import os
import yaml
import numpy as np

from utils import check_folder, read_yaml, save_yaml, write

In [12]:
# Script run by every generated job, invoked as
# `python <path_to_main> --yaml_file <template path>`.
path_to_main = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/main.py"

In [13]:
# Subject identifiers per language. Each cohort is a contiguous range of IDs
# from which the identifiers that do not appear in the cohort are removed.
_english_absent_ids = {60, 71, 85, 90, 102, 107, 111, 112}
_french_absent_ids = {28}
subject_dict = {
    'english': [sub for sub in range(57, 116) if sub not in _english_absent_ids],
    'french': [sub for sub in range(1, 31) if sub not in _french_absent_ids],
}

In [14]:
# Catalogue of HRF model names, grouped by base model.
_spm_variants = [
    'spm',  # hrf model used in SPM
    'spm + derivative',  # SPM model plus its time derivative (2 regressors)
    'spm + derivative + dispersion',  # idem, plus dispersion derivative (3 regressors)
]
_glover_variants = [
    'glover',  # this one corresponds to the Glover hrf
    'glover + derivative',  # the Glover hrf + time derivative (2 regressors)
    'glover + derivative + dispersion',  # idem + dispersion derivative
]
hrf_list = _spm_variants + _glover_variants

# HRF model used for the templates generated by this notebook.
hrf = 'spm'

In [15]:
# Cohort to generate templates for; used as a key of `subject_dict` and
# stored in the template's 'language' entry.
language = 'english'
# Value stored in the template's 'temporal_shifting' entry.
temporal_shifting = 0

In [36]:
# Base configuration shared by every generated YAML file.
# 'subject', 'language', 'hrf', 'models' and 'model_name' are placeholders that
# are overwritten further down in this notebook before each file is saved;
# 'temporal_shifting' and 'scaling_axis' are overwritten as well. Every other
# value is written to the YAML files exactly as given here.
template = {
  # Shared General parameters
  'subject': None,  # placeholder, set for each subject in the generation loop
  'parallel': False,
  'cuda': True,
  'seed': 1111,
  'language': None,  # placeholder, set from `language`
  'path_to_root': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/",
  'path_to_fmridata': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI",
  'output': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/"  ,
  'input': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/" ,
  'detrend': True, 
  'standardize': True, 
  'high_pass': None, 
  'low_pass': None, 
  'mask_strategy': 'background', 
  #'dtype': 'float32', 
  'memory_level': 0, 
  'smoothing_fwhm': None , 
  'verbose': 0, 


  # Shared Splitter parameters
  'nb_runs': 9,
  'nb_runs_test': 1,

  # Shared Compression parameters
  'manifold_method': None,
  'manifold_args': {'n_neighbors':4, 'random_state':1111, 'min_dist':0.0, 'metric':'cosine'},

  # Shared Transformation parameters (includes the making of regressor and scaling)
  'tr': 2.,
  'scaling_mean': True,
  'scaling_var': True,
  'scaling_axis': 0,
  'hrf': None,  # placeholder, set from `hrf`
  'offset_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/onsets-offsets/",
  'duration_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/",
  'temporal_shifting': 0,
  'oversampling': 10,
  'add_noise_to_constant': True,

  # Shared Estimator model parameters
  'base': 10.0,
  'voxel_wise': True,
  'alpha_percentile': 99.9,
  'alpha': None,
  'alpha_min_log_scale': 1,
  'alpha_max_log_scale': 5,
  'nb_alphas': 10,
  'optimizing_criteria': 'R2',
  'estimator_model': 'Ridge()',
  'save_all_weights': False, 

  # Maps creation parameters
  'atlas': 'cort-prob-2mm',
  # NOTE(review): both masker paths end in '_english' whatever `language` is
  # set to -- confirm before generating templates for another language.
  'masker_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/ROI_masks/global_masker_english",
  'smoothed_masker_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/ROI_masks/smoothed_global_masker_english",


  # Models
  'models': None,  # placeholder, replaced by the list of model entries
  'model_name': None  # placeholder, replaced by the per-subject run name
}

In [37]:
def get_glove_template(
    model_name,
    embedding_size,
    surname,
    data_compression,
    ncomponents,
    offset_type,
    duration_type,
    shift_surprisal,
    centering,
    order,
    scaling_type,
    input_template='activations'):
    """Build the template entry describing one GloVe embedding model.

    The columns to retrieve are named 'embedding-1' ... 'embedding-<embedding_size>'
    and are stored as the string representation of that list. Every other
    argument is stored unchanged under the key bearing its name.

    Args:
        model_name: stored under 'model_name'.
        embedding_size: number of 'embedding-<i>' columns to list (numbered from 1).
        surname: stored under 'surname'.
        data_compression: stored under 'data_compression'.
        ncomponents: stored under 'ncomponents'.
        offset_type: stored under 'offset_type' (word / word+punctuation / ...).
        duration_type: stored under 'duration_type'.
        shift_surprisal: stored under 'shift_surprisal'.
        centering: stored under 'centering'.
        order: stored under 'order'.
        scaling_type: stored under 'scaling_type'.
        input_template: stored under 'input_template' (cls / sep / activations).

    Returns:
        dict: the model entry, with keys in the order listed below.
    """
    column_labels = list(map('embedding-{}'.format, range(1, embedding_size + 1)))
    # Pairs are listed in the order the keys must appear in the entry.
    fields = (
        ('model_name', model_name),
        ('columns_to_retrieve', str(column_labels)),
        ('surname', surname),
        ('data_compression', data_compression),
        ('ncomponents', ncomponents),
        ('offset_type', offset_type),
        ('duration_type', duration_type),
        ('shift_surprisal', shift_surprisal),
        ('centering', centering),
        ('order', order),
        ('scaling_type', scaling_type),
        ('input_template', input_template),
    )
    return dict(fields)
    

In [38]:
# Basic-feature (BF) models added to every template next to the GloVe model.
# The lists below are parallel: position i of each list describes
# BF_model_names[i], so they are all sized from BF_model_names.
BF_model_names = ['rms_chris', "wordrate", "corrected_hale_logfreq", "corrected_hale_pos", "hale_bottomup"]
# Other model names mentioned by the author: "corrected_hale_topdown",
# "corrected_hale_bottomup", "semantic_mcrae".
BF_data_compression = [None] * len(BF_model_names)
BF_ncomponents = [None] * len(BF_model_names)
BF_shift_surprisal = [False] * len(BF_model_names)
# One offset type per model (values mentioned by the author: "word", "rms_0.01", "rms_chris").
BF_offset_types = ["rms_chris", "word", "word", "word", "hale_word"]
BF_command_lines = []
BF_path_to_sh = []
# Columns to retrieve for each model, in the same order as BF_model_names.
BF_columns = [
    ['amplitude'],
    ['wordrate'],
    ['logfreq'],
    ["pos_ADJ", "pos_ADP", "pos_ADV",
     "pos_AUX", "pos_CCONJ", "pos_DET",
     "pos_INTJ", "pos_NOUN", "pos_NUM",
     "pos_PART", "pos_PRON", "pos_PROPN",
     "pos_VERB"],
    ["bottom_up"],
]
# Fail early if a model is added without its per-model settings, instead of
# silently pairing it with another model's settings.
for _bf_list_name, _bf_list in (
    ('BF_data_compression', BF_data_compression),
    ('BF_ncomponents', BF_ncomponents),
    ('BF_shift_surprisal', BF_shift_surprisal),
    ('BF_offset_types', BF_offset_types),
    ('BF_columns', BF_columns),
):
    if len(_bf_list) != len(BF_model_names):
        raise ValueError(
            '{} has {} entries but BF_model_names has {}'.format(
                _bf_list_name, len(_bf_list), len(BF_model_names)))
def get_BF_template(
    model_name,
    column_names,
    surname,
    data_compression,
    ncomponents,
    offset_type,
    duration_type=None,
    shift_surprisal=False,
    centering=False,
    order=None,
    scaling_type=None,
    input_template='activations'
):
    """Build the template entry describing one basic-feature model.

    Args:
        model_name: stored under 'model_name'.
        column_names: columns to retrieve; stored under 'columns_to_retrieve'
            as the string representation of this object.
        surname: stored under 'surname'.
        data_compression: stored under 'data_compression'.
        ncomponents: stored under 'ncomponents'.
        offset_type: stored under 'offset_type' (word / word+punctuation / ...).
        duration_type: stored under 'duration_type'.
        shift_surprisal: stored under 'shift_surprisal'.
        centering: stored under 'centering'.
        order: stored under 'order'.
        scaling_type: stored under 'scaling_type'.
        input_template: stored under 'input_template' (activations).

    Returns:
        dict: the model entry, with keys in the order they are assigned below.
    """
    entry = {}
    entry['model_name'] = model_name
    entry['columns_to_retrieve'] = str(column_names)
    entry['surname'] = surname
    entry['data_compression'] = data_compression
    entry['ncomponents'] = ncomponents
    entry['offset_type'] = offset_type
    entry['duration_type'] = duration_type
    entry['shift_surprisal'] = shift_surprisal
    entry['input_template'] = input_template
    entry['centering'] = centering
    entry['order'] = order
    entry['scaling_type'] = scaling_type
    return entry

## Generation of the templates and job scripts starts here

In [44]:
# Output locations. The two folders are rooted at "/neurospin", while the job
# list is rooted at "/Volumes" (presumably the same share as mounted on the
# machine running this notebook -- TODO confirm).
templates_folder = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_glove/templates/"
sh_folder = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_glove/shell_commands/"
job_to_launch_path = "/Volumes/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_glove/jobs.txt"

# Check each folder under the root used by job_to_launch_path: the first path
# component of the folder is swapped for the first component of the job list.
_local_mount = job_to_launch_path.split('/')[1]
for _cluster_folder in (templates_folder, sh_folder):
    _folder_tail = '/'.join(_cluster_folder.split('/')[2:])
    check_folder(os.path.join('/', _local_mount, _folder_tail))



In [45]:
def _to_local_path(path):
    """Re-root *path* under the first path component of ``job_to_launch_path``."""
    local_mount = job_to_launch_path.split('/')[1]
    return os.path.join('/', local_mount, '/'.join(path.split('/')[2:]))


def write_commands(command_lines, path_to_sh):
    """Write one shell script per command and queue a matching qsub line.

    For each (command, script path) pair, the command is written to the script
    path re-rooted under the mount used by ``job_to_launch_path``, and a
    ``qsub`` line pointing at the original script path is written to
    ``job_to_launch_path``. Writing goes through the project's ``write``
    helper (presumably appending to the target file -- TODO confirm).

    Args:
        command_lines: commands to run, one per job.
        path_to_sh: script paths, in the same order as ``command_lines``.
            Entries beyond ``len(command_lines)`` are ignored.

    Raises:
        ValueError: if ``path_to_sh`` has fewer entries than ``command_lines``.
    """
    if len(path_to_sh) < len(command_lines):
        # Checked up front so that no file is written for an incomplete batch.
        raise ValueError(
            'path_to_sh has {} entries but {} commands were given'.format(
                len(path_to_sh), len(command_lines)))

    # Scheduler settings shared by every job.
    queue = 'Nspin_long'  # 'Nspin_bigM'
    walltime = '20:00:00'

    for index, (command, sh_path) in enumerate(zip(command_lines, path_to_sh)):
        write(_to_local_path(sh_path), command)
        output_log = '/home/ap259944/logs/log_o_{}'.format(index)
        error_log = '/home/ap259944/logs/log_e_{}'.format(index)
        # Script file name without its extension; splitext keeps interior dots
        # and never yields an empty name for a file that has no extension.
        job_name = os.path.splitext(os.path.basename(sh_path))[0]
        write(job_to_launch_path, f"qsub -q {queue} -N {job_name} -l walltime={walltime} -o {output_log} -e {error_log} {sh_path}")


In [46]:
# One entry per GloVe model. The per-model lists below (embedding_size,
# data_compression, ncomponents, centering, order, scaling_type) are read at
# the same position as the model's name in `model_names`.
model_names = [
"glove_embeddings_wiki_norm-2",
]
# Number of 'embedding-<i>' columns requested for each model.
embedding_size = [300] 
data_compression = [None]
ncomponents = [None]
# NOTE(review): not read by the generation loop below, which passes a literal False.
shift_surprisal = False
centering = [True] 
order = [None]
scaling_type = [None]
# Filled by the generation loop, then passed to write_commands().
command_lines = []
path_to_sh = []
# Copied into template['scaling_axis'].
scaling_axis = 0

In [47]:
# Copy the notebook-level choices into the shared template in a single call.
# All four keys already exist in `template`, so their position is unchanged.
template.update({
    'language': language,
    'temporal_shifting': temporal_shifting,
    'hrf': hrf,
    'scaling_axis': scaling_axis,
})

In [48]:
# Name part contributed by the basic features; identical for every model and subject.
BF_name = '-'.join(BF_model_names)
# First path component under which the YAML files are actually saved.
_save_mount = job_to_launch_path.split('/')[1]

for index, model_name in enumerate(model_names):
    # Stem shared by the run name, the YAML file name and the shell script name.
    # NOTE(review): model_name and BF_name are concatenated without a separator.
    _run_stem = model_name + BF_name

    for subject in subject_dict[language]:
        template['subject'] = subject

        # GloVe embedding model: always the first entry of the model list.
        model = get_glove_template(
            model_name=model_name,
            embedding_size=embedding_size[index],
            surname=model_name,
            data_compression=data_compression[index],
            ncomponents=ncomponents[index],
            offset_type="word+punctuation",
            duration_type=None,
            shift_surprisal=False,
            centering=centering[index],
            order=order[index],
            scaling_type=scaling_type[index],
            input_template='activations',
        )
        template['models'] = [model]

        # Basic features: one additional entry per BF model.
        for index_BF, BF_model_name in enumerate(BF_model_names):
            model = get_BF_template(
                model_name=BF_model_name,
                # Author's note: verify that the column names match those of
                # the activation_run*.csv files.
                column_names=BF_columns[index_BF],
                surname=BF_model_name,
                data_compression=BF_data_compression[index_BF],
                ncomponents=BF_ncomponents[index_BF],
                offset_type=BF_offset_types[index_BF],
                duration_type=None,
                shift_surprisal=BF_shift_surprisal[index_BF],
                centering=True,
                order=None,
                scaling_type=None,
                input_template='activations',
            )
            template['models'].append(model)

        template['model_name'] = _run_stem + f'_{subject}'
        yaml_path = os.path.join(templates_folder, _run_stem + '_{}.yml'.format(subject))

        # The file is saved under the mount used by job_to_launch_path, while
        # the queued command refers to the original yaml_path.
        _yaml_tail = '/'.join(yaml_path.split('/')[2:])
        save_yaml(template, os.path.join('/', _save_mount, _yaml_tail))

        command_lines.append("python {} --yaml_file {}".format(path_to_main, yaml_path))
        path_to_sh.append(os.path.join(sh_folder, _run_stem + '_{}.sh'.format(subject)))


In [49]:
# Write every queued command to its shell script and add the matching qsub
# line to the job list.
write_commands(command_lines, path_to_sh)

In [50]:
# Display the template as left by the last iteration of the generation loop.
template

{'subject': 115,
 'parallel': False,
 'cuda': True,
 'seed': 1111,
 'language': 'english',
 'path_to_root': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/',
 'path_to_fmridata': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI',
 'output': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/',
 'input': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/',
 'detrend': True,
 'standardize': True,
 'high_pass': None,
 'low_pass': None,
 'mask_strategy': 'background',
 'memory_level': 0,
 'smoothing_fwhm': None,
 'verbose': 0,
 'nb_runs': 9,
 'nb_runs_test': 1,
 'manifold_method': None,
 'manifold_args': {'n_neighbors': 4,
  'random_state': 1111,
  'min_dist': 0.0,
  'metric': 'cosine'},
 'tr': 2.0,
 'scaling_mean': True,
 'scaling_var': True,
 'scaling_axis': 0,
 'hrf': 'spm',
 'offset_path': '/neurospin/unicog/protocols/IRMf/L