# Notebook to generate templates for Basic-Features models

In [1]:
import os
import yaml
import numpy as np

from utils import check_folder, read_yaml, save_yaml, write

In [2]:
# Script placed after "python" in every generated job command; each command
# passes it one generated YAML file through --yaml_file.
path_to_main = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/srm_main.py"

In [3]:
# Subject identifiers per language; the generation cells iterate over
# subject_dict[language] and emit one template per listed subject.
# The lists are not contiguous ranges: 'english' covers 57-115 without
# 60, 71, 85, 90, 102, 107, 111 and 112; 'french' covers 1-30 without 28.
# NOTE(review): presumably the missing IDs are excluded subjects - confirm.
subject_dict = {'english': [57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
                    72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 91, 92, 93,
                    94, 95, 96, 97, 98, 99, 100, 101, 103, 104, 105, 106, 108, 109, 110, 113, 114, 115],
                'french':[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                          21, 22, 23, 24, 25, 26, 27, 29, 30
                         ]
               }

In [4]:
# Catalogue of HRF model names. It is kept for reference only: none of the
# visible cells reads hrf_list, so the value of `hrf` below is not validated
# against it.
hrf_list = [
    'spm', # hrf model used in SPM
    'spm + derivative', # SPM model plus its time derivative (2 regressors)
    'spm + derivative + dispersion', # idem, plus dispersion derivative (3 regressors)
    'glover', # this one corresponds to the Glover hrf
    'glover + derivative', # the Glover hrf + time derivative (2 regressors)
    'glover + derivative + dispersion' # idem + dispersion derivative
]
# HRF model written to template['hrf'] for every generated template.
hrf = 'spm'

In [5]:
# Selects which subject list of subject_dict is iterated and is copied into
# template['language'].
language = 'english'
# Copied into template['temporal_shifting'] and embedded in every generated
# file name ("..._temporal-shifting-<value>_<subject>").
temporal_shifting = 0

In [6]:
# Base configuration dumped to one YAML file per generated job.
# 'language' and 'temporal_shifting' are overwritten once from the notebook
# settings; 'subject', 'hrf', 'models' and 'model_name' are overwritten inside
# the generation loops before each save. The other None values are saved as-is.
template = {
  # Shared General parameters
  'subject': None,  # set per subject in the generation loops
  'parallel': False,
  'cuda': True,
  'seed': 1111,
  'language': None,  # set from the notebook-level `language`
  'path_to_root': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/",
  'path_to_fmridata': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI",
  'output': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/"  ,
  'input': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/" ,
  'detrend': True, 
  'standardize': True, 
  'high_pass': None, 
  'low_pass': None, 
  'mask_strategy': 'background', 
  #'dtype': 'float32', 
  'memory_level': 0, 
  'smoothing_fwhm': None , 
  'verbose': 0, 


  # Shared Splitter parameters
  'nb_runs': 9,
  'nb_runs_test': 1,

  # Shared Compression parameters
  'manifold_method': None,
  'manifold_args': {'n_neighbors':4, 'random_state':1111, 'min_dist':0.0, 'metric':'cosine'},

  # Shared Transformation parameters (includes the making of regressor and scaling)
  'tr': 2.,
  'scaling_mean': True,
  'scaling_var': True,
  'scaling_axis': 0,
  'hrf': None,  # set from the notebook-level `hrf` in the generation loops
  'offset_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/onsets-offsets/",
  # NOTE(review): duration_path is the same string as path_to_root - confirm this is intended.
  'duration_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/",
  'temporal_shifting': 0,  # overwritten from the notebook-level `temporal_shifting`
  'oversampling': 10,
  'add_noise_to_constant': True,

  # Shared Estimator model parameters
  'base': 10.0,
  'voxel_wise': True,
  'alpha_percentile': 99.9,
  'alpha': None,
  'alpha_min_log_scale': 1,
  'alpha_max_log_scale': 5,
  'nb_alphas': 10,
  'optimizing_criteria': 'R2',
  'estimator_model': 'Ridge()',
  'save_all_weights': False, 

  # Maps creation parameters
  'atlas': 'cort-prob-2mm',
  # NOTE(review): both masker paths end in "_english" and are not derived from
  # `language`; they must be edited by hand when generating french templates.
  'masker_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/ROI_masks/global_masker_english", #french_global_masker
  'smoothed_masker_path': "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/ROI_masks/smoothed_global_masker_english",


  # Models
  'models': None,  # list of per-model dicts, set in the generation loops
  'model_name': None  # set per generated file in the generation loops
}

In [7]:
def write_commands(command_lines, path_to_sh, job_to_launch_path, queue='Nspin_long',
                   walltime='10:00:00', log_dir='/home/ap259944/logs'):
    """Write one shell script per command and the matching qsub line to the jobs file.

    For each entry of ``command_lines`` the command is written (through the
    ``write`` helper imported from ``utils``) to the script path found at the
    same position in ``path_to_sh``, after the first component of that path has
    been replaced by the first component of ``job_to_launch_path`` (e.g.
    ``/neurospin/...`` becomes ``/Volumes/...``). A ``qsub`` line that refers to
    the *unmodified* script path is then written to ``job_to_launch_path``.

    Args:
        command_lines: list of shell commands, one per job.
        path_to_sh: list of script paths, indexed in parallel with
            ``command_lines``; an ``IndexError`` is raised if it is shorter.
        job_to_launch_path: path of the file receiving the qsub lines; its
            first component is used as the local mount point for the scripts.
        queue: value passed to ``qsub -q``.
        walltime: value passed to ``qsub -l walltime=``.
        log_dir: directory holding the ``log_o_<index>`` / ``log_e_<index>``
            files passed to ``qsub -o`` / ``-e``.

    Returns:
        None.
    """
    for index, command in enumerate(command_lines):
        sh_path = path_to_sh[index]
        # Same relative location as sh_path, but under the mount point of the jobs file.
        local_sh_path = os.path.join('/', job_to_launch_path.split('/')[1], '/'.join(sh_path.split('/')[2:]))
        write(local_sh_path, command)
        output_log = os.path.join(log_dir, 'log_o_{}'.format(index))
        error_log = os.path.join(log_dir, 'log_e_{}'.format(index))
        # splitext strips only the final extension, so dots inside the name
        # (e.g. "temporal-shifting-0.5_57") no longer truncate the job name.
        job_name = os.path.splitext(os.path.basename(sh_path))[0]
        write(job_to_launch_path, f"qsub -q {queue} -N {job_name} -l walltime={walltime} -o {output_log} -e {error_log} {sh_path}")


In [8]:
def get_BF_template(
    model_name, 
    column_names,
    surname,
    data_compression, 
    ncomponents,
    offset_type,
    duration_type=None,
    shift_surprisal=False,
    centering=False,
    order=None,
    scaling_type=None,
    input_template='activations'
):
    """Build the configuration dict describing one model entry.

    Every argument is stored unchanged under the key of the same name, except
    ``column_names``, which is stored as its ``str()`` representation under the
    key ``'columns_to_retrieve'``.

    Returns:
        dict with the keys, in this order: model_name, columns_to_retrieve,
        surname, data_compression, ncomponents, offset_type, duration_type,
        shift_surprisal, input_template, centering, order, scaling_type.
    """
    return dict(
        model_name=model_name,
        columns_to_retrieve=str(column_names),  # stored as text, not as a list
        surname=surname,
        data_compression=data_compression,
        ncomponents=ncomponents,
        offset_type=offset_type,  # word / word+punctuation / ...
        duration_type=duration_type,
        shift_surprisal=shift_surprisal,
        input_template=input_template,  # activations
        centering=centering,
        order=order,
        scaling_type=scaling_type,
    )

## Generation of the YAML templates and job scripts

In [9]:
# Locations referenced inside the generated commands (rooted at /neurospin).
templates_folder = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_BF/templates/"
sh_folder = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_BF/shell_commands/"
# File receiving the qsub lines; its first path component ("Volumes") is the
# mount point under which this notebook actually writes files.
job_to_launch_path = "/Volumes/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/tmp_BF/jobs.txt"

# Check both output folders at their local-mount location: the first component
# of each folder path is swapped for the first component of job_to_launch_path.
for folder in (templates_folder, sh_folder):
    check_folder(os.path.join('/', job_to_launch_path.split('/')[1], '/'.join(folder.split('/')[2:])))


In [10]:
# Copy the notebook-level settings into the shared template.
template.update(language=language, temporal_shifting=temporal_shifting)

In [11]:
# Models to generate templates for. Names that appear in earlier, now disabled,
# versions of this cell: "corrected_hale_topdown", "corrected_hale_bottomup",
# "semantic_mcrae", "rms_chris", "corrected_hale_logfreq", "corrected_hale_pos",
# "hale_bottomup", "hale_pos", "hale_word".
model_names = ["wordrate"]

# Per-model options, read at the same index as model_names. The first three
# lists hold 60 identical placeholder entries each.
data_compression = [None] * 60
ncomponents = [None] * 60
shift_surprisal = [False] * 60
# Other offset types noted previously: "rms_0.01", "rms_chris".
offset_types = ["word" for _ in model_names]

# Accumulators filled by the generation cells. They are only reset here, so
# re-running a generation cell without re-running this one appends duplicates.
command_lines = []
path_to_sh = []

# Columns retrieved for each model, read at the same index as model_names.
# Single-tag variants (["pos_ADJ"], ["pos_ADP"], ..., ["pos_SCONJ"], ["pos_VERB"])
# were used previously, with the suffixes "_ADJ" ... "_VERB" and "_full" in
# extra_name.
# NOTE(review): the combined part-of-speech entry below has no "pos_SCONJ"
# (nor "pos_X"), although both appear in the earlier notes - confirm this
# omission is intended.
columns = [
    ['wordrate'],
    ['amplitude'],
    ['logfreq'],
    [
        "pos_ADJ", "pos_ADP", "pos_ADV",
        "pos_AUX", "pos_CCONJ", "pos_DET",
        "pos_INTJ", "pos_NOUN", "pos_NUM",
        "pos_PART", "pos_PRON", "pos_PROPN",
        "pos_VERB",
    ],
    ["bottom_up"],
]

# Suffix inserted after the model name in generated file names, read at the
# same index as model_names; currently empty for all five entries.
extra_name = [''] * 5


In [25]:
# Combined design: one YAML template and one job command per subject, with
# every entry of model_names stacked in template['models'].
# NOTE(review): this cell's execution count (25) is out of sequence with its
# neighbours. With a single entry in model_names and empty suffixes it builds
# the same file names as the per-model generation cell, so running both queues
# every job twice - confirm which of the two cells is meant to run.
name = '-'.join(model_names)
extra = ''
for subject in subject_dict[language]:
    template['hrf'] = hrf
    template['subject'] = subject
    template['models'] = []
    for index, model_name in enumerate(model_names):
        model = get_BF_template(
            model_name=model_name,
            column_names=columns[index],  # verify that column name in activation_run*.csv file is correct
            surname=model_name,
            data_compression=data_compression[index],
            ncomponents=ncomponents[index],
            offset_type=offset_types[index],
            duration_type=None,
            shift_surprisal=shift_surprisal[index],
            centering=True,
            order=None,
            scaling_type=None,
            input_template='activations',
        )
        template['models'].append(model)

    # Shared stem of the model name, the YAML file and the shell script.
    file_stem = 'BF_{}{}_temporal-shifting-{}_{}'.format(name, extra, temporal_shifting, subject)
    template['model_name'] = file_stem
    yaml_path = os.path.join(templates_folder, file_stem + '.yml')

    # The YAML is saved under the local mount point, while the command keeps
    # the original yaml_path.
    local_yaml_path = os.path.join('/', job_to_launch_path.split('/')[1], '/'.join(yaml_path.split('/')[2:]))
    save_yaml(template, local_yaml_path)
    command_lines.append("python {} --yaml_file {}".format(path_to_main, yaml_path))
    path_to_sh.append(os.path.join(sh_folder, file_stem + '.sh'))


In [12]:
# Per-model design: one YAML template and one job command for every
# (model, subject) pair, each template holding a single model entry.
for index, model_name in enumerate(model_names):
    for subject in subject_dict[language]:
        template['hrf'] = hrf
        template['subject'] = subject

        model = get_BF_template(
            model_name=model_name,
            column_names=columns[index],  # verify that column name in activation_run*.csv file is correct
            surname=model_name,
            data_compression=data_compression[index],
            ncomponents=ncomponents[index],
            offset_type=offset_types[index],
            duration_type=None,
            shift_surprisal=shift_surprisal[index],
            centering=True,
            order=None,
            scaling_type=None,
            input_template='activations',
        )
        template['models'] = [model]

        # Shared stem of the model name, the YAML file and the shell script.
        file_stem = 'BF_{}{}_temporal-shifting-{}_{}'.format(model_name, extra_name[index], temporal_shifting, subject)
        template['model_name'] = file_stem
        yaml_path = os.path.join(templates_folder, file_stem + '.yml')

        # The YAML is saved under the local mount point, while the command
        # keeps the original yaml_path.
        local_yaml_path = os.path.join('/', job_to_launch_path.split('/')[1], '/'.join(yaml_path.split('/')[2:]))
        save_yaml(template, local_yaml_path)
        command_lines.append("python {} --yaml_file {}".format(path_to_main, yaml_path))
        path_to_sh.append(os.path.join(sh_folder, file_stem + '.sh'))


In [13]:
# Write every queued command to its shell script and the matching qsub line to
# job_to_launch_path, using the default queue.
write_commands(command_lines, path_to_sh, job_to_launch_path)

In [14]:
# Show the shared template in its current state, i.e. as left by the last
# iteration of the generation loops.
template

{'subject': 115,
 'parallel': False,
 'cuda': True,
 'seed': 1111,
 'language': 'english',
 'path_to_root': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/',
 'path_to_fmridata': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI',
 'output': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/maps/',
 'input': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/',
 'detrend': True,
 'standardize': True,
 'high_pass': None,
 'low_pass': None,
 'mask_strategy': 'background',
 'memory_level': 0,
 'smoothing_fwhm': None,
 'verbose': 0,
 'nb_runs': 9,
 'nb_runs_test': 1,
 'manifold_method': None,
 'manifold_args': {'n_neighbors': 4,
  'random_state': 1111,
  'min_dist': 0.0,
  'metric': 'cosine'},
 'tr': 2.0,
 'scaling_mean': True,
 'scaling_var': True,
 'scaling_axis': 0,
 'hrf': 'spm',
 'offset_path': '/neurospin/unicog/protocols/IRMf/L