# Notebook generating plots for NeurIPS paper

### Imports

In [None]:
import os
import glob
from tqdm import tqdm
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
import nistats
import scipy
import nibabel as nib
import nilearn
from nilearn.image import load_img, mean_img, index_img, threshold_img, math_img, smooth_img, new_img_like
from nilearn.input_data import NiftiMapsMasker, NiftiMasker, NiftiLabelsMasker, MultiNiftiMasker
from nilearn.regions import RegionExtractor
from nistats.second_level_model import SecondLevelModel
from nistats.thresholding import map_threshold
from nilearn import plotting
from nilearn import datasets
from scipy.stats import norm
from nilearn.surface import vol_to_surf

In [None]:
from logger import Logger
from utils import read_yaml, check_folder, fetch_masker, possible_subjects_id, get_subject_name
import reporting
from linguistics_info import load_surnames, load_syntactic_roi, load_language_roi

### Defining variables

In [None]:
# Language of the dataset to analyse; it is interpolated into the paths defined in the next cell.
language = "english"

In [None]:
# Root of the project on the shared storage. Every other location is derived from it,
# so relocating the project only requires editing this single line.
PROJECT_PATH = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/"

# Per-language locations. They resolve to exactly the same strings as the previous
# hard-coded literals (PROJECT_PATH ends with a separator, so no extra one is inserted).
OUTPUT_PATH = os.path.join(PROJECT_PATH, "derivatives/fMRI/maps", language)
INPUT_PATH = os.path.join(PROJECT_PATH, "data/stimuli-representations", language)
FMRIDATA_PATH = os.path.join(PROJECT_PATH, "data/fMRI", language)
MASKER_PATH = os.path.join(PROJECT_PATH, f"global_masker_{language}")
SMOOTHED_MASKER_PATH = os.path.join(PROJECT_PATH, f"smoothed_global_masker_{language}")

Fetch a masker fitted on the data of all subjects, with the following characteristics:
- detrend: true
- dtype: null
- high_pass: null
- low_pass: null
- mask_strategy: background
- memory_level: 0
- n_jobs: 1
- smoothing_fwhm: null
- standardize: true
- t_r: null
- verbose: 0

In [None]:
# Fetch the unsmoothed masker (smoothing_fwhm=None) stored at MASKER_PATH.
# NOTE(review): `logger` is not assigned anywhere in the visible notebook (only the `Logger`
# class is imported), so this cell raises NameError on a fresh kernel unless a logger is
# created first — confirm how `Logger` should be instantiated and add that cell.
masker = reporting.fetch_masker(MASKER_PATH, language, FMRIDATA_PATH, INPUT_PATH, smoothing_fwhm=None, logger=logger)

In [None]:
atlas_maps, labels = reporting.load_atlas() # load the Harvard-Oxford atlas named 'cort-prob-2mm'

## Group level analysis

In [None]:
# Name templates of every model included in the group-level analysis. Each template keeps a
# `{}` placeholder (presumably filled in by the loading helper — confirm in `reporting`).
model_names = [
    # Embedding / neural language models
    'glove_300_{}',
    'LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_wiki_kristina_english_{}_all-hidden-layers',
    'gpt2_scaled_{}_all-hidden-layers_pca_300',
    'bert-base-cased_{}_all-hidden-layers_pca_300',
] + [
    # "BF_*" baseline features, all sharing the same 'BF_<feature>_{}' template
    'BF_{}_{{}}'.format(feature)
    for feature in (
        'rms',
        'log_frequency',
        'wordrate',
        'content_words',
        'function_words',
        'word_position',
    )
]

Loading data:

In [None]:
# Gather the data of every model in `model_names` for `language`, looking under OUTPUT_PATH.
# NOTE(review): the structure of the returned object is not visible here; later cells index it
# both with a list of model names and with a single model name followed by ['R2'].
data_full = reporting.get_model_data(model_names, language, OUTPUT_PATH)

Checking if we have all the data:

In [None]:
# NOTE(review): 51 is presumably the expected number of subjects per model — confirm, and
# consider naming it as a constant in the configuration cell instead of a bare literal.
reporting.check_data(data_full, 51)

Computing group level maps:

In [None]:
# Compute the group-level maps for every model held in `data_full`.
# `language` and `PROJECT_PATH` are taken from the configuration cells instead of being
# re-typed here, so this step cannot drift from the data that was actually loaded.
reporting.compute_t_test_for_model_comparison(
    data_full,
    smoothing_fwhm=6,
    language=language,
    vmax=None,
    PROJECT_PATH=PROJECT_PATH,
)

## Model Comparison (GloVe - LSTM - GPT2 - BERT)

In [None]:
# Models compared in this section, each paired with the legend entry used in the plots.
# Declaring them as (model template, legend) pairs keeps the two lists aligned by construction.
model_names, legend_names = (
    list(column)
    for column in zip(
        ('glove_300_{}', 'GloVe'),
        ('LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_wiki_kristina_english_{}_all-hidden-layers', 'LSTM-E600-H300-L1'),
        ('gpt2_scaled_{}_all-hidden-layers_pca_300', 'GPT2_scaled(Hpca)'),
        ('bert-base-cased_{}_all-hidden-layers_pca_300', 'BERT(Hpca)'),
    )
)

In [None]:
# Restrict the loaded data to the four models listed in `model_names` for this section.
data_model_comparison = data_full[model_names]

Preparing data for plot per ROI:

In [None]:
# Summarise the compared models per atlas region; the result is indexed below with the keys
# 'mean' and 'third_quartile'.
# `language` and `PROJECT_PATH` reuse the configuration constants rather than repeating
# their literal values.
data_prep_vertical = reporting.get_data_per_roi(
    data_model_comparison,
    atlas_maps,
    labels,
    analysis=None,
    language=language,
    PROJECT_PATH=PROJECT_PATH,
)

In [None]:
# Per-ROI summaries fed to the two vertical plots of the next section.
mean, third_quartile = data_prep_vertical['mean'], data_prep_vertical['third_quartile']

### Vertical plots per ROI

In [None]:
# Vertical plot of the mean R2 per ROI for the compared models.
# Fix: the save folder was written as `save_folder=None` in the middle of the positional
# arguments, which Python rejects ("positional argument follows keyword argument"). It is
# now passed positionally in the same slot (assumes `save_folder` is the fourth positional
# parameter, as its original placement suggests — confirm against `reporting.vertical_plot`).
# NOTE(review): `x_labels` is not assigned anywhere in the visible notebook — presumably the
# ROI names derived from `labels`; define it before running this cell.
reporting.vertical_plot(
    mean,
    x_labels,
    'Mean_R2_per_ROI',
    None,  # save_folder
    'R2',
    reporting.load_surnames(),
    legend_names,
    reporting.load_syntactic_roi(),
    reporting.load_language_roi(),
    figsize=(9, 12),
    count=False,
    title=None,
    ylabel='Regions of interest (ROI)',
    xlabel='R2 value',
    model_name='Model_comparison',
)

In [None]:
# Vertical plot of the third quartile of R2 per ROI for the compared models.
# Fix: the save folder was written as `save_folder=None` in the middle of the positional
# arguments, which Python rejects ("positional argument follows keyword argument"). It is
# now passed positionally in the same slot (assumes `save_folder` is the fourth positional
# parameter, as its original placement suggests — confirm against `reporting.vertical_plot`).
# NOTE(review): `x_labels` is not assigned anywhere in the visible notebook — presumably the
# ROI names derived from `labels`; define it before running this cell.
reporting.vertical_plot(
    third_quartile,
    x_labels,
    'Third_Quartile_R2_per_ROI',
    None,  # save_folder
    'R2',
    reporting.load_surnames(),
    legend_names,
    reporting.load_syntactic_roi(),
    reporting.load_language_roi(),
    figsize=(9, 12),
    count=False,
    title=None,
    ylabel='Regions of interest (ROI)',
    xlabel='R2 value',
    model_name='Model_comparison',
)

### Surface plots

#### Surface plots showing which model predicts better (voxel-wise)

In [None]:
# Model names for the surface plots. Unlike the earlier `model_names` lists, these carry no
# `{}` placeholder; note that this rebinds `model_names` for all following cells.
model_names = [
    'glove_300',
    'LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_wiki_kristina_english_all-hidden-layers',
    'gpt2_scaled_all-hidden-layers_pca_300',
    'bert-base-cased_all-hidden-layers_pca_300'
]
# `language` and `PROJECT_PATH` reuse the configuration constants rather than repeating
# their literal values.
data_prep_surf = reporting.get_group_level_analysis_data(
    masker,
    model_names,
    language=language,
    PROJECT_PATH=PROJECT_PATH,
)

In [None]:
# Surface-rendering options forwarded as **kwargs to the surface plotting helpers below.
# Alternatives noted by the author:
#   surf_mesh: pial_right, infl_left, infl_right · hemi: right · view: medial · bg_map: sulc_right
kwargs = dict(
    surf_mesh='pial_left',
    surf_mesh_type='pial_left',
    hemi='left',
    view='lateral',
    bg_map='sulc_left',
    bg_on_data=True,
    darkness=0.5,
)

In [None]:
# Image used by the "which model predicts better (voxel-wise)" surface plots.
# `language` and `PROJECT_PATH` reuse the configuration constants rather than repeating
# their literal values.
img = reporting.get_voxel_wise_max_img(
    masker,
    model_names,
    language=language,
    PROJECT_PATH=PROJECT_PATH,
)

In [None]:
# Fix: the assignment had no right-hand side, which is a SyntaxError and stopped the notebook.
# TODO(review): confirm the intended folder. The per-language analysis directory is used
# here because the comparison maps further down are read from sub-folders of it.
saving_path = os.path.join(PROJECT_PATH, 'derivatives/fMRI/analysis/{}'.format(language))

In [None]:
# Surface plot of `img` on the non-inflated mesh, using the options in `kwargs`.
# 'model_comparison_surf' is the plot name; presumably the figure is written under `saving_path` — confirm.
reporting.plot_roi_img_surf(img, saving_path, 'model_comparison_surf', inflated=False, **kwargs)

In [None]:
# Interactive counterpart of the previous cell: same image and same `kwargs`, no saving path.
reporting.interactive_surf_plot(img, inflated=False, **kwargs)

#### Surface plots of group-level difference analysis maps

In [None]:
# Pairwise model contrasts. Each key is '<short name A>_vs_<short name B>' (split on '_vs_'
# by the cells below); each value is [model A, model B] as full model names.
comparisons = {
    'LSTM_300-H_vs_Glove': [
        'LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_wiki_kristina_english_all-hidden-layers',
        'glove_300',
    ],
    'BERT-Hpca_vs_GPT2-scaled-Hpca': [
        'bert-base-cased_all-hidden-layers_pca_300',
        'gpt2_scaled_all-hidden-layers_pca_300',
    ],
    'BERT-Hpca_vs_LSTM_300-H': [
        'bert-base-cased_all-hidden-layers_pca_300',
        'LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_wiki_kristina_english_all-hidden-layers',
    ],
    'GPT2-scaled-Hpca_vs_LSTM_300-H': [
        'gpt2_scaled_all-hidden-layers_pca_300',
        'LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_wiki_kristina_english_all-hidden-layers',
    ],
    'BERT-Hpca_vs_Glove': [
        'bert-base-cased_all-hidden-layers_pca_300',
        'glove_300',
    ],
    'GPT2-scaled-Hpca_vs_Glove': [
        'gpt2_scaled_all-hidden-layers_pca_300',
        'glove_300',
    ],
}

In [None]:
# Run the group-level contrast (first model vs second model, on R2) for every comparison.
# `language` and `PROJECT_PATH` reuse the configuration constants rather than repeating
# their literal values.
# NOTE(review): `data_full` is indexed here with names that have no `{}` placeholder, whereas
# the names given to `get_model_data` contain one — confirm how `data_full` is keyed.
for comparison, (model_1, model_2) in comparisons.items():
    imgs_1 = data_full[model_1]['R2']
    imgs_2 = data_full[model_2]['R2']

    # Short display names: the two halves of the '<A>_vs_<B>' key.
    names = comparison.split('_vs_')

    reporting.compute_model_contrasts_t_test(
        imgs_1,
        imgs_2,
        names[0],
        names[1],
        analysis_name='',
        observed_data='R2',
        language=language,
        smoothing_fwhm=6,
        PROJECT_PATH=PROJECT_PATH,
    )

In [None]:
# Locate, for every comparison, the first 'R2_group_fdr_effect' map found in its analysis folder.
# `paths` is keyed by the comparison name (the keys of `comparisons`).
paths = {}
for comparison in comparisons:
    names = comparison.split('_vs_')
    # Folder name is '<A>-vs-<B>_'; the trailing underscore is the separator before an empty suffix.
    name = f'{names[0]}-vs-{names[1]}_'
    path = os.path.join(PROJECT_PATH, f'derivatives/fMRI/analysis/{language}/{name}')
    paths[comparison] = reporting.fetch_map(path, 'R2_group_fdr_effect')[0]

In [None]:
# Surface-rendering options for the comparison plots (same values as the earlier `kwargs` cell).
# Alternatives noted by the author:
#   surf_mesh: pial_right, infl_left, infl_right · hemi: right · view: medial · bg_map: sulc_right
kwargs = dict(
    surf_mesh='pial_left',
    surf_mesh_type='pial_left',
    hemi='left',
    view='lateral',
    bg_map='sulc_left',
    bg_on_data=True,
    darkness=0.5,
)

In [None]:
# Plot each comparison map on the surface, saving next to the map itself.
# Fixes:
#   * `paths` is a dict keyed by comparison name, so it is indexed with `comparison`
#     (indexing it with the enumerate counter raised KeyError);
#   * the stray ':' after the call (a SyntaxError) is removed;
#   * the helper is called through `reporting`, like every other plotting helper here, since
#     no bare `plot_img_surf` is imported — NOTE(review): confirm `reporting.plot_img_surf` exists.
for comparison in comparisons:
    saving_path = os.path.dirname(paths[comparison])
    reporting.plot_img_surf(paths[comparison], saving_path, comparison + '_surf', inflated=False, **kwargs)

## BERT vs GPT2