# Use FastSRM to determine ceiling level of fMRI BOLD signal predictability

In [1]:
from fastsrm.identifiable_srm import IdentifiableFastSRM
from sklearn.model_selection import KFold
from tqdm import tqdm
import numpy as np
import glob
import os

In [6]:
from utils import possible_subjects_id, get_subject_name, fetch_data, fetch_masker
from logger import Logger
import reporting

## Defining parameters

In [3]:
# Dataset language; it is interpolated into the data and masker paths of this notebook.
language = "english"
# Number of fMRI runs per subject; also used as the number of session folds
# in the cross-validation cell.
nb_runs = 9
# The two settings below are not referenced by any other cell visible here.
path_to_input = ""
path_to_fmridata = (
    "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018"
    "/LePetitPrince/data/fMRI"
)

In [4]:
# Root of the project on the shared file system. The trailing slash is kept on
# purpose: every path below is built by appending directly to this string, so
# the resulting values are character-for-character the same as before.
PROJECT_PATH = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/"
# Destination of the derived fMRI maps for the selected language.
OUTPUT_PATH = f"{PROJECT_PATH}derivatives/fMRI/maps/{language}"
# Stimuli representations for the selected language.
INPUT_PATH = f"{PROJECT_PATH}data/stimuli-representations/{language}"
# Per-subject fMRI images for the selected language.
FMRIDATA_PATH = f"{PROJECT_PATH}data/fMRI/{language}"
# Locations handed to fetch_masker for the global masker (plain and smoothed variants).
MASKER_PATH = f"{PROJECT_PATH}global_masker_95%_{language}"
SMOOTHED_MASKER_PATH = f"{PROJECT_PATH}smoothed_global_masker_{language}"

## Retrieving masker

In [5]:
# Project logger, constructed with the path of 'logs.txt' under PROJECT_PATH.
# `Logger` comes from the project-local `logger` module.
logger = Logger(os.path.join(PROJECT_PATH, 'logs.txt'))

In [7]:
# Fetch the global masker for the selected language; no smoothing is requested
# (smoothing_fwhm=None).
global_masker = fetch_masker(
    MASKER_PATH,
    language,
    FMRIDATA_PATH,
    INPUT_PATH,
    smoothing_fwhm=None,
    logger=logger,
)
# Load the Harvard-Oxford atlas named 'cort-prob-2mm'.
atlas_maps, labels = reporting.load_atlas()
# Keep every label except the first one.
# NOTE(review): presumably the first label is the background - confirm.
x_labels = labels[1:]

## Preprocessing function

In [8]:
# Names of all subjects available for the selected language.
subjects = [
    get_subject_name(subject_id)
    for subject_id in possible_subjects_id(language)
]
# For each subject, the sorted list of its run images ('fMRI_*run*.nii.nii'
# files found in the subject's 'func' folder).
paths = [
    sorted(
        glob.glob(
            os.path.join(FMRIDATA_PATH, subject_name, 'func', 'fMRI_*run*.nii.nii')
        )
    )
    for subject_name in subjects
]

In [9]:
def process_fmri_data(fmri_paths, masker):
    """Load fMRI runs, mask them and save each masked run as a .npy file.

    Voxels whose signal is zero at every time step would produce NaN
    Pearson correlation coefficients in later analyses, so a small random
    value is written into the first time step of each such voxel.

    Arguments:
        - fmri_paths: list (of string), one path per run. Each path is
          expected to end with a 7-character suffix following the last kept
          dot (e.g. 'nii.nii'); that suffix is replaced by 'npy' to build
          the output file name.
        - masker: object exposing `transform(path)` (e.g. a NiftiMasker)
          returning an array of shape #scans * #voxels.
    Returns:
        - data: list of length #runs (np.array of shape: #scans * #voxels).
          The arrays written to disk are the transposes (#voxels * #scans).
    """
    data = []
    for path in fmri_paths:
        # Work on a private copy so the masker's output is never modified in place.
        run_data = np.array(masker.transform(path))
        if not np.issubdtype(run_data.dtype, np.floating):
            # A small fractional offset cannot be stored in a non-float array.
            run_data = run_data.astype(np.float64)
        # One random offset per run, shared by all constant-zero voxels of that run.
        epsilon = np.random.random() / 1000
        if run_data.shape[0] > 0:
            # Vectorised detection of voxels that are zero at every time step.
            # Unlike a per-column Python callback, this keeps the array dtype
            # regardless of which voxel comes first.
            constant_zero = ~run_data.any(axis=0)
            run_data[0, constant_zero] = epsilon
        # Saved transposed (#voxels * #scans), next to the source image.
        np.save(path[:-7] + 'npy', run_data.T)
        data.append(run_data)
    return data

## Preparing subjects' data (transforming images to NumPy arrays with the masker)

In [10]:
# Mask every subject's runs with the global masker. process_fmri_data saves each
# masked run as a .npy file next to its source image; the arrays it returns
# are discarded here.
for sub_paths in tqdm(paths):
    process_fmri_data(sub_paths, global_masker)

100%|██████████| 51/51 [09:52<00:00, 11.61s/it]


In [11]:
# Notebook echo: display the masker path currently in use.
MASKER_PATH

'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/global_masker_95%_english'

In [16]:
# Print one rsync command per saved .npy run so the masked data can be copied
# to the remote machine; a blank line separates subjects.
for p in paths:
    # Iterate over the runs actually found for this subject rather than a
    # fixed count, so a subject with fewer runs does not raise IndexError.
    for run_path in p:
        # Same name derivation as the one used when the .npy file was saved.
        p_ = run_path[:-7] + 'npy'
        # The subject folder is two levels above the file (<subject>/func/<file>).
        subject = os.path.basename(os.path.dirname(os.path.dirname(p_)))
        print("rsync -a {} cp983411@calmar:/home_local/alexpsq/data/fMRI/english/{}/func/".format(p_, subject))
    print()

rsync -a /neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI/english/sub-057/func/fMRI_english_sub-057_run1.npy cp983411@calmar:/home_local/alexpsq/data/fMRI/english/sub-057/func/
rsync -a /neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI/english/sub-057/func/fMRI_english_sub-057_run2.npy cp983411@calmar:/home_local/alexpsq/data/fMRI/english/sub-057/func/
rsync -a /neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI/english/sub-057/func/fMRI_english_sub-057_run3.npy cp983411@calmar:/home_local/alexpsq/data/fMRI/english/sub-057/func/
rsync -a /neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI/english/sub-057/func/fMRI_english_sub-057_run4.npy cp983411@calmar:/home_local/alexpsq/data/fMRI/english/sub-057/func/
rsync -a /neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI/english/sub-057/func/fMRI_english_sub-057_run5.npy cp983411

## Retrieving data & FastSRM

In [55]:
# Accumulator filled by the cross-validation cell: subject name -> averaged score array.
result = dict()
# Same nested layout as `paths` (one list of runs per subject), but pointing at
# the '.npy' files instead of the '.nii.nii' images.
paths_array = [
    [nii_path.replace('.nii.nii', '.npy') for nii_path in subject_runs]
    for subject_runs in paths
]

In [60]:
# Nested cross-validation estimating, for each held-out subject, how well its
# signal is predicted from the other subjects through a shared response model.
#   - outer loop: one fold per subject (n_splits == number of subjects);
#   - inner loop: one fold per run (n_splits == nb_runs).
# For every (subject, run) pair the voxel-wise score `1 - var(prediction - data)`
# is computed; scores are averaged over runs and saved per subject.
# NOTE(review): `1 - var(residual)` equals R2 only if each voxel time course has
# unit variance - confirm the masker standardises the data.
# NOTE(review): the recorded run of this cell ended with
# "UFuncTypeError: ufunc 'subtract' ... dtype('<U134')", i.e. a subtraction on
# arrays of strings. The inputs given to fastsrm below are arrays of file
# paths, so one of the fastsrm calls presumably does arithmetic on them - the
# full traceback now logged on failure should identify which one. TODO confirm.
import traceback  # local to this cell: only needed to log the full stack on failure

# Directory used as FastSRM temp_dir and as destination of the per-subject results.
fastsrm_dir = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/oldstuff/fastsrm/"

try:
    # Built once instead of twice per fold; indexed as [subject, run].
    all_run_paths = np.array(paths_array)
    n_subjects = len(subjects)

    subject_folds = KFold(n_splits=n_subjects, shuffle=True).split(np.arange(n_subjects))
    for subjects_train, subjects_test in tqdm(subject_folds):
        # With one fold per subject, each test fold holds exactly one subject.
        test_subject = subjects[subjects_test[0]]
        logger.info("Subject test: {}...".format(test_subject))
        n_subjects_train = len(subjects_train)
        n_subjects_test = len(subjects_test)
        r2_subjects = []

        X_train = all_run_paths[subjects_train, :]
        X_test = all_run_paths[subjects_test, :]

        session_folds = KFold(n_splits=nb_runs, shuffle=True).split(np.arange(nb_runs))
        for session_train, session_test in tqdm(session_folds):
            logger.info("\tSession test: {}...".format(session_test[0]))
            X_train_session_train = X_train[:, session_train]
            X_train_session_test = X_train[:, session_test]
            X_test_session_train = X_test[:, session_train]
            X_test_session_test = X_test[:, session_test]

            fastsrm = IdentifiableFastSRM(n_components=10, temp_dir=fastsrm_dir, n_jobs=-1)
            # Fit the model on the training subjects' training runs.
            fastsrm.fit(X_train_session_train)
            W_list = fastsrm.basis_list  # not used further in this cell
            # Shared responses of the training subjects on training / held-out runs.
            S1 = fastsrm.transform(X_train_session_train)
            S2 = fastsrm.transform(X_train_session_test)
            # Register the held-out subject(s) using their training runs only.
            fastsrm.add_subjects(X_test_session_train, S1)

            # Predict the held-out runs of the added subject(s).
            # NOTE(review): assumes added subjects are indexed right after the
            # training subjects - confirm against the fastsrm documentation.
            predictions = fastsrm.inverse_transform(
                S2,
                subjects_indexes=np.arange(n_subjects_train, n_subjects_train + n_subjects_test))

            for i in range(n_subjects_test):
                # Residual between the prediction and the array saved on disk
                # for the held-out run of test subject i.
                diff = predictions[i][0] - np.load(X_test_session_test[i][0])
                r2 = 1 - diff.var(axis=1)
                r2_subjects.append(r2)

        # Average the scores collected over all run folds for this subject.
        result[test_subject] = np.mean(r2_subjects, axis=0)
        np.save(os.path.join(fastsrm_dir, "{}.npy".format(test_subject)), result[test_subject])
except Exception:
    # Best-effort cell: the failure is logged rather than raised, but with the
    # full traceback so the failing call can be located from the log.
    logger.error(traceback.format_exc())

0it [00:00, ?it/s]
0it [16:25, ?it/s][A
0it [16:25, ?it/s]


UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U134'), dtype('<U134')) -> dtype('<U134')