# Use FastSRM to estimate the ceiling level of fMRI BOLD signal predictability

In [6]:
from fastsrm.identifiable_srm import IdentifiableFastSRM
from sklearn.model_selection import KFold
from tqdm import tqdm
import numpy as np
import glob
import os

In [5]:
from utils import possible_subjects_id, get_subject_name, fetch_data, fetch_masker
from logger import Logger
import reporting

## Defining parameters

In [7]:
# Language of the dataset to analyse; interpolated into the data paths.
language = "english"
# Number of runs; used as the number of session folds in the cross-validation.
nb_runs = 9
# Left empty in this notebook.
path_to_input = ""
# Root folder of the fMRI data (not language-specific).
path_to_fmridata = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/fMRI'

In [8]:
# Root of the project; it ends with a slash, so the paths below are built by
# plain concatenation and stay identical to the fully spelled-out versions.
PROJECT_PATH = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/"
# Language-specific folders and masker files derived from the project root.
OUTPUT_PATH = f"{PROJECT_PATH}derivatives/fMRI/maps/{language}"
INPUT_PATH = f"{PROJECT_PATH}data/stimuli-representations/{language}"
FMRIDATA_PATH = f"{PROJECT_PATH}data/fMRI/{language}"
MASKER_PATH = f"{PROJECT_PATH}global_masker_{language}"
SMOOTHED_MASKER_PATH = f"{PROJECT_PATH}smoothed_global_masker_{language}"

## Retrieving masker

In [10]:
# Logger bound to 'logs.txt' at the project root; passed to fetch_masker below.
logger = Logger(os.path.join(PROJECT_PATH, 'logs.txt'))

In [None]:
# Fetch the (unsmoothed) global masker for this language.
global_masker = fetch_masker(
    MASKER_PATH,
    language,
    FMRIDATA_PATH,
    INPUT_PATH,
    smoothing_fwhm=None,
    logger=logger,
)
# Load the Harvard-Oxford atlas named 'cort-prob-2mm'.
atlas_maps, labels = reporting.load_atlas()
# Skip the first label (presumably the background entry -- verify).
x_labels = labels[1:]

## Preprocessing function

In [None]:
# Names of all the subjects available for the selected language.
subjects = [get_subject_name(sub_id) for sub_id in possible_subjects_id(language)]
# paths[i] lists the run files of subjects[i]. glob returns files in arbitrary
# order, so each subject's runs are sorted individually; the outer list keeps
# the order of `subjects` so that indices stay aligned.
# The sub-folder is the literal 'func' directory (the original referenced an
# undefined name `func`).
paths = [
    sorted(glob.glob(os.path.join(FMRIDATA_PATH, sub, 'func', 'fMRI_*run*.nii.gz')))
    for sub in subjects
]

In [40]:
def process_fmri_data(fmri_paths, masker):
    """ Load fMRI data, mask it with a given masker and save it as .npy.
    Voxels that are zero at every time step would produce NaN Pearson
    correlation coefficients in the following analysis, so a small
    random value is written into their first time step.
    Each processed run is also saved next to its source file, with the
    last 7 characters of the path ('.nii.gz') replaced by '.npy'.
    Arguments:
        - fmri_paths: list (of string)
        - masker: NiftiMasker object
    Returns:
        - data: list of length #runs (np.array of shape: #scans * #voxels)
    """
    # Copy so the arrays handed back by the masker are never modified in place.
    data = [np.array(masker.transform(f)) for f in fmri_paths]
    for run, path in enumerate(fmri_paths):
        run_data = data[run]
        if not np.issubdtype(run_data.dtype, np.floating):
            # The jitter below is a small fraction; it needs a float array.
            run_data = run_data.astype(np.float64)
        # One jitter value per run, shared by all the silent voxels of that run.
        jitter = np.random.random() / 1000
        # Columns (voxels) whose every entry is exactly zero.
        silent_voxels = ~run_data.any(axis=0)
        if run_data.shape[0] > 0 and silent_voxels.any():
            run_data[0, silent_voxels] = jitter
        data[run] = run_data
        np.save(path[:-7] + '.npy', run_data)
    return data

## Preparing subjects' data (transforming it to NumPy arrays with the masker)

In [43]:
# Mask every subject's runs (with a progress bar). The returned arrays are
# discarded: only the .npy files written by process_fmri_data are kept.
for sub_paths in tqdm(paths):
    process_fmri_data(sub_paths, global_masker)

## Retrieving data & FastSRM

In [None]:
# Maps the index of each held-out subject to its mean score across session folds.
result = {}

In [None]:
# Nested cross-validation: the outer loop holds out one subject at a time
# (n_splits == number of subjects), the inner loop holds out one session.
# Loop-invariant, so it is built once.
paths_array = np.array(paths)

subject_folds = KFold(n_splits=len(subjects), shuffle=True)
for subjects_train, subjects_test in subject_folds.split(np.arange(len(subjects))):

    n_subjects_train = len(subjects_train)
    n_subjects_test = len(subjects_test)

    X_train = paths_array[subjects_train, :]
    X_test = paths_array[subjects_test, :]

    # Scores of the held-out subject(s), one entry per session fold. This must
    # be reset for every outer fold (the original never initialised it, which
    # raised a NameError on the first append).
    r2_subjects = []

    session_folds = KFold(n_splits=nb_runs, shuffle=True)
    for session_train, session_test in session_folds.split(np.arange(nb_runs)):
        X_train_session_train = X_train[:, session_train]
        X_train_session_test = X_train[:, session_test]
        X_test_session_train = X_test[:, session_train]
        X_test_session_test = X_test[:, session_test]

        # NOTE(review): temp_dir is a user-specific absolute path.
        # NOTE(review): the X_* arrays hold the '.nii.gz' paths collected by
        # glob -- confirm FastSRM can consume them, or whether the '.npy'
        # files written by process_fmri_data should be passed instead.
        fastsrm = IdentifiableFastSRM(n_components=100, temp_dir="/home/ap25995/fast_srm")
        fastsrm.fit(X_train_session_train)
        W_list = fastsrm.basis_list
        # Shared responses of the training subjects on the training sessions
        # (S1) and on the held-out session (S2).
        S1 = fastsrm.transform(X_train_session_train)
        S2 = fastsrm.transform(X_train_session_test)
        # Register the held-out subject(s) using their training sessions only.
        fastsrm.add_subjects(X_test_session_train, S1)

        # Added subjects are addressed right after the training subjects.
        predictions = fastsrm.inverse_transform(
            S2,
            subjects_indexes=np.arange(n_subjects_train, n_subjects_train + n_subjects_test))

        for i in range(n_subjects_test):
            # NOTE(review): X_test_session_test[i] contains file paths, not
            # arrays -- the measured data presumably has to be loaded before
            # this subtraction; confirm the expected shape and orientation.
            diff = predictions[i] - X_test_session_test[i]
            # NOTE(review): 1 - var(residual) is an R2 only if the data has
            # unit variance along axis 1 -- confirm the normalisation.
            r2 = 1 - diff.var(axis=1)
            r2_subjects.append(r2)
    # Leave-one-subject-out: subjects_test has a single element.
    result[subjects_test[0]] = np.mean(r2_subjects, axis=0)
