Extract timeseries from preprocessed data
=========================


Step 1: Load brain parcellations
-------------------------------------

In [3]:
from nilearn import datasets, input_data
from nilearn.plotting import plot_roi
from nilearn.input_data import NiftiLabelsMasker
import numpy as np
import pandas as pd

# Power 264 ROIs parcellation
power264 = datasets.fetch_coords_power_2011()
coords = np.vstack((power264.rois['x'], power264.rois['y'], power264.rois['z'])).T
power264_parcels = input_data.NiftiSpheresMasker(seeds=coords)
power264_networks = np.asarray(pd.read_csv('./support/modules.txt', header=None)[0])
power264_colors = {'AU':'#d182c6',
                   'CER':'#9fc5e8',
                   'CO':'#7d009d', 
                   'DA':'#75df33', 
                   'DM':'#ed1126', 
                   'FP':'#f6e838', 
                   'MEM':'#bebab5',
                   'SAL':'#2a2a2a',
                   'SOM':'#6ccadf',
                   'SUB':'#980000',
                   'UNC':'#f58c00',
                   'VA':'#00a074', 
                   'VIS':'#5131ac'}


# Schaefer 400 parcels parcellation
schaefer400 = datasets.fetch_atlas_schaefer_2018(n_rois=400, 
                                           yeo_networks=7, 
                                           resolution_mm=1,
                                           data_dir=None, 
                                           base_url=None, 
                                           resume=True, 
                                           verbose=1)
schaefer400_parcels = input_data.NiftiLabelsMasker(schaefer400['maps'])

schaefer400_fix_dict = {
    'Vis': 'VIS',
    'SomMot': 'SOM',
    'DorsAttn': 'DAN',
    'Sal': 'VAN',
    'Limbic': 'LIM',
    'Cont': 'FPN',
    "Default": "DMN"
}

schaefer400_networks = np.asarray([val for key, val in schaefer400_fix_dict.items() for roi in schaefer400['labels'] if key in roi.decode('UTF-8')])
schaefer400_colors = {'DAN':'#00ab2e', 
                      'DMN':'#dc6179', 
                      'FPN':'#e8c830',
                      'LIM':'#7eb3d4', 
                      'SOM':'#7e8dc1', 
                      'VAN':'#d182c6',
                      'VIS':'#ac00ad',}


# Creating dictionary with used parcellation
parcellations = {'shaefer400': {'parcels': schaefer400_parcels, 
                                'networks': schaefer400_networks, 
                                'colors': schaefer400_colors } , 
                 'power264':  {'parcels': power264_parcels, 
                               'networks': power264_networks, 
                               'colors': power264_colors}}


Dataset created in /home/connectomics/nilearn_data/schaefer_2018

Downloading data from https://raw.githubusercontent.com/ThomasYeoLab/CBIG/v0.8.1-Schaefer2018_LocalGlobal/stable_projects/brain_parcellation/Schaefer2018_LocalGlobal/Parcellations/MNI/Schaefer2018_400Parcels_7Networks_order.txt ...


Downloaded 16985 of 16985 bytes (100.0%,    0.0s remaining) ...done. (0 seconds, 0 min)


Downloading data from https://raw.githubusercontent.com/ThomasYeoLab/CBIG/v0.8.1-Schaefer2018_LocalGlobal/stable_projects/brain_parcellation/Schaefer2018_LocalGlobal/Parcellations/MNI/Schaefer2018_400Parcels_7Networks_order_FSLMNI152_1mm.nii.gz ...


Downloaded 295901 of 295901 bytes (100.0%,    0.0s remaining) ...done. (0 seconds, 0 min)


Step 2: Extract signal for parcellations
-------------------------------------

In [4]:
import numpy as np
import pandas as pd

from nilearn import datasets, plotting, input_data, signal, image 
from nilearn.input_data import NiftiLabelsMasker
import os

import numpy as np

# Selecting subjects with all training session
subjects_all = pd.read_csv('data/subjects.csv')
subjects_trained = subjects_all[groups['included_ses-all']]
subjects_list = subjects_trained['sub'].values

# Selecting denoising data to extract timeseries
data_dir = '/home/connectomics/Projects/LearningBrain/LearningBrain_BIDS/derivatives/fmridenoise/'
space = 'space-MNI152NLin2009cAsym'
desc = 'desc-preproc_bold_denoised'
pipeline = 'pipeline-24HMP_8Phys_SpikeReg_4GS'
suffix = 'nii.gz'
tasks = {'rest': 305, 'dualnback': 340}
sessions = ['ses-1', 'ses-2', 'ses-3', 'ses-4']

n_sub = len(subjects_list)
n_ses = len(sessions)

# Iterate over parcellations
for parcellation_name, parcellation_data in parcellations.items():
    
    # Create empty arrays to store data
    n_parcels = len(parcellation_data['networks'])    
    timeseries_rest = np.zeros((n_sub, n_ses, tasks['rest'], n_parcels))
    timeseries_dualnback = np.zeros((n_sub, n_ses, tasks['dualnback'], n_parcels))
    
    # Iterate over tasks
    for task, n_scans in tasks.items():
        print(50*"-")
        print(f"Parcellation: {parcellation_name} | Task: {task}")
        print(50*"-")
        # Iterate over subjects
        for i, sub in enumerate(subjects_list):
            print(f"Subject: {sub} | Session: ",  end =" ")
            
            # Iterate over sessions
            for j, ses in enumerate(sessions):
                
                sub_dir = f'{data_dir}{sub}/{ses}/'
                denoised_file = f'{sub}_{ses}_task-{task}_{space}_{desc}_{pipeline}.{suffix}'
                denoised_path = f'{sub_dir}{denoised_file}'
                
                if not os.path.exists(denoised_path):
                    #print(f'Data does not exist: {sub}, {ses}, {task}')
                    print("x",  end =" ")
                else:
                    timeseries = parcellation_data['parcels'].fit_transform(denoised_path, confounds=None)
                    
                    if task == 'rest':
                        timeseries_rest[i, j, :, :] = timeseries
                    else:
                        timeseries_dualnback[i, j, :, :] = timeseries
                    
                    if len(timeseries) == tasks[task]:
                        print("*",  end =" ")
                    else: 
                        print("?",  end =" ")
                        
            print('')
                        
    filename = f'timeseries_{parcellation_name}_{pipeline}'
    
    timeseries_data = {'tasks':
                           {'rest': {'timeseries': timeseries_rest , 'denoising': pipeline },
                            'dualnback': {'timeseries': timeseries_rest, 'denoising': pipeline}},
                       'parcellation': {
                           'name': parcellation_name,
                           'parcels': parcellation_data['parcels'],
                           'networks': parcellation_data['networks'],
                           'colors': parcellation_data['colors'] },
                       'subjects': subjects_trained,
                       'filename': filename
                       }                    

    np.save(f'./data/neuroimaging/{filename}.npy', timeseries_data)

FileNotFoundError: [Errno 2] File b'data/subjects.csv' does not exist: b'data/subjects.csv'

In [163]:
timeseries_data

{'tasks': {'rest': {'timeseries': array([[[[ 0.00327497, -0.08833537,  0.06947587, ...,  0.03717433,
             -0.12452784, -0.28368366],
            [ 0.20644929, -0.42003803,  1.12701678, ..., -0.36196067,
             -0.69225542, -0.50744769],
            [ 0.31700789, -0.39661105,  1.59470656, ..., -0.57705812,
             -1.00564952, -0.63868194],
            ...,
            [-0.86944853, -0.70784015, -1.6887897 , ..., -0.75367393,
              0.43370723, -0.04487071],
            [-0.58776713, -0.45953724, -1.38527034, ..., -0.38719785,
              0.42734948, -0.02974992],
            [-0.02144391,  0.00303535, -0.36527291, ...,  0.04730958,
              0.00659706,  0.05948683]],
   
           [[ 0.        ,  0.        ,  0.        , ...,  0.        ,
              0.        ,  0.        ],
            [ 0.        ,  0.        ,  0.        , ...,  0.        ,
              0.        ,  0.        ],
            [ 0.        ,  0.        ,  0.        , ...,  0.       