Importing Libraries:  
The following Python modules are required and are imported at the start:  
-	os, sys, time, pprint, pathlib, numpy (np alias), pandas (pd alias), matplotlib.pyplot (plt alias), ipywidgets, tqdm.notebook, h5py, nibabel (nib alias), nilearn, glmsingle, bids, noise_ceiling and tc2see.

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
import time
from pprint import pprint
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact
from tqdm.notebook import tqdm
import h5py

import nibabel as nib
from nilearn import surface
import bids
from bids import BIDSLayout

# Make the project-local packages (noise_ceiling, tc2see) importable:
# dir2 is the parent of the current working directory and dir1 is the
# directory above that; dir1 is added to the module search path only once.
dir2 = os.path.abspath('../')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)
    
from noise_ceiling import (
    compute_ncsnr,
    compute_nc,
)

from tc2see import load_data

Defining Dataset Paths and Variables:  
This section establishes the paths to the project directories and initializes key variables. It defines the paths to the dataset, its derivatives, and the preprocessed fMRI data, which the rest of the notebook uses to access project data. Additionally, the dataset version and the number of runs are set within this section (the task specification itself is set in the processing cell further below).  
(Adjust this path and any other path as needed)

In [2]:
# Root directory under which the raw 'TC2See' dataset and the
# 'derivatives_TC2See' folder are looked up by the following cell.
# This is a machine-specific Windows path; adjust it for your environment.
dataset_root = Path('E:\\fmri_processing\\results')

In [3]:
# Dataset version; it selects the run count below and is embedded in the
# name of the HDF5 file written later in the notebook.
tc2see_version = 3

# Directory layout, all relative to dataset_root.
dataset_path = dataset_root
derivatives_path = dataset_path / 'derivatives_TC2See'
data_path = derivatives_path / 'fmriprep'

# Versions 1 and 3 use 6 runs; every other version value uses 8.
if tc2see_version in (1, 3):
    num_runs = 6
else:
    num_runs = 8

# BIDSLayouts used to query files: one for the raw dataset and one for the
# fMRIPrep derivatives (indexed as derivatives, with BIDS validation off).
dataset_layout = BIDSLayout(dataset_path / 'TC2See')
derivatives_layout = BIDSLayout(data_path, derivatives=True, validate=False)

Example contents of 'dataset_description.json':
{"Name": "Example dataset", "BIDSVersion": "1.0.2", "GeneratedBy": [{"Name": "Example pipeline"}]}


Processing fMRI Data for Subjects:  
This code segment configures and prepares the environment for the analysis of the preprocessed fMRI data. First, it initializes configuration variables such as the subject IDs, TR duration, brain mask dilation parameter, and the number of stimuli. Then, it opens the stimulus image file and creates a mapping of stimulus names to unique identifiers. The code then creates an HDF5 file for storing the preprocessed fMRI data, with the filename derived from the specified version. For each subject, it initializes a group within the HDF5 file and, for every run, loads the left- and right-hemisphere fsaverage surface time series and concatenates them into a single array (the brain mask dilation parameter is defined here but is not applied to the surface data in this cell). Datasets are created within the subject's group to store the BOLD data, per-run statistics, linear trends, and stimulus-related information. This segment ensures that the preprocessed fMRI data is well organized and structured for further analysis and interpretation.

In [9]:
# Entities used when querying the BIDS layouts.
task = "bird"
space = 'fsaverage'

# Subjects to process. The commented line builds the zero-padded labels
# '05' .. '34' instead of the single subject used here.
# subjects = [ '0'+str(num) if num < 10 else str(num) for num in range(5,35)]
subjects = ['18']

tr = 2.0  # TR duration in seconds (alternative value noted by the author: 1.97)
mask_dilations = 3  # Number of dilation iterations for the brain mask
num_stimuli = 75  # Total number of different stimuli (alternative value noted by the author: 112)

# h5py open mode for the output file: 'w' creates a new file (truncating an
# existing one), 'a' opens an existing file read/write or creates it.
new_or_append = 'w'

# Open the stimulus image file and number the stimulus names in stored order.
# NOTE(review): this handle is never closed in this cell; close it once no
# later cell needs it.
stimulus_images = h5py.File(derivatives_path / 'stimulus-images.hdf5', 'r')
stimulus_id_map = {
    stimulus_name: index
    for index, stimulus_name in enumerate(stimulus_images.attrs['stimulus_names'])
}
           
# Create or append to an HDF5 file to store preprocessed fMRI data
def _load_hemisphere_bold(layout, subject, run, task, space):
    """Load both hemispheres' surface time series for one run.

    Queries ``layout`` once for the run's ``func.gii`` files and loads the
    first two matches as float64 arrays.

    Args:
        layout: BIDSLayout to query.
        subject: Subject label without the ``sub-`` prefix.
        run: 1-based run number.
        task: Task entity to match.
        space: Space entity to match.

    Returns:
        A two-element list of arrays, in the order the layout returned the
        files. Callers treat index 0 as the left and index 1 as the right
        hemisphere.

    Raises:
        FileNotFoundError: If fewer than two matching files are found.
    """
    hemi_files = layout.get(
        subject=subject,
        run=run,
        task=task,
        space=space,
        extension='func.gii',
    )
    if len(hemi_files) < 2:
        raise FileNotFoundError(
            f"Expected two func.gii files for sub-{subject}, run {run}, "
            f"task {task}, space {space}; found {len(hemi_files)}"
        )
    # NOTE(review): assumes the layout lists the left hemisphere before the
    # right one -- TODO confirm against the derivative file names.
    return [
        surface.load_surf_data(hemi_file).astype(np.float64)
        for hemi_file in hemi_files[:2]
    ]


with h5py.File(data_path / f'tc2see-v{tc2see_version}-fsaverage-surf_17.hdf5', new_or_append) as f:
    for subject in tqdm(subjects):
        subject_key = f'sub-{subject}'

        # Subjects already present in the file (append mode) are not reprocessed.
        if subject_key in f:
            print(f"Subject {subject} already exists")
            print(f[subject_key]['bold'].shape)
            continue

        print(f"Processing subject {subject}...")
        group = f.require_group(subject_key)

        try:
            # Run 1 is loaded once up front only to size the datasets.
            fsaverage_surf_list = _load_hemisphere_bold(derivatives_layout, subject, 1, task, space)
            for fsaverage_surf_hemi in fsaverage_surf_list:
                print(fsaverage_surf_hemi.shape)

            fsaverage_surf = np.concatenate(fsaverage_surf_list, axis=0)
            num_voxels, num_trs = fsaverage_surf.shape

            group.require_dataset('bold', shape=(num_runs, num_trs, num_voxels), dtype='f4')
            group.require_dataset('bold_mean', shape=(num_runs, num_voxels), dtype='f4')
            group.require_dataset('bold_std', shape=(num_runs, num_voxels), dtype='f4')
            group.require_dataset('bold_trend', shape=(num_runs, 2, num_voxels), dtype='f4')
            group.require_dataset('bold_trend_std', shape=(num_runs, num_voxels), dtype='f4')
            group.require_dataset('stimulus_trs', shape=(num_runs, num_stimuli), dtype='f4')
            group.require_dataset('stimulus_ids', shape=(num_runs, num_stimuli), dtype='i4')

            for run_id in tqdm(range(num_runs)):
                fsaverage_surf_list = _load_hemisphere_bold(
                    derivatives_layout, subject, run_id + 1, task, space
                )
                for fsaverage_surf_hemi in fsaverage_surf_list:
                    print(f"{fsaverage_surf_hemi.shape=}")  # e.g. (163842, 231)

                # Stack the hemispheres along the vertex axis, then put time first.
                fsaverage_surf = np.concatenate(fsaverage_surf_list, axis=0)  # e.g. (327684, 231)
                print(f"after concat: {fsaverage_surf.shape=}")
                fsaverage_surf = np.transpose(fsaverage_surf)  # e.g. (231, 327684)
                print(f"after transpose: {fsaverage_surf.shape=}")
                print(f"{np.mean(fsaverage_surf, axis=0)=}")
                # NOTE(review): this and the np.save calls below write to the
                # current working directory and are overwritten on every run and
                # subject, so only the last run's arrays survive. They look like
                # debugging output -- confirm whether they are still needed.
                np.save("mean_fsaverage_surf", np.mean(fsaverage_surf, axis=0))

                num_trs_run = fsaverage_surf.shape[0]

                # Design matrix for a per-vertex straight line: [time index, 1].
                trend_coeffs = np.stack([np.arange(num_trs_run), np.ones(shape=num_trs_run)], axis=1)
                print(f"{trend_coeffs.shape=}")

                # Perform linear detrending on the bold data: least-squares fit of
                # slope and intercept, then subtract the fitted line.
                bold_trend = np.linalg.lstsq(trend_coeffs, fsaverage_surf, rcond=None)[0]
                print(f"{bold_trend.shape=}")
                bold_predicted = trend_coeffs @ bold_trend
                print(f"{bold_predicted.shape=}")
                np.save("bold_predicted", bold_predicted)
                bold_detrend = fsaverage_surf - bold_predicted
                print(f"{bold_detrend.shape=}")
                np.save("bold_detrend", bold_detrend)

                print(f"{np.mean(bold_detrend, axis=0).shape=}")
                np.save("mean_bold_detrend", np.mean(bold_detrend, axis=0))

                # Load events data for the current subject and run
                events_file = dataset_layout.get(
                    subject=subject,
                    run=run_id + 1,
                    task=task,
                    extension='tsv'
                )[0]

                events_df = pd.read_csv(events_file.path, sep='\t')
                # Rows whose stimulus is '+' are dropped; only image trials are kept.
                events_df = events_df[events_df['stimulus'] != '+']
                stimulus_names = [Path(stimulus_path).stem for stimulus_path in events_df['stimulus']]
                # Cut the name just before 'hash', also dropping the character
                # that precedes it, so names match the keys of stimulus_id_map.
                stimulus_names = [
                    name[:name.find('hash')-1] if "hash" in name else name
                    for name in stimulus_names
                ]
                stimulus_ids = [stimulus_id_map[name] for name in stimulus_names]

                stimulus_trs = np.array(events_df['tr']).astype(np.float32)

                # The stimulus datasets have a fixed width of num_stimuli, so a
                # mismatch is reported here with context instead of surfacing
                # as an HDF5 write error.
                if len(stimulus_ids) != num_stimuli:
                    raise ValueError(
                        f"sub-{subject} run {run_id + 1}: expected {num_stimuli} "
                        f"stimulus events, found {len(stimulus_ids)}"
                    )

                # Store various datasets in the HDF5 file. Only the first
                # num_trs_run rows of 'bold' are written for this run.
                group['bold'][run_id, :num_trs_run] = fsaverage_surf
                group['bold_mean'][run_id] = fsaverage_surf.mean(axis=0)
                group['bold_std'][run_id] = fsaverage_surf.std(axis=0)
                group['bold_trend'][run_id] = bold_trend
                group['bold_trend_std'][run_id] = bold_detrend.std(axis=0)
                group['stimulus_trs'][run_id] = stimulus_trs
                group['stimulus_ids'][run_id] = stimulus_ids
        except BaseException:
            # Remove the half-written group (also on KeyboardInterrupt) so a
            # later append-mode run does not skip this subject as if it
            # "already exists"; the error itself is still propagated.
            del f[subject_key]
            raise

  0%|          | 0/1 [00:00<?, ?it/s]

Processing subject 18...
(163842, 231)


KeyboardInterrupt: 

In [5]:
# for subject in subjects:
#     print(f"Saving NC file for subject {subject}...")
#     try:
#         bold, stimulus_ids = load_data(
#             data_path / f'tc2see-v{tc2see_version}-fsaverage-surfs.hdf5', 
#             f'sub-{subject}', 
#             tr_offset=3,
#             run_normalize='linear_trend',
#             interpolation=False,
#         )

#         ncsnr = compute_ncsnr(bold, stimulus_ids) # Compute noise ceiling noise ratio
#         nc = compute_nc(ncsnr, num_averages=1)

#         img = nib.gifti.GiftiImage(darrays=[nib.gifti.GiftiDataArray(nc.astype(np.float32))])
#         nib.save(img, f'E:/fmri_processing/results/analysis/sub-{subject}/sub{subject}_fsaverage_surf_nc.gii')
#     except:
#         print(f"Error for participant {subject}, skipping...")
    