In [1]:
import numpy as np
import h5py
import pandas as pd
import os
import glob
import importlib
import nibabel as nib
import seaborn as sns
import matplotlib.pyplot as plt
from utils.Surf import roi2gii, vertex2gii
import utils.Vis
importlib.reload(utils.Vis)
from utils.Vis import plot_maps

In [2]:
# Config parameters
root = "/home/ali/graham-akhanf/EpLink/Eplink"
results_path = os.path.join(root,"ISC-pipeline","results","{dataset}","looISC","control")
atlases_path = os.path.join('.','ISC-pipeline','resources','atlases_fsLR_32K')

atlases = ['none', 'Glasser_2016', 'Desikan', 'Schaefer2018_17Networks_400', 'Yan2023_17Networks_400']
atlases_aliases = ['Vertex', 'Glasser 2016', 'Desikan', 'Schaefer 2018', 'Yan 2023']

atlases = ['none', 'Glasser_2016', 'Desikan', 'Yan2023_17Networks_400']
atlases_aliases = ['Vertex', 'Glasser 2016', 'Desikan', 'Yan 2023']

datasets = ["eplink-p2", "eplink-p3"]
task_dataset = {'eplink-p2': 'hitchcock', 'eplink-p3': 'movie'}
source_dataset = {'eplink-p2': 'resampled2fsLR/32k_space_surfaces-parcellated',\
                  'eplink-p3': 'temporalResampled-parcellated'}
resampled_dataset = {'eplink-p2': 'N', 'eplink-p3': 'Y'}

fwhm = 0
confounds_idx = 1
target_volumes = 240

output_path = os.path.join(root, 'results', 'phases_combined', 'looISC')
os.makedirs(output_path, exist_ok=True)
# file_pattern = "looISC_task-{task}_hemi-{{hemi}}_fwhm-{fwhm}_confounds-{confounds_idx}_resampled-{resampled}_atlas-{atlas}.h5"

In [3]:
# Helper functions
def get_dimensions(filepath):
    """Extract dimensions of data from h5 data."""
    # Check file extension
    ext = os.path.splitext(filepath)[-1]
    
    # h5 file
    if ext == '.h5': 
        # Open the HDF5 file
        with h5py.File(filepath, 'r') as f:
            # Load the parcellated data
            data = f['parcellated_data'][:]
            # Return data dimensions
            n_rois, n_vol = data.shape
    
    # gii file
    elif ext == '.gii':
        # Load the GIFTI file by nibabel
        gii = nib.load(filepath)
        # Return data dimensions
        n_rois = gii.darrays[0].data.shape[0]
        n_vol = len(gii.darrays)
    
    return n_rois, n_vol

def load_HDF(filepath, n_vols):
    """Load data stored in the HDF files given the runs dataframe."""
    with h5py.File(filepath, 'r') as f:
            # Load the parcellated data
            data = f['parcellated_data'][:]
            data = data[:,:n_vols] # Ignoring excessive volumes
    # Returns data with shape ROI x Time
    return data

def load_gii(filepath, n_vols):
    func_gii = nib.load(filepath)
    data = np.vstack([darray.data for darray in func_gii.darrays[:n_vols]]).T
    # Returns data with shape Vertex x Time
    return data

def load_runs(runs_df, n_vols):
    # Loading only the last run
    fp = runs_df['full_path'].iloc[-1]
    # Checking file extension
    ext = os.path.splitext(fp)[-1]

    # h5 file
    if ext == '.h5':
        data = load_HDF(fp, n_vols)
    # gii file
    elif ext == '.gii':
        data = load_gii(fp, n_vols)
    
    return data

def get_info(filepath):
    """Extract subject, hemi, task, run, fwhm, confounds,
      rois, and number of volumes from a given file."""
    # Get file name
    basename = os.path.basename(filepath)
    # Split file name by '_'
    parts = basename.split('_')
    # Info dictionary
    info = {}
    # Parse file name
    for part in parts:
        info['full_path'] = filepath

        if part.startswith('sub-'):
            info['subject'] = part.split('sub-')[1]
        elif part.startswith('hemi-'):
            info['hemi'] = part.split('hemi-')[1]
        elif part.startswith('task-'):
            info['task'] = part.split('task-')[1]
        elif part.startswith('run-'):
            info['run'] = part.split('run-')[1]
        elif part.startswith('fwhm-'):
            info['fwhm'] = part.split('fwhm-')[1]
        elif part.startswith('confounds-'):
            info['confounds'] = part.split('confounds-')[1].split('_')[0]  # Assuming confounds is the last part before the file extension

    # Get dimensions for stored data    
    info['n_roi'], info['n_vol'] = get_dimensions(filepath)
    
    return info

def build_dataframe(directory, pattern=None):
    """Build a dataframe from files in the directory and its subdirectories."""
    # Default Pattern for search
    if pattern == None:
        subject, hemi, task, run, fwhm, confounds = ('*', '*', '*', '*', 0, 1)
        pattern = f"sub-{subject}_hemi-{hemi}_task-{task}_run-{run}_space-fsLR_den-32k_desc-denoised_fwhm-{fwhm}_confounds-{confounds}_atlas-glasser.h5"
    
    pattern = os.path.join(directory, 'sub-*', 'func', pattern)
    # Get all files matching the pattern
    files = glob.glob(pattern, recursive=True)
    # Building the dataframe
    df = pd.DataFrame([get_info(file) for file in files])
    
    return df

In [8]:
for i in range(1,len(atlases)):
    print(atlases[i])
    df_comb = pd.DataFrame()
    for dataset in datasets:
        print(dataset)
        data_path = os.path.join(root, 'ISC-pipeline', 'results', f'{dataset}', f'{source_dataset[dataset]}', f'{atlases[i]}')
        # subjects_path = glob.glob(os.path.join(data_path, 'sub-*'))
        # subjects = [int(os.path.basename(p)[4:]) for p in subjects_path]
        # subjects.sort()

        subject, hemi, task, run, fwhm, confounds = ('*', '*', task_dataset[dataset], '*', 0, 1)
        datafile_pattern = f"sub-{subject}_task-{task}_run-{run}_hemi-{hemi}_confounds-{confounds}_bold.func.h5"

        df = build_dataframe(data_path, datafile_pattern)
        # Exclude files with less volumes
        df = df[df["n_vol"] >= target_volumes].reset_index(drop=True)

        df_comb = pd.concat((df_comb, df))

    # Sort files 
    df_comb = df_comb.sort_values(by=['subject', 'task', 'run'], ascending=[True]*3).reset_index()
    # Ignore some columns
    #['subject', 'task', 'run', 'hemi', 'fwhm', 'confounds', 'n_vol', 'full_path']
    df_comb = df_comb[['subject', 'task', 'run', 'hemi', 'n_vol', 'full_path']]

    # Get unique subjects
    subjects = df_comb['subject'].unique()
    controls = [s for s in subjects if int(s) > 5000]
    patients = [s for s in subjects if int(s) < 5000]
    n_subject = len(subjects)

    # Load files data
    data = []
    for subj in controls:
        # Filter subject files 
        df_s = df_comb[df_comb['subject'] == subj]
        df_s_L = df_s[df_s['hemi'] == 'L']
        df_s_R = df_s[df_s['hemi'] == 'R']
        # Load runs
        d_L = load_runs(df_s_L, target_volumes)
        d_R = load_runs(df_s_R, target_volumes)
        data.append(np.concatenate((d_L,d_R),axis=0))

    # Stacking subjects data (Subject, Unit, Time)
    data = np.stack(data)

    n_controls, n_unit, n_vols = data.shape
    output_name = f"looISC_hemi-{{hemi}}_fwhm-{fwhm}_confounds-{confounds_idx}_atlas-{atlases[i]}.h5"
    output_fullpath = os.path.join(output_path, output_name)

    # Sum subjects data (Unit, Time)
    data_sum = data.sum(axis=0)
    # Output will be shaped as (Subject, Unit)
    loo_ISC = np.zeros((n_controls, n_unit))
    for i in range(n_controls):
        for j in range(n_unit):
            sum_ts = data_sum[j,:].reshape((1,-1))
            sub_ts = data[i,j,:].reshape((1,-1))
            c = np.corrcoef(sub_ts, sum_ts-sub_ts)
            loo_ISC[i,j] = c[0,1]

    loo_ISC_dict = {'L': loo_ISC[:,:int(n_unit/2)],
                    'R': loo_ISC[:,int(n_unit/2):]}
    # Save ISCs as HDF5 file
    for hemi in ['L', 'R']:
        with h5py.File(output_fullpath.format(hemi=hemi), 'w') as f:
            f.create_dataset('loo_ISC', data=loo_ISC_dict[f'{hemi}'])

Glasser_2016
eplink-p2
eplink-p3


  c /= stddev[:, None]
  c /= stddev[None, :]


362
181.0
Desikan
eplink-p2
eplink-p3
72
36.0
Yan2023_17Networks_400
eplink-p2
eplink-p3


  c /= stddev[:, None]
  c /= stddev[None, :]


802
401.0
