# Create full tables (all subjects)

In [1]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import hcp_utils as hcp
import nilearn as nl
import gc
import traceback
import logging
import pickle
import warnings
import joblib
from joblib import Parallel, delayed
from joblib import parallel_backend
from multiprocessing import Process, Manager
from joblib import Memory
warnings.filterwarnings(action='ignore')

  from pandas.core import (
pixdim[1,2,3] should be non-zero; setting 0 dims to 1


In [2]:
# Label mapping taken from hcp_utils' `mmp` parcellation object.
header = hcp.mmp.labels
# Work on a copy: `header_cont = header` would only create an alias, so the
# assignment below would overwrite entry 0 of the library's own
# `hcp.mmp.labels` for the rest of the session.
header_cont = dict(header)
header_cont[0] = 'Subject_key'
# All labels except the first one (presumably the key-0 placeholder entry);
# used as row labels for the connectivity matrices.
header_list = list(header.values())[1:]


In [3]:
# Output directory prefix for the combined tables. File names are appended to
# it by plain string concatenation, so it must keep its trailing '/'.
output_path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/functional_tabels/'

In [5]:
## Task configuration: n-back ("WM")
# NOTE: every task-configuration cell assigns the same names (path,
# task_label, contrasts_ids, ...), so whichever cell ran last wins.
task_label = 'nback'
space_label = 'space-fsLR'
derivatives_folder = 'derivatives/fmriprep'

# Directory with one sub-folder per subject.
# Earlier location, kept for reference:
# path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/surf_nback_std_fixed/'
path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/surf_nback_std_fixed_newcntr/'

direction = ['1', '2', 'mean']
stat = ['z', 'effect']

# Contrast names as they appear in the per-subject file names.
contrasts_ids = [
    'place',
    'face',
    'emotionface',
    'face-place',
    'PosFace-NeutFace',
    'NegFace-NeutFace',
    'emotionface-NeutFace',
    'twobk',
    'zerobk',
    'twobk-zerobk',
]

In [5]:
## Task configuration: SST
# NOTE: every task-configuration cell assigns the same names (path,
# task_label, contrasts_ids, ...), so whichever cell ran last wins.
task_label = 'SST'
space_label = 'space-fsLR'
derivatives_folder = 'derivatives/fmriprep'

# Directory with one sub-folder per subject.
path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/surf_SST_std_fixed/'

direction = ['1', '2', 'mean']
stat = ['z', 'effect']

# Contrast names as they appear in the per-subject file names.
contrasts_ids = [
    'CorrectGo',
    'IncorrectGo',
    'CorrectStop',
    'IncorrectStop',
    'CorrectStop-CorrectGo',
    'IncorrectStop-CorrectGo',
    'Stop-CorrectGo',
    'CorrectStop-IncorrectStop',
    'IncorrectGo-CorrectGo',
    'IncorrectGo-IncorrectStop',
]

In [10]:
## Task configuration: MID
# NOTE: every task-configuration cell assigns the same names (path,
# task_label, contrasts_ids, ...), so whichever cell ran last wins.
task_label = 'MID'
space_label = 'space-fsLR'
derivatives_folder = 'derivatives/fmriprep'

# Directory with one sub-folder per subject.
# Earlier location, kept for reference:
# path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/surf_MID_std_fixed/'
path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/surf_MID_std_fixed_newcntr/'

direction = ['1', '2', 'mean']
stat = ['z', 'effect']

# Contrast names as they appear in the per-subject file names.
contrasts_ids = [
    'Reward-Neutral',
    'Loss-Neutral',
    'LgReward-Neutral',
    'SmallReward-Neutral',
    'LgLoss-Neutral',
    'SmallLoss-Neutral',
    'LgLoss-SmallLoss',
    'LgReward-SmallReward',
    'RewardHit-RewardMiss',
    'LossHit-LossMiss',
]

In [11]:
# os.listdir returns names in arbitrary order. `folders` later fixes the row
# order of the written tables (via reindex), so sort the listing to keep the
# outputs reproducible from run to run.
items = sorted(os.listdir(path))
# Keep only the entries that are directories.
folders = [item for item in items if os.path.isdir(os.path.join(path, item))]


### task contrast

In [7]:
def load_tables(sub, contr, all_effect, all_zscore):
    """Load one subject's run-mean effect table for a contrast.

    The file
    ``<path><sub>/<sub>_task-<task_label>_run-mean_contrast-<contr>_effect_parcelations.tsv``
    is read (``path`` and ``task_label`` are module-level names). Its first
    column is dropped and the remaining table is stored in ``all_effect[sub]``.
    A subject without that file is skipped silently.

    Parameters
    ----------
    sub : str
        Subject folder name; also used as the key in ``all_effect``.
    contr : str
        Contrast name as it appears in the file name.
    all_effect : MutableMapping
        Receives ``sub -> DataFrame``; a manager dict proxy when called
        through joblib.
    all_zscore : MutableMapping
        Accepted for interface compatibility; z-score tables are not loaded,
        so it is left untouched.

    Any exception is logged (with the subject and contrast that caused it)
    and swallowed, so that one bad file does not abort a parallel batch.
    """
    try:
        eff = path + '%s/%s_task-%s_run-mean_contrast-%s_effect_parcelations.tsv' % (sub, sub, task_label, contr)
        if not os.path.isfile(eff):
            return
        # skiprows=[2] drops the third physical line of the file (0-based
        # line index 2). NOTE(review): the reason for skipping that line is
        # not visible here -- confirm against the file layout.
        eff_data = pd.read_csv(eff, sep='\t', skiprows=[2])
        # The first column is discarded (presumably a saved row index).
        all_effect[sub] = eff_data.drop(eff_data.columns[0], axis=1)
        gc.collect()
    except Exception:
        # logging.exception appends the traceback; the message says which
        # subject/contrast failed, which the bare traceback did not.
        logging.exception("load_tables failed for subject %s, contrast %s", sub, contr)

In [8]:
def extract_tables(contr, all_cont_ef, all_cont_z):
    """Build, save and register the all-subject effect table for one contrast.

    Steps:
      1. ``load_tables`` is run for every entry of the module-level
         ``folders`` list through joblib (``n_jobs=40``), collecting the
         per-subject tables in a manager dict.
      2. The tables are concatenated with the subject as key, the inner row
         index is discarded, and the result is indexed by ``Subject_key`` and
         reindexed to ``folders`` (subjects without a table become NaN rows).
      3. The table is written to
         ``<output_path>task-<task_label>_contrast-<contr>_stat-effect_desc-ar1_parcelations.csv``.
      4. Every column name gets the suffix ``_<contr>`` and the table is
         stored in ``all_cont_ef[contr]``.

    Parameters
    ----------
    contr : str
        Contrast name.
    all_cont_ef : MutableMapping
        Receives ``contr -> DataFrame`` (the suffixed table).
    all_cont_z : MutableMapping
        Accepted for interface compatibility; z-score tables are not built,
        so it is left untouched.

    Any exception (including "no subject had a table") is logged together
    with the contrast name and swallowed.
    """
    try:
        # The manager is only needed while the loader jobs run; the context
        # manager shuts its server process down deterministically.
        with Manager() as manager:
            all_effect = manager.dict()
            all_zscore = manager.dict()
            Parallel(n_jobs=40)(
                delayed(load_tables)(sub, contr, all_effect, all_zscore)
                for sub in folders
            )
            # Copy out of the proxy before the manager goes away.
            all_ef = dict(all_effect)

        all_ef_df = pd.concat(list(all_ef.values()), axis=0, keys=all_ef.keys())
        # Index is (subject, inner row index): move the inner level out to a
        # column, then the subject level, and drop the inner-index column.
        all_ef_df.reset_index(level=1, inplace=True)
        all_ef_df.reset_index(inplace=True)
        all_ef_df = all_ef_df.drop(all_ef_df.columns[1], axis=1)
        all_ef_df.columns = ['Subject_key'] + list(all_ef_df.columns[1:])
        # One row per entry of `folders`, in that order.
        all_ef_df = all_ef_df.set_index('Subject_key').reindex(folders)
        all_ef_df.to_csv(output_path + 'task-%s_contrast-%s_stat-effect_desc-ar1_parcelations.csv' % (task_label, contr), index=True)

        # Suffix the columns so tables of different contrasts can later be
        # placed side by side without name clashes.
        all_ef_df.columns = [f"{col}_{contr}" for col in all_ef_df.columns]
        all_cont_ef[contr] = all_ef_df
        gc.collect()
    except Exception:
        # logging.exception appends the traceback; the message identifies the
        # contrast, which matters when several contrasts run in parallel.
        logging.exception("extract_tables failed for contrast %s", contr)


In [12]:
## run parallel

# `plt.ioff` without parentheses only referenced the function and did
# nothing; call it so interactive plotting is actually switched off.
plt.ioff()
# This manager must stay alive after the cell finishes: `all_cont_ef` is a
# proxy into it and holds the per-contrast tables filled by extract_tables.
fmanager = Manager()
all_cont_ef = fmanager.dict()
all_cont_z = fmanager.dict()
Parallel(n_jobs=15)(delayed(extract_tables)(contrast, all_cont_ef, all_cont_z) for contrast in contrasts_ids)
# Optional follow-up (disabled): merge all contrasts into one wide table.
# all_ef = dict(all_cont_ef)
# all_z = dict(all_cont_z)
# all_ef_df = pd.concat(all_ef.values(), axis=1)
# all_z_df = pd.concat(all_z.values(), axis=1)

# all_ef_df.to_csv(output_path + 'task-%s_contrast-All_stat-effect_parcelations.csv' %(task_label), index=True)
# all_z_df.to_csv(output_path + 'task-%s_contrast-All_stat-z_parcelations.csv' %(task_label), index=True)

[None, None, None, None, None, None, None, None, None, None]

### task connectivity

In [20]:
def load_tc(sub, all_cen, all_uncen, measure):
    """Flatten one subject's uncensored task connectivity matrix into one row.

    Reads, relative to the module-level ``con_path`` / ``task_label``:
      * ``<sub>/<sub>_task-<task_label>_scrubbing_log.tsv``
      * ``<sub>/<sub>_task-<task_label>_run-all_space-fsLR_atlas-Glasser_desc-uncensored_measure-<measure>_conmat.tsv``

    The subject is used only when both files exist and the sum of the log's
    ``'del vols'`` column is smaller than the sum of its
    ``'N of vols needed'`` column, both taken over all rows but the last
    (NOTE(review): presumably the last row is a total -- confirm).

    The matrix's first column is dropped, its rows are relabelled with the
    module-level ``header_list``, and the strictly upper triangle is written
    row by row into a one-row DataFrame. Its columns are ``'Subject_key'``
    (``sub`` without its first four characters, presumably the ``'sub-'``
    prefix) followed by ``'<row label>_&_<column label>'`` per edge. The row
    is stored in ``all_uncen[sub]``.

    The upper triangle is taken by position instead of masking and calling
    ``DataFrame.stack()``: the stack-based form depends on stack dropping
    NaN, which also dropped genuine NaN edges and does not hold for the newer
    stack implementation. Every subject now yields the same columns in the
    same order, with NaN edges kept as NaN.

    Parameters
    ----------
    sub : str
        Subject folder name.
    all_cen : MutableMapping
        Accepted for interface compatibility; censored matrices are not
        loaded, so it is left untouched.
    all_uncen : MutableMapping
        Receives ``sub -> one-row DataFrame``.
    measure : str
        Connectivity measure name used in the file name.

    Any exception is logged together with the subject and swallowed.
    """
    try:
        log = con_path + '%s/%s_task-%s_scrubbing_log.tsv' % (sub, sub, task_label)
        uncen = con_path + '%s/%s_task-%s_run-all_space-fsLR_atlas-Glasser_desc-uncensored_measure-%s_conmat.tsv' % (sub, sub, task_label, measure)
        if os.path.isfile(log) and os.path.isfile(uncen):
            log_data = pd.read_csv(log, sep='\t')
            if np.sum(log_data['del vols'][:-1]) < np.sum(log_data['N of vols needed'][:-1]):
                uncen_data = pd.read_csv(uncen, sep='\t')
                uncen_data = uncen_data.drop(uncen_data.columns[0], axis=1)
                # Raises (and is logged) when the row count does not match.
                uncen_data.index = header_list

                # Positions of the strictly upper triangle, row by row.
                n_rows, n_cols = uncen_data.shape
                rows, cols = np.triu_indices(n_rows, k=1, m=n_cols)
                row_labels = list(uncen_data.index)
                col_labels = list(uncen_data.columns)
                edge_names = [row_labels[r] + '_&_' + col_labels[c] for r, c in zip(rows, cols)]
                flat_uncen = pd.DataFrame([uncen_data.to_numpy()[rows, cols]], columns=edge_names)

                flat_uncen.insert(0, 'Subject_key', sub[4:])
                all_uncen[sub] = flat_uncen
        gc.collect()
    except Exception:
        # logging.exception appends the traceback; the message identifies the
        # subject, which the bare traceback did not.
        logging.exception("load_tc failed for subject %s", sub)

In [21]:
# Build one flattened connectivity table per task / measure pair.
# NOTE: `task_label`, `measure` and `con_path` are deliberately module-level
# names -- load_tc reads `task_label` and `con_path` as globals.
measures = ['pearsoncorrelation']
tasks = ['nback'] #'nback', 'SST','MID', 'rest'
tasks_folder = ['WM'] #'WM','SST', 'MID', 'rest'
for i, task_label in enumerate(tasks):
    for measure in measures:
        con_path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/tconnectivity_%s/fullscrubbed-despiked/' % tasks_folder[i]  # change accordingly
        con_items = os.listdir(con_path)
        print(tasks_folder[i])
        con_folders = [item for item in con_items if os.path.isdir(os.path.join(con_path, item))]
        tcmanager = Manager()
        all_cen = tcmanager.dict()
        all_uncen = tcmanager.dict()
        Parallel(n_jobs=30)(delayed(load_tc)(sub, all_cen, all_uncen, measure) for sub in con_folders)
        # Only the uncensored matrices are exported; `all_cen` is passed
        # through because load_tc's signature expects it.
        all_u = dict(all_uncen)
        if not all_u:
            # pd.concat raises on an empty list, which would abort the
            # remaining tasks/measures as well.
            logging.warning("No usable subjects for task %s, measure %s; nothing written", task_label, measure)
            continue
        # The manager dict is filled in job-completion order, which differs
        # between runs; emit rows in `con_folders` order instead.
        ordered_rows = [all_u[sub] for sub in con_folders if sub in all_u]
        all_uncend = pd.concat(ordered_rows, axis=0, ignore_index=True)
        all_uncend.to_csv(output_path + 'task-%s_stat-%s_desc-UncensoredFilteredFlat_Connectivity.csv' % (task_label, measure), index=False)

        # Same table again as a pickle (file name has no extension).
        with open(output_path + 'task-%s_stat-%s_desc-UncensoredFilteredFlat_Connectivity' % (task_label, measure), 'wb') as file:
            pickle.dump(all_uncend, file)
        gc.collect()
    

WM


  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (


In [5]:
# get subs with valid mean GFC:
def load_tc(sub, all_cen, all_uncen, measure):
    """Flatten one subject's general connectivity matrix into one row.

    Reads, relative to the module-level ``con_path``:
    ``<sub>/<sub>_space-fsLR_atlas-Glasser_desc-general_measure-<measure>_conmat.tsv``.
    A subject without that file is skipped silently; no scrubbing-log check
    is applied in this variant.

    The matrix's first column is dropped, its rows are relabelled with the
    module-level ``header_list``, and the strictly upper triangle is written
    row by row into a one-row DataFrame. Its columns are ``'Subject_key'``
    (``sub`` without its first four characters, presumably the ``'sub-'``
    prefix) followed by ``'<row label>_&_<column label>'`` per edge. The row
    is stored in ``all_uncen[sub]``.

    The upper triangle is taken by position instead of masking and calling
    ``DataFrame.stack()``: the stack-based form depends on stack dropping
    NaN, which also dropped genuine NaN edges and does not hold for the newer
    stack implementation. Every subject now yields the same columns in the
    same order, with NaN edges kept as NaN.

    Parameters
    ----------
    sub : str
        Subject folder name.
    all_cen : MutableMapping
        Accepted for interface compatibility; left untouched.
    all_uncen : MutableMapping
        Receives ``sub -> one-row DataFrame``.
    measure : str
        Connectivity measure name used in the file name.

    Any exception is logged together with the subject and swallowed.
    """
    try:
        uncen = con_path + '%s/%s_space-fsLR_atlas-Glasser_desc-general_measure-%s_conmat.tsv' % (sub, sub, measure)
        if os.path.isfile(uncen):
            uncen_data = pd.read_csv(uncen, sep='\t')
            uncen_data = uncen_data.drop(uncen_data.columns[0], axis=1)
            # Raises (and is logged) when the row count does not match.
            uncen_data.index = header_list

            # Positions of the strictly upper triangle, row by row.
            n_rows, n_cols = uncen_data.shape
            rows, cols = np.triu_indices(n_rows, k=1, m=n_cols)
            row_labels = list(uncen_data.index)
            col_labels = list(uncen_data.columns)
            edge_names = [row_labels[r] + '_&_' + col_labels[c] for r, c in zip(rows, cols)]
            flat_uncen = pd.DataFrame([uncen_data.to_numpy()[rows, cols]], columns=edge_names)

            flat_uncen.insert(0, 'Subject_key', sub[4:])
            all_uncen[sub] = flat_uncen
        gc.collect()
    except Exception:
        # logging.exception appends the traceback; the message identifies the
        # subject, which the bare traceback did not.
        logging.exception("load_tc failed for subject %s", sub)

In [8]:
# Build the flattened general-connectivity ("gfc") table.
# NOTE: `task_label`, `measure` and `con_path` are deliberately module-level
# names -- load_tc reads `con_path` as a global.
measures = ['pearsoncorrelation']
tasks = ['gfc'] #'tfc'
for i, task_label in enumerate(tasks):
    for measure in measures:
        con_path = '/media/hcs-sci-psy-narun/ABCC/fmriresults01/derivatives/nilearn_glm/FC-valid/'
        con_items = os.listdir(con_path)
        con_folders = [item for item in con_items if os.path.isdir(os.path.join(con_path, item))]
        tcmanager = Manager()
        all_cen = tcmanager.dict()
        all_uncen = tcmanager.dict()
        Parallel(n_jobs=30)(delayed(load_tc)(sub, all_cen, all_uncen, measure) for sub in con_folders)
        # Only `all_uncen` is exported; `all_cen` is passed through because
        # load_tc's signature expects it.
        all_u = dict(all_uncen)
        if not all_u:
            # pd.concat raises on an empty list, which would abort the
            # remaining tasks/measures as well.
            logging.warning("No usable subjects for task %s, measure %s; nothing written", task_label, measure)
            continue
        # The manager dict is filled in job-completion order, which differs
        # between runs; emit rows in `con_folders` order instead.
        ordered_rows = [all_u[sub] for sub in con_folders if sub in all_u]
        all_uncend = pd.concat(ordered_rows, axis=0, ignore_index=True)
        all_uncend.to_csv(output_path + 'task-%s_stat-%s_desc-UncensoredFilteredFlat_Connectivity.csv' % (task_label, measure), index=False)

        # Same table again as a pickle (file name has no extension).
        with open(output_path + 'task-%s_stat-%s_desc-UncensoredFilteredFlat_Connectivity' % (task_label, measure), 'wb') as file:
            pickle.dump(all_uncend, file)
        gc.collect()
    

  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
  from pandas.core import (
