In [6]:
import pandas as pd
import numpy as np
import os
import glob
import seaborn as sns
import nibabel as nib
import nilearn.plotting as plotting
import matplotlib.pyplot as plt
%matplotlib inline
import hcp_utils as hcp

##### Path variables

In [7]:
# Root folder of the XCP-D derivatives that the per-subject `func` files are read from.
source_dir = (
    '/media/hcs-psy-narun/HCP-YA-retest-BIDS/derivatives/'
    'xcp_d-0.4.0rc2+16/xcp_d/'
)

In [9]:
# Create the folder that will hold the per-run signal tables.
# exist_ok=True keeps the cell re-runnable: without it a second execution
# (or a fresh Restart & Run All) raises FileExistsError.
os.makedirs('/media/hcs-psy-narun/HCP-YA_new_rest_RETEST_connectivity/signals', exist_ok=True)

In [10]:
# Folder that receives one sub-folder of per-run signal CSVs per subject.
out_dir = (
    '/media/hcs-psy-narun/'
    'HCP-YA_new_rest_RETEST_connectivity/signals/'
)

In [11]:
# Folder that receives the group-level connectivity table.
out_tab = (
    '/media/hcs-psy-narun/'
    'HCP-YA_new_rest_RETEST_connectivity/'
)

##### Load data

In [12]:
# All Glasser (MMP) label names except the first entry of hcp.mmp.labels.
mmp_names = pd.Series(hcp.mmp.labels)[1:].values

# Column order used downstream:
#   1) names 180 .. (end - 19)
#   2) the first 180 names
#   3) the last 19 names, sorted alphabetically
# NOTE(review): presumably right hemisphere, left hemisphere, subcortex - confirm against hcp_utils.
labels_reord = np.concatenate([
    mmp_names[180:-19],
    mmp_names[:180],
    sorted(mmp_names[-19:]),
])

In [13]:
# Subject IDs of every `sub-*` folder under source_dir (the text after the last '-').
subject_dirs = sorted(glob.glob(source_dir + 'sub-*/'))
subIDs = [folder.split('/')[-2].split('-')[-1] for folder in subject_dirs]

In [14]:
# Subject IDs to process, read from a header-less CSV and stored as strings.
retest_id_table = pd.read_csv('/media/hcs-psy-narun/Alina/HCP_YA_retest_id.csv', header=None)
main_ids = np.array(retest_id_table.values.ravel(), dtype=str)

In [15]:
# sanity check: number of subject IDs read from the CSV
len(main_ids)

34

##### Read CIFTI files, extract signals, and save them as CSV files

In [16]:
# For every subject: load each denoised resting-state CIFTI run, normalise it,
# reduce it to parcel-level time series and write one CSV per run.

N_GLASSER_SUBCORTEX = 19   # trailing Glasser columns that are dropped and replaced below
N_SKIPPED_STRUCTS = 4      # leading hcp.struct keys that are not averaged
                           # NOTE(review): presumably the cortex/subcortex aggregates - confirm

# Loop-invariant lookups, hoisted out of the per-file loop.
glasser_labels = pd.Series(hcp.mmp.labels)[1:].values
subcortex_names = list(hcp.struct.keys())[N_SKIPPED_STRUCTS:]

for subID in main_ids:

    path_out = out_dir + str(subID) + '/'
    # exist_ok=True so the cell can be re-run; os.mkdir raised FileExistsError
    # as soon as the subject folder was already there.
    os.makedirs(out_dir + str(subID), exist_ok=True)

    files = sorted(glob.glob(source_dir + 'sub-' + subID + '/func/' + '*rest_dir-*' + 'denoised_bold.dtseries.nii'))
    if not files:
        # Make missing inputs visible here instead of only in the later run-count check.
        print('No resting-state files found for subject ' + str(subID))

    for file in files:

        img = nib.load(file)  # load the CIFTI file
        X = img.get_fdata()   # time series as an array
        # NOTE(review): the recorded output of this cell shows a warning raised inside
        # hcp.normalize (division by the per-column std); zero-variance columns may
        # become NaN/inf - confirm this is acceptable.
        Xn = hcp.normalize(X)             # normalise the series
        Xp = hcp.parcellate(Xn, hcp.mmp)  # apply the Glasser parcellation
        Xdf = pd.DataFrame(Xp, columns=glasser_labels)  # parcel time series as a table
        Xdfr = Xdf.reindex(columns=labels_reord)        # reorder columns as in the previous data
        # Drop the Glasser subcortical columns; they are replaced by the
        # per-structure averages computed below.
        Xdfr = Xdfr.iloc[:, :-N_GLASSER_SUBCORTEX]

        subcortex = {}
        for subc in subcortex_names:
            # getattr replaces eval('hcp.struct.<name>'): same lookup, no code execution
            # from a built string.
            struct_index = getattr(hcp.struct, subc)
            # mean over the structure's columns for every time point (NaN-skipping)
            subcortex[subc] = pd.DataFrame(Xn[:, struct_index]).mean(axis=1).values
        sbcr = pd.DataFrame(subcortex)

        Xdfr1 = pd.concat([Xdfr, sbcr], axis=1)

        # Build the run tag from the 2nd, 4th and 3rd '_'-separated fields of the
        # file name (parsed once instead of three times).
        name_parts = file.split('/')[-1].split('_')
        run_tag = (
            name_parts[1].split('-')[1]
            + '_' + name_parts[3].replace('-', '')
            + '_' + name_parts[2].split('-')[1]
        )
        Xdfr1.to_csv(path_out + str(subID) + '_' + run_tag + '.csv')

  return (X - np.mean(X,axis=0))/np.std(X,axis=0)


##### Load saved files into dictionary

In [22]:
# Rebuild {subject ID: {run key: signal table}} from the CSV files written earlier.
dct_signls = {}

for subID in main_ids:

    dct_signls[subID] = run_tables = {}

    for csv_path in sorted(glob.glob(out_dir + subID + '/' + '*' + '.csv')):
        # run key = file name up to the first '.', without its first two '_' fields
        file_stem = csv_path.split('/')[-1].split('.')[0]
        run_key = '_'.join(file_stem.split('_')[2:])
        run_tables[run_key] = pd.read_csv(csv_path, index_col=0)

In [23]:
# check the length: number of subjects held in the dictionary
len(dct_signls.keys())

34

In [24]:
# check the number of runs (files) loaded for one example subject
len(dct_signls['103818'].keys())

4

In [25]:
# Report every subject whose number of loaded runs differs from 4.
for subject, runs in dct_signls.items():
    run_keys = runs.keys()
    if len(run_keys) == 4:
        continue
    print(subject)
    print(run_keys)

##### Concatenate the 4 files into 1 signal table per subject

In [28]:
# Replace each subject's {run key: table} dictionary by one table in which the
# runs are stacked row-wise, in sorted run-key order.
for key in dct_signls.keys():
    runs = dct_signls[key]

    # This cell overwrites dct_signls in place. Without this guard a second
    # execution would treat the already-concatenated DataFrame as a run
    # dictionary and concatenate its columns instead.
    if isinstance(runs, pd.DataFrame):
        continue

    tbls = sorted(runs.keys())
    if not tbls:
        raise ValueError('no signal tables loaded for subject ' + str(key))

    # One concat over the whole list instead of growing the frame pairwise.
    dct_signls[key] = pd.concat([runs[nm] for nm in tbls], axis=0, ignore_index=True)

##### Calculate the correlation matrix and flatten it into one row per subject

In [30]:
# For each subject: correlate all signal columns, apply the r-to-z (arctanh)
# transform and flatten the upper triangle into one labelled vector.
# df_rest_ ends up with one column per subject and one row per column pair.

dct_mat_str = {}

for subID in sorted(dct_signls.keys()):

    df = dct_signls[subID]

    Xdfr_cor = df.corr()  # correlation matrix (pandas default method)
    row_names = Xdfr_cor.index.to_numpy()
    col_names = Xdfr_cor.columns.to_numpy()

    # Positions strictly above the diagonal, in row-major order.
    # The previous version masked the lower triangle with NaN and relied on
    # stack() dropping NaN. That also dropped genuinely NaN correlations (giving
    # vectors of different length per subject) and depends on the legacy stack
    # implementation; explicit indices avoid both.
    rows, cols = np.triu_indices(len(col_names), k=1)

    # r-to-z on the off-diagonal values only, so arctanh is never applied to
    # the diagonal (r = 1 -> inf plus a runtime warning).
    fc_values = np.arctanh(Xdfr_cor.to_numpy()[rows, cols])

    indx = [i + '_&_' + j for i, j in zip(row_names[rows], col_names[cols])]
    fc_string = pd.Series(fc_values, index=indx)  # vector of the upper triangle

    dct_mat_str[subID] = fc_string

df_rest_ = pd.DataFrame(dct_mat_str)

In [31]:
# transposed view: one row per subject, one column per pair of signal columns (arctanh-transformed correlation)
df_rest_.T

Unnamed: 0,R_V1_&_R_MST,R_V1_&_R_V6,R_V1_&_R_V2,R_V1_&_R_V3,R_V1_&_R_V4,R_V1_&_R_V8,R_V1_&_R_4,R_V1_&_R_3b,R_V1_&_R_FEF,R_V1_&_R_PEF,...,pallidum_right_&_putamen_left,pallidum_right_&_putamen_right,pallidum_right_&_thalamus_left,pallidum_right_&_thalamus_right,putamen_left_&_putamen_right,putamen_left_&_thalamus_left,putamen_left_&_thalamus_right,putamen_right_&_thalamus_left,putamen_right_&_thalamus_right,thalamus_left_&_thalamus_right
103818,0.318335,0.371497,0.977259,0.765622,0.609599,0.388311,0.245236,0.371716,0.002455,0.053507,...,0.111112,0.165683,0.084324,0.090713,0.547254,0.279958,0.270655,0.224086,0.269526,0.491691
105923,0.306039,0.66593,1.153015,0.9095,0.718358,0.40586,0.026101,0.083973,0.309478,0.095295,...,0.061612,0.09312,0.07926,0.065281,0.407841,0.197837,0.160214,0.187853,0.212835,0.395685
114823,0.178502,0.599634,0.929645,0.841252,0.695489,0.42096,-0.098291,0.094652,0.234786,0.22181,...,0.041944,0.094836,0.004115,0.031112,0.40682,0.236082,0.22188,0.190647,0.205004,0.445037
115320,0.243321,0.664069,0.938495,0.824216,0.640275,0.436011,0.342439,0.354407,0.432571,0.301141,...,0.076011,0.135443,0.035382,0.056853,0.296924,0.176611,0.146843,0.159915,0.12414,0.214738
122317,0.184334,0.572013,0.858446,0.440449,0.518291,0.539749,0.229217,0.295575,0.087706,-0.025521,...,0.095881,0.095007,0.055365,0.047611,0.422293,0.21157,0.216656,0.230774,0.25995,0.547424
125525,0.223584,0.646363,1.283646,0.87486,0.684737,0.455249,0.521108,0.582647,0.342744,0.088994,...,0.054583,0.117353,0.059908,0.05215,0.288651,0.177789,0.138547,0.1147,0.170712,0.267332
135528,-0.087057,0.211434,0.682886,0.72552,0.606852,0.259708,0.017415,0.178495,0.286403,0.093781,...,0.081076,0.1216,0.08298,0.099322,0.279148,0.317547,0.268785,0.276295,0.296032,0.363564
137128,0.099656,0.115489,0.626239,0.560205,0.592218,0.273514,-0.018505,0.108484,0.088575,0.097903,...,0.069603,0.113414,0.040775,0.051924,0.357561,0.144312,0.152249,0.158621,0.182366,0.322391
139839,0.242122,0.701285,1.061345,1.018767,0.664719,0.436309,0.244493,0.202698,0.398263,0.121739,...,0.093094,0.126892,0.10056,0.058611,0.450047,0.251213,0.237952,0.228533,0.236047,0.228301
143325,0.280068,0.48503,0.950883,0.796806,0.566182,0.529687,0.423096,0.44431,0.080168,0.189402,...,0.114764,0.133628,0.070051,0.088134,0.494495,0.291612,0.279678,0.231064,0.252037,0.381451


##### Save

In [32]:
# Write the subjects-by-pairs table next to the signals folder.
group_fc_path = out_tab + 'Rest_FC_retest_group_z_full.csv'
df_rest_.transpose().to_csv(group_fc_path)