In [1]:
import pandas as pd
import numpy as np
import os
import glob
import seaborn as sns
import nibabel as nib
import nilearn.plotting as plotting
import matplotlib.pyplot as plt
%matplotlib inline
import hcp_utils as hcp

pixdim[1,2,3] should be non-zero; setting 0 dims to 1


##### Path variables

In [2]:
# Root directory that is globbed below for 'sub-*/' subject folders
# and their func/*.dtseries.nii files.
source_dir = (
    '/media/hcs-psy-narun/HCP-YA-MICA-BIDS/derivatives/'
    'xcp_d-0.4.0rc2+16/xcp_d/'
)

In [3]:
# Create the output folder for the per-run signal tables.
# exist_ok=True keeps this cell re-runnable: without it a second execution
# (e.g. Restart & Run All) raises FileExistsError.
os.makedirs('/media/hcs-psy-narun/HCP-YA_new_rest_connectivity/signals', exist_ok=True)

In [4]:
# Folder that receives one sub-folder of per-run signal CSVs per subject.
# The trailing slash is required: paths are built by string concatenation.
out_dir = (
    '/media/hcs-psy-narun/HCP-YA_new_rest_connectivity/'
    'signals/'
)

In [5]:
# Folder for the final group-level connectivity table.
# The trailing slash is required: the file name is appended by concatenation.
out_tab = (
    '/media/hcs-psy-narun/'
    'HCP-YA_new_rest_connectivity/'
)

##### Load data

In [6]:
# Build the column order used for the signal tables:
#   1. labels at positions 180 .. -19 (after dropping the first label entry),
#   2. labels at positions 0 .. 179,
#   3. the last 19 labels, sorted alphabetically.
# NOTE(review): presumably this is right hemisphere, left hemisphere, then the
# subcortical regions of the MMP atlas -- confirm against hcp_utils.
mmp_labels = pd.Series(hcp.mmp.labels)[1:].values
labels_reord = np.concatenate([
    mmp_labels[180:-19],
    mmp_labels[0:180],
    sorted(mmp_labels[-19:]),
])

In [7]:
# Read subject IDs: every 'sub-<ID>/' directory under source_dir contributes
# the part of its folder name after the last '-'.
subIDs = []
for sub_path in sorted(glob.glob(source_dir + 'sub-*/')):
    folder_name = sub_path.split('/')[-2]  # last component before the trailing '/'
    subIDs.append(folder_name.split('-')[-1])

In [8]:
# Read the header-less ID table and flatten it into a 1-D array of strings.
ids_table = pd.read_csv('/media/hcs-psy-narun/Alina/HCP_YA_id_iq.csv', header=None)
main_ids = np.array(ids_table.values.ravel(), dtype=str)

In [9]:
# Number of IDs read from the ID table (main_ids is a flattened 1-D array).
main_ids.shape[0]

873

##### Read CIFTI files, extract signals and save them as files

In [31]:
# For every subject, parcellate each resting-state dtseries file and write one
# CSV of regional time series per run to out_dir/<subID>/.

# Loop-invariant lookups, computed once instead of once per file.
# [1:] drops the first label entry (presumably the "unassigned" label -- TODO confirm).
parcel_labels = pd.Series(hcp.mmp.labels)[1:].values
# The first four entries of hcp.struct are skipped; presumably these are the
# cortex/subcortex aggregates and the rest are single subcortical structures
# -- TODO confirm against hcp_utils.
subcortex_names = list(hcp.struct.keys())[4:]

for subID in subIDs:

    path_out = out_dir + str(subID) + '/'
    # exist_ok=True: a re-run (or a resumed partial run) must not crash with
    # FileExistsError on subjects whose folder was already created.
    os.makedirs(path_out, exist_ok=True)

    files = sorted(glob.glob(source_dir+'sub-'+subID+'/func/'+'*rest_dir-*'+'denoised_bold.dtseries.nii'))

    for file in files:

        img = nib.load(file)  # load file
        X = img.get_fdata()  # time series as an array
        Xn = hcp.normalize(X)  # normalize series
        Xp = hcp.parcellate(Xn, hcp.mmp)  # apply Glasser parcellation
        Xdf = pd.DataFrame(Xp, columns=parcel_labels)  # time series as a table
        Xdfr = Xdf.reindex(columns=labels_reord)  # reorder columns as in our previous data
        # Drop the last 19 columns: the subcortex from the Glasser parcellation
        # is replaced by the structure-wise averages computed below.
        Xdfr = Xdfr.iloc[:, :-19]

        subcortex = {}
        for subc in subcortex_names:
            # getattr replaces the former eval('hcp.struct.<name>'): same
            # attribute lookup without evaluating a constructed string.
            struct_cols = getattr(hcp.struct, subc)
            # Mean over the structure's columns for every time point. The
            # pandas mean is kept on purpose: it skips NaN values by default.
            subcortex[subc] = pd.DataFrame(Xn[:, struct_cols]).mean(axis=1).values
        sbcr = pd.DataFrame(subcortex)

        Xdfr1 = pd.concat([Xdfr, sbcr], axis=1)

        # Parse the file name once; the fields are '_'-separated 'key-value'
        # entities: [1] -> task, [2] -> direction, [3] -> run.
        name_fields = file.split('/')[-1].split('_')
        task_name = name_fields[1].split('-')[1]
        direction = name_fields[2].split('-')[1]
        run_name = name_fields[3].replace('-', '')

        Xdfr1.to_csv(path_out + str(subID) + '_' + task_name + '_' + run_name + '_' + direction + '.csv')
        

  return (X - np.mean(X,axis=0))/np.std(X,axis=0)


##### Load saved files into dictionary

In [11]:
# Load every saved per-run CSV into a nested dict: subject ID -> run key -> table.
dct_signls = {}

for subID in subIDs:

    subject_runs = {}

    for csv_path in sorted(glob.glob(out_dir+subID+'/'+'*'+'.csv')):
        # File stem without extension; everything after its second '_' is the
        # run key.
        stem = csv_path.split('/')[-1].split('.')[0]
        run_key = ('_').join(stem.split('_')[2:])
        subject_runs[run_key] = pd.read_csv(csv_path, index_col=0)

    dct_signls[subID] = subject_runs

In [None]:
# Check how many subjects were loaded
len(dct_signls)

In [13]:
# Check the number of runs (files) loaded for one example subject
len(dct_signls['100206'])

4

In [14]:
# Check for missing files: report every subject that does not have exactly
# 4 loaded runs, together with the run keys that are present.

for sub, runs in dct_signls.items():
    if len(runs) == 4:
        continue
    print(sub)
    print(runs.keys())

##### Concat 4 files into 1 signal table

In [17]:
# Replace each subject's dict of run tables with a single table holding all
# runs stacked row-wise, in sorted run-key order.
for key, runs in dct_signls.items():
    # Idempotency guard: once this cell has run, the entry is already a
    # DataFrame. Re-processing it would treat column names as run names and
    # silently produce a concatenated Series instead of a signal table.
    if isinstance(runs, pd.DataFrame):
        continue
    if len(runs) == 0:
        raise ValueError('No signal tables loaded for subject ' + str(key))
    # A single concat over the list instead of growing the frame in a loop.
    dct_signls[key] = pd.concat([runs[nm] for nm in sorted(runs.keys())], axis=0, ignore_index=True)

##### Calculate the correlation matrices and flatten each into one row

In [19]:
# For each subject: correlate all regional signals, apply the r-to-z
# (arctanh) transform and keep the entries strictly above the diagonal as one
# labelled vector. The vectors are then collected into a table with one
# column per subject.

dct_mat_str = {}

for subID in sorted(dct_signls.keys()):

    corr_z = np.arctanh(dct_signls[subID].corr())  # correlation matrix, r-to-z

    # Boolean mask of the strict upper triangle (k=1 excludes the diagonal).
    upper_mask = np.triu(np.ones(corr_z.shape), k=1).astype(bool)

    # Long format: one row per retained region pair; stack() drops the masked
    # (NaN) cells.
    pairs = corr_z.where(upper_mask).stack().reset_index()
    pair_names = [
        first + '_&_' + second
        for first, second in zip(pairs['level_0'], pairs['level_1'])
    ]

    # Vector-like data for the upper part of the correlation matrix.
    dct_mat_str[subID] = pd.Series(np.array(pairs[0]), index=pair_names)

df_rest_ = pd.DataFrame(dct_mat_str)

In [20]:
# Display the group table with one row per subject and one column per region pair.
df_rest_.transpose()

Unnamed: 0,R_V1_&_R_MST,R_V1_&_R_V6,R_V1_&_R_V2,R_V1_&_R_V3,R_V1_&_R_V4,R_V1_&_R_V8,R_V1_&_R_4,R_V1_&_R_3b,R_V1_&_R_FEF,R_V1_&_R_PEF,...,pallidum_right_&_putamen_left,pallidum_right_&_putamen_right,pallidum_right_&_thalamus_left,pallidum_right_&_thalamus_right,putamen_left_&_putamen_right,putamen_left_&_thalamus_left,putamen_left_&_thalamus_right,putamen_right_&_thalamus_left,putamen_right_&_thalamus_right,thalamus_left_&_thalamus_right
100206,0.226010,0.423218,0.743392,0.673107,0.605246,0.319110,0.158588,0.306271,0.028238,0.126797,...,0.135037,0.197415,0.101042,0.149911,0.664261,0.234388,0.224559,0.233620,0.331742,0.281420
100307,0.222151,0.567189,0.992299,0.650762,0.386657,0.425053,0.046241,0.113939,0.332872,0.198960,...,0.045592,0.075001,0.013923,-0.006053,0.282734,0.133110,0.136262,0.075486,0.116133,0.145946
100408,0.195462,0.346968,0.865607,0.661944,0.494127,0.400729,0.163101,0.335636,-0.029049,0.096574,...,0.063044,0.103815,0.057084,0.068172,0.298321,0.119987,0.150077,0.097859,0.118715,0.249539
100610,0.424294,0.491327,0.878055,0.869785,0.783447,0.548097,0.504140,0.444348,0.428329,0.340649,...,0.154307,0.192001,0.128225,0.205036,0.492174,0.278152,0.312114,0.283278,0.311848,0.447222
101006,0.521876,0.783721,1.323310,1.092694,1.032858,0.583760,0.490503,0.503347,0.520833,0.241291,...,0.047484,0.099776,0.029364,0.079642,0.390709,0.235737,0.196369,0.169119,0.224477,0.239275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
991267,0.437975,0.678543,0.762799,0.684585,0.704614,0.543893,0.277274,0.285670,0.403463,0.324135,...,0.071707,0.130467,0.096064,0.141353,0.511244,0.274185,0.277654,0.271591,0.349251,0.359506
992673,0.542572,0.937431,1.318759,1.223323,1.025598,0.774298,0.613749,0.621932,0.771166,0.495030,...,0.152160,0.212275,0.101218,0.206550,0.432399,0.309536,0.273022,0.264935,0.327751,0.361556
992774,0.426191,0.746183,1.282801,0.885269,0.725445,0.580296,0.564937,0.598338,0.453818,0.229529,...,0.022818,0.085168,0.048895,0.076919,0.370840,0.051342,0.096533,0.044783,0.099912,0.292296
993675,0.373106,0.500096,1.287350,1.070962,0.827959,0.520119,0.694412,0.577707,0.653493,0.419444,...,0.054520,0.099572,0.081775,0.068575,0.401944,0.301112,0.300698,0.305963,0.331518,0.376244


##### Save

In [21]:
# Write the group table (one row per subject) to the output folder.
group_table_path = out_tab + 'Rest_FC_group_z_full.csv'
df_rest_.transpose().to_csv(group_table_path)