In [1]:
import os
import h5py
import numpy as np
import pandas as pd
from scipy.stats import pearsonr


In [2]:
# Read the HCP S900 subject list: one subject ID per line.
subjlist = '../data/subjectListS900_QC_gr.txt'
# `with` closes the file even if reading raises.
with open(subjlist) as f:
    mylist = f.read().split("\n")

# Keep every non-blank line. Slicing off the last element (`mylist[:-1]`) is
# only correct when the file ends with exactly one newline; without a trailing
# newline it would silently drop the last subject. Stripping also removes a
# stray '\r' that would otherwise break the ID look-ups further down.
subjlist = joinedlist = [line.strip() for line in mylist if line.strip()]

# Independent copy of the ID list.
mysubjects = list(subjlist)
print(len(mysubjects))


709


### get HCP demographics

In [3]:
# Unrestricted and restricted HCP tables; the first CSV column becomes the index.
HCP_u = pd.read_csv('../data/HCP_solar/HCP_s1200_unrestricted.csv', index_col=0)
HCP_r = pd.read_csv('../data/HCP_solar/HCP_s1200_restricted.csv', index_col=0)

# Turn the index labels into strings so they can be looked up with the
# string subject IDs read from the subject list.
for _table in (HCP_u, HCP_r):
    _table.index = _table.index.map(str)

# number of rows in each table
len(HCP_u.index), len(HCP_r.index)

(1206, 1206)

### Get hippocampus-to-cortex functional connectivity (FC) for individual subjects and subfields

### LSUB (left subiculum)

In [4]:
# Column labels for the 360 cortical nodes: 'node_1' ... 'node_360'
# (Glasser numbering, per the original note).
tot_node_num_cortex = 360
node_str = ['node_' + str(k) for k in range(1, tot_node_num_cortex + 1)]
print(len(node_str))
# show the first and last label as a sanity check
node_str[0], '...', node_str[-1]



360


('node_1', '...', 'node_360')

In [15]:
# Column layout of the per-vertex table: two covariates followed by the
# cortical node columns.
mycols = ['age', 'sex', *node_str]

# Empty table with one row per subject; the index is labelled 'id'.
df = pd.DataFrame(columns=mycols, index=mysubjects).rename_axis('id')

In [21]:
# Export, for every LSUB vertex, a table of (age, sex, 360 cortical FC values)
# with one row per subject, written to <odir>/fc_<vertex>/fc_lsub_nodes.csv.

# data dirs
conndir  = '../data/tout_hippoc/'
odir = '../solar/FC_LSUB/'

# we have 1024 vertices along the subiculum
tot_node_num = 1024
# first row of the per-subject left-hemisphere matrix exported by this cell
row_offset = 0

# Start from a fresh, complete table so the cell can be re-run and does not
# depend on whatever an earlier cell left in `df`.
df = pd.DataFrame(index = mysubjects,
                  columns = ['age', 'sex'] + node_str)
df.index.name = 'id'

# Age and sex do not depend on the vertex: look them up once, by label.
# `.loc` with a list of IDs raises KeyError if a subject is missing from the
# demographics tables. Assigning whole columns also avoids the chained
# `df.iloc[i]['age'] = ...` form, which writes to a temporary under pandas
# Copy-on-Write and leaves `df` unchanged.
df['age'] = HCP_r.loc[list(df.index), 'Age_in_Yrs']
df['sex'] = HCP_u.loc[list(df.index), 'Gender']

for j in range(tot_node_num):

    # one sub-directory per vertex, 1-based: fc_1, fc_2, ...
    vertex_dir = os.path.join(odir, 'fc_' + str(j + 1))
    os.makedirs(vertex_dir, exist_ok=True)
    fc_name = os.path.join(vertex_dir, 'fc_lsub_nodes.csv')

    for subjID in subjlist:

        # get the hippocampus-cortex fc for each subject
        subjconn_left = os.path.join(conndir, 'HCP_' + subjID + '_left.h5')
        with h5py.File(subjconn_left, "r") as f:
            # Read only the row for this vertex instead of the whole matrix
            # ((4096, 360) according to the original note).
            conn = np.ravel(f['HCP_' + subjID][row_offset + j, :])

        # single label-based assignment (no chained indexing)
        df.loc[subjID, node_str] = conn

    # Drop NaN rows on a copy and save. `df` itself keeps every subject:
    # rebinding it to the reduced frame would make the next vertex fail for
    # any subject that had been dropped.
    df.dropna().to_csv(fc_name)

### LCA (left cornu ammonis, CA)

In [25]:
# Labels 'node_1' ... 'node_360' for the cortical nodes
# (Glasser numbering, per the original note).
tot_node_num_cortex = 360
node_str = list(map('node_{}'.format, range(1, tot_node_num_cortex + 1)))
print(len(node_str))
# first / last label, for a quick visual check
node_str[0], '...', node_str[-1]


360


('node_1', '...', 'node_360')

In [29]:
# Export, for every LCA vertex, a table of (age, sex, 360 cortical FC values)
# with one row per subject, written to <odir>/fc_<vertex>/fc_lca_nodes.csv.

# data dirs
conndir  = '../data/tout_hippoc/'
odir = '../solar/FC_LCA/'

# we have 2048 vertices along the CA
tot_node_num = 2048
# CA rows start at row 1024 of the per-subject left-hemisphere matrix
row_offset = 1024

# Start from a fresh, complete table so the cell can be re-run and does not
# depend on whatever an earlier cell left in `df` (an earlier export cell may
# have dropped rows from it).
df = pd.DataFrame(index = mysubjects,
                  columns = ['age', 'sex'] + node_str)
df.index.name = 'id'

# Age and sex do not depend on the vertex: look them up once, by label.
# `.loc` with a list of IDs raises KeyError if a subject is missing from the
# demographics tables. Assigning whole columns also avoids the chained
# `df.iloc[i]['age'] = ...` form, which writes to a temporary under pandas
# Copy-on-Write and leaves `df` unchanged.
df['age'] = HCP_r.loc[list(df.index), 'Age_in_Yrs']
df['sex'] = HCP_u.loc[list(df.index), 'Gender']

for j in range(tot_node_num):

    # one sub-directory per vertex, 1-based: fc_1, fc_2, ...
    vertex_dir = os.path.join(odir, 'fc_' + str(j + 1))
    os.makedirs(vertex_dir, exist_ok=True)
    fc_name = os.path.join(vertex_dir, 'fc_lca_nodes.csv')

    for subjID in subjlist:

        # get the hippocampus-cortex fc for each subject
        subjconn_left = os.path.join(conndir, 'HCP_' + subjID + '_left.h5')
        with h5py.File(subjconn_left, "r") as f:
            # Read only the row for this vertex instead of the whole matrix
            # ((4096, 360) according to the original note).
            conn = np.ravel(f['HCP_' + subjID][row_offset + j, :])

        # single label-based assignment (no chained indexing)
        df.loc[subjID, node_str] = conn

    # Drop NaN rows on a copy and save. `df` itself keeps every subject:
    # rebinding it to the reduced frame would make the next vertex fail for
    # any subject that had been dropped.
    df.dropna().to_csv(fc_name)

### LDG (left dentate gyrus, DG)

In [30]:
# Build the 360 cortical node labels, 'node_1' through 'node_360'
# (Glasser numbering, per the original note).
tot_node_num_cortex = 360
node_str = [f'node_{idx}' for idx in range(1, tot_node_num_cortex + 1)]
print(len(node_str))
# peek at both ends of the label list
node_str[0], '...', node_str[-1]

360


('node_1', '...', 'node_360')

In [31]:
# Export, for every LDG vertex, a table of (age, sex, 360 cortical FC values)
# with one row per subject, written to <odir>/fc_<vertex>/fc_ldg_nodes.csv.

# data dirs
conndir  = '../data/tout_hippoc/'
odir = '../solar/FC_LDG/'

# we have 1024 vertices along the DG
tot_node_num = 1024
# DG rows start at row 1024 + 2048 of the per-subject left-hemisphere matrix
row_offset = 1024 + 2048

# Start from a fresh, complete table so the cell can be re-run and does not
# depend on whatever an earlier cell left in `df` (an earlier export cell may
# have dropped rows from it).
df = pd.DataFrame(index = mysubjects,
                  columns = ['age', 'sex'] + node_str)
df.index.name = 'id'

# Age and sex do not depend on the vertex: look them up once, by label.
# `.loc` with a list of IDs raises KeyError if a subject is missing from the
# demographics tables. Assigning whole columns also avoids the chained
# `df.iloc[i]['age'] = ...` form, which writes to a temporary under pandas
# Copy-on-Write and leaves `df` unchanged.
df['age'] = HCP_r.loc[list(df.index), 'Age_in_Yrs']
df['sex'] = HCP_u.loc[list(df.index), 'Gender']

for j in range(tot_node_num):

    # one sub-directory per vertex, 1-based: fc_1, fc_2, ...
    vertex_dir = os.path.join(odir, 'fc_' + str(j + 1))
    os.makedirs(vertex_dir, exist_ok=True)
    fc_name = os.path.join(vertex_dir, 'fc_ldg_nodes.csv')

    for subjID in subjlist:

        # get the hippocampus-to-cortex connectivity for each subject
        subjconn_left = os.path.join(conndir, 'HCP_' + subjID + '_left.h5')
        with h5py.File(subjconn_left, "r") as f:
            # Read only the row for this vertex instead of the whole matrix
            # ((4096, 360) according to the original note).
            conn = np.ravel(f['HCP_' + subjID][row_offset + j, :])

        # single label-based assignment (no chained indexing)
        df.loc[subjID, node_str] = conn

    # Drop NaN rows on a copy and save. `df` itself keeps every subject:
    # rebinding it to the reduced frame would make the next vertex fail for
    # any subject that had been dropped.
    df.dropna().to_csv(fc_name)

### RSUB (right subiculum)

In [33]:
# Export, for every RSUB vertex, a table of (age, sex, 360 cortical FC values)
# with one row per subject, written to <odir>/fc_<vertex>/fc_rsub_nodes.csv.

# data dirs
conndir  = '../data/tout_hippoc/'
odir = '../solar/FC_RSUB/'

# number of matrix rows (vertices) exported by this cell
tot_node_num = 1024
# first row of the per-subject right-hemisphere matrix exported by this cell
row_offset = 0

# Start from a fresh, complete table so the cell can be re-run and does not
# depend on whatever an earlier cell left in `df` (an earlier export cell may
# have dropped rows from it).
df = pd.DataFrame(index = mysubjects,
                  columns = ['age', 'sex'] + node_str)
df.index.name = 'id'

# Age and sex do not depend on the vertex: look them up once, by label.
# `.loc` with a list of IDs raises KeyError if a subject is missing from the
# demographics tables. Assigning whole columns also avoids the chained
# `df.iloc[i]['age'] = ...` form, which writes to a temporary under pandas
# Copy-on-Write and leaves `df` unchanged.
df['age'] = HCP_r.loc[list(df.index), 'Age_in_Yrs']
df['sex'] = HCP_u.loc[list(df.index), 'Gender']

for j in range(tot_node_num):

    # one sub-directory per vertex, 1-based: fc_1, fc_2, ...
    vertex_dir = os.path.join(odir, 'fc_' + str(j + 1))
    os.makedirs(vertex_dir, exist_ok=True)
    fc_name = os.path.join(vertex_dir, 'fc_rsub_nodes.csv')

    for subjID in subjlist:

        # get the hippocampus-cortex fc for each subject
        # (right-hemisphere file, hence the `_right` variable name)
        subjconn_right = os.path.join(conndir, 'HCP_' + subjID + '_right.h5')
        with h5py.File(subjconn_right, "r") as f:
            # Read only the row for this vertex instead of the whole matrix
            # ((4096, 360) according to the original note).
            conn = np.ravel(f['HCP_' + subjID][row_offset + j, :])

        # single label-based assignment (no chained indexing)
        df.loc[subjID, node_str] = conn

    # Drop NaN rows on a copy and save. `df` itself keeps every subject:
    # rebinding it to the reduced frame would make the next vertex fail for
    # any subject that had been dropped.
    df.dropna().to_csv(fc_name)

### RCA (right cornu ammonis, CA)

In [6]:
# Cortical node labels 'node_1' ... 'node_360'
# (Glasser numbering, per the original note).
tot_node_num_cortex = 360
node_str = ['node_' + str(k) for k in range(1, tot_node_num_cortex + 1)]
print(len(node_str))

# Fresh, empty per-vertex table: covariates first, then the node columns,
# one row per subject, index labelled 'id'.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(columns=mycols, index=mysubjects).rename_axis('id')

360


In [None]:
# Export, for every RCA vertex, a table of (age, sex, 360 cortical FC values)
# with one row per subject, written to <odir>/fc_<vertex>/fc_rca_nodes.csv.

# data dirs
conndir  = '../data/tout_hippoc/'
odir = '../solar/FC_RCA/'

# number of matrix rows (vertices) exported by this cell
tot_node_num = 2048
# exported rows start at row 1024 of the per-subject right-hemisphere matrix
row_offset = 1024

# Start from a fresh, complete table so the cell can be re-run on its own.
df = pd.DataFrame(index = mysubjects,
                  columns = ['age', 'sex'] + node_str)
df.index.name = 'id'

# Age and sex do not depend on the vertex: look them up once, by label.
# `.loc` with a list of IDs raises KeyError if a subject is missing from the
# demographics tables. Assigning whole columns also avoids the chained
# `df.iloc[i]['age'] = ...` form, which writes to a temporary under pandas
# Copy-on-Write and leaves `df` unchanged.
df['age'] = HCP_r.loc[list(df.index), 'Age_in_Yrs']
df['sex'] = HCP_u.loc[list(df.index), 'Gender']

for j in range(tot_node_num):

    # one sub-directory per vertex, 1-based: fc_1, fc_2, ...
    vertex_dir = os.path.join(odir, 'fc_' + str(j + 1))
    os.makedirs(vertex_dir, exist_ok=True)
    fc_name = os.path.join(vertex_dir, 'fc_rca_nodes.csv')

    for subjID in subjlist:

        # get the hippocampus-cortex fc for each subject
        # (right-hemisphere file, hence the `_right` variable name)
        subjconn_right = os.path.join(conndir, 'HCP_' + subjID + '_right.h5')
        with h5py.File(subjconn_right, "r") as f:
            # Read only the row for this vertex instead of the whole matrix
            # ((4096, 360) according to the original note).
            conn = np.ravel(f['HCP_' + subjID][row_offset + j, :])

        # single label-based assignment (no chained indexing)
        df.loc[subjID, node_str] = conn

    # Drop NaN rows on a copy and save. `df` itself keeps every subject:
    # rebinding it to the reduced frame would make the next vertex fail for
    # any subject that had been dropped.
    df.dropna().to_csv(fc_name)

### RDG (right dentate gyrus, DG)

In [6]:
# Labels for the 360 cortical nodes, 'node_1' ... 'node_360'
# (Glasser numbering, per the original note).
tot_node_num_cortex = 360
node_str = [f'node_{idx}' for idx in range(1, tot_node_num_cortex + 1)]
print(len(node_str))

# Reset the per-vertex table: 'age' and 'sex' followed by the node columns,
# one (still empty) row per subject, index labelled 'id'.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(columns=mycols, index=mysubjects).rename_axis('id')

360


In [None]:
# Export, for every RDG vertex, a table of (age, sex, 360 cortical FC values)
# with one row per subject, written to <odir>/fc_<vertex>/fc_rdg_nodes.csv.

# data dirs
conndir  = '../data/tout_hippoc/'
odir = '../solar/FC_RDG/'

# number of matrix rows (vertices) exported by this cell
tot_node_num = 1024
# exported rows start at row 1024 + 2048 of the per-subject right-hemisphere matrix
row_offset = 1024 + 2048

# Start from a fresh, complete table so the cell can be re-run on its own.
df = pd.DataFrame(index = mysubjects,
                  columns = ['age', 'sex'] + node_str)
df.index.name = 'id'

# Age and sex do not depend on the vertex: look them up once, by label.
# `.loc` with a list of IDs raises KeyError if a subject is missing from the
# demographics tables. Assigning whole columns also avoids the chained
# `df.iloc[i]['age'] = ...` form, which writes to a temporary under pandas
# Copy-on-Write and leaves `df` unchanged.
df['age'] = HCP_r.loc[list(df.index), 'Age_in_Yrs']
df['sex'] = HCP_u.loc[list(df.index), 'Gender']

for j in range(tot_node_num):

    # one sub-directory per vertex, 1-based: fc_1, fc_2, ...
    vertex_dir = os.path.join(odir, 'fc_' + str(j + 1))
    os.makedirs(vertex_dir, exist_ok=True)
    fc_name = os.path.join(vertex_dir, 'fc_rdg_nodes.csv')

    for subjID in subjlist:

        # get the hippocampus-to-cortex connectivity for each subject
        # (right-hemisphere file, hence the `_right` variable name)
        subjconn_right = os.path.join(conndir, 'HCP_' + subjID + '_right.h5')
        with h5py.File(subjconn_right, "r") as f:
            # Read only the row for this vertex instead of the whole matrix
            # ((4096, 360) according to the original note).
            conn = np.ravel(f['HCP_' + subjID][row_offset + j, :])

        # single label-based assignment (no chained indexing)
        df.loc[subjID, node_str] = conn

    # Drop NaN rows on a copy and save. `df` itself keeps every subject:
    # rebinding it to the reduced frame would make the next vertex fail for
    # any subject that had been dropped.
    df.dropna().to_csv(fc_name)