In [1]:
import os
import h5py
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.stats import pearsonr

### get the HCP demographics

In [2]:
# Load the two HCP S1200 spreadsheets; the first CSV column becomes the index.
HCP_u = pd.read_csv('../data/HCP_solar/HCP_s1200_unrestricted.csv', index_col=0)
HCP_r = pd.read_csv('../data/HCP_solar/HCP_s1200_restricted.csv', index_col=0)

# Turn the index labels into strings so they can be looked up with the
# string subject IDs used further down.
HCP_u = HCP_u.set_axis(HCP_u.index.map(str), axis='index')
HCP_r = HCP_r.set_axis(HCP_r.index.map(str), axis='index')


In [3]:
# Number of rows in the unrestricted and restricted tables.
HCP_u.shape[0], HCP_r.shape[0]

(1206, 1206)

### Combine S900 gradients with HCP demographics

In [4]:
# get HCP - S900 subject list
# `subjlist` holds the file path only until the file has been read; after
# this cell it is the list of subject-ID strings (one per line of the file).
subjlist = '../data/subjectListS900_QC_gr.txt'

# Context manager so the handle is closed even if the read fails.
with open(subjlist) as f:
    mylist = f.read().split("\n")

# Keep every non-blank line. The previous `mylist[:-1]` assumed the file
# ends with a newline and would drop the last subject when it does not.
subjlist = joinedlist = [entry.strip() for entry in mylist if entry.strip()]

len(subjlist)

709

### G1, LSUB

In [5]:
# Independent copy of the subject list for this table.
mysubjects = list(subjlist)
print(len(mysubjects))

# subiculum has 1024 vertices -> columns node_1 ... node_1024
tot_node_num_lsub = 1024
node_str = ['node_{}'.format(idx) for idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty (all-NaN) table: one row per subject, covariates first, then nodes.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=pd.Index(mysubjects, name='id'), columns=mycols)

709
1024


In [6]:
gdir = '../data/tout_hippoc_grad_flipped_msm50'

# Fill the table one subject at a time: age and sex from the HCP
# spreadsheets, node values from the subject's G1_LSUB HDF5 file.
for subjID in mysubjects:

    # Write with a single .loc[row, col] call. The earlier chained form
    # (df.iloc[i]['age'] = ...) assigns into a temporary row object, and
    # pandas does not guarantee that such a write reaches df.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # The dataset inside the file is keyed by the subject ID. The context
    # manager closes the file even when the read raises.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G1_LSUB.h5'), 'r') as gfile_LSUB:
        g1_LSUB = np.array(gfile_LSUB[subjID])

    # NOTE(review): assumes the dataset holds exactly one value per node
    # column; flattened so a column- or row-vector layout also fits the row.
    df.loc[subjID, node_str] = g1_LSUB.ravel()


In [7]:
# Row count before incomplete rows are removed.
print(df.shape[0])

# Keep only rows without any missing value, then export to CSV.
df = df.dropna(axis=0, how='any')
df.to_csv('../solar/msm50_G1_LSUB/G1_LSUB.csv')

709


### G1, LCA

In [8]:
# Independent copy of the subject list for this table.
mysubjects = list(subjlist)
print(len(mysubjects))

# CA has 2048 vertices -> columns node_1 ... node_2048
tot_node_num_lca = 2048
node_str = ['node_{}'.format(idx) for idx in range(1, tot_node_num_lca + 1)]
print(len(node_str))

# Empty (all-NaN) table: one row per subject, covariates first, then nodes.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=pd.Index(mysubjects, name='id'), columns=mycols)

709
2048


In [9]:
gdir = '../data/tout_hippoc_grad_flipped_msm50'

# Fill the table one subject at a time: age and sex from the HCP
# spreadsheets, node values from the subject's G1_LCA HDF5 file.
for subjID in mysubjects:

    # Write with a single .loc[row, col] call. The earlier chained form
    # (df.iloc[i]['age'] = ...) assigns into a temporary row object, and
    # pandas does not guarantee that such a write reaches df.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # The dataset inside the file is keyed by the subject ID. The context
    # manager closes the file even when the read raises.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G1_LCA.h5'), 'r') as gfile_LCA:
        g1_LCA = np.array(gfile_LCA[subjID])

    # NOTE(review): assumes the dataset holds exactly one value per node
    # column; flattened so a column- or row-vector layout also fits the row.
    df.loc[subjID, node_str] = g1_LCA.ravel()


In [10]:
# Row count before incomplete rows are removed.
print(df.shape[0])

# Keep only rows without any missing value, then export to CSV.
df = df.dropna(axis=0, how='any')
df.to_csv('../solar/msm50_G1_LCA/G1_LCA.csv')

709


### G1, LDG

In [11]:
# Independent copy of the subject list for this table.
mysubjects = list(subjlist)
print(len(mysubjects))

# DG has 1024 points -> columns node_1 ... node_1024
tot_node_num_ldg = 1024
node_str = ['node_{}'.format(idx) for idx in range(1, tot_node_num_ldg + 1)]
print(len(node_str))

# Empty (all-NaN) table: one row per subject, covariates first, then nodes.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=pd.Index(mysubjects, name='id'), columns=mycols)

709
1024


In [12]:
gdir = '../data/tout_hippoc_grad_flipped_msm50'

# Fill the table one subject at a time: age and sex from the HCP
# spreadsheets, node values from the subject's G1_LDG HDF5 file.
for subjID in mysubjects:

    # Write with a single .loc[row, col] call. The earlier chained form
    # (df.iloc[i]['age'] = ...) assigns into a temporary row object, and
    # pandas does not guarantee that such a write reaches df.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # The dataset inside the file is keyed by the subject ID. The context
    # manager closes the file even when the read raises.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G1_LDG.h5'), 'r') as gfile_LDG:
        g1_LDG = np.array(gfile_LDG[subjID])

    # NOTE(review): assumes the dataset holds exactly one value per node
    # column; flattened so a column- or row-vector layout also fits the row.
    df.loc[subjID, node_str] = g1_LDG.ravel()

    


In [13]:
# Row count before incomplete rows are removed.
print(df.shape[0])

# Keep only rows without any missing value, then export to CSV.
df = df.dropna(axis=0, how='any')
df.to_csv('../solar/msm50_G1_LDG/G1_LDG.csv')

709


### G2, LSUB

In [14]:
# Independent copy of the subject list for this table.
mysubjects = list(subjlist)
print(len(mysubjects))

# 1024 node columns: node_1 ... node_1024
tot_node_num_lsub = 1024
node_str = ['node_{}'.format(idx) for idx in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty (all-NaN) table: one row per subject, covariates first, then nodes.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=pd.Index(mysubjects, name='id'), columns=mycols)

709
1024


In [15]:
gdir = '../data/tout_hippoc_grad_flipped_msm50'

# Fill the table one subject at a time: age and sex from the HCP
# spreadsheets, node values from the subject's G2_LSUB HDF5 file.
for subjID in mysubjects:

    # Write with a single .loc[row, col] call. The earlier chained form
    # (df.iloc[i]['age'] = ...) assigns into a temporary row object, and
    # pandas does not guarantee that such a write reaches df.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # The dataset inside the file is keyed by the subject ID. The context
    # manager closes the file even when the read raises.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G2_LSUB.h5'), 'r') as gfile_LSUB:
        g2_LSUB = np.array(gfile_LSUB[subjID])

    # NOTE(review): assumes the dataset holds exactly one value per node
    # column; flattened so a column- or row-vector layout also fits the row.
    df.loc[subjID, node_str] = g2_LSUB.ravel()


In [16]:
# Row count before incomplete rows are removed.
print(df.shape[0])

# Keep only rows without any missing value, then export to CSV.
df = df.dropna(axis=0, how='any')
df.to_csv('../solar/msm50_G2_LSUB/G2_LSUB.csv')

709


### G2, LCA

In [17]:
# Independent copy of the subject list for this table.
mysubjects = list(subjlist)
print(len(mysubjects))

# because ca has 2048 points -> columns node_1 ... node_2048
tot_node_num_lca = 2048
node_str = ['node_{}'.format(idx) for idx in range(1, tot_node_num_lca + 1)]
print(len(node_str))

# Empty (all-NaN) table: one row per subject, covariates first, then nodes.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=pd.Index(mysubjects, name='id'), columns=mycols)

709
2048


In [18]:
gdir = '../data/tout_hippoc_grad_flipped_msm50'

# Fill the table one subject at a time: age and sex from the HCP
# spreadsheets, node values from the subject's G2_LCA HDF5 file.
for subjID in mysubjects:

    # Write with a single .loc[row, col] call. The earlier chained form
    # (df.iloc[i]['age'] = ...) assigns into a temporary row object, and
    # pandas does not guarantee that such a write reaches df.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # The dataset inside the file is keyed by the subject ID. The context
    # manager closes the file even when the read raises.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G2_LCA.h5'), 'r') as gfile_LCA:
        g2_LCA = np.array(gfile_LCA[subjID])

    # NOTE(review): assumes the dataset holds exactly one value per node
    # column; flattened so a column- or row-vector layout also fits the row.
    df.loc[subjID, node_str] = g2_LCA.ravel()


In [19]:
# Row count before incomplete rows are removed.
print(df.shape[0])

# Keep only rows without any missing value, then export to CSV.
df = df.dropna(axis=0, how='any')
df.to_csv('../solar/msm50_G2_LCA/G2_LCA.csv')

709


### G2, LDG

In [20]:
# Independent copy of the subject list for this table.
mysubjects = list(subjlist)
print(len(mysubjects))

# 1024 node columns: node_1 ... node_1024
tot_node_num_ldg = 1024
node_str = ['node_{}'.format(idx) for idx in range(1, tot_node_num_ldg + 1)]
print(len(node_str))

# Empty (all-NaN) table: one row per subject, covariates first, then nodes.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=pd.Index(mysubjects, name='id'), columns=mycols)

709
1024


In [21]:
gdir = '../data/tout_hippoc_grad_flipped_msm50'

# Fill the table one subject at a time: age and sex from the HCP
# spreadsheets, node values from the subject's G2_LDG HDF5 file.
for subjID in mysubjects:

    # Write with a single .loc[row, col] call. The earlier chained form
    # (df.iloc[i]['age'] = ...) assigns into a temporary row object, and
    # pandas does not guarantee that such a write reaches df.
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # The dataset inside the file is keyed by the subject ID. The context
    # manager closes the file even when the read raises.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G2_LDG.h5'), 'r') as gfile_LDG:
        g2_LDG = np.array(gfile_LDG[subjID])

    # NOTE(review): assumes the dataset holds exactly one value per node
    # column; flattened so a column- or row-vector layout also fits the row.
    df.loc[subjID, node_str] = g2_LDG.ravel()

    


In [22]:
# Row count before incomplete rows are removed.
print(df.shape[0])

# Keep only rows without any missing value, then export to CSV.
df = df.dropna(axis=0, how='any')
df.to_csv('../solar/msm50_G2_LDG/G2_LDG.csv')

709
