In [1]:
import os
import h5py
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.stats import pearsonr
import nibabel as nb
import seaborn as sns

### get subject id's

In [2]:
# Read the HCP S900 subject list (one subject ID per line).
subjlist_path = '../data/subjectListS900_QC_gr.txt'

# The context manager closes the file even if reading raises.
with open(subjlist_path) as f:
    mylist = f.read().split("\n")

# Keep every non-empty line. Slicing off the last element unconditionally is
# only correct when the file ends with exactly one newline; without it a real
# subject would be dropped. Stripping also removes stray '\r' characters.
subjlist = joinedlist = [line.strip() for line in mylist if line.strip()]

# Separate list object holding the same subject IDs.
mysubjects = list(subjlist)
print(len(mysubjects))


709


# prepare for solar, t1w-t2w ratios

### get the HCP demographics

In [3]:
# Load the unrestricted and restricted HCP S1200 tables; the first CSV column
# becomes the row index in both.
HCP_u = pd.read_csv('../data/HCP_solar/HCP_s1200_unrestricted.csv', index_col=0)
HCP_r = pd.read_csv('../data/HCP_solar/HCP_s1200_restricted.csv', index_col=0)

# Convert the index labels to strings so they can be looked up with the
# string subject IDs read from the subject list.
for _table in (HCP_u, HCP_r):
    _table.index = _table.index.map(str)


In [4]:
# Number of rows in the unrestricted and restricted tables.
HCP_u.shape[0], HCP_r.shape[0]

(1206, 1206)

In [5]:
# Display the restricted table's index (labels were mapped to str when loaded).
HCP_r.index

Index(['100004', '100206', '100307', '100408', '100610', '101006', '101107',
       '101208', '101309', '101410',
       ...
       '987983', '989987', '990366', '991267', '992673', '992774', '993675',
       '994273', '995174', '996782'],
      dtype='object', name='Subject', length=1206)

# Combine T1w/T2w ratios with HCP demographics

## Left hemisphere

### LSUB

In [6]:
# Fresh copy of the subject IDs for the left-subiculum table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' (the subiculum has 1024 points).
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Show the first and last label as a quick check.
node_str[0], '....', node_str[-1]

709
1024


('node_1', '....', 'node_1024')

In [7]:
# Covariate columns first, then one column per node.
mycols = ['age', 'sex', *node_str]

In [8]:
# Empty subject-by-column table whose index is named 'id'.
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

In [9]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_sub_left.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [10]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_msm50_t1t2_lsub/t1t2_lsub.csv')

709


### LCA

In [11]:
# Fresh copy of the subject IDs for the left-CA table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_2048' (CA has 2048 points).
tot_node_num_lsub = 2048
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty subject-by-column table: covariates first, then the node columns.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
2048


In [12]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_ca_left.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [13]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_msm50_t1t2_lca/t1t2_lca.csv')

709


### LDG

In [14]:
# Fresh copy of the subject IDs for the left-DG table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' for this table.
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty subject-by-column table: covariates first, then the node columns.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
1024


In [15]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_dg_left.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [16]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_msm50_t1t2_ldg/t1t2_ldg.csv')

709


## Right Hemisphere

### RSUB

In [17]:
# Fresh copy of the subject IDs for the right-subiculum table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' for this table.
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty subject-by-column table: covariates first, then the node columns.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
1024


In [18]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_sub_right.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [19]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_msm50_t1t2_rsub/t1t2_rsub.csv')

709


### RCA

In [20]:
# Fresh copy of the subject IDs for the right-CA table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_2048' for this table.
tot_node_num_lsub = 2048
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty subject-by-column table: covariates first, then the node columns.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
2048


In [21]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_ca_right.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [22]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_msm50_t1t2_rca/t1t2_rca.csv')

709


### RDG

In [23]:
# Fresh copy of the subject IDs for the right-DG table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' for this table.
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty subject-by-column table: covariates first, then the node columns.
mycols = ['age', 'sex', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
1024


In [24]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_dg_right.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])


In [25]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_msm50_t1t2_rdg/t1t2_rdg.csv')

709


# Controlling for the mean T1w/T2w ratio

### LSUB

In [26]:
# Fresh copy of the subject IDs for the left-subiculum table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' (the subiculum has 1024 points).
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Show the first and last label as a quick check.
node_str[0], '....', node_str[-1]

709
1024


('node_1', '....', 'node_1024')

In [27]:
# Covariates (including the per-subject mean, 'meanW') first, then the nodes.
mycols = ['age', 'sex', 'meanW', *node_str]

# Empty subject-by-column table whose index is named 'id'.
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

In [28]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_sub_left.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    # Mean T1w/T2w ratio across this subject's node columns.
    df.loc[subjID, 'meanW'] = df.loc[subjID, node_str].mean()
    

In [29]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_mean_msm50_t1t2_lsub/t1t2_lsub.csv')

709


### LCA

In [30]:
# Fresh copy of the subject IDs for the left-CA table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_2048' (CA has 2048 points).
tot_node_num_lsub = 2048
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty table: covariates (including 'meanW') first, then the node columns.
mycols = ['age', 'sex', 'meanW', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
2048


In [31]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_ca_left.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    # Mean T1w/T2w ratio across this subject's node columns.
    df.loc[subjID, 'meanW'] = df.loc[subjID, node_str].mean()

In [32]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_mean_msm50_t1t2_lca/t1t2_lca.csv')

709


### LDG

In [33]:
# Fresh copy of the subject IDs for the left-DG table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' for this table.
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty table: covariates (including 'meanW') first, then the node columns.
mycols = ['age', 'sex', 'meanW', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
1024


In [34]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_dg_left.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    # Mean T1w/T2w ratio across this subject's node columns.
    df.loc[subjID, 'meanW'] = df.loc[subjID, node_str].mean()

In [35]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_mean_msm50_t1t2_ldg/t1t2_ldg.csv')

709


### RSUB 

In [36]:
# Fresh copy of the subject IDs for the right-subiculum table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' (the subiculum has 1024 points).
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Show the first and last label as a quick check.
node_str[0], '....', node_str[-1]


709
1024


('node_1', '....', 'node_1024')

In [37]:
# Covariates (including the per-subject mean, 'meanW') first, then the nodes.
mycols = ['age', 'sex', 'meanW', *node_str]

# Empty subject-by-column table whose index is named 'id'.
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

In [38]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_sub_right.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    # Mean T1w/T2w ratio across this subject's node columns.
    df.loc[subjID, 'meanW'] = df.loc[subjID, node_str].mean()
    
    

In [39]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_mean_msm50_t1t2_rsub/t1t2_rsub.csv')

709


### RCA

In [40]:
# Fresh copy of the subject IDs for the right-CA table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_2048' (CA has 2048 points).
tot_node_num_lsub = 2048
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty table: covariates (including 'meanW') first, then the node columns.
mycols = ['age', 'sex', 'meanW', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
2048


In [41]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_ca_right.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    # Mean T1w/T2w ratio across this subject's node columns.
    df.loc[subjID, 'meanW'] = df.loc[subjID, node_str].mean()

In [42]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_mean_msm50_t1t2_rca/t1t2_rca.csv')

709


### RDG

In [43]:
# Fresh copy of the subject IDs for the right-DG table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Column labels 'node_1' ... 'node_1024' for this table.
tot_node_num_lsub = 1024
node_str = ['node_' + str(k) for k in range(1, tot_node_num_lsub + 1)]
print(len(node_str))

# Empty table: covariates (including 'meanW') first, then the node columns.
mycols = ['age', 'sex', 'meanW', *node_str]
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709
1024


In [44]:
# Directory with one HDF5 file of T1w/T2w values per subject and subfield.
ddir = '../data/tout_T1wT2w_msm50/'

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    vol2hipp = os.path.join(ddir, 'HCP_%s_t1t2_dg_right.h5' % (subjID))

    # Open read-only and close the handle as soon as the values are copied,
    # so the loop does not leave one open HDF5 file per subject.
    with h5py.File(vol2hipp, 'r') as h:
        df.loc[subjID, node_str] = np.array(h[subjID])

    # Mean T1w/T2w ratio across this subject's node columns.
    df.loc[subjID, 'meanW'] = df.loc[subjID, node_str].mean()

In [45]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/solar_mean_msm50_t1t2_rdg/t1t2_rdg.csv')

709


### check heritability of T1w/T2w and G2 (fc) correlations

### LSUB

In [46]:
# Fresh copy of the subject IDs for the left-subiculum correlation table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Only one value is stored per subject, kept in a single 'node_1' column.
node_str = ['node_1']
mycols = ['age', 'sex', *node_str]

# Empty subject-by-column table whose index is named 'id'.
df = pd.DataFrame(index=mysubjects, columns=mycols).rename_axis('id')

709


In [47]:
# Input directories do not change per subject, so define them once.
ddir = '../data/tout_T1wT2w_msm50/'
tdir = '../data/tout_T1wT2w_msm50/'                 # T1w/T2w values
gdir = '../data/tout_hippoc_grad_flipped_msm50/'    # G2 gradient values

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # Read the T1w/T2w values; the handle was previously never closed.
    vol2hipp_LSUB = os.path.join(tdir, 'HCP_%s_t1t2_sub_left.h5' % (subjID))
    with h5py.File(vol2hipp_LSUB, 'r') as h_LSUB:
        t_LSUB = np.array(h_LSUB[subjID])

    # Read the G2 values for the same subject and subfield.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G2_LSUB.h5'), 'r') as gfile_LSUB:
        g2_LSUB = np.array(gfile_LSUB[subjID])

    # Pearson r between log(T1w/T2w) and G2; [0] keeps r and drops the p-value.
    df.loc[subjID, node_str] = pearsonr(np.log(t_LSUB), g2_LSUB)[0]


In [48]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/r_t1t2_G2_LSUB/r_g2_lsub.csv')

709


## LCA

In [49]:
# Fresh copy of the subject IDs for the left-CA correlation table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Only one value is stored per subject, kept in a single 'node_1' column.
node_str = ['node_1']
mycols = ['age', 'sex'] + node_str

df = pd.DataFrame(index = mysubjects,
                 columns = mycols)
df.index.name = 'id'

# Input directories do not change per subject, so define them once.
ddir = '../data/tout_T1wT2w_msm50/'
tdir = '../data/tout_T1wT2w_msm50/'                 # T1w/T2w values
gdir = '../data/tout_hippoc_grad_flipped_msm50/'    # G2 gradient values

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # Read the T1w/T2w values; the handle was previously never closed.
    vol2hipp_LCA = os.path.join(tdir, 'HCP_%s_t1t2_ca_left.h5' % (subjID))
    with h5py.File(vol2hipp_LCA, 'r') as h_LCA:
        t_LCA = np.array(h_LCA[subjID])

    # Read the G2 values for the same subject and subfield.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G2_LCA.h5'), 'r') as gfile_LCA:
        g2_LCA = np.array(gfile_LCA[subjID])

    # Pearson r between log(T1w/T2w) and G2; [0] keeps r and drops the p-value.
    df.loc[subjID, node_str] = pearsonr(np.log(t_LCA), g2_LCA)[0]
    

709


In [50]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/r_t1t2_G2_LCA/r_g2_lca.csv')

709


## LDG

In [51]:
# Fresh copy of the subject IDs for the left-DG correlation table.
mysubjects = list(subjlist)
print(len(mysubjects))

# Only one value is stored per subject, kept in a single 'node_1' column.
node_str = ['node_1']
mycols = ['age', 'sex'] + node_str

df = pd.DataFrame(index = mysubjects,
                 columns = mycols)
df.index.name = 'id'

# Input directories do not change per subject, so define them once.
ddir = '../data/tout_T1wT2w_msm50/'
tdir = '../data/tout_T1wT2w_msm50/'                 # T1w/T2w values
gdir = '../data/tout_hippoc_grad_flipped_msm50/'    # G2 gradient values

for subjID in mysubjects:

    # Single-step, label-based assignment. The chained form
    # df.iloc[i]['age'] = ... writes into an intermediate row object and is
    # not guaranteed to update df (it never does under pandas Copy-on-Write).
    df.loc[subjID, 'age'] = HCP_r.loc[subjID, 'Age_in_Yrs']
    df.loc[subjID, 'sex'] = HCP_u.loc[subjID, 'Gender']

    # Read the T1w/T2w values; the handle was previously never closed.
    vol2hipp_LDG = os.path.join(tdir, 'HCP_%s_t1t2_dg_left.h5' % (subjID))
    with h5py.File(vol2hipp_LDG, 'r') as h_LDG:
        t_LDG = np.array(h_LDG[subjID])

    # Read the G2 values for the same subject and subfield.
    with h5py.File(os.path.join(gdir, 'HCP_' + subjID + '_G2_LDG.h5'), 'r') as gfile_LDG:
        g2_LDG = np.array(gfile_LDG[subjID])

    # Pearson r between log(T1w/T2w) and G2; [0] keeps r and drops the p-value.
    df.loc[subjID, node_str] = pearsonr(np.log(t_LDG), g2_LDG)[0]
    

709


In [52]:
# Keep only rows without any missing value, then report how many remain.
df = df.dropna(axis=0, how='any')
print(df.shape[0])
# Export is disabled; uncomment to write the table.
#df.to_csv('../solar/r_t1t2_G2_LDG/r_g2_ldg.csv')

709
