In [1]:
import os
import pandas as pd
import numpy as np
import glob

In [2]:
# Root folder of the data set; the task, rest, anatomy and behavioural paths
# used further down are all built by appending to this string.
path = '/media/DataD800/Alina/' + 'retest_set/'

In [3]:
# Label vectors used as table indexes, read as strings from the atlas folder:
# index_gla comes from gla_index.txt (Glasser atlas) and index_subc from
# fs_index_subc.txt (FreeSurfer subcortex names).
index_gla, index_subc = (
    np.loadtxt('/media/DataD800/Alina/atlases/' + label_file, dtype=str)
    for label_file in ('gla_index.txt', 'fs_index_subc.txt')
)

### 1. Assemble txt files into CSV tables for tasks

In [4]:
# Task folders: every entry directly under `path` whose name contains
# 'analysis_s2', in sorted order.
tasks = sorted(entry for entry in os.listdir(path) if 'analysis_s2' in entry)

In [5]:
# For every task folder, assemble the per-subject txt files into three
# subject-by-region tables (Glasser cortex, FS subcortex, and both side by side)
# and save each of them as a CSV inside the task folder.
for task in tasks:
    task_dir = path + str(task)
    # The third '_'-separated token of the folder name prefixes the CSV file names.
    csv_prefix = task_dir + '/' + str(task.split('_')[2])

    # --- Glasser atlas ---
    gla_dir = task_dir + '/analysis/cort_MSMAll_av_Glasser_txt'
    # Subject ids are the first 6 characters of each entry; they may differ
    # from folder to folder, so they are listed again for every folder.
    subjects = [entry[:6] for entry in sorted(os.listdir(gla_dir))]
    dct_gla = {}
    for subject in subjects:
        gla_file = gla_dir + '/' + str(subject) + '.MSMAll.aver_parc.txt'
        dct_gla[subject] = np.loadtxt(gla_file, dtype=float)
    # Columns are subjects and rows follow index_gla; transpose to subjects x regions.
    df_gla = pd.DataFrame(dct_gla, index_gla).T
    df_gla.to_csv(csv_prefix + '_table_Glasser.csv')

    # --- FS subcortex ---
    subc_dir = task_dir + '/analysis/subc_FS_MSMAll_txts'
    dct_subc = {}
    subjects = [entry[:6] for entry in sorted(os.listdir(subc_dir))]
    for subject in subjects:
        # One '<subject>.FS_subcort.<label>.mean.txt' file per label in index_subc.
        file_stem = subc_dir + '/' + str(subject) + '/' + str(subject) + '.FS_subcort.'
        values = [np.loadtxt(file_stem + str(file) + '.mean.txt') for file in index_subc]
        dct_subc[subject] = pd.Series(np.array(values))
    df_subc = pd.DataFrame(dct_subc)
    df_subc.index = index_subc
    df_subc = df_subc.T
    df_subc.to_csv(csv_prefix + '_table_FSsubcort.csv')

    # --- both tables joined column-wise ---
    df_tabl = pd.concat([df_gla, df_subc], axis=1)
    df_tabl.to_csv(csv_prefix + '_table_Glasser_FSsubcort.csv')

### 2. Assemble txt files into CSV tables for rest

In [6]:
# Folder that holds the resting-state data, relative to the data-set root.
path_rest = '{}3T_rfMRI_REST_fix/'.format(path)

In [7]:
# Unique scan names: the text before the first '.' of every entry in the
# rest 'analysis' folder, in sorted order.
scans = sorted({entry.split('.')[0] for entry in os.listdir(path_rest + 'analysis')})

In [9]:
# For every rest scan, write per-subject time-course tables into
# '<scan>.ind_tables_timecourse': the Glasser cortex table, the FS subcortex
# table, and the two stacked row-wise into one combined table.
for scan in scans:
    scan_prefix = path_rest + 'analysis/' + str(scan)
    path_out = scan_prefix + '.ind_tables_timecourse'
    # exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
    # FileExistsError as soon as the folder is left over from an earlier run.
    os.makedirs(path_out, exist_ok=True)

    # --- cortex: one tab-separated txt file per subject, rows labelled by index_gla ---
    gla_dir = scan_prefix + '.cort_MSMAll_av_Glasser_txt'
    subjects = [nm[0:6] for nm in sorted(os.listdir(gla_dir))]
    for subject in subjects:
        df_gla = pd.read_csv(gla_dir + '/' + str(subject) + '.MSMAll.aver_parc.txt',
                             sep='\t', header=None)
        df_gla.index = index_gla
        df_gla.to_csv(path_out + '/' + str(subject) + '.Glasser_timecouse.csv')

    # --- subcortex: one txt file per label in index_subc, stacked as rows ---
    subc_dir = scan_prefix + '.subc_FS_MSMAll_txts'
    subjects = [nm[0:6] for nm in sorted(os.listdir(subc_dir))]
    for subject in subjects:
        file_stem = subc_dir + '/' + str(subject) + '/' + str(subject) + '.FS_subcort.'
        dct_ssubc = {}
        for file in index_subc:
            dct_ssubc[file] = np.loadtxt(file_stem + str(file) + '.mean.txt', dtype=float)
        df_ssubc = pd.DataFrame(dct_ssubc).T
        subc_csv = path_out + '/' + str(subject) + '.FSsubcort_timecouse.csv'
        df_ssubc.to_csv(subc_csv)

        # The subject list here comes from the subcortical folder, so a subject
        # may have no cortical table. Report and skip it instead of aborting
        # the whole loop with FileNotFoundError.
        gla_csv = path_out + '/' + str(subject) + '.Glasser_timecouse.csv'
        if not os.path.isfile(gla_csv):
            print(subject, 'has no Glasser table for', scan, '- combined table skipped')
            continue

        # Both tables are read back from disk so that their column labels match
        # before they are stacked (cortex rows first, subcortex rows after).
        df_1 = pd.read_csv(gla_csv, index_col=0)
        df_2 = pd.read_csv(subc_csv, index_col=0)
        df_tabl = pd.concat([df_1, df_2], axis=0)
        df_tabl.to_csv(path_out + '/' + str(subject) + '.Glasser_FSsubcort_timecouse.csv')

In [10]:
# Output folder for the tables that combine all rest scans; its name is
# prefixed with the first scan name.
path_out = path_rest + 'analysis/' + str(scans[0]) + '.ALL_scans_timecourses'
# os.makedirs creates path_out together with each sub-folder, and exist_ok=True
# means re-running the cell no longer fails with FileExistsError.
os.makedirs(path_out + '/cormatrix', exist_ok=True)
os.makedirs(path_out + '/fulltimecourse', exist_ok=True)

In [11]:
# Keep only the subjects that have a combined table for every rest scan.

# Subject ids (first 6 characters of the file name) with a combined table, per scan.
dct_list = {}
for scan in scans:
    dct_list[scan] = sorted(set([l.split('/')[-1][:6] for l in glob.glob(path_rest+'analysis/'+str(scan)+'.ind_tables_timecourse/*.Glasser_FSsubcort_timecouse.csv')]))

# Intersect over however many scans exist instead of indexing scans[0]..scans[3]:
# the fixed indices raised IndexError with fewer than four scans and silently
# ignored any scan beyond the fourth. Sorting makes the subject order reproducible.
sj_nm = sorted(set.intersection(*[set(dct_list[scan]) for scan in scans])) if scans else []

print(len(sj_nm))


43


In [13]:
# Concatenate the per-scan tables of each subject column-wise into one table.
# All tables of a subject are read first and joined in a single pd.concat call,
# rather than growing a DataFrame inside the loop starting from an empty frame.
dct_tmcors = {}
for subject in sj_nm:
    scan_tables = [
        pd.read_csv(path_rest+'analysis/'+str(scan)+'.ind_tables_timecourse/'+str(subject)+'.Glasser_FSsubcort_timecouse.csv', index_col=0)
        for scan in scans
    ]
    # ignore_index=True renumbers the columns 0..N-1 across all scans.
    dct_tmcors[subject] = pd.concat(scan_tables, axis=1, ignore_index=True)

# Check whether somebody has shorter scan lengths. The expected count follows the
# number of scans that were actually concatenated (1200 columns per scan, as in
# the original 1200*4 check).
expected_trs = 1200 * len(scans)
keys = sorted(dct_tmcors.keys())
for key in keys:
    if len(dct_tmcors[key].columns) < expected_trs:
        print(key, 'is too short', len(dct_tmcors[key].columns), 'TR out of', expected_trs)


# Save the full table and the row-by-row correlation matrix of every subject.
for key in dct_tmcors.keys():
    dct_tmcors[key].to_csv(path_out+'/fulltimecourse/'+str(key)+'.full_rest_Glasser_FSsubcort_timecourse.csv')
    # .T.corr() correlates the rows of the table with each other.
    dct_tmcors[key].T.corr().to_csv(path_out+'/cormatrix/'+str(key)+'.full_rest_Glasser_FSsubcort_cormatrix.csv')

143325 is too short 4539 TR out of 4800


In [14]:
# Build the group table of functional connectivity: for every subject, flatten
# the strict upper triangle of the saved correlation matrix into one vector.
subjects = [s[:6] for s in sorted(os.listdir(path_out + '/cormatrix'))]
dct_fc = {}
for subject in subjects:
    df = pd.read_csv(path_out + '/cormatrix/' + str(subject) + '.full_rest_Glasser_FSsubcort_cormatrix.csv', index_col=0)
    # np.triu_indices(n, k=1) lists every pair (i, j) with i < j in row-major
    # order, i.e. the same order the nested i/j loops produced, but without one
    # scalar .iloc lookup per pair.
    rows, cols = np.triu_indices(len(df.index), k=1)
    nms = [str(a) + '-' + str(b) for a, b in zip(df.index[rows], df.index[cols])]
    vec = np.asarray(df, dtype=float)[rows, cols]
    dct_fc[subject] = pd.Series(vec, index=nms)
df_fc = pd.DataFrame(dct_fc)
df_fc.T.to_csv(path_rest + 'group_rest_FC.csv')  # save, subjects as rows

# r-to-z (Fisher) transformation
df_fc_z = np.arctanh(df_fc)
df_fc_z.T.to_csv(path_rest + 'group_rest_FC_z.csv')  # save, subjects as rows

### 3. Assemble anatomy tables (cort, subc, area)

In [212]:
# Folder with the structural (anatomy) tables; note there is no trailing slash.
path_anat = '{}3T_Structural_preproc_extended'.format(path)

In [236]:
# Cortical thickness: merge the left- and right-hemisphere stats tables into one CSV.
# Load one table per hemisphere, first column as index.
df_l, df_r = (
    pd.read_csv(path_anat + '/aparc_stats_thickness_a2009s_' + hemi + '.txt', index_col=0)
    for hemi in ('lh', 'rh')
)

# Shorten the column names by removing '_thickness' and clear the index name.
left_col = [name.replace('_thickness', '') for name in df_l.columns]
right_col = [name.replace('_thickness', '') for name in df_r.columns]
for hemi_table, short_names in ((df_l, left_col), (df_r, right_col)):
    hemi_table.columns = short_names
    hemi_table.index.name = None

# Join both hemispheres column-wise, leaving out the last column of each table
# (the mean-thickness column, according to the original note).
df_thck = pd.concat([hemi_table.iloc[:, :-1] for hemi_table in (df_l, df_r)], axis=1)
df_thck.to_csv(path_anat + '/FS_anatomy_cortical_thickness.csv')

In [245]:
# Cortical area: merge the left- and right-hemisphere stats tables into one CSV.
# Load one table per hemisphere, first column as index.
df_al, df_ar = (
    pd.read_csv(path_anat + '/aparc_stats_area_a2009s_' + hemi + '.txt', index_col=0)
    for hemi in ('lh', 'rh')
)

# Shorten the column names by removing '_area' and clear the index name.
left_col = [name.replace('_area', '') for name in df_al.columns]
right_col = [name.replace('_area', '') for name in df_ar.columns]
for hemi_table, short_names in ((df_al, left_col), (df_ar, right_col)):
    hemi_table.columns = short_names
    hemi_table.index.name = None

# Join both hemispheres column-wise, leaving out the last column of each table
# (the WhiteSurfArea column, according to the original note).
df_area = pd.concat([hemi_table.iloc[:, :-1] for hemi_table in (df_al, df_ar)], axis=1)
df_area.to_csv(path_anat + '/FS_anatomy_cortical_area.csv')

In [262]:
# Subcortex volume: load the aseg stats table (first column as index) and
# clear the name of the index.
df_sc = pd.read_csv(path_anat + '/aseg_stats.txt', index_col=0)
df_sc.index = df_sc.index.rename(None)

In [264]:
# The 19 subcortical structure labels: nine left/right pairs plus the unpaired
# 'Brain-Stem', which sits after the fifth left-hemisphere label.
_paired_structures = (
    'Cerebellum-Cortex',
    'Thalamus-Proper',
    'Caudate',
    'Putamen',
    'Pallidum',
    'Hippocampus',
    'Amygdala',
    'Accumbens-area',
    'VentralDC',
)
_left_labels = ['Left-' + structure for structure in _paired_structures]
_right_labels = ['Right-' + structure for structure in _paired_structures]
subc_19label = _left_labels[:5] + ['Brain-Stem'] + _left_labels[5:] + _right_labels

In [268]:
# Keep only the listed structures, with the columns in alphabetical label order,
# and save the result.
kept_columns = sorted(subc_19label)
df_sc = df_sc.loc[:, kept_columns]
df_sc.to_csv(path_anat + '/FS_anatomy_subcortical_volume.csv')

### 4. Assembling in-scanner movements into tables for task and rest

In [273]:
# Task modality names: take the last path component of every '*tfMRI*_preproc'
# folder under `path`, then its second-to-last '_'-separated token.
folder = [hit.rsplit('/', 1)[-1] for hit in glob.glob(path + '*tfMRI*_preproc')]
f_names = sorted(name.rsplit('_', 2)[-2] for name in folder)
print(f_names)

['EMOTION', 'GAMBLING', 'LANGUAGE', 'MOTOR', 'RELATIONAL', 'SOCIAL', 'WM']


In [306]:
# Mean in-scanner movement per subject for every task modality: the average of
# the LR and RL 'Movement_RelativeRMS_mean.txt' values, saved as one CSV per task.
dct_mov = {}
for mod in f_names:
    path_mv = path + '3T_tfMRI_' + str(mod) + '_preproc'
    path_in = path_mv + '/analysis/confounds'
    subjects = sorted(os.listdir(path_in))
    vec = []
    # `sub` has to be reset for every modality: it was never initialised in this
    # cell, so it either raised NameError on a fresh kernel or kept the ids of
    # earlier runs and no longer matched the length of `vec`.
    sub = []
    for subject in subjects:
        lr_file = path_in + '/' + str(subject) + '/LR/Movement_RelativeRMS_mean.txt'
        rl_file = path_in + '/' + str(subject) + '/RL/Movement_RelativeRMS_mean.txt'
        # Only subjects with both phase-encoding runs are included.
        if os.path.isfile(lr_file) and os.path.isfile(rl_file):
            m1 = np.loadtxt(lr_file)
            m2 = np.loadtxt(rl_file)
            vec.append(np.mean([m1, m2]))
            sub.append(subject)
    dct_mov[mod] = pd.Series(vec, index=sub)
    dct_mov[mod].to_csv(path_mv + '/' + str(mod) + '_all_subj_movements.csv')

In [311]:
# One column per task modality (rows are subjects); each column name is the
# lower-cased first three characters of the modality name.
df_mov = pd.DataFrame(dct_mov).rename(columns=lambda modality: modality.lower()[:3])

In [312]:
# Mean in-scanner movement per subject for the two rest sessions: the average of
# the LR and RL 'Movement_RelativeRMS_mean.txt' values, saved as one CSV per session.
dct_rmov = {}
for rest in ['REST1', 'REST2']:
    rest_dir = path + '3T_rfMRI_' + str(rest) + '_preproc/'
    conf_dir = rest_dir + 'analysis/confounds/'
    subjects = sorted(os.listdir(conf_dir))
    vec = []
    sub = []
    for subject in subjects:
        lr_file = conf_dir + str(subject) + '/LR/Movement_RelativeRMS_mean.txt'
        rl_file = conf_dir + str(subject) + '/RL/Movement_RelativeRMS_mean.txt'
        # The RL check used to look in 'z_analysis/confounds' while the file is
        # loaded from 'analysis/confounds'; test the very file that is read so
        # the guard and the load cannot disagree.
        if os.path.isfile(lr_file) and os.path.isfile(rl_file):
            m1 = np.loadtxt(lr_file)
            m2 = np.loadtxt(rl_file)
            vec.append(np.mean([m1, m2]))
            sub.append(subject)
    dct_rmov[rest] = pd.Series(vec, index=sub)
    dct_rmov[rest].to_csv(rest_dir + str(rest) + '_all_subj_movements.csv')

In [313]:
# Rest movement per subject: drop subjects missing either rest session, then
# average the two sessions row-wise.
rest_both_sessions = pd.DataFrame(dct_rmov).dropna(axis=0)
df_mov['rest'] = rest_both_sessions.mean(axis=1)

In [315]:
# Save the combined movement table (task columns plus 'rest') in the WM task folder.
df_mov.to_csv(''.join([path, '3T_tfMRI_WM_preproc/', 'all_mods_all_subj_mov.csv']))

### 5. Assemble total brain volume table

In [None]:
# Main set only: copy the brain-volume columns of the behavioural table into
# their own CSV in the structural folder.
# NOTE(review): the retest-set cell writes to the same output file name, so run
# only the cell that matches the data set `path` points to.
beh_tab_1200 = pd.read_csv(path + 'Behavioral_tables/unrestricted.csv', index_col=0)
col_volume = [
    'FS_' + suffix
    for suffix in ('IntraCranial_Vol', 'TotCort_GM_Vol', 'SubCort_GM_Vol',
                   'Tot_WM_Vol', 'BrainSegVol_eTIV_Ratio')
]
volume_table = beh_tab_1200.loc[:, col_volume]
volume_table.to_csv(path + '3T_Structural_preproc_extended/totbrainvol_table.csv')

In [None]:
# Retest set only: copy the brain-volume columns of the FreeSurfer table into
# their own CSV in the structural folder.
# NOTE(review): the main-set cell writes to the same output file name, so run
# only the cell that matches the data set `path` points to.
retest_fs_tab = pd.read_csv(path + 'Behavioral_tables/unrestricted_hcp_freesurfer.csv', index_col=0)
col_volume = [
    'FS_' + suffix
    for suffix in ('IntraCranial_Vol', 'TotCort_GM_Vol', 'SubCort_GM_Vol',
                   'Tot_WM_Vol', 'BrainSegVol_eTIV_Ratio')
]
volume_table = retest_fs_tab.loc[:, col_volume]
volume_table.to_csv(path + '3T_Structural_preproc_extended/totbrainvol_table.csv')