In [1]:
import os
import pandas as pd
import numpy as np
import pcntoolkit as ptk 
from pcntoolkit.util.utils import create_design_matrix
from pcntoolkit.dataio.fileio import save as ptksave
from pcntoolkit.dataio.fileio import load as ptkload

# Path configuration shared by every cell below.
root_dir = '/project_cephfs/3022017.06/ENIGMA_ANX/'

# Exactly one of the two `proc_dir` lines must be active: switch them
# depending on whether the Z-stat or the scaled-effect maps are processed.
proc_dir = os.path.join(root_dir, 'Z_stat/')
#proc_dir = os.path.join(root_dir, 'Scaled_effect/')

# Input data lives in a `data/` sub-folder of the processing directory.
data_dir = os.path.join(proc_dir, 'data/')

# Brain mask used when loading the NIfTI response images.
mask_nii = (
    '/opt/fmriprep/templateflow/tpl-MNI152NLin2009cAsym/'
    'tpl-MNI152NLin2009cAsym_res-02_desc-brain_mask.nii.gz'
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the covariate tables from `data_dir`:
#   df_dem - rows of 'clinical_te.csv' (the subjects evaluated in this notebook)
#   df_tr  - rows of 'metadata_tr.csv' (presumably the training-set metadata;
#            it is only used further down to enumerate the site labels)
print('loading covariate data ...')
df_dem = pd.read_csv(os.path.join(data_dir, 'clinical_te.csv'))
df_tr = pd.read_csv(os.path.join(data_dir, 'metadata_tr.csv'))

# The whole clinical table is used as the test set, so the row mask is
# simply all True (one entry per row of df_dem).
te = np.ones(len(df_dem), dtype=bool)

# Select the test rows and store their metadata next to the other outputs.
df_te = df_dem.iloc[te]
df_te.to_csv(os.path.join(proc_dir, 'metadata_cl.csv'))

print(len(df_te))

loading covariate data ...
581


In [3]:
# Configure covariates: build the design matrix for the test subjects and
# write it to disk.

# Range passed to the B-spline basis as xmin/xmax. According to the original
# notes the real ages span 9-66; the bounds are padded by 5 on each side.
xmin = 4
xmax = 71

# Covariate columns, in the exact order they enter the design matrix.
cols_cov = [
    # subject / acquisition
    "Age", "Sex", "MRI",
    # task set-up
    "Instructions", "Precond_number_trials",
    "Multiple_CSplus", "Multiple_CSminus",
    # conditioned-stimulus type indicators
    "CS_type_neutral_faces", "CS_type_neutral_pictures",
    "CS_type_neutral_male_avatar", "CS_type_snakes_spiders",
    "CS_type_gabor_patch", "CS_type_animal_tool",
    "CS_type_affective_faces_pictures", "CS_type_humanoic_characters",
    # conditioning schedule
    "Number_CSplus_cond", "Number_CSminus_cond", "Reinforcing_rate",
    # unconditioned-stimulus type indicators
    "US_type_electric_shock", "US_type_auditory",
    "US_type_visceral", "US_type_thermal",
    # timing and confounds
    "Average_ITI", "Average_ISI", "Potential_US_confound",
]

# Sorted, de-duplicated site labels taken from the df_tr table (the original
# note records 39 distinct sites). They are handed to the design-matrix
# builder as `all_sites`.
site_ids = sorted(set(df_tr['Group_Dataset'].to_list()))

print('configuring covariates ...')
X_te = create_design_matrix(
    df_te[cols_cov],
    site_ids=df_te['Group_Dataset'],
    all_sites=site_ids,
    basis='bspline',
    xmin=xmin,
    xmax=xmax,
)

# Save the test design matrix as a text file in the processing directory.
cov_file_te = os.path.join(proc_dir, 'cov_bspline_cl.txt')
ptksave(X_te, cov_file_te)

configuring covariates ...


In [4]:
# configure response data
#
# Load the masked whole-brain maps of every study, stack them along the first
# axis, patch unusable voxels and save the rows selected by `te` as a pickle.

data_nii = [
    os.path.join(data_dir, 'ENIGMA_FC_cl_1.nii.gz'),
    os.path.join(data_dir, 'ENIGMA_FC_cl_2.nii.gz'),
]

# load the response data as nifti
print('loading wholebrain response data ...')
x = None
for i, f in enumerate(data_nii):
    print('loading study', i, '[', f, '] ...')
    # Read each file exactly once (these images are large) and transpose the
    # loaded array before stacking it onto the previous studies.
    x_study = ptkload(f, mask=mask_nii, vol=False).T
    print(x_study.shape)
    if x is None:
        x = x_study
    else:
        x = np.concatenate((x, x_study))
        print(x.shape)

# The boolean mask `te` was built from the covariate table; fail early with a
# clear message if the stacked responses do not have one row per covariate row.
if x.shape[0] != len(te):
    raise ValueError(
        'response data has %d rows but the covariate mask has %d entries'
        % (x.shape[0], len(te))
    )

# HACK: some of the voxels in the mask are all zero in this dataset, which
# causes problems. for these voxels, just impute with the mean of neighbouring
# voxels
x_te = x[te, :]
# True where a voxel has no finite value at all in the test rows.
# NOTE(review): a voxel with only *some* non-finite values is not flagged by
# this test (nor by the variance test, since its variance is NaN) - confirm
# that this is intended.
no_finite = ~np.isfinite(x_te).any(axis=0)
# True where a voxel is constant across the test rows.
zero_var = np.var(x_te, axis=0) == 0
bad_vox = np.where(np.bitwise_or(no_finite, zero_var))[0]
del x_te  # release the temporary copy before touching x again

# Seeded generator so that the imputed noise, and therefore the saved response
# file, is identical on every Restart & Run All.
IMPUTE_SEED = 0
impute_rng = np.random.default_rng(IMPUTE_SEED)
for b in bad_vox:
    # Mean of the two preceding columns plus a little Gaussian noise so the
    # imputed voxel does not end up with zero variance again.
    # NOTE(review): for b < 2 the negative indices wrap around to the last
    # columns of the array.
    x[:, b] = (x[:, b - 1] + x[:, b - 2]) / 2 + impute_rng.normal(scale=0.1, size=x.shape[0])

# and write out as pkl
resp_file_te = os.path.join(proc_dir, 'resp_cl.pkl')
ptksave(x[te, :], resp_file_te)


loading wholebrain response data ...
loading study 0 [ /project_cephfs/3022017.06/ENIGMA_ANX/Z_stat/data/ENIGMA_FC_cl_1.nii.gz ] ...
(291, 235840)
loading study 1 [ /project_cephfs/3022017.06/ENIGMA_ANX/Z_stat/data/ENIGMA_FC_cl_2.nii.gz ] ...
(290, 235840)
(581, 235840)
