# Generate the model and the inputs for the pipeline

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
from scipy import io as sio

In [2]:
# Model variables
# Confound covariates (the site dummies are appended to these further down)
regress_vars = ['AGE_AT_SCAN', 'FD_scrubbed']
# Alternative: regress_vars = ['FD_scrubbed']
# Phenotype columns that are written to the model file
model_vars = ['SUB_ID', 'AGE_AT_SCAN', 'FD_scrubbed']
group_var = 'DX_GROUP'
nb_subtypes = 5
procs = 7
# Covariate of interest; presumably 'ADOS_sb_sev' and 'SRS_RAW_TOTAL' are
# the alternatives that were tried here
coi = 'DX_GROUP'
scale = 7
thing = 'full'

# Output file names share one stem and differ only in the extension
model_name = 'model_{thing}_maybe_sc{scale}_noreg.csv'.format(thing=thing, scale=scale)
mat_name = 'model_{thing}_maybe_sc{scale}_noreg.mat'.format(thing=thing, scale=scale)

# Roots
local_root = '/data1/guilimin/abide/'
# Alternative: remote_root = '/home/surchs/sim_data/data/abide/'
remote_root = local_root

# Phenotype table; derived from local_root so that changing the root
# cannot leave this path pointing at the old location
pheno_in = os.path.join(local_root, 'pheno',
                        'merged_abide_{thing}_maybe.csv'.format(thing=thing))

# Paths and templates derived from the settings above
pipe_folder = os.path.join(
    remote_root,
    'subtype/sc{scale}/{thing}_maybe_noreg/'.format(scale=scale, thing=thing))
mask_path = os.path.join(remote_root, 'masks/template_mask.nii.gz')
# Filled with subject ID (zero-padded to 7 characters), session and run
f_tmp = 'netstack_fmri_{:07}_session_{}_run{}.nii.gz'

In [3]:
# Directory for the model files of the current scale: <local_root>/pheno/sc<scale>
model_dir = os.path.join(local_root, 'pheno', 'sc{scale}'.format(scale=scale))
# Create it (including missing parents) unless it is already there
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

In [4]:
# Make sure the covariate of interest and the grouping variable are part of
# the model columns, without adding either of them twice
if coi not in model_vars:
    model_vars.append(coi)
if group_var not in model_vars:
    model_vars.append(group_var)

In [5]:
# Load the phenotype table that all following cells work on
pheno = pd.read_csv(filepath_or_buffer=pheno_in)

## Generate the input structure for the pipeline

In [6]:
# Walk the phenotype table and collect, per subject, the path to its input
# file. Rows whose file is missing locally are reported and remembered in
# pop_ind.
data_dict = {}
path_list = []
sub_list = []
pop_ind = []
for row_label, row in pheno.iterrows():
    sub_name = 'sub_{}'.format(row.SUB_ID)
    file_name = f_tmp.format(row.SUB_ID, row.session, row.run)
    rel_path = os.path.join('sca_z', 'sc{}'.format(scale), file_name)

    loc_path = os.path.join(local_root, rel_path)
    rem_path = os.path.join(remote_root, rel_path)
    # Existence is checked on the local copy; the remote path is what gets stored
    if os.path.isfile(loc_path):
        data_dict[sub_name] = rem_path
        path_list.append(rem_path)
        sub_list.append(sub_name)
    else:
        print('Something wrong with {}'.format(loc_path))
        pop_ind.append(row_label)

In [7]:
# Object arrays of the collected file paths and subject names
path_array = np.asarray(path_list, dtype=object)
sub_array = np.asarray(sub_list, dtype=object)

In [8]:
# Remove the subjects whose input file was not found. pop_ind holds the index
# labels yielded by iterrows(), so drop by label instead of using them as
# positions into pheno.index (the two only coincide for a default 0..n-1 index).
pheno.drop(pop_ind, inplace=True)
# Make dummies for site
dummies = pd.get_dummies(pheno['SITE_ID'], prefix='dummie')
# pandas >= 2.0 returns boolean dummy columns, which would end up as
# True/False text in a CSV; cast to integers so the columns are always 0/1
dummies = dummies.astype(int)
# Drop the first site again so we can add an intercept
dummies.drop(dummies.columns[0], axis=1, inplace=True)
# Names of the remaining site dummy columns
dummie_names = list(dummies.columns)

In [9]:
# Keep only the model columns, in the order given by model_vars
ordered_pheno = pheno[model_vars]
# Attach the site dummies, matching rows on the index
model = ordered_pheno.merge(dummies, left_index=True, right_index=True)
# Regressors: the named covariates followed by the site dummy columns
regressors = list(regress_vars)
regressors.extend(dummie_names)

In [10]:
# Write the model table into the local model directory, without the index column
model.to_csv(path_or_buf=os.path.join(model_dir, model_name), index=False)

In [11]:
# Input files for the pipeline: the mask and the model table.
# The per-subject entries in data_dict are currently not passed in
# (disabled: file_dict['data'] = data_dict).
file_dict = {
    'mask': mask_path,
    'model': os.path.join(remote_root, 'pheno', 'sc{}'.format(scale), model_name),
}

In [12]:
# Contrast for the association test: only the covariate of interest gets a
# weight. Zero entries for the other regressors are currently disabled
# (was: cont_dict[regr] = 0 for every regr in regressors).
cont_dict = {coi: 1}

# Pipeline options
opt_dict = {
    'folder_out': pipe_folder,
    'scale': scale,
    # Confound regression is off; the disabled variant also passed
    # 'regress_conf': np.array(regressors, dtype=object)
    'stack': {'flag_conf': False},
    'subtype': {'nb_subtype': nb_subtypes, 'sub_map_type': 'mean'},
    'chi2': {'group_col_id': group_var},
    'association': {'contrast': cont_dict, 'fdr': 0.05},
    # Set test to true
    'flag_test': True,
    # NOTE(review): the previous comment here read "No figures, octave is too
    # stupid for figures", yet the flag is set to True - confirm which one
    # is intended
    'flag_visu': True,
    'flag_chi2': False,
}

In [13]:
# PSOM options: logs go into a 'logs' folder under the pipeline output
# folder, and max_queued is set to the configured number of processes
psom_dict = {
    'path_logs': os.path.join(pipe_folder, 'logs'),
    'max_queued': procs,
}

In [14]:
# Bundle inputs, options, PSOM settings and the subject/path arrays into one
# structure and write it as a .mat file next to the model table
mat_dict = {
    'files_in': file_dict,
    'opt': opt_dict,
    'opt_psom': psom_dict,
    'paths': path_array,
    'subs': sub_array,
}
sio.savemat(os.path.join(model_dir, mat_name), mat_dict)