# Generate the model and the inputs for the pipeline

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
from scipy import io as sio

In [2]:
# Variable names
# Notebook-wide configuration: the values below feed the file names, the
# model table and the pipeline options that the later cells build.

# Confound columns; later concatenated with the site dummy names to form
# the list of regressors that receive contrast weight 0.
#regress_vars = ['AGE_AT_SCAN', 'FD_scrubbed']
regress_vars = ['SEX', 'AGE_AT_SCAN', 'FD_scrubbed']
# Phenotype columns copied into the model table. The column of interest
# and the site dummy names are appended to this list by later cells.
#model_vars = ['SUB_ID', 'SITE_ID', 'DX_GROUP', 'AGE_AT_SCAN', 'FD_scrubbed']
model_vars = ['SEX', 'AGE_AT_SCAN', 'FD_scrubbed']
# Stored in the pipeline options under opt['subtype']['nb_subtype'].
nb_subtypes = 5
# Column of interest: added to model_vars and given contrast weight 1.
# The names on the next line are presumably alternative choices for coi.
# 'ADOS_sb_sev', 'SRS_RAW_TOTAL'
coi = 'DX_GROUP'
# Interpolated into the model file names and the output folder name.
scale = 7
# Free-text tag interpolated into the model file names and output folder.
thing = 'legacy_diag_int'

# File names of the model table (CSV) and of the pipeline input (MAT);
# both are written to <proj_dir>/pheno by later cells.
model_name = 'model_{}_maybe_sc{}.csv'.format(thing, scale)
mat_name = 'model_{}_maybe_sc{}.mat'.format(thing, scale)

# Paths
proj_dir = '/data1/abide/legacy_test'
pheno_in = '/data1/abide/Pheno/site_balanced_279.csv'

# Create the project directory on first use.
if not os.path.isdir(proj_dir):
    os.makedirs(proj_dir)

# local_root is where the existence of each subject file is checked;
# remote_root is the prefix of the paths that are stored for the pipeline.
# Both currently point to the same place.
# NOTE(review): 'sc07' is hard-coded here and is not derived from `scale`.
local_root = '/data1/abide/Out/Scores/sc07/time/rmap_part'
#remote_root = '/home/surchs/sim_data/data/abide/'
remote_root = local_root

# Fixed stuff
# Output folder of the pipeline; its 'logs' subfolder is used for PSOM logs.
pipe_folder = os.path.join(proj_dir, 'subtype/sc{}/{}_maybe/'.format(scale, thing))
mask_path = '/data1/abide/Mask/mask_data_specific.nii.gz'
# File name template, filled with (SITE_ID, SUB_ID as a zero-padded
# 7-digit integer) for every row of the phenotype table.
tpl = '{}_fmri_{:07d}_session_1_run1_rmap_part.nii.gz'

In [3]:
# Load the phenotype table. Later cells read its SUB_ID and SITE_ID columns
# as well as the columns listed in model_vars.
pheno = pd.read_csv(pheno_in)

In [4]:
# Rename the site label 'PITT' to 'Pitt'. SITE_ID is interpolated into the
# subject file names, so the spelling presumably has to match the files on
# disk.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the attribute: the chained in-place form
# operates on an intermediate Series, raises a warning on recent pandas and
# leaves `pheno` unchanged once Copy-on-Write is enabled.
pheno['SITE_ID'] = pheno['SITE_ID'].replace({'PITT': 'Pitt'})

## Generate the input structure for the pipeline

In [5]:
# Walk through the phenotype table and collect the file of every subject.
#   data_dict : subject name -> path handed to the pipeline
#   path_list : the same paths, in table order
#   sub_list  : the matching subject names, in table order
#   pop_ind   : index labels of the rows whose file is missing locally
data_dict = {}
pop_ind = []
path_list = []
sub_list = []
for row_label, row in pheno.iterrows():
    file_name = tpl.format(row.SITE_ID, int(row.SUB_ID))
    local_file = os.path.join(local_root, file_name)

    # The file must exist locally; otherwise report it and remember the
    # row so that it can be removed from the table afterwards.
    if not os.path.isfile(local_file):
        print('Something wrong with {}'.format(local_file))
        pop_ind.append(row_label)
        continue

    subject = 'sub_{}'.format(row.SUB_ID)
    remote_file = os.path.join(remote_root, file_name)
    data_dict[subject] = remote_file
    path_list.append(remote_file)
    sub_list.append(subject)

In [6]:
# Turn both lists into 1-D object arrays (presumably so that savemat stores
# them as cell arrays of strings rather than a padded character matrix).
path_array, sub_array = [
    np.array(entries, dtype=object) for entries in (path_list, sub_list)
]

In [7]:
# Remove the subjects whose file was not found.
# pop_ind holds the index *labels* yielded by pheno.iterrows(), so the rows
# are dropped by label. Looking them up positionally through
# pheno.index[pop_ind] only gives the same rows while the index happens to
# be the default 0..n-1 range.
pheno.drop(pop_ind, inplace=True)

In [8]:
# Make dummies for site: one indicator column per SITE_ID value, named
# 'dummie_<site>'.
# The explicit cast to int keeps the indicators numeric (0/1). Recent pandas
# versions return boolean columns from get_dummies, which would be written
# to the model CSV as True/False.
dummies = pd.get_dummies(pheno['SITE_ID'], prefix='dummie').astype(int)
# Add an intercept
#dummies.rename(columns={dummies.columns[0]:'Intercept'}, inplace=True)
#dummies.Intercept = np.ones((dummies.shape[0],1))
# Get dummie names
dummie_names = list(dummies.columns)

In [9]:
# Make sure the column of interest is part of the model columns.
if coi not in model_vars:
    model_vars.append(coi)

In [10]:
# Keep only the model columns, in the order given by model_vars
ordered_pheno = pheno.loc[:, model_vars]

In [11]:
# Register every site dummy column in model_vars, skipping names that are
# already listed.
for column_name in dummie_names:
    if column_name in model_vars:
        continue
    model_vars.append(column_name)

In [12]:
# Join the site dummies onto the selected phenotype columns, row by row
# (matched on the index)
model = ordered_pheno.merge(dummies, left_index=True, right_index=True)
# The regressors are the confound columns followed by the dummy columns
regressors = list(regress_vars)
regressors.extend(dummie_names)

In [13]:
# Save it locally
# The model table goes to <proj_dir>/pheno. Only proj_dir itself is created
# earlier in the notebook, so make sure the subfolder exists before writing.
pheno_dir = os.path.join(proj_dir, 'pheno')
if not os.path.isdir(pheno_dir):
    os.makedirs(pheno_dir)
model.to_csv(os.path.join(pheno_dir, model_name), index=False)

In [14]:
# files_in structure of the pipeline: the mask and the model table that
# was just written to <proj_dir>/pheno.
# (The per-subject 'data' entry is currently not passed along.)
#file_dict['data'] = data_dict
file_dict = {
    'mask': mask_path,
    'model': os.path.join(proj_dir, 'pheno', model_name),
}

In [15]:
# Set up the opt
# Options structure handed to the pipeline.
opt_dict = dict()
opt_dict['folder_out'] = pipe_folder
# Reuse the notebook-wide `scale` instead of repeating the literal, so the
# option cannot drift away from the scale used in the folder and file names.
opt_dict['scale'] = scale
#opt_dict['stack'] = {'regress_conf': np.array(regressors,dtype=object)}
opt_dict['subtype'] = {'nb_subtype': nb_subtypes}
opt_dict['chi2'] = {'group_col_id': 'DX_GROUP'}

# Make the regressor thingee: the contrast gives weight 0 to every
# regressor (confounds and site dummies) ...
cont_dict = {regr: 0 for regr in regressors}
# ... and weight 1 to the column of interest
cont_dict[coi] = 1

# Add this
opt_dict['association'] = {
    'contrast': cont_dict,
    'fdr': 0.5,
    'flag_intercept': True,
    'normalize_x': False,
}
# Set test to true
opt_dict['flag_test'] = True
# NOTE(review): the original comment here read "No figures, octave is too
# stupid for figures", but the flag is set to True. The value is kept as it
# was; confirm whether figures are wanted.
opt_dict['flag_visu'] = True
opt_dict['stack'] = {'flag_conf': True}

In [16]:
# PSOM options: log folder inside the pipeline output folder and the
# value passed as 'max_queued'
psom_dict = {
    'path_logs': os.path.join(pipe_folder, 'logs'),
    'max_queued': 4,
}

In [17]:
# Bundle everything the pipeline needs into a single .mat file:
# files_in, opt, opt_psom, plus the list of input paths and subject names.
mat_dict = {
    'files_in': file_dict,
    'opt': opt_dict,
    'opt_psom': psom_dict,
    'paths': path_array,
    'subs': sub_array,
}
mat_path = os.path.join(proj_dir, 'pheno', mat_name)
sio.savemat(mat_path, mat_dict)