# Replicability of Subtype Weights
This script sets up the pipeline that creates subtype maps on the full ABIDE sample. We only want the subtype maps and do not care about any association test. However, the pipeline cannot yet run without one, so we add a dummy association test to it.

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
from scipy import io as sio

In [2]:
# Analysis settings
scale = 7                               # selects the sc<scale> folders and file names
nb_subtypes = 5                         # passed to the pipeline as 'nb_subtype'
procs = 6                               # passed to psom as 'max_queued'
coi = 'FD_scrubbed'                     # covariate used in the association contrast
regress_vars = ['FD_scrubbed']          # confounds listed under 'regress_conf'
model_vars = ['SUB_ID', 'FD_scrubbed']  # phenotype columns kept in the model file

# Output file names share one stem and differ only in their extension
name_stem = 'model_qc_maybe_full_subtpye_sc{}'.format(scale)
model_name = name_stem + '.csv'
mat_name = name_stem + '.mat'

# Input locations
root = '/data1/guilimin/data/abide/'
pheno_in = os.path.join(root, 'pheno/merged_abide_full_maybe.csv')
mask_path = os.path.join(root, 'masks/template_mask.nii.gz')
# File name template, filled with (SUB_ID, session, run)
f_tmp = 'netstack_fmri_{:07}_session_{}_run{}.nii.gz'

# Output locations, both specific to the chosen scale
pipe_folder = os.path.join(root, 'paper/sc{}/subtype_full/'.format(scale))
model_dir = os.path.join(root, 'pheno', 'sc{}'.format(scale))

In [3]:
# Create the folder that will receive the model files if it is missing
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)
# The covariate of interest must be one of the model columns
if coi not in model_vars:
    model_vars.append(coi)

In [4]:
# Load the phenotype table. Later cells read the SUB_ID, session, run and
# SITE_ID columns from it, plus every column listed in model_vars.
pheno = pd.read_csv(pheno_in)

In [5]:
# Build the expected input path of every row in pheno and sort the rows into
# found (data_dict / path_list / sub_list) and missing (pop_ind).
data_dict = {}
path_list = []
sub_list = []
pop_ind = []  # index labels of the rows whose file is missing
for row_label, row in pheno.iterrows():
    sub_name = 'sub_{}'.format(row['SUB_ID'])
    file_name = f_tmp.format(row['SUB_ID'], row['session'], row['run'])
    rel_path = os.path.join('netstack', 'sca_z', 'sc{}'.format(scale),
                            file_name)
    in_path = os.path.join(root, rel_path)

    # Only files that exist locally are handed on
    if os.path.isfile(in_path):
        data_dict[sub_name] = in_path
        path_list.append(in_path)
        sub_list.append(sub_name)
    else:
        print('Something wrong with {}'.format(in_path))
        pop_ind.append(row_label)

# Object arrays of the retained paths and subject names, in pheno row order
path_array = np.array(path_list, dtype=object)
sub_array = np.array(sub_list, dtype=object)

In [6]:
# Remove the rows whose input file was not found. pop_ind holds the index
# labels yielded by pheno.iterrows(), so drop by label rather than treating
# the labels as positions into pheno.index (the two only coincide for a
# default 0..n-1 index).
pheno.drop(pop_ind, inplace=True)
# One indicator column per site
dummies = pd.get_dummies(pheno['SITE_ID'], prefix='dummie')
# Leave out the first site so that an intercept can be added to the model
dummies.drop(dummies.columns[0], axis=1, inplace=True)
# Recent pandas versions return boolean dummies, which to_csv would write as
# True/False; force integers so the model file contains 0/1.
dummies = dummies.astype(int)
# Names of the remaining site columns
dummie_names = list(dummies.columns)

# Select the model columns in the order given by model_vars
ordered_pheno = pheno[model_vars]
# Attach the site columns row by row (matched on the index)
model = pd.merge(ordered_pheno, dummies, left_index=True, right_index=True)
# Confounds to regress: the configured variables plus the site columns
regressors = regress_vars + dummie_names

In [7]:
# Write the model table into the model folder, without the index column
model_path = os.path.join(model_dir, model_name)
model.to_csv(model_path, index=False)

In [8]:
# Input files handed to the pipeline: the mask and the model file
# written to the model folder.
file_dict = {
    'mask': mask_path,
    'model': os.path.join(model_dir, model_name),
}
# The 'data' entry is currently disabled:
#file_dict['data'] = data_dict

In [9]:
# Set up the opt
# (The psom options are built in their own cell further down; the copy that
# had been pasted into the middle of this cell was redundant and is removed.)

# Contrast for the dummy association test: only the covariate of interest
# gets a weight, no entries are added for the other regressors.
cont_dict = {coi: 1}

opt_dict = dict()
opt_dict['folder_out'] = pipe_folder
opt_dict['scale'] = scale
# Confounds to regress, stored as an object array of column names
opt_dict['stack'] = {'regress_conf': np.array(regressors, dtype=object)}
opt_dict['subtype'] = {'nb_subtype': nb_subtypes, 'sub_map_type': 'mean'}
opt_dict['association'] = {'contrast': cont_dict, 'fdr': 0.05}
# Set test to true
opt_dict['flag_test'] = True
# No figures (original note: Octave cannot handle them)
opt_dict['flag_visu'] = False
opt_dict['flag_chi2'] = False

In [10]:
# psom options: log folder inside the pipeline output folder and the cap on
# queued jobs
psom_dict = {
    'path_logs': os.path.join(pipe_folder, 'logs'),
    'max_queued': procs,
}

In [11]:
# Bundle the inputs, the options and the subject lists into one .mat file
# stored next to the model file.
mat_path = os.path.join(model_dir, mat_name)
mat_dict = {
    'files_in': file_dict,
    'opt': opt_dict,
    'opt_psom': psom_dict,
    'paths': path_array,
    'subs': sub_array,
}
sio.savemat(mat_path, mat_dict)