# Generate the model and the inputs for UM1

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
from scipy import io as sio

In [2]:
# --- Input tables and raw data locations ---
in_pheno = '/home/surchs/GDrive/Projects/Subtypes/TRT/pAD_base_pass_PRE.csv'
in_qc = '/home/surchs/GDrive/Projects/Subtypes/TRT/qc_scrubbing_group.csv'
raw_path = '/data1/guilimin/database2/preventad/rsn_preprocess_20150831/fmri'
mask_path = '/data1/guilimin/database2/preventad/mask_mnc/mask.mnc'

# --- Analysis settings ---
name = 'preventAD'
scale = 7
nb_subtypes = 5
procs = 6

# --- Output locations, all below local_root ---
local_root = '/data1/guilimin/data/preventAD/subtype_TRT'
# Only `scale` is interpolated into the pipeline output folder;
# `name` is not part of this path.
pipe_folder = os.path.join(local_root, 'subtype/sc{}/pre/'.format(scale))

# The model table (.csv) and the pipeline input structure (.mat) share one
# scale-specific folder and the same '<name>_model' stem.
_pheno_dir = os.path.join(local_root, 'pheno', 'sc{}'.format(scale))
model_name = '{}_model.csv'.format(name)
mat_name = '{}_model.mat'.format(name)
model_path = os.path.join(_pheno_dir, model_name)
mat_path = os.path.join(_pheno_dir, mat_name)

In [3]:
# A single QC column serves as grouping variable, contrast of interest and
# confound regressor in this model.
group_var = coi = 'FD_scrubbed'
regress_vars = [coi]
# Columns kept in the model table: the subject identifier plus the regressors
model_vars = ['SubID'] + regress_vars

In [4]:
# Load the phenotype table and the QC table
pheno = pd.read_csv(in_pheno)
# The QC column whose header is a single space holds the identifiers;
# give it a usable name.
qc = pd.read_csv(in_qc).rename(columns={' ': 'SubID'})
# Identifiers are matched as strings later on, so convert them here
qc['SubID'] = qc['SubID'].astype(str)
# Subject names of the sample, taken from the phenotype table
subs = pheno['CandID'].astype(str).tolist()

In [5]:
# Collect the QC rows and the raw file path for each subject in `subs`.
#
# For every subject the QC table is searched for entries whose SubID contains
# the subject name together with 'BL00' and 'rest1'. The raw file name is
# derived from the matched SubID, and the subject is added to data_dict,
# path_list and sub_list only if that file exists on disk.
#
# Produces:
#   data_frame - QC rows of all matched subjects (original QC index kept)
#   data_dict  - maps 's<sub>' to the raw file path
#   path_list  - raw file paths, in subject order
#   sub_list   - 's<sub>' names, aligned with path_list
matched_rows = list()
data_dict = dict()
path_list = list()
sub_list = list()
for sub in subs:
    s_name = 's{}'.format(sub)
    is_match = (qc.SubID.str.contains(sub)
                & qc.SubID.str.contains('BL00')
                & qc.SubID.str.contains('rest1'))
    sub_rows = qc[is_match]
    if sub_rows.empty:
        # Without a QC entry there is no file name to derive. Skip the subject
        # instead of reusing the entry of a previously processed subject.
        print('    {} has no BL00 rest1 entry in the QC table'.format(sub))
        continue
    # NOTE(review): the QC rows are kept for the model even when the raw file
    # turns out to be missing below, so the model table can contain subjects
    # that are absent from path_list - confirm this is intended.
    matched_rows.append(sub_rows)
    # Take the file name from this subject's own match, not from the
    # accumulated table (if several rows match, the first one is used).
    f_name = str(sub_rows['SubID'].values[0]).strip(' ')
    f_path = os.path.join(raw_path, 'fmri_{}.mnc.gz'.format(f_name))
    if os.path.isfile(f_path):
        data_dict[s_name] = f_path
        path_list.append(f_path)
        sub_list.append(s_name)
    else:
        print("    {} wants {} but it's not there".format(sub, f_path))

# Build the table once at the end; growing a DataFrame row by row is quadratic
# and DataFrame.append no longer exists in pandas >= 2.0.
if matched_rows:
    data_frame = pd.concat(matched_rows)
else:
    data_frame = pd.DataFrame(columns=qc.columns)

In [6]:
# Turn the collected paths and subject names into object-dtype numpy arrays
path_array, sub_array = (
    np.array(items, dtype=object) for items in (path_list, sub_list)
)

In [7]:
# Folder that receives the model files for this scale; create it when missing
model_dir = os.path.join(local_root, 'pheno', 'sc{}'.format(scale))
model_dir_exists = os.path.isdir(model_dir)
if not model_dir_exists:
    os.makedirs(model_dir)

## Generate the input structure for the pipeline

In [8]:
# Keep only the model columns, in the order given by model_vars
model = data_frame.loc[:, model_vars]
# Confound regressors handed to the pipeline options
regressors = regress_vars
# Write the model table to model_path, without the index column
model.to_csv(path_or_buf=model_path, index=False)

In [9]:
# Input files for the pipeline: the mask and the model table.
# The per-subject data dictionary (data_dict) is currently left out.
file_dict = {
    'mask': mask_path,
    'model': model_path,
}

In [10]:
# Contrast: weight 1 on the variable named by `coi`
cont_dict = {coi: 1}

# Pipeline options
opt_dict = {
    'folder_out': pipe_folder,
    'scale': scale,
    # Confounds are passed as an object array of column names
    'stack': {'regress_conf': np.array(regressors, dtype=object)},
    'subtype': {'nb_subtype': nb_subtypes},
    'association': {'contrast': cont_dict, 'fdr': 0.05},
    'visu': {'data_type': 'continuous'},
    # Test flag is switched on
    'flag_test': True,
    # NOTE(review): the previous comment on this flag said no figures were
    # wanted (Octave), yet the flag is set to True - confirm the intended value.
    'flag_visu': True,
    'flag_chi2': False,
}

In [11]:
# PSOM options: log folder inside the pipeline folder and the queue limit
psom_dict = {
    'path_logs': os.path.join(pipe_folder, 'logs'),
    'max_queued': procs,
}

In [12]:
# Bundle the inputs, options, PSOM options, file paths and subject names
# and write them to a single .mat file at mat_path
mat_dict = {
    'files_in': file_dict,
    'opt': opt_dict,
    'opt_psom': psom_dict,
    'paths': path_array,
    'subs': sub_array,
}
sio.savemat(mat_path, mat_dict)