In [1]:
import re
import numpy as np
import pandas as pd

In [2]:
# Paths
# Directory prefixes shared by the locations below
hnu1_dir = '/data1/guillimin/sim_data/data/hnu1'
qc_dir = hnu1_dir + '/preproc_20161103/quality_control'
pheno_dir = hnu1_dir + '/pheno'

# Tables that are read in
path_qc = qc_dir + '/manual_qc_rating.csv'
path_pheno = pheno_dir + '/hnu1_pheno.csv'
path_motion = qc_dir + '/group_motion/qc_scrubbing_group.csv'
# Model files that are written out
path_full = pheno_dir + '/hnu1_full_model.csv'
path_maybe = pheno_dir + '/hnu1_qc_pass_model.csv'

In [3]:
# Read the QC table and cast the subject ID column to int so that it can be
# used as an integer merge key.
qc = pd.read_csv(path_qc).assign(SubID=lambda ratings: ratings['SubID'].astype(int))

In [4]:
def _session_number(label):
    """Convert a SESSION label into a 1-based session number.

    'Baseline' maps to 1 and a label containing 'Retest_<k>' maps to k + 1.
    Every digit of <k> is used, so 'Retest_10' gives 11.

    Raises:
        ValueError: if the label is not 'Baseline' and holds no 'Retest_<digits>'.
    """
    if label == 'Baseline':
        return 1
    match = re.search(r'(?<=Retest_)\d+', label)
    if match is None:
        raise ValueError('Unrecognised SESSION label: {!r}'.format(label))
    # Built-in int: the np.int alias no longer exists in current NumPy releases.
    return int(match.group()) + 1


# Load the phenotype table and rename the ID / age columns
pheno = pd.read_csv(path_pheno).rename(
    columns={'SUBID': 'SubID', 'AGE_AT_SCAN_1': 'AGE_AT_SCAN'})
# Rename Numeric values
value_labels = {
    'SEASON': {0: 'Winter', 1: 'Spring', 2: 'Summer', 3: 'Fall'},
    'SEX': {1: 'Female', 2: 'Male'},
    'PRECEDING_CONDITION': {0: 'No active task'},
    'VISUAL_STIMULATION_CONDITION': {1: 'Fixation'},
    'RETEST_DESIGN': {2: 'Between Session'},
}
for column, labels in value_labels.items():
    # Assign the result back: an in-place replace on `pheno.<column>` acts on
    # a temporary Series and is not guaranteed to update the frame.
    pheno[column] = pheno[column].replace(labels)
# Get session and sub_session code
pheno['session'] = [_session_number(label) for label in pheno['SESSION']]
# '<SubID>_<session>' key; the motion table builds the same key for merging
pheno['tmp'] = pheno['SubID'].astype(str) + '_' + pheno['session'].astype(str)

In [5]:
# Load the motion table; the column whose header is a single blank (' ')
# is renamed to ID_info.
motion = pd.read_csv(path_motion).rename(columns={' ': 'ID_info'})
# Get SubID and session
# ID_info is split on '_': field 0 is parsed as the subject ID and field 1
# must contain 'session<digits>'.
id_fields = [id_info.split('_') for id_info in motion['ID_info']]
# Built-in int: the np.int alias no longer exists in current NumPy releases.
motion['SubID'] = [int(fields[0]) for fields in id_fields]
motion['session'] = [int(re.search(r'(?<=session)\d+', fields[1]).group())
                     for fields in id_fields]
# '<SubID>_<session>' key used to merge with the phenotype table
motion['tmp'] = motion['SubID'].astype(str) + '_' + motion['session'].astype(str)

In [6]:
# Combine stuff
# Only the motion measures and the merge key are taken from the motion table
# (note the trailing blanks in three of the column names).
motion_keep = ['frames_scrubbed ', 'frames_OK ', 'FD ', 'FD_scrubbed', 'tmp']
# Both merges use the pandas default join (inner): first on the 'tmp' key,
# then with the QC table on SubID.
mot_phen = motion[motion_keep].merge(pheno, on='tmp')
combine = qc.merge(mot_phen, on='SubID')

In [7]:
# Select columns and order
# The final column order is the concatenation of the groups below.
key_part = ['SubID', 'session']
descriptive_part = [
    'AGE_AT_SCAN', 'SEX', 'RESTING_STATE_INSTRUCTION',
    'VISUAL_STIMULATION_CONDITION', 'RETEST_DESIGN', 'PRECEDING_CONDITION',
    'RETEST_DURATION', 'RETEST_UNITS', 'TIME_OF_DAY', 'SEASON', 'LMP',
]
# Three of these names end with a blank, matching the motion table headers
motion_part = ['frames_scrubbed ', 'frames_OK ', 'FD ', 'FD_scrubbed']
# Presumably the manual QC ratings -- TODO confirm against the QC table
rating_part = ['T1', 'T2']
cols = key_part + descriptive_part + motion_part + rating_part

In [8]:
# Keep every row of the combined table, restricted to `cols` in that order
full = combine.loc[:, cols]

In [9]:
# Save the model
full.to_csv(path_full, index=False)
# Second file: the same table without the rows whose T1 value is 'FAIL'
not_failed = full['T1'] != 'FAIL'
maybe = full[not_failed]
maybe.to_csv(path_maybe, index=False)

In [10]:
# Display the column labels of `full`
full.columns

Index(['SubID', 'session', 'AGE_AT_SCAN', 'SEX', 'RESTING_STATE_INSTRUCTION',
       'VISUAL_STIMULATION_CONDITION', 'RETEST_DESIGN', 'PRECEDING_CONDITION',
       'RETEST_DURATION', 'RETEST_UNITS', 'TIME_OF_DAY', 'SEASON', 'LMP',
       'frames_scrubbed ', 'frames_OK ', 'FD ', 'FD_scrubbed', 'T1', 'T2'],
      dtype='object')