In [1]:
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

from functools import reduce
from itertools import combinations

from scipy import stats

# pandas display limits used when frames are rendered in this notebook
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 500)

# truthy -> read data from the /Users/... location, falsy -> from /home/...
home = 0

dpath = (
    '/Users/cglab/projects/abcd/data/abcd5.1-rser/'
    if home
    else '/home/cglab/projects/abcd/data/abcd5.1-rser/'
)

#### Function to pull derivatives

In [2]:
def get_deriviatives(df, table_file, table_key, merge_how, data_dir=None):
    """Pull the requested variables from one table file and add them to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Running dataframe. When it has no ``src_subject_id`` column (for
        example an empty ``pd.DataFrame()``), the selected table columns
        become the result; otherwise they are merged onto ``df``.
    table_file : str
        CSV file name inside the data directory, e.g. ``'led_l_adi.csv'``.
    table_key : pandas.DataFrame
        Lookup with a ``Table`` column (file name without its extension) and
        a ``Variable`` column (name of a column to pull from that table).
    merge_how : str
        Join type forwarded to ``DataFrame.merge`` (``'left'``, ``'inner'``, ...).
    data_dir : str, optional
        Directory that holds ``table_file``. When omitted, the notebook-level
        ``dpath`` is used, which matches the original behaviour.

    Returns
    -------
    pandas.DataFrame
        ``df`` extended with the selected columns of the table.
    """
    import os

    base_dir = dpath if data_dir is None else data_dir
    dat = pd.read_csv(os.path.join(base_dir, table_file))
    # table name = file name minus its extension; splitext (instead of cutting
    # at the first period) keeps names that themselves contain periods intact
    table = os.path.splitext(os.path.basename(table_file))[0]
    # the first two columns of the table are used as the subject / event keys
    se_nms = dat.columns[:2].tolist()
    # variables requested for this table, followed by the key columns;
    # dict.fromkeys drops repeats while preserving order so that no column
    # is selected twice
    wanted = table_key.loc[table_key['Table'] == table, 'Variable'].tolist()
    deriviative_cols = list(dict.fromkeys(wanted + se_nms))
    selected = dat[deriviative_cols]
    print('Prior to merge rs df size is {0} and other df shape is {1}'.format(df.shape, selected.shape))
    if 'src_subject_id' not in df.columns:
        # first table: it simply becomes the running dataframe
        df = selected.copy()
    else:
        # every later table is joined on subject + event; the join type is the
        # caller's choice (with 'left', rows of df that have no match are kept)
        df = df.merge(selected, how=merge_how, on=['src_subject_id', 'eventname'])
    print('Any duplicated columns? {}'.format(df.columns.duplicated().any()))
    print('New rs df size is {}'.format(df.shape))
    return df

### Load Area deprivation index

In [3]:
# read the led_l_adi table from the data directory and preview its first rows
led = pd.read_csv(f'{dpath}led_l_adi.csv')
led.head(5)

Unnamed: 0,src_subject_id,eventname,reshist_addr1_adi_edu_l,reshist_addr1_adi_edu_h,reshist_addr1_adi_work_c,reshist_addr1_adi_income,reshist_addr1_adi_in_dis,reshist_addr1_adi_home_v,reshist_addr1_adi_rent,reshist_addr1_adi_mortg,reshist_addr1_adi_home_o,reshist_addr1_adi_crowd,reshist_addr1_adi_unemp,reshist_addr1_adi_pov,reshist_addr1_adi_b138,reshist_addr1_adi_sp,reshist_addr1_adi_ncar,reshist_addr1_adi_ntel,reshist_addr1_adi_nplumb,reshist_addr1_adi_wsum,reshist_addr1_adi_perc,reshist_addr2_adi_edu_l,reshist_addr2_adi_edu_h,reshist_addr2_adi_work_c,reshist_addr2_adi_income,reshist_addr2_adi_in_dis,reshist_addr2_adi_home_v,reshist_addr2_adi_rent,reshist_addr2_adi_mortg,reshist_addr2_adi_home_o,reshist_addr2_adi_crowd,reshist_addr2_adi_unemp,reshist_addr2_adi_pov,reshist_addr2_adi_b138,reshist_addr2_adi_sp,reshist_addr2_adi_ncar,reshist_addr2_adi_ntel,reshist_addr2_adi_nplumb,reshist_addr2_adi_wsum,reshist_addr2_adi_perc,reshist_addr3_adi_edu_l,reshist_addr3_adi_edu_h,reshist_addr3_adi_work_c,reshist_addr3_adi_income,reshist_addr3_adi_in_dis,reshist_addr3_adi_home_v,reshist_addr3_adi_rent,reshist_addr3_adi_mortg,reshist_addr3_adi_home_o,reshist_addr3_adi_crowd,reshist_addr3_adi_unemp,reshist_addr3_adi_pov,reshist_addr3_adi_b138,reshist_addr3_adi_sp,reshist_addr3_adi_ncar,reshist_addr3_adi_ntel,reshist_addr3_adi_nplumb,reshist_addr3_adi_wsum,reshist_addr3_adi_perc
0,NDAR_INV005V6D2C,baseline_year_1_arm_1,4.861931,87.37314,95.0195,45609.0,2.94634,271100.0,995.0,1804.0,16.29029,2.440678,9.991899,17.770597,27.568094,29.07916,14.237288,1.966102,0.0,97.75763,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,NDAR_INV007W6H7B,baseline_year_1_arm_1,3.559711,94.81628,99.60899,129961.0,3.567694,897800.0,1605.0,1534.0,26.772322,2.930622,6.254295,3.665339,33.2687,0.0,78.11005,4.814593,0.751274,27.144264,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,NDAR_INV00BD7VDC,baseline_year_1_arm_1,0.635838,96.58959,97.06238,84150.0,1.344857,149600.0,845.0,1165.0,73.89431,0.0,3.420132,1.204819,6.977778,5.863454,7.986447,0.0,0.0,104.1378,45.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,NDAR_INV00CY2MDM,baseline_year_1_arm_1,2.196885,93.04783,92.36878,63977.0,2.714429,108100.0,713.0,1028.0,61.082424,2.690397,7.476038,6.062932,12.87053,15.349195,13.741722,1.200331,0.0,110.804726,66.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,NDAR_INV00NPMHND,baseline_year_1_arm_1,0.612392,94.5245,94.13694,81602.0,0.538996,272400.0,1100.0,1463.0,83.93555,3.215434,3.634927,3.376623,5.488851,13.073593,0.578778,0.96463,0.0,92.81106,25.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [4]:
# number of rows per event in the led_l_adi table
led['eventname'].value_counts()

eventname
baseline_year_1_arm_1    11215
Name: count, dtype: int64

In [5]:
# lookup sheet listing which Variable to pull from which Table
pullvars = pd.read_excel(f'{dpath}ContextualAdversity.xlsx')
pullvars.head(5)

Unnamed: 0,Variable,Table,Measure,Factor
0,reshist_addr1_adi_unemp,led_l_adi,Area Deprivation Index - unemployment,Harsh SES
1,reshist_addr1_adi_edu_l,led_l_adi,Area Deprivation Index - education,Harsh SES
2,reshist_addr1_adi_crowd,led_l_adi,Area Deprivation Index - crowding,Harsh SES
3,reshist_addr1_adi_pov,led_l_adi,Area Deprivation Index - poverty,Harsh SES
4,reshist_addr1_opat_kfrpp_avg,led_l_socmob,Social Mobility,Harsh SES


### Load, Select, & Merge data, 'abcd_p_demo.csv'
* Get relevant derivatives from abcd_p_demo.csv
* and merge with the overall adv df

In [6]:
# start from an empty frame: the first table pulled becomes `adv`
adv = get_deriviatives(pd.DataFrame(), 'abcd_p_demo.csv', pullvars, merge_how='left')

Prior to merge rs df size is (0, 0) and other df shape is (48807, 4)
Any duplicated columns? False
New rs df size is (48807, 4)


In [7]:
# rows per event after pulling abcd_p_demo
adv['eventname'].value_counts()

eventname
baseline_year_1_arm_1       11868
1_year_follow_up_y_arm_1    11220
2_year_follow_up_y_arm_1    10908
3_year_follow_up_y_arm_1    10123
4_year_follow_up_y_arm_1     4688
Name: count, dtype: int64

### Load, Select, & Merge data, 'led_l_adi.csv'
* Get relevant derivatives from led_l_adi.csv
* and merge with the overall adv df

In [8]:
# left-join the requested led_l_adi variables onto adv
adv = get_deriviatives(adv, 'led_l_adi.csv', pullvars, merge_how='left')

Prior to merge rs df size is (48807, 4) and other df shape is (11215, 6)
Any duplicated columns? False
New rs df size is (48807, 8)


In [9]:
# rows per event after the led_l_adi merge
adv['eventname'].value_counts()

eventname
baseline_year_1_arm_1       11868
1_year_follow_up_y_arm_1    11220
2_year_follow_up_y_arm_1    10908
3_year_follow_up_y_arm_1    10123
4_year_follow_up_y_arm_1     4688
Name: count, dtype: int64

### Load, Select, & Merge data, 'led_l_socmob.csv'
* Get relevant derivatives from led_l_socmob.csv
* and merge with the overall adv df

In [10]:
# left-join the requested led_l_socmob variables onto adv
adv = get_deriviatives(adv, 'led_l_socmob.csv', pullvars, merge_how='left')

Prior to merge rs df size is (48807, 8) and other df shape is (10780, 3)
Any duplicated columns? False
New rs df size is (48807, 9)


In [11]:
# rows per event after the led_l_socmob merge
adv['eventname'].value_counts()

eventname
baseline_year_1_arm_1       11868
1_year_follow_up_y_arm_1    11220
2_year_follow_up_y_arm_1    10908
3_year_follow_up_y_arm_1    10123
4_year_follow_up_y_arm_1     4688
Name: count, dtype: int64

### Load, Select, & Merge data, 'ce_p_nsc.csv'
* Get relevant derivatives from ce_p_nsc.csv
* and merge with the overall adv df

In [12]:
# left-join the requested ce_p_nsc variables onto adv
adv = get_deriviatives(adv, 'ce_p_nsc.csv', pullvars, merge_how='left')

Prior to merge rs df size is (48807, 9) and other df shape is (38815, 5)
Any duplicated columns? False
New rs df size is (48807, 12)


In [13]:
# reload the full abcd_p_demo table and show the two education columns
# for every subject that also appears in adv
data = pd.read_csv(f'{dpath}abcd_p_demo.csv')
data.loc[
    data['src_subject_id'].isin(adv['src_subject_id']),
    ['demo_prnt_ed_v2', 'demo_prtnr_ed_v2', 'src_subject_id'],
]

Unnamed: 0,demo_prnt_ed_v2,demo_prtnr_ed_v2,src_subject_id
0,13.0,13.0,NDAR_INV003RTV85
1,,,NDAR_INV003RTV85
2,,,NDAR_INV003RTV85
3,,,NDAR_INV003RTV85
4,6.0,999.0,NDAR_INV005V6D2C
...,...,...,...
48802,17.0,13.0,NDAR_INVZZZP87KR
48803,,,NDAR_INVZZZP87KR
48804,,,NDAR_INVZZZP87KR
48805,,,NDAR_INVZZZP87KR


In [17]:
# columns whose name contains one of the three substrings, plus the subject id
yr1_vars = [
    col for col in adv.columns
    if any(tag in col for tag in ('adi', 'opat_kfrpp', 'neigh'))
] + ['src_subject_id']
# show those columns for the 1-year follow-up rows only
adv.loc[adv['eventname'] == '1_year_follow_up_y_arm_1', yr1_vars]

Unnamed: 0,reshist_addr1_adi_unemp,reshist_addr1_adi_edu_l,reshist_addr1_adi_crowd,reshist_addr1_adi_pov,reshist_addr1_opat_kfrpp_avg,neighborhood1r_p,neighborhood2r_p,neighborhood3r_p,src_subject_id
1,,,,,,5.0,5.0,4.0,NDAR_INV003RTV85
5,,,,,,1.0,4.0,3.0,NDAR_INV005V6D2C
12,,,,,,5.0,5.0,4.0,NDAR_INV00BD7VDC
15,,,,,,2.0,3.0,2.0,NDAR_INV00CY2MDM
20,,,,,,4.0,2.0,3.0,NDAR_INV00HEV6HB
...,...,...,...,...,...,...,...,...,...
48781,,,,,,4.0,4.0,4.0,NDAR_INVZZLZCKAY
48786,,,,,,5.0,5.0,4.0,NDAR_INVZZNX6W2P
48791,,,,,,5.0,4.0,4.0,NDAR_INVZZPKBDAC
48796,,,,,,5.0,5.0,4.0,NDAR_INVZZZ2ALR6


In [16]:
# preview the first rows of the raw abcd_p_demo table
data.head(5)

Unnamed: 0,src_subject_id,eventname,demoi_p_select_language___1,demo_prim,demo_brthdat_v2,demo_ed_v2,demo_adopt_agex_v2,demo_adopt_agex_v2_bl_dk,demo_sex_v2,demo_gender_id_v2,demo_race_a_p___10,demo_race_a_p___11,demo_race_a_p___12,demo_race_a_p___13,demo_race_a_p___14,demo_race_a_p___15,demo_race_a_p___16,demo_race_a_p___17,demo_race_a_p___18,demo_race_a_p___19,demo_race_a_p___20,demo_race_a_p___21,demo_race_a_p___22,demo_race_a_p___23,demo_race_a_p___24,demo_race_a_p___25,demo_race_a_p___77,demo_race_a_p___99,demo_ethn_v2,demo_ethn2_v2,demo_origin_v2,demo_years_us_v2,demo_years_us_v2_dk,demo_relig_v2,demo_prnt_age_v2,demo_prnt_age_v2_bl_refuse,demo_prnt_gender_id_v2,demo_prnt_race_a_v2___10,demo_prnt_race_a_v2___11,demo_prnt_race_a_v2___12,demo_prnt_race_a_v2___13,demo_prnt_race_a_v2___14,demo_prnt_race_a_v2___15,demo_prnt_race_a_v2___16,demo_prnt_race_a_v2___17,demo_prnt_race_a_v2___18,demo_prnt_race_a_v2___19,demo_prnt_race_a_v2___20,demo_prnt_race_a_v2___21,demo_prnt_race_a_v2___22,demo_prnt_race_a_v2___23,demo_prnt_race_a_v2___24,demo_prnt_race_a_v2___25,demo_prnt_race_a_v2___77,demo_prnt_race_a_v2___99,naas_id,naas_mom_id,naas_id_dad,naas_birthplace,naas_raised,naas_comm_contact,naas_pride,naas_self_rating,naas_traditions,demo_prnt_ethn_v2,demo_prnt_ethn2_v2,demo_prnt_16,demo_prnt_16a,demo_prnt_origin_v2,demo_biofather_v2,demo_biomother_v2,demo_matgrandm_v2,demo_matgrandf_v2,demo_patgrandm_v2,demo_patgrandf_v2,demo_prnt_years_us_v2,demo_prnt_years_us_v2_dk,demo_prnt_marital_v2,demo_prnt_ed_v2,demo_prnt_empl_v2,demo_prnt_empl_time,demo_prnt_income_v2,demo_prnt_prtnr_v2,demo_prnt_prtnr_bio,demo_prnt_prtnr_adopt,demo_prtnr_ed_v2,demo_prtnr_empl_v2,demo_prtnr_empl_time,demo_prtnr_income_v2,demo_comb_income_v2,demo_fam_exp1_v2,demo_fam_exp2_v2,demo_fam_exp3_v2,demo_fam_exp4_v2,demo_fam_exp5_v2,demo_fam_exp6_v2,demo_fam_exp7_v2,demo_roster_v2,demo_roster_v2_refuse,fam_roster_2c_v2,...,demo_prnt_prtnr_adopt_l,demo_prtnr_ed_v2_l,demo_prtnr_empl_v2_l,demo_prtnr_empl_t
ime_l,demo_prtnr_indust_refuse_l,demo_prtnr_income_v2_l,demo_child_time_v2_l,demo_child_time2_v2_l,demo_child_time2_v2_dk_l,demo_child_time3_v2_l,demo_comb_income_v2_l,demo_roster_v2_l,demo_roster_v2_refuse_l,fam_roster_2c_v2_l,fam_roster_3c_v2_l,fam_roster_4c_v2_l,fam_roster_5c_v2_l,fam_roster_6c_v2_l,fam_roster_7c_v2_l,fam_roster_8c_v2_l,fam_roster_9c_v2_l,fam_roster_10c_v2_l,fam_roster_11c_v2_l,fam_roster_12c_v2_l,fam_roster_13c_v2_l,fam_roster_14c_v2_l,fam_roster_15c_v2_l,demo_fam_exp1_v2_l,demo_fam_exp2_v2_l,demo_fam_exp3_v2_l,demo_fam_exp4_v2_l,demo_fam_exp5_v2_l,demo_fam_exp6_v2_l,demo_fam_exp7_v2_l,demo_yrs_1_l,demo_yrs_2_l,demo_yrs_2a_l,demo_yrs_2b_l,demo_yrs_2_no_display_l___1,demo_med_insur_f_p,demo_med_insur_g_p,demo_med_insur_h_p,demo_prnt_race_acs_p__10,demo_prnt_race_acs_p__11,demo_prnt_race_acs_p__12,demo_prnt_race_acs_p__13,demo_prnt_race_acs_p__14,demo_prnt_race_acs_p__15,demo_prnt_race_acs_p__16,demo_prnt_race_acs_p__17,demo_prnt_race_acs_p__18,demo_prnt_race_acs_p__19,demo_prnt_race_acs_p__20,demo_prnt_race_acs_p__21,demo_prnt_race_acs_p__22,demo_prnt_race_acs_p__23,demo_prnt_race_acs_p__24,demo_prnt_race_acs_p__777,demo_prnt_race_acs_p__999,demo_prnt_ed_v2_2yr_l,demo_prnt_ethnic_acs_p,demo_prtnr_ed_v2_2yr_l,demo_med_insur_a_p,demo_med_insur_b_p,demo_med_insur_c_p,demo_med_insur_d_p,demo_med_insur_e_p,demo_nat_lang_3_yrs_eng_p___7,demo_nat_lang_3_yrs_eng_p___8,demo_nat_lang_3_yrs_eng_p___9,demo_nat_lang_3_yrs_eng_p___10,demo_nat_lang_3_yrs_eng_p___11,demo_nat_lang_3_yrs_eng_p___12,demo_nat_lang_3_yrs_eng_p___13,demo_nat_lang_3_yrs_eng_p___14,demo_nat_lang_3_yrs_other_p__0,demo_nat_lang_3_yrs_other_p__1,demo_nat_lang_3_yrs_other_p__2,demo_nat_lang_3_yrs_other_p__3,demo_nat_lang_3_yrs_other_p__4,demo_nat_lang_3_yrs_other_p__5,demo_nat_lang_3_yrs_other_p__6,demo_nat_lang_3_yrs_other_p__7,demo_nat_lang_3_yrs_other_p__8,demo_nat_lang_3_yrs_other_p__9,demo_nat_lang_3_yrs_other_p_10,demo_nat_lang_3_yrs_other_p_11,demo_nat_lang_3_yrs_eng_p___0,demo_nat_l
ang_3_yrs_other_p_12,demo_nat_lang_3_yrs_other_p_13,demo_nat_lang_3_yrs_other_p_14,demo_nat_lang_3_p,demo_nat_lang_3_yrs_eng_p___1,demo_nat_lang_3_yrs_eng_p___2,demo_nat_lang_3_yrs_eng_p___3,demo_nat_lang_3_yrs_eng_p___4,demo_nat_lang_3_yrs_eng_p___5,demo_nat_lang_3_yrs_eng_p___6,race_ethnicity,acs_raked_propensity_score
0,NDAR_INV003RTV85,baseline_year_1_arm_1,0,1.0,10.0,5.0,,,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,,189.0,,,2.0,43.0,,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,2.0,,0.0,1.0,,,,,,,,43.0,,1.0,13.0,1.0,1.0,5.0,1.0,1.0,,13.0,1.0,1.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,,1.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,466.092707
1,NDAR_INV003RTV85,1_year_follow_up_y_arm_1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,13.0,1.0,1.0,,8.0,0.0,,,,8.0,6.0,,1.0,3.0,3.0,3.0,3.0,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,533.38182
2,NDAR_INV003RTV85,2_year_follow_up_y_arm_1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,1.0,1.0,,8.0,0.0,,,,8.0,6.0,,1.0,3.0,3.0,3.0,3.0,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,13.0,0.0,13.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,NDAR_INV003RTV85,3_year_follow_up_y_arm_1,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,1.0,1.0,,8.0,0.0,,,,9.0,6.0,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,13.0,0.0,13.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,
4,NDAR_INV005V6D2C,baseline_year_1_arm_1,1,1.0,10.0,4.0,,,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,14.0,189.0,,,999.0,39.0,,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,1.0,13.0,1.0,,111.0,111.0,111.0,111.0,111.0,111.0,111.0,16.0,,1.0,6.0,6.0,,1.0,1.0,1.0,,999.0,1.0,1.0,999.0,999.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,,1.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,520.488325


In [11]:
# True for every row whose subject id already appeared in an earlier row
data.duplicated(subset='src_subject_id', keep='first')

0        False
1         True
2         True
3         True
4        False
         ...  
48802    False
48803     True
48804     True
48805     True
48806     True
Length: 48807, dtype: bool

In [12]:
# rows per event in adv
adv['eventname'].value_counts()

eventname
baseline_year_1_arm_1       11868
1_year_follow_up_y_arm_1    11220
2_year_follow_up_y_arm_1    10908
3_year_follow_up_y_arm_1    10123
4_year_follow_up_y_arm_1     4688
Name: count, dtype: int64

In [13]:
# rows per event in the raw abcd_p_demo table, for comparison with adv
data['eventname'].value_counts()

eventname
baseline_year_1_arm_1       11868
1_year_follow_up_y_arm_1    11220
2_year_follow_up_y_arm_1    10908
3_year_follow_up_y_arm_1    10123
4_year_follow_up_y_arm_1     4688
Name: count, dtype: int64

### Check

In [15]:
# dimensions of the assembled frame, then its first rows
print(adv.shape)
adv.head(5)

(48807, 12)


Unnamed: 0,demo_prnt_ed_v2,demo_prtnr_ed_v2,src_subject_id,eventname,reshist_addr1_adi_unemp,reshist_addr1_adi_edu_l,reshist_addr1_adi_crowd,reshist_addr1_adi_pov,reshist_addr1_opat_kfrpp_avg,neighborhood1r_p,neighborhood2r_p,neighborhood3r_p
0,13.0,13.0,NDAR_INV003RTV85,baseline_year_1_arm_1,,,,,,5.0,5.0,5.0
1,,,NDAR_INV003RTV85,1_year_follow_up_y_arm_1,,,,,,5.0,5.0,4.0
2,,,NDAR_INV003RTV85,2_year_follow_up_y_arm_1,,,,,,5.0,5.0,5.0
3,,,NDAR_INV003RTV85,3_year_follow_up_y_arm_1,,,,,,,,
4,6.0,999.0,NDAR_INV005V6D2C,baseline_year_1_arm_1,9.991899,4.861931,2.440678,17.770597,0.424759,3.0,5.0,5.0


In [16]:
# rows per event just before the wave filter
adv['eventname'].value_counts()

eventname
baseline_year_1_arm_1       11868
1_year_follow_up_y_arm_1    11220
2_year_follow_up_y_arm_1    10908
3_year_follow_up_y_arm_1    10123
4_year_follow_up_y_arm_1     4688
Name: count, dtype: int64

#### Select only Wave 1

In [17]:
# keep only the baseline rows; .copy() makes the result an independent frame,
# so the later in-place edits of adv cannot act on (or warn about) a slice of
# the multi-wave table
adv = adv.loc[adv['eventname'] == 'baseline_year_1_arm_1'].copy()
print(adv.shape)

(11868, 12)


### rename variables

In [18]:
# original column name -> short analysis name
redict = {
    'reshist_addr1_adi_unemp': 'ADI1',
    'reshist_addr1_adi_edu_l': 'ADI2',
    'reshist_addr1_adi_crowd': 'ADI3',
    'reshist_addr1_adi_pov': 'ADI4',
    'reshist_addr1_opat_kfrpp_avg': 'SocMob',
    'demo_prnt_ed_v2': 'peduh1',
    'demo_prtnr_ed_v2': 'peduh2',
    'neighborhood1r_p': 'neigh1',
    'neighborhood2r_p': 'neigh2',
    'neighborhood3r_p': 'neigh3',
    'src_subject_id': 'subID',
}
# rebind instead of inplace=True: adv was produced by boolean filtering, and
# in-place edits on such a subset are what pandas' SettingWithCopy checks
# guard against
adv = adv.rename(columns=redict)

In [19]:
adv.head()

Unnamed: 0,peduh1,peduh2,subID,eventname,ADI1,ADI2,ADI3,ADI4,SocMob,neigh1,neigh2,neigh3
0,13.0,13.0,NDAR_INV003RTV85,baseline_year_1_arm_1,,,,,,5.0,5.0,5.0
4,6.0,999.0,NDAR_INV005V6D2C,baseline_year_1_arm_1,9.991899,4.861931,2.440678,17.770597,0.424759,3.0,5.0,5.0
8,19.0,18.0,NDAR_INV007W6H7B,baseline_year_1_arm_1,6.254295,3.559711,2.930622,3.665339,0.552627,5.0,5.0,4.0
11,20.0,20.0,NDAR_INV00BD7VDC,baseline_year_1_arm_1,3.420132,0.635838,0.0,1.204819,0.601058,5.0,5.0,5.0
14,15.0,,NDAR_INV00CY2MDM,baseline_year_1_arm_1,7.476038,2.196885,2.690397,6.062932,0.529742,4.0,4.0,3.0


#### reverse code (disabled: the lines below are commented out, so nothing is reversed)

In [20]:
# adv['peduh1'] = adv['peduh1']*-1
# adv['peduh2'] = adv['peduh2']*-1
# adv['SocMob'] = adv['SocMob']*-1

In [21]:
# adv.head()

#### Load & Merge full df 
* and merge adv with full

In [22]:
# NOTE(review): outpath is fixed to the /home/... location and does not follow
# the `home` switch that selects dpath; confirm this is intended
outpath = "/home/cglab/projects/abcd/rser/"
# low_memory=False: parse the file in one pass rather than in chunks
full = pd.read_csv(f'{dpath}abcd5.1_rser_nback_5-30-24.csv', low_memory=False)

In [23]:
# Attach the baseline adversity variables to every row of `full`.
# validate='many_to_one' makes pandas raise a MergeError if subID is not unique
# in adv, instead of silently duplicating rows of `full`.
# Columns present in both frames get pandas' default _x/_y suffixes
# (eventname_y in the output is adv's eventname).
# NOTE: `all` shadows the Python builtin all(); the name is kept because the
# export cell reads it.
# NOTE(review): peduh1/peduh2 still hold values such as 999.0; presumably a
# "don't know / refused" code, confirm whether it should be recoded to NaN.
all = full.merge(adv, on='subID', how='left', validate='many_to_one')
print(all.shape)
all.head()

(5754, 594)


Unnamed: 0,Aware,NoAcpt,Implse,Goals,Threat,subID,ThreatSQ,ThreatCB,eventname_yr2,rsfmri_var_cdk_insulalh_yr2,rsfmri_var_cdk_insularh_yr2,rsfmri_var_cdk_rlaclatelh_yr2,rsfmri_var_cdk_rlaclaterh_yr2,rsfmri_var_cdk_entorhinallh_yr2,rsfmri_var_cdk_entorhinalrh_yr2,rsfmri_var_scs_amygdalalh_yr2,rsfmri_var_scs_amygdalarh_yr2,rsfmri_var_scs_hpuslh_yr2,rsfmri_var_scs_hpusrh_yr2,rsfmri_var_scs_putamenlh_yr2,rsfmri_var_scs_putamenrh_yr2,rsfmri_var_scs_tplh_yr2,rsfmri_var_scs_tprh_yr2,rsfmri_var_scs_ventraldclh_yr2,rsfmri_var_scs_ventraldcrh_yr2,rsfmri_var_scs_aalh_yr2,rsfmri_var_scs_aarh_yr2,mrirsfd121_yr2,mrirsfd48_yr2,mrirsfd122_yr2,mrirsfd49_yr2,mrirsfd123_yr2,mrirsfd47_yr2,sa_scs_aalh_yr2,sa_scs_aarh_yr2,sa_scs_aglh_yr2,sa_scs_agrh_yr2,sa_scs_bs_yr2,sa_scs_cdelh_yr2,sa_scs_cderh_yr2,sa_scs_crcxlh_yr2,sa_scs_crcxrh_yr2,sa_scs_hplh_yr2,sa_scs_hprh_yr2,sa_scs_pllh_yr2,sa_scs_plrh_yr2,sa_scs_ptlh_yr2,sa_scs_ptrh_yr2,sa_scs_thplh_yr2,sa_scs_thprh_yr2,sa_scs_vtdclh_yr2,sa_scs_vtdcrh_yr2,df_scs_aalh_yr2,df_scs_aarh_yr2,df_scs_aglh_yr2,df_scs_agrh_yr2,df_scs_bs_yr2,df_scs_cdelh_yr2,df_scs_cderh_yr2,df_scs_crcxlh_yr2,df_scs_crcxrh_yr2,df_scs_hplh_yr2,df_scs_hprh_yr2,df_scs_pllh_yr2,df_scs_plrh_yr2,df_scs_ptlh_yr2,df_scs_ptrh_yr2,df_scs_thplh_yr2,df_scs_thprh_yr2,df_scs_vtdclh_yr2,df_scs_vtdcrh_yr2,sa_ngd_ad_yr2,sa_ngd_cgc_yr2,SaCParT5,sa_ngd_dt_yr2,sa_ngd_dla_yr2,sa_ngd_fo_yr2,sa_ngd_n_yr2,sa_ngd_rspltp_yr2,sa_ngd_sa_yr2,sa_ngd_smh_yr2,sa_ngd_smm_yr2,sa_ngd_vta_yr2,sa_ngd_vs_yr2,ders_aware_clar_score_3_year_yr2,ders_aware_clar_score_4_year_yr2,ders_awareness_score_3_year_yr2,ders_awareness_score_4_year_yr2,ders_goals_score_3_year_yr2,ders_goals_score_4_year_yr2,ders_impulse_score_3_year_yr2,ders_impulse_score_4_year_yr2,ders_nonaccept_score_3_year_yr2,ders_nonaccept_score_4_year_yr2,ders_total_score_3_year_yr2,ders_total_score_4_year_yr2,ders_aware_delta_4yr3_yr2,ders_awareness_delta_4yr3_yr2,ders_goals_delta_4yr3_yr2,ders_impulse_delta_4yr3_yr2,...,tfabwdp_1163_yr1,tnbasemdp_105
1_yr1,tnbasemdp_1125_yr1,tnbasemdp_1089_yr1,tnbasemdp_1163_yr1,tfabwdp_1049_yr1,tfabwdp_1123_yr1,tnbasemdp_1049_yr1,tnbasemdp_1123_yr1,tfncr1bwdp_1049_yr1,tfabwdp_1048_yr1,tfabwdp_1122_yr1,tnbasemdp_1048_yr1,tnbasemdp_1122_yr1,tfabwdp_1063_yr1,tfabwdp_1137_yr1,tnbasemdp_1063_yr1,tnbasemdp_1137_yr1,tfabwdp_1066_yr1,tfabwdp_1140_yr1,tnbasemdp_1066_yr1,tnbasemdp_1140_yr1,tfmri_nback_all_757_yr1,tfmri_nback_all_791_yr1,tfmrinbackallsem_757_yr1,tfmrinbackallsem_791_yr1,tfmri_nback_all_772_yr1,tfmri_nback_all_806_yr1,tfmrinbackallsem_772_yr1,tfmrinbackallsem_806_yr1,tfmri_nback_all_777_yr1,tfmri_nback_all_811_yr1,tfmrinbackallsem_777_yr1,tfmrinbackallsem_811_yr1,tfmri_nback_all_765_yr1,tfmri_nback_all_799_yr1,tfmrinbackallsem_765_yr1,tfmrinbackallsem_799_yr1,tfmri_nback_all_772.1_yr1,tfmri_nback_all_806.1_yr1,tfmrinbackallsem_772.1_yr1,tfmrinbackallsem_806.1_yr1,tfmri_nback_all_774_yr1,tfmri_nback_all_808_yr1,tfmrinbackallsem_774_yr1,tfmrinbackallsem_808_yr1,tfmri_nback_all_771_yr1,tfmri_nback_all_805_yr1,tfmrinbackallsem_771_yr1,tfmrinbackallsem_805_yr1,tfmri_nback_all_773_yr1,tfmri_nback_all_807_yr1,tfmrinbackallsem_773_yr1,tfmrinbackallsem_807_yr1,tfmri_nback_all_759_yr1,tfmri_nback_all_793_yr1,tfmrinbackallsem_759_yr1,tfmrinbackallsem_793_yr1,tfmri_nback_all_780_yr1,tfmri_nback_all_814_yr1,tfmrinbackallsem_780_yr1,tfmrinbackallsem_814_yr1,tfmri_nback_all_763_yr1,tfmri_nback_all_797_yr1,tfmrinbackallsem_763_yr1,tfmrinbackallsem_797_yr1,tfmri_nback_all_752_yr1,tfmri_nback_all_786_yr1,tfmrinbackallsem_752_yr1,tfmrinbackallsem_786_yr1,tfmri_nback_all_770_yr1,tfmri_nback_all_804_yr1,tfmrinbackallsem_770_yr1,tfmrinbackallsem_804_yr1,tfmri_nback_all_751_yr1,tfmri_nback_all_785_yr1,tfmrinbackallsem_751_yr1,tfmrinbackallsem_785_yr1,tfmri_nback_all_761_yr1,tfmri_nback_all_795_yr1,tfmrinbackallsem_761_yr1,tfmrinbackallsem_795_yr1,rsfmri_meanmotion_yr1,Mot1nb,imgincl_nback_include_yr1,MotionNB,Rank_AmygL1,Rank_AmygL5,AmygRnkDif,peduh1,peduh2,eventname_y,ADI1,ADI2,ADI3,ADI4,SocMob
,neigh1,neigh2,neigh3
0,1.725059,-0.452354,0.865676,0.530334,1.431959,NDAR_INV00CY2MDM,2.050507,2.936242,2_year_follow_up_y_arm_1,0.016246,0.012573,0.033168,0.040843,0.111685,0.06582,0.052184,0.037029,0.021606,0.036169,0.01949,0.0173,0.009993,0.011925,0.01428,0.019273,0.085092,0.051332,0.044525,0.02433,0.027256,0.039896,0.047019,0.060342,0.087313,0.032769,0.106473,-0.089008,0.03911,-0.045963,0.054209,-0.026604,-0.132791,-0.070338,0.041616,0.1514,0.006462,0.014982,-0.005261,0.21925,0.105179,-0.158763,0.169363,-0.067979,-0.030135,0.124339,-0.079521,0.024488,-0.081768,-0.040118,-0.06764,0.104975,-0.005856,0.023446,0.159807,0.007011,-0.111618,-0.002608,-0.030167,0.033741,-0.040782,0.062618,0.008133,0.061756,0.146352,0.123696,-0.067697,0.102507,0.029304,-0.035043,0.425516,-0.163747,-0.096174,0.131247,-0.136485,-14.0,-14.0,-8.0,-8.0,9.0,11.0,10.0,8.0,6.0,6.0,24.0,18.0,0.0,0.0,2.0,-2.0,...,-0.639966,0.238058,0.22216,0.267043,0.271673,-0.146478,0.040509,0.328462,0.411548,-0.386941,0.205332,-0.155947,0.256054,0.257463,-0.048729,0.097312,0.224994,0.245847,-0.023024,0.199233,0.232755,0.233085,0.090037,-0.070745,0.311281,0.300462,0.074692,-0.320034,0.254277,0.253904,0.913415,0.006551,0.394382,0.357887,-0.095287,0.132616,0.286378,0.282792,0.074692,-0.320034,0.254277,0.253904,-0.04998,0.230001,0.217972,0.233449,-0.13531,-0.403987,0.304282,0.323711,0.113626,0.069849,0.228973,0.219881,0.590801,0.276157,0.323189,0.270815,0.210113,0.190031,0.246226,0.279899,0.149234,-0.080749,0.233854,0.255963,0.077317,0.522342,0.289736,0.287574,-0.026776,0.20842,0.243503,0.245121,0.019472,0.098374,0.38927,0.424779,-0.127707,-0.178655,0.405926,0.348835,0.578596,0.806511,1.0,0.429021,3493.0,3933.0,440.0,15.0,,baseline_year_1_arm_1,7.476038,2.196885,2.690397,6.062932,0.529742,4.0,4.0,3.0
1,-0.221019,0.420109,0.416459,0.123777,-0.723904,NDAR_INV00HEV6HB,0.524038,-0.379353,2_year_follow_up_y_arm_1,0.018285,0.025589,0.041287,0.037774,0.729836,0.787964,0.043472,0.031202,0.03518,0.049251,0.027205,0.028332,0.019482,0.018647,0.037337,0.019107,0.054783,0.052236,0.032757,0.0291,0.027761,0.030553,0.062657,0.060473,0.056604,-0.026797,-0.191681,0.062352,0.099586,0.101653,0.230095,0.07343,-0.086812,-0.035866,-0.119038,0.031807,0.021449,0.030165,0.139479,0.302171,0.137438,-0.01434,0.006388,0.100585,0.180374,0.007281,-0.074992,0.18329,-0.068998,-0.060061,-0.059577,0.064575,-0.077234,0.050871,0.174099,-0.035413,0.066688,-0.045511,-0.02758,0.065021,0.022472,0.135074,0.101773,0.198276,0.156116,0.052057,0.034849,0.084903,-0.016483,-0.166768,0.300588,0.059855,0.0115,0.035687,-0.136875,-28.0,-28.0,-16.0,-16.0,8.0,6.0,8.0,7.0,12.0,11.0,9.0,6.0,0.0,0.0,-2.0,-1.0,...,-0.325001,0.072787,0.068146,0.092503,0.09071,0.171951,-0.089597,0.119291,0.133531,0.101514,-0.140368,-0.277121,0.082154,0.078533,0.083654,-0.012812,0.076598,0.072192,-0.03105,-0.149281,0.075682,0.07481,0.011231,0.073377,0.116726,0.174309,-0.19775,-0.370314,0.080644,0.083423,0.37444,0.089637,0.48684,0.225633,-0.126928,-0.272664,0.095107,0.095332,-0.19775,-0.370314,0.080644,0.083423,0.076094,-0.027723,0.073465,0.073016,0.3629,0.111669,0.117499,0.132081,-0.06849,-0.164689,0.06202,0.056111,0.531179,0.034782,0.219668,0.211379,-0.00216,-0.055901,0.080387,0.082332,-0.186758,-0.194852,0.080386,0.088033,0.067513,-0.07065,0.103365,0.092303,-0.038981,-0.140603,0.076095,0.072963,-0.43306,0.208469,0.846002,0.648769,-0.264708,0.278124,0.240899,0.201366,0.076605,0.131465,1.0,0.142165,753.0,675.0,-78.0,13.0,999.0,baseline_year_1_arm_1,13.826366,5.202081,3.218391,11.656442,0.480171,4.0,3.0,2.0
2,0.370193,1.102345,1.319362,1.807149,-0.723904,NDAR_INV00U4FTRU,0.524038,-0.379353,2_year_follow_up_y_arm_1,0.011172,0.030374,0.046685,0.068397,0.426628,0.414262,0.070955,0.166956,0.043674,0.034466,0.021545,0.025325,0.01148,0.009867,0.040349,0.04598,0.06327,0.281109,0.043263,0.037661,0.044675,0.022888,0.040561,0.028706,0.150564,-0.088199,-0.072366,-0.015576,0.212299,0.042634,0.195005,-0.050605,-0.049594,-0.217196,-0.18028,-0.024335,0.051879,0.008202,0.059089,-0.020015,0.188981,-0.236868,0.056181,0.050782,0.014564,-0.047934,0.047245,0.062202,-0.072132,-0.047161,0.02086,0.060968,0.046827,-0.030833,-0.003948,0.013614,-0.061129,0.028396,-0.025314,0.025203,0.021824,0.149026,-0.064397,0.099622,0.136281,0.077524,-0.05409,0.07818,-0.017083,-0.037469,0.424569,-0.077055,-0.122745,0.000804,-0.048048,-23.0,,-13.0,,13.0,,11.0,,16.0,,37.0,,,,,,...,-0.125833,0.139054,0.125583,0.160196,0.132157,0.760721,0.189539,0.300498,0.256918,0.396469,0.57549,0.415694,0.19198,0.167198,0.948654,0.570694,0.194858,0.152383,0.511811,0.51824,0.162244,0.147902,0.147468,0.304078,0.157107,0.164493,0.020026,0.137693,0.133985,0.128683,0.064176,-0.88197,0.335343,0.297521,0.330807,0.430882,0.186417,0.178298,0.020026,0.137693,0.133985,0.128683,0.845489,0.540492,0.172292,0.130598,-0.403302,-0.16769,0.172008,0.177492,0.000327,-0.020603,0.077499,0.070299,0.073956,-0.108123,0.215903,0.174545,0.321047,0.094785,0.141031,0.13923,0.671606,0.453823,0.181783,0.154917,0.79664,0.875153,0.209854,0.166813,0.453289,0.379002,0.160783,0.125261,-0.09754,0.012615,0.445616,0.352809,0.945345,1.040824,0.316136,0.211015,0.126417,0.434741,1.0,0.573137,4179.0,2941.0,-1238.0,12.0,14.0,baseline_year_1_arm_1,11.137108,8.916967,7.007126,23.139654,0.378095,4.0,3.0,3.0
3,-1.216133,-0.470889,-0.647299,-0.971887,-0.723904,NDAR_INV00X2TBWJ,0.524038,-0.379353,2_year_follow_up_y_arm_1,0.01108,0.014387,0.081983,0.045247,0.023478,0.022162,0.020577,0.014855,0.0133,0.01376,0.019053,0.020115,0.004143,0.006586,0.011087,0.0146,0.054109,0.032244,0.041557,0.015856,0.015264,0.022382,0.026377,0.02941,0.079982,-0.029412,0.040365,-0.016699,0.028709,0.058492,0.280259,0.059566,-0.097127,-0.008427,-0.053497,-0.073438,0.032173,0.031564,0.013428,0.049665,-0.001658,-0.097509,-0.043193,0.097403,0.05017,-0.040949,-0.00961,0.025426,-0.004281,-0.095839,-0.011885,-0.040048,-0.041869,-0.063472,0.029441,-0.032766,0.057696,-0.006023,-0.009216,-0.053737,0.078955,0.080516,0.067029,0.137843,-0.105616,0.006192,-0.025185,0.240702,0.002911,-0.100735,0.47103,-0.021846,-0.05525,0.101311,-0.089669,-35.0,-32.0,-20.0,-18.0,4.0,6.0,4.0,4.0,7.0,7.0,-17.0,-12.0,3.0,2.0,2.0,0.0,...,-0.89744,0.098991,0.126854,0.141299,0.135217,-0.037499,-0.787055,0.139826,0.187856,0.141307,-0.386301,-0.429952,0.105538,0.103494,-0.408714,-0.486693,0.132893,0.121705,-0.360562,-0.238823,0.139768,0.111789,-0.516922,-0.533763,0.160838,0.152467,-0.480581,-0.64988,0.108899,0.121211,-0.394774,-1.587338,0.345746,0.470313,-0.255033,-0.490426,0.113139,0.134273,-0.480581,-0.64988,0.108899,0.121211,-0.493154,-0.518786,0.136172,0.130419,-0.834847,-0.35236,0.216031,0.150865,-0.429382,-0.404191,0.096432,0.091203,-0.942186,-1.187649,0.340199,0.248096,-0.388895,-0.401674,0.117637,0.124193,-0.520205,-0.541885,0.111972,0.113091,-0.58559,-0.551013,0.169378,0.148619,-0.356912,-0.308521,0.158139,0.118652,-0.713889,-0.476049,0.216351,0.167061,-0.370722,-0.577513,0.225167,0.220891,0.545235,0.156086,1.0,0.194004,1177.0,2150.0,973.0,18.0,,baseline_year_1_arm_1,4.360587,0.352596,3.588517,3.393117,0.597014,4.0,4.0,4.0
4,0.400028,0.066276,0.194382,0.01531,-0.336852,NDAR_INV01AJ15N9,0.113469,-0.038222,2_year_follow_up_y_arm_1,0.018617,0.009661,0.029251,0.034074,0.026937,0.090245,0.025568,0.03458,0.016596,0.015726,0.02323,0.020621,0.009004,0.007692,0.010249,0.011588,0.054394,0.051199,0.042267,0.013924,0.016578,0.037809,0.03833,0.032686,0.117688,-0.066759,0.073433,0.037799,0.101763,0.211117,0.222429,0.131275,-0.013833,-0.038126,0.131389,-0.229014,0.018154,0.154964,-0.041279,-0.044865,0.195221,-0.088648,0.136165,-0.00678,-0.043138,-0.152941,-0.009921,0.188304,-0.22755,-0.00368,-0.007878,0.026055,0.155536,0.226077,0.11131,0.05331,0.228924,-0.009878,-0.05764,0.050706,0.000559,0.143193,0.160046,0.158779,0.219587,0.155333,-0.070182,0.07583,0.060893,-0.091499,0.425814,0.054666,0.033991,0.171921,-0.243846,-24.0,,-12.0,,7.0,,7.0,,10.0,,9.0,,,,,,...,-0.139482,0.134652,0.131294,0.148082,0.157623,0.114444,0.208425,0.139649,0.17492,0.082792,0.010267,-0.037161,0.127463,0.116702,-0.215381,-0.107082,0.099933,0.09659,-0.046251,-0.192519,0.102199,0.096098,-0.350897,0.253652,0.166805,0.180175,0.020273,-0.081925,0.138652,0.135873,0.187516,-0.062915,0.73279,0.476567,0.347543,0.227518,0.149283,0.149035,0.020273,-0.081925,0.138652,0.135873,-0.116141,-0.073732,0.095271,0.103884,0.182574,0.013711,0.136692,0.130481,0.048976,0.027246,0.102596,0.096752,-0.188617,0.021673,0.197284,0.167536,0.105457,-0.065221,0.124996,0.108646,0.136371,0.052311,0.13247,0.11902,0.01419,0.108509,0.138506,0.139208,-0.000785,-0.091654,0.111035,0.100614,0.305342,0.032765,0.168797,0.24613,0.17975,0.038045,0.163321,0.174155,0.071862,0.231041,1.0,0.235283,790.0,4407.0,3617.0,18.0,18.0,baseline_year_1_arm_1,1.709402,1.948052,0.0,1.282051,0.628576,4.0,1.0,5.0


### Export

In [24]:
# write the merged frame to outpath without the index column
all.to_csv(f'{outpath}abcd5.1_rser_nback_harsh_5-30-24.csv', index=False)