In [1]:
import os
from functools import reduce
from itertools import combinations

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

from scipy import stats

# pandas display settings: wide console output, up to 200 columns and 200 rows shown
_display_options = {
    'display.width': 500,
    'display.max_columns': 200,
    'display.max_rows': 200,
}
for _opt, _val in _display_options.items():
    pd.set_option(_opt, _val)

# Input: set `home` in the next cell to 0 or 1

In [2]:
# Input flag (0 or 1).
# NOTE(review): `home` is not read by any later cell in this notebook — confirm it is still needed.
home = 0

In [3]:
# Directory that holds the ABCD data pull. Set the ABCD_DATA_DIR environment
# variable to point somewhere else; otherwise the original lab location is used.
# os.path.join(..., '') guarantees a trailing separator, which later cells rely on
# because they build file paths with `fpath + filename`.
fpath = os.path.join(os.environ.get('ABCD_DATA_DIR', '/home/cglab/data_pull/abcd/'), '')

In [4]:
# Load the long-format table; its `eventname` column distinguishes the waves
long_csv = 'abcd5.1_rtmri_mid_llosVn_lrwdVn_opfc_subc_net_hses.csv'
rs = pd.read_csv(fpath + long_csv)
rs.shape

(18595, 52)

In [5]:
# Number of rows available at each wave
rs.loc[:, 'eventname'].value_counts()

eventname
baseline_year_1_arm_1       9175
2_year_follow_up_y_arm_1    6744
4_year_follow_up_y_arm_1    2676
Name: count, dtype: int64

In [6]:
# Long -> wide, step 1: split the long table into one frame per wave and append
# that wave's suffix to every column except subID
# (1 = baseline, 5 = 2-year follow-up, 9 = 4-year follow-up).
#
# Variables that are already in wide format (e.g. ders / fcon / erq, which are not
# collected at these waves and are just repeated at each one) should be kept in
# rs1 only, so they are not duplicated in the wide df. Template for that case:
# ders_erq = [c for c in rs.columns if 'ders' in c or 'erq' in c]
# prewide = rs.loc[rs['eventname'] == 'baseline_year_1_arm_1', ders_erq + ['subID']].copy()
# ...then drop `ders_erq` from rs1 / rs2 / rs4 before merging and re-merge `prewide` afterwards.


def wave_to_wide(long_df, event, suffix, id_col='subID'):
    """Return the rows of one wave with `suffix` appended to every column name.

    Parameters
    ----------
    long_df : DataFrame in long format with an `eventname` column.
    event : value of `eventname` that selects the wave.
    suffix : string appended to each column name.
    id_col : column left unrenamed so it can serve as the merge key.

    Returns
    -------
    A new DataFrame; `long_df` is not modified.
    """
    wave = long_df[long_df['eventname'] == event]
    renamed = {col: col + suffix for col in wave.columns if col != id_col}
    # rename() returns a new frame, so the result is independent of `long_df`
    return wave.rename(columns=renamed)


# pull out year 4
rs4 = wave_to_wide(rs, '4_year_follow_up_y_arm_1', '9')
# pull out year 2
rs2 = wave_to_wide(rs, '2_year_follow_up_y_arm_1', '5')
# pull out year 1
rs1 = wave_to_wide(rs, 'baseline_year_1_arm_1', '1')

In [7]:
# (rows, columns) of the baseline, 2-year and 4-year frames, in that order
tuple(frame.shape for frame in (rs1, rs2, rs4))

((9175, 52), (6744, 52), (2676, 52))

In [8]:
# Preview the baseline frame: every column except subID now ends in "1"
rs1.head(n=5)

Unnamed: 0,NAL_lln1,NAR_lln1,AmygL_lln1,AmygR_lln1,HipcL_lln1,HipcR_lln1,ThalL_lln1,ThalR_lln1,subID,eventname1,NAL_lrn1,NAR_lrn1,AmygL_lrn1,AmygR_lrn1,HipcL_lrn1,HipcR_lrn1,ThalL_lrn1,ThalR_lrn1,LtOrFrL_lln1,LtOrFrR_lln1,MedOrFrL_lln1,MedOrFrR_lln1,rACCL_lln1,rACCR_lln1,cACCL_lln1,cACCR_lln1,cACCL_lrn1,cACCR_lrn1,LtOrFrL_lrn1,LtOrFrR_lrn1,MedOrFrL_lrn1,MedOrFrR_lrn1,rACCL_lrn1,rACCR_lrn1,aInslL_lln1,aInslR_lln1,aInslL_lrn1,aInslR_lrn1,famID1,age1,income1,pedu1,spedu1,gender1,race1,prpensity1,LowEdu11,SingPH11,UnempR11,tfmri_mid_all_meanmotion1,imgincl_mid_include1,scanID1
0,0.012925,0.05735,-0.103007,0.040944,-0.278594,-0.13652,-0.226935,-0.314408,NDAR_INV003RTV85,baseline_year_1_arm_1,-0.451426,-0.238997,-0.327643,-0.223388,-0.471753,-0.240092,-0.13832,-0.261594,-0.071183,-0.014068,0.184538,0.029904,0.053321,0.144795,-0.192958,0.058817,-0.089997,-0.015292,-0.320943,-0.315482,-0.250562,-0.804303,-0.311717,-0.233716,-0.224738,-0.105134,-0.274432,-0.184748,8781.0,131.0,8.0,13.0,13.0,2.0,1.0,466.092707,,,,0.158333,1.0,0.0
1,0.429318,0.272447,0.354449,0.593416,0.234241,0.289909,0.017087,0.140608,NDAR_INV005V6D2C,baseline_year_1_arm_1,0.142344,0.284221,0.240394,0.450072,0.131028,0.231011,0.04319,0.068502,0.441208,0.173635,0.620966,0.564494,0.540422,0.546842,0.132752,0.293214,-0.080159,0.038292,0.384632,0.078511,0.239707,0.132052,0.406179,0.246562,0.212062,0.198078,0.052116,-0.058588,10210.0,121.0,999.0,6.0,999.0,2.0,3.0,520.488325,4.861931,29.07916,9.991899,0.352267,1.0,1.0
2,-0.00737,-0.09953,-0.245714,-0.077979,-0.154156,-0.118741,-0.094763,-0.06721,NDAR_INV007W6H7B,baseline_year_1_arm_1,0.024064,-0.01888,-0.231686,-0.010857,-0.064591,-0.160605,-0.088715,-0.187258,-0.216364,-0.361254,0.003707,-0.123096,-0.181924,-0.007465,-0.092264,-0.091089,-0.186848,-0.159265,-0.259863,-0.227784,-0.275979,-0.273947,-0.375694,-0.29047,-0.060283,-0.222956,-0.010167,-0.118855,4722.0,126.0,10.0,19.0,18.0,1.0,1.0,479.185338,3.559711,0.0,6.254295,0.086835,1.0,2.0
3,-0.177538,-0.14045,-0.357152,-0.321334,-0.084301,-0.276284,0.178833,0.292628,NDAR_INV00BD7VDC,baseline_year_1_arm_1,0.077727,0.066981,-0.061251,-0.269633,-0.369456,-0.346828,0.100634,0.180677,-0.152761,-0.342322,-0.663691,-0.542897,-0.041563,-0.104568,0.163341,0.139756,-0.054063,0.015743,-0.508518,-0.4861,-1.044365,-0.496992,0.113961,-0.164476,0.185713,0.146171,-0.079012,0.005056,3810.0,112.0,10.0,20.0,20.0,1.0,1.0,414.643009,0.635838,5.863454,3.420132,0.19818,1.0,3.0
5,0.039119,0.241375,0.030802,0.023442,-0.082911,0.165994,-0.009655,0.060066,NDAR_INV00CY2MDM,baseline_year_1_arm_1,-0.116054,0.062262,0.010854,-0.256097,-0.196152,-0.171516,-0.160728,-0.136292,-0.104121,0.291206,-0.009244,0.004842,-0.146274,-0.127897,-0.068194,0.002004,-0.174257,-0.117822,-0.146977,0.138468,-0.175172,-0.130648,-0.110055,-0.238281,-0.078709,0.162027,-0.111633,0.055209,5355.0,130.0,6.0,15.0,,1.0,1.0,1433.061575,2.196885,15.349195,7.476038,0.311358,1.0,4.0


In [9]:
# Preview the 2-year frame: every column except subID now ends in "5"
rs2.head(n=5)

Unnamed: 0,NAL_lln5,NAR_lln5,AmygL_lln5,AmygR_lln5,HipcL_lln5,HipcR_lln5,ThalL_lln5,ThalR_lln5,subID,eventname5,NAL_lrn5,NAR_lrn5,AmygL_lrn5,AmygR_lrn5,HipcL_lrn5,HipcR_lrn5,ThalL_lrn5,ThalR_lrn5,LtOrFrL_lln5,LtOrFrR_lln5,MedOrFrL_lln5,MedOrFrR_lln5,rACCL_lln5,rACCR_lln5,cACCL_lln5,cACCR_lln5,cACCL_lrn5,cACCR_lrn5,LtOrFrL_lrn5,LtOrFrR_lrn5,MedOrFrL_lrn5,MedOrFrR_lrn5,rACCL_lrn5,rACCR_lrn5,aInslL_lln5,aInslR_lln5,aInslL_lrn5,aInslR_lrn5,famID5,age5,income5,pedu5,spedu5,gender5,race5,prpensity5,LowEdu15,SingPH15,UnempR15,tfmri_mid_all_meanmotion5,imgincl_mid_include5,scanID5
4,-0.041046,0.128552,0.216366,0.112228,-0.043041,0.166472,0.040723,-0.028979,NDAR_INV00CY2MDM,2_year_follow_up_y_arm_1,-0.014989,0.080711,0.00332,-0.106822,-0.118686,0.027917,-0.054406,0.000477,0.063674,-0.142186,0.016718,-0.137259,-0.025205,-0.07705,-0.10041,-0.090537,-0.093462,-0.216592,-0.061349,-0.12422,-0.10713,-0.153044,-0.111854,-0.087833,-0.063335,-0.181323,0.036538,-0.184872,,152.0,,,,,,,,,,0.097978,1.0,4.0
6,0.130504,0.164412,0.109012,0.137536,-0.089797,-0.042029,0.220369,0.169347,NDAR_INV00HEV6HB,2_year_follow_up_y_arm_1,-0.110512,-0.028383,0.005431,0.097268,-0.145436,-0.042677,0.04044,0.103395,0.021982,0.222105,-0.092588,0.169521,0.084949,0.192166,0.231364,-0.021036,0.160026,-0.125222,-0.067091,0.242028,0.060932,0.158086,-0.078235,-0.174084,0.221486,0.149687,0.168036,0.190892,,149.0,,,,,,,,,,0.110694,1.0,5.0
11,-0.072135,-0.190022,-0.361708,-0.012052,-0.17395,0.013394,-0.036864,-0.034026,NDAR_INV00LJVZK2,2_year_follow_up_y_arm_1,-0.343731,-0.235853,-0.022589,0.011871,-0.118869,-0.076826,-0.213056,-0.218861,-0.175469,0.43569,-0.045662,0.947797,-0.078443,-0.069964,-0.056233,0.004976,-0.301826,-0.143505,-0.526539,0.0928,-0.379528,0.092997,-0.20155,-0.186608,-0.121184,-0.028175,-0.214657,-0.232714,,147.0,,,,,,,,,,0.204849,1.0,8.0
14,0.219421,0.801728,0.421013,0.866888,0.098551,0.002561,-0.182209,-0.172124,NDAR_INV00U4FTRU,2_year_follow_up_y_arm_1,0.399865,1.034477,-0.443434,-0.685112,0.084132,-0.249742,-0.29588,-0.218136,-0.44972,-0.453321,0.05322,0.165072,-0.05219,-0.037954,-0.046476,-0.425986,-0.196025,-0.408859,-0.318089,-0.636323,0.02256,0.070323,0.348853,0.299777,0.066275,-0.049966,0.00946,-0.237695,,157.0,,,,,,,,,,0.3558,1.0,9.0
17,-0.274599,-0.125548,-0.016797,-0.08719,-0.033557,-0.014413,-0.00336,0.023561,NDAR_INV00X2TBWJ,2_year_follow_up_y_arm_1,-0.146831,-0.132898,-0.060895,-0.152181,-0.101111,-0.08003,-0.078215,-0.05237,-0.280943,-0.169339,-0.657768,-0.339706,-0.518503,-0.425403,-0.115305,-0.143939,-0.150325,-0.162336,-0.249743,-0.454127,-0.684064,-0.696218,-0.351111,-0.390781,0.158315,0.174518,0.02593,0.035899,,154.0,,,,,,,,,,0.094421,1.0,11.0


In [10]:
# Preview the 4-year frame: every column except subID now ends in "9"
rs4.head(n=5)

Unnamed: 0,NAL_lln9,NAR_lln9,AmygL_lln9,AmygR_lln9,HipcL_lln9,HipcR_lln9,ThalL_lln9,ThalR_lln9,subID,eventname9,NAL_lrn9,NAR_lrn9,AmygL_lrn9,AmygR_lrn9,HipcL_lrn9,HipcR_lrn9,ThalL_lrn9,ThalR_lrn9,LtOrFrL_lln9,LtOrFrR_lln9,MedOrFrL_lln9,MedOrFrR_lln9,rACCL_lln9,rACCR_lln9,cACCL_lln9,cACCR_lln9,cACCL_lrn9,cACCR_lrn9,LtOrFrL_lrn9,LtOrFrR_lrn9,MedOrFrL_lrn9,MedOrFrR_lrn9,rACCL_lrn9,rACCR_lrn9,aInslL_lln9,aInslR_lln9,aInslL_lrn9,aInslR_lrn9,famID9,age9,income9,pedu9,spedu9,gender9,race9,prpensity9,LowEdu19,SingPH19,UnempR19,tfmri_mid_all_meanmotion9,imgincl_mid_include9,scanID9
7,0.185573,0.128662,0.146837,-0.076531,0.006196,0.137188,0.034314,0.152569,NDAR_INV00HEV6HB,4_year_follow_up_y_arm_1,0.189403,0.346258,0.213459,-0.028761,-0.008785,-0.036502,0.105139,0.186804,0.113044,0.115295,-0.002188,-0.025202,0.001261,-0.045758,0.028672,-0.055376,0.21406,0.155984,0.013079,0.156397,-0.074356,0.028391,-0.005984,-0.02096,0.130777,0.045597,0.098856,0.154571,,173.0,,,,,,,,,,0.087345,1.0,5.0
34,-0.097921,-0.008908,-0.124846,-0.101133,-0.026522,-0.110166,0.060973,0.088844,NDAR_INV01NAYMZH,4_year_follow_up_y_arm_1,0.124234,0.146936,0.098033,-0.071369,-0.001779,-0.089358,0.07927,0.086719,-0.026296,0.093212,0.210692,0.250836,0.193433,0.201146,0.113836,0.089881,0.132551,0.105264,-0.153911,0.126159,-0.536655,0.090606,-0.208607,-0.190541,0.094144,0.22245,-0.020842,0.327337,,178.0,,,,,,,,,,0.068733,1.0,13.0
37,0.119785,0.123925,0.088467,0.171592,0.234969,0.342742,0.184154,0.203953,NDAR_INV01RGTWD2,4_year_follow_up_y_arm_1,0.133591,0.183109,0.063705,0.136146,0.28791,0.195753,0.267674,0.145543,0.404661,0.139367,0.597419,0.325023,0.34826,0.082023,0.310122,0.168058,0.041728,0.066321,0.282044,0.038521,0.430917,0.320504,0.237071,0.114275,0.002938,-0.074412,-0.027569,-0.013568,,159.0,,,,,,,,,,0.094121,1.0,15.0
43,-0.154839,0.074429,0.075283,-0.018227,-0.004099,-0.076125,0.023484,0.019636,NDAR_INV021403LF,4_year_follow_up_y_arm_1,-0.113555,0.09655,-0.238865,-0.188934,-0.123856,-0.06365,-0.143094,-0.019964,-0.034394,-0.01916,0.093368,-0.100612,-0.052713,0.00089,0.07268,0.083846,0.068055,0.041513,-0.141928,-0.066539,-0.073676,-0.236033,-0.186645,-0.152406,-0.121934,0.085384,-0.197932,0.225883,,158.0,,,,,,,,,,0.077158,1.0,16.0
48,0.291451,0.056456,0.097011,0.183933,0.139668,0.195592,0.216372,0.262028,NDAR_INV028D3ELL,4_year_follow_up_y_arm_1,0.152896,0.293681,0.076307,0.116971,0.258892,0.288185,0.132351,0.224785,0.291117,0.06589,0.136102,0.145652,0.042769,0.12623,0.13081,-0.027552,-0.032286,-0.130952,0.169242,0.012452,0.159794,0.068223,0.10882,0.134342,0.268101,0.206451,0.146924,0.118246,,157.0,,,,,,,,,,0.08328,1.0,17.0


In [11]:
# Spot-check that the thalamus columns picked up the year-4 suffix
list(filter(lambda col: 'Thal' in col, rs4.columns))

['ThalL_lln9', 'ThalR_lln9', 'ThalL_lrn9', 'ThalR_lrn9']

In [12]:
# Spot-check that the merge key was left unrenamed
list(filter(lambda col: 'subID' in col, rs4.columns))

['subID']

In [13]:
# uncomment merge of prewide below if some vars were already wide
# Long -> wide, step 2: join the per-wave frames on subID.
# Left joins starting from rs1 keep exactly the baseline rows; follow-up rows for a
# subID that has no baseline row are not carried into the wide frame.
# merge the year1 and year2 into WIDE format
rs = rs1.merge(rs2, on='subID', how='left')
print(rs.shape)
# merge the rs with year4 into WIDE format
rs = rs.merge(rs4, on='subID', how='left')
print(rs.shape)
# # merge the rs with prewide
# rs = rs.merge(prewide, on='subID', how='left')
print(rs.shape)
# A left join adds rows when a subID occurs more than once in the right-hand frame,
# so an unchanged row count confirms that no baseline row was duplicated.
assert len(rs) == len(rs1), 'merge changed the row count: a follow-up wave repeats a subID'
rs.head()

(9175, 103)
(9175, 154)
(9175, 154)


Unnamed: 0,NAL_lln1,NAR_lln1,AmygL_lln1,AmygR_lln1,HipcL_lln1,HipcR_lln1,ThalL_lln1,ThalR_lln1,subID,eventname1,NAL_lrn1,NAR_lrn1,AmygL_lrn1,AmygR_lrn1,HipcL_lrn1,HipcR_lrn1,ThalL_lrn1,ThalR_lrn1,LtOrFrL_lln1,LtOrFrR_lln1,MedOrFrL_lln1,MedOrFrR_lln1,rACCL_lln1,rACCR_lln1,cACCL_lln1,cACCR_lln1,cACCL_lrn1,cACCR_lrn1,LtOrFrL_lrn1,LtOrFrR_lrn1,MedOrFrL_lrn1,MedOrFrR_lrn1,rACCL_lrn1,rACCR_lrn1,aInslL_lln1,aInslR_lln1,aInslL_lrn1,aInslR_lrn1,famID1,age1,income1,pedu1,spedu1,gender1,race1,prpensity1,LowEdu11,SingPH11,UnempR11,tfmri_mid_all_meanmotion1,imgincl_mid_include1,scanID1,NAL_lln5,NAR_lln5,AmygL_lln5,AmygR_lln5,HipcL_lln5,HipcR_lln5,ThalL_lln5,ThalR_lln5,eventname5,NAL_lrn5,NAR_lrn5,AmygL_lrn5,AmygR_lrn5,HipcL_lrn5,HipcR_lrn5,ThalL_lrn5,ThalR_lrn5,LtOrFrL_lln5,LtOrFrR_lln5,MedOrFrL_lln5,MedOrFrR_lln5,rACCL_lln5,rACCR_lln5,cACCL_lln5,cACCR_lln5,cACCL_lrn5,cACCR_lrn5,LtOrFrL_lrn5,LtOrFrR_lrn5,MedOrFrL_lrn5,MedOrFrR_lrn5,rACCL_lrn5,rACCR_lrn5,aInslL_lln5,aInslR_lln5,aInslL_lrn5,aInslR_lrn5,famID5,age5,income5,pedu5,spedu5,gender5,race5,prpensity5,LowEdu15,SingPH15,UnempR15,tfmri_mid_all_meanmotion5,imgincl_mid_include5,scanID5,NAL_lln9,NAR_lln9,AmygL_lln9,AmygR_lln9,HipcL_lln9,HipcR_lln9,ThalL_lln9,ThalR_lln9,eventname9,NAL_lrn9,NAR_lrn9,AmygL_lrn9,AmygR_lrn9,HipcL_lrn9,HipcR_lrn9,ThalL_lrn9,ThalR_lrn9,LtOrFrL_lln9,LtOrFrR_lln9,MedOrFrL_lln9,MedOrFrR_lln9,rACCL_lln9,rACCR_lln9,cACCL_lln9,cACCR_lln9,cACCL_lrn9,cACCR_lrn9,LtOrFrL_lrn9,LtOrFrR_lrn9,MedOrFrL_lrn9,MedOrFrR_lrn9,rACCL_lrn9,rACCR_lrn9,aInslL_lln9,aInslR_lln9,aInslL_lrn9,aInslR_lrn9,famID9,age9,income9,pedu9,spedu9,gender9,race9,prpensity9,LowEdu19,SingPH19,UnempR19,tfmri_mid_all_meanmotion9,imgincl_mid_include9,scanID9
0,0.012925,0.05735,-0.103007,0.040944,-0.278594,-0.13652,-0.226935,-0.314408,NDAR_INV003RTV85,baseline_year_1_arm_1,-0.451426,-0.238997,-0.327643,-0.223388,-0.471753,-0.240092,-0.13832,-0.261594,-0.071183,-0.014068,0.184538,0.029904,0.053321,0.144795,-0.192958,0.058817,-0.089997,-0.015292,-0.320943,-0.315482,-0.250562,-0.804303,-0.311717,-0.233716,-0.224738,-0.105134,-0.274432,-0.184748,8781.0,131.0,8.0,13.0,13.0,2.0,1.0,466.092707,,,,0.158333,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0.429318,0.272447,0.354449,0.593416,0.234241,0.289909,0.017087,0.140608,NDAR_INV005V6D2C,baseline_year_1_arm_1,0.142344,0.284221,0.240394,0.450072,0.131028,0.231011,0.04319,0.068502,0.441208,0.173635,0.620966,0.564494,0.540422,0.546842,0.132752,0.293214,-0.080159,0.038292,0.384632,0.078511,0.239707,0.132052,0.406179,0.246562,0.212062,0.198078,0.052116,-0.058588,10210.0,121.0,999.0,6.0,999.0,2.0,3.0,520.488325,4.861931,29.07916,9.991899,0.352267,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,-0.00737,-0.09953,-0.245714,-0.077979,-0.154156,-0.118741,-0.094763,-0.06721,NDAR_INV007W6H7B,baseline_year_1_arm_1,0.024064,-0.01888,-0.231686,-0.010857,-0.064591,-0.160605,-0.088715,-0.187258,-0.216364,-0.361254,0.003707,-0.123096,-0.181924,-0.007465,-0.092264,-0.091089,-0.186848,-0.159265,-0.259863,-0.227784,-0.275979,-0.273947,-0.375694,-0.29047,-0.060283,-0.222956,-0.010167,-0.118855,4722.0,126.0,10.0,19.0,18.0,1.0,1.0,479.185338,3.559711,0.0,6.254295,0.086835,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,-0.177538,-0.14045,-0.357152,-0.321334,-0.084301,-0.276284,0.178833,0.292628,NDAR_INV00BD7VDC,baseline_year_1_arm_1,0.077727,0.066981,-0.061251,-0.269633,-0.369456,-0.346828,0.100634,0.180677,-0.152761,-0.342322,-0.663691,-0.542897,-0.041563,-0.104568,0.163341,0.139756,-0.054063,0.015743,-0.508518,-0.4861,-1.044365,-0.496992,0.113961,-0.164476,0.185713,0.146171,-0.079012,0.005056,3810.0,112.0,10.0,20.0,20.0,1.0,1.0,414.643009,0.635838,5.863454,3.420132,0.19818,1.0,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.039119,0.241375,0.030802,0.023442,-0.082911,0.165994,-0.009655,0.060066,NDAR_INV00CY2MDM,baseline_year_1_arm_1,-0.116054,0.062262,0.010854,-0.256097,-0.196152,-0.171516,-0.160728,-0.136292,-0.104121,0.291206,-0.009244,0.004842,-0.146274,-0.127897,-0.068194,0.002004,-0.174257,-0.117822,-0.146977,0.138468,-0.175172,-0.130648,-0.110055,-0.238281,-0.078709,0.162027,-0.111633,0.055209,5355.0,130.0,6.0,15.0,,1.0,1.0,1433.061575,2.196885,15.349195,7.476038,0.311358,1.0,4.0,-0.041046,0.128552,0.216366,0.112228,-0.043041,0.166472,0.040723,-0.028979,2_year_follow_up_y_arm_1,-0.014989,0.080711,0.00332,-0.106822,-0.118686,0.027917,-0.054406,0.000477,0.063674,-0.142186,0.016718,-0.137259,-0.025205,-0.07705,-0.10041,-0.090537,-0.093462,-0.216592,-0.061349,-0.12422,-0.10713,-0.153044,-0.111854,-0.087833,-0.063335,-0.181323,0.036538,-0.184872,,152.0,,,,,,,,,,0.097978,1.0,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Convert gender dummy and relative family ID to numeric (float)
* for model ingestion
* Drop rows whose baseline gender is not 1 or 2 (non-binary and the 777/999 codes)
* for now

In [14]:
# Baseline gender codes present before filtering
rs.loc[:, 'gender1'].value_counts()

gender1
1.0      4669
2.0      4487
999.0       5
6.0         4
4.0         4
3.0         2
777.0       2
5.0         1
Name: count, dtype: int64

In [15]:
# Make sure both columns are float before modelling.
# NOTE(review): float (not int) is used here — presumably because missing values
# cannot be stored in a plain integer column; confirm what the model expects.
rs['gender1'] = rs['gender1'].astype(float)
# rs.dropna(subset='rel_family_id_yr1', inplace=True)
rs['famID1'] = rs['famID1'].astype(float)
# rs['gender'].dtype, rs['famID'].dtype
# drop non-binary genders for current analysis: keep codes 1 and 2 only
# (rows with a missing gender1 are dropped too, since NaN matches neither code)
is_binary_gender = (rs['gender1'] == 1) | (rs['gender1'] == 2)
# .copy() gives later cells an independent frame, so modifying it in place does not
# act on (or warn about) a slice of the merged table
rs = rs.loc[is_binary_gender].copy()

In [16]:
# Baseline gender codes remaining after filtering
rs.loc[:, 'gender1'].value_counts()

gender1
1.0    4669
2.0    4487
Name: count, dtype: int64

### Drop any column that is all NaN
* mostly the later-wave copies of demographic variables that are only recorded at baseline, e.g., famID

In [23]:
# Columns that have no non-missing value in any remaining row
cols_all_nan = rs.columns[rs.isnull().all()]
print(cols_all_nan)
# Reassign instead of dropping in place: `rs` was produced by a row filter, and
# in-place changes to a filtered frame can raise SettingWithCopyWarning
rs = rs.drop(columns=cols_all_nan)

Index(['famID5', 'income5', 'pedu5', 'spedu5', 'gender5', 'race5', 'prpensity5', 'LowEdu15', 'SingPH15', 'UnempR15', 'famID9', 'income9', 'pedu9', 'spedu9', 'gender9', 'race9', 'prpensity9', 'LowEdu19', 'SingPH19', 'UnempR19'], dtype='object')


In [24]:
# Final size of the wide table after the gender filter and the all-NaN column drop
rs.shape

(9156, 134)

### Save WIDE dataframe

In [25]:
# Write the wide table to the same directory as the input file; the row index is not saved
wide_csv = 'abcd5.1_rtmri_mid_llosVn_lrwdVn_opfc_subc_net_hses_wide.csv'
rs.to_csv(fpath + wide_csv, index=False)

# END