In [1]:
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

from functools import reduce
from itertools import combinations

from scipy import stats

# Raise pandas' display limits (console width, max columns, max rows) for this session
for option_name, option_value in (
    ('display.width', 500),
    ('display.max_columns', 200),
    ('display.max_rows', 200),
):
    pd.set_option(option_name, option_value)

In [2]:
# Machine switch read by the next cell: a truthy value selects the
# '/Users/cglab/...' data directory, 0 selects the '/home/cglab/...' one.
home = 0

In [3]:
# Resolve the data directory from the `home` switch. The trailing slash is kept
# because file names are appended to `dpath` by plain string concatenation.
dpath = (
    '/Users/cglab/projects/abcd/data/abcd5.1-rser/'
    if home
    else '/home/cglab/projects/abcd/data/abcd5.1-rser/'
)

### Load Lavaan Latent factor summary scores

In [4]:
# Read the latent factor score table from the data directory and preview its first rows
lf_scores_path = dpath + "lf_scores_thrt79_ders_all_sep_w1.csv"
lf = pd.read_csv(lf_scores_path)
lf.head()

Unnamed: 0,Aware,NoAcpt,Implse,Goals,Threat,Threat9,subID
0,-1.055112,-0.576328,-0.652295,-1.172302,-0.715173,-0.348836,NDAR_INV003RTV85
1,-0.250261,-0.527787,0.04779,-0.460943,0.622151,0.654022,NDAR_INV007W6H7B
2,,,,,-0.312589,-0.404786,NDAR_INV00BD7VDC
3,1.754136,-0.466417,0.896197,0.54688,1.420222,1.565469,NDAR_INV00CY2MDM
4,-0.203931,0.423931,0.437713,0.147407,-0.715173,-0.834722,NDAR_INV00HEV6HB


In [5]:
# (rows, columns) of the factor-score table
lf.shape

(7169, 7)

In [6]:
# Per-column count of missing values in the factor-score table
lf.isna().sum()

Aware      1100
NoAcpt     1100
Implse     1100
Goals      1100
Threat        1
Threat9       1
subID         0
dtype: int64

### Compute the quadratic and cubic Threat terms
* computed by squaring and cubing the lavaan latent factor summary score for Threat

In [7]:
# Polynomial terms of the Threat score: its square and its cube, stored as new columns.
# Rows where Threat is missing stay missing in both derived columns.
threat = lf['Threat']
lf['ThreatSQ'] = threat.pow(2)
lf['ThreatCB'] = threat.pow(3)

In [8]:
# Preview the first ten rows, now including the ThreatSQ and ThreatCB columns
lf.head(n=10)

Unnamed: 0,Aware,NoAcpt,Implse,Goals,Threat,Threat9,subID,ThreatSQ,ThreatCB
0,-1.055112,-0.576328,-0.652295,-1.172302,-0.715173,-0.348836,NDAR_INV003RTV85,0.511473,-0.365792
1,-0.250261,-0.527787,0.04779,-0.460943,0.622151,0.654022,NDAR_INV007W6H7B,0.387072,0.240817
2,,,,,-0.312589,-0.404786,NDAR_INV00BD7VDC,0.097712,-0.030544
3,1.754136,-0.466417,0.896197,0.54688,1.420222,1.565469,NDAR_INV00CY2MDM,2.017029,2.864629
4,-0.203931,0.423931,0.437713,0.147407,-0.715173,-0.834722,NDAR_INV00HEV6HB,0.511473,-0.365792
5,-0.017787,0.701461,1.630144,1.694885,-0.715173,-0.834722,NDAR_INV00J52GPG,0.511473,-0.365792
6,-0.92686,-0.033235,-0.396953,1.603505,-0.284744,-0.429204,NDAR_INV00R4TXET,0.081079,-0.023087
7,0.385712,1.127675,1.346087,1.831457,-0.715173,-0.59817,NDAR_INV00U4FTRU,0.511473,-0.365792
8,-0.873808,-0.463824,-0.592179,-0.540034,-0.312589,-0.404786,NDAR_INV00UMK5VC,0.097712,-0.030544
9,-1.203678,-0.476946,-0.631369,-0.941261,-0.715173,-0.59817,NDAR_INV00X2TBWJ,0.511473,-0.365792


### Load full RSER dataframe

In [9]:
# Read the full RSER table. low_memory=False turns off pandas' chunked parsing,
# so each column's dtype is inferred from the whole file.
rser_path = dpath + 'abcd5.1_tfmri_nback_insula_subc_net_ders_gses_sfam_thrt_demo_noscl_nopt_w1_qc.csv'
rser = pd.read_csv(rser_path, low_memory=False)
rser.shape

(7173, 256)

In [10]:
# Column dtypes of the RSER table
rser.dtypes

subID                    object
AmygL1                  float64
AmygR1                  float64
tfmrinbackallsem_224    float64
tfmrinbackallsem_238    float64
                         ...   
SFconP8                 float64
SFconP9                 float64
pedu                    float64
pedu2                   float64
income                  float64
Length: 256, dtype: object

In [11]:
# Missing-value counts for the AmygL1 / AmygR1 columns before merging
amyg_cols = ['AmygL1', 'AmygR1']
rser[amyg_cols].isna().sum()

AmygL1    0
AmygR1    0
dtype: int64

### Merge Latent factor df with RSER df

In [12]:
# Attach the factor scores to the full RSER table. A right join keeps every RSER
# row; subjects without factor scores get NaN in the score columns.
n_rser_rows = len(rser)
lf = lf.merge(rser, how="right", on="subID")
# A right join only preserves the RSER row count when no RSER subID matches more
# than one factor-score row; fail loudly instead of exporting duplicated subjects.
assert len(lf) == n_rser_rows, (
    f"merge changed the row count from {n_rser_rows} to {len(lf)}: "
    "duplicate subID values in the factor-score table"
)
lf.shape

(7173, 264)

In [13]:
# Missing-value counts for the AmygL1 / AmygR1 columns after the merge
lf.loc[:, ['AmygL1', 'AmygR1']].isna().sum()

AmygL1    0
AmygR1    0
dtype: int64

In [14]:
# Number of missing AmygL1 values in the merged table
lf['AmygL1'].isna().sum()

0

In [15]:
# List the merged-table columns whose name contains the substring 'Mot'
list(filter(lambda col_name: 'Mot' in col_name, lf.columns))

['MotT1']

#### Export merged data 
* including full dataframe and latent factor summary scores

In [16]:
# Write the merged table into the data directory, omitting the row index
merged_path = dpath + 'abcd5.1_tfmri_nback_insula_subc_net_ders_gses_sfam_thrt_demo_noscl_nopt_w1_qc_lfa_w1_full.csv'
lf.to_csv(merged_path, index=False)