In [1]:
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

import seaborn as sns
import seaborn.objects as so

from functools import reduce
from itertools import combinations

from scipy import stats

# Raise pandas' display limits (line width, column count, row count) so that
# wide frames are printed with fewer truncations.
for option_name, limit in (
    ('display.width', 500),
    ('display.max_columns', 200),
    ('display.max_rows', 200),
):
    pd.set_option(option_name, limit)

In [2]:
# Machine switch read by the `if home:` cell below: a truthy value selects the
# /Users/... data directory, a falsy value (the current setting) selects /home/...
home = 0

In [3]:
# Choose the data directory for the current machine based on the `home` flag:
# the /Users/... location when it is truthy, the /home/... location otherwise.
dpath = (
    '/Users/cglab/projects/abcd/data/abcd5.1-rser/'
    if home
    else '/home/cglab/projects/abcd/data/abcd5.1-rser/'
)

### Load latent factor scores for threat

In [None]:
# Load the latent factor score table from the data directory.
# pandas has no top-level `read` function (calling it raises AttributeError);
# CSV files are loaded with `pd.read_csv`.
lf = pd.read_csv(dpath + 'lf_scores_thrt_ders_all_sep.csv')
# Show the first rows as a quick check that the file parsed as expected.
lf.head()

### Compute the Quadratic and Cubic deprivation scores
* using Lavaan latent factor summary scores

In [6]:
# For each latent score column, add its square (…SQ) and its cube (…CB) as new
# columns. Missing scores propagate as NaN.
for source_col, (squared_col, cubed_col) in {
    'NBDeprv': ('NBDepSQ', 'NBDepCB'),
    'HSES': ('HSESSQ', 'HSESCB'),
}.items():
    lf[squared_col] = lf[source_col] ** 2
    lf[cubed_col] = lf[source_col] ** 3

In [7]:
# Preview the first 10 rows to eyeball the newly added squared/cubed columns.
lf.head(10)

Unnamed: 0,NBDeprv,HSES,subID,NBDepSQ,NBDepCB,HSESSQ,HSESCB
0,-0.231087,-0.418412,NDAR_INV00CY2MDM,0.053401,-0.01234,0.175069,-0.073251
1,-0.989108,0.041865,NDAR_INV00HEV6HB,0.978335,-0.967679,0.001753,7.3e-05
2,-0.704474,1.148026,NDAR_INV00U4FTRU,0.496284,-0.349619,1.317964,1.513057
3,0.053547,-0.636576,NDAR_INV00X2TBWJ,0.002867,0.000154,0.405229,-0.257959
4,-1.081979,-0.816542,NDAR_INV01AJ15N9,1.17068,-1.266651,0.66674,-0.544421
5,-0.940071,1.936205,NDAR_INV01D03VR7,0.883733,-0.830772,3.748888,7.258614
6,1.047165,0.867887,NDAR_INV01ELX9L6,1.096555,1.148274,0.753228,0.653716
7,,,NDAR_INV01EN91PG,,,,
8,0.762531,-0.459709,NDAR_INV01NAYMZH,0.581453,0.443376,0.211332,-0.097151
9,0.289144,0.923637,NDAR_INV01RGTWD2,0.083604,0.024174,0.853106,0.787961


### Load full RSER dataframe

In [8]:
# Read the full RSER table; low_memory=False makes pandas parse the file in one
# pass instead of in chunks.
rser = pd.read_csv(f"{dpath}abcd5.1_rser_nback_5-30-24.csv", low_memory=False)

# Second export destination used by the final cell of the notebook.
# NOTE(review): unlike `dpath`, this path does not depend on the `home` switch
# and always points under /home/... — confirm that is intended.
outpath = "/home/cglab/projects/abcd/rser/"

# (rows, columns) of the loaded table.
rser.shape

(5754, 583)

In [9]:
# List the dtype pandas assigned to every RSER column.
rser.dtypes

Aware                        float64
NoAcpt                       float64
Implse                       float64
Goals                        float64
Threat                       float64
                              ...   
imgincl_nback_include_yr1    float64
MotionNB                     float64
Rank_AmygL1                  float64
Rank_AmygL5                  float64
AmygRnkDif                   float64
Length: 583, dtype: object

In [10]:
# Number of missing values in each of the four Amyg* columns of the RSER table.
rser.loc[:, ['AmygL1', 'AmygR1', 'AmygL5', 'AmygR5']].isnull().sum()

AmygL1    1590
AmygR1    1590
AmygL5    1590
AmygR5    1590
dtype: int64

In [12]:
# Case-insensitive search for RSER column names containing "mot"; this also
# matches names such as "...emotion...".
[col_name for col_name in rser.columns if 'mot' in col_name.lower()]

['MotT2',
 'MotT1',
 'ders_emotion_overwhelm_p',
 'ders_upset_emotion_overwhelm_p',
 'Sders_emotion_overwhelm_p',
 'Sders_upset_emotion_overwhelm_p',
 'Motion',
 'rsfmri_meanmotion_yr2',
 'Mot5nb',
 'rsfmri_meanmotion_yr1',
 'Mot1nb',
 'MotionNB']

### Merge Latent factor df with RSER df

In [13]:
# Right-join the latent factor scores onto the RSER table by subject ID: every
# RSER row is kept, and subjects with no latent scores get NaN in those columns.
lf = pd.merge(lf, rser, how="right", on="subID")

# (rows, columns) after the merge.
lf.shape

(5754, 589)

In [14]:
# Number of missing values in each of the four Amyg* columns after the merge.
lf.loc[:, ['AmygL1', 'AmygR1', 'AmygL5', 'AmygR5']].isnull().sum()

AmygL1    1590
AmygR1    1590
AmygL5    1590
AmygR5    1590
dtype: int64

In [15]:
# Case-insensitive search for merged-frame column names containing "mot"; this
# also matches names such as "...emotion...".
[col_name for col_name in lf.columns if 'mot' in col_name.lower()]

['MotT2',
 'MotT1',
 'ders_emotion_overwhelm_p',
 'ders_upset_emotion_overwhelm_p',
 'Sders_emotion_overwhelm_p',
 'Sders_upset_emotion_overwhelm_p',
 'Motion',
 'rsfmri_meanmotion_yr2',
 'Mot5nb',
 'rsfmri_meanmotion_yr1',
 'Mot1nb',
 'MotionNB']

### Compute Rank Level Differences
* of Salience to Cingulo-Par connectivity at two timepoints

In [16]:
# Rank the connectivity measures
lf['Rank_AmygL1'] = lf['AmygL1'].rank()
lf['Rank_AmygL5'] = lf['AmygL5'].rank()

# Compute the rank difference
lf['AmygRnkDif'] = lf['Rank_AmygL5'] - lf['Rank_AmygL1'].values

In [17]:
# Inspect the first 20 rank differences; rows lacking a rank show up as NaN.
lf['AmygRnkDif'].head(20)

0      379.0
1      -60.0
2    -1157.0
3      916.0
4     3346.0
5        NaN
6        NaN
7    -1472.0
8    -1538.0
9     1302.0
10   -1317.0
11      74.0
12     394.0
13    -377.0
14      24.0
15    -702.0
16   -3245.0
17     312.0
18     277.0
19       NaN
Name: AmygRnkDif, dtype: float64

In [18]:
# Count of rows with no AmygL1 value in the merged frame.
lf.loc[:, 'AmygL1'].isnull().sum()

1590

In [19]:
# Case-sensitive search: column names containing "Mot".
[col_name for col_name in lf.columns if 'Mot' in col_name]

['MotT2', 'MotT1', 'Motion', 'Mot5nb', 'Mot1nb', 'MotionNB']

In [20]:
# Case-sensitive search: column names containing "Sal".
[col_name for col_name in lf.columns if 'Sal' in col_name]

['Rank_SalCPar1', 'Rank_SalCPar2']

In [21]:
# rename columns
rs_redict = {'sa_ngd_vta_yr1': 'SalVTA1', 'sa_ngd_vta_yr2': 'SalVTA5'}#, 'MotT2': 'MotT5', 'MotT1': 'Mot1nb', 'MotT5': 'Mot5nb', 'Motion': 'MotionNB'}
lf.rename(columns=rs_redict, inplace=True)

#### Export merged data 
* including full dataframe and latent factor summary scores

In [22]:
# Write the merged frame into the data directory, omitting the row index.
lf.to_csv(
    f"{dpath}abcd5.1_tfmri_nback_insula_subc_net_ders_gses_sfam_thrt_demo_noscl_nopt_wide_qc_lfa_harsh.csv",
    index=False,
)

In [23]:
# Write a second copy of the merged frame into the project output directory,
# again omitting the row index.
lf.to_csv(
    f"{outpath}abcd5.1_tfmri_nback_insula_subc_net_ders_gses_sfam_thrt_demo_noscl_nopt_wide_qc_lfa_harsh.csv",
    index=False,
)