In [1]:
import os
import numpy as np
import pandas as pd
idx = pd.IndexSlice

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

from scipy import stats
# Seed passed as random_state to the stochastic steps of this notebook.
SEED = 69

# Directory holding the ABCD tables, relative to the notebook.
ABCD_PATH = '../../ABCD'
# Columns used as the (subject, visit) row index when tables are loaded.
IDENTIFIERS = ['src_subject_id', 'eventname']
# The two visits compared in this analysis, in chronological order.
EVENTS = ['baseline_year_1_arm_1', '2_year_follow_up_y_arm_1']

# Functional-connectivity column names are built from a pair of network codes.
FCON_TEMPLATE = 'rsfmri_c_ngd_{0}_ngd_{1}'
# Network code -> readable name. Codes mapped to None are skipped when the
# column list is built; insertion order fixes the order of those columns.
FCON = dict(
    ad='auditory',
    cgc='cingulo-opercular',
    ca='cingulo-parietal',
    dt='default',
    dla='dorsal attention',
    fo='fronto-parietal',
    n=None,
    rspltp='retrosplenial temporal',
    smh='sensorimotor hand',
    smm='sensorimotor mouth',
    sa='salience',
    vta='ventral attention',
    vs='visual',
)

# Structural-connectivity columns share this prefix; the placeholder takes the
# tract suffix (presumably DTI fractional anisotropy per fiber tract - TODO confirm).
SCON_TEMPLATE = 'dmri_dtifa_fiberat_{0}'

In [2]:
def _read_abcd_table(filename):
    """Load one tab-separated table from ABCD_PATH, indexed by IDENTIFIERS.

    The file's second line (row index 1, directly under the header) is
    skipped; presumably it holds column descriptions - TODO confirm.
    """
    table_path = os.path.join(ABCD_PATH, filename)
    return pd.read_csv(table_path, sep='\t', skiprows=[1], index_col=IDENTIFIERS)


betnet = _read_abcd_table('abcd_betnet02.tsv')
dti = _read_abcd_table('abcd_dti_p101.tsv')

# Imaging inclusion table: discard rows without a 'visit' value, then keep
# only the last row for any repeated (subject, visit) index entry.
inclusion = (_read_abcd_table('abcd_imgincl01.tsv')
             .dropna(subset=['visit'])
             .loc[lambda table: ~table.index.duplicated(keep='last')])

# Covariates come from a CSV under output/ rather than from ABCD_PATH.
covariates = pd.read_csv('output/abcd_covariates.csv', index_col=IDENTIFIERS)

In [3]:
# Visits flagged for inclusion in both the resting-state fMRI and the
# diffusion MRI columns of the inclusion table.
passes_both = ((inclusion['imgincl_rsfmri_include'] == 1) &
               (inclusion['imgincl_dmri_include'] == 1))
include = inclusion.loc[passes_both]

# Longitudinal subjects need a passing visit at *every* event in EVENTS.
# Counting rows per subject (len == 2) would also accept any other pair of
# visits and would reject subjects with additional visits, so count the
# distinct EVENTS each subject has instead. With only the two EVENTS present
# and a unique index this selects the same subjects, in the same sorted order.
passing_visits = include.index.to_frame(index=False)
passing_visits = passing_visits.loc[passing_visits['eventname'].isin(EVENTS)]
events_per_subject = (passing_visits.groupby('src_subject_id')['eventname']
                      .nunique())
subs_long = events_per_subject.index[events_per_subject == len(EVENTS)].tolist()

In [4]:
# Network codes that map to a named network (None-valued entries are skipped).
fcon_codes = [code for code, label in FCON.items() if label is not None]

# One column per network pair, diagonal included: each code is paired with
# every code up to and including itself, in FCON order.
fcon_columns = [FCON_TEMPLATE.format(first, second)
                for position, first in enumerate(fcon_codes)
                for second in fcon_codes[:position + 1]]

# Connectivity values for the longitudinal subjects at both events.
fcon = betnet.loc[idx[subs_long, EVENTS], fcon_columns]

In [5]:
# Boolean mask over dti's columns: True where the name starts with the
# SCON_TEMPLATE prefix (the template with its placeholder left empty).
scon_prefix = SCON_TEMPLATE.format('')
scon_column_filter = dti.columns.str.startswith(scon_prefix)

# Matching columns for the longitudinal subjects at both events.
scon = dti.loc[idx[subs_long, EVENTS], scon_column_filter]

# Preprocess

In [6]:
def _change_between_events(values):
    """Within-subject difference between consecutive rows, indexed by subject.

    Rows that contain NaN after differencing (which includes each subject's
    first row) are dropped, then the 'eventname' index level is removed.
    """
    per_subject = values.groupby(level='src_subject_id')
    return per_subject.diff().dropna().droplevel('eventname')


# Change in functional connectivity between the two events.
deltaFC = _change_between_events(fcon)

# interview_age divided by 12 (presumably months -> years; TODO confirm units),
# restricted to the subjects that have a deltaFC row.
age = covariates.loc[idx[deltaFC.index, EVENTS], 'interview_age'] / 12
age_diff = _change_between_events(age)

# Connectivity change per unit of elapsed age; rows are aligned on subject.
dFC = deltaFC.divide(age_diff, axis=0)

In [7]:
# Structural connectivity at the first event (EVENTS[0]) for the subjects
# indexed in dFC; the result is indexed by subject only.
first_event_of_dfc_subjects = idx[dFC.index, EVENTS[0]]
SC = scon.loc[first_event_of_dfc_subjects, :].droplevel('eventname')

In [8]:
# Family identifier at the first event (EVENTS[0]) for each subject in SC.
# The (subject, eventname) index of covariates is kept on the result.
first_event_of_sc_subjects = idx[SC.index, EVENTS[0]]
family = covariates.loc[first_event_of_sc_subjects, 'rel_family_id']

## Split test sample

In [9]:
from sklearn.model_selection import GroupShuffleSplit

# A single seeded split holding out 20% of the groups; family is the grouping
# variable, so members of one family are not divided between train and test.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=SEED)
split_iterator = gss.split(SC, dFC, groups=family)
# train / test are arrays of positional row indices into SC and dFC.
train, test = next(split_iterator)

## Regress out confounds

In [None]:
def _confounds_with_motion(subjects, motion):
    """Covariates for `subjects` at both EVENTS, joined with a motion column.

    `subjects` selects rows of `covariates` by subject id; `motion` is a
    Series joined on the index and named 'meanmotion' in the result.
    """
    demographics = covariates.loc[idx[subjects, EVENTS],
                                  ['sex', 'race.6level', 'site_id']]
    return demographics.join(motion.rename('meanmotion'))


# Confounds for the functional data use the rs-fMRI mean-motion column.
fcon_confounds = _confounds_with_motion(dFC.index,
                                        betnet['rsfmri_c_ngd_meanmotion'])
# Confounds for the structural data use the dMRI mean-motion column.
scon_confounds = _confounds_with_motion(SC.index,
                                        dti['dmri_dti_meanmotion'])