# Phenotypes preprocessing

In [None]:
# Set working directory
# NOTE(review): every relative path used afterwards resolves against this
# directory, so the script has to be launched from the parent of `WorkDir`.
setwd("./WorkDir")

# Create holding directory for preprocessed datasets.
# `recursive = TRUE` also creates the parent `./CSI` when it is missing;
# without it dir.create() only warns and returns FALSE, so no output
# directory would exist.
if (!dir.exists("./CSI/Preprocessed")) {
  dir.create("./CSI/Preprocessed", recursive = TRUE)
}

# Load necessary libraries
suppressPackageStartupMessages(library(tidyverse))

### Cognitive Behavioral Kernel

In [None]:
# Load all data in

# abcd_bpmt01: per-subject mean of every `bpmt_q*` column over the
# baseline, 1-year and 2-year events.
# The strings 777/888/999 are read as missing in every column, and `[-1, ]`
# discards the first data row (presumably a description row - confirm).
abcd_bpmt01 <- read_delim(
  "./ABCD_Data/abcd_bpmt01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("bpmt_q")) %>%
  # Everything is read as character, so convert the items before averaging.
  mutate(across(contains("bpmt_q"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than `T`: `T` is an ordinary variable that can be reassigned.
  summarise(across(contains("bpmt_q"), ~ mean(.x, na.rm = TRUE)))

# abcd_cbcl01: per-subject mean of every `cbcl_q*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_cbcl01 <- read_delim(
  "./ABCD_Data/abcd_cbcl01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("cbcl_q")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("cbcl_q"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("cbcl_q"), ~ mean(.x, na.rm = TRUE)))

# abcd_cbcls01: per-subject mean of every column whose name ends in `_r`,
# over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_cbcls01 <- read_delim(
  "./ABCD_Data/abcd_cbcls01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_r")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_r"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_r"), ~ mean(.x, na.rm = TRUE)))

# abcd_ssbpmtf01: per-subject mean of every column whose name ends in `_r`,
# over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ssbpmtf01 <- read_delim(
  "./ABCD_Data/abcd_ssbpmtf01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_r")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_r"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_r"), ~ mean(.x, na.rm = TRUE)))

# abcd_bpm01: per-subject mean of every `bpm_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_bpm01 <- read_delim(
  "./ABCD_Data/abcd_bpm01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("bpm_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("bpm_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("bpm_"), ~ mean(.x, na.rm = TRUE)))

# abcd_yssbpm01: per-subject mean of the columns whose names contain `_r`,
# over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
# NOTE(review): the `_sum` columns are selected and converted but are not part
# of the final summarise(), so they do not appear in the result. Confirm
# whether they were meant to be averaged as well.
abcd_yssbpm01 <- read_delim(
  "./ABCD_Data/abcd_yssbpm01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_r"), ends_with("_sum")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(c(contains("_r"), contains("_sum")), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_r"), ~ mean(.x, na.rm = TRUE)))

# abcd_ywpss01: per-subject mean of every `wps_q*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ywpss01 <- read_delim(
  "./ABCD_Data/abcd_ywpss01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("wps_q")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("wps_q"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("wps_q"), ~ mean(.x, na.rm = TRUE)))

# psb01: per-subject mean of every `prosocial_q*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
psb01 <- read_delim(
  "./ABCD_Data/psb01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("prosocial_q")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("prosocial_q"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("prosocial_q"), ~ mean(.x, na.rm = TRUE)))

# abcd_psb01: per-subject mean of every `prosocial_q*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_psb01 <- read_delim(
  "./ABCD_Data/abcd_psb01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("prosocial_q")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("prosocial_q"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("prosocial_q"), ~ mean(.x, na.rm = TRUE)))

# abcd_cb01: per-subject mean of every `cybb_phenx*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_cb01 <- read_delim(
  "./ABCD_Data/abcd_cb01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("cybb_phenx")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("cybb_phenx"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("cybb_phenx"), ~ mean(.x, na.rm = TRUE)))

# cct01: `subjectkey` plus the numeric `cash_choice_task` value for rows in
# the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
# NOTE(review): unlike most loaders in this script there is no per-subject
# aggregation here, so a subject with several matching events keeps several
# rows - confirm that this is intended.
cct01 <- read_delim(
  "./ABCD_Data/cct01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, cash_choice_task) %>%
  # The column arrives as character; convert it to numeric.
  mutate(across(contains("cash_choice_task"), as.numeric))

# abcd_ps01: per-subject mean of the `pea_*` columns (minus the three
# assessment bookkeeping columns) over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ps01 <- read_delim(
  "./ABCD_Data/abcd_ps01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("pea_")) %>%
  # Status/date/id columns share the prefix but are not averaged.
  select(-c(pea_assessment_status, pea_assessmentdate, pea_assessmentid)) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("pea_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("pea_"), ~ mean(.x, na.rm = TRUE)))

# abcd_ehis01: per-subject mean of `ehi1b`..`ehi4b` over the baseline,
# 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ehis01 <- read_delim(
  "./ABCD_Data/abcd_ehis01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ehi1b, ehi2b, ehi3b, ehi4b) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("ehi"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("ehi"), ~ mean(.x, na.rm = TRUE)))

# abcd_tbss01: per-subject mean of every `*_rawscore` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_tbss01 <- read_delim(
  "./ABCD_Data/abcd_tbss01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_rawscore")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_rawscore"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_rawscore"), ~ mean(.x, na.rm = TRUE)))

# abcd_ytbpai01: per-subject mean of every `poa_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ytbpai01 <- read_delim(
  "./ABCD_Data/abcd_ytbpai01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("poa_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("poa_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("poa_"), ~ mean(.x, na.rm = TRUE)))

# abcd_eatqp01: per-subject mean of every column whose name ends in `_p`,
# over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_eatqp01 <- read_delim(
  "./ABCD_Data/abcd_eatqp01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_p")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(ends_with("_p"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_p"), ~ mean(.x, na.rm = TRUE)))

# abcd_pbp01: per-subject mean of every `pbp_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_pbp01 <- read_delim(
  "./ABCD_Data/abcd_pbp01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("pbp_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("pbp_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("pbp_"), ~ mean(.x, na.rm = TRUE)))

# abcd_upps01: per-subject mean of every `upps*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_upps01 <- read_delim(
  "./ABCD_Data/abcd_upps01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("upps")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("upps"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("upps"), ~ mean(.x, na.rm = TRUE)))

# pps01: per-subject mean of every column whose name ends in `_y`, over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
# NOTE(review): this loader previously had no `eventname` filter, so rows from
# any other event in the file were averaged in. The filter below makes it
# consistent with the other per-subject loaders in this script; remove it if
# averaging over all events was deliberate.
pps01 <- read_delim(
  "./ABCD_Data/pps01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_y")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_y"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_y"), ~ mean(.x, na.rm = TRUE)))

# abcd_y7mi01: per-subject mean of every `sup_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_y7mi01 <- read_delim(
  "./ABCD_Data/abcd_y7mi01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("sup_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("sup_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("sup_"), ~ mean(.x, na.rm = TRUE)))

# abcd_gish2y01: per-subject mean of every `gish2_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_gish2y01 <- read_delim(
  "./ABCD_Data/abcd_gish2y01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("gish2_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("gish2_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("gish2_"), ~ mean(.x, na.rm = TRUE)))

# abcd_sds01: per-subject mean of every `sleepdisturb*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_sds01 <- read_delim(
  "./ABCD_Data/abcd_sds01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("sleepdisturb")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("sleepdisturb"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("sleepdisturb"), ~ mean(.x, na.rm = TRUE)))

# abcd_ysr01: per-subject mean of the columns ending in `_y` (excluding those
# starting with `resiliency7`) over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ysr01 <- read_delim(
  "./ABCD_Data/abcd_ysr01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_y")) %>%
  select(-starts_with("resiliency7")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_y"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_y"), ~ mean(.x, na.rm = TRUE)))

# abcd_gdss01: the `gdt_scr_values_*` / `gdt_scr_parameters_*` columns,
# `subjectkey` and `gdt_scr_script_elapsedtime` (in that column order) for
# rows in the baseline, 1-year and 2-year events, with `gdt_*` made numeric.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
# NOTE(review): there is no per-subject aggregation here, so a subject with
# several matching events keeps several rows - confirm that this is intended.
abcd_gdss01 <- read_delim(
  "./ABCD_Data/abcd_gdss01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(
    contains(c("gdt_scr_values_", "gdt_scr_parameters_")),
    subjectkey,
    gdt_scr_script_elapsedtime
  ) %>%
  # Columns arrive as character; convert the task columns to numeric.
  mutate(across(contains("gdt_"), as.numeric))

# lmtp201: per-subject mean of every `lmt_scr_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing in every column (including these task
# scores); `[-1, ]` discards the first data row (presumably a description
# row - confirm).
lmtp201 <- read_delim(
  "./ABCD_Data/lmtp201.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("lmt_scr_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("lmt_scr_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("lmt_scr_"), ~ mean(.x, na.rm = TRUE)))
    
# macv01: per-subject mean of every `mex_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
macv01 <- read_delim(
  "./ABCD_Data/macv01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("mex_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("mex_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("mex_"), ~ mean(.x, na.rm = TRUE)))

# abcd_meim01: per-subject mean of every `meim_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_meim01 <- read_delim(
  "./ABCD_Data/abcd_meim01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("meim_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("meim_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("meim_"), ~ mean(.x, na.rm = TRUE)))

# abcd_via01: per-subject mean of every `vancouver_q*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_via01 <- read_delim(
  "./ABCD_Data/abcd_via01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("vancouver_q")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("vancouver_q"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("vancouver_q"), ~ mean(.x, na.rm = TRUE)))

# abcd_peq01: per-subject mean of every `peq_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_peq01 <- read_delim(
  "./ABCD_Data/abcd_peq01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("peq_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("peq_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("peq_"), ~ mean(.x, na.rm = TRUE)))

# srpf01: per-subject mean of every `school_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
# NOTE(review): this loader previously had no `eventname` filter, so rows from
# any other event in the file were averaged in. The filter below makes it
# consistent with the other per-subject loaders in this script; remove it if
# averaging over all events was deliberate.
srpf01 <- read_delim(
  "./ABCD_Data/srpf01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("school_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("school_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("school_"), ~ mean(.x, na.rm = TRUE)))

# abcd_siss01: per-subject mean of the `sit_*` columns over the baseline,
# 1-year and 2-year events, after removing columns whose names end in
# date / version / id / session / event / platform / time.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_siss01 <- read_delim(
  "./ABCD_Data/abcd_siss01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("sit_")) %>%
  # Each suffix is listed once (the original repeated "date").
  select(-c(
    ends_with("date"),
    ends_with("version"),
    ends_with("id"),
    ends_with("session"),
    ends_with("event"),
    ends_with("platform"),
    ends_with("time")
  )) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("sit_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("sit_"), ~ mean(.x, na.rm = TRUE)))
    
# abcd_sscep01: per-subject mean of the columns containing `_p_` (excluding
# those ending in `_nm` / `_nt`) over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_sscep01 <- read_delim(
  "./ABCD_Data/abcd_sscep01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, contains("_p_")) %>%
  select(-c(ends_with("_nm"), ends_with("_nt"))) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_p_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_p_"), ~ mean(.x, na.rm = TRUE)))

# abcd_sscey01: per-subject mean of the columns containing `_y_` (excluding
# those ending in `_nm` / `_nt` / `_na`) over the baseline, 1-year and
# 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_sscey01 <- read_delim(
  "./ABCD_Data/abcd_sscey01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, contains("_y_")) %>%
  select(-c(ends_with("_nm"), ends_with("_nt"), ends_with("_na"))) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_y_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_y_"), ~ mean(.x, na.rm = TRUE)))

# abcd_ydmes01: per-subject mean of every `dim_*` column over the
# baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ydmes01 <- read_delim(
  "./ABCD_Data/abcd_ydmes01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("dim_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("dim_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("dim_"), ~ mean(.x, na.rm = TRUE)))

# abcd_pssrs01: per-subject mean of every column whose name ends in `_p`,
# over the baseline, 1-year and 2-year events.
# 777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_pssrs01 <- read_delim(
  "./ABCD_Data/abcd_pssrs01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_p")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_p"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_p"), ~ mean(.x, na.rm = TRUE)))

# abcd_ksad01: per-subject mean of the `ksads_*` columns over the baseline,
# 1-year and 2-year events, excluding the `ksads_11_*` columns and the
# timestamp / duration / import-id columns.
# 555/666/777/888/999 are read as missing; `[-1, ]` discards the first data
# row (presumably a description row - confirm).
abcd_ksad01 <- read_delim(
  "./ABCD_Data/abcd_ksad01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "555", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("ksads_")) %>%
  select(-c(
    starts_with("ksads_11_"),
    ksads_timestamp_p,
    ksads_duration_p,
    ksads_import_id_p
  )) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("ksads_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("ksads_"), ~ mean(.x, na.rm = TRUE)))

# abcd_ksad501: per-subject mean of the `ksads_*` columns over the baseline,
# 1-year and 2-year events, excluding the `ksads_11_*` columns and the
# timestamp / duration / import-id columns.
# 555/666/777/888/999 are read as missing; `[-1, ]` discards the first data
# row (presumably a description row - confirm).
abcd_ksad501 <- read_delim(
  "./ABCD_Data/abcd_ksad501.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "555", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("ksads_")) %>%
  select(-c(
    starts_with("ksads_11_"),
    ksads_timestamp_t,
    ksads_duration_t,
    ksads_import_id_t
  )) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("ksads_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("ksads_"), ~ mean(.x, na.rm = TRUE)))

# abcd_mhy02: per-subject mean of the columns containing `_y_` (excluding
# those ending in `_nm` / `_nt`) over the baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_mhy02 <- read_delim(
  "./ABCD_Data/abcd_mhy02.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, contains("_y_")) %>%
  select(-c(ends_with("_nm"), ends_with("_nt"))) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_y_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_y_"), ~ mean(.x, na.rm = TRUE)))

# abcd_mrinback02: per-subject mean of the `tfmri_*` columns (excluding those
# ending in `_visitid`) over the baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing in every column; `[-1, ]` discards the
# first data row (presumably a description row - confirm).
abcd_mrinback02 <- read_delim(
  "./ABCD_Data/abcd_mrinback02.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("tfmri_")) %>%
  select(-ends_with("_visitid")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("tfmri_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("tfmri_"), ~ mean(.x, na.rm = TRUE)))

# abcd_yddss01: per-subject mean of the `ddis_scr_val*` and `ddis_scr_expr_*`
# columns (minus `ddis_scr_val_testdelays`) over the baseline, 1-year and
# 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_yddss01 <- read_delim(
  "./ABCD_Data/abcd_yddss01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(
    subjectkey,
    starts_with("ddis_scr_val"),
    starts_with("ddis_scr_expr_")
  ) %>%
  select(-ddis_scr_val_testdelays) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("ddis_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("ddis_"), ~ mean(.x, na.rm = TRUE)))

# abcd_yest01: per-subject mean of every `strp_scr_acc_*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_yest01 <- read_delim(
  "./ABCD_Data/abcd_yest01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("strp_scr_acc_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("strp_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("strp_"), ~ mean(.x, na.rm = TRUE)))

# abcd_bisbas01: per-subject mean of every `bisbas*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_bisbas01 <- read_delim(
  "./ABCD_Data/abcd_bisbas01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("bisbas")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("bisbas"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("bisbas"), ~ mean(.x, na.rm = TRUE)))

# abcd_ygi01: per-subject mean of every `gish_*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_ygi01 <- read_delim(
  "./ABCD_Data/abcd_ygi01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("gish_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("gish_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("gish_"), ~ mean(.x, na.rm = TRUE)))

# abcd_macvsy01: per-subject mean of every `macv_*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_macvsy01 <- read_delim(
  "./ABCD_Data/abcd_macvsy01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("macv_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("macv_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("macv_"), ~ mean(.x, na.rm = TRUE)))

# abcd_mcqc01: per-subject mean of every `mctq_*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_mcqc01 <- read_delim(
  "./ABCD_Data/abcd_mcqc01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("mctq_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("mctq_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("mctq_"), ~ mean(.x, na.rm = TRUE)))

# abcd_pgbi01: per-subject mean of every `gen_child*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
abcd_pgbi01 <- read_delim(
  "./ABCD_Data/abcd_pgbi01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("gen_child")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("gen_child"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("gen_child"), ~ mean(.x, na.rm = TRUE)))

# crpbi01: per-subject mean of the columns ending in `_y` (minus
# `crpbi_caregiver1_y`) over the baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
crpbi01 <- read_delim(
  "./ABCD_Data/crpbi01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_y")) %>%
  select(-crpbi_caregiver1_y) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("_y"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("_y"), ~ mean(.x, na.rm = TRUE)))

# abcd_abcls01: `subjectkey` plus the columns ending in `_r` for rows in the
# baseline, 1-year and 2-year events; columns whose names contain `abcl` are
# converted to numeric.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
# NOTE(review): there is no per-subject aggregation here, so a subject with
# several matching events keeps several rows - confirm that this is intended.
abcd_abcls01 <- read_delim(
  "./ABCD_Data/abcd_abcls01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_r")) %>%
  # Columns arrive as character; convert the `abcl` columns to numeric.
  mutate(across(contains("abcl"), as.numeric))

# fes02: per-subject mean of every `fam_*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
fes02 <- read_delim(
  "./ABCD_Data/fes02.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("fam_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("fam_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("fam_"), ~ mean(.x, na.rm = TRUE)))

# pmq01: per-subject mean of every `parent_*` column over the
# baseline, 1-year and 2-year events.
# 666/777/888/999 are read as missing; `[-1, ]` discards the first data row
# (presumably a description row - confirm).
pmq01 <- read_delim(
  "./ABCD_Data/pmq01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
)[-1, ] %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("parent_")) %>%
  # Columns arrive as character; convert before averaging.
  mutate(across(contains("parent_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  # `TRUE` rather than the reassignable shorthand `T`.
  summarise(across(contains("parent_"), ~ mean(.x, na.rm = TRUE)))

# abcd_fes01: per-subject mean of the "fes_" columns over the baseline, 1-year
# and 2-year events. The strings "", "NA", "666", "777", "888" and "999" are
# read as missing.
abcd_fes01 <- read_delim(
  "./ABCD_Data/abcd_fes01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("fes_")) %>%
  mutate(across(contains("fes_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("fes_"), function(x) mean(x, na.rm = TRUE)))

# abcd_ple01: per-subject mean of the "ple_" columns over the baseline, 1-year
# and 2-year events. The strings "", "NA", "666", "777", "888" and "999" are
# read as missing.
abcd_ple01 <- read_delim(
  "./ABCD_Data/abcd_ple01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("ple_")) %>%
  mutate(across(contains("ple_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("ple_"), function(x) mean(x, na.rm = TRUE)))

# abcd_mhp02: per-subject mean of the columns whose name contains "_p_"
# (excluding those ending in "_nt" or "_nm") over the baseline, 1-year and
# 2-year events. The strings "", "NA", "666", "777", "888" and "999" are read
# as missing.
abcd_mhp02 <- read_delim(
  "./ABCD_Data/abcd_mhp02.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(
    subjectkey,
    contains("_p_"),
    -ends_with("_nt"),
    -ends_with("_nm")
  ) %>%
  mutate(across(contains("_p_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("_p_"), function(x) mean(x, na.rm = TRUE)))

# abcd_crpf01: per-subject mean of the "su_risk_" columns over the baseline,
# 1-year and 2-year events. The strings "", "NA", "666", "777", "888" and
# "999" are read as missing.
# The numeric conversion and the aggregation use the same "su_risk_" selector
# as the column selection, so every selected column is converted and kept. A
# narrower selector in the later steps would leave any non-matching column as
# character and drop it from the summary.
abcd_crpf01 <- read_delim(
  "./ABCD_Data/abcd_crpf01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("su_risk_")) %>%
  mutate(across(starts_with("su_risk_"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(starts_with("su_risk_"), function(x) mean(x, na.rm = TRUE)))

# abcd_yle01: per-subject mean of the columns ending in "_y" over the
# baseline, 1-year and 2-year events. The strings "", "NA", "666", "777",
# "888" and "999" are read as missing.
abcd_yle01 <- read_delim(
  "./ABCD_Data/abcd_yle01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_y")) %>%
  mutate(across(contains("_y"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("_y"), function(x) mean(x, na.rm = TRUE)))

# abcd_pnsc01: per-subject mean of the "neighborhood" columns over the
# baseline, 1-year and 2-year events. The strings "", "NA", "666", "777",
# "888" and "999" are read as missing.
abcd_pnsc01 <- read_delim(
  "./ABCD_Data/abcd_pnsc01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("neighborhood")) %>%
  mutate(across(contains("neighborhood"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("neighborhood"), function(x) mean(x, na.rm = TRUE)))

# abcd_ypdms01: per-subject mean of the columns ending in "_y" (except
# menstrualcycle1_y, which is removed before conversion) over the baseline,
# 1-year and 2-year events. The strings "", "NA", "666", "777", "888" and
# "999" are read as missing.
abcd_ypdms01 <- read_delim(
  "./ABCD_Data/abcd_ypdms01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, ends_with("_y"), -menstrualcycle1_y) %>%
  mutate(across(contains("_y"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("_y"), function(x) mean(x, na.rm = TRUE)))

# abcd_yrb01: per-subject mean of the "physical_activity" columns over the
# baseline, 1-year and 2-year events. The strings "", "NA", "666", "777",
# "888" and "999" are read as missing.
abcd_yrb01 <- read_delim(
  "./ABCD_Data/abcd_yrb01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(subjectkey, starts_with("physical_activity")) %>%
  mutate(across(contains("physical_activity"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(
    across(contains("physical_activity"), function(x) mean(x, na.rm = TRUE))
  )

# abcd_hsss01: per-subject mean of the three hormone "_mean" columns
# (hormone_scr_dhea_mean, hormone_scr_hse_mean, hormone_scr_ert_mean) over the
# baseline, 1-year and 2-year events. The strings "", "NA", "666", "777",
# "888" and "999" are read as missing.
abcd_hsss01 <- read_delim(
  "./ABCD_Data/abcd_hsss01.txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = "c",
  trim_ws = TRUE,
  na = c("", "NA", "666", "777", "888", "999")
) %>%
  # Drop the first row after the header
  # (presumably a column-description row -- TODO confirm).
  slice(-1) %>%
  filter(eventname %in% c(
    "baseline_year_1_arm_1",
    "1_year_follow_up_y_arm_1",
    "2_year_follow_up_y_arm_1"
  )) %>%
  select(
    subjectkey,
    hormone_scr_dhea_mean,
    hormone_scr_hse_mean,
    hormone_scr_ert_mean
  ) %>%
  mutate(across(contains("_mean"), as.numeric)) %>%
  group_by(subjectkey) %>%
  summarise(across(contains("_mean"), function(x) mean(x, na.rm = TRUE)))

In [None]:
# Collect every subjectkey that occurs in at least one instrument table.
# The key vectors are folded left to right with union(), so each key appears
# once, in order of first appearance.
sk <- Reduce(
  union,
  list(
    abcd_bpmt01$subjectkey, abcd_cbcl01$subjectkey, abcd_cbcls01$subjectkey,
    abcd_ssbpmtf01$subjectkey, abcd_bpm01$subjectkey, abcd_yssbpm01$subjectkey,
    abcd_ywpss01$subjectkey, psb01$subjectkey, abcd_psb01$subjectkey,
    abcd_cb01$subjectkey, cct01$subjectkey, abcd_ps01$subjectkey,
    abcd_ehis01$subjectkey, abcd_tbss01$subjectkey, abcd_ytbpai01$subjectkey,
    abcd_eatqp01$subjectkey, abcd_pbp01$subjectkey, abcd_upps01$subjectkey,
    pps01$subjectkey, abcd_y7mi01$subjectkey, abcd_gish2y01$subjectkey,
    abcd_sds01$subjectkey, abcd_ysr01$subjectkey, abcd_gdss01$subjectkey,
    lmtp201$subjectkey, macv01$subjectkey, abcd_meim01$subjectkey,
    abcd_pxccp01$subjectkey, abcd_via01$subjectkey, abcd_peq01$subjectkey,
    srpf01$subjectkey, abcd_siss01$subjectkey, abcd_sscep01$subjectkey,
    abcd_sscey01$subjectkey, abcd_ydmes01$subjectkey, abcd_pssrs01$subjectkey,
    abcd_ksad01$subjectkey, abcd_ksad501$subjectkey, abcd_mhy02$subjectkey,
    abcd_mrinback02$subjectkey, abcd_yddss01$subjectkey, abcd_yest01$subjectkey,
    abcd_bisbas01$subjectkey, abcd_ygi01$subjectkey, abcd_macvsy01$subjectkey,
    abcd_mcqc01$subjectkey, abcd_pgbi01$subjectkey, crpbi01$subjectkey,
    abcd_abcls01$subjectkey, fes02$subjectkey, pmq01$subjectkey,
    abcd_fes01$subjectkey, abcd_ple01$subjectkey, abcd_mhp02$subjectkey,
    abcd_crpf01$subjectkey, abcd_yle01$subjectkey, abcd_pnsc01$subjectkey,
    abcd_ypdms01$subjectkey, abcd_yrb01$subjectkey, abcd_hsss01$subjectkey
  )
)

# Assemble the combined phenotype table: start from the full list of subject
# keys and full-join every instrument table on subjectkey, in the order
# listed. Each table is joined exactly once; joining the same table twice
# would duplicate all of its columns under ".x"/".y" suffixes.
# The fold starts from a base data.frame, so Pheno stays a data.frame (the
# later cells index it with Pheno[, i] and expect a plain vector back).
Pheno <- purrr::reduce(
  list(
    abcd_bpmt01, abcd_cbcl01, abcd_cbcls01,
    abcd_ssbpmtf01, abcd_bpm01, abcd_yssbpm01,
    abcd_ywpss01, psb01, abcd_psb01,
    abcd_cb01, cct01, abcd_ps01,
    abcd_ehis01, abcd_tbss01, abcd_ytbpai01,
    abcd_eatqp01, abcd_pbp01, abcd_upps01,
    pps01, abcd_y7mi01, abcd_gish2y01,
    abcd_sds01, abcd_ysr01, abcd_gdss01,
    lmtp201, macv01, abcd_meim01,
    abcd_pxccp01, abcd_via01, abcd_peq01,
    srpf01, abcd_siss01, abcd_sscep01,
    abcd_sscey01, abcd_ydmes01, abcd_pssrs01,
    abcd_ksad01, abcd_ksad501, abcd_mhy02,
    abcd_mrinback02, abcd_yddss01, abcd_yest01,
    abcd_bisbas01, abcd_ygi01, abcd_macvsy01,
    abcd_mcqc01, abcd_pgbi01, crpbi01,
    abcd_abcls01, fes02, pmq01,
    abcd_fes01, abcd_ple01, abcd_mhp02,
    abcd_crpf01, abcd_yle01, abcd_pnsc01,
    abcd_ypdms01, abcd_yrb01, abcd_hsss01
  ),
  function(joined, tbl) full_join(joined, tbl, by = "subjectkey"),
  .init = data.frame(subjectkey = sk)
)

# Report the table size and the total number of missing cells
dim(Pheno)
sum(is.na(Pheno))

In [None]:
# Drop every column in which more than 10% of the values are missing.
# is.na() is TRUE for both NA and NaN, so the per-subject means that came out
# as NaN are counted as missing as well.
Remove <- names(Pheno)[
  vapply(
    Pheno,
    function(col) sum(is.na(col)) / length(col),
    numeric(1)
  ) > 0.1
]

Pheno <- select(Pheno, -all_of(Remove))

# Report the table size and the remaining number of missing cells
dim(Pheno)
sum(is.na(Pheno))

In [None]:
# Mean-impute the phenotype columns: in every column except the first
# (subjectkey), replace missing values with the mean of that column's
# observed values.
Pheno[-1] <- lapply(Pheno[-1], function(col) {
  col[is.na(col)] <- mean(col, na.rm = TRUE)
  col
})

# Count the missing cells that remain after imputation
sum(is.na(Pheno))

In [None]:
# Drop constant variables: compute max - min for every column except the
# first (subjectkey) and remove the columns whose range is exactly 0.
Range <- vapply(Pheno[-1], function(col) diff(range(col)), numeric(1))
Pheno <- select(Pheno, -all_of(names(Range)[Range == 0]))

dim(Pheno)
head(Pheno)

In [None]:
# Min-max scale every column except the first (subjectkey) to [0, 1]:
# (x - min) / (max - min).
Pheno[-1] <- lapply(Pheno[-1], function(col) {
  lo <- min(col)
  (col - lo) / (max(col) - lo)
})
sum(is.na(Pheno))

dim(Pheno)
head(Pheno)

In [None]:
# Drop near-constant variables: compute the sample variance of every column
# except the first (subjectkey) and remove the columns whose variance is
# below 0.001.
Vars <- vapply(Pheno[-1], var, numeric(1))
Pheno <- select(Pheno, -all_of(names(Vars)[Vars < 0.001]))

dim(Pheno)
head(Pheno)

In [None]:
# Write the preprocessed phenotype table to the holding directory as an RDS file
saveRDS(object = Pheno, file = "./CSI/Preprocessed/kernel_Pheno.rds")