# rsfMRI Kernel Preprocessing

In [None]:
# Set working directory; every relative path below resolves against it
setwd("./WorkDir")

# Create holding directory for preprocessed datasets.
# recursive = TRUE also creates the parent "./CSI" when it is missing;
# without it dir.create() only warns and the later saveRDS() calls fail.
if (!dir.exists("./CSI/Preprocessed")) {
    dir.create("./CSI/Preprocessed", recursive = TRUE)
}

# Load necessary libraries (startup banners silenced)
suppressPackageStartupMessages(library(tidyverse))

### MRI QC Raw: _mriqcrp_

In [None]:
# Import the three partial mriqcrp tables (tab-delimited text files).
# The trailing [-1, ] discards the first parsed row of each file
# (presumably a column-description row rather than data -- TODO confirm).
mriqcrp10301 <- read_delim(
    file = "./ABCD_Data/mriqcrp10301.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

mriqcrp20301 <- read_delim(
    file = "./ABCD_Data/mriqcrp20301.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

mriqcrp302 <- read_delim(
    file = "./ABCD_Data/mriqcrp302.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Restrict each partial mriqcrp table to the baseline, 1-year and 2-year
# follow-up events, keep only subjectkey plus the iqc* columns, and then
# full-join the three tables on subjectkey.
# NOTE(review): the join key is subjectkey alone, so a subject with several
# retained events in more than one table yields one row per combination.

mriqcrp10301 <- select(
    filter(mriqcrp10301, eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")),
    subjectkey, starts_with("iqc")
)

mriqcrp20301 <- select(
    filter(mriqcrp20301, eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")),
    subjectkey, starts_with("iqc")
)

mriqcrp302 <- select(
    filter(mriqcrp302, eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")),
    subjectkey, starts_with("iqc")
)

mriqcrp <- full_join(mriqcrp10301, mriqcrp20301, by = "subjectkey") %>%
    full_join(mriqcrp302, by = "subjectkey")

In [None]:
# Build the rsfMRI subset of mriqcrp: keep the iqc_rsfMRI_* columns, drop the
# identifier / date / time style columns and the numbered per-series columns,
# convert the remainder to numeric and average them within each subject
# (NAs ignored).
rsfMRI_mriqcrp <- mriqcrp %>%
    select(subjectkey, starts_with("iqc_rsfMRI_")) %>%
    select(
        -c(
            ends_with("sub_02"),
            ends_with("seriestime"),
            ends_with("studydate"),
            ends_with("seuid"),
            ends_with("suid")
        )
    ) %>%
    select(-matches("iqc_rsfMRI_\\d_.{0,}")) %>%
    mutate_at(.vars = vars(contains("iqc_rsfMRI")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(contains("iqc_rsfMRI")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(rsfMRI_mriqcrp)
head(rsfMRI_mriqcrp)

In [None]:
# Write rsfMRI_mriqcrp to disk, then free the raw and joined mriqcrp tables
saveRDS(object = rsfMRI_mriqcrp, file = "./CSI/Preprocessed/rsfMRI_mriqcrp.rds")
rm(list = c("mriqcrp", "mriqcrp10301", "mriqcrp20301", "mriqcrp302"))

### Automated Post-Processing QC Metrics: _postqc_

In [None]:
# Import the postqc table (tab-delimited text).
# [-1, ] discards the first parsed row
# (presumably a column-description row rather than data -- TODO confirm).
postqc01 <- read_delim(
    file = "./ABCD_Data/abcd_auto_postqc01.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Build rsfMRI_postqc: keep the baseline, 1-year and 2-year events, select the
# apqc_fmri_* columns, convert them to numeric and average them within each
# subject (NAs ignored).
rsfMRI_postqc <- postqc01 %>%
    filter(eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("apqc_fmri_")) %>%
    mutate_at(.vars = vars(starts_with("apqc_fmri_")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(starts_with("apqc_fmri_")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(rsfMRI_postqc)
head(rsfMRI_postqc)

In [None]:
# Write rsfMRI_postqc to disk, then free the raw postqc table
saveRDS(object = rsfMRI_postqc, file = "./CSI/Preprocessed/rsfMRI_postqc.rds")
rm(list = "postqc01")

### ABCD rsfMRI Destrieux: _mrirsfd_

In [None]:
# Import the mrirsfd table (tab-delimited text).
# [-1, ] discards the first parsed row
# (presumably a column-description row rather than data -- TODO confirm).
abcd_mrirsfd01 <- read_delim(
    file = "./ABCD_Data/abcd_mrirsfd01.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Build mrirsfd: keep the baseline, 1-year and 2-year events, select the
# mrirsfd* columns, convert them to numeric and average them within each
# subject (NAs ignored).
mrirsfd <- abcd_mrirsfd01 %>%
    filter(eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("mrirsfd")) %>%
    mutate_at(.vars = vars(starts_with("mrirsfd")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(starts_with("mrirsfd")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(mrirsfd)
head(mrirsfd)

In [None]:
# Write mrirsfd to disk, then free the raw table
saveRDS(object = mrirsfd, file = "./CSI/Preprocessed/mrirsfd.rds")
rm(list = "abcd_mrirsfd01")

### ABCD rsfMRI Gordon Network Correlations: _betnet_

In [None]:
# Import the betnet table (tab-delimited text).
# [-1, ] discards the first parsed row
# (presumably a column-description row rather than data -- TODO confirm).
abcd_betnet02 <- read_delim(
    file = "./ABCD_Data/abcd_betnet02.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Build betnet: keep the baseline, 1-year and 2-year events, select the
# rsfmri_c_* columns except the visit id, convert them to numeric and average
# them within each subject (NAs ignored).
betnet <- abcd_betnet02 %>%
    filter(eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("rsfmri_c_")) %>%
    select(-rsfmri_c_ngd_visitid) %>%
    mutate_at(.vars = vars(starts_with("rsfmri_c_")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(starts_with("rsfmri_c_")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(betnet)
head(betnet)

In [None]:
# Write betnet to disk, then free the raw table
saveRDS(object = betnet, file = "./CSI/Preprocessed/betnet.rds")
rm(list = "abcd_betnet02")

### ABCD rsfMRI Network to Subcortical ROI Correlations: _mrirscor_

In [None]:
# Import the mrirscor table (tab-delimited text).
# [-1, ] discards the first parsed row
# (presumably a column-description row rather than data -- TODO confirm).
mrirscor02 <- read_delim(
    file = "./ABCD_Data/mrirscor02.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Build mrirscor: keep the baseline, 1-year and 2-year events, select the
# rsfmri_cor_* columns except the visit id, convert them to numeric and
# average them within each subject (NAs ignored).
mrirscor <- mrirscor02 %>%
    filter(eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("rsfmri_cor_")) %>%
    select(-rsfmri_cor_ngd_scs_visitid) %>%
    mutate_at(.vars = vars(starts_with("rsfmri_cor_")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(starts_with("rsfmri_cor_")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(mrirscor)
head(mrirscor)

In [None]:
# Write mrirscor to disk, then free the raw table
saveRDS(object = mrirscor, file = "./CSI/Preprocessed/mrirscor.rds")
rm(list = "mrirscor02")

### ABCD rsfMRI Temporal Variance: _mrirstv_

In [None]:
# Import the mrirstv table (tab-delimited text).
# [-1, ] discards the first parsed row
# (presumably a column-description row rather than data -- TODO confirm).
abcd_mrirstv02 <- read_delim(
    file = "./ABCD_Data/abcd_mrirstv02.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Build mrirstv: keep the baseline, 1-year and 2-year events, select the
# rsfmri_var_* columns except the visit id, convert them to numeric and
# average them within each subject (NAs ignored).
mrirstv <- abcd_mrirstv02 %>%
    filter(eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("rsfmri_var_")) %>%
    select(-rsfmri_var_visitid) %>%
    mutate_at(.vars = vars(starts_with("rsfmri_var_")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(starts_with("rsfmri_var_")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(mrirstv)
head(mrirstv)

In [None]:
# Write mrirstv to disk, then free the raw table
saveRDS(object = mrirstv, file = "./CSI/Preprocessed/mrirstv.rds")
rm(list = "abcd_mrirstv02")

### Manual fMRI Post-Processing QC: _fmriqc_

In [None]:
# Import the fmriqc table (tab-delimited text).
# [-1, ] discards the first parsed row
# (presumably a column-description row rather than data -- TODO confirm).
abcd_fmriqc01 <- read_delim(
    file = "./ABCD_Data/abcd_fmriqc01.txt",
    delim = "\t",
    na = c("", "NA"),
    col_types = "c",
    trim_ws = TRUE,
    escape_double = FALSE
)[-1, ]

In [None]:
# Build fmriqc: keep the baseline, 1-year and 2-year events, select the
# fmri_postqc_* columns, convert them to numeric and average them within each
# subject (NAs ignored).
fmriqc <- abcd_fmriqc01 %>%
    filter(eventname %in% c("baseline_year_1_arm_1", "1_year_follow_up_y_arm_1", "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("fmri_postqc_")) %>%
    mutate_at(.vars = vars(starts_with("fmri_postqc_")), .funs = as.numeric) %>%
    group_by(subjectkey) %>%
    summarise_at(.vars = vars(starts_with("fmri_postqc_")), .funs = mean, na.rm = TRUE)

# Quick look at the result
dim(fmriqc)
head(fmriqc)

In [None]:
# Write fmriqc to disk, then free the raw table
saveRDS(object = fmriqc, file = "./CSI/Preprocessed/rsfMRI_fmriqc.rds")
rm(list = "abcd_fmriqc01")

### Remove High Missingness

In [None]:
# Count Infinities, NAs, and NaNs per variable, as a proportion of rows.
#
# The summaries iterate over the columns with vapply() instead of apply():
# apply() first coerces the whole frame to a character matrix (subjectkey is
# text), on which is.infinite() is always FALSE. The Inf summaries also sum
# the indicator now, so each one is a single proportion per column rather
# than a matrix of per-cell values.

# Proportion of missing values in each column of `df`
# (is.na() is TRUE for NaN as well as NA).
prop_missing <- function(df) {
    vapply(df, function(x) sum(is.na(x)) / length(x), numeric(1))
}

# Proportion of infinite values in each column of `df`
# (always 0 for non-numeric columns such as subjectkey).
prop_infinite <- function(df) {
    vapply(df, function(x) sum(is.infinite(x)) / length(x), numeric(1))
}

NAs_rsfMRI_mriqcrp <- prop_missing(rsfMRI_mriqcrp)
INFs_rsfMRI_mriqcrp <- prop_infinite(rsfMRI_mriqcrp)

NAs_rsfMRI_postqc <- prop_missing(rsfMRI_postqc)
INFs_rsfMRI_postqc <- prop_infinite(rsfMRI_postqc)

NAs_mrirsfd <- prop_missing(mrirsfd)
INFs_mrirsfd <- prop_infinite(mrirsfd)

NAs_betnet <- prop_missing(betnet)
INFs_betnet <- prop_infinite(betnet)

NAs_mrirscor <- prop_missing(mrirscor)
INFs_mrirscor <- prop_infinite(mrirscor)

NAs_mrirstv <- prop_missing(mrirstv)
INFs_mrirstv <- prop_infinite(mrirstv)

NAs_fmriqc <- prop_missing(fmriqc)
INFs_fmriqc <- prop_infinite(fmriqc)

In [None]:
# Max missingness: print the largest value of each NA / Inf summary,
# rounded to three decimals, one dataset per paragraph
cat(
    "NAs_rsfMRI_mriqcrp: ", round(max(NAs_rsfMRI_mriqcrp), digits = 3),
    "\nINFs_rsfMRI_mriqcrp: ", round(max(INFs_rsfMRI_mriqcrp), digits = 3),
    "\n\nNAs_rsfMRI_postqc: ", round(max(NAs_rsfMRI_postqc), digits = 3),
    "\nINFs_rsfMRI_postqc: ", round(max(INFs_rsfMRI_postqc), digits = 3),
    "\n\nNAs_mrirsfd: ", round(max(NAs_mrirsfd), digits = 3),
    "\nINFs_mrirsfd: ", round(max(INFs_mrirsfd), digits = 3),
    "\n\nNAs_betnet: ", round(max(NAs_betnet), digits = 3),
    "\nINFs_betnet: ", round(max(INFs_betnet), digits = 3),
    "\n\nNAs_mrirscor: ", round(max(NAs_mrirscor), digits = 3),
    "\nINFs_mrirscor: ", round(max(INFs_mrirscor), digits = 3),
    "\n\nNAs_mrirstv: ", round(max(NAs_mrirstv), digits = 3),
    "\nINFs_mrirstv: ", round(max(INFs_mrirstv), digits = 3),
    "\n\nNAs_fmriqc: ", round(max(NAs_fmriqc), digits = 3),
    "\nINFs_fmriqc: ", round(max(INFs_fmriqc), digits = 3)
)

In [None]:
# Remove columns with high rates of NAs: keep a column only when at most 10%
# of its rows are missing.
# colMeans(is.na(.)) is the per-column missing *proportion*. The earlier
# colSums() form compared raw NA counts with 0.1 and therefore dropped every
# column containing even a single NA.

rsfMRI_mriqcrp <- rsfMRI_mriqcrp[colMeans(is.na(rsfMRI_mriqcrp)) <= 0.1]
rsfMRI_postqc <- rsfMRI_postqc[colMeans(is.na(rsfMRI_postqc)) <= 0.1]
fmriqc <- fmriqc[colMeans(is.na(fmriqc)) <= 0.1]

# Recompute the per-column missing proportions on the reduced frames
# (is.na() is TRUE for NaN as well as NA)
NAs_rsfMRI_mriqcrp <- vapply(rsfMRI_mriqcrp, function(x) sum(is.na(x)) / length(x), numeric(1))
NAs_rsfMRI_postqc <- vapply(rsfMRI_postqc, function(x) sum(is.na(x)) / length(x), numeric(1))
NAs_fmriqc <- vapply(fmriqc, function(x) sum(is.na(x)) / length(x), numeric(1))

# Report the largest remaining missing proportion per dataset
# (the third label was previously blank)
cat("NAs_rsfMRI_mriqcrp: ", round(max(NAs_rsfMRI_mriqcrp), 3),
    "\nNAs_rsfMRI_postqc: ", round(max(NAs_rsfMRI_postqc), 3),
    "\nNAs_fmriqc: ", round(max(NAs_fmriqc), 3))

### Combine and Process Datasets

In [None]:
# Number of predictors per dataset: column count minus the subjectkey column

cat(
    "rsfMRI_mriqcrp:", ncol(rsfMRI_mriqcrp) - 1,
    "\nrsfMRI_postqc:", ncol(rsfMRI_postqc) - 1,
    "\nmrirsfd:", ncol(mrirsfd) - 1,
    "\nbetnet:", ncol(betnet) - 1,
    "\nmrirscor:", ncol(mrirscor) - 1,
    "\nmrirstv:", ncol(mrirstv) - 1,
    "\nfmriqc:", ncol(fmriqc) - 1
)

In [None]:
# Combine the QC and non-QC frames into a single table keyed by subjectkey.
# sk is the union of the subject keys found in any of the seven frames.
sk <- Reduce(
    union,
    list(
        rsfMRI_mriqcrp$subjectkey,
        rsfMRI_postqc$subjectkey,
        mrirsfd$subjectkey,
        betnet$subjectkey,
        mrirscor$subjectkey,
        mrirstv$subjectkey,
        fmriqc$subjectkey
    )
)

# Start from the full key list and full-join each frame onto it in turn
rsfMRI <- data.frame(subjectkey = sk) %>%
    full_join(rsfMRI_mriqcrp, by = "subjectkey") %>%
    full_join(rsfMRI_postqc, by = "subjectkey") %>%
    full_join(mrirsfd, by = "subjectkey") %>%
    full_join(betnet, by = "subjectkey") %>%
    full_join(mrirscor, by = "subjectkey") %>%
    full_join(mrirstv, by = "subjectkey") %>%
    full_join(fmriqc, by = "subjectkey")

# Size of the combined table and total number of missing cells
dim(rsfMRI)
sum(is.na(rsfMRI))

In [None]:
# Mean impute rsfMRI: replace each missing value with the mean of the
# observed values in the same column.
# seq_len(ncol(rsfMRI))[-1] skips subjectkey (column 1) and, unlike
# 2:ncol(rsfMRI), is empty rather than c(2, 1) when no data columns exist.
# [[i]] always extracts the column as a vector, whatever the frame class.
for (i in seq_len(ncol(rsfMRI))[-1]) {
    rsfMRI[[i]][is.na(rsfMRI[[i]])] <- mean(rsfMRI[[i]], na.rm = TRUE)
}

# Remaining missing cells (a column with no observed values stays NaN)
sum(is.na(rsfMRI))

In [None]:
# Drop constant variables: Range holds max - min for every non-key column,
# and columns whose range is exactly 0 are removed
Range <- apply(rsfMRI[, -1], 2, function(x) diff(range(x)))
rsfMRI <- select(rsfMRI, -names(Range[Range == 0]))

# Quick look at the result
dim(rsfMRI)
head(rsfMRI)

In [None]:
# Range-normalize data: rescale every non-key column as
# (x - min) / (max - min)
rsfMRI[, -1] <- apply(rsfMRI[, -1], 2, function(x) {
    lo <- min(x)
    (x - lo) / (max(x) - lo)
})
sum(is.na(rsfMRI))

# Quick look at the result
dim(rsfMRI)
head(rsfMRI)

In [None]:
# Drop near-constant variables: Vars holds the variance of every non-key
# column, and columns with variance below 0.001 are removed
Vars <- apply(rsfMRI[, -1], 2, var)
rsfMRI <- select(rsfMRI, -names(Vars[Vars < 0.001]))

# Quick look at the result
dim(rsfMRI)
head(rsfMRI)

In [None]:
# Write the final preprocessed rsfMRI table to disk
saveRDS(object = rsfMRI, file = "./CSI/Preprocessed/kernel_rsfMRI.rds")