# dMRI Kernel Preprocessing

In [None]:
# Set working directory; every relative path below resolves against it
setwd("./WorkDir")

# Create holding directory for preprocessed datasets.
# recursive = TRUE also creates the parent "./CSI" when it is missing; without
# it dir.create() only warns and the later saveRDS() calls have no target.
if (!dir.exists("./CSI/Preprocessed")) {
  dir.create("./CSI/Preprocessed", recursive = TRUE)
}

# Load necessary libraries
suppressPackageStartupMessages(library(tidyverse))

### ABCD dMRI RSI: _drsip_

In [None]:
# Import drsip data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).

abcd_drsip101 <- read_delim(
    "./ABCD_Data/abcd_drsip101.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_drsip201 <- read_delim(
    "./ABCD_Data/abcd_drsip201.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_drsip301 <- read_delim(
    "./ABCD_Data/abcd_drsip301.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_drsip401 <- read_delim(
    "./ABCD_Data/abcd_drsip401.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_drsip501 <- read_delim(
    "./ABCD_Data/abcd_drsip501.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_drsip601 <- read_delim(
    "./ABCD_Data/abcd_drsip601.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_drsip701 <- read_delim(
    "./ABCD_Data/abcd_drsip701.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Preprocess abcd_drsip101
#
# Keep the baseline, 1-year and 2-year follow-up visits, drop the visit id,
# convert the dmri_ measures to numeric, then average each measure over a
# subject's retained visits so the result has one row per subjectkey.
drsip101 <- abcd_drsip101 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_"), -dmri_rsi_visitid) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip101)
head(drsip101)

In [None]:
# Preprocess abcd_drsip201
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
drsip201 <- abcd_drsip201 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip201)
head(drsip201)

In [None]:
# Preprocess abcd_drsip301
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
drsip301 <- abcd_drsip301 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip301)
head(drsip301)

In [None]:
# Preprocess abcd_drsip401
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
drsip401 <- abcd_drsip401 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip401)
head(drsip401)

In [None]:
# Preprocess abcd_drsip501
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
drsip501 <- abcd_drsip501 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip501)
head(drsip501)

In [None]:
# Preprocess abcd_drsip601
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
drsip601 <- abcd_drsip601 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip601)
head(drsip601)

In [None]:
# Preprocess abcd_drsip701
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
drsip701 <- abcd_drsip701 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(drsip701)
head(drsip701)

In [None]:
# Drop the 401/501/601 tables (raw and preprocessed); this workflow treats
# them as empty and does not join them into the combined drsip frame
rm(list = c("abcd_drsip401", "abcd_drsip501", "abcd_drsip601",
            "drsip401", "drsip501", "drsip601"))

In [None]:
# Merge the per-file drsip frames on subjectkey, left to right; full joins
# keep every subject that appears in at least one frame
drsip <- Reduce(
    function(acc, nxt) full_join(acc, nxt, by = "subjectkey"),
    list(drsip101, drsip201, drsip301, drsip701)
)

dim(drsip)
head(drsip)

In [None]:
# Persist the combined drsip frame, then free the per-file tables
saveRDS(object = drsip, file = "./CSI/Preprocessed/drsip.rds")
rm(list = c("abcd_drsip101", "abcd_drsip201", "abcd_drsip301", "abcd_drsip701",
            "drsip101", "drsip201", "drsip301", "drsip701"))

### ABCD dMRI DTI Destrieux Parcellations: _ddtidp_

In [None]:
# Import ddtidp data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
abcd_ddtidp101 <- read_delim(
    "./ABCD_Data/abcd_ddtidp101.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_ddtidp201 <- read_delim(
    "./ABCD_Data/abcd_ddtidp201.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Preprocess abcd_ddtidp101
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the ddtidp_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
ddtidp101 <- abcd_ddtidp101 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("ddtidp_")) %>%
    mutate(across(starts_with("ddtidp_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("ddtidp_"), function(x) mean(x, na.rm = TRUE)))

dim(ddtidp101)
head(ddtidp101)

In [None]:
# Preprocess abcd_ddtidp201
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the ddtidp_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
ddtidp201 <- abcd_ddtidp201 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("ddtidp_")) %>%
    mutate(across(starts_with("ddtidp_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("ddtidp_"), function(x) mean(x, na.rm = TRUE)))

dim(ddtidp201)
head(ddtidp201)

In [None]:
# Merge the two ddtidp frames on subjectkey; the full join keeps every
# subject that appears in either frame
ddtidp <- full_join(ddtidp101, ddtidp201, by = "subjectkey")

dim(ddtidp)
head(ddtidp)

In [None]:
# Persist the combined ddtidp frame, then free the per-file tables
saveRDS(object = ddtidp, file = "./CSI/Preprocessed/ddtidp.rds")
rm(list = c("abcd_ddtidp101", "abcd_ddtidp201", "ddtidp101", "ddtidp201"))

### ABCD dMRI DTI Full Destrieux Parcellation: _ddtifp_

In [None]:
# Import ddtifp data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
abcd_ddtifp101 <- read_delim(
    "./ABCD_Data/abcd_ddtifp101.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_ddtifp201 <- read_delim(
    "./ABCD_Data/abcd_ddtifp201.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Preprocess abcd_ddtifp101
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the ddtifp_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
ddtifp101 <- abcd_ddtifp101 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("ddtifp_")) %>%
    mutate(across(starts_with("ddtifp_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("ddtifp_"), function(x) mean(x, na.rm = TRUE)))

dim(ddtifp101)
head(ddtifp101)

In [None]:
# Preprocess abcd_ddtifp201
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the ddtifp_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
ddtifp201 <- abcd_ddtifp201 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("ddtifp_")) %>%
    mutate(across(starts_with("ddtifp_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("ddtifp_"), function(x) mean(x, na.rm = TRUE)))

dim(ddtifp201)
head(ddtifp201)

In [None]:
# Merge the two ddtifp frames on subjectkey; the full join keeps every
# subject that appears in either frame
ddtifp <- full_join(ddtifp101, ddtifp201, by = "subjectkey")

dim(ddtifp)
head(ddtifp)

In [None]:
# Persist the combined ddtifp frame, then free the per-file tables
saveRDS(object = ddtifp, file = "./CSI/Preprocessed/ddtifp.rds")
rm(list = c("abcd_ddtifp101", "abcd_ddtifp201", "ddtifp101", "ddtifp201"))

### ABCD dMRI DTI Full: _dmdtifp_

In [None]:
# Import dmdtifp data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
abcd_dmdtifp101 <- read_delim(
    "./ABCD_Data/abcd_dmdtifp101.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_dmdtifp201 <- read_delim(
    "./ABCD_Data/abcd_dmdtifp201.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Preprocess abcd_dmdtifp101
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the
# dmdtifp1_ measures to numeric, then average each measure over a subject's
# retained visits so the result has one row per subjectkey.
dmdtifp101 <- abcd_dmdtifp101 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmdtifp1_")) %>%
    mutate(across(starts_with("dmdtifp1_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmdtifp1_"), function(x) mean(x, na.rm = TRUE)))

dim(dmdtifp101)
head(dmdtifp101)

In [None]:
# Preprocess abcd_dmdtifp201
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the
# dmdtifp1_ measures to numeric, then average each measure over a subject's
# retained visits so the result has one row per subjectkey.
dmdtifp201 <- abcd_dmdtifp201 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmdtifp1_")) %>%
    mutate(across(starts_with("dmdtifp1_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmdtifp1_"), function(x) mean(x, na.rm = TRUE)))

dim(dmdtifp201)
head(dmdtifp201)

In [None]:
# Merge the two dmdtifp frames on subjectkey; the full join keeps every
# subject that appears in either frame
dmdtifp <- full_join(dmdtifp101, dmdtifp201, by = "subjectkey")

dim(dmdtifp)
head(dmdtifp)

In [None]:
# Persist the combined dmdtifp frame, then free the per-file tables
saveRDS(object = dmdtifp, file = "./CSI/Preprocessed/dmdtifp.rds")
rm(list = c("abcd_dmdtifp101", "abcd_dmdtifp201", "dmdtifp101", "dmdtifp201"))

### ABCD dMRI DTI Full: _dti_

In [None]:
# Import dti data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
abcd_dti_p101 <- read_delim(
    "./ABCD_Data/abcd_dti_p101.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

abcd_dti_p201 <- read_delim(
    "./ABCD_Data/abcd_dti_p201.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Preprocess abcd_dti_p101
#
# Keep the baseline, 1-year and 2-year follow-up visits, drop the visit id,
# convert the dmri_ measures to numeric, then average each measure over a
# subject's retained visits so the result has one row per subjectkey.
dti_p101 <- abcd_dti_p101 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_"), -dmri_dti_visitid) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(dti_p101)
head(dti_p101)

In [None]:
# Preprocess abcd_dti_p201
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the dmri_
# measures to numeric, then average each measure over a subject's retained
# visits so the result has one row per subjectkey.
dti_p201 <- abcd_dti_p201 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_")) %>%
    mutate(across(starts_with("dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(dti_p201)
head(dti_p201)

In [None]:
# Merge the two dti frames on subjectkey; the full join keeps every subject
# that appears in either frame
dti <- full_join(dti_p101, dti_p201, by = "subjectkey")

dim(dti)
head(dti)

In [None]:
# Persist the combined dti frame, then free the per-file tables
saveRDS(object = dti, file = "./CSI/Preprocessed/dti.rds")
rm(list = c("abcd_dti_p101", "abcd_dti_p201", "dti_p101", "dti_p201"))

### ABCD dMRI DTI Post-Processing QC: _dmriqc_

In [None]:
# Import dmriqc data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
abcd_dmriqc01 <- read_delim(
    "./ABCD_Data/abcd_dmriqc01.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Preprocess abcd_dmriqc01
#
# Keep the baseline, 1-year and 2-year follow-up visits, drop the visit id,
# convert the dmri_dti_postqc_ measures to numeric, then average each measure
# over a subject's retained visits so the result has one row per subjectkey.
dmriqc <- abcd_dmriqc01 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("dmri_dti_postqc_"), -dmri_dti_postqc_visitid) %>%
    mutate(across(starts_with("dmri_dti_postqc_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("dmri_dti_postqc_"),
                     function(x) mean(x, na.rm = TRUE)))

dim(dmriqc)
head(dmriqc)

In [None]:
# Persist the dmriqc frame, then free the raw table
saveRDS(object = dmriqc, file = "./CSI/Preprocessed/dmriqc.rds")
rm(list = "abcd_dmriqc01")

### MRI QC Raw: _mriqcrp_

In [None]:
# Import partial mriqcrp data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
mriqcrp10301 <- read_delim(
    "./ABCD_Data/mriqcrp10301.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

mriqcrp20301 <- read_delim(
    "./ABCD_Data/mriqcrp20301.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

mriqcrp302 <- read_delim(
    "./ABCD_Data/mriqcrp302.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Filter partial mriqcrp data and join
#
# Each table is restricted to the baseline, 1-year and 2-year follow-up
# visits and to the identifier plus iqc* columns. These tables still hold one
# row per visit, so eventname is kept and used as a second join key: joining
# on subjectkey alone pairs every visit of a subject with every visit in the
# other tables (many-to-many fan-out). Per-subject means computed downstream
# are unaffected, because the fan-out only repeats each value equally often.

mriqcrp10301 <- mriqcrp10301 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, eventname, starts_with("iqc"))

mriqcrp20301 <- mriqcrp20301 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, eventname, starts_with("iqc"))

mriqcrp302 <- mriqcrp302 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, eventname, starts_with("iqc"))

mriqcrp <- mriqcrp10301 %>%
    full_join(mriqcrp20301, by = c("subjectkey", "eventname")) %>%
    full_join(mriqcrp302, by = c("subjectkey", "eventname"))

In [None]:
# Create a dMRI_mriqcrp subset
#
# Keep the iqc_dmri* columns, drop those ending in sub_02, seriestime,
# studydate, seuid or suid and those matching iqc_dmri_<digit>_*, convert the
# remainder to numeric, then average each measure per subjectkey.
dMRI_mriqcrp <- mriqcrp %>%
    select(subjectkey, starts_with("iqc_dmri")) %>%
    select(-c(ends_with("sub_02"), ends_with("seriestime"),
              ends_with("studydate"), ends_with("seuid"), ends_with("suid"))) %>%
    select(-matches("iqc_dmri_\\d_.{0,}")) %>%
    mutate(across(starts_with("iqc_dmri"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing rows; a measure missing in every row of a
    # subject averages to NaN.
    summarise(across(starts_with("iqc_dmri"), function(x) mean(x, na.rm = TRUE)))

dim(dMRI_mriqcrp)
head(dMRI_mriqcrp)

In [None]:
# Persist the dMRI_mriqcrp frame, then free the joined and per-file tables
saveRDS(object = dMRI_mriqcrp, file = "./CSI/Preprocessed/dMRI_mriqcrp.rds")
rm(list = c("mriqcrp", "mriqcrp10301", "mriqcrp20301", "mriqcrp302"))

### Automated Post-Processing QC Metrics: _postqc_

In [None]:
# Import postqc data
#
# Every column is read as character: cols(.default = col_character()) applies
# to all columns, whereas the compact string "c" only types the first column
# and leaves the others to type guessing.
# [-1, ] drops the first row after the header (presumably the column
# description row of the ABCD text export -- confirm).
postqc01 <- read_delim(
    "./ABCD_Data/abcd_auto_postqc01.txt",
    delim = "\t", escape_double = FALSE, trim_ws = TRUE, na = c("", "NA"),
    col_types = cols(.default = col_character())
)[-1, ]

In [None]:
# Create a dMRI_postqc subset
#
# Keep the baseline, 1-year and 2-year follow-up visits, convert the
# apqc_dmri_ measures to numeric, then average each measure over a subject's
# retained visits so the result has one row per subjectkey.
dMRI_postqc <- postqc01 %>%
    filter(eventname %in% c("baseline_year_1_arm_1",
                            "1_year_follow_up_y_arm_1",
                            "2_year_follow_up_y_arm_1")) %>%
    select(subjectkey, starts_with("apqc_dmri_")) %>%
    mutate(across(starts_with("apqc_dmri_"), as.numeric)) %>%
    group_by(subjectkey) %>%
    # na.rm = TRUE skips missing visits; a measure missing at every visit
    # averages to NaN.
    summarise(across(starts_with("apqc_dmri_"), function(x) mean(x, na.rm = TRUE)))

dim(dMRI_postqc)
head(dMRI_postqc)

In [None]:
# Persist the dMRI_postqc frame, then free the raw table
saveRDS(object = dMRI_postqc, file = "./CSI/Preprocessed/dMRI_postqc.rds")
rm(list = "postqc01")

### Remove high missingness

In [None]:
# Fraction of NA/NaN values and fraction of infinite values per variable
#
# Columns are scanned one at a time with their own types. apply() first
# coerces the whole frame (character subjectkey included) to a character
# matrix, on which is.infinite() is always FALSE, and the infinite rate also
# needs the per-column sum (here mean()) rather than an element-wise division.

# Share of entries in each column that are NA; is.na() is also TRUE for NaN.
frac_missing <- function(df) {
  vapply(df, function(x) mean(is.na(x)), numeric(1))
}

# Share of entries in each column that are +Inf or -Inf.
frac_infinite <- function(df) {
  vapply(df, function(x) mean(is.infinite(x)), numeric(1))
}

NAs_drsip <- frac_missing(drsip)
INFs_drsip <- frac_infinite(drsip)

NAs_ddtidp <- frac_missing(ddtidp)
INFs_ddtidp <- frac_infinite(ddtidp)

NAs_ddtifp <- frac_missing(ddtifp)
INFs_ddtifp <- frac_infinite(ddtifp)

NAs_dmdtifp <- frac_missing(dmdtifp)
INFs_dmdtifp <- frac_infinite(dmdtifp)

NAs_dti <- frac_missing(dti)
INFs_dti <- frac_infinite(dti)

NAs_dmriqc <- frac_missing(dmriqc)
INFs_dmriqc <- frac_infinite(dmriqc)

NAs_dMRI_mriqcrp <- frac_missing(dMRI_mriqcrp)
INFs_dMRI_mriqcrp <- frac_infinite(dMRI_mriqcrp)

NAs_dMRI_postqc <- frac_missing(dMRI_postqc)
INFs_dMRI_postqc <- frac_infinite(dMRI_postqc)

In [None]:
# Report, per dataset, the largest per-variable NA rate and infinite rate,
# rounded to three decimals
worst <- function(rates) round(max(rates), 3)

cat("NAs_drsip: ", worst(NAs_drsip),
    "\nINFs_drsip: ", worst(INFs_drsip),
    "\n\nNAs_ddtidp: ", worst(NAs_ddtidp),
    "\nINFs_ddtidp: ", worst(INFs_ddtidp),
    "\n\nNAs_ddtifp: ", worst(NAs_ddtifp),
    "\nINFs_ddtifp: ", worst(INFs_ddtifp),
    "\n\nNAs_dmdtifp: ", worst(NAs_dmdtifp),
    "\nINFs_dmdtifp: ", worst(INFs_dmdtifp),
    "\n\nNAs_dti: ", worst(NAs_dti),
    "\nINFs_dti: ", worst(INFs_dti),
    "\n\nNAs_dmriqc: ", worst(NAs_dmriqc),
    "\nINFs_dmriqc: ", worst(INFs_dmriqc),
    "\n\nNAs_dMRI_mriqcrp: ", worst(NAs_dMRI_mriqcrp),
    "\nINFs_dMRI_mriqcrp: ", worst(INFs_dMRI_mriqcrp),
    "\n\nNAs_dMRI_postqc: ", worst(NAs_dMRI_postqc),
    "\nINFs_dMRI_postqc: ", worst(INFs_dMRI_postqc))

In [None]:
# Remove columns with high rates of NAs
#
# A column is kept when at most 10% of its values are NA. colMeans() gives the
# NA *rate*; colSums() gives the NA *count*, and comparing a count against 0.1
# discards every column that has even a single NA.

dmriqc <- dmriqc[colMeans(is.na(dmriqc)) <= 0.1]
dMRI_mriqcrp <- dMRI_mriqcrp[colMeans(is.na(dMRI_mriqcrp)) <= 0.1]

# Recompute the per-variable NA rates on the reduced frames (is.na() is also
# TRUE for NaN).
NAs_dmriqc <- vapply(dmriqc, function(x) mean(is.na(x)), numeric(1))
NAs_dMRI_mriqcrp <- vapply(dMRI_mriqcrp, function(x) mean(is.na(x)), numeric(1))

cat("NAs_dmriqc: ", round(max(NAs_dmriqc), 3),
    "\nNAs_dMRI_mriqcrp: ", round(max(NAs_dMRI_mriqcrp), 3))

### Combine and process datasets

In [None]:
# Number of predictors per dataset: column count minus the subjectkey column

cat("drsip:", ncol(drsip) - 1,
    "\nddtidp:", ncol(ddtidp) - 1,
    "\nddtifp:", ncol(ddtifp) - 1,
    "\ndmdtifp:", ncol(dmdtifp) - 1,
    "\ndti:", ncol(dti) - 1,
    "\ndmriqc:", ncol(dmriqc) - 1,
    "\ndMRI_mriqcrp:", ncol(dMRI_mriqcrp) - 1,
    "\ndMRI_postqc:", ncol(dMRI_postqc) - 1)

In [None]:
# Combine non-qc and qc frames
#
# sk is the union of subject keys across the listed frames. It seeds a base
# data.frame onto which every dataset is full-joined in turn by subjectkey.
sk <- Reduce(
    union,
    list(drsip$subjectkey, ddtidp$subjectkey, ddtifp$subjectkey,
         dmdtifp$subjectkey, dti$subjectkey, dmriqc$subjectkey,
         dMRI_mriqcrp$subjectkey)
)

dMRI <- Reduce(
    function(acc, nxt) full_join(acc, nxt, by = "subjectkey"),
    list(drsip, ddtidp, ddtifp, dmdtifp, dti, dmriqc, dMRI_mriqcrp, dMRI_postqc),
    data.frame(subjectkey = sk)
)

# Total number of missing cells before imputation
sum(is.na(dMRI))

In [None]:
# Mean impute dMRI
#
# Every column except the first (subjectkey) has its NA/NaN entries replaced
# by the mean of that column's observed values. Iterating over column names
# is safe when there are no predictor columns (2:ncol() would count down),
# and [[ ]] extracts a plain vector for data.frames and tibbles alike.
# A column with no observed value at all stays NaN.
for (col in names(dMRI)[-1]) {
  gaps <- is.na(dMRI[[col]])
  dMRI[[col]][gaps] <- mean(dMRI[[col]], na.rm = TRUE)
}

sum(is.na(dMRI))

In [None]:
# Remove variables with range of 0
#
# Range holds max - min for every predictor (all columns except subjectkey).
# Constant columns are dropped. So are columns whose range is undefined (NA,
# i.e. the column still contains NA/NaN): they carry no usable variation, and
# passing their NA names to select() would otherwise fail.
Range <- vapply(dMRI[-1], function(x) diff(range(x)), numeric(1))
flat_cols <- names(Range)[is.na(Range) | Range == 0]
dMRI <- dMRI[, !(names(dMRI) %in% flat_cols), drop = FALSE]

dim(dMRI)
head(dMRI)

In [None]:
# Range-normalize data
#
# Rescale every predictor (all columns except subjectkey) to [0, 1] via
# (x - min) / (max - min). lapply() over dMRI[-1] works column by column
# without coercing the frame to a matrix, and still works when only a single
# predictor remains (dMRI[, -1] would collapse to a vector there).
# A constant column would yield NaN (0 / 0).
dMRI[-1] <- lapply(dMRI[-1], function(x) {
  lo <- min(x)
  (x - lo) / (max(x) - lo)
})
sum(is.na(dMRI))

dim(dMRI)
head(dMRI)

In [None]:
# Remove variables with low variances
#
# Vars holds the sample variance of every predictor (all columns except
# subjectkey). Columns with variance below 0.001 are dropped. So are columns
# whose variance is undefined (NA), whose NA names would otherwise break the
# column selection.
Vars <- vapply(dMRI[-1], var, numeric(1))
low_var_cols <- names(Vars)[is.na(Vars) | Vars < 0.001]
dMRI <- dMRI[, !(names(dMRI) %in% low_var_cols), drop = FALSE]

dim(dMRI)
head(dMRI)

In [None]:
# Persist the final preprocessed dMRI frame
saveRDS(object = dMRI, file = "./CSI/Preprocessed/kernel_dMRI.rds")