### Association of prevalent CH PRS (Kessler et al. 2022) and incident CH in the ARIC study

In [None]:
library(data.table)
library(dplyr) # ntile
# Every relative path used below (the .rda input, the .sscore files, the PDF and
# CSV outputs) is resolved against this working directory.
# NOTE(review): hard-coded absolute path; the notebook only runs where it exists.
setwd("/medpop/esp2/mesbah/projects/ch_progression/aric/gwas/PRS")


In [None]:
  # Load data
  # Restore the objects saved in the .rda file into the workspace, then list
  # what is now available.
  # NOTE(review): later cells use objects that are never created in this
  # notebook (e.g. aric_baseline_n_v05, dnmt3a_ea, tet2_aa); presumably they
  # come from this file -- confirm against the ls() output.
load("PRS.Prev_CH.n_pheno.2023Jul14.rda")
ls()

In [None]:
# Histograms (50 requested breaks) of the SCORE1_AVG column of the ch_aa and
# ch_ea tables currently in the workspace.
hist(ch_aa$SCORE1_AVG, breaks=50)
hist(ch_ea$SCORE1_AVG, breaks=50)

In [None]:
# Kernel density estimates of SCORE1_AVG, one base-graphics plot per table.
# density() is called without na.rm, so it will stop if SCORE1_AVG contains NA.
plot(density(ch_aa$SCORE1_AVG))
plot(density(ch_ea$SCORE1_AVG))

In [None]:
############
## PRS
############

############# CH
# For each ancestry: read the 21-SNV score file, keep columns 2-5 of it, attach
# selected columns (2, 3, 47:56) of the ancestry-specific baseline table by IID,
# then attach the full baseline table by GWAS_ID.
# NOTE(review): the positional column picks assume a fixed column layout in the
# .sscore files and in aric_baseline_n_v05_ea / _aa -- confirm before reuse.
  ### CH EA
ch_ea <- fread("prs.ea_aric_rg22ch_beta.21SNV.sscore",
               header = TRUE)
ch_ea <- merge(ch_ea[, c(2:5)],
               aric_baseline_n_v05_ea[, c(2, 3, 47:56)],
               by = "IID")
ch_ea <- merge(ch_ea,
               aric_baseline_n_v05,
               by = "GWAS_ID")
  ### CH AA
ch_aa <- fread("prs.aa_aric_rg22ch_beta.21SNV.sscore",
               header = TRUE)
ch_aa <- merge(ch_aa[, c(2:5)],
               aric_baseline_n_v05_aa[, c(2, 3, 47:56)],
               by = "IID")
ch_aa <- merge(ch_aa,
               aric_baseline_n_v05,
               by = "GWAS_ID")
################

####### Density plot of raw prs
library(ggplot2)
library(cowplot)

# Build the plotting table from COPIES of the first five columns. Previously
# ch_aa / ch_ea themselves were overwritten with the 5-column subset, which
# removed the phenotype and covariate columns that the later transformation
# and regression cells read from these same objects.
prs_plot_aa <- ch_aa[, c(1:5)]
# NOTE(review): the sample sizes in the legend labels are hard-coded; confirm
# they still match nrow(ch_aa) / nrow(ch_ea) whenever the inputs change.
prs_plot_aa$Ancestry <- "AA (n=637)"
prs_plot_ea <- ch_ea[, c(1:5)]
prs_plot_ea$Ancestry <- "EA (n=2376)"

ch_aa_ea <- as.data.frame(rbind(prs_plot_aa, prs_plot_ea))

str(ch_aa_ea)

# Use semi-transparent fill
pdf("Fig4a.DistributionofprsCH.pdf",
    width = 7, height = 7)

# Explicit print(): a ggplot object is only drawn on the open device when it is
# printed, and auto-printing does not happen under source() or inside loops.
print(
  ggplot(ch_aa_ea, aes(x = SCORE1_AVG, fill = Ancestry)) +
    geom_density(alpha = 0.4) +
    xlab("Prevalent CH PRS")
)

dev.off()
######


In [None]:
### Rank-based inverse normal transformation.
### source:  https://www.biostars.org/p/80597/ and the supplement of Yang et al. Nature 2012.
#' @param x Numeric vector; NA values are allowed.
#' @return Numeric vector of the same length as `x`: each non-missing value is
#'   replaced by the standard-normal quantile of (rank - 0.5) / n, where n is
#'   the number of non-missing values. Missing inputs stay NA and ties share
#'   their average rank (the default of `rank()`).
INT_yang2012 <- function(x) {
  n_observed <- sum(!is.na(x))
  value_ranks <- rank(x, na.last = "keep")
  qnorm((value_ranks - 0.5) / n_observed)
}

In [None]:
### CH
# Add the derived baseline columns to both CH tables. The AA and EA tables get
# exactly the same columns in the same order, so the work is done by one helper
# that is defined in this cell (the cell does not rely on any other cell for it).
add_baseline_derived_cols <- function(tab) {
  # Inverse-normal-transformed HDL
  tab$hdl_base_INT <- INT_yang2012(tab$hdl_base)

  # non-HDL cholesterol = total cholesterol - HDL, raw and transformed
  tab$nonHDL_base <- (tab$chol_base - tab$hdl_base)
  tab$nonHDL_base_INT <- INT_yang2012(tab$nonHDL_base)

  # Inverse-normal-transformed BMI
  tab$bmi_base_INT <- INT_yang2012(tab$bmi_base)

  # Triglyceride-to-HDL ratio, raw and transformed
  tab$tg_to_hdl_base <- (tab$tg_base / tab$hdl_base)
  tab$tg_to_hdl_base_INT <- INT_yang2012(tab$tg_to_hdl_base)

  # Top 10% PRS: decile of the score within this table, then a 0/1 flag for
  # the highest decile
  tab$decile <- ntile(tab$SCORE1_AVG, 10)
  tab$prs_top10 <- ifelse(tab$decile == 10, 1, 0)

  tab
}

    # AA
ch_aa <- add_baseline_derived_cols(ch_aa)

    # EA
ch_ea <- add_baseline_derived_cols(ch_ea)


In [None]:
### DNMT3A
# Add the derived baseline columns to both DNMT3A tables. The AA and EA tables
# get exactly the same columns in the same order, so the work is done by one
# helper that is defined in this cell (the cell does not rely on any other cell
# for it).
add_baseline_derived_cols <- function(tab) {
  # Inverse-normal-transformed HDL
  tab$hdl_base_INT <- INT_yang2012(tab$hdl_base)

  # non-HDL cholesterol = total cholesterol - HDL, raw and transformed
  tab$nonHDL_base <- (tab$chol_base - tab$hdl_base)
  tab$nonHDL_base_INT <- INT_yang2012(tab$nonHDL_base)

  # Inverse-normal-transformed BMI
  tab$bmi_base_INT <- INT_yang2012(tab$bmi_base)

  # Triglyceride-to-HDL ratio, raw and transformed
  tab$tg_to_hdl_base <- (tab$tg_base / tab$hdl_base)
  tab$tg_to_hdl_base_INT <- INT_yang2012(tab$tg_to_hdl_base)

  # Top 10% PRS: decile of the score within this table, then a 0/1 flag for
  # the highest decile
  tab$decile <- ntile(tab$SCORE1_AVG, 10)
  tab$prs_top10 <- ifelse(tab$decile == 10, 1, 0)

  tab
}

    # AA
dnmt3a_aa <- add_baseline_derived_cols(dnmt3a_aa)

    # EA
dnmt3a_ea <- add_baseline_derived_cols(dnmt3a_ea)


In [None]:
### TET2
# Add the derived baseline columns to both TET2 tables. The AA and EA tables get
# exactly the same columns in the same order, so the work is done by one helper
# that is defined in this cell (the cell does not rely on any other cell for it).
add_baseline_derived_cols <- function(tab) {
  # Inverse-normal-transformed HDL
  tab$hdl_base_INT <- INT_yang2012(tab$hdl_base)

  # non-HDL cholesterol = total cholesterol - HDL, raw and transformed
  tab$nonHDL_base <- (tab$chol_base - tab$hdl_base)
  tab$nonHDL_base_INT <- INT_yang2012(tab$nonHDL_base)

  # Inverse-normal-transformed BMI
  tab$bmi_base_INT <- INT_yang2012(tab$bmi_base)

  # Triglyceride-to-HDL ratio, raw and transformed
  tab$tg_to_hdl_base <- (tab$tg_base / tab$hdl_base)
  tab$tg_to_hdl_base_INT <- INT_yang2012(tab$tg_to_hdl_base)

  # Top 10% PRS: decile of the score within this table, then a 0/1 flag for
  # the highest decile
  tab$decile <- ntile(tab$SCORE1_AVG, 10)
  tab$prs_top10 <- ifelse(tab$decile == 10, 1, 0)

  tab
}

    # AA
tet2_aa <- add_baseline_derived_cols(tet2_aa)

    # EA
tet2_ea <- add_baseline_derived_cols(tet2_ea)


#### determinants of incident CH

In [None]:
### Per SD increase of PRS is .....
# Logistic regression of each incident-CH outcome on the standardized CH PRS
# (scale(SCORE1_AVG)), fitted separately in the EA and AA tables. One CSV row is
# written per model: Dataset, Outcome, Exposure, then the four columns of the
# PRS row of the summary() coefficient table.
# NOTE: for a binomial glm the third of those columns is a Wald z value; the
# header text "t-stat" is kept unchanged so existing readers of the file still
# find the column.
ch_prs_csv <- "ch_prs_std.final_glm.multivariable.incident_ch.2023Jul15.csv"

cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "t-stat", "P"),
          collapse = ","),
    file = ch_prs_csv, append = FALSE, fill = TRUE)

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1")

# Name of the PRS row in the coefficient table. It is looked up by name rather
# than by position: summary.glm() leaves aliased (non-estimable) terms out of
# the table, so "row 2" would silently become age_base if the PRS term dropped.
prs_term <- "scale(SCORE1_AVG)"

pc_covars <- paste0("PC", 1:5)

#### VAF>=2%
ch_prs_analyses <- list(
  ## EA PRS
  list(dataset = "EA", label = "EA_CH_PRS", data = ch_ea,
       covars = c("age_base", "Sex", "ever_smoke", "Center", "v2_vs_other",
                  pc_covars)),
  ## AA PRS: no need to adjust for "Center"
  list(dataset = "AA", label = "AA_CH_PRS", data = ch_aa,
       covars = c("age_base", "Sex", "ever_smoke", "v2_vs_other",
                  pc_covars))
)

for (spec in ch_prs_analyses) {
  # Kept as a workspace variable, as before.
  test_exposures <- spec$label

  for (j in ch_phenotype) {
    cat("outcome:", j, " exposure: ", test_exposures[1], "\n")

    # Restrict to rows with a non-missing outcome before fitting, so that
    # scale() standardizes the PRS within the analysed rows.
    model_data <- spec$data %>%
      filter(!is.na(.data[[j]]))
    model_formula <- reformulate(c(prs_term, spec$covars), response = j)

    prs1 <- summary(glm(model_formula, data = model_data, family = "binomial"))

    if (!prs_term %in% rownames(prs1$coefficients)) {
      stop("PRS coefficient was not estimated for dataset ", spec$dataset,
           ", outcome ", j, call. = FALSE)
    }

    cat(paste(c(spec$dataset, j, test_exposures[1],
                prs1$coefficients[prs_term, 1:4]),
              collapse = ","),
        file = ch_prs_csv, append = TRUE, fill = TRUE)
  }
}



In [None]:
### Per SD increase of PRS is .....
# Logistic regression of each incident-CH outcome on the standardized DNMT3A PRS
# (scale(SCORE1_AVG)), fitted separately in the EA and AA tables, first on rows
# with a non-missing outcome and then on rows with non-missing incident_CHvaf10.
# One CSV row is written per model: Dataset, Outcome, Exposure, then the four
# columns of the PRS row of the summary() coefficient table.
# NOTE: for a binomial glm the third of those columns is a Wald z value; the
# header text "t-stat" is kept unchanged so existing readers of the file still
# find the column.
dnmt3a_prs_csv <- "dnmt3a_prs_std.final_glm.multivariable.incident_ch.2023Jul15.csv"

cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "t-stat", "P"),
          collapse = ","),
    file = dnmt3a_prs_csv, append = FALSE, fill = TRUE)

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1")

# Name of the PRS row in the coefficient table. It is looked up by name rather
# than by position: summary.glm() leaves aliased (non-estimable) terms out of
# the table, so "row 2" would silently become age_base if the PRS term dropped.
prs_term <- "scale(SCORE1_AVG)"

pc_covars <- paste0("PC", 1:5)
# EA models adjust for Center; AA models do not ("no need to adjust for Center").
ea_covars <- c("age_base", "Sex", "ever_smoke", "Center", "v2_vs_other",
               pc_covars)
aa_covars <- c("age_base", "Sex", "ever_smoke", "v2_vs_other", pc_covars)

# restrict_to = NULL  -> keep rows where the outcome itself is non-missing.
# restrict_to = "col" -> keep rows where that column is non-missing.
# NOTE(review): the VAF>=10% runs only change the row filter (incident_CHvaf10
# non-missing); the modelled outcome is still the column named in ch_phenotype,
# not a *vaf10 outcome. Kept exactly as before -- confirm this is intended.
dnmt3a_prs_analyses <- list(
  #### VAF>=2%
  list(dataset = "EA", label = "EA_DNMT3A_PRS", data = dnmt3a_ea,
       covars = ea_covars, restrict_to = NULL),
  list(dataset = "AA", label = "AA_DNMT3A_PRS", data = dnmt3a_aa,
       covars = aa_covars, restrict_to = NULL),
  #### VAF>=10%
  list(dataset = "EA_vaf10", label = "EA_DNMT3A_PRS", data = dnmt3a_ea,
       covars = ea_covars, restrict_to = "incident_CHvaf10"),
  list(dataset = "AA_vaf10", label = "AA_DNMT3A_PRS", data = dnmt3a_aa,
       covars = aa_covars, restrict_to = "incident_CHvaf10")
)

for (spec in dnmt3a_prs_analyses) {
  # Kept as a workspace variable, as before.
  test_exposures <- spec$label

  for (j in ch_phenotype) {
    cat("outcome:", j, " exposure: ", test_exposures[1], "\n")

    keep_col <- if (is.null(spec$restrict_to)) j else spec$restrict_to
    # Filter before fitting so that scale() standardizes the PRS within the
    # analysed rows.
    model_data <- spec$data %>%
      filter(!is.na(.data[[keep_col]]))
    model_formula <- reformulate(c(prs_term, spec$covars), response = j)

    prs1 <- summary(glm(model_formula, data = model_data, family = "binomial"))

    if (!prs_term %in% rownames(prs1$coefficients)) {
      stop("PRS coefficient was not estimated for dataset ", spec$dataset,
           ", outcome ", j, call. = FALSE)
    }

    cat(paste(c(spec$dataset, j, test_exposures[1],
                prs1$coefficients[prs_term, 1:4]),
              collapse = ","),
        file = dnmt3a_prs_csv, append = TRUE, fill = TRUE)
  }
}



In [None]:
### Per SD increase of PRS is .....
# Logistic regression of each incident-CH outcome on the standardized TET2 PRS
# (scale(SCORE1_AVG)), fitted separately in the EA and AA tables, first on rows
# with a non-missing outcome and then on rows with non-missing incident_CHvaf10.
# One CSV row is written per model: Dataset, Outcome, Exposure, then the four
# columns of the PRS row of the summary() coefficient table.
# NOTE: for a binomial glm the third of those columns is a Wald z value; the
# header text "t-stat" is kept unchanged so existing readers of the file still
# find the column.
tet2_prs_csv <- "tet2_prs_std.final_glm.multivariable.incident_ch.2023Jul15.csv"

cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "t-stat", "P"),
          collapse = ","),
    file = tet2_prs_csv, append = FALSE, fill = TRUE)

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1")

# Name of the PRS row in the coefficient table. It is looked up by name rather
# than by position: summary.glm() leaves aliased (non-estimable) terms out of
# the table, so "row 2" would silently become age_base if the PRS term dropped.
prs_term <- "scale(SCORE1_AVG)"

pc_covars <- paste0("PC", 1:5)
# EA models adjust for Center; AA models do not ("no need to adjust for Center").
ea_covars <- c("age_base", "Sex", "ever_smoke", "Center", "v2_vs_other",
               pc_covars)
aa_covars <- c("age_base", "Sex", "ever_smoke", "v2_vs_other", pc_covars)

# restrict_to = NULL  -> keep rows where the outcome itself is non-missing.
# restrict_to = "col" -> keep rows where that column is non-missing.
# NOTE(review): the VAF>=10% runs only change the row filter (incident_CHvaf10
# non-missing); the modelled outcome is still the column named in ch_phenotype,
# not a *vaf10 outcome. Kept exactly as before -- confirm this is intended.
tet2_prs_analyses <- list(
  #### VAF>=2%
  list(dataset = "EA", label = "EA_TET2_PRS", data = tet2_ea,
       covars = ea_covars, restrict_to = NULL),
  list(dataset = "AA", label = "AA_TET2_PRS", data = tet2_aa,
       covars = aa_covars, restrict_to = NULL),
  #### VAF>=10%
  list(dataset = "EA_vaf10", label = "EA_TET2_PRS", data = tet2_ea,
       covars = ea_covars, restrict_to = "incident_CHvaf10"),
  list(dataset = "AA_vaf10", label = "AA_TET2_PRS", data = tet2_aa,
       covars = aa_covars, restrict_to = "incident_CHvaf10")
)

for (spec in tet2_prs_analyses) {
  # Kept as a workspace variable, as before.
  test_exposures <- spec$label

  for (j in ch_phenotype) {
    cat("outcome:", j, " exposure: ", test_exposures[1], "\n")

    keep_col <- if (is.null(spec$restrict_to)) j else spec$restrict_to
    # Filter before fitting so that scale() standardizes the PRS within the
    # analysed rows.
    model_data <- spec$data %>%
      filter(!is.na(.data[[keep_col]]))
    model_formula <- reformulate(c(prs_term, spec$covars), response = j)

    prs1 <- summary(glm(model_formula, data = model_data, family = "binomial"))

    if (!prs_term %in% rownames(prs1$coefficients)) {
      stop("PRS coefficient was not estimated for dataset ", spec$dataset,
           ", outcome ", j, call. = FALSE)
    }

    cat(paste(c(spec$dataset, j, test_exposures[1],
                prs1$coefficients[prs_term, 1:4]),
              collapse = ","),
        file = tet2_prs_csv, append = TRUE, fill = TRUE)
  }
}



In [None]:
# Top-decile CH PRS (prs_top10, 0/1) versus each incident-CH outcome in the AA
# table; logistic regression without a Center term, matching the other AA
# models. Rows are appended to the CH per-SD results file.
#
# The exposure label is set here explicitly. Previously this loop reused
# whatever test_exposures held from the last cell that ran (the TET2 label when
# the notebook is run top to bottom), so the appended rows carried the wrong
# exposure name and could not be told apart from the per-SD rows.
top10_csv <- "ch_prs_std.final_glm.multivariable.incident_ch.2023Jul15.csv"
top10_label <- "AA_CH_PRS_top10"

# Outcomes (defined here so this cell does not depend on another cell's state)
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1")

for (j in ch_phenotype) {
  cat("outcome:", j, " exposure: ", top10_label, "\n")

  model_data <- ch_aa %>%
    filter(!is.na(.data[[j]]))
  model_formula <- reformulate(
    c("prs_top10", "age_base", "Sex", "ever_smoke", "v2_vs_other",
      paste0("PC", 1:5)),
    response = j
  )

  prs1 <- summary(glm(model_formula, data = model_data, family = "binomial"))

  # Locate the exposure row by name instead of assuming it is row 2:
  # summary.glm() omits aliased terms, which would shift age_base into row 2.
  # A prefix match also covers prs_top10 being stored as a two-level factor.
  top10_row <- grep("^prs_top10", rownames(prs1$coefficients))
  if (length(top10_row) != 1L) {
    stop("Expected exactly one prs_top10 coefficient for outcome ", j,
         ", found ", length(top10_row), call. = FALSE)
  }

  cat(paste(c("AA", j, top10_label,
              prs1$coefficients[top10_row, 1:4]),
            collapse = ","),
      file = top10_csv, append = TRUE, fill = TRUE)
}