# Forest Plots

In [None]:
##############################
#### Forest plot
##############################
library(data.table) # version 1.14.6
library(meta) # version 6.2-1
library(grid) # version 4.2.2
library(scales) # version 1.2.1

In [None]:
# Set the working directory. The results CSV read below and the PDF figures
# written at the end are all given as relative paths, so they resolve here.
setwd(dir = "/medpop/esp2/mesbah/projects/ch_progression/aric/epi/")


In [None]:
# load multivariable GLM summary
# Read the multivariable GLM results; the first row of the file holds the
# column names. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned, whereas `TRUE` is a reserved word.
multivariable_glm_dat <- fread(
  "final_glm.multivariable.incident_ch.2023Jul07.csv",
  header = TRUE
)

# Sanity check: number of rows per dataset, exposure code and outcome code.
table(multivariable_glm_dat$Dataset)
table(multivariable_glm_dat$Exposure)
table(multivariable_glm_dat$Outcome)

In [None]:
## Exposures
# Raw exposure code -> label shown in the figures. Codes that are not listed
# here (and missing values) are left untouched.
exposure_labels <- c(
  age_base        = "Age",
  bmi_base_INT    = "BMI",
  chd_is_base     = "ASCVD",
  dm_126_base     = "T2D",
  ever_smoke      = "Ever Smoker",
  hdl_base_INT    = "HDL-C",
  htn_5_base      = "Hypertension",
  nonHDL_base_INT = "non-HDL-C",
  race_BW         = "European",
  Sex             = "Male Sex"
)
exposure_hit <- match(multivariable_glm_dat$Exposure, names(exposure_labels))
exposure_known <- !is.na(exposure_hit)
multivariable_glm_dat$Exposure[exposure_known] <-
  unname(exposure_labels[exposure_hit[exposure_known]])

table(multivariable_glm_dat$Exposure)

## Outcome
# Raw outcome code -> label shown in the figures.
# "incident_CH_or_growingClones" and "incident_DTA" are not relabelled: their
# recode lines were already commented out.
outcome_labels <- c(
  incident_CH     = "Overall CH",
  incident_DNMT3A = "DNMT3A",
  incident_TET2   = "TET2",
  incident_ASXL1  = "ASXL1",
  incident_SF     = "SF",
  incident_DDR    = "DDR"
)
outcome_hit <- match(multivariable_glm_dat$Outcome, names(outcome_labels))
outcome_known <- !is.na(outcome_hit)
multivariable_glm_dat$Outcome[outcome_known] <-
  unname(outcome_labels[outcome_hit[outcome_known]])

table(multivariable_glm_dat$Outcome)

In [None]:
## Multiple-testing threshold: 20 independent tests at 5%,
## i.e. P < 0.05 / 20 = 0.0025 ("2.5E-03").
## (With 15 tests it would be 0.05 / 15 = 0.0033, "3.3E-03".)
# The threshold is derived once and reused, so the printed value and the
# value used for flagging cannot drift apart.
p_threshold <- 0.05 / 20
cat("P threshold< ", round(p_threshold, 4), "\n")

# Rows below the threshold get "***", all others an empty string; a missing P
# yields NA. Tabulate the flags as a quick check.
multivariable_glm_dat$sig <- ifelse(multivariable_glm_dat$P < p_threshold, "***", "")
table(multivariable_glm_dat$sig)

In [None]:
# Build the display columns (P, OR, 95% CI) and the interval bounds.

# P value in scientific notation with one decimal digit.
multivariable_glm_dat$P_val <- formatC(multivariable_glm_dat$P, digits = 1, format = "E")

# Shared formatter: round to two decimals, print as fixed-point text.
fmt_2dp <- function(v) formatC(round(v, 2), digits = 2, format = "f")

# Odds ratio: exp(Beta).
multivariable_glm_dat$OR <- fmt_2dp(exp(multivariable_glm_dat$Beta))

# Interval bounds on the Beta scale: Beta -/+ 1.96 * SE.
multivariable_glm_dat$lSE <- multivariable_glm_dat$Beta - 1.96 * multivariable_glm_dat$SE
multivariable_glm_dat$uSE <- multivariable_glm_dat$Beta + 1.96 * multivariable_glm_dat$SE

# 95% CI text "[lower, upper]": the same bounds, exponentiated.
multivariable_glm_dat$CI95 <- paste0(
  "[", fmt_2dp(exp(multivariable_glm_dat$lSE)),
  ", ",
  fmt_2dp(exp(multivariable_glm_dat$uSE)),
  "]"
)

head(multivariable_glm_dat)



In [None]:
## Split the results into two outcome sets and fix the display order.
# Level vectors are declared once; their order is the order of the factor
# levels assigned below.
outcomes_v1 <- c("Overall CH", "DNMT3A", "TET2")
outcomes_v2 <- c("ASXL1", "SF", "DDR")
exposure_order <- c("Age", "Male Sex",
                    "European", "Ever Smoker",
                    "BMI", "Hypertension",
                    "ASCVD", "T2D",
                    "HDL-C", "non-HDL-C")

## Outcome set 1: Overall CH, DNMT3A, TET2
multivariable_glm_dat.v1 <- subset(multivariable_glm_dat,
                                   multivariable_glm_dat$Outcome %in% outcomes_v1)
multivariable_glm_dat.v1$Outcome <- factor(multivariable_glm_dat.v1$Outcome,
                                           levels = outcomes_v1,
                                           ordered = TRUE)
multivariable_glm_dat.v1$Exposure <- factor(multivariable_glm_dat.v1$Exposure,
                                            levels = exposure_order,
                                            ordered = TRUE)

## Outcome set 2: ASXL1, SF, DDR
multivariable_glm_dat.v2 <- subset(multivariable_glm_dat,
                                   multivariable_glm_dat$Outcome %in% outcomes_v2)
multivariable_glm_dat.v2$Outcome <- factor(multivariable_glm_dat.v2$Outcome,
                                           levels = outcomes_v2,
                                           ordered = TRUE)
multivariable_glm_dat.v2$Exposure <- factor(multivariable_glm_dat.v2$Exposure,
                                            levels = exposure_order,
                                            ordered = TRUE)

# Fig. 4 | Association of clinical cardiovascular risk factors with incident clonal hematopoiesis of indeterminate potential (CHIP).

In [None]:
## Figure 4
##### adjusted model
# Wrap the per-outcome estimates in a meta object so that forest() can draw
# them: one row per outcome (study label), grouped by exposure (subgroup).
# Beta and its lSE/uSE bounds are passed on the Beta scale; sm = "OR" labels
# the summary measure as an odds ratio.
b_adj.v1 <- metagen(TE = Beta,
                    lower = lSE,
                    upper = uSE,
                    studlab = Outcome,
                    subgroup = Exposure,
                    data = multivariable_glm_dat.v1,
                    sm = "OR")

### adjusted CH, DNMT3A, TET2
# NOTE(review): the file name says "Fig3" while this cell is labelled
# "Figure 4" -- confirm which number is intended before renaming anything.
pdf("Fig3,final_glm.Forest_incidentCH.adj.2023Jul07.pdf", width = 8, height = 12)
# Logical switches are written as FALSE rather than F, because F is an
# ordinary variable that can be reassigned.
forest(x = b_adj.v1,
       common = FALSE,
       random = FALSE,
       hetstat = FALSE,
       # k.w is not defined in this script; presumably forest() resolves it
       # inside the meta object (per-subgroup study count) -- TODO confirm.
       subgroup = k.w >= 1,
       weight.study = "same",
       level = 0.95,
       xlim = c(0.5, 3),
       smlab = "Effect of Exposures\non Incident CH\n",
       smlab.pos = 0,
       colgap = unit(7, "mm"),
       xlab = "Odds Ratio",
       squaresize = 0.6,
       col.subgroup = "black",
       colgap.left = unit(0.1, "cm"),
       colgap.forest.left = "3mm",
       colgap.forest.right = "2mm",
       leftcols = c("studlab"),
       leftlabs = c("                     "),
       # Right-hand text columns come from the pre-formatted columns of the
       # input data (OR, CI95, P_val, sig).
       rightcols = c("OR", "CI95", "P_val", "sig"),
       rightlabs = c("OR", "95% CI", "P", ""),
       #rightcols=NULL,
       #rightlabs=NULL,
       col.inside = "black",
       plotwidth = unit(6.5, "cm"),
       print.subgroup.name = FALSE)
dev.off()


# Supplementary Figure 6 | Multivariable adjusted logistic regression for incident ASXL1, SF, and DDR CHIP vs baseline risk factors.

In [None]:
## Suppl. Fig. 6
##### adjusted model: ## ASXL1, SF, DDR
# Wrap the per-outcome estimates in a meta object so that forest() can draw
# them: one row per outcome (study label), grouped by exposure (subgroup).
# Beta and its lSE/uSE bounds are passed on the Beta scale; sm = "OR" labels
# the summary measure as an odds ratio.
b_adj.v2 <- metagen(TE = Beta,
                    lower = lSE,
                    upper = uSE,
                    studlab = Outcome,
                    subgroup = Exposure,
                    data = multivariable_glm_dat.v2,
                    sm = "OR")

### adjusted ASXL1, SF, DDR
# NOTE(review): the file name says "FigS4" while this cell is labelled
# "Suppl. Fig. 6" -- confirm which number is intended before renaming anything.
pdf("FigS4,final_glm.Forest_incidentCH.adj.2023Jul07.pdf", width = 8, height = 12)
# Logical switches are written as FALSE rather than F, because F is an
# ordinary variable that can be reassigned.
forest(x = b_adj.v2,
       common = FALSE,
       random = FALSE,
       hetstat = FALSE,
       # k.w is not defined in this script; presumably forest() resolves it
       # inside the meta object (per-subgroup study count) -- TODO confirm.
       subgroup = k.w >= 1,
       weight.study = "same",
       level = 0.95,
       xlim = c(0.5, 3),
       smlab = "Effect of Exposures\non Incident CH\n",
       smlab.pos = 0,
       colgap = unit(7, "mm"),
       xlab = "Odds Ratio",
       squaresize = 0.6,
       col.subgroup = "black",
       colgap.left = unit(0.1, "cm"),
       colgap.forest.left = "3mm",
       colgap.forest.right = "2mm",
       leftcols = c("studlab"),
       leftlabs = c("                     "),
       # Right-hand text columns come from the pre-formatted columns of the
       # input data (OR, CI95, P_val, sig).
       rightcols = c("OR", "CI95", "P_val", "sig"),
       rightlabs = c("OR", "95% CI", "P", ""),
       #rightcols=NULL,
       #rightlabs=NULL,
       col.inside = "black",
       plotwidth = unit(6.5, "cm"),
       print.subgroup.name = FALSE)
dev.off()
