## Forest Plots

In [None]:
##############################
#### Forest plot
##############################
library(data.table) # version 1.14.6
library(meta) # version 6.2-1
library(grid) # version 4.2.2
library(scales) # version 1.2.1

In [None]:
# set working directory
# NOTE(review): absolute, machine-specific path. The relative file names used
# by the fread() and pdf() calls in this script resolve against this directory,
# so the script only runs unchanged where this path exists.
setwd("/medpop/esp2/mesbah/projects/ch_progression/aric/epi/")


In [None]:
# load univariable GLM summary (incident CH)
# Read the univariable GLM results table; the first row holds column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the header row is treated.
univariable_glm_dat <- fread(
  "final_glm.univariable.incident_ch.2023Jul07.csv",
  header = TRUE
)

# Quick tabulations of the identifying columns to see what was loaded.
table(univariable_glm_dat$Dataset)
table(univariable_glm_dat$Exposure)
table(univariable_glm_dat$Outcome)

In [None]:
# Exposures retained for the forest plot; all other rows are dropped.
exposures_to_keep <- c(
  "age_base", "bmi_base_INT", "hdl_base_INT",
  "nonHDL_base_INT", "Sex", "race_BW", "ever_smoke",
  "dm_126_base", "htn_5_base", "chd_is_base"
)

univariable_glm_dat <- subset(
  univariable_glm_dat,
  Exposure %in% exposures_to_keep
)

# Confirm which exposures remain and how many rows each has.
table(univariable_glm_dat$Exposure)

In [None]:
## Replace raw variable codes with display labels.
# Values that do not appear in a lookup table (including NA) are left as-is.
recode_labels <- function(values, labels) {
  hit <- match(values, names(labels))
  known <- !is.na(hit)
  values[known] <- unname(labels[hit[known]])
  values
}

## Exposures
# Mappings for ascvd_base, hf_base, chol_base_std, nonHDL_base_std and
# tg_base_std were disabled in the original script and remain disabled.
exposure_labels <- c(
  age_base        = "Age",
  bmi_base_INT    = "BMI",
  chd_is_base     = "ASCVD",
  dm_126_base     = "T2D",
  ever_smoke      = "Ever Smoker",
  hdl_base_INT    = "HDL-C",
  htn_5_base      = "Hypertension",
  nonHDL_base_INT = "non-HDL-C",
  race_BW         = "European",
  Sex             = "Male Sex"
)
univariable_glm_dat$Exposure <- recode_labels(
  univariable_glm_dat$Exposure,
  exposure_labels
)
table(univariable_glm_dat$Exposure)

## Outcome
# Mappings for incident_CH_or_growingClones and incident_DTA were disabled in
# the original script and remain disabled.
outcome_labels <- c(
  incident_CH     = "Overall CH",
  incident_DNMT3A = "DNMT3A",
  incident_TET2   = "TET2",
  incident_ASXL1  = "ASXL1",
  incident_SF     = "SF",
  incident_DDR    = "DDR"
)
univariable_glm_dat$Outcome <- recode_labels(
  univariable_glm_dat$Outcome,
  outcome_labels
)

table(univariable_glm_dat$Outcome)

In [None]:
## Multiple-testing cutoff: 20 independent tests at 5% -> P < 0.05/20 = 0.0025
# Reference values in the E-notation used for the P column:
#   0.05/20 = 0.0025 = "2.5E-03"
#   0.05/15 = 0.0033 = "3.3E-03"
p_cutoff <- 0.0025
cat("P threshold< ", round(0.05 / 20, 4))

# Rows passing the cutoff get "***"; all others get an empty string.
univariable_glm_dat$sig <- ifelse(
  univariable_glm_dat$P < p_cutoff,
  "***",
  ""
)
table(univariable_glm_dat$sig)

In [None]:
# Build the text columns shown beside the forest plot.

# Fixed two-decimal text representation of a numeric vector.
two_dp <- function(v) {
  formatC(round(v, 2), digits = 2, format = "f")
}

# P value in E-notation with one digit after the decimal point.
univariable_glm_dat$P_val <- formatC(
  x = univariable_glm_dat$P,
  digits = 1,
  format = "E"
)

# Odds ratio: exponentiated Beta.
univariable_glm_dat$OR <- two_dp(exp(univariable_glm_dat$Beta))

# Interval limits on the Beta scale: Beta -/+ 1.96 * SE.
ci_half_width <- 1.96 * univariable_glm_dat$SE
univariable_glm_dat$lSE <- univariable_glm_dat$Beta - ci_half_width
univariable_glm_dat$uSE <- univariable_glm_dat$Beta + ci_half_width

# 95% CI text: the same limits exponentiated, rendered as "[lower, upper]".
univariable_glm_dat$CI95 <- paste0(
  "[",
  two_dp(exp(univariable_glm_dat$lSE)),
  ", ",
  two_dp(exp(univariable_glm_dat$uSE)),
  "]"
)

head(univariable_glm_dat)

In [None]:
## Fix the display order of outcomes and exposures via ordered factors.
outcome_order <- c("Overall CH", "DNMT3A", "TET2", "ASXL1", "SF", "DDR")
univariable_glm_dat$Outcome <- factor(
  univariable_glm_dat$Outcome,
  levels = outcome_order,
  ordered = TRUE
)

exposure_order <- c(
  "Age", "Male Sex",
  "European", "Ever Smoker",
  "BMI", "Hypertension",
  "ASCVD", "T2D",
  "HDL-C", "non-HDL-C"
)
univariable_glm_dat$Exposure <- factor(
  univariable_glm_dat$Exposure,
  levels = exposure_order,
  ordered = TRUE
)

In [None]:
# Inspect the prepared table before modelling: column names first ...
names(univariable_glm_dat)

# ... then the column types and a preview of values.
str(univariable_glm_dat)

In [None]:
##### Unadjusted model
# Build the meta object that drives the forest plot. Each row of the table is
# one entry labelled by its Outcome and grouped by Exposure. Estimates are
# given as Beta with the precomputed limits lSE / uSE, and the summary measure
# is declared as an odds ratio.
b_unadj <- metagen(
  data = univariable_glm_dat,
  TE = Beta,
  lower = lSE,
  upper = uSE,
  studlab = Outcome,
  subgroup = Exposure,
  sm = "OR"
)

In [None]:
# Render the unadjusted forest plot to a PDF file.
# The plot call is wrapped in tryCatch(finally = ) so the PDF device is closed
# even if forest() fails; otherwise a stale device stays open and the output
# file is left incomplete. invisible() keeps the return value out of the cell
# output.
pdf("SupplFig3.final_glm.Forest_incidentCH.unadj.2023Jul07.pdf",
    width = 8, height = 18)
invisible(tryCatch(
  forest(x = b_unadj,
         # No pooled (common/random) estimates or heterogeneity statistics.
         common = FALSE,
         random = FALSE,
         hetstat = FALSE,
         # Evaluated by forest() against the meta object (k.w lives there).
         subgroup = k.w >= 1,
         weight.study = "same",
         level = 0.95,
         xlim = c(0.5, 3),
         smlab = "Effect of Exposures\non Incident CH\n",
         smlab.pos = 0,
         colgap = unit(7, "mm"),
         xlab = "Odds Ratio",
         squaresize = 0.6,
         col.subgroup = "black",
         colgap.left = unit(0.1, "cm"),
         colgap.forest.left = "3mm",
         colgap.forest.right = "2mm",
         leftcols = c("studlab"),
         # Blank header of fixed width above the study-label column.
         leftlabs = c("                     "),
         # Preformatted text columns taken from the data behind b_unadj.
         rightcols = c("OR", "CI95", "P_val", "sig"),
         rightlabs = c("OR", "95% CI", "P", ""),
         #rightcols=NULL,
         #rightlabs=NULL,
         col.inside = "black",
         plotwidth = unit(6.5, "cm"),
         print.subgroup.name = FALSE),
  finally = dev.off()
))