# Sensitivity Analysis 

* Only keep incident CH where baseline position has DP>=20

* Incident CHIP is defined as new clone detected at follow-up visit (at VAF >=2%) w/o a detectable prevalent clone
* All baseline samples with
    * DP >=20
    * W/O detectable prevalent clone at VAF 0.1% (i.e. no prevalent clone detected or prevalent clone size<0.1%)

## 1. Analysis 1: DP>=20 and Baseline Clone VAF <0.1% (i.e. <0.001)

## 2. Analysis 2: DP>=20, variant >=5 and >=2 forward and reverse reads and Baseline Clone VAF <0.1% (i.e. <0.001)

In [None]:
# Packages used by this notebook: fread() (data loading) comes from data.table;
# the %>% pipe and filter() used in the model cells come from dplyr.
library(data.table) # version 1.14.6

library(dplyr)

# set working directory
# The result CSVs in later cells are written with relative file names, so they
# are created inside this directory.
setwd("/medpop/esp2/mesbah/projects/ch_progression/aric/epi/")

In [None]:

## Read the saved phenotype table that was used for the final glm analysis
aric_baseline_n_v05 <- fread(
  "/medpop/esp2/mesbah/projects/ch_progression/aric/pheno/aric_baseline_n_v05_N3730.pheno_ch_status.noHemeCA.correct_lipids.FinalDataset_4_glm.July132023.csv",
  header = TRUE
)

# Number of rows loaded
nrow(aric_baseline_n_v05)

# Distribution of the incident CH outcome, with missing values shown
table(aric_baseline_n_v05$incident_CH, exclude = NULL)

# Follow-up time = Age column minus age_base column
aric_baseline_n_v05$Time_Followup <- with(aric_baseline_n_v05, Age - age_base)

summary(aric_baseline_n_v05$Time_Followup)

In [None]:
## Clone-level table (file name: vaf2 / DP20_base / relaxd / modified_hiseq)
cln_grt.vaf2.DP20_base.corrected <- fread(
  "/medpop/esp2/mesbah/projects/ch_progression/aric/pheno/cln_grt.vaf2.DP20_base.relaxd.modified_hiseq.29Nov2023.csv",
  header = TRUE
)

# Row count of the clone table
nrow(cln_grt.vaf2.DP20_base.corrected)

# Distribution of the DP.v2 and VAF.v2 columns
summary(cln_grt.vaf2.DP20_base.corrected[["DP.v2"]])

summary(cln_grt.vaf2.DP20_base.corrected[["VAF.v2"]])

In [None]:
## Overlap between the analysis cohort and the baseline clone table
## Each table shows incident_CH status for cohort participants whose ARIC_ID
## appears among the selected clones.

# incident_CH values of cohort participants whose ARIC_ID is in `ids`.
incident_ch_among <- function(ids) {
  aric_baseline_n_v05$incident_CH[aric_baseline_n_v05$ARIC_ID %in% ids]
}

clone_ids <- cln_grt.vaf2.DP20_base.corrected$ARIC_ID
vaf_v2 <- cln_grt.vaf2.DP20_base.corrected$VAF.v2
vaf_v5 <- cln_grt.vaf2.DP20_base.corrected$VAF.v5

# Any clone in the table
table(incident_ch_among(clone_ids), exclude = NULL)

# VAF.v2, rounded to two decimals, >= 0.02
# NOTE(review): rounding lets values from about 0.015 upwards pass; kept as in
# the original - confirm this relaxed cut-off is intended.
table(incident_ch_among(clone_ids[round(vaf_v2, 2) >= 0.02]), exclude = NULL)

# VAF.v2, rounded to two decimals, >= 0.01 (same rounding caveat)
table(incident_ch_among(clone_ids[round(vaf_v2, 2) >= 0.01]), exclude = NULL)

# VAF.v2 < 0.001 (i.e. < 0.1%).
# The value is compared unrounded: round(x, 2) < 0.001 is TRUE for any
# x below ~0.005, which silently turned the 0.1% threshold into ~0.5%.
# which() drops clones with missing VAF.v2 instead of producing NA ids.
table(incident_ch_among(clone_ids[which(vaf_v2 < 0.001)]), exclude = NULL)

# VAF.v5, rounded to two decimals, >= 0.02
table(incident_ch_among(clone_ids[round(vaf_v5, 2) >= 0.02]), exclude = NULL)

# Any clone in the table, without the NA category
table(incident_ch_among(clone_ids))

## Analysis 1: DP>=20 and Baseline Clone VAF <0.1% (i.e. <0.001)

In [None]:
# Sensitivity outcome for Analysis 1:
#   1  = incident CH and the participant has a clone in the DP>=20 table whose
#        VAF.v2 is < 0.001 (< 0.1%)
#   0  = no incident CH
#   NA = incident CH that does not meet the criterion (dropped from the models)
#
# VAF.v2 is compared unrounded. The previous round(VAF.v2, 2) < 0.001 test was
# TRUE for any VAF below ~0.005, i.e. it applied a ~0.5% threshold instead of
# the stated 0.1%.
# NOTE(review): clones with a missing VAF.v2 do not count as passing - confirm
# that "no prevalent clone detected" is coded as 0 rather than NA.
ids_no_prevalent_clone <- cln_grt.vaf2.DP20_base.corrected$ARIC_ID[
  which(cln_grt.vaf2.DP20_base.corrected$VAF.v2 < 0.001)
]

aric_baseline_n_v05$incident_CH_DPbase20VAFbase001 <- ifelse(
  aric_baseline_n_v05$incident_CH == 1 &
    aric_baseline_n_v05$ARIC_ID %in% ids_no_prevalent_clone,
  1,
  ifelse(aric_baseline_n_v05$incident_CH == 0, 0, NA)
)

table(aric_baseline_n_v05$incident_CH_DPbase20VAFbase001, exclude = NULL)



## 01. Sensitivity Analysis - Univariable


In [None]:
#### Univariable
## One logistic regression per (exposure, outcome) pair, restricted to rows
## where incident_CH_DPbase20VAFbase001, the exposure and the outcome are all
## non-missing. One CSV row is appended per model.

univ_file <- "sensitivity01.final_glm.univariable.incident_CH_DPbase20VAFbase001.2024Jul22.csv"

# Header row; append = FALSE starts a fresh file on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "Cases", "Controls", "N"), collapse = ","),
    "\n", sep = "", file = univ_file, append = FALSE)

exposures <- c("age_base", "bmi_base_INT",
               "chol_base_INT", "ldl_base_INT",
               "hdl_base_INT", "tg_base_INT",
               "nonHDL_base_INT", "tg_to_hdl_base_INT",
               "ldl_base_nomal_vs_high", "Dyslipidemia",
               "hdl_base_low",
               "Sex", "race_BW", "ever_smoke",
               "dm_126_base", "htn_5_base",
               "chd_is_base")

ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

for (i in exposures) {
  for (j in ch_phenotype) {
    cat("outcome:", j, " exposure:", i, "\n")

    # Drop rows with a missing sensitivity flag, exposure or outcome
    analysis_data <- aric_baseline_n_v05 %>%
      filter(!is.na(incident_CH_DPbase20VAFbase001),
             !is.na(.data[[i]]),
             !is.na(.data[[j]]))

    # Keep the fitted object so the rows actually used can be read from it
    # directly (the summary object does not carry the model frame).
    fit <- glm(reformulate(i, response = j),
               data = analysis_data, family = "binomial")
    coef_table <- summary(fit)$coefficients

    # Count cases/controls by value rather than by table position, so an
    # outcome with a single observed level gives 0 instead of NA.
    # Assumes outcomes are coded 0/1 (incident_CH is compared to 0/1 upstream).
    outcome_used <- model.response(model.frame(fit))
    n_cases <- sum(outcome_used == 1)
    n_controls <- sum(outcome_used == 0)
    n_total <- length(outcome_used)

    cat(j, "~", i, ": Number of controls:", n_controls, "\n")
    cat(j, "~", i, ": Number of cases:", n_cases, "\n")

    # Row 2 is the (first) exposure coefficient. It is absent when the
    # exposure is constant in the analysis subset (aliased coefficient).
    if (nrow(coef_table) >= 2) {
      exposure_stats <- coef_table[2, 1:4]
    } else {
      warning("No estimable coefficient for ", i, " in model of ", j,
              call. = FALSE)
      exposure_stats <- rep(NA_real_, 4)
    }

    cat(paste(c("Univariable", j, i, exposure_stats,
                n_cases, n_controls, n_total), collapse = ","),
        "\n", sep = "", file = univ_file, append = TRUE)
  }
}
    




## 01. Sensitivity Analysis - Multivariable

In [None]:
## Multivariable
## adjusted for age, Sex, Race, Smoking, bmi, non-HDL-C, hdl-c, t2d, htn, ascvd,
## chol_med, batch (visit, center)
## One model per outcome; one CSV row per tested exposure.

multi_file <- "sensitivity01.final_glm.multivariable.incident_CH_DPbase20VAFbase001.2024Jul22.csv"

# Header row; append = FALSE starts a fresh file on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "Cases", "Controls", "N"), collapse = ","),
    "\n", sep = "", file = multi_file, append = FALSE)

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

# Exposures whose coefficients are reported
test_exposures <- c("age_base", "Sex", "race_BW",
                    "ever_smoke", "bmi_base_INT",
                    "nonHDL_base_INT", "hdl_base_INT",
                    "dm_126_base", "htn_5_base",
                    "chd_is_base")

# Full right-hand side: reported exposures plus adjustment-only covariates
model_terms <- c(test_exposures, "chol_med_base", "Center", "v2_vs_other")

for (j in ch_phenotype) {
  analysis_data <- aric_baseline_n_v05 %>%
    filter(!is.na(incident_CH_DPbase20VAFbase001),
           !is.na(.data[[j]]))

  # The model does not depend on which exposure is being reported, so it is
  # fitted once per outcome instead of once per (outcome, exposure) pair.
  fit <- glm(reformulate(model_terms, response = j),
             data = analysis_data, family = "binomial")
  coef_table <- summary(fit)$coefficients

  # Rows actually used by glm() after it drops incomplete covariates.
  # Assumes outcomes are coded 0/1.
  outcome_used <- model.response(model.frame(fit))
  n_cases <- sum(outcome_used == 1)
  n_controls <- sum(outcome_used == 0)
  n_total <- length(outcome_used)

  # Map every design-matrix column to the model term that produced it, so a
  # coefficient is looked up by term rather than by row position (position
  # k + 1 is wrong as soon as a term is aliased or has more than one column).
  design <- model.matrix(fit)
  column_term <- c("(Intercept)", attr(terms(fit), "term.labels"))[
    attr(design, "assign") + 1
  ]

  for (k in seq_along(test_exposures)) {
    exposure <- test_exposures[k]
    cat("outcome:", j, " exposure:", exposure, "\n")
    cat("total N=", n_total, "\n")
    cat(j, "~", exposure, ": Number of controls:", n_controls, "\n")
    cat(j, "~", exposure, ": Number of cases:", n_cases, "\n")

    # Aliased coefficients are absent from the summary table, hence intersect
    coef_rows <- intersect(colnames(design)[column_term == exposure],
                           rownames(coef_table))
    if (length(coef_rows) == 1) {
      exposure_stats <- coef_table[coef_rows, 1:4]
    } else {
      warning("Expected exactly one coefficient for ", exposure,
              " in model of ", j, " but found ", length(coef_rows),
              "; writing NA", call. = FALSE)
      exposure_stats <- rep(NA_real_, 4)
    }

    cat(paste(c("Adjusted_no_dAge", j, exposure, exposure_stats,
                n_cases, n_controls, n_total), collapse = ","),
        "\n", sep = "", file = multi_file, append = TRUE)
  }
}

################
### Remove the fitted model and related temporaries
rm(fit, coef_table, analysis_data, outcome_used, design, column_term)

################


##### secondary analysis
## Same multivariable model with Time_Followup (Age - age_base) added.
## Rows are appended to the multivariable CSV; its header row is written by the
## preceding "Adjusted_no_dAge" section.
#####
multi_file <- "sensitivity01.final_glm.multivariable.incident_CH_DPbase20VAFbase001.2024Jul22.csv"

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

# Exposures whose coefficients are reported
test_exposures <- c("age_base", "Sex", "race_BW",
                    "ever_smoke", "bmi_base_INT",
                    "nonHDL_base_INT", "hdl_base_INT",
                    "dm_126_base", "htn_5_base",
                    "chd_is_base", "Time_Followup")

# Full right-hand side: reported exposures plus adjustment-only covariates
model_terms <- c(test_exposures, "chol_med_base", "Center", "v2_vs_other")

for (j in ch_phenotype) {
  analysis_data <- aric_baseline_n_v05 %>%
    filter(!is.na(incident_CH_DPbase20VAFbase001),
           !is.na(.data[[j]]))

  # Fitted once per outcome: the model is the same for every reported exposure.
  fit <- glm(reformulate(model_terms, response = j),
             data = analysis_data, family = "binomial")
  coef_table <- summary(fit)$coefficients

  # Rows actually used by glm() after it drops incomplete covariates.
  # Assumes outcomes are coded 0/1.
  outcome_used <- model.response(model.frame(fit))
  n_cases <- sum(outcome_used == 1)
  n_controls <- sum(outcome_used == 0)
  n_total <- length(outcome_used)

  # Term that produced each design-matrix column; used to look coefficients up
  # by term instead of by row position k + 1.
  design <- model.matrix(fit)
  column_term <- c("(Intercept)", attr(terms(fit), "term.labels"))[
    attr(design, "assign") + 1
  ]

  for (k in seq_along(test_exposures)) {
    exposure <- test_exposures[k]
    cat("outcome:", j, " exposure:", exposure, "\n")
    cat("total N=", n_total, "\n")
    cat(j, "~", exposure, ": Number of controls:", n_controls, "\n")
    cat(j, "~", exposure, ": Number of cases:", n_cases, "\n")

    # Aliased coefficients are absent from the summary table, hence intersect
    coef_rows <- intersect(colnames(design)[column_term == exposure],
                           rownames(coef_table))
    if (length(coef_rows) == 1) {
      exposure_stats <- coef_table[coef_rows, 1:4]
    } else {
      warning("Expected exactly one coefficient for ", exposure,
              " in model of ", j, " but found ", length(coef_rows),
              "; writing NA", call. = FALSE)
      exposure_stats <- rep(NA_real_, 4)
    }

    cat(paste(c("Adjusted_with_dAge", j, exposure, exposure_stats,
                n_cases, n_controls, n_total), collapse = ","),
        "\n", sep = "", file = multi_file, append = TRUE)
  }
}

## Analysis 2: DP>=20, variant >=5 and >=2 forward and reverse reads and Baseline Clone VAF <0.1% (i.e. <0.001)

#### Variant filters: 
* DP>=20
* AD>=5
* FR/RR>=2
* VAF>=2%
* No prevalent CHIP clones (i.e.  0<=VAF<0.1% at baseline visit) 

## 02. Sensitivity Analysis - Univariable 

In [None]:
## Clear every object from the workspace before Analysis 2
# NOTE(review): rm(list = ls()) and setwd() are kept from the original cell;
# they make the cell depend on, and alter, the interactive session state.
rm(list = ls())
###

library(data.table) # version 1.14.6

library(dplyr)

# set working directory (relative paths below are resolved from here)
setwd("/medpop/esp2/mesbah/projects/ch_progression/aric/epi/")

## Loading saved dataframe used in the final glm analysis
aric_baseline_n_v05 <- fread(
  "/medpop/esp2/mesbah/projects/ch_progression/aric/pheno/aric_baseline_n_v05_N3730.pheno_ch_status.noHemeCA.correct_lipids.FinalDataset_4_glm.July132023.csv",
  header = TRUE
)

nrow(aric_baseline_n_v05)

table(aric_baseline_n_v05$incident_CH,
      exclude = NULL)

# Follow-up time = Age column minus age_base column
aric_baseline_n_v05$Time_Followup <- aric_baseline_n_v05$Age - aric_baseline_n_v05$age_base

summary(aric_baseline_n_v05$Time_Followup)

## All VAR filters:
# DP>=20
# AD>=5
# FR/RR>=2
# VAF>=2%
# No prevalent CHIP clones (i.e.  0<=VAF<0.1% at baseline visit)
cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected <- fread(
  "../pheno/cln_grt.vaf2.DP20_base_allAD5FRRR2.modified_hiseq.stringent.29Nov2023.csv",
  header = TRUE
)
nrow(cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected)

# Participants with a clone in the stringent table whose VAF.v2 is < 0.001.
# VAF.v2 is compared unrounded: round(VAF.v2, 2) < 0.001 was TRUE for any VAF
# below ~0.005, i.e. it applied a ~0.5% threshold instead of the stated 0.1%.
# NOTE(review): clones with a missing VAF.v2 do not count as passing - confirm
# that "no prevalent clone detected" is coded as 0 rather than NA.
ids_no_prevalent_clone <- cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected$ARIC_ID[
  which(cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected$VAF.v2 < 0.001)
]

# 1 = incident CH meeting the criterion, 0 = no incident CH,
# NA = incident CH not meeting the criterion (dropped from the models)
aric_baseline_n_v05$incident_CH_DPbase20VAFbase001.allAD5FRRR2 <- ifelse(
  aric_baseline_n_v05$incident_CH == 1 &
    aric_baseline_n_v05$ARIC_ID %in% ids_no_prevalent_clone,
  1,
  ifelse(aric_baseline_n_v05$incident_CH == 0, 0, NA)
)

table(aric_baseline_n_v05$incident_CH_DPbase20VAFbase001.allAD5FRRR2, exclude = NULL)

In [None]:
#### Univariable
## One logistic regression per (exposure, outcome) pair, restricted to rows
## where incident_CH_DPbase20VAFbase001.allAD5FRRR2, the exposure and the
## outcome are all non-missing. One CSV row is appended per model.

univ_file <- "sensitivity02.final_glm.univariable.incident_CH_DPbase20VAFbase001.allAD5FRRR2.2024Jul22.csv"

# Header row; append = FALSE starts a fresh file on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "Cases", "Controls", "N"), collapse = ","),
    "\n", sep = "", file = univ_file, append = FALSE)

exposures <- c("age_base", "bmi_base_INT",
               "chol_base_INT", "ldl_base_INT",
               "hdl_base_INT", "tg_base_INT",
               "nonHDL_base_INT", "tg_to_hdl_base_INT",
               "ldl_base_nomal_vs_high", "Dyslipidemia",
               "hdl_base_low",
               "Sex", "race_BW", "ever_smoke",
               "dm_126_base", "htn_5_base",
               "chd_is_base", "Time_Followup")

ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

for (i in exposures) {
  for (j in ch_phenotype) {
    cat("outcome:", j, " exposure:", i, "\n")

    # Drop rows with a missing sensitivity flag, exposure or outcome
    analysis_data <- aric_baseline_n_v05 %>%
      filter(!is.na(incident_CH_DPbase20VAFbase001.allAD5FRRR2),
             !is.na(.data[[i]]),
             !is.na(.data[[j]]))

    # Keep the fitted object so the rows actually used can be read from it
    # directly (the summary object does not carry the model frame).
    fit <- glm(reformulate(i, response = j),
               data = analysis_data, family = "binomial")
    coef_table <- summary(fit)$coefficients

    # Count cases/controls by value rather than by table position, so an
    # outcome with a single observed level gives 0 instead of NA.
    # Assumes outcomes are coded 0/1 (incident_CH is compared to 0/1 upstream).
    outcome_used <- model.response(model.frame(fit))
    n_cases <- sum(outcome_used == 1)
    n_controls <- sum(outcome_used == 0)
    n_total <- length(outcome_used)

    cat(j, "~", i, ": Number of controls:", n_controls, "\n")
    cat(j, "~", i, ": Number of cases:", n_cases, "\n")

    # Row 2 is the (first) exposure coefficient. It is absent when the
    # exposure is constant in the analysis subset (aliased coefficient).
    if (nrow(coef_table) >= 2) {
      exposure_stats <- coef_table[2, 1:4]
    } else {
      warning("No estimable coefficient for ", i, " in model of ", j,
              call. = FALSE)
      exposure_stats <- rep(NA_real_, 4)
    }

    cat(paste(c("Univariable", j, i, exposure_stats,
                n_cases, n_controls, n_total), collapse = ","),
        "\n", sep = "", file = univ_file, append = TRUE)
  }
}



## 02. Sensitivity Analysis - Multivariable 

In [None]:
## Clear every object from the workspace, then rebuild the Analysis 2 inputs
# NOTE(review): rm(list = ls()) and setwd() are kept from the original cell;
# they make the cell depend on, and alter, the interactive session state.
rm(list = ls())
###

library(data.table) # version 1.14.6

library(dplyr)

# set working directory (relative paths below are resolved from here)
setwd("/medpop/esp2/mesbah/projects/ch_progression/aric/epi/")

## Loading saved dataframe used in the final glm analysis
aric_baseline_n_v05 <- fread(
  "/medpop/esp2/mesbah/projects/ch_progression/aric/pheno/aric_baseline_n_v05_N3730.pheno_ch_status.noHemeCA.correct_lipids.FinalDataset_4_glm.July132023.csv",
  header = TRUE
)

nrow(aric_baseline_n_v05)

table(aric_baseline_n_v05$incident_CH,
      exclude = NULL)

# Follow-up time = Age column minus age_base column
aric_baseline_n_v05$Time_Followup <- aric_baseline_n_v05$Age - aric_baseline_n_v05$age_base

summary(aric_baseline_n_v05$Time_Followup)

## All VAR filters:
# DP>=20
# AD>=5
# FR/RR>=2
# VAF>=2%
# No prevalent CHIP clones (i.e.  0<=VAF<0.1% at baseline visit)
cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected <- fread(
  "../pheno/cln_grt.vaf2.DP20_base_allAD5FRRR2.modified_hiseq.stringent.29Nov2023.csv",
  header = TRUE
)
nrow(cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected)

# Participants with a clone in the stringent table whose VAF.v2 is < 0.001.
# VAF.v2 is compared unrounded: round(VAF.v2, 2) < 0.001 was TRUE for any VAF
# below ~0.005, i.e. it applied a ~0.5% threshold instead of the stated 0.1%.
# NOTE(review): clones with a missing VAF.v2 do not count as passing - confirm
# that "no prevalent clone detected" is coded as 0 rather than NA.
ids_no_prevalent_clone <- cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected$ARIC_ID[
  which(cln_grt.vaf2.DP20_base_allAD5FRRR2.corrected$VAF.v2 < 0.001)
]

# 1 = incident CH meeting the criterion, 0 = no incident CH,
# NA = incident CH not meeting the criterion (dropped from the models)
aric_baseline_n_v05$incident_CH_DPbase20VAFbase001.allAD5FRRR2 <- ifelse(
  aric_baseline_n_v05$incident_CH == 1 &
    aric_baseline_n_v05$ARIC_ID %in% ids_no_prevalent_clone,
  1,
  ifelse(aric_baseline_n_v05$incident_CH == 0, 0, NA)
)

table(aric_baseline_n_v05$incident_CH_DPbase20VAFbase001.allAD5FRRR2, exclude = NULL)

In [None]:
## Multivariable
## adjusted for age, Sex, Race, Smoking, bmi, non-HDL-C, hdl-c, t2d, htn, ascvd,
## chol_med, batch (visit, center)
## One model per outcome; one CSV row per tested exposure.

multi_file <- "sensitivity02.final_glm.multivariable.incident_CH_DPbase20VAFbase001.allAD5FRRR2.2024Jul22.csv"

# Header row; append = FALSE starts a fresh file on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "Cases", "Controls", "N"), collapse = ","),
    "\n", sep = "", file = multi_file, append = FALSE)

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

# Exposures whose coefficients are reported
test_exposures <- c("age_base", "Sex", "race_BW",
                    "ever_smoke", "bmi_base_INT",
                    "nonHDL_base_INT", "hdl_base_INT",
                    "dm_126_base", "htn_5_base",
                    "chd_is_base")

# Full right-hand side: reported exposures plus adjustment-only covariates
model_terms <- c(test_exposures, "chol_med_base", "Center", "v2_vs_other")

for (j in ch_phenotype) {
  analysis_data <- aric_baseline_n_v05 %>%
    filter(!is.na(incident_CH_DPbase20VAFbase001.allAD5FRRR2),
           !is.na(.data[[j]]))

  # The model does not depend on which exposure is being reported, so it is
  # fitted once per outcome instead of once per (outcome, exposure) pair.
  fit <- glm(reformulate(model_terms, response = j),
             data = analysis_data, family = "binomial")
  coef_table <- summary(fit)$coefficients

  # Rows actually used by glm() after it drops incomplete covariates.
  # Assumes outcomes are coded 0/1.
  outcome_used <- model.response(model.frame(fit))
  n_cases <- sum(outcome_used == 1)
  n_controls <- sum(outcome_used == 0)
  n_total <- length(outcome_used)

  # Map every design-matrix column to the model term that produced it, so a
  # coefficient is looked up by term rather than by row position (position
  # k + 1 is wrong as soon as a term is aliased or has more than one column).
  design <- model.matrix(fit)
  column_term <- c("(Intercept)", attr(terms(fit), "term.labels"))[
    attr(design, "assign") + 1
  ]

  for (k in seq_along(test_exposures)) {
    exposure <- test_exposures[k]
    cat("outcome:", j, " exposure:", exposure, "\n")
    cat("total N=", n_total, "\n")
    cat(j, "~", exposure, ": Number of controls:", n_controls, "\n")
    cat(j, "~", exposure, ": Number of cases:", n_cases, "\n")

    # Aliased coefficients are absent from the summary table, hence intersect
    coef_rows <- intersect(colnames(design)[column_term == exposure],
                           rownames(coef_table))
    if (length(coef_rows) == 1) {
      exposure_stats <- coef_table[coef_rows, 1:4]
    } else {
      warning("Expected exactly one coefficient for ", exposure,
              " in model of ", j, " but found ", length(coef_rows),
              "; writing NA", call. = FALSE)
      exposure_stats <- rep(NA_real_, 4)
    }

    cat(paste(c("Adjusted_no_dAge", j, exposure, exposure_stats,
                n_cases, n_controls, n_total), collapse = ","),
        "\n", sep = "", file = multi_file, append = TRUE)
  }
}

################
### Remove the fitted model and related temporaries
rm(fit, coef_table, analysis_data, outcome_used, design, column_term)

################
##### secondary analysis
## Same multivariable model with Time_Followup (Age - age_base) added.
## Rows are appended to the multivariable CSV; its header row is written by the
## preceding "Adjusted_no_dAge" section.
#####
multi_file <- "sensitivity02.final_glm.multivariable.incident_CH_DPbase20VAFbase001.allAD5FRRR2.2024Jul22.csv"

# Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

# Exposures whose coefficients are reported
test_exposures <- c("age_base", "Sex", "race_BW",
                    "ever_smoke", "bmi_base_INT",
                    "nonHDL_base_INT", "hdl_base_INT",
                    "dm_126_base", "htn_5_base",
                    "chd_is_base", "Time_Followup")

# Full right-hand side: reported exposures plus adjustment-only covariates
model_terms <- c(test_exposures, "chol_med_base", "Center", "v2_vs_other")

for (j in ch_phenotype) {
  analysis_data <- aric_baseline_n_v05 %>%
    filter(!is.na(incident_CH_DPbase20VAFbase001.allAD5FRRR2),
           !is.na(.data[[j]]))

  # Fitted once per outcome: the model is the same for every reported exposure.
  fit <- glm(reformulate(model_terms, response = j),
             data = analysis_data, family = "binomial")
  coef_table <- summary(fit)$coefficients

  # Rows actually used by glm() after it drops incomplete covariates.
  # Assumes outcomes are coded 0/1.
  outcome_used <- model.response(model.frame(fit))
  n_cases <- sum(outcome_used == 1)
  n_controls <- sum(outcome_used == 0)
  n_total <- length(outcome_used)

  # Term that produced each design-matrix column; used to look coefficients up
  # by term instead of by row position k + 1.
  design <- model.matrix(fit)
  column_term <- c("(Intercept)", attr(terms(fit), "term.labels"))[
    attr(design, "assign") + 1
  ]

  for (k in seq_along(test_exposures)) {
    exposure <- test_exposures[k]
    cat("outcome:", j, " exposure:", exposure, "\n")
    cat("total N=", n_total, "\n")
    cat(j, "~", exposure, ": Number of controls:", n_controls, "\n")
    cat(j, "~", exposure, ": Number of cases:", n_cases, "\n")

    # Aliased coefficients are absent from the summary table, hence intersect
    coef_rows <- intersect(colnames(design)[column_term == exposure],
                           rownames(coef_table))
    if (length(coef_rows) == 1) {
      exposure_stats <- coef_table[coef_rows, 1:4]
    } else {
      warning("Expected exactly one coefficient for ", exposure,
              " in model of ", j, " but found ", length(coef_rows),
              "; writing NA", call. = FALSE)
      exposure_stats <- rep(NA_real_, 4)
    }

    cat(paste(c("Adjusted_with_dAge", j, exposure, exposure_stats,
                n_cases, n_controls, n_total), collapse = ","),
        "\n", sep = "", file = multi_file, append = TRUE)
  }
}