# Supplementary Figure 8 

# Associations between incident CHIP categories and 
## (a) triglyceride to high-density lipoprotein cholesterol (TG/HDL-C) ratio, 
## (b) dyslipidemia, and 
## (c) male sex stratified by smoking status (never vs. ever) were examined.

In [None]:
library(data.table) # version 1.14.6

library(dplyr)

### 
  ### source:  https://www.biostars.org/p/80597/ and the supplement of Yang et al. Nature 2012.
# Rank-based inverse normal transformation.
#
# x: numeric vector; missing values are allowed.
#
# Returns a numeric vector of the same length as `x`. Each non-missing value
# is replaced by the standard-normal quantile of (rank - 0.5) / n, where n is
# the number of non-missing values. Missing values stay NA because ranks are
# computed with na.last = "keep".
INT_yang2012 <- function(x){
  n_observed <- sum(!is.na(x))
  ranks <- rank(x, na.last = "keep")
  qnorm((ranks - 0.5) / n_observed)
}
# set working directory
# The result CSVs written further down are given as bare file names, so they
# are created relative to this directory. The input phenotype file is read
# from an absolute path and is not affected by this call.
setwd("/medpop/esp2/mesbah/projects/ch_progression/aric/epi/")

In [None]:
## Load 3730 participants w/o prevalent CHIP and heme CA
## `header = TRUE` is spelled out: `T` is an ordinary variable that can be
## reassigned, whereas `TRUE` is a reserved word.
aric_baseline_n_v05 <- fread(
  "/medpop/esp2/mesbah/projects/ch_progression/aric/pheno/aric_baseline_n_v05_N3730.pheno_ch_status.noHemeCA.correct_lipids.FinalDataset_4_glm.July132023.csv",
  header = TRUE
)

## Interactive sanity checks: dimensions, workspace objects, column names.
nrow(aric_baseline_n_v05)

ncol(aric_baseline_n_v05)
ls()
names(aric_baseline_n_v05)

## Load 4187 participants before excluding prev. CHIP
# aric_baseline_n_v05 <- fread("../pheno/aric_baseline_n_v05_N4187.pheno_ch_status.noHemeCA.correct_lipids.Jun3May2023.csv", header=T)
### Exclude Prev. CH 
# aric_baseline_n_v05 <- subset(aric_baseline_n_v05, !is.na(aric_baseline_n_v05$incident_CH))
# nrow(aric_baseline_n_v05)

## a) TG/HDL-C ratio (Triglyceride to high-density lipoprotein cholesterol)

* Not adjusted for cholesterol medication
* Not adjusted for HDL-C
* Not adjusted for non-HDL-C


In [None]:
# cat(gsub(pattern = ", ", replacement = ",", x = toString(
#  c("Dataset","Outcome", "Exposure","Beta", "SE", "t-stat", "P"))),
#  file = "final_glm.multivariable_atherogenic_lipid.incident_ch.2023Jul07.csv", append = F, fill = T)


## Logistic regression of each incident CHIP outcome on the TG/HDL-C ratio
## exposure plus covariates. One CSV row is written per outcome/exposure pair,
## and the case/control counts of the fitted sample are echoed to the console.

## Single definition of the output path, shared by the header and data rows.
tg_to_hdl_out_file <- "TGtoHDL_base_INT.notadjusted_cholMed_hdl_nonhdl.final_glm.multivariable.incident_ch.2024Jul22.csv"

## CSV header; append = FALSE starts the file afresh on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "Cases", "Controls", "N"),
          collapse = ","),
    file = tg_to_hdl_out_file, append = FALSE, fill = TRUE)

### Exposures
exposures <- "tg_to_hdl_base_INT"

### Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

### Covariates entered after the exposure in every model
tg_to_hdl_covariates <- c("age_base", "Sex", "race_BW",
                          "ever_smoke", "bmi_base_INT",
                          "dm_126_base", "htn_5_base", "chd_is_base",
                          "Center", "v2_vs_other")

for (j in ch_phenotype) {

  for (k in exposures) {

    cat("outcome:",j," exposure:", k,"\n")

    # Build `<outcome> ~ <exposure> + covariates` from the column names.
    # The exposure is listed first so its coefficient is row 2 of the
    # coefficient table (row 1 is the intercept).
    tg_to_hdl_formula <- reformulate(c(k, tg_to_hdl_covariates), response = j)

    # Keep the fitted glm object: it stores the model frame that was actually
    # used. A summary.glm object does not, so calling model.frame() on the
    # summary would have to re-evaluate the original call.
    fit_tg_to_hdl <- aric_baseline_n_v05 %>%
      filter(!is.na(.data[[j]])) %>%
      glm(tg_to_hdl_formula, data = ., family = "binomial")

    model_tg_to_hdl <- summary(fit_tg_to_hdl)

    # Rows that entered the fit (rows with a missing model variable are absent).
    model_data_tg_to_hdl <- model.frame(fit_tg_to_hdl)

    # Outcome counts in the fitted sample. table() sorts its levels, so the
    # first entry is reported as controls and the second as cases.
    # NOTE(review): assumes the outcome is coded so controls sort first
    # (e.g. 0/1) - confirm against the phenotype file.
    case_control_tg_to_hdl <- table(model_data_tg_to_hdl[[j]])
    n_tg_to_hdl <- nrow(model_data_tg_to_hdl)

    cat(j, "~", k, ": Number of controls:", case_control_tg_to_hdl[1], "\n")

    cat(j, "~", k, ": Number of cases:", case_control_tg_to_hdl[2], "\n")

    cat(j, "~", k, ": Number of participants with TG/HDL-C:", n_tg_to_hdl, "\n")

    # One result row: estimate, SE, z value and p-value of the exposure,
    # followed by cases, controls and N.
    cat(paste(c("Adjusted", j, k,
                model_tg_to_hdl$coefficients[2, 1:4],
                case_control_tg_to_hdl[2],
                case_control_tg_to_hdl[1],
                n_tg_to_hdl),
              collapse = ","),
        file = tg_to_hdl_out_file, append = TRUE, fill = TRUE)

    cat("\n")

  }
}

## b) Dyslipidemia

* Not adjusted for cholesterol medication
* Not adjusted for HDL-C
* Not adjusted for non-HDL-C

In [None]:
## Dyslipidemia: 
## LDL-C>=160
## total Chol>=240
## Triglyceride >=200
## HDL-C<40 in Men and <50 in Women
## or use of Statin

# table( (aric_baseline_n_v05$ldl_base>=160 & 
#        aric_baseline_n_v05$chol_base>=240 & 
#        aric_baseline_n_v05$tg_base>=200) & 
#      ( (aric_baseline_n_v05$Gender=="M" & aric_baseline_n_v05$hdl_base<40) | 
#       (aric_baseline_n_v05$Gender=="F" & aric_baseline_n_v05$hdl_base<50) ) | 
#      aric_baseline_n_v05$statin_base==1)

##
# aric_baseline_n_v05$Dyslipidemia <- ifelse((aric_baseline_n_v05$ldl_base>=160 & 
#                                            aric_baseline_n_v05$chol_base>=240 & 
#                                            aric_baseline_n_v05$tg_base>=200) & 
#                                           ( (aric_baseline_n_v05$Gender=="M" & 
#                                              aric_baseline_n_v05$hdl_base<40) | 
#                                            (aric_baseline_n_v05$Gender=="F" & 
#                                             aric_baseline_n_v05$hdl_base<50) ) | 
#                                           aric_baseline_n_v05$statin_base==1, 1, 0)

# table(aric_baseline_n_v05$Dyslipidemia, exclude=NULL)

In [None]:
# cat(gsub(pattern = ", ", replacement = ",", x = toString(
#  c("Dataset","Outcome", "Exposure","Beta", "SE", "t-stat", "P"))),
#  file = "final_glm.multivariable_atherogenic_lipid.incident_ch.2023Jul07.csv", append = F, fill = T)


## Logistic regression of each incident CHIP outcome on Dyslipidemia plus
## covariates. One CSV row is written per outcome/exposure pair, and the
## outcome and exposure counts of the fitted sample are echoed to the console.

## Single definition of the output path, shared by the header and data rows.
dyslipidemia_out_file <- "Dyslipidemia.notadjusted_cholMed_hdl_nonhdl.final_glm.multivariable.incident_ch.2024Jul22.csv"

## CSV header; append = FALSE starts the file afresh on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "CHIP_Cases", "CHIP_Controls", "CHIP_N",
            "Dyslipidemia_Cases", "Dyslipidemia_Control", "Dyslipidemia_N"),
          collapse = ","),
    file = dyslipidemia_out_file, append = FALSE, fill = TRUE)

### Exposures
exposures <- "Dyslipidemia"

### Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

### Covariates entered after the exposure in every model
dyslipidemia_covariates <- c("age_base", "Sex", "race_BW",
                             "ever_smoke", "bmi_base_INT",
                             "dm_126_base", "htn_5_base", "chd_is_base",
                             "Center", "v2_vs_other")

for (j in ch_phenotype) {

  for (k in exposures) {

    cat("outcome:",j," exposure:", k,"\n")

    # Build `<outcome> ~ <exposure> + covariates` from the column names.
    # The exposure is listed first so its coefficient is row 2 of the
    # coefficient table (row 1 is the intercept).
    dyslipidemia_formula <- reformulate(c(k, dyslipidemia_covariates),
                                        response = j)

    # Keep the fitted glm object: it stores the model frame that was actually
    # used. A summary.glm object does not, so calling model.frame() on the
    # summary would have to re-evaluate the original call.
    fit_Dyslipidemia <- aric_baseline_n_v05 %>%
      filter(!is.na(.data[[j]])) %>%
      glm(dyslipidemia_formula, data = ., family = "binomial")

    model_Dyslipidemia <- summary(fit_Dyslipidemia)

    # Rows that entered the fit (rows with a missing model variable are absent).
    model_data_Dyslipidemia <- model.frame(fit_Dyslipidemia)
    n_Dyslipidemia <- nrow(model_data_Dyslipidemia)

    # Outcome counts in the fitted sample. table() sorts its levels, so the
    # first entry is reported as controls and the second as cases.
    # NOTE(review): assumes controls sort first (e.g. 0/1 coding) - confirm.
    cat("CHIP\n")
    case_control_chip <- table(model_data_Dyslipidemia[[j]])

    cat(j, "~", k, ": Number of controls:", case_control_chip[1], "\n")

    cat(j, "~", k, ": Number of cases:", case_control_chip[2], "\n")

    cat(j, "~", k, ": Number of participants with CHIP:",
        n_Dyslipidemia, "\n")

    # Exposure counts in the same fitted sample, with the same ordering
    # assumption as for the outcome.
    cat("Dyslipidemia\n")
    case_control_Dyslipidemia <- table(model_data_Dyslipidemia[[k]])

    cat(j, "~", k, ": Number of controls:", case_control_Dyslipidemia[1], "\n")

    cat(j, "~", k, ": Number of cases:", case_control_Dyslipidemia[2], "\n")

    cat(j, "~", k, ": Number of participants with Dyslipidemia:",
        n_Dyslipidemia, "\n")

    # One result row: estimate, SE, z value and p-value of the exposure,
    # then outcome cases/controls/N and exposure cases/controls/N. Both N
    # columns are the number of rows in the model frame.
    cat(paste(c("Adjusted", j, k,
                model_Dyslipidemia$coefficients[2, 1:4],
                case_control_chip[2],
                case_control_chip[1],
                n_Dyslipidemia,
                case_control_Dyslipidemia[2],
                case_control_Dyslipidemia[1],
                n_Dyslipidemia),
              collapse = ","),
        file = dyslipidemia_out_file, append = TRUE, fill = TRUE)

    cat("\n")

  }
}

## c) Male sex stratified by smoking status (never vs. ever)
### Smoking x Sex interaction

In [None]:
#####
## Logistic regression of each incident CHIP outcome on the ever_smoke x Sex
## interaction plus main effects and covariates. One CSV row holding the
## interaction coefficient is written per outcome, and the outcome, smoking
## and sex counts of the fitted sample are echoed to the console.

## Single definition of the output path, shared by the header and data rows.
sex_by_smoking_out_file <- "SexBySmoking.final_glm.multivariable.incident_ch.2024Jul22.csv"

## CSV header; append = FALSE starts the file afresh on every run.
cat(paste(c("Dataset", "Outcome", "Exposure", "Beta", "SE", "Z-value", "P",
            "CHIP_Cases", "CHIP_Controls", "CHIP_N",
            "ever_smoke_Cases", "ever_smoke_Control",
            "Females", "Males"),
          collapse = ","),
    file = sex_by_smoking_out_file, append = FALSE, fill = TRUE)

### Outcomes
ch_phenotype <- c("incident_CH",
                  "incident_DNMT3A",
                  "incident_TET2",
                  "incident_ASXL1",
                  "incident_SF",
                  "incident_DDR")

### Model terms: the interaction, its two main effects, then the covariates
sex_by_smoking_terms <- c("ever_smoke:Sex", "ever_smoke", "Sex",
                          "bmi_base_INT", "age_base", "race_BW",
                          "hdl_base_INT", "nonHDL_base_INT",
                          "dm_126_base", "htn_5_base", "chd_is_base",
                          "chol_med_base", "Center", "v2_vs_other")

for (j in ch_phenotype) {

  cat("outcome:",j," exposure: sex_by_smoking","\n")

  # Build `<outcome> ~ terms` from the column names.
  sex_by_smoking_formula <- reformulate(sex_by_smoking_terms, response = j)

  # Keep the fitted glm object: it stores the model frame that was actually
  # used. A summary.glm object does not, so calling model.frame() on the
  # summary would have to re-evaluate the original call.
  fit_sex_by_smoking <- aric_baseline_n_v05 %>%
    filter(!is.na(.data[[j]])) %>%
    glm(sex_by_smoking_formula, data = ., family = "binomial")

  model_sex_by_smoking <- summary(fit_sex_by_smoking)

  # Locate the interaction coefficient by name rather than by a fixed row
  # number. Its position depends on how many dummy columns the factor
  # covariates expand to and on whether any coefficient was dropped.
  coef_sex_by_smoking <- model_sex_by_smoking$coefficients
  interaction_row <- grep("^ever_smoke[^:]*:Sex",
                          rownames(coef_sex_by_smoking))
  if (length(interaction_row) != 1L) {
    stop("Expected exactly one ever_smoke:Sex coefficient for outcome ", j,
         " but found ", length(interaction_row), call. = FALSE)
  }

  # Rows that entered the fit (rows with a missing model variable are absent).
  model_data_SexXSmoking <- model.frame(fit_sex_by_smoking)
  n_SexXSmoking <- nrow(model_data_SexXSmoking)

  # Outcome counts in the fitted sample. table() sorts its levels, so the
  # first entry is reported as controls and the second as cases.
  # NOTE(review): assumes controls sort first (e.g. 0/1 coding) - confirm.
  cat("CHIP\n")
  case_control_chip <- table(model_data_SexXSmoking[[j]])

  cat(j,"~: Number of controls:", case_control_chip[1], "\n")

  cat(j,"~: Number of cases:", case_control_chip[2], "\n")

  cat(j,"~: Number of participants with CHIP:", n_SexXSmoking, "\n")

  # Smoking counts; the first table entry is reported as "controls" and the
  # second as "cases", with the same ordering assumption as above.
  cat("Ever Smoker\n")
  case_control_ever_smoke <- table(model_data_SexXSmoking[["ever_smoke"]])

  cat(j,"~: Number of controls:", case_control_ever_smoke[1], "\n")

  cat(j,"~: Number of cases:", case_control_ever_smoke[2], "\n")

  cat(j,"~: Number of participants with ever_smoke:", n_SexXSmoking, "\n")

  # Sex counts. The section label previously repeated "Ever Smoker".
  # NOTE(review): assumes the first level of Sex is female and the second
  # male - confirm against the coding of the Sex column.
  cat("Sex\n")
  case_control_Sex <- table(model_data_SexXSmoking[["Sex"]])

  cat(j,"~: Number of Females:", case_control_Sex[1], "\n")

  cat(j,"~: Number of Males:", case_control_Sex[2], "\n")

  cat(j,"~: Number of participants with Sex data:", n_SexXSmoking, "\n")

  # One result row: estimate, SE, z value and p-value of the interaction,
  # then CHIP cases/controls/N, smoking counts and sex counts.
  cat(paste(c("Adjusted", j, "sex_by_smoking",
              coef_sex_by_smoking[interaction_row, 1:4],
              case_control_chip[2],
              case_control_chip[1],
              n_SexXSmoking,
              case_control_ever_smoke[2],
              case_control_ever_smoke[1],
              case_control_Sex[1],
              case_control_Sex[2]),
            collapse = ","),
      file = sex_by_smoking_out_file, append = TRUE, fill = TRUE)

  cat("\n")

}



####### TG/HDL-C ratios

The triglyceride/HDL-C ratio, also known as the TG/HDL-C ratio, is a measure that combines the levels of triglycerides (TG) and high-density lipoprotein cholesterol (HDL-C) in the blood. It is used as an indicator of cardiovascular risk and can provide valuable insights into lipid metabolism and the balance between "good" and "bad" cholesterol.

To calculate the TG/HDL-C ratio, divide the triglyceride level (measured in mg/dL) by the HDL-C level (also measured in mg/dL).

The TG/HDL-C ratio is considered a useful marker of lipid abnormalities and insulin resistance, both of which are associated with an increased risk of cardiovascular disease. Higher TG levels and lower HDL-C levels are typically associated with an unfavorable lipid profile.

A higher TG/HDL-C ratio indicates a greater cardiovascular risk. It suggests an increased presence of small, dense LDL particles (which are more atherogenic) and decreased levels of beneficial HDL particles. Insulin resistance, obesity, metabolic syndrome, and diabetes are conditions commonly associated with higher TG/HDL-C ratios.

In general, a TG/HDL-C ratio below 2 is considered optimal, as it indicates a lower risk of cardiovascular disease. Ratios from 2 to just under 4 are considered average, while ratios of 4 or higher are associated with an increased risk.

It's important to note that the TG/HDL-C ratio is just one component of a comprehensive assessment of cardiovascular risk. Other factors such as blood pressure, smoking status, family history, and additional lipid parameters should also be considered when evaluating overall cardiovascular health.

####### Dyslipidemia and incident CH
According to the guidelines provided by the American Heart Association (AHA) and the American College of Cardiology (ACC), the following are the threshold values for lipid levels in mg/dL:

1. Total Cholesterol (TC):
   - Desirable level: Less than 200 mg/dL
   - Borderline high: 200-239 mg/dL
   - High: 240 mg/dL and above

2. Low-Density Lipoprotein Cholesterol (LDL-C):
   - Optimal: Less than 100 mg/dL
   - Near optimal/above optimal: 100-129 mg/dL
   - Borderline high: 130-159 mg/dL
   - High: 160-189 mg/dL
   - Very high: 190 mg/dL and above

3. High-Density Lipoprotein Cholesterol (HDL-C):
   - Low: Less than 40 mg/dL (in men), less than 50 mg/dL (in women)
   - High: 60 mg/dL and above (considered protective against heart disease)

4. Triglycerides:
   - Normal: Less than 150 mg/dL
   - Borderline high: 150-199 mg/dL
   - High: 200-499 mg/dL
   - Very high: 500 mg/dL and above

These thresholds may be used as a general guideline for assessing lipid levels in the United States. However, it's important to consult with a healthcare professional who can evaluate your specific health situation, other risk factors, and determine the most appropriate management strategy for dyslipidemia.