## Initialize

In [None]:
#library(Rmisc)
library(tidyverse)
library(glue)
library(arrow)
library(patchwork)

In [None]:
# Choose the project root from the host name: machines whose node name
# contains "sc" use the /sc-projects mount, everything else the shared drive.
base_path <- if (grepl("sc", Sys.info()[["nodename"]], fixed = TRUE)) {
    "/sc-projects/sc-proj-ukb-cvd"
} else {
    "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
}
print(base_path)

# Input locations, all derived from base_path.
dataset_name <- "210714_metabolomics"
path <- "/data/analysis/ag-reils/steinfej/code/umbrella/pre/ukbb"
data_path <- glue("{base_path}/data")
dataset_path <- glue("{data_path}/3_datasets_post/{dataset_name}")

# Result locations for this project (figures and derived data).
project_label <- "21_metabolomics_multitask"
project_path <- glue("{base_path}/results/projects/{project_label}")
figures_path <- glue("{project_path}/figures")
data_results_path <- glue("{project_path}/data")

## Load data

In [None]:
# Model score identifiers of interest.
scores <- c(
    "COX_Age+Sex",
    "COX_Metabolomics",
    "DS_Metabolomics",
    "DS_Age+Sex+Metabolomics",
    "DS_AgeSexMetabolomics"
)

In [None]:
library(ggalt)

In [None]:
library("jsonlite")
# Load the colour definitions from colors.json; the path is relative, so it is
# resolved against the current working directory.
colors_path = "colors.json"
colors_dict = read_json(colors_path)

In [None]:
# Colour assigned to each model score; the grey entries come from the
# palette loaded into colors_dict, the rest are fixed hex codes.
color_map <- c(
    "all" = "grey",
    "none" = "black",

    # Age + sex baseline
    "COX_Age+Sex" = colors_dict$pastel$grey$light,

    # Metabolomics-only models share one blue
    "PCA_Metabolomics" = "#4F8EC1",
    "COX_Metabolomics" = "#4F8EC1",
    "DS_Metabolomics" = "#4F8EC1",

    # Clinical baselines
    "COX_ASCVD" = colors_dict$pastel$grey$light,
    "COX_PANEL" = colors_dict$pastel$grey$light,

    # Baselines combined with metabolomics
    "DS_Age+Sex+Metabolomics" = "#53dd6c",
    "DS_SCORE2+Metabolomics" = colors_dict$pastel$red$mid,
    "DS_ASCVD+Metabolomics" = "#d8315b",
    "DS_PANEL+Metabolomics" = "#1e1b18"
)

In [None]:
# Every score name that has a colour assigned.
scores_full <- names(color_map)

In [None]:
# Read the UKB C-index benchmark and add a combined "score" column built
# from module and features (the source columns are kept).
name <- glue("benchmark_cindex_220126")
benchmark_cindex_general <- glue("{data_results_path}/{name}.feather") %>%
    read_feather() %>%
    distinct() %>%
    unite("score", c(module, features), remove = FALSE) %>%
    distinct()

In [None]:
library(data.table)

In [None]:
# Cox summary file for LLS, tagged with its cohort label.
benchmarks_LLS <- mutate(
    fread("ExternalCohorts/LLS_PAROFFS/coxsummaries_LLS_PAROFFS_220406.csv"),
    cohort = "LLS"
)

In [None]:
benchmarks_LLS

In [None]:
# Cox summary file for PROSPER, tagged with its cohort label.
benchmarks_PROSPER <- mutate(
    fread("ExternalCohorts/PROSPER/coxsummaries_PROSPER_220331.csv"),
    cohort = "PROSPER"
)

In [None]:
# Cox summary file from the ROTTERDAM folder; labelled "RS" downstream.
benchmarks_ROTTERDAM <- mutate(
    fread("ExternalCohorts/ROTTERDAM/coxsummaries_bbmri_220331.csv"),
    cohort = "RS"
)

In [None]:
# WHII C-index results: one file per scaling variant, stacked with a
# "scaler" tag, restricted to the rows with time == 10, and relabelled to
# the short feature names used for the other cohorts. Feature sets not listed
# in the case_when become NA.
benchmarks_whii <- bind_rows(
    fread("ExternalCohorts/WHII/benchmark_cindex100_ukbscaled.csv") %>%
        mutate(scaler = "ukbscaled"),
    fread("ExternalCohorts/WHII/benchmark_cindex100_whiiscaled.csv") %>%
        mutate(scaler = "rescaled")
) %>%
    mutate(cohort = "WHII") %>%
    filter(time == 10) %>%
    select(cohort, endpoint, features, scaler, iteration, cindex) %>%
    mutate(
        features = case_when(
            features == "Age+Sex" ~ "Age+Sex",
            features == "Metabolomics" ~ "MET",
            features == "Age+Sex+Metabolomics" ~ "Age+Sex+MET",
            features == "ASCVD" ~ "ASCVD",
            features == "ASCVD+Metabolomics" ~ "ASCVD+MET"
        )
    )

In [None]:
# Check which feature labels are present after relabelling.
unique(benchmarks_whii$features)

In [None]:
# Stack the three BBMRI cohorts and harmonise their labels: feature names get
# the "Age+Sex" spelling, "ukbbscaled" becomes "ukbscaled", and cindex_t10 is
# exposed as cindex. Values not listed in a case_when become NA.
benchmarks_bbmri <- bind_rows(benchmarks_LLS, benchmarks_PROSPER, benchmarks_ROTTERDAM) %>%
    mutate(
        features = case_when(
            features == "AgeSex" ~ "Age+Sex",
            features == "MET" ~ "MET",
            features == "AgeSex+MET" ~ "Age+Sex+MET",
            features == "ASCVD" ~ "ASCVD",
            features == "ASCVD+MET" ~ "ASCVD+MET"
        ),
        scaler = case_when(
            scaler == "ukbbscaled" ~ "ukbscaled",
            scaler == "rescaled" ~ "rescaled"
        )
    ) %>%
    select(cohort, endpoint, features, scaler, iteration, cindex = cindex_t10)
benchmarks_bbmri %>% sample_n(5)

In [None]:
# Scores available in the UKB benchmark.
unique(benchmark_cindex_general$score)

In [None]:
# UKB benchmark rows relabelled to the short feature names shared with the
# external cohorts. Scores not listed below map to NA and are dropped
# explicitly with is.na().
benchmarks_UKB_labelled <- benchmark_cindex_general %>%
    mutate(
        cohort = "UKB",
        features = case_when(
            score == "COX_Age+Sex" ~ "Age+Sex",
            score == "DS_Metabolomics" ~ "MET",
            score == "DS_Age+Sex+Metabolomics" ~ "Age+Sex+MET",
            score == "COX_ASCVD" ~ "ASCVD",
            score == "DS_ASCVD+Metabolomics" ~ "ASCVD+MET"
        )
    ) %>%
    filter(!is.na(features))

# The same UKB results are emitted once per scaler label, so UKB rows are
# present whichever scaler is selected below.
benchmarks_UKB1 <- benchmarks_UKB_labelled %>%
    mutate(scaler = "ukbscaled") %>%
    select(cohort, endpoint, features, scaler, iteration, cindex)

benchmarks_UKB2 <- benchmarks_UKB_labelled %>%
    mutate(scaler = "rescaled") %>%
    select(cohort, endpoint, features, scaler, iteration, cindex)

benchmarks_UKB <- bind_rows(benchmarks_UKB1, benchmarks_UKB2)

In [None]:
# All cohorts combined, keeping only the rows tagged "rescaled".
benchmarks_all <- bind_rows(benchmarks_UKB, benchmarks_bbmri, benchmarks_whii) %>%
    filter(scaler == "rescaled")

In [None]:
# Sample size (n) and event count (nevent) per BBMRI cohort and endpoint,
# taken from the iteration-0 rows of the "rescaled" summaries and limited to
# the three feature sets without ASCVD.
counts_bbmri <- bind_rows(benchmarks_LLS, benchmarks_PROSPER, benchmarks_ROTTERDAM) %>%
    mutate(
        features = case_when(
            features == "AgeSex" ~ "Age+Sex",
            features == "MET" ~ "MET",
            features == "AgeSex+MET" ~ "Age+Sex+MET",
            features == "ASCVD" ~ "ASCVD",
            features == "ASCVD+MET" ~ "ASCVD+MET"
        ),
        scaler = case_when(
            scaler == "ukbbscaled" ~ "ukbscaled",
            scaler == "rescaled" ~ "rescaled"
        )
    ) %>%
    filter(iteration == 0, scaler == "rescaled") %>%
    select(cohort, endpoint, features, n, nevent) %>%
    filter(features %in% c("MET", "Age+Sex", "Age+Sex+MET"))

In [None]:
# UKB counts: within the rows where group == "age", count rows (n) and sum
# the event_10 column (nevent) per endpoint.
counts_ukb_raw <- arrow::read_feather(glue("{data_results_path}/nevents_UKB_220401.feather"))
counts_ukb <- counts_ukb_raw %>%
    filter(group == "age") %>%
    group_by(endpoint) %>%
    summarise(n = n(), nevent = sum(event_10)) %>%
    mutate(cohort = "UKB", .before = endpoint)

In [None]:
# WHII sample sizes and event counts per endpoint, entered as literals
# (original cell note: "new 10y").
counts_WHII <- tribble(
    ~cohort, ~endpoint,                                  ~n,   ~nevent,
    "WHII",  "M_all_cause_dementia",                     6117, 39,
    "WHII",  "M_asthma",                                 5596, 186,
    "WHII",  "M_cerebral_stroke",                        5739, 232,
    "WHII",  "M_chronic_obstructuve_pulmonary_disease",  6115, 80,
    "WHII",  "M_coronary_heart_disease",                 5807, 402,
    "WHII",  "M_heart_failure",                          6096, 64,
    "WHII",  "M_MACE",                                   5619, 304,
    "WHII",  "M_parkinsons_disease",                     6117, 31,
    "WHII",  "M_type_2_diabetes",                        5917, 302
)
select(counts_WHII, cohort, endpoint, nevent, n)

In [None]:
# One row per cohort/endpoint/count combination across all cohorts, with
# cohorts ordered for plotting.
counts_all <- bind_rows(counts_bbmri, counts_ukb, counts_WHII) %>%
    distinct(cohort, endpoint, n, nevent) %>%
    mutate(cohort = factor(cohort, levels = c("UKB", "WHII", "RS", "LLS", "PROSPER")))
counts_all

In [None]:
library(ggthemes)
# Display label for each endpoint code.
endpoint_map <- c(
    "M_MACE" = "MACE",
    "M_all_cause_dementia" = "Dementia",
    "M_type_2_diabetes" = "T2 Diabetes",
    "M_liver_disease" = "Liver Disease",
    "M_renal_disease" = "Renal Disease",
    "M_atrial_fibrillation" = "Atrial Fibrillation",
    "M_heart_failure" = "Heart Failure",
    "M_coronary_heart_disease" = "CHD",
    "M_venous_thrombosis" = "Ven. Thrombosis",
    "M_cerebral_stroke" = "Cerebral Stroke",
    "M_abdominal_aortic_aneurysm" = "AAA",
    "M_peripheral_arterial_disease" = "PAD",
    "M_chronic_obstructuve_pulmonary_disease" = "COPD",
    "M_asthma" = "Asthma",
    "M_parkinsons_disease" = "Parkinson's",
    "M_lung_cancer" = "Lung Cancer",
    "M_non_melanoma_skin_cancer" = "Skin Cancer",
    "M_colon_cancer" = "Colon Cancer",
    "M_rectal_cancer" = "Rectal Cancer",
    "M_prostate_cancer" = "Prostate Cancer",
    "M_breast_cancer" = "Breast Cancer",
    "M_cataracts" = "Cataracts",
    "M_glaucoma" = "Glaucoma",
    "M_fractures" = "Fractures"
)

# Order in which endpoints appear in figures and tables.
endpoint_order <- c(
    "M_MACE",
    "M_coronary_heart_disease",
    "M_cerebral_stroke",
    "M_all_cause_dementia",
    "M_heart_failure",
    "M_atrial_fibrillation",
    "M_type_2_diabetes",
    "M_liver_disease",
    "M_renal_disease",
    "M_peripheral_arterial_disease",
    "M_venous_thrombosis",
    "M_abdominal_aortic_aneurysm",
    "M_chronic_obstructuve_pulmonary_disease",
    "M_asthma",
    "M_parkinsons_disease",
    "M_cataracts",
    "M_glaucoma",
    "M_fractures",
    "M_lung_cancer",
    "M_non_melanoma_skin_cancer",
    "M_colon_cancer",
    "M_rectal_cancer",
    "M_prostate_cancer",
    "M_breast_cancer"
)

In [None]:
# Endpoint labels wrapped at a width of 20, keyed by the same endpoint codes.
em_wrap <- setNames(
    str_wrap(unname(endpoint_map), width = 20),
    names(endpoint_map)
)
em_wrap

In [None]:
library(ggdist)

In [None]:
library(ggtext)

In [None]:
library(ggforestplot)

In [None]:
library(cowplot)

## Figure 3 - Performance in Context

In [None]:
library(scales)

In [None]:
# Feature sets to show, in plotting order.
features_plot <- c(
    "MET",
    "Age+Sex",
    "Age+Sex+MET",
    "ASCVD",
    "ASCVD+MET"
)

# Order in which endpoints appear in figures and tables.
endpoint_order <- c(
    "M_MACE", "M_coronary_heart_disease", "M_cerebral_stroke",
    "M_all_cause_dementia", "M_heart_failure", "M_atrial_fibrillation",
    "M_type_2_diabetes", "M_liver_disease", "M_renal_disease",
    "M_peripheral_arterial_disease", "M_venous_thrombosis", "M_abdominal_aortic_aneurysm",
    "M_chronic_obstructuve_pulmonary_disease", "M_asthma", "M_parkinsons_disease",
    "M_cataracts", "M_glaucoma", "M_fractures",
    "M_lung_cancer", "M_non_melanoma_skin_cancer", "M_colon_cancer",
    "M_rectal_cancer", "M_prostate_cancer", "M_breast_cancer"
)

# Benchmark rows restricted to the plotted feature sets. Converting to a
# factor turns every label outside features_plot (and every missing label)
# into NA, so one is.na() filter removes them all.
temp <- benchmarks_all %>%
    mutate(features = factor(features, levels = features_plot)) %>%
    filter(!is.na(features)) %>%
    mutate(endpoint = factor(endpoint, levels = endpoint_order))

# Per-group summary. Note that the column named "mean" holds the median
# C-index, and "max" is computed without na.rm.
temp_desc <- temp %>%
    group_by(cohort, scaler, endpoint, features) %>%
    summarise(mean = median(cindex, na.rm = TRUE), max = max(cindex), .groups = "drop")

In [None]:
# Endpoints shown in the figures. M_MACE, M_asthma and M_parkinsons_disease
# are deliberately left out.
endpoints_plot <- c(
    "M_cerebral_stroke",
    "M_type_2_diabetes",
    "M_coronary_heart_disease",
    "M_heart_failure",
    "M_chronic_obstructuve_pulmonary_disease",
    "M_all_cause_dementia",
    "M_atrial_fibrillation"
)

In [None]:
# Colours keyed by the short feature labels used in the plots. This replaces
# the score-keyed color_map defined earlier in the notebook.
color_map <- c(
    "Age+Sex" = colors_dict$pastel$grey$light,
    "MET" = "#4F8EC1",
    "ASCVD" = colors_dict$pastel$grey$light,
    "Age+Sex+MET" = "#53dd6c",
    # The feature label used throughout is "ASCVD+MET"; the original key
    # "ASCVD+Metabolomics" never matched it, so ASCVD+MET had no colour.
    "ASCVD+MET" = "#d8315b",
    # Old key kept so any lookup by the previous name still works.
    "ASCVD+Metabolomics" = "#d8315b"
)

In [None]:
# Font sizes shared by the figures.
base_size <- 7
title_size <- 7
facet_size <- 7
geom_text_size <- 3
library(ggplot2)

# Global ggplot theme: classic theme with small black axis text, thin axis
# lines and ticks, no facet-strip background, and the legend at the bottom.
theme_set(
    theme_classic(base_size = base_size) +
        theme(
            strip.background = element_blank(),
            plot.title = element_text(size = title_size, hjust = 0),
            strip.text.x = element_text(size = facet_size),
            axis.title = element_text(size = 7),
            axis.text = element_text(size = 7, color = "black"),
            axis.text.x = element_text(size = 7, color = "black"),
            legend.position = "bottom",
            axis.line = element_line(size = 0.2),
            axis.ticks = element_line(size = 0.2)
        )
)

In [None]:
# Notebook display size for this figure.
plot_width=4; plot_height=8; plot_dpi=320
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res=plot_dpi)

# Event/sample counts for the plotted endpoints, with the same endpoint factor
# levels as the plot data so the facets line up.
counts_temp = counts_all %>% filter(endpoint %in% endpoints_plot) %>% 
    mutate(endpoint = factor(endpoint, levels=endpoint_order))

# C-index distributions for Age+Sex, MET and Age+Sex+MET: one facet row per
# endpoint, one facet column per cohort.
met_discrimination = temp %>%
    filter(features %in% c("Age+Sex", "MET", "Age+Sex+MET")) %>% 
    filter(scaler=="rescaled") %>%
    filter(endpoint %in% endpoints_plot) %>%
    mutate(endpoint = factor(endpoint, levels=endpoint_order)) %>%
    mutate(cohort = factor(cohort, levels=c("UKB", "WHII", "RS", "LLS", "PROSPER"))) %>%
    #filter(cohort="UKB") %>% 
    ggplot(aes(x=features)) +
    labs(x=NULL, y="Absolute C-Index")+#, title="Metabolomics (orange) vs. Age+Sex (grey) vs. AgeSexMetabolomic (red)")+
    # "nevent/n" label pinned to the top of each panel (y=Inf), centred on the second x position.
    geom_text(data=counts_temp, mapping=aes(label=glue("{nevent}/{n}"), x=2, y=Inf), hjust=0.5, vjust=1, size=2, alpha=0.7)+
    # Point and interval summary of cindex per feature set; the slab is made invisible via slab_alpha=0.
    stat_gradientinterval(mapping = aes(y=cindex, , color=features), alpha=0.7, fatten_point=0.8, interval_size_range=c(0.3, 0.6), slab_alpha=0, position="dodge")+
    #geom_hline(data = temp %>% filter(features=="Age+Sex") %>% group_by(endpoint) %>% summarise(mean_ci=mean(cindex)), 
          #     mapping = aes(yintercept=mean_ci), color=color_map[['COX_Age+Sex']], alpha=0.5, linetype="22", size=0.4)+
    #geom_hline(data = temp %>% filter(features=="ASCVD") %>% group_by(endpoint) %>% summarise(mean_ci=mean(cindex)), 
               #mapping = aes(yintercept=mean_ci), color=color_map[['COX_ASCVD']], alpha=0.5, linetype="22", size=0.4)+

    scale_x_discrete()+
    scale_color_manual(values=color_map) + 
    scale_y_continuous(breaks=scales::extended_breaks())+
    #scale_color_manual(values=color_map)+scale_fill_manual(values=color_map)+
    #coord_flip() + 
    #facet_wrap(~endpoint+cohort, scales="free_y", labeller = labeller(endpoint = endpoint_map), ncol=10) +theme(legend.position="none")+
    # switch="y" moves the endpoint strips to the left-hand side.
    facet_grid(endpoint~cohort, scales="free_x", labeller = labeller(endpoint = endpoint_map), switch="y") +
    theme(axis.text.x = element_text(angle =90, hjust = 1))+#+#+
    theme(strip.text.y.left = element_text(angle = 90))+
    # This later theme() call also sets axis.text.x (vjust only); it is added on top of the angle/hjust set above.
    theme(panel.spacing = unit(0.8, "lines"), 
          panel.grid.major.y = element_line(), 
          axis.text.x = element_text(vjust=0.5),
         legend.position="none", strip.placement = "outside", strip.text.y = element_text(size=7.5))

met_discrimination

## Diff endpoints

In [None]:
# One column of C-indices per feature set, for the rescaled results of the
# plotted endpoints. Iteration 0 is excluded.
temp_wide <- temp %>%
    filter(
        iteration != 0,
        scaler == "rescaled",
        features %in% c("MET", "Age+Sex", "Age+Sex+MET"),
        endpoint %in% endpoints_plot
    ) %>%
    pivot_wider(names_from = features, values_from = cindex)

In [None]:
# Feature sets compared pairwise below.
scores <- c("MET", "Age+Sex", "Age+Sex+MET")

In [None]:
# Pairwise C-index differences between feature sets.
# For every ordered pair (score1, score2) with score1 != score2 a column named
# "<score1> vs. <score2>" holding score1 - score2 is added. Both directions of
# each pair are produced on purpose: later cells use `MET vs. Age+Sex` as well
# as `Age+Sex+MET vs. Age+Sex`. The previous version carried a `score_df`
# lookup meant to skip mirrored pairs, but it was never extended, so its
# check was always true; that dead bookkeeping is removed here.
temp_diff_calc <- temp_wide %>% select(-scaler)

for (score1 in rev(scores)) {
    # setdiff() keeps the order of `scores`, so columns are created in the
    # same order as before.
    for (score2 in setdiff(scores, score1)) {
        delta <- glue("{score1} vs. {score2}")
        temp_diff_calc <- temp_diff_calc %>%
            mutate(!!delta := !!sym(score1) - !!sym(score2))
    }
}

# Keep only the difference columns, dropping the raw per-feature C-indices.
temp_diff_calc <- temp_diff_calc %>% select(-all_of(scores))

In [None]:
# Median and interval (median_qi defaults) of the two C-index differences
# against Age+Sex, per cohort and endpoint.
# A difference is labelled "ns" when its interval contains 0 and the median is
# not exactly 0, and "*" otherwise.
# NOTE(review): a median of exactly 0 is therefore labelled "*"; confirm
# whether the `0 != delta` term is intended.
temp_agg <- temp_diff_calc %>%
    select(cohort, endpoint, iteration, `MET vs. Age+Sex`, `Age+Sex+MET vs. Age+Sex`) %>%
    pivot_longer(
        -c(cohort, endpoint, iteration),
        names_to = "comparison",
        values_to = "delta"
    ) %>%
    group_by(cohort, endpoint, comparison) %>%
    median_qi(delta) %>%
    mutate(
        significant = case_when(
            (0 != delta & 0 >= `.lower` & 0 <= `.upper`) ~ "ns",
            TRUE ~ "*"
        )
    ) %>%
    mutate(endpoint = factor(endpoint, levels = endpoint_order)) %>%
    arrange(endpoint)

# Replace endpoint codes with their display labels.
temp_agg$endpoint <- recode(temp_agg$endpoint, !!!endpoint_map)

In [None]:
# Display the comparison table ordered by endpoint, comparison and cohort.
temp_agg %>%
    filter(comparison %in% c("MET vs. Age+Sex", "Age+Sex+MET vs. Age+Sex")) %>%
    mutate(
        comparison = factor(comparison, levels = c("MET vs. Age+Sex", "Age+Sex+MET vs. Age+Sex")),
        cohort = factor(cohort, levels = c("UKB", "WHII", "RS", "LLS", "PROSPER"))
    ) %>%
    select(endpoint, comparison, everything()) %>%
    arrange(endpoint, comparison, cohort)

In [None]:
# Write the table (as stored in temp_agg, without the reordering shown above)
# to the outputs folder.
plot_name <- "Suppl_Table_PerformanceComparisons"
write_csv(temp_agg, glue("outputs/{plot_name}.csv"))

In [None]:
# Inspect the WHII rows that enter the comparison.
temp %>%
    filter(
        iteration != 0,
        scaler == "rescaled",
        endpoint %in% endpoints_plot,
        cohort == "WHII"
    ) %>%
    arrange(iteration, endpoint, features)

In [None]:
# Inspect the UKB rows that enter the comparison.
temp %>%
    filter(
        iteration != 0,
        scaler == "rescaled",
        endpoint %in% endpoints_plot,
        cohort == "UKB"
    ) %>%
    arrange(iteration, endpoint, features)

In [None]:
# Per-endpoint C-index: median with interval, formatted as "median (lower, upper)".
ep_table <- temp %>%
    select(cohort, scaler, endpoint, features, iteration, cindex) %>%
    group_by(cohort, scaler, endpoint, features) %>%
    median_qi(cindex) %>%
    mutate(result = glue("{round(cindex, 3)} ({round(.lower, 3)}, {round(.upper, 3)})")) %>%
    select(cohort, scaler, endpoint, features, cindex, result)

In [None]:
# "Overall" rows: average the C-index across endpoints within each iteration
# first, then summarise those per-iteration means.
agg_table <- temp %>%
    group_by(cohort, scaler, features, iteration) %>%
    summarise(cindex = mean(cindex)) %>%
    group_by(cohort, scaler, features) %>%
    median_qi(cindex) %>%
    ungroup() %>%
    mutate(result = glue("{round(cindex, 3)} ({round(.lower, 3)}, {round(.upper, 3)})")) %>%
    mutate(endpoint = "Overall") %>%
    select(cohort, scaler, endpoint, features, cindex, result)
agg_table %>% sample_n(5)

In [None]:
# Wide performance table: one formatted result column per feature set.
perf_table <- bind_rows(ep_table, agg_table) %>%
    select(-cindex) %>%
    pivot_wider(names_from = "features", values_from = "result")
perf_table$endpoint <- recode(perf_table$endpoint, !!!endpoint_map)
perf_table %>% head(5)

In [None]:
# Endpoints used in the hazard-ratio section. M_MACE, M_asthma and
# M_parkinsons_disease are deliberately left out.
endpoints_plot <- c(
    "M_cerebral_stroke",
    "M_type_2_diabetes",
    "M_coronary_heart_disease",
    "M_heart_failure",
    "M_chronic_obstructuve_pulmonary_disease",
    "M_all_cause_dementia",
    "M_atrial_fibrillation"
)

## Hazard Ratios

In [None]:
# UKB hazard ratios for the metabolomics-containing models: HR = exp(coef),
# with the iteration index taken from column i and features relabelled to the
# short names.
hrs_ukb <- read_feather(glue("outputs/ukb_hrs.feather")) %>%
    ungroup() %>%
    filter(features %in% c("Metabolomics", "Age+Sex+Metabolomics", "ASCVD+Metabolomics")) %>%
    mutate(HR = exp(coef), iteration = as.integer(i)) %>%
    select(-i) %>%
    arrange(endpoint, features, iteration) %>%
    ungroup() %>%
    mutate(cohort = "UKB") %>%
    select(cohort, endpoint, features, iteration, HR) %>%
    mutate(
        features = case_when(
            features == "Metabolomics" ~ "MET",
            features == "Age+Sex+Metabolomics" ~ "Age+Sex+MET",
            features == "ASCVD+Metabolomics" ~ "ASCVD+MET"
        )
    )
head(hrs_ukb)

In [None]:
# Cox coefficient files of the three BBMRI cohorts; each gets its cohort
# label and a numeric estimate column.
hrs_LLS <- fread("ExternalCohorts/LLS_PAROFFS/coxcoeffients_LLS_PAROFFS_220406.csv") %>%
    mutate(cohort = "LLS", estimate = as.numeric(estimate))
hrs_PROSPER <- fread("ExternalCohorts/PROSPER/coxcoeffients_PROSPER_220331.csv") %>%
    mutate(cohort = "PROSPER", estimate = as.numeric(estimate))
hrs_ROTTERDAM <- fread("ExternalCohorts/ROTTERDAM/coxcoeffients_bbmri_220331.csv") %>%
    mutate(cohort = "RS", estimate = as.numeric(estimate))

In [None]:
# BBMRI hazard ratios: keep the terms whose name contains "logh_" from the
# "rescaled" fits and relabel the feature sets.
# NOTE(review): the estimate is used as HR unchanged, whereas the UKB cell
# applies exp(coef). This assumes the cohort files already hold exponentiated
# estimates; confirm.
hrs_bbmri <- bind_rows(hrs_LLS, hrs_PROSPER, hrs_ROTTERDAM) %>%
    filter(str_detect(term, "logh_"), scaler == "rescaled") %>%
    select(cohort, endpoint, features, iteration, HR = estimate) %>%
    mutate(
        features = case_when(
            features == "AgeSex" ~ "Age+Sex",
            features == "MET" ~ "MET",
            features == "AgeSex+MET" ~ "Age+Sex+MET",
            features == "ASCVD" ~ "ASCVD",
            features == "ASCVD+MET" ~ "ASCVD+MET"
        )
    )

In [None]:
# WHII hazard ratios as delivered; HR_Metabolomics is exposed as HR.
hrs_whii_raw <- fread("ExternalCohorts/WHII/MET_HRs_metabolomics_whiiscaled.csv") %>%
    mutate(HR = HR_Metabolomics, cohort = "WHII") %>%
    select(cohort, endpoint, features, HR)

In [None]:
# Draw one row per endpoint/feature group, 1000 times; column "i" records the
# draw number. No seed is set in this cell, so the draws differ between runs
# unless one is set elsewhere.
hrs_whii_boot <- purrr::map_dfr(
    integer(1000),
    ~ hrs_whii_raw %>% group_by(endpoint, features) %>% sample_n(1),
    .id = "i"
)

In [None]:
# WHII draws in the common layout: the draw number becomes the iteration and
# feature sets get their short labels.
hrs_whii <- hrs_whii_boot %>%
    filter(features %in% c("Metabolomics", "Age+Sex+Metabolomics", "ASCVD+Metabolomics")) %>%
    mutate(iteration = as.integer(i)) %>%
    select(cohort, endpoint, features, HR, iteration) %>%
    ungroup() %>%
    mutate(
        cohort = "WHII",
        features = case_when(
            features == "Metabolomics" ~ "MET",
            features == "Age+Sex+Metabolomics" ~ "Age+Sex+MET",
            features == "ASCVD+Metabolomics" ~ "ASCVD+MET"
        )
    )

head(hrs_whii)

In [None]:
# Hazard ratios of all cohorts for the plotted endpoints, with endpoint and
# cohort as ordered factors.
hrs_all <- bind_rows(hrs_ukb, hrs_bbmri, hrs_whii) %>%
    filter(
        features %in% c("MET", "Age+Sex+MET", "ASCVD+MET"),
        endpoint %in% endpoints_plot
    ) %>%
    mutate(
        endpoint = factor(endpoint, levels = endpoint_order),
        cohort = factor(cohort, levels = c("UKB", "WHII", "RS", "LLS", "PROSPER"))
    )

In [None]:
# Number of rows per cohort and feature set.
hrs_all %>%
    group_by(cohort, features) %>%
    tally()

In [None]:
# Median HR with interval per cohort and endpoint, rounded to two decimals and
# formatted as "hr (lower, upper)". Only the Age+Sex+MET column is kept.
# Iteration 0 is excluded.
temp_hrs_agg <- hrs_all %>%
    filter(iteration != 0) %>%
    group_by(cohort, endpoint, features) %>%
    median_qi(HR, na.rm = TRUE) %>%
    rename(hr = HR) %>%
    mutate(
        hr = round(hr, 2),
        .lower = round(.lower, 2),
        .upper = round(.upper, 2),
        HR_Metabolomics = glue("{hr} ({.lower}, {.upper})")
    ) %>%
    select(cohort, endpoint, features, HR_Metabolomics) %>%
    pivot_wider(names_from = "features", values_from = "HR_Metabolomics") %>%
    select(all_of(c("endpoint", "cohort", "Age+Sex+MET"))) %>%
    mutate(endpoint = factor(endpoint, levels = endpoint_order)) %>%
    arrange(endpoint) %>%
    mutate(endpoint = recode(endpoint, !!!endpoint_map)) %>%
    ungroup() %>%
    arrange(endpoint, cohort)
temp_hrs_agg

In [None]:
# Long form of the formatted HRs, used as text labels in the HR plot.
hr_labels <- pivot_longer(
    temp_hrs_agg,
    -c(endpoint, cohort),
    names_to = "features",
    values_to = "HR"
)
hr_labels

In [None]:
# Plot data for the HR figure: Age+Sex+MET rows of the plotted endpoints with
# display labels for endpoints and coef = log(HR).
# Note: this overwrites the C-index table previously stored in `temp`.
temp <- hrs_all %>%
    filter(endpoint %in% endpoints_plot) %>%
    mutate(endpoint = factor(endpoint, levels = endpoint_order)) %>%
    arrange(endpoint) %>%
    mutate(endpoint = recode(endpoint, !!!endpoint_map)) %>%
    filter(features == "Age+Sex+MET") %>%
    mutate(features = factor(features, levels = c("Age+Sex+MET"))) %>%
    ungroup() %>%
    mutate(coef = log(HR))

head(temp)

In [None]:
## 66%, 95%
#endpoint_order_panel = (temp_wide %>% group_by(endpoint) %>% mutate(delta = `DS_PANEL+Metabolomics`-`COX_PANEL`) %>% summarise(delta=mean(delta)) %>% arrange(delta))$endpoint
# Notebook display size for this figure.
plot_width=4.25; plot_height=8; plot_dpi=320
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res=plot_dpi)

# Hazard-ratio distributions per cohort, one facet row per endpoint.
# coord_flip() puts the cohorts on the vertical axis and the HR on the
# horizontal axis; fct_rev() reverses the cohort order.
hr_plot = ggplot(
    temp, aes(x=fct_rev(cohort), y=HR, group=cohort))+
    labs(x=NULL, y="Adjusted Hazard Ratio / SD Metabolomic State")+
    #geom_boxplot()+
    stat_gradientinterval(color="black", alpha=0.5, fatten_point=0.8, interval_size_range=c(0.3, 0.6), fill=NA)+
    # Reference lines: solid at HR = 1, dashed at 1.5, 2 and 3.
    geom_hline(yintercept=1, alpha=0.7, color="black", size=0.25)+
    geom_hline(yintercept=1.5, alpha=0.3, color="black", size=0.25, linetype="22")+
    geom_hline(yintercept=2, alpha=0.4, color="black", size=0.25, linetype="22")+
    geom_hline(yintercept=3, alpha=0.5, color="black", size=0.25, linetype="22")+
    scale_x_discrete()+
    # The visible HR range starts below 0, leaving space on the left for the text labels added below.
    coord_flip(ylim=c(-0.3, 4.1))+#ylim=c(-0.02, 0.02))+
    #scale_y_continuous()+ 
    theme(plot.title = element_text(hjust=0.5))+
    facet_grid(endpoint ~ ., space="free", scales="free_y") +
      theme(strip.placement = "outside")+
    theme(strip.text.y.left = element_text(angle = 90), strip.text.y = element_text(size=7.5))+
    theme(axis.text.y = element_text(size=5.5))+
    # Formatted "HR: median (lower, upper)" label per cohort, left-aligned at y = -0.4.
    geom_text(data=hr_labels, mapping=aes(x=cohort, label=glue("HR: {HR}")), 
            #position=position_dodge(width = .9), 
             y=-0.4, size=2, hjust=0)
hr_plot

In [None]:
# Notebook display size for the combined figure.
plot_width <- 8.25
plot_height <- 8
plot_dpi <- 320
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res = plot_dpi)

# Discrimination panel and hazard-ratio panel side by side. The argument is
# spelled out as `widths`; the previous `width=` only worked through R's
# partial argument matching.
ext_val <- (met_discrimination | hr_plot) + plot_layout(widths = c(4, 4.25))
ext_val

In [None]:
# add events etc!!!

In [None]:
library(gt)
# Save the combined external-validation figure as a PDF in the outputs folder,
# using the display size set for it.
plot_name <- "Suppl_Figures_4_ExternalValidation"
ggsave(
    filename = glue("outputs/{plot_name}.pdf"),
    plot = ext_val,
    device = "pdf",
    width = plot_width,
    height = plot_height,
    dpi = plot_dpi
)