## Initialize

In [None]:
#library(Rmisc)
library(tidyverse)
library(glue)
library(arrow)
library(patchwork)

In [None]:
# Pick the project root from the host name: nodes whose name contains "sc"
# use the /sc-projects mount, every other host uses the ag-reils share.
base_path <- if (grepl("sc", Sys.info()[["nodename"]], fixed = TRUE)) {
  "/sc-projects/sc-proj-ukb-cvd"
} else {
  "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
}
print(base_path)

# Input dataset location.
dataset_name <- "210714_metabolomics"
path <- "/data/analysis/ag-reils/steinfej/code/umbrella/pre/ukbb"
data_path <- glue("{base_path}/data")
dataset_path <- glue("{data_path}/3_datasets_post/{dataset_name}")

# Project result locations (figures and derived data).
project_label <- "21_metabolomics_multitask"
project_path <- glue("{base_path}/results/projects/{project_label}")
figures_path <- glue("{project_path}/figures")
data_results_path <- glue("{project_path}/data")

## Load data

In [None]:
# Show every directory below the project folder (full paths, recursive --
# both are the defaults of list.dirs()).
list.dirs(project_path)

In [None]:
# Run identifier; it is interpolated into the result file names read below.
run <- "220126"

In [None]:
# Model label (not referenced again in the visible part of the notebook).
DSM <- "MultiTaskSurvivalTraining"

# Merged dataset and its column description.
data <- arrow::read_feather(glue("{dataset_path}/data_merged.feather"))
data_description <- arrow::read_feather(
  glue("{dataset_path}/description_merged.feather")
)

# Model outputs for the selected run.
predictions <- arrow::read_feather(
  glue("{data_results_path}/predictions_{run}_metabolomics.feather")
)

# Reshape the wide `logh*` columns to long format: the column name is split
# by the regex into an endpoint part and a feature-set part.
loghazards <- glue("{data_results_path}/loghazards_model_{run}_metabolomics.feather") %>%
  arrow::read_feather() %>%
  pivot_longer(
    starts_with("logh"),
    names_to = c("endpoint", "features"),
    values_to = "logh",
    names_pattern = "logh_?(.*)_(.*)$"
  )

In [None]:
# Build one row per (eid, endpoint) with columns `event` and `event_time`
# from the wide `*event` / `*event_time` columns of `data`.
data_events <- data %>%
  select(eid, ends_with("event"), ends_with("event_time")) %>%
  pivot_longer(
    -eid,
    names_to = c("endpoint", "type"),
    values_to = "value",
    names_pattern = "(.*)(event_time|event)"
  ) %>%
  # The captured prefix still ends with one separator character; drop it.
  mutate(endpoint = stringr::str_sub(endpoint, end = -2)) %>%
  pivot_wider(names_from = "type", values_from = "value")

In [None]:
# Attach event indicator and event time to every log-hazard row.
loghazards_tte <- left_join(
  loghazards,
  data_events,
  by = c("endpoint", "eid")
)

In [None]:
# Keep the test split only and add the hazard ratio exp(logh).
logh_T <- loghazards_tte %>%
  filter(split == "test") %>%
  mutate(hr = exp(logh))

In [None]:
# Rank every log hazard into one of 100 bins within its endpoint / feature
# set. `logh_T` stays grouped by (endpoint, features), as before.
logh_T <- logh_T %>%
  group_by(endpoint, features) %>%
  mutate(logh_perc = ntile(logh, 100))

# Mean of `event` per bin. `logh_perc` already exists on `logh_T`, so it is
# not recomputed here. `.groups = "drop_last"` spells out the regrouping that
# summarise() applies by default (result grouped by endpoint, features).
logh_T_agg <- logh_T %>%
  group_by(endpoint, features, logh_perc) %>%
  summarise(ratio = mean(event), .groups = "drop_last")

In [None]:
# Per endpoint: mean log hazard among rows with event == 0 and event == 1,
# their difference `delta`, sorted with the largest difference first.
labels <- logh_T %>%
  group_by(endpoint, event) %>%
  summarise(mean_logh = mean(logh)) %>%
  pivot_wider(names_from = "event", values_from = "mean_logh") %>%
  mutate(delta = `1` - `0`) %>%
  arrange(desc(delta))

In [None]:
# Score identifiers in "<module>_<features>" form.
scores <- c(
  "COX_Age+Sex",
  "COX_Metabolomics",
  "DS_Metabolomics",
  "DS_Age+Sex+Metabolomics",
  "DS_AgeSexMetabolomics"
)

In [None]:
library(ggalt)

In [None]:
library(jsonlite)

# Colour palette definitions, read from a JSON file in the working directory.
colors_path <- "colors.json"
colors_dict <- jsonlite::read_json(colors_path)

In [None]:
# Plot colour per score ("<module>_<features>"). Baseline Cox models share
# the light grey from the palette; the metabolomics-only models share one blue.
color_map <- c(
  "all" = "grey",
  "none" = "black",

  "COX_Age+Sex" = colors_dict$pastel$grey$light,

  "PCA_Metabolomics" = "#4F8EC1",
  "COX_Metabolomics" = "#4F8EC1",
  "DS_Metabolomics" = "#4F8EC1",

  "COX_ASCVD" = colors_dict$pastel$grey$light,
  "COX_PANEL" = colors_dict$pastel$grey$light,

  "DS_Age+Sex+Metabolomics" = "#53dd6c",
  "DS_SCORE2+Metabolomics" = colors_dict$pastel$red$mid,
  "DS_ASCVD+Metabolomics" = "#d8315b",
  "DS_PANEL+Metabolomics" = "#1e1b18"
)

In [None]:
# Every score that has a colour assigned, in colour-map order.
scores_full <- names(color_map)

In [None]:
# C-index benchmark of the selected run. `score` is "<module>_<features>";
# the source columns are kept alongside it.
name <- glue("benchmark_cindex_{run}")
benchmark_cindex_general <- glue("{data_results_path}/{name}.feather") %>%
  read_feather() %>%
  distinct() %>%
  unite("score", c(module, features), remove = FALSE) %>%
  distinct()

In [None]:
# Font sizes shared by the figures below.
base_size <- 8
title_size <- 8
facet_size <- 8
geom_text_size <- 3

library(ggplot2)

# Global ggplot theme: classic look, no facet strip background, thin axis
# lines and ticks, legend below the panel.
theme_set(
  theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      plot.title = element_text(size = title_size, hjust = 0),
      strip.text.x = element_text(size = facet_size),
      axis.title = element_text(size = 8),
      axis.text = element_text(size = 8, color = "black"),
      axis.text.x = element_text(size = 8, color = "black"),
      legend.position = "bottom",
      axis.line = element_line(size = 0.2),
      axis.ticks = element_line(size = 0.2)
    )
)

In [None]:
library(ggthemes)
# Display label for every endpoint identifier.
# NOTE(review): "obstructuve" is kept exactly as written because it is used as
# a lookup key; presumably it matches the spelling in the data -- confirm.
endpoint_map <- c(
  "M_MACE" = "MACE",
  "M_all_cause_dementia" = "Dementia",
  "M_type_2_diabetes" = "T2 Diabetes",
  "M_liver_disease" = "Liver Disease",
  "M_renal_disease" = "Renal Disease",
  "M_atrial_fibrillation" = "Atrial Fibrillation",
  "M_heart_failure" = "Heart Failure",
  "M_coronary_heart_disease" = "CHD",
  "M_venous_thrombosis" = "Ven. Thrombosis",
  "M_cerebral_stroke" = "Cerebral Stroke",
  "M_abdominal_aortic_aneurysm" = "AAA",
  "M_peripheral_arterial_disease" = "PAD",
  "M_chronic_obstructuve_pulmonary_disease" = "COPD",
  "M_asthma" = "Asthma",
  "M_parkinsons_disease" = "Parkinson's",
  "M_lung_cancer" = "Lung Cancer",
  "M_non_melanoma_skin_cancer" = "Skin Cancer",
  "M_colon_cancer" = "Colon Cancer",
  "M_rectal_cancer" = "Rectal Cancer",
  "M_prostate_cancer" = "Prostate Cancer",
  "M_breast_cancer" = "Breast Cancer",
  "M_cataracts" = "Cataracts",
  "M_glaucoma" = "Glaucoma",
  "M_fractures" = "Fractures"
)

# Fixed display order of the endpoints (used as factor levels).
endpoint_order <- c(
  "M_MACE",
  "M_coronary_heart_disease",
  "M_cerebral_stroke",
  "M_all_cause_dementia",
  "M_heart_failure",
  "M_atrial_fibrillation",
  "M_type_2_diabetes",
  "M_liver_disease",
  "M_renal_disease",
  "M_peripheral_arterial_disease",
  "M_venous_thrombosis",
  "M_abdominal_aortic_aneurysm",
  "M_chronic_obstructuve_pulmonary_disease",
  "M_asthma",
  "M_parkinsons_disease",
  "M_cataracts",
  "M_glaucoma",
  "M_fractures",
  "M_lung_cancer",
  "M_non_melanoma_skin_cancer",
  "M_colon_cancer",
  "M_rectal_cancer",
  "M_prostate_cancer",
  "M_breast_cancer"
)

In [None]:
# Same keys as `endpoint_map`, with each label wrapped at 20 characters for
# use as axis labels.
em_wrap <- setNames(
  str_wrap(unname(endpoint_map), 20),
  names(endpoint_map)
)
em_wrap

In [None]:
options(repr.plot.width = 8, repr.plot.height = 8)
library(ggbeeswarm)
scores_plot <- names(color_map)

# Benchmark rows restricted to known scores. Scores that are not a level of
# `scores_full` become NA in the factor and are dropped explicitly (the
# previous `score == score` comparison relied on NA == NA evaluating to NA).
temp <- benchmark_cindex_general %>%
  mutate(score = factor(score, levels = scores_full)) %>%
  filter(!is.na(score), score %in% scores_plot)

# One column of C-indices per score, plus the metabolomics-vs-Age+Sex delta.
temp_wide <- temp %>%
  select(-module, -features) %>%
  pivot_wider(names_from = "score", values_from = "cindex") %>%
  mutate(delta = `DS_Metabolomics` - `COX_Age+Sex`)

# Median C-index per endpoint and score in wide form; endpoints are ordered
# by decreasing delta of the medians and that order is exported as
# `endpoint_order_desc`.
temp_desc <- temp %>%
  group_by(endpoint, score) %>%
  summarise(median_cindex = median(cindex, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = "score", values_from = "median_cindex") %>%
  mutate(delta = `DS_Metabolomics` - `COX_Age+Sex`) %>%
  mutate(endpoint = fct_reorder(endpoint, desc(delta)))
endpoint_order_desc <- levels(temp_desc$endpoint)

In [None]:
library(ggdist)

In [None]:
library(ggtext)

In [None]:
library(ggforestplot)

In [None]:
library(cowplot)

In [None]:
# Short axis label for every score identifier ("MET" = metabolomics).
scores_map <- c(
  "DS_Metabolomics" = "MET",

  "COX_Age+Sex" = "AgeSex",
  "DS_Age+Sex+Metabolomics" = "AgeSex+MET",

  "COX_ASCVDnoblood" = "ASCVD(noblood)",
  "DS_ASCVDnoblood+Metabolomics" = "ASCVD(noblood)+MET",

  "COX_ASCVD" = "ASCVD",
  "DS_ASCVD+Metabolomics" = "ASCVD+MET",

  "COX_PANELnoblood" = "PANEL(noB)",
  "DS_PANELnoblood+Metabolomics" = "PANEL(noB)+MET",

  "COX_PANELjustbloodcount" = "PANEL(jBC)",
  "DS_PANELjustbloodcount+Metabolomics" = "PANEL(jBC)+MET",

  "COX_PANEL" = "PANEL",
  "DS_PANEL+Metabolomics" = "PANEL+MET"
)

## Figure 3 - Performance in Context

In [None]:
library(scales)

In [None]:
# Scores shown in Figure 3, in plotting order.
scores_plot <- c(
  "DS_Metabolomics",
  "COX_Age+Sex",
  "DS_Age+Sex+Metabolomics",

  "COX_ASCVD",
  "DS_ASCVD+Metabolomics",

  "COX_PANEL",
  "DS_PANEL+Metabolomics"
)

# Fixed display order of the endpoints (used as factor levels).
endpoint_order <- c(
  "M_MACE", "M_coronary_heart_disease", "M_cerebral_stroke",
  "M_all_cause_dementia", "M_heart_failure", "M_atrial_fibrillation",
  "M_type_2_diabetes", "M_liver_disease", "M_renal_disease",
  "M_peripheral_arterial_disease", "M_venous_thrombosis",
  "M_abdominal_aortic_aneurysm",
  "M_chronic_obstructuve_pulmonary_disease", "M_asthma",
  "M_parkinsons_disease", "M_cataracts", "M_glaucoma", "M_fractures",
  "M_lung_cancer", "M_non_melanoma_skin_cancer", "M_colon_cancer",
  "M_rectal_cancer", "M_prostate_cancer", "M_breast_cancer"
)

# Benchmark rows for the selected scores. Scores outside `scores_plot` become
# NA in the factor and are dropped explicitly (the previous `score == score`
# comparison relied on NA == NA evaluating to NA).
temp <- benchmark_cindex_general %>%
  mutate(score = factor(score, levels = scores_plot)) %>%
  filter(!is.na(score), score %in% scores_plot) %>%
  mutate(endpoint = factor(endpoint, levels = endpoint_order))

# Per endpoint and score: `mean` holds the median C-index (column name kept
# for compatibility), `max` the maximum.
temp_desc <- temp %>%
  group_by(endpoint, score) %>%
  summarise(
    mean = median(cindex, na.rm = TRUE),
    max = max(cindex),
    .groups = "drop"
  )

## Diff endpoints

In [None]:
# Per endpoint and feature set: median C-index with its interval from
# median_qi(), formatted as "median (lower, upper)".
ep_table <- temp %>%
  select(endpoint, features, iteration, cindex) %>%
  group_by(endpoint, features) %>%
  median_qi(cindex) %>%
  mutate(
    result = glue("{round(cindex, 3)} ({round(.lower, 3)}, {round(.upper, 3)})")
  ) %>%
  select(endpoint, features, cindex, result)

In [None]:
# "Overall" row: average the C-index over endpoints within each iteration,
# then summarise the iterations per feature set with median_qi().
agg_table <- temp %>%
  group_by(features, iteration) %>%
  summarise(cindex = mean(cindex)) %>%
  group_by(features) %>%
  median_qi(cindex) %>%
  ungroup() %>%
  mutate(
    result = glue("{round(cindex, 3)} ({round(.lower, 3)}, {round(.upper, 3)})")
  ) %>%
  mutate(endpoint = "Overall") %>%
  select(endpoint, features, cindex, result)
agg_table

In [None]:
# Performance table: one row per endpoint (plus "Overall"), one column of
# formatted results per feature set, endpoints shown with display labels.
perf_table <- ep_table %>%
  bind_rows(agg_table) %>%
  select(-cindex) %>%
  pivot_wider(names_from = "features", values_from = "result")
perf_table$endpoint <- recode(perf_table$endpoint, !!!endpoint_map)
perf_table

In [None]:
plot_width <- 8.25
plot_height <- 5.5
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_dpi
)

# Absolute C-index per score, one facet per endpoint. The dashed horizontal
# lines mark the per-endpoint mean C-index of the Age+Sex, ASCVD and PANEL
# feature sets.
met_discrimination <- ggplot(temp, aes(x = score)) +
  labs(x = NULL, y = "Absolute C-Index") +
  stat_gradientinterval(
    mapping = aes(y = cindex, color = score, fill = score),
    alpha = 0.7,
    fatten_point = 0.8,
    interval_size_range = c(0.3, 0.6),
    slab_alpha = 0
  ) +
  geom_hline(
    data = temp %>%
      filter(features == "Age+Sex") %>%
      group_by(endpoint) %>%
      summarise(mean_ci = mean(cindex)),
    mapping = aes(yintercept = mean_ci),
    color = color_map[["COX_Age+Sex"]],
    alpha = 0.5, linetype = "22", size = 0.4
  ) +
  geom_hline(
    data = temp %>%
      filter(features == "ASCVD") %>%
      group_by(endpoint) %>%
      summarise(mean_ci = mean(cindex)),
    mapping = aes(yintercept = mean_ci),
    color = color_map[["COX_ASCVD"]],
    alpha = 0.5, linetype = "22", size = 0.4
  ) +
  geom_hline(
    data = temp %>%
      filter(features == "PANEL") %>%
      group_by(endpoint) %>%
      summarise(mean_ci = mean(cindex)),
    mapping = aes(yintercept = mean_ci),
    color = color_map[["COX_PANEL"]],
    alpha = 0.5, linetype = "22", size = 0.4
  ) +
  scale_x_discrete(labels = scores_map) +
  scale_y_continuous(breaks = scales::extended_breaks()) +
  scale_color_manual(values = color_map) +
  scale_fill_manual(values = color_map) +
  facet_wrap(
    ~endpoint,
    scales = "free_y",
    labeller = labeller(endpoint = endpoint_map),
    ncol = 6
  ) +
  theme(
    legend.position = "none",
    panel.spacing = unit(0.8, "lines"),
    # Rotated, right-aligned, small score labels on the x axis.
    axis.text.x = element_text(angle = 90, hjust = 1, size = 5.5)
  )

met_discrimination

In [None]:
library(gt)

# Write the absolute C-index figure as PDF, using the size set in the plot cell.
plot_name <- "Figures_3_A_AbsoluteCindex"
ggsave(
  filename = glue("outputs/{plot_name}.pdf"),
  plot = met_discrimination,
  device = "pdf",
  width = plot_width,
  height = plot_height,
  dpi = plot_dpi
)

In [None]:
plot_width <- 2.66
plot_height <- 2.5
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_dpi
)

# C-index difference against the Age+Sex Cox baseline, one row per endpoint
# (ordered by `endpoint_order_desc`): metabolomics alone and Age+Sex plus
# metabolomics, nudged apart so both intervals stay visible.
met_agesex_context <- ggplot(
  temp_wide,
  aes(x = factor(endpoint, levels = endpoint_order_desc))
) +
  labs(x = NULL, y = "\u0394 C-Index", title = "Age+Sex") +
  geom_hline(
    yintercept = 0, alpha = 0.7, color = color_map[["COX_Age+Sex"]],
    size = 0.5, linetype = "22"
  ) +
  # Faint separators after every third endpoint (3.5, 6.5, ..., 21.5),
  # drawn as one vectorised layer instead of seven identical ones.
  geom_vline(
    xintercept = seq(3.5, 21.5, by = 3),
    alpha = 0.3, color = color_map[["COX_Age+Sex"]], size = 0.2
  ) +
  stat_gradientinterval(
    mapping = aes(y = `DS_Metabolomics` - `COX_Age+Sex`),
    color = color_map[["DS_Metabolomics"]],
    fill = color_map[["DS_Metabolomics"]],
    fatten_point = 0.8, interval_size_range = c(0.3, 0.6), alpha = 0.7,
    position = position_nudge(x = 0.4), slab_alpha = 0
  ) +
  stat_gradientinterval(
    mapping = aes(y = `DS_Age+Sex+Metabolomics` - `COX_Age+Sex`),
    color = color_map[["DS_Age+Sex+Metabolomics"]],
    fill = color_map[["DS_Age+Sex+Metabolomics"]],
    fatten_point = 0.8, interval_size_range = c(0.3, 0.6), alpha = 1,
    position = position_nudge(x = 0.15), slab_alpha = 0
  ) +
  scale_x_discrete(labels = em_wrap) +
  coord_flip(ylim = c(-0.22, 0.22)) +
  scale_y_continuous(minor_breaks = seq(-0.24, +0.24, 0.02)) +
  theme(plot.title = element_text(hjust = 0.5))

In [None]:
# Intervals: 66%, 95% (original note; presumably the default widths of
# stat_gradientinterval() -- confirm).

# Endpoints sorted by increasing mean delta of Age+Sex+Metabolomics vs Age+Sex.
endpoint_order_agesex <- temp_wide %>%
  group_by(endpoint) %>%
  mutate(delta = `DS_Age+Sex+Metabolomics` - `COX_Age+Sex`) %>%
  summarise(delta = mean(delta)) %>%
  arrange(delta) %>%
  pull(endpoint)

plot_width <- 8
plot_height <- 4
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_dpi
)

# C-index difference against the ASCVD Cox baseline, one row per endpoint
# (ordered by `endpoint_order_desc`): Age+Sex plus metabolomics, and ASCVD
# plus metabolomics.
met_ascvd_context <- ggplot(
  temp_wide,
  aes(x = factor(endpoint, levels = endpoint_order_desc))
) +
  labs(x = NULL, y = "\u0394 C-Index", title = "ASCVD") +
  geom_hline(
    yintercept = 0, alpha = 0.7, color = color_map[["COX_Age+Sex"]],
    size = 0.5, linetype = "22"
  ) +
  # Faint separators after every third endpoint (3.5, 6.5, ..., 21.5),
  # drawn as one vectorised layer instead of seven identical ones.
  geom_vline(
    xintercept = seq(3.5, 21.5, by = 3),
    alpha = 0.3, color = color_map[["COX_Age+Sex"]], size = 0.2
  ) +
  stat_gradientinterval(
    mapping = aes(y = `DS_Age+Sex+Metabolomics` - `COX_ASCVD`),
    color = color_map[["DS_Age+Sex+Metabolomics"]],
    fill = color_map[["DS_Age+Sex+Metabolomics"]],
    alpha = 1, fatten_point = 0.8, interval_size_range = c(0.3, 0.6),
    position = position_nudge(x = 0.2), slab_alpha = 0
  ) +
  stat_gradientinterval(
    mapping = aes(y = `DS_ASCVD+Metabolomics` - `COX_ASCVD`),
    color = color_map[["DS_ASCVD+Metabolomics"]],
    fill = color_map[["DS_ASCVD+Metabolomics"]],
    alpha = 1, fatten_point = 0.8, interval_size_range = c(0.3, 0.6),
    position = position_nudge(x = 0), slab_alpha = 0
  ) +
  scale_x_discrete(labels = em_wrap) +
  coord_flip(ylim = c(-0.1, 0.1)) +
  scale_y_continuous(minor_breaks = seq(-0.24, +0.24, 0.02)) +
  theme(plot.title = element_text(hjust = 0.5))

In [None]:
# Intervals: 66%, 95% (original note; presumably the default widths of
# stat_gradientinterval() -- confirm).
plot_width <- 8
plot_height <- 4
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_dpi
)

# Endpoints sorted by increasing mean delta of PANEL+Metabolomics vs PANEL.
# The plot below is ordered by `endpoint_order_desc`, not by this vector.
endpoint_order_panel <- temp_wide %>%
  group_by(endpoint) %>%
  mutate(delta = `DS_PANEL+Metabolomics` - `COX_PANEL`) %>%
  summarise(delta = mean(delta)) %>%
  arrange(delta) %>%
  pull(endpoint)

# C-index difference against the PANEL Cox baseline, one row per endpoint:
# ASCVD plus metabolomics, and PANEL plus metabolomics.
met_panel_context <- ggplot(
  temp_wide,
  aes(x = factor(endpoint, levels = endpoint_order_desc))
) +
  labs(x = NULL, y = "\u0394 C-Index", title = "PANEL") +
  geom_hline(
    yintercept = 0, alpha = 0.7, color = color_map[["COX_Age+Sex"]],
    size = 0.5, linetype = "22"
  ) +
  # Faint separators after every third endpoint (3.5, 6.5, ..., 21.5),
  # drawn as one vectorised layer instead of seven identical ones.
  geom_vline(
    xintercept = seq(3.5, 21.5, by = 3),
    alpha = 0.3, color = color_map[["COX_Age+Sex"]], size = 0.2
  ) +
  stat_gradientinterval(
    mapping = aes(y = `DS_ASCVD+Metabolomics` - `COX_PANEL`),
    color = color_map[["DS_ASCVD+Metabolomics"]],
    fill = color_map[["DS_ASCVD+Metabolomics"]],
    alpha = 1, fatten_point = 0.8, interval_size_range = c(0.3, 0.6),
    position = position_nudge(x = 0), slab_alpha = 0
  ) +
  stat_gradientinterval(
    mapping = aes(y = `DS_PANEL+Metabolomics` - `COX_PANEL`),
    color = color_map[["DS_PANEL+Metabolomics"]],
    fill = color_map[["DS_PANEL+Metabolomics"]],
    alpha = 1, fatten_point = 0.8, interval_size_range = c(0.3, 0.6),
    position = position_nudge(x = -0.2), slab_alpha = 0
  ) +
  scale_x_discrete(labels = em_wrap) +
  coord_flip(ylim = c(-0.04, 0.04)) +
  scale_y_continuous(minor_breaks = seq(-0.24, +0.24, 0.02)) +
  theme(plot.title = element_text(hjust = 0.5))

In [None]:
# Display the endpoint vector sorted by mean PANEL+Metabolomics vs PANEL delta.
endpoint_order_panel

In [None]:
plot_width <- 8.25
plot_height <- 5.25
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_dpi
)

# Place the three delta panels side by side with equal widths. Only the
# left-most panel keeps its endpoint axis; the other two drop axis text,
# ticks and line.
# NOTE(review): the ASCVD panel enables only the minor x grid while the other
# two also enable the major x grid -- kept as is; confirm this is intended.
met_context <- (
  (met_agesex_context +
    theme(
      panel.grid.minor.x = element_line(size = 0.2),
      panel.grid.major.x = element_line(size = 0.2)
    )) |
    (met_ascvd_context +
      theme(
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        panel.grid.minor.x = element_line(size = 0.2)
      )) |
    (met_panel_context +
      theme(
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        panel.grid.minor.x = element_line(size = 0.2),
        panel.grid.major.x = element_line(size = 0.2)
      ))
) +
  # `widths` is spelled out; `width` only worked through partial matching.
  plot_layout(widths = c(1, 1, 1))
met_context

In [None]:
library(gt)

# Write the combined delta panels as PDF through the cairo_pdf device.
plot_name <- "Figures_3_BCD_RelativeCindexDeltas"
ggsave(
  filename = glue("outputs/{plot_name}.pdf"),
  plot = met_context,
  device = cairo_pdf,
  width = plot_width,
  height = plot_height,
  dpi = plot_dpi
)

### Differences

In [None]:
# Wide C-index table without the bookkeeping columns and the PCA / Cox
# metabolomics scores; the "DS_" / "COX_" prefixes are stripped from the
# remaining column names so that they match `scores` below.
temp_diff <- temp_wide %>%
  select(-time, -delta, -PCA_Metabolomics, -COX_Metabolomics)
names(temp_diff) <- str_remove_all(names(temp_diff), "DS_|COX_")

# Feature sets that are compared pairwise.
scores <- c(
  "Metabolomics",
  "Age+Sex",
  "Age+Sex+Metabolomics",
  "ASCVD",
  "ASCVD+Metabolomics",
  "PANEL",
  "PANEL+Metabolomics"
)

In [None]:
# Add one "<a> vs. <b>" column (a - b) for every unordered pair of feature
# sets, then drop the raw per-score columns.
temp_diff_calc <- temp_diff

# combn() enumerates the pairs in the same order the previous nested loops
# visited them (first element before second, by position in `scores`).
unique_scores <- unique(scores)
pair_idx <- if (length(unique_scores) >= 2) {
  utils::combn(length(unique_scores), 2)
} else {
  matrix(integer(0), nrow = 2)
}

# Registry of the compared pairs. The leading empty row is kept so the table
# has the same content as before, in case other cells read it.
score_df <- tibble(
  score1 = c("", unique_scores[pair_idx[1, ]]),
  score2 = c("", unique_scores[pair_idx[2, ]])
)

for (k in seq_len(ncol(pair_idx))) {
  lhs <- unique_scores[pair_idx[1, k]]
  rhs <- unique_scores[pair_idx[2, k]]
  temp_diff_calc <- temp_diff_calc %>%
    mutate("{lhs} vs. {rhs}" := .data[[lhs]] - .data[[rhs]])
}

temp_diff_calc <- temp_diff_calc %>% select(-all_of(scores))

In [None]:
# Per endpoint and comparison: median delta with its interval from
# median_qi(), formatted as "median (lower, upper)" and marked with "*" when
# the interval excludes 0.
temp_agg <- temp_diff_calc %>%
  pivot_longer(
    -c(endpoint, iteration),
    names_to = "comparison",
    values_to = "delta"
  ) %>%
  group_by(endpoint, comparison) %>%
  median_qi(delta) %>%
  # Flag only intervals that lie entirely above or below 0. The previous
  # condition also required delta != 0, which starred a median of exactly 0
  # (and missing values) although the interval contained 0.
  mutate(different = case_when(.lower > 0 | .upper < 0 ~ "*", TRUE ~ "")) %>%
  mutate(
    delta_cindex = glue(
      "{round(delta, 3)} ({round(.lower, 3)}, {round(.upper, 3)}){different}"
    )
  ) %>%
  select(endpoint, comparison, delta_cindex) %>%
  pivot_wider(names_from = "comparison", values_from = "delta_cindex") %>%
  mutate(endpoint = factor(endpoint, levels = endpoint_order)) %>%
  arrange(endpoint)
temp_agg$endpoint <- recode(temp_agg$endpoint, !!!endpoint_map)

In [None]:
# Export the pairwise comparison table as CSV.
plot_name <- "Suppl_Table_PerformanceComparisons"
write_csv(temp_agg, glue("outputs/{plot_name}.csv"))

## Suppl Figure 1

In [None]:
plot_width <- 8.25
plot_height <- 4
plot_dpi <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_dpi
)

# The three metabolomics-only models that are compared.
scores_plot <- c(
  "PCA_Metabolomics",
  "COX_Metabolomics",
  "DS_Metabolomics"
)

# Benchmark rows for those scores. Scores outside `scores_plot` become NA in
# the factor and are dropped explicitly (the previous `score == score`
# comparison relied on NA == NA evaluating to NA).
temp <- benchmark_cindex_general %>%
  mutate(score = factor(score, levels = scores_plot)) %>%
  filter(!is.na(score), score %in% scores_plot) %>%
  mutate(endpoint = factor(endpoint, levels = endpoint_order))

# Per endpoint and score: `mean` holds the median C-index (column name kept
# for compatibility), `max` the maximum; the "_Metabolomics" suffix is
# removed from the score label.
temp_desc <- temp %>%
  group_by(endpoint, score) %>%
  summarise(
    mean = median(cindex, na.rm = TRUE),
    max = max(cindex),
    .groups = "drop"
  ) %>%
  mutate(score = str_remove_all(score, "_Metabolomics"))

# Absolute C-index of PCA vs Cox vs DS on metabolomics, one facet per endpoint.
supplfig1c <- ggplot(temp, aes(x = factor(module, levels = c("PCA", "COX", "DS")))) +
  labs(x = NULL, y = "Absolute C-Index") +
  stat_gradientinterval(
    mapping = aes(y = cindex, fill = score),
    color = color_map[["DS_Metabolomics"]],
    alpha = 1, fatten_point = 0.8, interval_size_range = c(0.3, 0.6),
    slab_alpha = 0
  ) +
  scale_y_continuous() +
  # NOTE(review): the x axis shows module names, for which `endpoint_map` has
  # no entries, so these labels look like a leftover -- confirm before removing.
  scale_x_discrete(labels = endpoint_map) +
  scale_color_manual(values = color_map) +
  scale_fill_manual(values = color_map) +
  facet_wrap(
    ~endpoint,
    scales = "free_y",
    labeller = labeller(endpoint = endpoint_map),
    ncol = 6
  ) +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_line(size = 0.2)
  )
supplfig1c

In [None]:
library(gt)

# Write the PCA / Cox / DS comparison figure as PDF.
plot_name <- "Suppl_Figures_2_A_PCACOXDS"
ggsave(
  filename = glue("outputs/{plot_name}.pdf"),
  plot = supplfig1c,
  device = "pdf",
  width = plot_width,
  height = plot_height,
  dpi = plot_dpi
)