In [None]:
library(ggplot2)
library(dtplyr)
library(tidyverse)
library(glue)
library(arrow)
library(patchwork)
library(data.table)
library("jsonlite")
library(ggthemes)
library(stringr)
library(ggtext)
library(ggdist)

In [None]:
# Root directory holding the All of Us revision inputs read by this notebook.
base_path <- "/sc-projects/sc-proj-ukb-cvd/results/projects/22_medical_records/data/220823_allofus/230502_revision"
# Echo the path so the executed notebook records which directory was used.
base_path

In [None]:
# Font sizes shared by the plots in this notebook.
base_size <- 8
title_size <- 10
facet_size <- 9
geom_text_size <- 3

# Install the notebook-wide default ggplot2 theme: classic look, thin axes,
# horizontal major grid lines and the legend below the panel.
theme_set(
  theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      plot.title = element_text(size = title_size, hjust = 0),
      strip.text.x = element_text(size = facet_size),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8, color = "black"),
      legend.position = "bottom",
      axis.line = element_line(size = 0.2),
      axis.ticks = element_line(size = 0.2),
      panel.grid.major.y = element_line()
    )
)

In [None]:
# Endpoint (phecode) definitions, ordered by endpoint id.
endpoint_defs <- arrange(
  arrow::read_feather(glue("{base_path}/phecode_defs_220306.feather")),
  endpoint
)

# Endpoint metadata; `phecode` is forced to character when reading.
endpoints_md <- fread(
  glue("{base_path}/endpoints.csv"),
  colClasses = c("phecode" = "character")
)
endpoints <- sort(endpoints_md$endpoint)

# Echo the definitions path that was read.
glue("{base_path}/phecode_defs_220306.feather")

In [None]:
# Lookup from endpoint id to its full phecode description.
endpoint_map <- setNames(endpoint_defs$phecode_string, endpoint_defs$endpoint)

# Endpoint ids ordered by the numeric value of their phecode.
endpoint_order <- endpoint_defs %>%
  arrange(as.numeric(phecode)) %>%
  pull(endpoint)

In [None]:
# Font sizes shared by the plots in this notebook (same values as the first
# theme cell; re-running this cell resets the defaults).
base_size <- 8
title_size <- 10
facet_size <- 9
geom_text_size <- 3

# Default ggplot2 theme: classic look, thin axes, horizontal major grid lines,
# legend below the panel.
theme_set(
  theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      plot.title = element_text(size = title_size, hjust = 0),
      strip.text.x = element_text(size = facet_size),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8, color = "black"),
      legend.position = "bottom",
      axis.line = element_line(size = 0.2),
      axis.ticks = element_line(size = 0.2),
      panel.grid.major.y = element_line()
    )
)

In [None]:
# Hand-picked endpoints highlighted in the figures; the trailing comment on
# each entry gives the phecode description (and the author's tag).
endpoint_selection <- c(
  "phecode_401",    # Hypertension (intervention)
  "phecode_202",    # Diabetes mellitus (intervention)
  "phecode_416-21", # Atrial fibrillation (intervention)
  "phecode_468",    # Pneumonia (intervention)
  "phecode_474",    # Chronic obstructive pulmonary disease [COPD] (intervention)
  "phecode_583",    # Chronic kidney disease (intervention)

  "phecode_404",    # Ischemic heart disease
  "phecode_404-1",  # Myocardial infarction [Heart attack] (intervention)
  "phecode_431-11", # Cerebral infarction [Ischemic stroke]
  "phecode_424",    # Heart failure (intervention)
  "phecode_420",    # Cardiac arrest (intervention)
  "OMOP_4306655",   # All-Cause Death (intervention)

  "phecode_438-11", # Abdominal aortic aneurysm
  "phecode_440-3",  # Pulmonary embolism (intervention)
  "phecode_413-21", # Aortic stenosis (intervention)
  "phecode_413-11", # Mitral valve insufficiency
  "phecode_410-2",  # Endocarditis
  "phecode_400",    # Rheumatic fever and chronic rheumatic heart diseases

  "phecode_164",    # Anemia (intervention)
  "phecode_718",    # Back pain (intervention)
  "phecode_324-11", # Parkinson's disease (Primary)
  "phecode_705-1",  # Rheumatoid arthritis (new, intervention)
  "phecode_665",    # Psoriasis (interesting)
  "phecode_284"     # Suicide ideation and attempt or self harm (intervention)
)

# Add a display `name` column: a shortened label for a few long phecode
# descriptions, the unchanged phecode_string for everything else.
endpoint_defs <- endpoint_defs %>%
  mutate(
    name = case_when(
      phecode_string == "Myocardial infarction [Heart attack]" ~ "Myocardial infarction",
      phecode_string == "Cerebral infarction [Ischemic stroke]" ~ "Ischemic stroke",
      phecode_string == "Chronic obstructive pulmonary disease [COPD]" ~ "COPD",
      phecode_string == "Mitral valve insufficiency" ~ "Mitral insufficiency",
      phecode_string == "Parkinson's disease (Primary)" ~ "Parkinson's",
      phecode_string == "Suicide ideation and attempt or self harm" ~ "Suicide attempt",
      phecode_string == "Ischemic heart disease" ~ "Ischemic HD",
      phecode_string == "Chronic kidney disease" ~ "Chronic KD",
      phecode_string == "Rheumatic fever and chronic rheumatic heart diseases" ~ "Rheumatic HD",
      phecode_string == "Abdominal aortic aneurysm" ~ "Abdominal AA",
      TRUE ~ phecode_string
    )
  )

# Lookup from endpoint id to display name.
endpoint_map <- setNames(endpoint_defs$name, endpoint_defs$endpoint)
# Plot order follows the curated selection above (previously: numeric phecode order).
#endpoint_order = (endpoint_defs %>% arrange(as.numeric(phecode)))$endpoint
endpoint_order <- endpoint_selection

In [None]:
# Bootstrap results file (stem only); read from base_path as a feather file.
name <- "230503_bootstrap_results_revision_ensemble"
benchmark_endpoints <- arrow::read_feather(
  glue("{base_path}/{name}.feather")
)

In [None]:
# Incident event counts per endpoint.
# The CSV's first column has no header, so readr names it "...1"; it holds the
# endpoint id. The first "." in each id is replaced by "-" (same normalisation
# as applied to benchmark_endpoints) so the ids can be joined later.
# A plain mutate() replaces the former setDT()/as_tibble() round-trip, which
# converted the tibble to a data.table by reference just to rewrite one column.
incident_events <- read_csv(glue("{base_path}/../221011_incident_events.csv")) %>%
  rename(endpoint = "...1") %>%
  mutate(endpoint = str_replace(endpoint, "\\.", "-"))

In [None]:
# Show the endpoints with at least 100 incident events.
filter(incident_events, num_incident_events >= 100)

In [None]:
# Clean the bootstrap results:
#  1. keep an (endpoint, uuid) group only if none of its cindex values is NA,
#  2. replace the first "." in each endpoint id by "-",
#  3. attach endpoint metadata (natural join on the columns shared with
#     endpoints_md).
# mutate() replaces the former setDT()/as_tibble() round-trip, which mutated
# the table by reference only to rewrite a single column.
benchmark_endpoints <- benchmark_endpoints %>%
  group_by(endpoint, uuid) %>%
  filter(!anyNA(cindex)) %>%
  ungroup() %>%
  mutate(endpoint = str_replace(endpoint, "\\.", "-")) %>%
  left_join(endpoints_md)

In [None]:
# Display the cleaned bootstrap results for visual inspection.
benchmark_endpoints

In [None]:
# Endpoint ids ordered by ascending mean C-index of the
# "AgeSex+MedicalHistory_UKBBParams" score across bootstrap iterations.
endpoints_sorted <- benchmark_endpoints %>%
  filter(score == "AgeSex+MedicalHistory_UKBBParams") %>%
  group_by(endpoint, score) %>%
  summarise(cindex = mean(cindex, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(cindex) %>%
  pull(endpoint)

In [None]:
# Phecode categories in order of first appearance in endpoint_defs.
categories_sorted <- endpoint_defs %>%
  distinct(phecode_category) %>%
  pull(phecode_category)

In [None]:
# Build the All of Us discrimination table: per endpoint, the median C-index
# (with quantile interval) of the two compared scores and of their difference,
# plus a "+"/"-"/"ns" call for the difference.
plot_width = 8.25; plot_height=2.5; plot_res = 320
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res=plot_res)

library(ggtext)
library(ggdist)

# Scores compared: the first is the baseline, the second the model of interest
# (delta below = second minus first).
scores_plot = c("AgeSex_AoUCPH", "AgeSex+MedicalHistory_UKBBParams")

table_2_aou = benchmark_endpoints %>%
    filter(score %in% scores_plot) %>%
    mutate(score = factor(score, levels=scores_plot)) %>%
    mutate(endpoint = factor(endpoint, levels=endpoints_sorted)) %>%
    select(c(endpoint, uuid, score, cindex, phecode_category, phecode_string)) %>%
    group_by(uuid) %>%
    # One row per (endpoint, uuid) with one column per score, so the delta can
    # be computed within each bootstrap iteration.
    pivot_wider(names_from=score, values_from=cindex) %>%
    mutate(delta = `AgeSex+MedicalHistory_UKBBParams`-`AgeSex_AoUCPH`) %>%
    rename('Age+Sex+MedicalHistory'='AgeSex+MedicalHistory_UKBBParams') %>%
    rename('Age+Sex'='AgeSex_AoUCPH') %>%
    pivot_longer(all_of(c("Age+Sex", "Age+Sex+MedicalHistory", "delta")), names_to="type", values_to="cindex") %>%
    group_by(endpoint, phecode_string, phecode_category, type) %>%
    # Median and quantile interval of width 1 - 0.05/1568 per endpoint and type.
    # NOTE(review): presumably a Bonferroni correction over 1568 endpoints - confirm.
    median_qi(cindex, .width=1-(0.05/1568)) %>%
    # Human-readable "median (lower, upper)" string, rounded to 3 decimals.
    mutate(agg = glue("{round(cindex, 3)} ({round(.lower, 3)}, {round(.upper, 3)})")) %>%
    ungroup() %>% select(endpoint, phecode_string, phecode_category, type, agg) %>%
    pivot_wider(names_from=type, values_from=agg) %>%
      # Recover numeric median/lower/upper of the delta by rewriting its string
      # "m (l, u)" to "m ,l, u" and splitting on commas. The numbers parsed here
      # are therefore the 3-decimal rounded values, and the sign call below is
      # made on those rounded bounds.
      mutate(delta2 = str_replace_all(delta, "\\(", ",")) %>% mutate(delta2 = str_replace_all(delta2, "\\)", "")) %>% #mutate(delta2 = str_replace_all(delta2, ",", " ")) %>%#mutate(delta2 = gsub(")", "", delta2)) %>%
    separate(delta2, into = c("delta_median", "delta_lower", "delta_upper"), sep = ",", remove = TRUE, extra = "merge") %>%
      mutate(delta_median= as.numeric(delta_median),
             delta_lower = as.numeric(delta_lower),
             delta_upper = as.numeric(delta_upper)) %>%
    # "-" if the whole interval is below 0, "+" if it is above 0, otherwise "ns".
    mutate(sign = case_when(
        delta_upper<0 ~ "-",
        delta_lower>0 ~ "+",
        TRUE ~ "ns")) %>%
    select(-delta_upper, -delta_lower) %>% arrange(as.character(endpoint)) %>% 
    # Keep only endpoints with at least 100 incident events.
    left_join(incident_events, by='endpoint') %>% 
    filter(num_incident_events >= 100)%>% 
    # Suffix the result columns with "_aou" so they stay distinct when combined
    # with another cohort's table.
    rename(delta_agesex_aou=delta, sign_agesex_aou=sign, `Age+Sex_aou`=`Age+Sex`, `Age+Sex+MedicalHistory_aou`=`Age+Sex+MedicalHistory`, delta_median_agesex_aou = delta_median)

table_2_aou

In [None]:
# Number and share of endpoints per All of Us significance call.
table_2_aou %>%
  count(sign_agesex_aou) %>%
  mutate(freq = n / sum(n))

In [None]:
# Lift pillar's row limit so tibbles print in full from here on.
options(pillar.print_max = Inf)

# Export the All of Us table, rows ordered as in endpoints_md$endpoint.
# Disabled alternatives kept for reference:
#select(all_of(c("phecode_category", "endpoint", "phecode_string", "Age+Sex", 'Age+Sex+MedicalHistory', "delta", "sign"))) %>%
#filter(endpoint %in% endpoint_selection) %>%
table_2_aou %>%
  mutate(endpoint = factor(endpoint, levels = endpoints_md$endpoint)) %>%
  arrange(endpoint) %>%
  write_csv("outputs/SupplTable9_DiscriminativePerformanceAll-AoU.csv")

In [None]:
# Discrimination table previously exported for the other cohort (columns carry
# no "_aou" suffix), read back as a tibble.
table_2_ukb <- as_tibble(
  fread("outputs/SupplTable5_DiscriminativePerformanceAll.csv")
)

In [None]:
# Number and share of endpoints per significance call in table_2_ukb.
table_2_ukb %>%
  count(sign_agesex) %>%
  mutate(freq = n / sum(n))

In [None]:
# Side-by-side table of both cohorts, one row per endpoint.
# The left join keeps every table_2_ukb endpoint; endpoints without a match in
# table_2_aou get NA in the "_aou" columns and are then dropped.
# The original expressed that drop as `sign_agesex_aou == sign_agesex_aou`, a
# self-comparison that is only ever NA or TRUE; `!is.na()` states the intent
# explicitly and keeps exactly the same rows.
results_both <- table_2_ukb %>%
  select(
    endpoint, phecode_string, phecode_category,
    `Age+Sex`, `Age+Sex+MedicalHistory`,
    delta_agesex, sign_agesex, delta_median_agesex
  ) %>%
  left_join(table_2_aou) %>%
  filter(!is.na(sign_agesex_aou)) %>%
  # Long and compact labels for the pair of significance calls, e.g.
  # "UKB: + | AoU: +" and "++".
  mutate(sign_agg = glue("UKB: {sign_agesex} | AoU: {sign_agesex_aou}")) %>%
  mutate(label = glue("{sign_agesex}{sign_agesex_aou}"))
    #mutate(label = case_when(sign_agg == "UKB: + | AoU: +" ~ "++"))
    #mutate(perc_ukb = ntile(delta_median_agesex, 10), perc_aou=ntile(delta_median_agesex_aou, 10)) %>%
    
    #mutate(sign_agg = case_when(sign_agesex=="+" & sign_agesex_aou=="+" ~ "++",
    #                            sign_agesex=="+" & sign_agesex_aou=="-" ~ "+-", 
    ##                            sign_agesex=="-" & sign_agesex_aou=="+" ~ "-+",
    #                            sign_agesex=="-" & sign_agesex_aou=="-" ~ "--",
    #                            TRUE ~ "ns"
    #                           ))

In [None]:
# Export the combined two-cohort comparison table.
results_both %>% write_csv("outputs/SupplTable8_UKBvsAoU.csv")

In [None]:
#results_both %>% mutate(perc_ukb = ntile(delta_median_agesex, 10), perc_aou=ntile(delta_median_agesex_aou, 10)) %>% group_by(perc_ukb) %>% 

In [None]:
# Cross-tabulate the two cohorts' significance calls, with overall shares.
results_both %>%
  count(sign_agesex, sign_agesex_aou) %>%
  mutate(freq = n / sum(n))

In [None]:
# Same cross-tabulation keyed by the combined label, most frequent first.
results_both %>%
  count(sign_agg) %>%
  mutate(freq = n / sum(n)) %>%
  arrange(desc(freq))

In [None]:
# Manual total of three hard-coded counts.
# NOTE(review): presumably copied from the tally output above - confirm after re-running.
sum(1347, 99, 54)

In [None]:
# Pearson correlation between the two cohorts' per-endpoint median deltas.
with(
  results_both,
  cor(delta_median_agesex, delta_median_agesex_aou, method = "pearson")
)

In [None]:
library(ggforce)

In [None]:
# Per-endpoint scatter of the median delta C-index in the two cohorts.
plot_width <- 5
plot_height <- 5
plot_res <- 600
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res = plot_res)

aou_all <- ggplot(results_both, aes(x = delta_median_agesex, y = delta_median_agesex_aou)) +
  # Fix: the programme name is "All of Us", not "All of US".
  labs(x = "Delta C-Index in UK Biobank", y = "Delta C-Index in All of Us", color = "Significance") +
  coord_cartesian(xlim = c(-0.6, 0.6), ylim = c(-0.6, 0.6)) +
  # Identity line and zero axes as visual references.
  geom_abline(alpha = .3) +
  geom_hline(yintercept = 0, linetype = "22", alpha = .3) +
  geom_vline(xintercept = 0, linetype = "22", alpha = .3) +
  # Labelled hulls around the endpoints whose calls agree ("++" and "--").
  geom_mark_hull(
    data = results_both %>% filter(label %in% c("++", "--")),
    aes(label = factor(sign_agg), fill = factor(sign_agg), color = factor(sign_agg)),
    show.legend = FALSE, alpha = 0.1, size = 0.5
  ) +
  scale_fill_manual(values = c("UKB: + | AoU: +" = "forestgreen", "UKB: - | AoU: -" = "firebrick")) +
  scale_color_manual(values = c("UKB: + | AoU: +" = "forestgreen", "UKB: - | AoU: -" = "firebrick")) +
  # Fix: draw each endpoint exactly once. The black layer used to exclude only
  # "++", so "--" points were drawn in black and again in red on top, which
  # muddies the semi-transparent red.
  geom_point(data = results_both %>% filter(!label %in% c("++", "--")), color = "black", alpha = 0.5, size = 0.5) +
  geom_point(data = results_both %>% filter(label %in% c("++")), color = "forestgreen", alpha = 0.5, size = 0.5) +
  geom_point(data = results_both %>% filter(label %in% c("--")), color = "firebrick", alpha = 0.5, size = 0.5)

aou_all
    #geom_label(size=1) + 


In [None]:
library(ggrepel)
library(ggpubr)

plot_width <- 3.25
plot_height <- 4.5
plot_res <- 600
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res = plot_res)

# Bin endpoints into 100 equal-sized groups by their UK Biobank delta and take
# the median delta of each cohort within every bin.
temp <- results_both %>%
  mutate(quant_ukb = ntile(delta_median_agesex, 100)) %>%
  group_by(quant_ukb) %>%
  summarise(
    median_delta_ukb = median(delta_median_agesex),
    median_delta_aou = median(delta_median_agesex_aou)
  )

# One point per bin, coloured by bin index.
aou_perc <- ggplot(temp, aes(x = median_delta_ukb, y = median_delta_aou)) +
  # Fix: the programme name is "All of Us", not "All of US".
  labs(
    x = "Delta C-Index in UK Biobank",
    y = "Delta C-Index in All of Us",
    color = "Percentile of Delta\nC-Index in UK Biobank"
  ) +
  coord_cartesian(xlim = c(-0.4, 0.4), ylim = c(-0.4, 0.4)) +
  geom_abline(alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "22", alpha = 0.3) +
  geom_vline(xintercept = 0, linetype = "22", alpha = 0.3) +
  geom_point(aes(color = as.numeric(quant_ukb))) +
  scale_colour_gradient(low = "#7AC6FF", high = "#023768", space = "Lab", na.value = "grey50", guide = "colourbar", aesthetics = "colour") +
  # Horizontal colour bar placed inside the panel, bottom right.
  theme(legend.position = c(0.75, 0.12), legend.direction = "horizontal", legend.key.width = unit(0.8, "cm")) +
  # The former `size = guide_legend(...)` entry was dropped: no layer maps
  # `size`, so it never produced a legend.
  guides(colour = guide_colourbar(title.position = "top", title.hjust = 0.5)) #+
  #stat_cor(method = "pearson", label.x = 0, label.y = 0)

aou_perc

In [None]:
# Device size for the two-panel figure.
plot_width <- 10
plot_height <- 5
plot_res <- 600
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_res
)

# Scatter and binned panel side by side (patchwork composition).
aou_bc <- aou_all + aou_perc
aou_bc

In [None]:
# Write the two-panel figure as PNG and PDF at the current plot size.
plot_name <- "Figure4bc_AoU_details"
aou_bc %>%
  ggsave(
    filename = glue("outputs/{plot_name}.png"), device = "png",
    width = plot_width, height = plot_height, dpi = plot_res
  )
aou_bc %>%
  ggsave(
    filename = glue("outputs/{plot_name}.pdf"), device = "pdf",
    width = plot_width, height = plot_height, dpi = plot_res
  )

In [None]:
# Subgroup C-index results, with endpoint metadata attached (natural join on
# the columns shared with endpoints_md).
results <- left_join(
  arrow::read_feather(glue("{base_path}/230503_result_ukbbparams_ubr_clean_cindices_ensemble.feather")),
  endpoints_md
)

In [None]:
library(gghighlight)

In [None]:
# Reset the shared font sizes and the default theme before the subgroup plots.
base_size <- 8
title_size <- 10
facet_size <- 9
geom_text_size <- 3

theme_set(
  theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      plot.title = element_text(size = title_size, hjust = 0),
      strip.text.x = element_text(size = facet_size),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8, color = "black"),
      legend.position = "bottom",
      axis.line = element_line(size = 0.2),
      axis.ticks = element_line(size = 0.2),
      panel.grid.major.y = element_line()
    )
)

# Device size for the wide subgroup panel.
plot_width <- 8.25
plot_height <- 2.5
plot_res <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_res
)

#temp = benchmark_endpoints %>%
#    filter(score %in% scores_plot) %>%
#    mutate(score = factor(score, levels=scores_plot)) %>%
#    mutate(endpoint = factor(endpoint, levels=endpoints_sorted)) %>%

# Display order of the population subgroups on the x axis.
groups_sorted <- c('Entire population', 'No UBR categorization', 'At least one UBR categorization', 'Age >= 65', 'Annual income < $25k', 'Less than high school education', 'Non-white or Hispanic/Latino', 'Sexual and gender minorities')

# Two-colour palette: black and vermillion.
cbbPalette <- c("#000000", "#D55E00")

# C-index of endpoint 'phecode_420' where score == 'Entire population'; only
# referenced by a disabled annotate() call further down.
# NOTE(review): this filters on `score`, while the plots use `Group` - confirm
# that `results` carries the subgroup name in `score` as well.
annotation_y <- results %>% filter(endpoint == 'phecode_420') %>% filter(score == 'Entire population') %>% pull(cindex)

results <- results %>%
  mutate(Group = factor(Group, levels = groups_sorted)) %>%
  # Flag the highlighted endpoint ('phecode_420', labelled "Cardiac arrest").
  mutate("is_cardiac" = endpoint == 'phecode_420') %>%
  # Fix: pin the levels explicitly. Without `levels`, factor() derives them from
  # the values present, so data containing only FALSE (or only TRUE) would fail
  # with a labels/levels length mismatch.
  mutate(Endpoint = factor(is_cardiac, levels = c(FALSE, TRUE), labels = c("Phenome-wide", "Cardiac arrest"))) %>%
  mutate(is_cardiac_dbl = as.numeric(endpoint == 'phecode_420')) %>%
  # The pooled "At least one UBR categorization" group is not plotted.
  filter(Group != "At least one UBR categorization")

# C-index per subgroup across all endpoints, drawn as a point with interval
# (ggdist::stat_pointinterval).
# Fix: the former scale_fill_brewer(palette="Set1") was removed. It was replaced
# straight away by scale_fill_manual() below, so it had no effect other than a
# "Scale for fill is already present" message.
fig_phenome <- results %>%
  ggplot(aes(x = Group, y = cindex)) +#, color=is_cardiac))+#, scale=1 + is_cardiac_dbl)) +
  #stat_halfeye(side='left', scale=0.5) +
  stat_pointinterval(size = 0.5, alpha = 0.7) +
  coord_cartesian(ylim = c(0.5, 0.9), clip = "on") +
  #stat_dotsinterval(side = "right", scale = 0.5, slab_size = NA) +
  labs(x = "Group", y = "C-Index") +
  theme(
    axis.title.x = element_blank(),
    panel.grid.major = element_blank(),
    strip.text = element_text(angle = 270, hjust = 1)
  ) +
  theme(legend.position = "none") +
  #theme(legend.position = c(0.95, 0.25)) +
  # Wrap long subgroup names so the tick labels do not overlap.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 5)) +
  scale_fill_manual(values = cbbPalette) +
  scale_colour_manual(values = cbbPalette)# +
  #annotate(geom = "text", x = "Entire population", y = annotation_y + 0.15, label = "Cardiac arrest", hjust = "left", size=2)

# Rows of the highlighted endpoint only, tagged with the legend label.
cardiac_arrest_data <- results %>%
  filter(is_cardiac) %>%
  mutate(color = "Cardiac arrest")

# Overlay the highlighted endpoint as a larger coloured point and show a
# legend entry for it alone, placed inside the panel.
fig_phenome <- fig_phenome +
  geom_point(
    data = cardiac_arrest_data,
    aes(x = Group, y = cindex, color = color),
    size = 2.5
  ) +
  scale_color_manual(
    "",
    values = c("Phenome-wide" = cbbPalette[1], "Cardiac arrest" = "#D55E00"),
    breaks = "Cardiac arrest"
  ) +
  theme(
    legend.position = c(0.85, 0.15),
    legend.background = element_rect(fill = "transparent")
  )# +
  #guides(color = guide_legend(override.aes = list(shape = c(NA, 19), linetype = c("blank", "solid"))))

fig_phenome



In [None]:

# Per-subgroup delta C-index results for the cardiac arrest endpoint.
results_cardiac <- arrow::read_feather(
  glue("{base_path}/230503_result_ukbbparams_ubr_cardiac_arrest_delta_clean_cindices_revision_ensemble.feather")
)

In [None]:
# Copy `score` into `Group`, the column name the subgroup plot uses.
results_cardiac <- mutate(results_cardiac, Group = score)

In [None]:
# Display the cardiac arrest subgroup results for visual inspection.
results_cardiac

In [None]:
# Theme for the narrow stacked figure: same as the notebook default except
# that axis text is reduced to size 6.
base_size <- 8
title_size <- 10
facet_size <- 9
geom_text_size <- 3

theme_set(
  theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      plot.title = element_text(size = title_size, hjust = 0),
      strip.text.x = element_text(size = facet_size),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 6, color = "black"),
      legend.position = "bottom",
      axis.line = element_line(size = 0.2),
      axis.ticks = element_line(size = 0.2),
      panel.grid.major.y = element_line()
    )
)

# Device size for the stacked subgroup figure.
plot_width <- 4
plot_height <- 5
plot_res <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_res
)

# Display order of the population subgroups on the x axis.
groups_sorted <- c(
  "Entire population",
  "No UBR categorization",
  "At least one UBR categorization",
  "Age >= 65",
  "Annual income < $25k",
  "Less than high school education",
  "Non-white or Hispanic/Latino",
  "Sexual and gender minorities"
)

# Delta C-index per subgroup for the cardiac arrest results, drawn as a point
# with interval; the pooled "At least one UBR categorization" group is omitted.
fig_cardiac <- results_cardiac %>%
  mutate(Group = factor(Group, levels = groups_sorted)) %>%
  #filter(Group != "Entire population") %>%
  filter(Group != "At least one UBR categorization") %>%
  ggplot(aes(x = Group, y = delta_cindex)) +
  #geom_boxplot(outlier.alpha = 0.25) +
  stat_pointinterval(size = 0.5, alpha = 0.7) +
  # Disabled pairwise significance brackets, kept for reference:
  #geom_signif(
  #  comparisons = list(
  #    c("No UBR categorization", "Age >= 65"),
  #    c("No UBR categorization", "Annual income < $25k"),
  #    c("No UBR categorization", "Less than high school education"),
  #    c("No UBR categorization", "Non-white or Hispanic/Latino"),
  #    c("No UBR categorization", "Sexual and gender minorities")),
  #  map_signif_level = TRUE,
  #  test=wilcox.test,
  #  step_increase=0.08,
  #  vjust=0.5
  #) +
  coord_cartesian(ylim = c(0, 0.3), clip = "on") +
  #stat_dotsinterval(side = "right", scale = 0.5, slab_size = NA) +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "Group", y = "Delta C-Index\nover age and sex") +
  theme(
    axis.title.x = element_blank(),
    panel.grid.major = element_blank(),
    strip.text = element_text(angle = 270, hjust = 1),
    legend.position = "none"
  ) +
  # Wrap long subgroup names so the tick labels do not overlap.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 5))


# Stack the phenome-wide panel on top of the cardiac arrest panel (patchwork).
combined <- fig_phenome / fig_cardiac

In [None]:
# Write the stacked subgroup figure as PNG and PDF at the current plot size.
plot_name <- "Figure4de_AoU_subgroups"
combined %>%
  ggsave(
    filename = glue("outputs/{plot_name}.png"), device = "png",
    width = plot_width, height = plot_height, dpi = plot_res
  )
combined %>%
  ggsave(
    filename = glue("outputs/{plot_name}.pdf"), device = "pdf",
    width = plot_width, height = plot_height, dpi = plot_res
  )

In [None]:
# Device size for the full composite figure.
plot_width <- 13
plot_height <- 5
plot_res <- 320
options(
  repr.plot.width = plot_width,
  repr.plot.height = plot_height,
  repr.plot.res = plot_res
)

# Cohort-comparison panels followed by the stacked subgroup panels.
bcde <- aou_bc + combined
bcde

In [None]:
# Write the composite figure as PNG and PDF at the current plot size.
plot_name <- "Figure4bcde_AoU_detailsandsubgroups"
bcde %>%
  ggsave(
    filename = glue("outputs/{plot_name}.png"), device = "png",
    width = plot_width, height = plot_height, dpi = plot_res
  )
bcde %>%
  ggsave(
    filename = glue("outputs/{plot_name}.pdf"), device = "pdf",
    width = plot_width, height = plot_height, dpi = plot_res
  )

In [None]:
library(ggrepel)
library(ggpubr)

plot_width <- 5
plot_height <- 5
plot_res <- 600
options(repr.plot.width = plot_width, repr.plot.height = plot_height, repr.plot.res = plot_res)

# Bin endpoints into 10 equal-sized groups by their UK Biobank delta and take
# the median delta of each cohort within every bin.
# Note: this overwrites the `temp` and `aou_perc` objects created by the
# 100-bin version earlier in the notebook.
temp <- results_both %>%
  mutate(quant_ukb = ntile(delta_median_agesex, 10)) %>%
  group_by(quant_ukb) %>%
  summarise(
    median_delta_ukb = median(delta_median_agesex),
    median_delta_aou = median(delta_median_agesex_aou)
  )

# One point per bin, coloured by bin index, with the Pearson correlation of
# the bin medians annotated by ggpubr::stat_cor().
aou_perc <- ggplot(temp, aes(x = median_delta_ukb, y = median_delta_aou)) +
  # Fix: the programme name is "All of Us", not "All of US".
  labs(
    x = "Delta C-Index in UK Biobank",
    y = "Delta C-Index in All of Us",
    color = "Decile of Delta\nC-Index in UK Biobank"
  ) +
  coord_cartesian(xlim = c(-0, 0.2), ylim = c(-0, 0.2)) +
  geom_abline(alpha = 0.3) +
  geom_hline(yintercept = 0, linetype = "22", alpha = 0.3) +
  geom_vline(xintercept = 0, linetype = "22", alpha = 0.3) +
  geom_point(aes(color = as.numeric(quant_ukb))) +
  scale_colour_gradient(low = "#7AC6FF", high = "#023768", space = "Lab", na.value = "grey50", guide = "colourbar", aesthetics = "colour") +
  # Horizontal colour bar placed inside the panel, bottom right.
  theme(legend.position = c(0.78, 0.1), legend.direction = "horizontal", legend.key.width = unit(0.8, "cm")) +
  # The former `size = guide_legend(...)` entry was dropped: no layer maps
  # `size`, so it never produced a legend.
  guides(colour = guide_colourbar(title.position = "top", title.hjust = 0.5)) +
  stat_cor(method = "pearson", label.x = 0, label.y = 0.2)

aou_perc