In [None]:
source(paste0(dirname(dirname(dirname(getwd()))),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))
source(paste0(HELP_DIR, "helpers.r"))

In [None]:
library(scales)

### 0 - go

In [None]:
# Load the simulation results table used by every cell below.
# The path is relative to the current working directory.
# NOTE(review): fread is presumably data.table::fread, attached by the sourced
# helper scripts — confirm.
go <- fread("sim_go_new.csv")

### 1 - Power for never response

In [None]:
# One-sided exact binomial test that the response rate is below `p`.
#
# Arguments:
#   n  number of trials (scalar).
#   x  number of successes among the `n` trials (scalar).
#   p  null-hypothesis success probability; default .02.
#
# Returns the p-value of binom.test(x, n, p, alternative = "less"), or 1 when
# there is nothing to test (missing `x`, missing `n`, or zero trials).
#
# Scalar-only: the `if` conditions require length-one inputs, so call it
# row by row.
go_binom_test <- function( n, x, p = .02) {
   # A missing `n` previously reached `n == 0`, which evaluates to NA and makes
   # `if` fail; treat it like a missing `x` instead.
   if (is.na(x) || is.na(n) || n == 0) {
      return(1)
   }
   binom.test(x, n, p, alternative = "less")$p.value
}

In [None]:
# Add, for every row of `go`:
#   expected_events  n * prevalence
#   pval_under2/5/10 one-sided binomial p-value (go_binom_test) that the response
#                    rate among `events` is below 2%, 5% and 10% respectively.
# go_binom_test only accepts scalars, hence the row-by-row evaluation.
# NOTE(review): rw/mu/ug are project shortcuts, presumably
# rowwise/mutate/ungroup — confirm.
go <- go %>%
  rw() %>%
  mu(
    expected_events = n * prevalence,
    pval_under2 = go_binom_test(n = events, x = responders_event, p = .02),
    pval_under5 = go_binom_test(n = events, x = responders_event, p = .05),
    pval_under10 = go_binom_test(n = events, x = responders_event, p = .1)
  ) %>%
  ug()

In [None]:
# Summarise `go` per (prevalence, n, p_base, p_event) combination.
# Each output column is the share of rows in that combination whose p-value is
# below the stated cut-off:
#   signal_raw / signal_adjusted : p_fisher below .05 / .004
#   never_response_lt_10/05/02   : pval_under10/5/2 below .05
# NOTE(review): gb/su are project shortcuts, presumably group_by/summarise — confirm.
plts_base <- 
go %>% 
 #fi(p_event == 0) %>% 
 #mu( prevalence = as.factor(prevalence), `  Feature\nPrevalence` = prevalence) %>% 
 gb(prevalence, n, p_base, p_event) %>% 
 su( signal_raw = mean(p_fisher < .05), 
     signal_adjusted = mean(p_fisher < .004),
     never_response_lt_10 = mean(pval_under10 < .05),
     never_response_lt_05 = mean(pval_under5 < .05),
     never_response_lt_02 = mean(pval_under2 < .05))

In [None]:
# Display labels for each simulated response probability, keyed by the
# character form of the probability value (e.g. "0.01").
mapper <- setNames(
  c(
    "Probability Response = 0% (Never Response)",
    "Probability Response = 1%",
    "Probability Response = 10%",
    "Probability Response = 20%",
    "Probability Response = 40% (No Signal)"
  ),
  c("0", "0.01", "0.1", "0.2", "0.4")
)

In [None]:
# Display labels for each power metric, keyed by the metric's column name.
mapper_threshold <- setNames(
  c(
    "P-value signal raw",
    "P-value signal adjusted",
    "Response < 10%",
    "Response < 5%",
    "Response < 2%"
  ),
  c(
    "signal_raw",
    "signal_adjusted",
    "never_response_lt_10",
    "never_response_lt_05",
    "never_response_lt_02"
  )
)

In [None]:
# Reshape plts_base to long form for plotting: one row per
# (prevalence, n, p_base, p_event, threshold), with the metric name in
# `threshold` and its value in `val`.
# Adds expected counts derived from n and prevalence, plus two factor labels:
#   event : mapper label for p_event
#   gp    : mapper_threshold label for threshold
# Both factors use the reversed mapper order as their levels.
# NOTE(review): ga/mu/rw/ug are project shortcuts, presumably
# gather/mutate/rowwise/ungroup — confirm.
plts_ready <- 
plts_base %>% 
 ga(threshold, val, -prevalence, -n, -p_base, -p_event) %>% 
 mu(expected_non_events = n * (1-as.numeric(as.character(prevalence))), 
    expected_events = n * as.numeric(as.character(prevalence)), 
    expected_events_non_response = n * as.numeric(as.character(prevalence)) * p_event, 
    expected_events_response = n * as.numeric(as.character(prevalence)) * p_base) %>% 
 # `[[` on mapper_threshold needs a single name, hence the row-by-row step.
 rw() %>% 
 mu(event = factor(mapper[as.character(p_event)], levels = rev(unname(mapper))), 
    gp = factor(mapper_threshold[[threshold]], levels = rev(unname(mapper_threshold)))) %>% ug()

In [None]:
# Inline plot size for the notebook output that follows (width 8, height 4).
options(repr.plot.width = 8, repr.plot.height = 4)

In [None]:
# p1: power curves for the p_event == 0 scenario.
# Keeps only the metrics whose name does not contain "never" (signal_raw and
# signal_adjusted), then plots val against sample size n on a log10 x axis.
# Colour encodes prevalence, transparency encodes the metric label (gp).
# NOTE(review): go_theme is defined outside this notebook section — its
# contents are not visible here.
p1 <- 
plts_ready %>% 
 fi(p_event == 0, !grepl("never", threshold)) %>% 
 ggplot( aes(x = n, y = val, alpha = gp, color = as.character(prevalence))) + 
 geom_point(size = 3) +
 # One line per (metric, prevalence) pair.
 geom_line(aes(group = interaction(gp,prevalence)), linewidth = 1.2) + 
 go_theme + 
 scale_x_continuous(trans = "log10", breaks = c(20, 30, 40, 50, 60, 80, 100, 200, 500, 1000, 2000, 5000)) + 
 labs(y = "Statistical Power",
      x = "Sample Size", 
      title = "Statistical Power to detect Never Response signals") +  
 scale_y_continuous(labels = label_percent()) 

In [None]:
# Display p1, then write that same plot to FIG_DIR as an 8 x 4 PNG.
# `plot = p1` is passed explicitly so the saved file does not depend on which
# plot ggplot2 currently regards as the "last plot" (cells can be re-run out
# of order in a notebook).
p1
ggsave( paste0(FIG_DIR, "p1_power.png"), plot = p1, width = 8, height = 4)

- Make Barplots for 80% power

In [None]:
# For each (metric label, prevalence) pair, find the smallest sample size n at
# which the estimated power `val` exceeds 0.8.
# Pairs that never exceed 0.8 produce no row here.
# NOTE(review): the filter is strict (`>`), so a power of exactly 80% does not
# count — confirm that is intended.
# NOTE(review): rows for every p_event value are pooled; there is no p_event
# filter in this cell.
s1 <- 
plts_ready %>% 
 fi(val > .8) %>% 
 gb(gp, prevalence) %>% 
 su( min_samples = min(n), .groups = "drop") %>% 
 ug() 

In [None]:
# Expand s1 to every (gp, prevalence) combination for the bar plot.
# Combinations absent from s1 (80% power never reached) are drawn at a bar
# height of 5000 and labelled "5000+".
#
# The label is decided from the missing value BEFORE the placeholder 5000 is
# written in. Filling first and then testing `min_samples == 5000` would also
# tag a genuine minimum of exactly 5000 samples as "5000+".
power_summary <- 
s1 %>% 
 complete(gp, prevalence) %>% 
 mu(label = ifelse(is.na(min_samples), "5000+", as.character(min_samples)),
    min_samples = ifelse(is.na(min_samples), 5000, min_samples))

In [None]:
# p2: dodged bar chart of the minimum sample size per prevalence, one bar per
# metric label (gp), with the text label printed above each bar.
# Rows with prevalence equal to .01 are left out.
# NOTE(review): go_theme is defined outside this notebook section.
p2 <- 
power_summary %>% 
 fi( prevalence != .01) %>% 
 ggplot( aes(x = as.factor(prevalence), y = min_samples, fill = gp)) + 
 geom_bar(stat = "identity", position = "dodge", color = "black") + 
 # Dodge width 0.9 lines the labels up with the dodged bars.
 geom_text(aes(label = label), position = position_dodge(width = 0.9),vjust = -0.5, size = 4) + 
 go_theme + 
 labs( x = "Biomarker Prevalence", y = "# Patients for 80% Power", title = "Biomarkers for Never Response\n(40% Baseline Response Rate)")

In [None]:
# Display p2, then write that same plot to FIG_DIR as an 8 x 4 PNG.
# `plot = p2` is passed explicitly so the saved file does not depend on which
# plot ggplot2 currently regards as the "last plot".
p2
ggsave( paste0(FIG_DIR, "p2_power.png"), plot = p2, width = 8, height = 4)

- Minimum Events Needed

In [None]:
# Hand-entered table of the number of events needed for 80% power, by
# response threshold (gp) and by whether the p-value is adjusted.
# The first four `n` values belong to adjusted = "No", the last four to "Yes".
min_events_needed <- data.frame(
  adjusted = factor(rep(c("No", "Yes"), each = 4), levels = c("Yes", "No")),
  gp = factor(
    rep(c("Response < 1%", "Response < 2%", "Response < 5%", "Response < 10%"), times = 2),
    levels = c("Response < 1%", "Response < 2%", "Response < 5%", "Response < 10%")
  ),
  n = c(
    300, 150, 60, 30,
    550, 270, 110, 50
  )
)

In [None]:
# p3: dodged bar chart of the minimum number of events needed for 80% power,
# one group of bars per response threshold (gp), filled by whether the
# p-value is adjusted. The value is printed above each bar.
# NOTE(review): go_theme is defined outside this notebook section.
p3 <- 
min_events_needed %>% 
 ggplot( aes(x = gp, y = n, fill = adjusted)) + 
 geom_bar(stat = "identity", position = "dodge", color = "black") + 
 geom_text(aes(label = round(n)), position = position_dodge(width = 0.9),vjust = -0.5, size = 4) + 
 go_theme +
 # Upper limit leaves headroom above the tallest bar (550) for its label.
 ylim(0, 570) + 
 # The x axis shows the response-threshold groups, not prevalence; the
 # previous "Biomarker Prevalence" label was carried over from p2.
 labs( x = "Response Rate Threshold", y = "# Events for 80% Power", title = "Biomarkers for Never Response\nMinimum Events Needed")

In [None]:
# Display p3, then write that same plot to FIG_DIR as a 6 x 4 PNG.
# `plot = p3` is passed explicitly so the saved file does not depend on which
# plot ggplot2 currently regards as the "last plot".
p3
ggsave( paste0(FIG_DIR, "p3_power.png"), plot = p3, width = 6, height = 4)

- Final simulation Figure

In [None]:
# Inline plot size for the notebook output that follows (width 16, height 4).
options(repr.plot.width = 16, repr.plot.height = 4)

In [None]:
# Text annotations for the power-vs-events figures: each row places `name` at
# x = expected_events, y = val, inside the "Never Response" facet.
annotate <- data.frame(
  name = c(
    "B2M Loss\n(Melanoma Anti-PD1)",
    "KRAS hotspot + TMB High\n(Colon Chemo)",
    "???"
  ),
  expected_events = c(10, 37, 150),
  event = rep("Probability Response = 0% (Never Response)", 3),
  val = rep(1.12, 3)
)

In [None]:
#annotat

In [None]:
# p4: power against expected number of events for prevalence == .1 in the
# "Never Response" scenario, one coloured line per metric label (gp).
# The x axis is log10 and limited to 5-300; the y axis runs to 1.2 so the
# annotation text (placed at val = 1.12) sits above the curves.
# Reference lines: horizontal at 0.8, vertical at 10 and 37 expected events,
# matching the x positions of the first two rows of `annotate`.
# NOTE(review): go_theme is defined outside this notebook section.
p4 <- 
plts_ready %>% 
 fi(prevalence == .1, event == "Probability Response = 0% (Never Response)") %>% 
 # Rebuilds `event` with the same expression used when plts_ready was created.
 rw() %>% mu(event = factor(mapper[as.character(p_event)], levels = rev(unname(mapper)))) %>% ug() %>%
 ggplot( aes(x = expected_events, y = val, color = gp)) + 
 geom_point(size = 3) +
 geom_line(aes(group = interaction(threshold,prevalence)), linewidth = 1.2) + 
 facet_wrap(~event, ncol = 5) + 
 go_theme + 
 scale_x_continuous(trans = "log10", breaks = c(1, 5, 10, 20, 37, 100, 200, 500, 1000), limits = c(5,300)) + 
 labs(y = "Statistical Power",
      x = "Log(# Events = (Sample Size * Feature Prevalence))", 
      title = "Statistical Power to detect never response signals") +  
 scale_y_continuous(labels = label_percent(), breaks = c(.25,.5,.75,1), limits = c(0, 1.2)) + 
 geom_hline(yintercept = .8, color= "grey", alpha = .2) + 
 # Labels come from `annotate`, which supplies expected_events, val and event.
 geom_text(data = annotate, aes( label = name), alpha = 1, color = "black", size = 3) + 
 geom_vline(xintercept = 10, color= "grey", alpha = .2) + 
 geom_vline(xintercept = 37, color= "grey", alpha = .2)

In [None]:
# Inline plot size for the notebook output that follows (width 8, height 5).
options(repr.plot.width = 8, repr.plot.height = 5)

In [None]:
# Show p4 as this cell's output.
p4

In [None]:
# Write p4 to FIG_DIR as an 8 x 5 PNG.
# `plot = p4` is passed explicitly so the saved file does not depend on which
# plot ggplot2 currently regards as the "last plot" when this cell runs.
ggsave( paste0(FIG_DIR, "p4_power.png"), plot = p4, width = 8, height = 5)

In [None]:
# Re-declares go_binom_test (it is also defined earlier in this notebook).
#
# One-sided exact binomial test that the response rate is below `p`.
#
# Arguments:
#   n  number of trials (scalar).
#   x  number of successes among the `n` trials (scalar).
#   p  null-hypothesis success probability; default .02.
#
# Returns the p-value of binom.test(x, n, p, alternative = "less"), or 1 when
# there is nothing to test (missing `x`, missing `n`, or zero trials).
go_binom_test <- function( n, x, p = .02) {
   # A missing `n` previously reached `n == 0`, which evaluates to NA and makes
   # `if` fail; treat it like a missing `x` instead.
   if (is.na(x) || is.na(n) || n == 0) {
      return(1)
   }
   binom.test(x, n, p, alternative = "less")$p.value
}

In [None]:
 #binom.test(x = 0, n = 100, p = .05, alternative = "less")$p.value

In [None]:
# Power against expected number of events, one facet per response scenario
# (event) and one coloured line per metric label (gp). `val` is averaged over
# all rows sharing the same (gp, expected_events, event).
# The plot is not assigned to a variable; it is only shown as cell output.
# NOTE(review): `highlight_df` is not defined anywhere in the visible part of
# this notebook — confirm it is created elsewhere (e.g. in the sourced helper
# scripts) before running this cell.
# NOTE(review): go_theme is defined outside this notebook section.
plts_ready %>% 
 gb(gp, expected_events, event) %>% 
 su(val = mean(val)) %>% 
 ggplot( aes(x = expected_events, y = val, color = gp)) + 
 geom_point(size = 3) +
 geom_line(aes(group = gp), linewidth = 1.2) + 
 facet_wrap(~event, ncol = 5) + 
 go_theme + 
 scale_x_continuous(trans = "log10", breaks = c(1, 5, 10, 20, 40, 100, 500, 1000), limits = c(1,1000)) + 
 labs(y = "Statistical Power",
      x = "Expected Number of Events = (Sample Size * Feature Prevalence)", 
      title = "Statistical Power to detect response signals") +  
 scale_y_continuous(labels = label_percent()) + 
 # Horizontal reference line at 0.05.
 geom_hline(yintercept = .05) + 
 # `annotate` only has rows for the "Never Response" event value.
 geom_text(data = annotate, aes( label = name), alpha = 1, color = "black", size = 3) + 
   # Shaded rectangle(s) taken from highlight_df; inherit.aes = FALSE keeps the
   # plot-level x/y/colour mappings from being applied to this layer.
   geom_rect(data = highlight_df,
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = "lightgreen", alpha = 0.3,
            inherit.aes = FALSE) 

### 2 - Never Response signals

In [None]:
# Section-2 version of plts_ready: long-form power table keyed by the factor
# column `  Feature\nPrevalence`.
#
# plts_base as built above is grouped by `prevalence`; the step that used to
# create `  Feature\nPrevalence` is commented out there. Referencing that
# column directly therefore fails, so it is derived here from `prevalence`.
# `prevalence` itself is kept as an identifier column so it is not stacked
# into threshold/val.
# NOTE(review): ug/mu/ga/rw are project shortcuts, presumably
# ungroup/mutate/gather/rowwise — confirm.
plts_ready <- 
plts_base %>% 
 # Drop any leftover grouping so the factor is built over the whole table.
 ug() %>% 
 mu(`  Feature\nPrevalence` = as.factor(prevalence)) %>% 
 ga(threshold, val, -prevalence, -`  Feature\nPrevalence`, -n,  -p_base, -p_event) %>% 
 mu(expected_events = n * as.numeric(as.character(`  Feature\nPrevalence`)), 
    expected_events_non_response = n * as.numeric(as.character(`  Feature\nPrevalence`)) * p_event, 
    expected_events_response = n * as.numeric(as.character(`  Feature\nPrevalence`)) * p_base) %>% 
 rw() %>% mu(event = factor(mapper[as.character(p_event)], levels = rev(unname(mapper)))) %>% ug()

In [None]:
#plts_ready