In [None]:
# Folder holding the shared helper scripts, resolved against the current working directory.
HELPER_DIR <- paste0(getwd(), "/helpers/")

In [None]:
# Source the project helper scripts in their original order (map, shortcuts, helpers).
# source() evaluates in the global environment by default, so the definitions
# land in the workspace exactly as with three separate top-level calls.
invisible(lapply(
  c('map.r', 'shortcuts.r', 'helpers.r'),
  function(script) source(paste0(HELPER_DIR, script))
))

In [None]:
library(cowplot)
library(ggrepel)

# Read ready biomarkers data

- Results

In [None]:
# Load the results table and keep only the per-driver rows:
#   * feature name `x` must contain "driver"
#   * RNA gene-set features, pathway features and the `drivers_total` aggregate are dropped
#   * the unknown-primary cohort and any cohort whose name contains "Other" are dropped
go <- fread("data/0_run.csv") %>%
  fi(
    grepl("driver", x),
    !grepl("rna_geneset", x),
    !grepl("pathway", x),
    x != "drivers_total"
  ) %>%
  fi(
    !grepl("Other", cohort),
    cohort != "Unknown primary (e.g. CUP)"
  )

- Prep output 

In [None]:
# Load the per-sample table and append a second copy of every row relabelled
# as cohort "Pan-Cancer", so each sample is present once under its own cohort
# and once in the pooled cohort.
#base <- fread(paste0(SHARE_DIR, "biomarkers_base.csv"))
base <- fread("tmp.csv")
base <- rbind(base, base %>% mu(cohort = "Pan-Cancer")) %>%
  gb(cohort) %>%
  mu(ct = n()) %>%
  # Keep cohorts with more than 40 rows. The "Unknown primary" cohort itself is
  # removed, but its rows remain in the relabelled Pan-Cancer copy.
  fi(ct > 40, cohort != "Unknown primary (e.g. CUP)") %>%
  # Drop the grouping so later cells do not silently inherit group_by(cohort).
  ug()

In [None]:
# Long-format feature table: one row per (sample, cohort, feature `x`) with its
# value `val`, limited to the features present in `go`, plus the number of
# events per cohort/feature.
base_ready <- base %>%
  se(sampleId, cohort, contains("teal"), any_of(unique(pu(go, x)))) %>%
  # The five telomere columns listed here stay wide; everything else goes long.
  ga(
    x, val,
    -sampleId, -cohort,
    -teal_ref_raw, -teal_tumor_raw, -teal_tumor_final,
    -teal_raw_ratio, -teal_final_ratio
  ) %>%
  gb(cohort, x) %>%
  mu(total_events = sum(val)) %>%
  # Keep only cohort/feature combinations with at least two events.
  fi(total_events >= 2)

In [None]:
# Row count per cohort, stored as `total_patients`.
fixer <- su(gb(base, cohort), total_patients = n())

In [None]:
# Distinct (cohort, feature, total_events) combinations that passed the event filter.
idx <- unique(se(base_ready, cohort, x, total_events))

- Co-occurrence with ATRX, POT1, TERT

In [None]:
# Row count per cohort; joined onto the co-occurrence table as `total_patients`.
all_patients <- su(gb(base, cohort), total_patients = n())

In [None]:
# Per cohort and per driver column (other than ATRX): event count, correlation
# with the ATRX column, ATRX event count, and the number of rows where both are set.
co_occurence_atrx <- base %>%
  se(sampleId, cohort, contains("driver"), -contains("pathway"), -drivers_total) %>%
  # driver_ATRX is kept as its own column; every other driver column goes long.
  ga(x, val, -sampleId, -cohort, -driver_ATRX) %>%
  gb(cohort, x) %>%
  su(
    total_driver_events = sum(val),
    correlation_ATRX = cor(driver_ATRX, val, use = "pairwise.complete.obs"),
    total_ATRX = sum(driver_ATRX),
    # Counts rows whose two values add up to 2 (presumably 0/1 flags - TODO confirm).
    total_cooccurrence_ATRX = sum(driver_ATRX + val == 2)
  ) %>%
  ug()

In [None]:
# Per cohort and per driver column (other than TERT): event count, correlation
# with the TERT column, TERT event count, and the number of rows where both are set.
co_occurence_tert <- base %>%
  se(sampleId, cohort, contains("driver"), -contains("pathway"), -drivers_total) %>%
  # driver_TERT is kept as its own column; every other driver column goes long.
  ga(x, val, -sampleId, -cohort, -driver_TERT) %>%
  gb(cohort, x) %>%
  su(
    total_driver_events = sum(val),
    correlation_TERT = cor(driver_TERT, val, use = "pairwise.complete.obs"),
    total_TERT = sum(driver_TERT),
    # Counts rows whose two values add up to 2 (presumably 0/1 flags - TODO confirm).
    total_cooccurrence_TERT = sum(driver_TERT + val == 2)
  ) %>%
  ug()

In [None]:
# Per cohort and per driver column (other than POT1): event count, correlation
# with the POT1 column, POT1 event count, and the number of rows where both are set.
co_occurence_pot1 <- base %>%
  se(sampleId, cohort, contains("driver"), -contains("pathway"), -drivers_total) %>%
  # driver_POT1 is kept as its own column; every other driver column goes long.
  ga(x, val, -sampleId, -cohort, -driver_POT1) %>%
  gb(cohort, x) %>%
  su(
    total_driver_events = sum(val),
    correlation_POT1 = cor(driver_POT1, val, use = "pairwise.complete.obs"),
    total_POT1 = sum(driver_POT1),
    # Counts rows whose two values add up to 2 (presumably 0/1 flags - TODO confirm).
    total_cooccurrence_POT1 = sum(driver_POT1 + val == 2)
  ) %>%
  ug()

In [None]:
# Combine the ATRX, POT1 and TERT summaries into one table keyed by cohort and
# driver, attach the per-cohort row totals, and strip the "driver_" prefix so
# `x` holds the bare driver name.
cocurrence <- co_occurence_atrx %>%
  full_join(
    co_occurence_pot1,
    by = c("cohort", "x", "total_driver_events")
  ) %>%
  full_join(
    co_occurence_tert,
    by = c("cohort", "x", "total_driver_events")
  ) %>%
  full_join(all_patients, by = "cohort") %>%
  mu(x = gsub("driver_", "", x))

- Filter output to cohort/driver pairs with at least 2 events (the `total_events >= 2` threshold used to build `idx`)

In [None]:
# Build the analysis-ready results table:
#   1. keep only cohort/feature pairs present in `idx`
#   2. strip the "driver_" prefix and attach the co-occurrence statistics
#   3. rename and select the reporting columns, adding rounded correlations and
#      co-occurrence fractions
#   4. keep one covariate specification per row (the Pan-Cancer rows use the
#      specification that additionally contains as.factor(cohort))
#   5. label each estimate by whether est +/- 1.65 * se excludes zero
go_ready <- go %>%
  ij(idx, by = c("cohort", "x")) %>%
  mu(x = gsub("driver_", "", x)) %>%
  lj(cocurrence, by = c("cohort", "x")) %>%
  se(-type, -data, -model, -lrt_pval) %>%
  tm(
    cohort, total_patients,
    measurement = y, driver = x,
    total_driver_events, covariate, est, se, pval,
    correlation_ATRX = round(correlation_ATRX, 2),
    correlation_POT1 = round(correlation_POT1, 2),
    correlation_TERT = round(correlation_TERT, 2),
    total_ATRX, total_POT1, total_TERT,
    total_cooccurrence_ATRX, total_cooccurrence_POT1, total_cooccurrence_TERT,
    frac_cooccurrence_ATRX = round(total_cooccurrence_ATRX / total_ATRX, 2),
    frac_cooccurrence_POT1 = round(total_cooccurrence_POT1 / total_POT1, 2),
    frac_cooccurrence_TERT = round(total_cooccurrence_TERT / total_TERT, 2)
  ) %>%
  fi(measurement != "teal_ref_raw") %>%
  ug() %>%
  fi(
    (cohort == "Pan-Cancer" & covariate == "+ as.factor(cohort) + as.factor(biopsy) + purity + purity_ploidy") |
      (cohort != "Pan-Cancer" & covariate == "+ as.factor(biopsy) + purity + purity_ploidy")
  ) %>%
  mu(
    colors = case_when(
      est - 1.65 * se > 0 ~ "Positive (p < .1)",
      est + 1.65 * se < 0 ~ "Negative (p < .1)",
      # Spelling must match the "Non-Significant" key of the colour palette in
      # driver_plot(); the previous "Non-Signficant" never matched it.
      TRUE ~ "Non-Significant"
    )
  )

- Add driver provenance

In [None]:
# Load the driver catalogue and keep high-likelihood drivers.
# TMP_DIR is not defined in the visible cells; presumably it comes from the
# sourced helper scripts - TODO confirm.
drivers <-
  fread(paste0(TMP_DIR, "drivers.csv")) %>%
  # TSG drivers called by the DISRUPTION method are treated as certain (likelihood 1);
  # every other row keeps its reported driverLikelihood.
  mu(likelihood = ifelse(category == "TSG" & likelihoodMethod == "DISRUPTION", 1, driverLikelihood)) %>%
  # Filter on the adjusted likelihood. The previous filter tested the raw
  # driverLikelihood, which left the override above without any effect.
  fi(likelihood > .8) %>%
  # Replace "-" with "_" in gene names.
  mu(gene = gsub("-", "_", gene))

In [None]:
# For every gene, find its most frequent driver type and return a lookup table
# with columns `driver` (gene name) and `main_driver` (that type).
idx_type <- drivers %>%
  mu(
    # Classify from the `driver` text; the first matching pattern wins.
    driver_type = case_when(
      grepl("AMP", driver) ~ "AMPLIFICATION",
      grepl("DISRUPTION", driver) ~ "DISRUPTION",
      grepl("DEL", driver) ~ "DELETION",
      TRUE ~ "MUTATION"
    )
  ) %>%
  # Count rows per gene and type.
  gb(gene, driver_type) %>%
  su(ct = n()) %>%
  # Rank the types within each gene by count and keep the top one.
  gb(gene) %>%
  mu(rk = row_number(desc(ct))) %>%
  fi(rk == 1) %>%
  gb(gene, driver_type) %>%
  su(tot = sum(ct)) %>%
  ar(desc(tot)) %>%
  ug() %>%
  tm(driver = gene, main_driver = driver_type)

In [None]:
# Attach each driver's dominant driver type (`main_driver`) to the results.
go_ready <- lj(go_ready, idx_type, by = "driver")

- Filter to top results

In [None]:
# Keep every row of any driver whose smallest p-value is below .001, ordered by
# cohort size (largest first) and then by p-value.
share <- go_ready %>%
  gb(driver) %>%
  mu(min_p = min(pval)) %>%
  fi(min_p < .001) %>%
  ar(desc(total_patients), pval)

In [None]:
# Write the filtered results table to disk.
fwrite(x = share, file = "data/share_raw_output.csv")

# 2 - Visualize the results

In [None]:
# Set the notebook plot width option.
options(repr.plot.width = 10)

In [None]:
# Labelled scatter of Pan-Cancer driver associations for one measurement.
#
# y:     measurement name to select from the global `go_ready` table.
# title: human-readable measurement name used in the axis label and plot title.
#
# Returns a ggplot object with one facet per main driver type. Rows without a
# main driver type, DISRUPTION drivers and rows with pval >= .01 are excluded.
plotter <- function(y, title) {
  labelled_hits <- go_ready %>%
    drop_na(main_driver) %>%
    fi(main_driver != "DISRUPTION") %>%
    fi(measurement == y, cohort == "Pan-Cancer", pval < .01)

  ggplot(
    labelled_hits,
    aes(x = est, y = log2(-log10(pval)), color = main_driver)
  ) +
    # Drivers are drawn as repelled text labels only (no point layer).
    geom_text_repel(aes(label = driver), size = 2.5, max.overlaps = Inf) +
    theme_bw() +
    labs(
      y = "Log2( -Log 10( p-value ))",
      x = paste0("Driver association with ", title),
      title = paste0(title, " vs Driver Signals")
    ) +
    theme(plot.title = element_text(hjust = .5)) +
    xlim(-.09, .09) +
    ylim(2, 7.3) +
    facet_wrap(~main_driver, scales = "free")
}

In [None]:
# Measurement column name -> display title used in the plots.
plt_names <- c(
  teal_final_ratio = "Tumor vs Germline Telomere Ratio (Final)"
)

In [None]:
# Build one Pan-Cancer plot per measurement, keyed by measurement name.
plts <- list()
for (i in names(plt_names)) {
  plts[[i]] <- plotter(y = i, title = plt_names[i])
}

- Go

In [None]:
# Drivers with a positive Pan-Cancer estimate for teal_final_ratio and pval < .0001.
view <- pu(
  fi(
    go_ready,
    cohort == "Pan-Cancer",
    est > 0,
    pval < .0001,
    measurement == "teal_final_ratio"
  ),
  driver
)

In [None]:
# Size the notebook output, then keep and display the teal_final_ratio plot.
options(repr.plot.width = 10, repr.plot.height = 3.5)
share_by_driver_type <- plts[["teal_final_ratio"]]
share_by_driver_type

- Make Individual Views

In [None]:
# Up to ten lowest-p-value Pan-Cancer rows per main driver type (DISRUPTION excluded).
# NOTE(review): rows are ranked across all remaining measurements, so a driver
# can appear more than once - confirm whether that is intended.
candidates <- go_ready %>%
  fi(
    cohort == "Pan-Cancer",
    pval < .001,
    main_driver != "DISRUPTION",
    measurement != "purity_svTMB"
  ) %>%
  gb(main_driver) %>%
  mu(rk = row_number(pval)) %>%
  fi(rk <= 10) %>%
  se(driver, main_driver)

In [None]:
# Split the candidate drivers into one vector of driver names per driver type.
deletion_candidates <- pu(fi(candidates, main_driver == "DELETION"), driver)
amplification_candidates <- pu(fi(candidates, main_driver == "AMPLIFICATION"), driver)
mutation_candidates <- pu(fi(candidates, main_driver == "MUTATION"), driver)

- Candidate Plotter

In [None]:
# Set the notebook plot size options for the single-driver plots.
options(repr.plot.width = 7, repr.plot.height = 6)

In [None]:
# Per-cohort effect plot for a single driver.
#
# gene: driver name to select from the global `go_ready` table.
#
# Returns a ggplot object showing, for measurement "teal_final_ratio", the
# estimate per cohort with a horizontal bar spanning est +/- 1.65 * se, cohorts
# ordered by estimate and a vertical reference line at zero.
driver_plot <- function(gene) {
  per_cohort <- go_ready %>%
    fi(measurement == "teal_final_ratio", driver == gene)

  # NOTE(review): these keys have to match the labels stored in go_ready$colors
  # exactly; a label missing here is not given one of these colours.
  significance_palette <- c(
    "Positive (p < .1)" = "dark green",
    "Negative (p < .1)" = "red",
    "Non-Significant"  = "gray"
  )

  ggplot(per_cohort, aes(x = est, y = reorder(cohort, est), color = colors)) +
    geom_point(alpha = .6) +
    geom_errorbarh(
      aes(xmin = est - 1.65 * se, xmax = est + 1.65 * se),
      height = 0.2
    ) +
    theme_bw() +
    geom_vline(xintercept = 0, color = "red") +
    scale_color_manual(values = significance_palette) +
    theme(legend.position = "none") +
    labs(
      x = "Effect Estimate\nShorter Tel <- -> Longer Tel",
      y = "",
      title = paste0("Driver ", gene)
    ) +
    theme(plot.title = element_text(hjust = .5))
}

In [None]:
# One per-cohort plot per candidate driver, collected in a list per driver
# type and keyed by driver name.
d_plts <- list()
m_plts <- list()
a_plts <- list()
for (i in deletion_candidates) {
  d_plts[[i]] <- driver_plot(i)
}
for (i in mutation_candidates) {
  m_plts[[i]] <- driver_plot(i)
}
for (i in amplification_candidates) {
  a_plts[[i]] <- driver_plot(i)
}

In [None]:
# Display the deletion candidate driver names.
deletion_candidates

In [None]:
# Set the notebook plot size options for the multi-panel grids.
options(repr.plot.width = 20, repr.plot.height = 9)

In [None]:
library(patchwork)
# Grid of the deletion-driver plots, five per row, with an overall title; the
# title theme is applied through `&`.
deletion_share <- (
  wrap_plots(d_plts, ncol = 5) +
    plot_annotation(title = "Deletion Based Drivers")
) & theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5))

In [None]:
# Grid of the amplification-driver plots, five per row, with an overall title;
# the title theme is applied through `&`.
amplification_share <- (
  wrap_plots(a_plts, ncol = 5) +
    plot_annotation(title = "Amplification Based Drivers")
) & theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5))

In [None]:
# Grid of the mutation-driver plots, five per row, with an overall title; the
# title theme is applied through `&`.
mutation_share <- (
  wrap_plots(m_plts, ncol = 5) +
    plot_annotation(title = "Mutation Based Drivers")
) & theme(plot.title = element_text(size = 18, face = "bold", hjust = 0.5))

In [None]:
# Display the mutation-driver grid.
mutation_share

## Share plots

In [None]:
# Save the four figures as PNG files in the working directory, using the same
# sizes that were set for notebook display.
ggsave("top_pancer.png", share_by_driver_type, width = 10, height = 3.5)
ggsave("top_deletion.png", deletion_share, width = 20, height = 9)
ggsave("top_amplification.png", amplification_share, width = 20, height = 9)
ggsave("top_mutation.png", mutation_share, width = 20, height = 9)

In [None]:
# Show the working directory (relative paths above are resolved against it).
getwd()