In [None]:
source(paste0(dirname(dirname(getwd())),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))
source(paste0(HELP_DIR, "helpers.r"))

In [None]:
library(forcats)
#library(wesanderson)

### 0 - Read exhaustive analysis

In [None]:
# Load the two result tables from the shared directory.
# `fe` holds the Fisher exact results used below; `lr` comes from 1_run_lr.csv.
fe <- paste0(SHARE_DIR, "0_run_fe.csv") %>% fread()
lr <- paste0(SHARE_DIR, "1_run_lr.csv") %>% fread()

- Remove redundancies in the Fisher's exact test output
- Some tests are equivalent, but with their signs flipped

In [None]:
# Return `i` without its final element; an empty input is returned empty.
remove_last <- function(i) {
  head(i, -1L)
}

In [None]:
# Build `fe_go`: the Fisher exact results with redundant tests removed and a
# direction label attached.
#
# rw/mu/ug/gb/fi/se are aliases from the sourced helper files (presumably
# dplyr rowwise/mutate/ungroup/group_by/filter/select -- confirm there).
#
# Steps:
#   1. raw   = feature name with its last "_"-separated token removed
#              (computed one row at a time, hence the rw()/ug() pair).
#   2. Within each (cohortGo, raw, fisher_pval) group keep only the row ranked
#      first by descending e_nr.
#   3. label = "Non-Response Associated" when pr_nr_given_e > pr_nr_overall,
#              otherwise "Response Associated".
#   4. Drop features matching "geneset_mp" and one long combined cohort.
# The result stays grouped by (cohortGo, label).
fe_go <- fe %>%
  rw() %>%
  mu(raw = paste(head(strsplit(feature, "_")[[1]], -1L), collapse = "_")) %>%
  ug() %>%
  gb(cohortGo, raw, fisher_pval) %>%
  mu(
    rk = row_number(desc(e_nr)),
    label = factor(
      ifelse(pr_nr_given_e > pr_nr_overall,
             "Non-Response Associated",
             "Response Associated"),
      levels = c("Response Associated", "Non-Response Associated")
    )
  ) %>%
  fi(rk == 1) %>%
  se(-rk) %>%
  gb(cohortGo, label) %>%
  fi(!grepl("geneset_mp", feature)) %>%
  fi(cohortGo != "Pan-Cancer ## Folinic acid ## Platinum ## Pyrimidine (ant)agonist ## Topoisomerase inhibitor")

In [None]:
# Quick size check: dimensions of the "Pan-Cancer ## Anti-PD-1" rows of fe_go.
fe_go %>%
  fi(cohortGo == "Pan-Cancer ## Anti-PD-1") %>%
  dim()
 

### Check the QQ plots

In [None]:
# Set the notebook plot size (repr.plot.* options) for the QQ plots below.
options(repr.plot.width = 12, repr.plot.height = 7)

In [None]:
# QQ plots of the Fisher exact p-values, one panel per cohort.
#
# Per cohort:
#   observed = -log10 of the sorted p-values
#   expected = -log10(ppoints(n)), n being the number of observed values
# Note: sort() drops NA values, so this assumes fisher_pval has no NAs within
# a cohort (otherwise the column lengths would not match).
#
# Panels are ordered with fct_reorder() on the minimum of -observed, and the
# reference line is y = x. geom_abline() takes `intercept`/`slope`; the former
# `a`/`b` arguments were not recognised by ggplot2 and the line only appeared
# through the function's defaults. The previous `rk` column was constant
# (row_number() of a single value), so it and the arrange on it were removed.
fe_go %>% 
 #fi(!grepl("rna_", feature)) %>%
 fi(nchar(cohortGo) < 60) %>%  # keep only cohorts with names under 60 characters
 gb(cohortGo) %>% 
 mu( observed = -log10(sort(fisher_pval)), 
     expected = -log10(ppoints(length(observed)))) %>%
 ug() %>% 
 ggplot(aes(x = expected, y = observed)) + 
 geom_point() + 
 facet_wrap(~fct_reorder(str_wrap(cohortGo, width = 23), -observed, .fun = min), ncol = 7) + 
 geom_abline(intercept = 0, slope = 1) + 
 go_theme + 
 labs(x = "-Log10( expected p-values )", y = "-Log10( observed p-values )", title = "QQ Plots by Cohort")

#### Data Preparation for Bar plots

In [None]:
# Display labels keyed by the count-column names (e_nr, e_r, ne_nr, ne_r).
# The order here is also the factor level order used for `event2`.
map <- c(
  e_nr  = "Event + No DCB",
  e_r   = "Event + DCB",
  ne_nr = "No Event + No DCB",
  ne_r  = "No Event + DCB"
)

In [None]:
# Long-format table for the bar plots: one row per (test, count column).
#
# Keeps the identifying columns plus the four counts, stacks the counts into
# `event` (column name) / `ct` (value), then maps each `event` to its display
# label from `map` as the factor `event2`.
#
# The lookup map[[event]] is scalar, so it is evaluated row by row via rw().
# The result is intentionally left as returned by that row-wise step.
# NOTE(review): later per-row expressions may depend on this -- confirm before
# adding an ug() here.
base <- fe_go %>%
  se(cohortGo, label, feature, fisher_pval, e_nr, e_r, ne_nr, ne_r) %>%
  ga(event, ct, e_nr, e_r, ne_nr, ne_r) %>%
  rw() %>%
  mu(event2 = factor(map[[event]], levels = unname(map)))

In [None]:
# Benjamini-Hochberg adjustment of the Fisher p-values across all rows of `base`.
# NOTE(review): `base` is in long format at this point, so each test's p-value
# presumably appears once per count column -- confirm that adjusting over the
# replicated rows is the intended multiple-testing family.
base$p_adj <- p.adjust(base$fisher_pval, method = "BH")

In [None]:
# Rows to plot: per cohort, the top-ranked non-response-associated result that
# passes the BH threshold.
#
# m(pattern, x) is a helper from the sourced files (presumably a grepl-style
# matcher -- confirm there).
#
# Steps:
#   1. rna     : "RNA Feature" when the feature name matches "rna_", else
#                "DNA or Clinical Feature".
#   2. feature : strip the "rna_geneset_" / "gene_set_" prefix, then title-case.
#   3. rank rows by fisher_pval within (cohortGo, label) and keep rk < 5.
#      NOTE(review): `base` has one row per count column, so this is presumably
#      the four rows of the single most significant test; tied p-values across
#      different features could mix rows -- verify.
#   4. event / dcb flags derived from the event2 label.
#   5. keep "Non-Response Associated" rows with p_adj < .1, excluding cohorts
#      whose name contains "CTLA".
#   6. top = largest count within the cohort (used to place the p-value text).
now <- 
base %>% 
 # Explicit row-wise evaluation: strsplit(...)[[1]][2] below is only correct
 # when `feature` is a single value, so do not rely on `base` having been left
 # row-wise by an earlier cell.
 rw() %>% 
 mu(rna = ifelse(m("rna_", feature), "RNA Feature", "DNA or Clinical Feature" )) %>% 
 mu( feature = ifelse(m("rna_geneset", feature), strsplit(feature, "rna_geneset_")[[1]][2], feature), 
     feature = ifelse(m("gene_set_", feature), strsplit(feature, "gene_set_")[[1]][2], feature),
     feature = str_to_title(feature)) %>% 
 gb(cohortGo, label) %>% 
 mu(rk = row_number(fisher_pval)) %>% 
 fi(rk < 5) %>% 
 mu(event = !m("No Event", event2), dcb = !m("No DCB", event2)) %>% 
 fi(label == "Non-Response Associated") %>% 
 fi(p_adj < .1, !grepl("CTLA", cohortGo)) %>% 
 gb(cohortGo) %>% mu(top = max(ct)) %>% ug()

In [None]:
# Human-readable panel titles, keyed by the title-cased feature id.
name_map <- c(
  Apm_lt75                                = "RNA: Low Antigen Processing",
  Cn_simple_chr13_q_gt25                  = "Copy Number: High Chr13q",
  Cn_simple_chr18_p_gt25                  = "Copy Number: High Chr18p",
  Hallmark_apical_surface_lt50            = "RNA: Low Apical Surface Expression",
  Kegg_cytosolic_dna_sensing_pathway_lt75 = "RNA: Low Cytosolic DNA Sensing",
  Cn_simple_chr3_q_gt25                   = "Copy Number: High Chr3q",
  Hallmark_interferon_alpha_response_lt50 = "RNA: Low interferon alpha response",
  Driver_total_gt50                       = "Drivers: High total number"
)

In [None]:
# Replace feature ids with their display names from name_map.
#
# The lookup is vectorised and falls back to the current value when a feature
# has no entry. The earlier row-wise name_map[[feature]] stopped with
# "subscript out of bounds" on any unmapped feature, which also made this cell
# fail when re-run on already-renamed data.
now <- now %>%
  mu(feature = coalesce(unname(name_map[feature]), feature)) %>%
  ug()

In [None]:
# Build the faceted bar chart of patient counts for the rows held in `now`.
#
# Takes no arguments; reads `now`, `response`, `alphas` and `go_theme` from the
# calling environment. Returns the ggplot object (nothing is drawn or saved).
#
# One panel per (cohort, feature), cohorts ordered by mean fisher_pval; bars are
# filled by `dcb` and faded by `event`. Each bar carries its count, and each
# panel a p-value annotation placed just above the cohort's tallest bar.
cohort_plotter <- function() {
  panels <- facet_wrap(
    ~fct_reorder(cohortGo, fisher_pval, .fun = mean) + feature,
    ncol = 4,
    scales = "free_y"
  )

  # Count printed above each bar.
  count_labels <- geom_text(
    aes(label = ct),
    vjust = -0.5, color = "black", size = 3
  )

  # P-value text centred between the 2nd and 3rd bar, slightly above `top`.
  pval_labels <- geom_text(
    aes(x = 2.5,
        y = top + top/40,
        label = paste0("p-value: ", formatC(fisher_pval, format = "e", digits = 1))),
    color = "black", size = 3
  )

  ggplot(now, aes(x = event2, y = ct, alpha = event, fill = dcb)) +
    panels +
    geom_bar(stat = "identity", color = "black") +
    scale_fill_manual(values = response) +
    scale_alpha_manual(values = alphas) +
    go_theme +
    count_labels +
    pval_labels +
    labs(title = "Most Statistically Significant Results within Cohorts (BH adjusted p-value < .1)",
         y = "Number of Patients\n",
         x = "Event and Durable Clinical Benefit") +
    theme(strip.background = element_rect(fill = "white", color = "black"),
          legend.position = "none")
}

In [None]:
# Set the notebook plot size for the bar chart, then build it once and keep
# the ggplot object in `gg` for display and saving.
options(repr.plot.width = 11.4, repr.plot.height = 6.4)
gg <- cohort_plotter()

In [None]:
# Render the plot in the notebook.
gg

In [None]:
# Write the plot to FIG_DIR as top_go.png (12 x 7, in ggsave's default units).
ggsave( paste0(FIG_DIR, "top_go.png"), plot = gg, width = 12, height = 7)

In [None]:
# Echo the full path of the saved figure.
paste0(FIG_DIR, "top_go.png")