In [None]:
source(paste0(dirname(dirname(dirname(getwd()))),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))
source(paste0(HELP_DIR, "helpers.r"))

In [None]:
# Directory that figures from this notebook are saved into (absolute,
# user-specific path; must end with "/" because it is used with paste0()).
FIG_DIR <- "/home/josephusset@vhio.org/biomarkers/util/figs/"

In [None]:
library(forcats)

### 0 - Read prepped cohorts data

- Prepared categorical markers

In [None]:
# Load the prepared biomarker bundle and unpack the two elements used below:
#   ready                - the per-sample data that the analysis is built on
#   categorical_features - names of the feature columns to test (selected
#                          later with any_of())
ii <- readRDS(paste0(SHARE_DIR, "biomarkers_ready.Rds"))
ready <- ii$ready
categorical_features <- ii$features

- Cohorts 

In [None]:
# Cohort table; its sampleId and cohort columns are joined onto `ready` below.
cohorts <- fread("/mnt/bioinfnas2/immunocomp/shared_reference_data/cohorts/cohorts_ready.csv")

- Define top cohorts/mechanisms

In [None]:
# Size cut-offs for keeping a cohort/mechanism group (all comparisons are
# strict ">"): total patients must exceed tot_threshold, and the DCB and
# no-DCB patient counts must each exceed dcb_threshold.
tot_threshold <- 50
dcb_threshold <- 15

In [None]:
# Analysis table: two stacked copies of `ready`.
#   1. each sample labelled with its cohort, joined from `cohorts` by sampleId
#      (lj is presumably a left-join shortcut - confirm in shortcuts.r)
#   2. every sample relabelled as "Pan-Cancer"
# cohortGo is the grouping key "<cohort> ## <treatment mechanism>".
go <- rbind(
  ready %>% lj(cohorts %>% se(sampleId, cohort), by = "sampleId"),
  ready %>% mu(cohort = "Pan-Cancer")
) %>%
  mu(cohortGo = paste0(cohort, " ## ", derived_treatmentMechanism))

In [None]:
# Select the cohort/mechanism groups that are large enough to analyse and
# derive display labels for them.
#   ct      - number of rows (patients) in the group
#   no_dcb  - sum of nrDcb (presumably a 0/1 "no durable clinical benefit"
#             flag - confirm upstream)
#   dcb     - ct - no_dcb
top_mechanisms <- go %>%
  gb(cohortGo) %>%
  su(ct = n(), no_dcb = sum(nrDcb), dcb = ct - no_dcb) %>%
  # size filters, then one explicitly excluded group
  fi(ct > tot_threshold, no_dcb > dcb_threshold, dcb > dcb_threshold) %>%
  fi(cohortGo != "Pan-Cancer ## Anti-AR") %>%
  ug() %>%
  # facet label; factor levels fix the facet order (Pan-Cancer first)
  mu(
    pan = factor(
      ifelse(m("Pan-Cancer", cohortGo), "Pan-Cancer", "Cohort Specific"),
      levels = c("Pan-Cancer", "Cohort Specific")
    )
  ) %>%
  # row-wise from here on: strsplit(...)[[1]] only looks at one string, so
  # each row has to be processed on its own
  rw() %>%
  mu(
    # Pan-Cancer rows show only the mechanism; other rows show
    # "cohort/mechanism"
    cohortShow = ifelse(
      pan == "Pan-Cancer",
      paste0(strsplit(cohortGo, " ## ")[[1]][-1], collapse = "/"),
      gsub(" ## ", "/", cohortGo)
    ),
    # shorten long mechanism names for the plot axis
    cohortShow = gsub(" \\(ant\\)agonist", "", cohortShow),
    cohortShow = gsub("Topoisomerase inhibitor", "TI", cohortShow)
  ) %>%
  mu(
    # everything after the first " ## " is the treatment, the part before it
    # is the cohort (blanked out for Pan-Cancer)
    treatment = paste0(strsplit(cohortGo, " ## ")[[1]][-1], collapse = "/"),
    cohort = paste0(strsplit(cohortGo, " ## ")[[1]][1]),
    cohort = ifelse(cohort == "Pan-Cancer", "", cohort)
  ) %>%
  ug()

In [None]:
# Write the selected cohort/mechanism table to SHARE_DIR as CSV.
fwrite(top_mechanisms, paste0(SHARE_DIR, "top_mechanisms.csv"))

- Visualise the cohort counts

In [None]:
# Fill colours keyed by the response labels used in the cohort-count plot.
colors <- c("No Durable Clinical Benefit" = "#e52f28", "Durable Clinical Benefit" = "#7AABD3")

In [None]:
# Inline plot size for the notebook; matches the width/height passed to
# ggsave() for the cohort-count figure.
options(repr.plot.width = 12, repr.plot.height = 8)

In [None]:
# Stacked horizontal bar chart of patient counts per cohort/mechanism, split
# by response, with Pan-Cancer and cohort-specific groups in separate facets.
top_mechanisms %>% 
 # Long form: the two count columns not excluded here (no_dcb, dcb) become
 # rows with their name in `response` and their value in `patients`
 # (ga looks like a gather() shortcut - confirm in shortcuts.r).
 ga(response, patients, -cohortShow, -ct, -pan, -cohortGo, -cohort, -treatment) %>% 
 # Relabel so the values match the names of `colors`.
 mu(response = ifelse(response == "dcb", "Durable Clinical Benefit", "No Durable Clinical Benefit")) %>% 
 ggplot(aes(y = fct_reorder(cohortShow, patients), x = patients, fill = response)) + 
 geom_bar(stat = "identity", color = "black") + 
 scale_fill_manual( values = colors) +  
 go_theme + 
 labs(x = "# of Patients", y = "Post-Biopsy Treatment Mechanism", title = "Available Cohort Sizes", fill = NULL) + 
 facet_grid(pan ~., scales = "free_y", space = "free") + 
 # Print each segment's count in the middle of its stacked bar segment.
 geom_text(aes(label = patients), position = position_stack(vjust = 0.5), color = "black", size = 6) +
 theme(strip.text = element_text(size = 21, color = "black"), axis.text.y = element_text(size = 21), axis.text.x = element_text(size = 21)) + 
 # NOTE(review): theme_bw() is a complete theme, so adding it at this point
 # replaces the theme settings added above it (go_theme and the size-21 text
 # settings on the previous line). Only the theme() calls that follow it are
 # applied on top. Confirm this ordering is intended.
 theme_bw(base_size = 22) + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + 
 theme(legend.position = c(0.96, .5),  legend.justification = c(1, 0))

In [None]:
# No plot argument is given, so ggsave() saves the most recent ggplot; this
# cell therefore relies on the cohort-count plot having been the last one drawn.
ggsave( paste0(FIG_DIR, "cohort_counts.png"), width = 12, height = 8, dpi = 500)

In [None]:
# Echo the path of the figure file for reference.
paste0(FIG_DIR, "cohort_counts.png")

# Prepare count data for fisher exact tests
- Wish I only needed to write this once!

In [None]:
# Count table for the Fisher tests: one row per (cohortGo, feature) with the
# cells of its 2x2 table as columns named "<event>_<non_response>"
# (e.g. `1_0` = event value 1, non_response value 0).
base <- go %>%
  # keep only the cohort/mechanism groups selected in top_mechanisms
  fi(cohortGo %in% (top_mechanisms %>% pu(cohortGo))) %>%
  se(cohortGo, non_response = nrDcb, any_of(categorical_features)) %>%
  # long form: one row per sample and feature, feature value in `event`
  ga(feature, event, -cohortGo, -non_response) %>%
  # samples with a missing feature value are left out of that feature's table
  drop_na(event) %>%
  gb(cohortGo, feature, non_response, event) %>%
  su(tot = n(), .groups = "drop") %>%
  # cells that never occur are absent here and come out as NA
  pivot_wider(names_from = c(event, non_response), values_from = tot)

In [None]:
# A missing cell after pivot_wider() means that combination was never
# observed, so its count is zero.
base[is.na(base)] <- 0

In [None]:
# events = total count in the `1_*` columns (event value 1 in either response
# group); only tables with more than 5 such samples are tested.
base <- base %>% mu(events = `1_0` + `1_1`) %>% fi( events > 5 )

### Go RA Fisher!
- Love struck RA fisher
- Run Fisher's exact test across all categorical features

In [None]:
# P-value of Fisher's exact test (fisher.test() defaults) for a 2x2 table.
#
# The four counts fill the table column by column: (a, b) form the first
# column and (c, d) the second.
#
# Returns a single numeric p-value.
ra_fisher <- function(a, b, c, d) {
  cell_counts <- c(a, b, c, d)
  contingency <- matrix(cell_counts, ncol = 2)
  test_result <- fisher.test(contingency)
  test_result$p.value
}

In [None]:
# Fisher's exact test for every (cohortGo, feature) row of `base`.
ra_go <- base %>%
  # one row at a time, so ra_fisher() gets that row's four counts
  # (rw is presumably a rowwise() shortcut - confirm in shortcuts.r)
  rw() %>%
  mu(fisher_pval = ra_fisher(`0_0`, `0_1`, `1_0`, `1_1`)) %>%
  ug() %>%
  se(cohortGo, feature, `0_0`, `0_1`, `1_0`, `1_1`, events, fisher_pval)

In [None]:
# Inspection only: results ordered by p-value, leaving out the
# "Pan-Cancer ## Anti-PD-1" group.
ra_go %>% ar(fisher_pval) %>% fi(cohortGo != "Pan-Cancer ## Anti-PD-1")

In [None]:
# Inspection only: features whose name contains "hotspot", ordered by p-value.
ra_go %>% fi(grepl("hotspot", feature )) %>% ar(fisher_pval)

In [None]:
# Final results table: Fisher p-values plus readable 2x2 counts, margins and
# non-response rates, ordered by p-value.
#
# The count columns arrive named "<event>_<non_response>" and are renamed to
#   e / ne  = event value 1 / 0
#   r / nr  = non_response value 0 / 1
# Derived columns:
#   tot_e, tot_ne  - totals with / without the event
#   tot_nr, tot_r  - totals of non-responders / responders
#   tot            - all samples in the table
#   pr_nr_given_e  - share of non-responders among samples with the event
#   pr_nr_overall  - share of non-responders among all samples
ra_ready <- 
ra_go %>% 
 ar(fisher_pval) %>% 
 rename("ne_r" = `0_0`, "e_r" = `1_0`, "ne_nr" = `0_1`, "e_nr" = `1_1`) %>%
 # Fix: tot_e previously added e_r to itself, which dropped the
 # non-responders with the event and made pr_nr_given_e wrong.
 mu( tot_e = e_r + e_nr, 
     tot_ne = ne_r + ne_nr,
     tot_nr = e_nr + ne_nr, 
     tot_r = e_r + ne_r, 
     tot = tot_nr + tot_r, 
     pr_nr_given_e = e_nr/tot_e,
     pr_nr_overall = tot_nr/tot) %>% 
 se(cohortGo, feature, fisher_pval, 
    e_nr, e_r, ne_nr, ne_r, 
    tot_e, tot_ne, tot_nr, tot_r, tot, 
    pr_nr_overall, pr_nr_given_e) 

### Send it!

In [None]:
# Write the Fisher exact test results to SHARE_DIR as CSV.
fwrite(ra_ready, paste0(SHARE_DIR, "0_run_fe.csv"))