In [None]:
source(paste0(dirname(dirname(dirname(getwd()))),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))
source(paste0(HELP_DIR, "helpers.r"))

In [None]:
library(forcats)

# Read prepped cohorts data

- Prepared categorical markers

In [None]:
# Load the prepared biomarker bundle. Two of its elements are used here:
# `ready` (the per-sample table) and `features` (later passed to any_of()
# to pick the categorical feature columns).
ii <- readRDS(paste0(SHARE_DIR, "biomarkers_ready.Rds"))
categorical_features <- ii$features

# Row by row: split derived_treatmentType on " ## ", drop repeated entries,
# and glue what is left back together with the same separator.
# rw / mu / ug are aliases from shortcuts.r (presumably rowwise / mutate /
# ungroup -- confirm there).
ready <-
  ii$ready %>%
  rw() %>%
  mu(
    groupedTreatmentType = paste0(
      unique(strsplit(derived_treatmentType, " ## ")[[1]]),
      collapse = " ## "
    )
  ) %>%
  ug()

- Cohorts 

In [None]:
# Cohort reference table read from the shared reference directory; its
# sampleId and cohort columns are joined onto `ready` further down.
cohorts <- fread("/mnt/bioinfnas2/immunocomp/shared_reference_data/cohorts/cohorts_ready.csv")

- Define top cohorts/mechanisms

In [None]:
# Inclusion thresholds. All of them are applied below as strict ">" filters.
# min_patients: a cohort/treatment group must contain more rows than this.
min_patients <- 40
# min_response: the group must have more than this many rows counted by
# sum(nrDcb), and more than this many rows in the remainder (ct - no_dcb).
min_response <- 15
# min_events: a feature must have more than this many rows with the event
# and more than this many rows without it.
min_events <- 5

In [None]:
# Mechanism-level analysis table.
# The rows of `ready` are stacked twice: once with the cohort looked up by
# sampleId, and once under the catch-all cohort "Pan-Cancer". cohortGo then
# labels every row as "<cohort> ## <derived_treatmentMechanism>".
go_treat <-
  rbind(
    lj(ready, se(cohorts, sampleId, cohort), by = "sampleId"),
    mu(ready, cohort = "Pan-Cancer")
  ) %>%
  mu(
    cohortGo = paste0(cohort, " ## ", derived_treatmentMechanism),
    group = "mechanism"
  )

In [None]:
# Treatment-type-level analysis table.
# Same stacking as for the mechanism table (per-sample cohort plus a
# "Pan-Cancer" copy of every row), but cohortGo is keyed on the de-duplicated
# groupedTreatmentType instead of the treatment mechanism.
go_type <-
  rbind(
    lj(ready, se(cohorts, sampleId, cohort), by = "sampleId"),
    mu(ready, cohort = "Pan-Cancer")
  ) %>%
  mu(
    cohortGo = paste0(cohort, " ## ", groupedTreatmentType),
    group = "type"
  )

In [None]:
# Stack the mechanism-level and type-level tables; the `group` column
# ("mechanism" / "type") records which of the two each row came from.
go <- bind_rows(go_treat, go_type)

In [None]:
# Per cohortGo/group: count the rows and keep only groups that clear the
# size thresholds.
#   ct     - number of rows in the group
#   no_dcb - sum of nrDcb within the group
#   dcb    - the remainder, ct - no_dcb
top_mechanisms <-
  go %>%
  gb(cohortGo, group) %>%
  su(
    ct = n(),
    no_dcb = sum(nrDcb),
    dcb = ct - no_dcb
  ) %>%
  fi(
    ct > min_patients,
    no_dcb > min_response,
    dcb > min_response
  ) %>%
  ug()

In [None]:
# Persist the retained cohort/treatment groups as top_mechanisms.csv under SHARE_DIR.
fwrite(top_mechanisms, paste0(SHARE_DIR, "top_mechanisms.csv"))

# Prepare count data for fisher exact tests
- Wish I only needed to write this once!

In [None]:
# Build one 2x2 contingency table per (cohortGo, feature) pair.
#  1. keep rows whose cohortGo is listed in top_mechanisms
#  2. reshape the categorical feature columns to long form (feature, event)
#     and drop rows where the feature value is missing
#  3. recode non_response to "nr"/"r" (== 1 or not) and event to "e"/"ne"
#     (== 1 or not)
#  4. count rows per cell and spread the cells into e_r, e_nr, ne_r, ne_nr
#  5. add marginal totals, drop features that are too rarely present or
#     absent, and flag whether the event is enriched among non-responders
base <-
go %>%
 fi(cohortGo %in% (top_mechanisms %>% pu(cohortGo))) %>%
 se(cohortGo, non_response = nrDcb, any_of(categorical_features)) %>%
 ga(feature, event, -cohortGo, -non_response) %>%
 drop_na(event) %>%
 mu(non_response = ifelse(non_response == 1, "nr", "r"), event = ifelse(event == 1, "e", "ne")) %>%
 gb(cohortGo, feature, non_response, event) %>%
 su(tot = n(), .groups = "drop") %>%
 # Cells that never occur for a (cohortGo, feature) pair are filled with 0
 # while pivoting. The previous across(everything(), ~replace_na(., 0)) also
 # hit the character key columns (cohortGo, feature), which tidyr >= 1.2
 # rejects because a numeric replacement cannot be cast to character.
 pivot_wider(
   names_from = c(event, non_response),
   values_from = tot,
   values_fill = list(tot = 0L)
 ) %>%
 mu(
   events = e_r + e_nr,
   no_events = ne_r + ne_nr,
   responders = e_r + ne_r,
   non_responders = e_nr + ne_nr,
   total_patients = events + no_events
 ) %>%
 fi(events > min_events, no_events > min_events) %>%
 # "Non-Response" when the non-responder share among patients with the event
 # exceeds the overall non-responder share; otherwise "Response".
 mu(direction = ifelse(e_nr / events > non_responders / total_patients, "Non-Response", "Response"))

## Go RA Fisher!
- Run Fisher's exact test across all categorical features
- Just leave the duplicates, add metadata for future analyses

In [None]:
#' Fisher's exact test on a 2x2 table given as four cell counts
#'
#' The counts are laid out column-wise: `a` and `b` form the first column,
#' `c` and `d` the second.
#'
#' @param a,b Counts for the first column of the table.
#' @param c,d Counts for the second column of the table.
#' @return The object returned by `fisher.test()` for that table.
ra_fisher <- function(a, b, c, d) {
  fisher.test(
    matrix(c(a, b, c, d), ncol = 2)
  )
}

In [None]:
# Run Fisher's exact test once per row of `base`.
# The four counts are passed so that the 2x2 table has the no-event counts
# (ne_nr, ne_r) in its first column and the event counts (e_nr, e_r) in its
# second. The p-value, odds-ratio estimate and confidence bounds are packed
# into a one-row tibble and then spread into regular columns.
ra_go <-
  base %>%
  rw() %>%
  mu(
    results = list({
      ft <- ra_fisher(ne_nr, ne_r, e_nr, e_r)
      tibble(
        fisher_pval = ft$p.value,
        or = ft$estimate,
        ci_low = ft$conf.int[1],
        ci_high = ft$conf.int[2]
      )
    })
  ) %>%
  unnest_wider(results)

- Add p-value adjustments

In [None]:
# Multiple-testing correction computed over every row of ra_go at once:
# Benjamini-Hochberg ("BH") and Benjamini-Yekutieli ("BY") adjusted p-values.
ra_go[["p_adj_BH"]] <- p.adjust(ra_go[["fisher_pval"]], method = "BH")
ra_go[["p_adj_BY"]] <- p.adjust(ra_go[["fisher_pval"]], method = "BY")

In [None]:
# Label each test with the strictest significance tier it passes at 0.05:
#   "by"   - significant after Benjamini-Yekutieli adjustment
#   "bh"   - significant after Benjamini-Hochberg adjustment only
#   "raw"  - significant on the unadjusted p-value only
#   "none" - not significant
# case_when() returns the first matching arm, so the strictest tier must be
# tested first. With the raw-p-value arms listed first, every row matched
# "none" or "raw" and the "bh"/"by" arms could never be reached.
ra_go <-
ra_go %>% 
mu(
 sig_category = case_when( 
    p_adj_BY < .05 ~ "by",
    p_adj_BH < .05 ~ "bh",
    fisher_pval < .05 ~ "raw",
    fisher_pval >= .05 ~ "none")
)

# Write home

In [None]:
# Persist the Fisher test results (counts, test statistics, adjusted p-values,
# significance tier) as 1_run_fishers_exact.csv under SHARE_DIR.
fwrite(ra_go, paste0(SHARE_DIR, "1_run_fishers_exact.csv"))