### To add
- Tumor stage (svTMB)
- Only focus on amplification and deletion drivers

In [None]:
# Directory holding the helper scripts, resolved against the current working
# directory. The trailing slash lets callers append a file name with paste0().
HELPER_DIR <- paste(getwd(), "helpers/", sep = "/")

In [None]:
# Load the project helper scripts in a fixed order: map.r, shortcuts.r, then
# helpers.r. source() evaluates each file in the global environment by default,
# so whatever they define is available to the cells that follow.
# NOTE(review): the pipe shortcuts (se, mu, fi, gb, su, ar, pu), scanner() and
# SHARE_DIR used later are not defined in this notebook; presumably they come
# from these files — confirm.
invisible(lapply(
  c("map.r", "shortcuts.r", "helpers.r"),
  function(helper_file) source(paste0(HELPER_DIR, helper_file))
))

# Read ready biomarkers data

In [None]:
# Load the base biomarkers table from the shared data directory.
# NOTE(review): SHARE_DIR is not set in this notebook; presumably one of the
# sourced helper scripts defines it (with a trailing slash) — confirm.
base <- paste0(SHARE_DIR, "biomarkers_base.csv") %>%
  fread()

In [None]:
# Build the analysis table from the base biomarkers
# (assuming se/mu are the select/mutate shortcuts from the helper scripts):
#   1. keep the identifier columns plus every column whose name contains
#      "driver", "purity" or "teal_";
#   2. drop columns with a single distinct value, then columns whose values
#      are all 0 or NA;
#   3. collapse biopsy sites other than Liver / Lymph node / Bone / Lung into
#      "Other";
#   4. fill missing numeric values with the column median;
#   5. split purity_svTMB into three bins labelled Low / Medium / High.
ready <-
  base %>%
  se(
    sampleId,
    cohort,
    biopsyStructure,
    contains("driver"),
    contains("purity"),
    contains("teal_")
  ) %>%
  se(where(function(column) n_distinct(column) > 1)) %>%
  se(where(function(column) !all(column %in% c(0, NA)))) %>%
  mu(
    biopsy = ifelse(
      biopsyStructure %in% c("Liver", "Lymph node", "Bone", "Lung"),
      biopsyStructure,
      "Other"
    )
  ) %>%
  mu(
    across(
      where(is.numeric),
      function(column) replace_na(column, median(column, na.rm = TRUE))
    )
  ) %>%
  # With a single number for `breaks`, cut() makes equal-width intervals over
  # the observed range; the three groups are not equal-sized.
  mu(
    sv_group = cut(
      purity_svTMB,
      breaks = 3,
      labels = c("Low", "Medium", "High")
    )
  )

In [None]:
# Append a relabelled copy of the rows in each SV-burden bin, so every bin can
# later be analysed as a cohort of its own. The existing rows stay first,
# followed by the Low, Medium and High copies in that order.
ready <- do.call(
  rbind,
  c(
    list(ready),
    unname(Map(
      function(bin_level, bin_cohort) {
        ready %>%
          fi(sv_group == bin_level) %>%
          mu(cohort = bin_cohort)
      },
      c("Low", "Medium", "High"),
      c(
        "Pan-Cancer: SV Burden Low",
        "Pan-Cancer: SV Burden Med",
        "Pan-Cancer: SV Burden High"
      )
    ))
  )
)

In [None]:
# Pool the non-epithelial tumours into a single "Non-Epithelial" cohort:
# cohorts whose name contains "Soft tissue", "NET" or "Melanoma", plus the
# cohort named exactly "Glioblastoma".
non_epithelial <-
  ready %>%
  fi(cohort == "Glioblastoma" | grepl("Soft tissue|NET|Melanoma", cohort)) %>%
  mu(cohort = "Non-Epithelial")

In [None]:
# Pool every sample that is not flagged as non-epithelial into a single
# "Epithelial" cohort.
#
# At this point `ready` already holds the "Pan-Cancer: SV Burden ..." rows,
# which are relabelled copies of samples from every tumour type. Their cohort
# label matches none of the non-epithelial patterns, so without the first
# filter they would all be pooled here: epithelial samples would be counted
# twice, and soft-tissue / glioblastoma / NET / melanoma samples would be
# mislabelled as epithelial.
epithelial <-
  ready %>%
  fi(!grepl("^Pan-Cancer: SV Burden", cohort)) %>%
  fi(!(grepl("Soft tissue", cohort) | cohort == "Glioblastoma" | grepl("NET", cohort) | grepl("Melanoma", cohort))) %>%
  mu(cohort = "Epithelial")

In [None]:
# Stack the pooled "Non-Epithelial" and "Epithelial" rows underneath the
# existing table, in that order.
ready <- do.call(rbind, list(ready, non_epithelial, epithelial))

In [None]:
# Write a snapshot of the assembled table to tmp.csv in the working directory.
fwrite(x = ready, file = "tmp.csv")

# Run

- Define outcomes, features, cohorts, and covariate sets

In [None]:
# Outcome candidates: every column whose name contains "teal".
telomeres <- ready %>%
  se(contains("teal")) %>%
  names()

# Predictors to scan: every remaining column except the identifier and
# grouping columns.
# NOTE(review): this keeps the purity columns, which are also used as an
# outcome (purity_svTMB) and inside the covariate strings below — confirm that
# scanner() copes with a feature that is also the outcome or a covariate.
features <- ready %>%
  se(
    -c(sampleId, cohort, biopsyStructure, biopsy, sv_group),
    -contains("teal")
  ) %>%
  names()

# Cohort labels to analyse: those with more than 30 rows, largest first
# (assuming gb/su/ar/pu are the group_by/summarise/arrange/pull shortcuts),
# followed by the special "Pan-Cancer" label.
cohorts <- c(
  ready %>%
    gb(cohort) %>%
    su(ct = n()) %>%
    fi(ct > 30) %>%
    ar(desc(ct)) %>%
    pu(cohort),
  "Pan-Cancer"
)

# Covariate fragments handed to scanner() one at a time, from no adjustment up
# to cohort + biopsy site + purity + ploidy. The strings are kept exactly as
# they were, including their leading whitespace.
covariates <- c(
  "",
  " + purity_ploidy",
  "+ as.factor(biopsy)",
  "+ as.factor(biopsy) + purity_ploidy",
  "+ as.factor(biopsy) + purity",
  "+ as.factor(biopsy) + purity + purity_ploidy",
  "+ as.factor(cohort) + as.factor(biopsy) + purity",
  "+ as.factor(cohort) + as.factor(biopsy) + purity + purity_ploidy"
)

- Scale features for common comparison

In [None]:
# Standardise every feature column (centre to mean 0, scale to sd 1) so that
# effect sizes are comparable across features.
# scale() returns a one-column matrix carrying centre/scale attributes;
# as.numeric() turns it back into a plain numeric vector so `go` keeps ordinary
# columns instead of matrix columns. The values themselves are unchanged.
go <- ready %>%
  mu(across(any_of(features), ~ as.numeric(scale(.))))

- Let's go

In [None]:
# Scan every feature against each outcome, within each cohort and under each
# covariate set, stacking the per-run tables into `results` with the cohort
# label attached.
results <- data.frame()

# `go` holds each sample more than once: its original row, an
# "SV Burden" copy, and a pooled "Epithelial" / "Non-Epithelial" copy. Running
# the pan-cancer models on all of `go` would count every sample several times
# (inflating n and shrinking standard errors) and would feed the derived labels
# into as.factor(cohort). Keep only rows that still carry an original cohort
# label.
pan_cancer <- go %>%
  fi(!(cohort %in% c("Epithelial", "Non-Epithelial") |
         grepl("^Pan-Cancer: SV Burden", cohort)))

#for(i in c(telomeres, "purity_svTMB")){
for (i in c("teal_final_ratio", "purity_svTMB")) {
  print(i); flush.console()
  for (j in cohorts) {
    print(j); flush.console()
    # scanner() receives the data by name (df = "run"), so the subset has to
    # live in a variable called `run`.
    if (j == "Pan-Cancer") {
      run <- pan_cancer
    } else {
      run <- go %>% fi(cohort == j)
    }
    for (k in covariates) {
      print(k); flush.console()
      # NOTE(review): for a single-cohort `run`, the covariate sets containing
      # as.factor(cohort) have a one-level factor; presumably scanner() handles
      # or skips a failing fit — confirm.
      oo <- scanner(y = i, features, covariates = k, df = "run", mod = "lm")
      # Appending per run keeps partial results available if a later fit stops
      # the loop.
      results <- rbind(results, oo %>% mu(cohort = j))
    }
  }
}

# Send them!

In [None]:
# Save the stacked scan results to data/0_run.csv, relative to the working
# directory.
fwrite(x = results, file = "data/0_run.csv")