In [None]:
options(dplyr.summarise.inform = FALSE)
library(tidyverse)
library(data.table)
library(survival)

In [None]:
# Load the shared project helper scripts that live three directory levels
# above the current working directory.
# NOTE(review): SHARE_DIR, UTIL_DIR and scanner() used further down are
# presumably defined by these scripts -- confirm.
for (helper_file in c('/map.r', '/stats.r')) {
  source(paste0(dirname(dirname(dirname(getwd()))), helper_file))
}

#### 0 - Read in scaled and prepared data

In [None]:
# Read the prepared data bundle into `go` and report how long the read took.
system.time({
  go <- readRDS(file = paste0(SHARE_DIR, "ready_ex.Rds"))
})

In [None]:
# Unpack the loaded bundle: the feature set handed to scanner() later on,
# and the prepared sample-level table.
features <- go$features
# NOTE: the name `all` shadows base::all as a variable; it is kept because
# the join cell below refers to it.
all <- go$data_ready
# Earlier alternative for deriving the CUP flag directly from `location`:
#   %>% mutate(Y_cup = ifelse(grepl("Unknown", location), 1, 0))

- Add CUP labels (join the cohort definitions onto the prepared data by `sampleId`)

In [None]:
# Read the cohort definition table (joined onto the samples by `sampleId`
# in the next cell).
base <- fread(
  input = "/mnt/petasan_immunocomp/projects/CUPs/cohorts_definition.csv"
)

In [None]:
# Keep only samples present in both the prepared data and the cohort
# definitions, then derive the binary outcome Y_cup (1 when is_CUP is "CUP",
# 0 otherwise; NA stays NA).
ready <- inner_join(all, base, by = "sampleId")
ready <- mutate(ready, Y_cup = ifelse(is_CUP == "CUP", 1, 0))
# Earlier alternative based on the `location` column:
#   mutate(Y_cup = ifelse(location == "CUP", 1, 0))

#### 1 - Define Cohorts

In [None]:
# Cohort list: the full joined table under "pan", followed by one subset per
# selected cancer type. Each subset is stored under its cancer_type label.
selected_types <- c(
  "Lung: Non-small cell: LUAD",
  "Bile duct/Gallbladder",
  "Colorectum/Small intestine/Appendix"
)
cohorts <- c(
  list(pan = ready),
  lapply(
    setNames(selected_types, selected_types),
    function(type_label) filter(ready, cancer_type == type_label)
  )
)

#### 2 - Run Survival and BOR Analyses across cohorts

- Best response

In [None]:
# Run scanner() once per cohort with Y_cup as the first argument and stack
# the per-cohort results into `bor_out`, tagging each row with its cohort.
# NOTE(review): scanner() is defined outside this file; the final "bor"
# argument presumably selects the best-response model -- confirm there.
#
# Results are collected in a preallocated list and bound once at the end
# rather than growing a data frame with rbind() on every iteration. The loop
# variable is no longer called `c`, so base::c is not masked afterwards.
bor_results <- vector("list", length(cohorts))
names(bor_results) <- names(cohorts)

for (cohort_name in names(cohorts)) {
  print(cohort_name); flush.console()

  # scanner() receives the data set by name ("df"), so the current cohort
  # must be bound to a top-level variable called `df` before each call.
  df <- cohorts[[cohort_name]]

  # The pan-cancer cohort mixes cancer types, so cancer_type is added as a
  # covariate there; the single-type cohorts use age and sex only.
  covariates <- if (cohort_name == "pan") {
    "+ as.factor(cancer_type) + clin_age + clin_sex"
    # alternative without covariates: ""
  } else {
    "+ clin_age + clin_sex"
  }

  bor <- scanner("Y_cup", features, covariates, "df", "bor")
  bor_results[[cohort_name]] <- mutate(bor, cohort = cohort_name)
}

bor_out <- bind_rows(bor_results)

#### 3 - Share the output 

In [None]:
# Build the table to share: drop the `data` column, move `cohort` to the
# front, and add Benjamini-Yekutieli ("BY") adjusted p-values. The adjustment
# is computed over all rows of all cohorts together.
share <- select(bor_out, -data)
share <- relocate(share, cohort)
share <- mutate(share, pval_by = p.adjust(pval, method = "BY"))

In [None]:
# Write the shareable results table as CSV under UTIL_DIR.
fwrite(x = share, file = paste0(UTIL_DIR, "biomarker_cup_ex.csv"))