In [None]:
options(dplyr.summarise.inform = FALSE)
library(tidyverse)
library(data.table)
library(survival)
library(gridExtra)

In [None]:
# Source the shared helper scripts that sit three directory levels above the
# current working directory. source() evaluates them in the global
# environment, so whatever they define becomes available to the cells below
# (presumably SHARE_DIR, UTIL_DIR and get_stats2 -- confirm in those files).
invisible(lapply(
  c('/map.r', '/stats.r'),
  function(helper) source(paste0(dirname(dirname(dirname(getwd()))), helper))
))

#### 0 - Run analyses

In [None]:
# Read the prepared analysis object into `go`; system.time() reports how long
# the read took. SHARE_DIR is not defined in this notebook (presumably it
# comes from the sourced helper files).
system.time({
  go <- readRDS(paste0(SHARE_DIR, "ready_ex.Rds"))
})

- Extract data

In [None]:
# Unpack the analysis table and the candidate feature names from `go`.
features <- go$features
df <- go$data_ready

# Distinct values of column `y` in the saved Z-score table.
# NOTE(review): this notebook's last cell writes to this same file path.
top_zscores <- fread(paste0(UTIL_DIR, "zscores_tmp2.csv")) %>%
  pull(y) %>%
  unique()

- Set cohorts 

In [None]:
# Add a `location` column: any primary tumour location containing "Unknown"
# is relabelled "CUP"; every other value is carried over unchanged.
df_go <- mutate(
  df,
  location = ifelse(
    grepl("Unknown", clin_primaryTumorLocation2),
    "CUP",
    clin_primaryTumorLocation2
  )
)

In [None]:
# Keep only the locations that have more than 50 rows.
cohorts <- df_go %>%
  count(location, name = "ct") %>%
  filter(ct > 50) %>%
  pull(location)

In [None]:
# One data frame per retained cohort, stored in a list keyed by cohort name.
cohort_dfs <- lapply(
  setNames(cohorts, cohorts),
  function(this_cohort) filter(df_go, location == this_cohort)
)

#### 1 - Go for a run

- Survival, test everything

In [None]:
# Per-cohort survival scan: for every cohort and every top Z-score, ask
# get_stats2() for a "coxph" fit of Surv(Y_os_days, Y_os_event) on that
# Z-score plus clin_age and clin_sex, and stack the returned rows tagged with
# the cohort name.
#
# get_stats2() receives the data set as the string "df", so the current cohort
# is assigned to the global `df` before each call (presumably the helper
# resolves that name itself -- confirm in stats.r). Anything get_stats2()
# returns that is not a data frame is skipped.
result_parts <- list()
system.time(
  for (j in names(cohort_dfs)) {
    df <- cohort_dfs[[j]]
    for (i in top_zscores) {
      tmp_results <- get_stats2(
        y = "Surv(Y_os_days, Y_os_event)",
        x = i,
        covariate = " + clin_age + clin_sex",
        data = "df",
        model = "coxph"
      )

      if (is.data.frame(tmp_results)) {
        # Collect the pieces in a list; binding once afterwards avoids
        # re-copying the growing table on every iteration.
        result_parts[[length(result_parts) + 1L]] <-
          tmp_results %>% mutate(cohort = j)
      }
    }
  }
)

# Leading empty data.frame() keeps the result an empty data frame when no
# model produced output, as before.
results <- do.call(rbind, c(list(data.frame()), result_parts))

In [None]:
#' Run get_stats2() for one outcome against a set of features.
#'
#' @param y Outcome term forwarded to get_stats2() as `y`.
#' @param features Feature names; each one is forwarded in turn as `x`.
#' @param covariates Covariate string forwarded as `covariate` (calls in this
#'   notebook use the form " + a + b").
#' @param df Name of the data object, given as a string, forwarded as `data`.
#' @param mod Model type forwarded as `model`.
#' @return The rows returned for all features stacked into one data frame.
#'   Features for which get_stats2() returns something other than a data frame
#'   are skipped; an empty data.frame() is returned when nothing was kept.
scanner <-
function( y = "Surv(Y_os_days, Y_os_event)", features, covariates, df = "df", mod = "coxph"){
    parts <- list()
    for( f in features ) {
      tmp <- get_stats2( y = y, x = f, covariate = covariates, data = df, model = mod )
      # Keep only usable results; bind once at the end instead of growing a
      # data frame with rbind() on every iteration.
      if( is.data.frame(tmp)) parts[[length(parts) + 1L]] <- tmp
    }
    do.call(rbind, c(list(data.frame()), parts))
}

In [None]:
# Scan every top Z-score as a predictor of clin_purity with an "lm" model,
# adjusting for age and sex. The data set is passed by name ("df"); at this
# point the global `df` holds whichever cohort the survival loop assigned last.
scanner(
  y = "clin_purity",
  features = top_zscores,
  covariates = " + clin_age + clin_sex",
  df = "df",
  mod = "lm"
)

In [None]:
# NOTE(review): `x` is not assigned in any visible cell; unless one of the
# sourced helper files defines it, evaluating this cell fails with an
# "object not found" error. Looks like leftover scratch -- confirm and remove.
x

In [None]:
# Set the notebook plot width option to 16 (presumably to give the six-column
# faceted forest plot below enough horizontal room).
options(repr.plot.width = 16)

In [None]:
# Forest plot of the survival scan: one panel per tested variable (`x`), one
# row per cohort ordered by estimate, with a point at the estimate and a bar
# spanning two standard errors either side. The dashed line marks zero.
ggplot(
  data = results,
  mapping = aes(x = est, y = reorder(cohort, est))
) +
  geom_point() +
  geom_errorbarh(
    mapping = aes(xmin = est - 2*se, xmax = est + 2*se),
    height = 0.2
  ) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  facet_wrap(vars(x), ncol = 6) +
  theme_minimal() +
  labs(title = "Forest Plot", x = "Log Hazard", y = "Study")


#### 2 - Relations with top Z-scores

- Linear models of each top Z-score (e.g. proliferation) against every feature, within each cohort

In [None]:
# Relate every top Z-score (outcome, `j`) to every feature (predictor, `i`)
# within each cohort (`k`) using get_stats2() with model "lm", adjusting for
# clin_age and clin_sex. Returned rows are tagged with the cohort they were
# fitted in and stacked into `lms`.
#
# get_stats2() receives the data set as the string "df", so the current cohort
# is assigned to the global `df` before each call (presumably the helper
# resolves that name itself -- confirm in stats.r). Anything get_stats2()
# returns that is not a data frame is skipped.
lm_parts <- list()

for (j in top_zscores) {
  # Progress indicator: one line per outcome.
  print(j); flush.console()
  for (i in features) {
    for (k in names(cohort_dfs)) {
      df <- cohort_dfs[[k]]

      tmp_lms <- get_stats2(
        y = j,
        x = i,
        covariate = "+ clin_age + clin_sex",
        data = "df",
        model = "lm"
      )

      if (is.data.frame(tmp_lms)) {
        # Tag with the cohort name `k` (the previous code stored the outcome
        # `j` here, which made results from different cohorts
        # indistinguishable).
        lm_parts[[length(lm_parts) + 1L]] <- tmp_lms %>% mutate(cohort = k)
      }
    }
  }
}

# Bind once at the end instead of growing `lms` with rbind() on every
# iteration; the leading empty data.frame() keeps the result an empty data
# frame when no model produced output.
lms <- do.call(rbind, c(list(data.frame()), lm_parts))

In [None]:
# Benjamini-Yekutieli adjustment of the p-values, applied across all rows of
# `lms` at once (every outcome, feature and cohort together).
lms <- mutate(lms, pval_by = p.adjust(pval, method = "BY"))

In [None]:
# Save the linear-model results as CSV under UTIL_DIR.
# NOTE(review): this is the same path that `top_zscores` is read from earlier
# in the notebook, so running this cell replaces that input file -- confirm
# this is intended.
fwrite(x = lms, file = paste0(UTIL_DIR, "zscores_tmp2.csv"))