In [None]:
options(dplyr.summarise.inform = FALSE)
library(tidyverse)
library(data.table)
library(survival)
library(gridExtra)
library(forcats)

In [None]:
# Load the shared helper scripts that live two directory levels above the
# notebook's working directory.
# NOTE(review): SHARE_DIR and get_stats2(), used below, are not defined in the
# visible cells — presumably they come from these files.
source(file.path(dirname(dirname(getwd())), "map.r"))
source(file.path(dirname(dirname(getwd())), "stats.r"))

In [None]:
##list.files(SHARE_DIR)

In [None]:
## readRDS(paste0(SHARE_DIR, 'test.Rds'))

#### 0 - Run analyses

In [None]:
# Read the prepared analysis bundle from the shared directory.
# NOTE(review): SHARE_DIR is not assigned in the visible cells — presumably set
# by the sourced helper scripts; paste0() relies on it ending in a path
# separator.
go <- readRDS(paste0(SHARE_DIR, "ready_ex.Rds"))

In [None]:
# Unpack the `data_ready` and `features` elements of the bundle.
# NOTE(review): `features` is not referenced again in the visible cells.
df <- go$data_ready
features <- go$features

#### 1 - Go for a run

- Survival

In [None]:
# Feature(s) to test; the model loops below iterate over this character vector.
feature_go <- "rna_geneset_gene_set_prolif"

In [None]:
# Primary tumour locations to model: those with more than 20 rows that have a
# non-missing Y_bor, leaving out the "Unknown primary (e.g. CUP)" and "Other"
# categories.
tissues <- df |>
  drop_na(Y_bor) |>
  filter(
    !(clin_primaryTumorLocation2 %in% c("Unknown primary (e.g. CUP)", "Other"))
  ) |>
  group_by(clin_primaryTumorLocation2) |>
  summarise(ct = n()) |>
  filter(ct > 20) |>
  pull(clin_primaryTumorLocation2)

In [None]:
# Tissue-stratified Cox models: for every feature in `feature_go` and every
# tissue in `tissues`, call get_stats2() with the survival outcome
# Surv(Y_os_days, Y_os_event) and the covariates clin_age, clin_sex and Y_bor,
# then tag the returned rows with the tissue they were fitted on.
#
# Each fit is stored in a preallocated list and the pieces are bound once at
# the end, instead of growing a data frame with rbind() on every iteration.
#
# NOTE(review): get_stats2() receives the data set by name ("df_tissue"), so
# the per-tissue subset has to remain a top-level variable — presumably the
# helper looks it up by that name; confirm in stats.r.
results <- vector("list", length(feature_go) * length(tissues))
fit_idx <- 0L
system.time({
  for (i in feature_go) {
    for (j in tissues) {
      df_tissue <- df %>% filter(clin_primaryTumorLocation2 == j)
      fit_idx <- fit_idx + 1L
      results[[fit_idx]] <- get_stats2(
        y = "Surv(Y_os_days, Y_os_event)",
        x = i,
        covariate = " + clin_age + clin_sex + Y_bor",
        data = "df_tissue",
        model = "coxph"
      ) %>%
        mutate(tissue = j)
    }
  }
  # Collapse the per-fit pieces into a single table of results.
  results <- bind_rows(results)
})

In [None]:
# Forest plot of the per-tissue estimates held in `results`: tissues are
# ordered by estimate and each bar spans est +/- 2 * se (roughly a 95%
# interval if `se` is the standard error of `est`).
# NOTE(review): the x axis is labelled "Hazard", while the volcano plot further
# down labels a Cox `est` as "Log Hazard" — confirm which scale get_stats2()
# returns.
fp <- results |>
  mutate(
    tissue = fct_reorder(tissue, est),
    ci_high = est + 2 * se,
    ci_low = est - 2 * se
  ) |>
  ggplot(aes(x = est, xmin = ci_low, xmax = ci_high, y = tissue)) +
  geom_pointrange() +
  theme_bw() +
  labs(
    title = "OS vs Proliferation: Cox-Ph hazard estimates",
    x = "Hazard"
  )

In [None]:
# Narrow the notebook plot width, then display the forest plot built above.
options(repr.plot.width = 5)
fp

- Linear model with proliferation

In [None]:
# Tissue-stratified models of Y_bor: for every feature in `feature_go` and
# every tissue in `tissues`, call get_stats2() with model type "bor" and the
# covariates clin_age and clin_sex, then tag the returned rows with the tissue
# they were fitted on. This reassigns `results`, replacing whatever it held
# before.
#
# Each fit is stored in a preallocated list and the pieces are bound once at
# the end, instead of growing a data frame with rbind() on every iteration.
#
# NOTE(review): get_stats2() receives the data set by name ("df_tissue"), so
# the per-tissue subset has to remain a top-level variable — presumably the
# helper looks it up by that name; confirm in stats.r.
results <- vector("list", length(feature_go) * length(tissues))
fit_idx <- 0L
system.time({
  for (i in feature_go) {
    for (j in tissues) {
      df_tissue <- df %>% filter(clin_primaryTumorLocation2 == j)
      fit_idx <- fit_idx + 1L
      results[[fit_idx]] <- get_stats2(
        y = "Y_bor",
        x = i,
        covariate = " + clin_age + clin_sex",
        data = "df_tissue",
        model = "bor"
      ) %>%
        mutate(tissue = j)
    }
  }
  # Collapse the per-fit pieces into a single table of results.
  results <- bind_rows(results)
})

In [None]:
# Add Benjamini-Yekutieli ("BY") adjusted p-values, computed across all rows
# currently held in `results`.
response <- mutate(results, pval_by = p.adjust(pval, method = "BY"))

In [None]:
# Forest plot of the estimates in `response`: tissues ordered by estimate,
# bars spanning est +/- 2 * se.
fp_response <- response |>
  mutate(
    tissue = fct_reorder(tissue, est),
    ci_high = est + 2 * se,
    ci_low = est - 2 * se
  ) |>
  ggplot(aes(x = est, xmin = ci_low, xmax = ci_high, y = tissue)) +
  geom_pointrange() +
  theme_bw()

In [None]:
# Display the forest plot built from `response`.
fp_response

#### 2 - Figure examples

In [None]:
# Volcano-style panel: estimate against -log10 of the BY-adjusted p-value, one
# facet (and colour) per `type`, with a horizontal reference line at an
# adjusted p-value of 0.01.
# NOTE(review): when the notebook is run top to bottom, `results` was last
# filled by the Y_bor loop and the BY-adjusted column is added to `response`,
# not `results`. This cell needs `pval_by` and `type` columns in `results` —
# confirm which table was intended (the title refers to Cox models).
a <- ggplot(results, aes(x = est, y = -log10(pval_by), color = type)) +
  geom_point() +
  geom_hline(yintercept = -log10(0.01)) +
  facet_wrap(~type, ncol = 4) +
  labs(
    title = "Cox-ph: OS vs features (tissue, age, sex adjusted)",
    x = "Log Hazard",
    y = "-Log10 (BY Adjusted p-value)"
  ) +
  theme_classic() +
  theme(legend.position = "none")

In [None]:
# Volcano-style panel for linear-model results: estimate against -log10 of the
# BY-adjusted p-value, one free-scaled facet (and colour) per `type`, with a
# horizontal reference line at an adjusted p-value of 0.01.
# NOTE(review): `lms` is not assigned in any visible cell — presumably it comes
# from another analysis or from the sourced helpers; confirm before running.
b <- ggplot(lms, aes(x = est, y = -log10(pval_by), color = type)) +
  geom_point() +
  geom_hline(yintercept = -log10(0.01)) +
  facet_wrap(~type, scales = "free", ncol = 4) +
  labs(
    title = "LM: Malignant_MP1 vs features (covariate adjusted)",
    x = "Beta estimate",
    y = "-Log10 (BY Adjusted p-value)"
  ) +
  theme_classic() +
  theme(legend.position = "none")

In [None]:
#### 

In [None]:
# Set a wide, short notebook canvas and draw the two panels `a` and `b` side
# by side.
options(repr.plot.width = 10, repr.plot.height = 4)
grid.arrange(a,b, ncol = 2)

#### Add Breakdown by Type

In [None]:
# Per row, count how many of the two driver columns (TP53, RB1) are above
# zero: 0, 1 or 2, and NA when either column is missing.
df <- df |>
  mutate(drivers_TP53_RB1 = (driver_TP53 > 0) + (driver_RB1 > 0))

In [None]:
# Boxplots of the Malignant MP1 cell-cycle score per primary tumour location,
# split by the TP53 + RB1 driver count. Rows missing the score or the driver
# count, and the "Other" location, are excluded.
# Note: this reuses the name `a` from the figure-examples section above.
a <- df |>
  drop_na(rna_mp_Malignant_MP1..Cell.Cycle...G2.M, drivers_TP53_RB1) |>
  filter(clin_primaryTumorLocation2 != "Other") |>
  ggplot(
    aes(
      x = as.factor(clin_primaryTumorLocation2),
      y = rna_mp_Malignant_MP1..Cell.Cycle...G2.M,
      fill = as.factor(drivers_TP53_RB1)
    )
  ) +
  geom_boxplot() +
  labs(
    title = "Malignant MP1 Cell Cycle vs Number of Drivers TP53 + RB1",
    y = "Malignant MP1 Cell Cycle"
  ) +
  theme_classic() +
  theme(legend.position = "bottom")

In [None]:
# Boxplots of the RNA proliferation gene-set score per primary tumour
# location, split by the TP53 + RB1 driver count; the "Other" location is
# excluded.
# NOTE(review): rows are dropped on missing Malignant MP1 score, not on the
# plotted proliferation score — confirm this is intended (e.g. to keep the
# same samples across panels).
# Note: this reuses the name `b` from the figure-examples section above.
b <- df |>
  drop_na(rna_mp_Malignant_MP1..Cell.Cycle...G2.M, drivers_TP53_RB1) |>
  filter(clin_primaryTumorLocation2 != "Other") |>
  ggplot(
    aes(
      x = as.factor(clin_primaryTumorLocation2),
      y = rna_geneset_gene_set_prolif,
      fill = as.factor(drivers_TP53_RB1)
    )
  ) +
  geom_boxplot() +
  labs(
    title = "RNA Proliferation Gene Set vs Number of Drivers TP53 + RB1",
    y = "RNA Proliferation Gene Set"
  ) +
  theme_classic() +
  theme(legend.position = "bottom")

In [None]:
# Scatter of the Malignant MP1 cell-cycle score against the RNA proliferation
# gene-set score, coloured by primary tumour location ("Other" excluded).
# NOTE(review): the variable name `c` shadows base::c as an object; calls to
# c() still resolve to the function, but a different name would be clearer.
c <- df |>
  drop_na(rna_mp_Malignant_MP1..Cell.Cycle...G2.M, drivers_TP53_RB1) |>
  filter(clin_primaryTumorLocation2 != "Other") |>
  ggplot(
    aes(
      x = rna_geneset_gene_set_prolif,
      y = rna_mp_Malignant_MP1..Cell.Cycle...G2.M,
      color = as.factor(clin_primaryTumorLocation2)
    )
  ) +
  geom_point() +
  labs(
    title = "Malignant MP1 Cell Cycle vs RNA Proliferation Gene Set",
    x = "RNA Proliferation Gene Set",
    y = "Malignant MP1 Cell Cycle"
  ) +
  theme_classic() +
  theme(legend.position = "bottom")

In [None]:
# Scatter of the Malignant MP1 cell-cycle score against `purity`, coloured by
# primary tumour location ("Other" excluded).
# NOTE(review): the axis is labelled "Scaled Purity"; whether `purity` is
# actually scaled is not visible in this notebook — confirm upstream.
d <- df |>
  drop_na(rna_mp_Malignant_MP1..Cell.Cycle...G2.M, drivers_TP53_RB1) |>
  filter(clin_primaryTumorLocation2 != "Other") |>
  ggplot(
    aes(
      x = purity,
      y = rna_mp_Malignant_MP1..Cell.Cycle...G2.M,
      color = as.factor(clin_primaryTumorLocation2)
    )
  ) +
  geom_point() +
  labs(
    title = "Malignant MP1 Cell Cycle vs Scaled Purity",
    x = "Scaled Purity",
    y = "Malignant MP1 Cell Cycle"
  ) +
  theme_classic() +
  theme(legend.position = "bottom")

In [None]:
# Enlarge the notebook canvas and draw the four breakdown panels in a
# two-column grid.
options(repr.plot.width = 12, repr.plot.height = 10)
grid.arrange(a,b, c, d, ncol = 2)