In [None]:
source(paste0(dirname(dirname(getwd())),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))

In [None]:
library(readxl)

# 0 - Read and process collected driver data
- References

In [None]:
# Reference inputs for the driver catalogue.
# tcga: TCGA pathway sheet (the Gene and Label columns are used further down).
tcga <- read_xlsx("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/ref/pathways_tcga.xlsx")
# ccg: cancer gene census export (`Gene Symbol` and `Role in Cancer` are used further down).
ccg <- fread("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/ref/cancer_gene_census.csv")
# hmf: driver table reduced to one row per (sampleId, gene) pair, keeping the
# first row encountered for each pair. The grouping is dropped afterwards so
# that later verbs applied to `hmf` do not silently operate per group.
hmf <- fread("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/drivers.csv") %>%
  gb(sampleId, gene) %>%
  mu(rk = row_number()) %>%
  fi(rk == 1) %>%
  ug()

In [None]:
# Gene symbols whose `Role in Cancer` entry mentions "fusion".
ccg_fusions <- ccg %>%
  fi(grepl("fusion", `Role in Cancer`)) %>%
  pu(`Gene Symbol`)

In [None]:
# TCGA genes split by Label: "TSG" matches go to tcga_tsgs, "OG" matches to
# tcga_oncs (a Label containing both patterns lands in both vectors).
tcga_tsgs <- tcga %>%
  fi(grepl("TSG", Label)) %>%
  pu(Gene)
tcga_oncs <- tcga %>%
  fi(grepl("OG", Label)) %>%
  pu(Gene)

In [None]:
# Census genes split by `Role in Cancer`: entries matching "TSG" versus
# entries matching "onco".
ccg_tsgs <- ccg %>%
  fi(grepl("TSG", `Role in Cancer`)) %>%
  pu(`Gene Symbol`)
ccg_oncs <- ccg %>%
  fi(grepl("onco", `Role in Cancer`)) %>%
  pu(`Gene Symbol`)

In [None]:
# Distinct HMF genes per driver category ("TSG" / "ONCO").
hmf_tsgs <- unique(
  hmf %>%
    fi(category == "TSG") %>%
    pu(gene)
)
hmf_oncs <- unique(
  hmf %>%
    fi(category == "ONCO") %>%
    pu(gene)
)

In [None]:
# Pool the three sources into de-duplicated TSG and oncogene sets.
preset_tsgs <- Reduce(base::union, list(ccg_tsgs, hmf_tsgs, tcga_tsgs))
preset_oncs <- Reduce(base::union, list(ccg_oncs, hmf_oncs, tcga_oncs))
# Plain concatenation: a gene present in both sets appears twice here, which
# is harmless for the `%in%` membership tests it is used for.
preset_drivers <- c(preset_tsgs, preset_oncs)

In [None]:
# Long lookup table: one row per (gene, category, db) across the three sources.
ref <- df(do.call(rbind, list(
  df(gene = tcga_tsgs, category = "TSG", db = "TCGA"),
  df(gene = tcga_oncs, category = "ONC", db = "TCGA"),
  df(gene = ccg_tsgs, category = "TSG", db = "COSMIC"),
  df(gene = ccg_oncs, category = "ONC", db = "COSMIC"),
  df(gene = hmf_tsgs, category = "TSG", db = "HMF"),
  df(gene = hmf_oncs, category = "ONC", db = "HMF")
)))

In [None]:
# Reduce `ref` to one category per gene: the category backed by the most
# source rows wins; on a tie the first row in the summarised table is kept.
ref <- ref %>%
  gb(gene, category) %>%
  su(ct = n()) %>%
  gb(gene) %>%
  mu(rk = row_number(desc(ct))) %>%
  ug() %>%
  fi(rk == 1) %>%
  se(-ct, -rk)

- Reading prepared driver files

In [None]:
# Read one prepared driver table.
#   i: file name inside the drivers_full/ directory (e.g. "amp.txt").
# Returns whatever fread() returns for that file.
reader <- function(i) {
  path <- paste0("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/drivers_full/", i)
  fread(path)
}

In [None]:
# Load the five prepared event tables, one per event type.
somatic    <- reader("somatic.txt")
amp        <- reader("amp.txt")
del        <- reader("del.txt")
disruption <- reader("disruption.txt")
fusion     <- reader("fusion.txt")

- Somatic ready

In [None]:
# Somatic events kept as drivers:
#   * DNDS and HOTSPOT calls for any gene;
#   * INFRAME calls in a preset driver gene;
#   * BIALLELIC calls in a preset driver gene outside chromosomes X and Y.
somatic_ready <- somatic %>%
  filter(
    method %in% c("DNDS", "HOTSPOT") |
      (gene %in% preset_drivers &
        (method == "INFRAME" |
          (method == "BIALLELIC" & !chromosome %in% c("X", "Y"))))
  ) %>%
  se(sampleId, gene, annotation, biallelic, method)

- Get disruptions

In [None]:
# Disruptions, reshaped to the shared (sampleId, gene, annotation, biallelic,
# method) layout and restricted to preset tumour-suppressor genes.
disrupt_ready <- disruption %>%
  tm(
    sampleId,
    gene,
    annotation = "disrupted",
    biallelic = NA,
    method = "DISRUPTION"
  ) %>%
  fi(gene %in% preset_tsgs)

- Fusions 

In [None]:
# Fusion-derived driver events, stacked from three sources:
#   FUSION_UP   - the upstream partner, when it is a preset driver gene;
#   FUSION_DOWN - the downstream partner, when it is a preset driver gene;
#   FUSION      - the fusion `gene` entry itself, when it occurs in more than
#                 30 rows of the fusion table.
# The FUSION_DOWN branch previously filtered on `upstream` while reporting
# `downstream`, so the reported gene was never checked against the preset
# list; it now filters on the column it reports.
fusion_ready <-
rbind(
fusion %>%
 fi(upstream %in% preset_drivers) %>%
 tm(sampleId, gene = upstream, method = "FUSION_UP"),
fusion %>%
 fi(downstream %in% preset_drivers) %>%
 tm(sampleId, gene = downstream, method = "FUSION_DOWN"),
fusion %>% gb(gene) %>% mu(ct = n()) %>% fi(ct > 30) %>% tm(sampleId, gene, method = "FUSION")
) %>% mu(annotation = "fusion", biallelic = NA)

- Copy number

In [None]:
# Amplifications in preset oncogenes, in the shared driver layout.
# Intermediate columns:
#   arm - "q" when chromosomeBand contains "q", otherwise "p";
#   ct  - number of rows for the gene;
#   rk  - dense rank of ct (descending) within each chromosome arm.
# NOTE(review): arm, ct and rk are not among the columns kept by tm() below;
# confirm whether a rank-based filter was intended here.
amps_ready <- amp %>%
  mu(arm = ifelse(grepl("q", chromosomeBand), "q", "p")) %>%
  gb(gene) %>%
  mu(ct = n()) %>%
  ug() %>%
  gb(chromosome, arm) %>%
  mu(rk = dense_rank(desc(ct))) %>%
  ug() %>%
  fi(gene %in% preset_oncs) %>%
  tm(
    sampleId,
    gene,
    annotation = "high_cn",
    biallelic = NA,
    method = "AMP"
  )

In [None]:
# Deletions in preset tumour-suppressor genes outside chromosomes X and Y, in
# the shared driver layout.
# Intermediate columns:
#   arm - "q" when chromosomeBand contains "q", otherwise "p";
#   ct  - number of rows for the gene;
#   rk  - dense rank of ct (descending) within each chromosome arm.
# NOTE(review): arm, ct and rk are not among the columns kept by tm() below;
# confirm whether a rank-based filter was intended here.
dels_ready <- del %>%
  fi(!chromosome %in% c("X", "Y")) %>%
  mu(arm = ifelse(grepl("q", chromosomeBand), "q", "p")) %>%
  gb(gene) %>%
  mu(ct = n()) %>%
  ug() %>%
  gb(chromosome, arm) %>%
  mu(rk = dense_rank(desc(ct))) %>%
  ug() %>%
  fi(gene %in% preset_tsgs) %>%
  tm(
    sampleId,
    gene,
    annotation = "low_cn",
    biallelic = NA,
    method = "DEL"
  )

- Together

In [None]:
# Stack every evidence table and collapse to one row per (sampleId, gene).
# `type` is the "_"-joined set of distinct methods observed for that pair.
full_drivers <-
  rbind(somatic_ready, amps_ready, dels_ready, disrupt_ready, fusion_ready) %>%
  mu(method = factor(method, levels = c("HOTSPOT", "BIALLELIC", "DNDS", "INFRAME", "DEL", "AMP", "DISRUPTION", "FUSION", "FUSION_UP", "FUSION_DOWN"))) %>%
  gb(sampleId, gene) %>%
  # sort() on a factor follows the level order declared above, so the same set
  # of methods always produces the same label regardless of input row order.
  # na.last = TRUE keeps a method missing from the levels (NA) in the label
  # instead of silently dropping it.
  su(type = paste0(sort(unique(method), na.last = TRUE), collapse = "_")) %>%
  ug()

In [None]:
# Extend full_drivers with HMF driver calls that are either high-likelihood
# (driverLikelihood > .95) or flagged with likelihoodMethod "DISRUPTION".
# For (sampleId, gene) pairs present on both sides the `type` from
# full_drivers wins; pairs found only in `hmf` take the HMF likelihoodMethod.
full_drivers_superset <-
  full_drivers %>%
  full_join(
    hmf %>%
      fi(driverLikelihood > .95 | likelihoodMethod == "DISRUPTION") %>%
      tm(sampleId, gene, purple_method = likelihoodMethod),
    by = c("sampleId", "gene")
  ) %>%
  mu(type = ifelse(!is.na(type), type, purple_method)) %>%
  # Column selection only: se() replaces the earlier su() call, which used
  # summarise to return every row unchanged.
  se(sampleId, gene, type)

# 1 - Share full drivers

In [None]:
# Write the per-sample driver table, annotated with the reference category.
full_drivers_superset %>%
  lj(ref, by = "gene") %>%
  fwrite("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/drivers_full/full_drivers.txt")

In [None]:
# Write the per-gene summary: number of rows per (gene, category), most
# frequent first.
full_drivers_superset %>%
  lj(ref, by = "gene") %>%
  gb(gene, category) %>%
  su(ct = n()) %>%
  ar(desc(ct)) %>%
  fwrite("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/drivers_full/full_drivers_list.txt")

In [None]:
# Spot check: breakdown of driver types recorded for GSTP1.
full_drivers_superset %>%
  lj(ref, by = "gene") %>%
  fi(gene == "GSTP1") %>%
  gb(type) %>%
  su(ct = n())

# 2 - Which samples have no drivers?

In [None]:
# Per-sample purity table and somatic exome calls used in the checks below.
purity <- fread(
  "/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/purities.csv"
)
exome <- fread(
  "/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/somatic_exome.csv"
)

In [None]:
# Sample ids from the purity table that have no row in the driver superset:
# after the left join such samples carry an NA gene.
no_drivers <- purity %>%
  se(sampleId, purity) %>%
  lj(
    full_drivers_superset %>% lj(ref, by = "gene"),
    by = "sampleId"
  ) %>%
  fi(is.na(gene)) %>%
  pu(sampleId)

In [None]:
# Sanity check: dimensions of the purity table next to the number of sample
# ids collected in no_drivers.
dim(purity)
length(no_drivers)

In [None]:
# Among samples without a driver: for each gene, count the exact positions
# (gene, chromosome, position) hit in more than one row, then show the five
# genes with the most such recurrent positions.
exome %>%
  fi(sampleId %in% no_drivers) %>%
  gb(gene, chromosome, position) %>%
  su(ct = n()) %>%
  fi(ct > 1) %>%
  gb(gene) %>%
  su(ct = n()) %>%
  # Sort after the final summarise: summarising by gene re-orders rows by
  # gene, so an earlier sort would not decide which rows head() shows.
  ar(desc(ct)) %>%
  head(5)

In [None]:
# Five most frequent fusion `gene` entries among samples without a driver.
fusion %>%
  fi(sampleId %in% no_drivers) %>%
  gb(gene) %>%
  su(ct = n()) %>%
  ar(desc(ct)) %>%
  head(5)

In [None]:
# Five most frequently deleted genes among samples without a driver,
# ignoring chromosome Y.
del %>%
  fi(sampleId %in% no_drivers) %>%
  gb(chromosome, gene) %>%
  su(ct = n()) %>%
  ar(desc(ct)) %>%
  fi(chromosome != "Y") %>%
  head(5)

In [None]:
# Disrupted genes among samples without a driver, most frequent first.
disruption %>%
  fi(sampleId %in% no_drivers) %>%
  gb(gene) %>%
  su(ct = n()) %>%
  ar(desc(ct))