In [None]:
source(paste0(dirname(dirname(getwd())),'/map.r'))
source(paste0(HELP_DIR, "shortcuts.r"))

# 0 - Read somatic exome data

In [None]:
# Load the somatic exome calls, then flag hotspot sites.
# gb/mu/ug come from shortcuts.r - presumably dplyr group_by/mutate/ungroup
# aliases; confirm there.
somatic_exome <- fread("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/somatic_exome.csv")

# ct      = number of rows sharing the same chromosome/position/gene/tier.
# hotspot = tier is "HOTSPOT", or more than 30 rows share that site.
# NOTE(review): if tier can be NA, hotspot is NA whenever ct <= 30, and the
# downstream filters on hotspot / !hotspot would both drop that row - confirm.
somatic_exome <- somatic_exome %>%
  gb(chromosome, position, gene, tier) %>%
  mu(
    ct = n(),
    hotspot = (tier == "HOTSPOT" | ct > 30)
  ) %>%
  ug()

In [None]:
# Map raw variant annotation strings to a coarse variant class.
#
# i: character vector of annotation strings (any length, including 0).
# Returns a character vector of the same length with values "nonsense",
# "missense", "indel" or "synonymous". Anything that matches no rule
# (including NA) is labelled "synonymous".
#
# Vectorised, so it can be called on a whole column; a single string still
# returns a single string, as before.
annotater <- function(i) {
  # Rules in priority order: when several patterns match, the first one wins.
  rules <- c(
    stop = "nonsense",
    missense = "missense",
    inframe = "indel",
    frameshift = "indel",
    start_lost = "nonsense"
  )
  out <- rep("synonymous", length(i))
  # Apply from lowest to highest priority so that higher-priority matches
  # overwrite lower-priority ones.
  for (pattern in rev(names(rules))) {
    out[grepl(pattern, i)] <- rules[[pattern]]
  }
  out
}

In [None]:
# Collapse each raw annotation into a coarse variant class (`type`) and drop
# everything annotater() labels "synonymous" (its fallback for any annotation
# it does not recognise).
# mu/fi come from shortcuts.r - presumably dplyr mutate/filter aliases;
# confirm there.
somatic_exome <-
  somatic_exome %>%
  # Classify element by element inside a single mutate instead of switching
  # the whole table to row-wise evaluation: same labels, far less per-row
  # overhead, and an empty table simply yields an empty `type` column.
  mu(type = vapply(annotation, annotater, character(1), USE.NAMES = FALSE)) %>%
  fi(type != "synonymous")

# 1 - Compute hotspots, biallelic, inframe

- Hotspots

In [None]:
# Hotspot drivers: every retained variant flagged `hotspot`, labelled with the
# calling method.
hotspots <- somatic_exome %>%
  fi(hotspot) %>%
  tm(
    sampleId, chromosome, gene, transcript, annotation, biallelic,
    method = "HOTSPOT"
  )

- Biallelic

In [None]:
# Biallelic drivers: non-hotspot variants whose `biallelic` column is TRUE.
# The result table shares its name with that column; inside fi()/tm() the name
# refers to the column of somatic_exome.
biallelic <- somatic_exome %>%
  fi(!hotspot, biallelic) %>%
  tm(
    sampleId, chromosome, gene, transcript, annotation, biallelic,
    method = "BIALLELIC"
  )

- Inframe

In [None]:
# Indel drivers: variants of type "indel" that are neither hotspot nor
# biallelic. Note the type "indel" covers both inframe and frameshift
# annotations, although the method label is "INFRAME".
inframe <- somatic_exome %>%
  fi(!hotspot, !biallelic, type == "indel") %>%
  tm(
    sampleId, chromosome, gene, transcript, annotation, biallelic,
    method = "INFRAME"
  )

- Somatic drivers 

In [None]:
#som_drivers <- rbind(hotspots, biallelic, inframe) %>% lj(cn_gene %>% se(gene, chromosomeBand), by = "gene")

# 2 - Now DNDS! 

- Read in DNDS output, create reference for labelling DNDS drivers

In [None]:
# dN/dS output table; the columns used below are gene_name, cohort and the
# per-class w*_cv / q*_cv values.
sel_cvs <- fread("/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/dnds/sel_cvs.csv")

In [None]:
# Cut-offs for calling a dN/dS driver class:
# a w*_cv value must exceed w_thresh and the matching q*_cv value must be
# below q_thresh.
q_thresh <- 0.05
w_thresh <- 2

In [None]:
# Translate dN/dS column names into the variant class they describe.
#
# i: character vector of names such as "wmis_cv", "wnon_cv", "wind_cv"
#    (any length, including 0).
# Returns a character vector of the same length: "nonsense" when the name
# contains "non", otherwise "missense" when it contains "mis", otherwise
# "indel".
#
# Vectorised, so it can be called on a whole column; a single string still
# returns a single string, as before.
gper <- function(i) {
  out <- rep("indel", length(i))
  out[grepl("mis", i)] <- "missense"
  # "non" has the highest priority, so it is applied last.
  out[grepl("non", i)] <- "nonsense"
  out
}

In [None]:
# Reference table of dN/dS drivers: one row per (gene, type) pair whose
# selection ratio exceeds w_thresh and whose matching q-value is below
# q_thresh in a Pan-Cancer cohort row.
# fi/se/ga/mu/tm come from shortcuts.r - presumably dplyr/tidyr
# filter/select/gather/mutate/transmute aliases; confirm there.
dnds_drivers_ref <-
  sel_cvs %>%
  # Pre-filter: Pan-Cancer rows where at least one class passes each cut-off.
  fi(
    grepl("Pan-Cancer", cohort),
    (qtrunc_cv < q_thresh | qmis_cv < q_thresh | qind_cv < q_thresh),
    (wmis_cv > w_thresh | wnon_cv > w_thresh | wind_cv > w_thresh)
  ) %>%
  se(gene_name, wmis_cv, wnon_cv, wind_cv, qmis_cv, qtrunc_cv, qind_cv) %>%
  # Two successive gathers produce every (w column, q column) combination for
  # each input row ...
  ga(w, wval, -gene_name, -qmis_cv, -qtrunc_cv, -qind_cv) %>%
  ga(q, qval, -gene_name, -w, -wval) %>%
  # ... of which only the pairs describing the same class are kept
  # (mis/mis, ind/ind, non/trunc), and only when both cut-offs are met.
  fi(
    wval > w_thresh, qval < q_thresh,
    (grepl("mis", w) & grepl("mis", q)) |
      (grepl("ind", w) & grepl("ind", q)) |
      (grepl("non", w) & grepl("trunc", q))
  ) %>%
  # Element-wise so that an empty table gives an empty `type` column.
  mu(type = vapply(w, gper, character(1), USE.NAMES = FALSE)) %>%
  tm(gene = gene_name, type) %>%
  # The cohort filter is a pattern match and may keep several cohort rows per
  # gene; de-duplicate so a later join on (gene, type) cannot multiply rows.
  unique()

In [None]:
# Spot check: dN/dS row(s) for CSMD1 in the cohort labelled exactly "Pan-Cancer".
sel_cvs %>%
  fi(gene_name == "CSMD1", cohort == "Pan-Cancer")

- Annotate variant types in somatic exome

In [None]:
# dN/dS drivers: retained variants whose (gene, type) pair appears in
# dnds_drivers_ref.
# For these rows `annotation` carries the coarse type (nonsense / missense /
# indel), not the raw annotation string.
dnds <- somatic_exome %>%
  fi(gene %in% dnds_drivers_ref$gene) %>%
  se(sampleId, chromosome, gene, transcript, type, biallelic) %>%
  ij(dnds_drivers_ref, by = c("gene", "type")) %>%
  mu(method = "DNDS") %>%
  rename(annotation = type)

# 3 - Together

In [None]:
# Stack the four driver tables; each has the columns sampleId, chromosome,
# gene, transcript, annotation, biallelic, method (in that order).
# dnds is not restricted by hotspot/biallelic status, so a variant can appear
# both as a DNDS row and under one of the other methods.
somatic_drivers <- rbind(dnds, hotspots, biallelic, inframe) 

In [None]:
# Write the combined somatic driver table to disk.
fwrite(somatic_drivers, "/mnt/petasan_immunocomp/datasets/hartwig/biomarkers/database/drivers_full/somatic.txt")