In [1]:
source("~/wisdom/r/data_analysis_environment.R")
source("../data/env/variables.R")

options("readr.num_columns" = 0)

Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ---------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats


In [2]:
# Supplementary File 1
# Reliable switches of tumor origin from the pan-cancer candidate list, with
# the filter columns dropped and the remaining columns renamed for publication.
read_tsv("../data/pancancer/candidateList_full.tsv") %>%
    filter(Reliable == 1 & Origin == "Tumor") %>%
    select(-Reliable, -Origin) %>%
    rename( AS_driver = Candidate, Samples = Patients_affected, Number_samples = PatientNumber,
            Percentage_samples = Percentage) %>%
    # desc() accepts a single vector, so the tie-breaker has to be its own
    # sort key: most affected samples first, ties ordered by AS_driver.
    arrange(desc(Number_samples), AS_driver) %>%
    write_tsv("../results/supplementary_files/supplementary_file_1.tsv")

In [3]:
# Supplementary File 2
# Pfam enrichment table: the "id|name" feature label is split in two, the
# terse statistic names are spelled out and the columns starting with "fc"
# are removed before the final column selection.
read_tsv("../results/pfam_enrichment_analysis.tsv") %>%
    separate(col = Feature, into = c("Pfam_id", "Name"), sep = "\\|") %>%
    rename(
        Switches_where_gained = switches_g,
        Switches_where_lost = switches_l,
        p_switch_gain = p_g,
        p_switch_loss = p_l,
        adjp_switch_gain = adjp_g,
        adjp_switch_loss = adjp_l,
        p_mutation = p_m,
        adjp_mutation = adjp_m
    ) %>%
    select(-starts_with("fc")) %>%
    select(Pfam_id, Name, p_switch_gain:Switches_where_lost) %>%
    write_tsv("../results/supplementary_files/supplementary_file_2.tsv")

In [4]:
# Supplementary File 3
# Tumor/Symbol pairs of the driver table, kept in file order.
top_drivers <- read_tsv("../data/mutations/driver_mutation_number.txt") %>%
    select(Tumor, Symbol)

# Nested lookup: top_drivers[[tumor]][["k"]] is the comma-separated list of
# the first k driver symbols listed for that tumor, for k = 1..10.
top_drivers <- lapply(cancerTypes, function(tumor) {
    # symbols of this tumor only; which() discards rows with a missing Tumor
    tumor_symbols <- top_drivers$Symbol[which(top_drivers$Tumor == tumor)]
    labels <- lapply(seq_len(10), function(k) {
        paste(head(tumor_symbols, n = k), collapse = ", ")
    })
    set_names(labels, as.character(seq_len(10)))
}) %>% set_names(cancerTypes)

# pannegative
# Reads the ten pan-negative mutual-exclusion tables (top 1 to top 10 drivers),
# tags each with the number of drivers it was computed for, and keeps, per
# gene/tumor key, the significant row (p.me < 0.05) with the largest number of
# drivers. Keys without any significant row contribute no rows.
me <- lapply(1:10, function(i){
    read_tsv(paste0("../data/mutations/pannegative_mutual_exclusion.top_",i,"_drivers.txt")) %>%
        mutate(NumDrivers = i)
}) %>% 
    # stack the ten tables into one
    do.call("rbind",.) %>%
    # NOTE(review): the grouping key is GeneId pasted to Tumor, without the
    # transcript pair and without a separator - if a gene can have several
    # switches in one tumor only one row survives; confirm this is intended.
    by(., paste0(.$GeneId,.$Tumor), function(X){
        # non-significant rows become NA and are ignored by which.max
        X %>% slice(which.max(ifelse(p.me < 0.05, NumDrivers, NA)))
    }) %>%
    # stack the per-key results back into a single table
    do.call("rbind",.)  %>%
    select(Tumor,GeneId,Symbol,Normal_transcript,Tumor_transcript,p.me,NumDrivers) %>%
    # cbind() builds a character matrix, so x[2] is the driver count as a
    # string; it is used as a *name* into top_drivers[[tumor]], which relies on
    # the "1".."10" names assigned when top_drivers was built.
    mutate(ME_drivers = apply(cbind(Tumor, NumDrivers), 1, function(x){
        top_drivers[[x[1]]][x[2]]
    }) %>% unlist) %>%
    rename(p_pannegative = p.me, Number_ME_drivers = NumDrivers)

# pathways
# Rows without a pathway annotation are discarded. For every tumor/switch
# combination the driver with the smallest mutual-exclusion p-value is kept,
# labelled as "DriverSymbol (Pathway)", together with that p-value.
me.specificDrivers <- read_tsv("../data/mutations/mutual_exclusion_top_drivers.txt") %>%
    filter(!is.na(Pathway)) %>%
    mutate(Tag = paste0(DriverSymbol," (",Pathway,")")) %>%
    group_by(Tumor,GeneId,Symbol,Normal_transcript,Tumor_transcript) %>%
    summarise(Same_pathway_driver = Tag[which.min(p.me)],
              p_me_pathway_driver = min(p.me)) %>%
    # summarise() only peels off the last grouping level; remove the remaining
    # grouping so later base merge() calls work on a plain, ungrouped table.
    ungroup()

# merge and filter
# Distinct switches flagged as reliable and of tumor origin in the
# pan-cancer candidate list.
validSwitches <- read_tsv("../data/pancancer/candidateList_full.tsv") %>%
    filter(Reliable == 1, Origin == "Tumor") %>%
    select(GeneId, Symbol, Normal_transcript, Tumor_transcript) %>%
    unique()

# Full outer merge of the two mutual-exclusion tables, restricted to the
# valid switches by an inner merge, then written out.
write_tsv(
    merge(merge(me, me.specificDrivers, all = TRUE), validSwitches),
    "../results/supplementary_files/supplementary_file_3.tsv"
)

In [5]:
# Supplementary File 4
# Per-tumor switch list: keep rows whose NotNoise, IsModel and
# EnoughRecurrence flags are all 1 and whose Origin is "Tumor", then reduce
# to the identifying columns.
switches.split <- read_tsv("../data/pancancer/candidateList_full.tumorSplit.tsv") %>%
    filter(NotNoise == 1, IsModel == 1, EnoughRecurrence == 1, Origin == "Tumor") %>%
    select(Tumor, GeneId, Symbol, Normal_transcript, Tumor_transcript)

########################
#       Domains        #
########################
# InterPro results joined to the per-tumor switches; columns are renamed while
# being selected, the "id|name" feature label is split, and "Nothing" is
# reported as "No_change".
pfams <- read_tsv("../data/structural_analysis/interpro_analysis.tsv") %>%
    # add tumor annotation
    merge(switches.split) %>%
    select(Tumor, GeneId, Symbol, Normal_transcript, Tumor_transcript, Feature,
           Observation = What,
           Normal_isoform_order = normalReps,
           Tumor_isoform_order = tumorReps) %>%
    separate(col = Feature, into = c("Feature_id", "Feature_name"), sep = "\\|") %>%
    mutate(Observation = ifelse(Observation == "Nothing", "No_change", Observation),
           Feature_type = "Pfam")

# Tumor/transcript pairs from the proteome information table; used at the end
# of the PPI pipeline to keep only partners whose transcript is listed for
# the same tumor.
proteome <- read_tsv("../data/mutations/proteome_information.txt") %>%
    select(Tumor,Transcript)

# PPI
ppi.file <- "../data/eporta/raw_tables/Switched_interactions_consensus.txt"

## get max number of columns (necessary for reading)
## Rows are ragged: six fixed columns followed by a variable number of
## Origin/Interaction column pairs.
no_col <- max(count.fields(ppi.file,sep = "\t"))
no_col.ppi <- (no_col-6)/2
## NOTE(review): seq(1, no_col.ppi, 0.5) appears to yield one name fewer than
## the number of non-fixed columns, which would leave the last column without
## a proper name - confirm against the input file before relying on it.
ppi.cols <- paste(c("Origin","Interaction"), floor(seq(1,no_col.ppi,0.5)), sep="_")

## read table
## TRUE/FALSE are spelled out: T and F are ordinary variables that can be
## reassigned, the literals cannot.
## NOTE(review): count.fields() above splits on tabs while read.table() here
## uses its default whitespace separator - verify no field contains spaces.
ppi.split <- read.table(ppi.file,header=FALSE,fill=TRUE,col.names=1:no_col) %>%
    set_colnames(c("GeneId","Symbol","Normal_transcript","Tumor_transcript",
                   "GeneId_partner","Symbol_partner",ppi.cols)) %>%
    # all Origin columns contain "DDI_match", so we can disregard them
    select(-starts_with("Origin_")) %>%
    # convert from wide to long table format
    reshape2::melt(id.vars = c("GeneId","Symbol","Normal_transcript","Tumor_transcript",
                               "GeneId_partner","Symbol_partner"), value.name = "Interaction") %>%
    select(-variable) %>%
    # remove cases with no interaction described
    filter(Interaction != "") %>%
    # split interaction information: "<effect>-<partner transcript>-<domains>"
    separate(Interaction, into = c("Effect_on_interaction","Transcript_partner","Domains"), sep = "-") %>%
    # one row per domain pair; pairs are "_"-separated, each "<own>/<partner>"
    mutate(Domains = strsplit(Domains, "_")) %>%
    unnest(Domains) %>%
    separate(Domains, into = c("Feature_id","Pfam_id_partner"), sep = "/") %>%
    mutate(Effect_on_interaction = plyr::revalue(Effect_on_interaction, 
                                                 replace = c("Gained"="Gain", "Lost"="Loss", "Kept"="Unaffected"))) %>%
    # add tumor annotation
    merge(switches.split) %>%
    # keep partners whose transcript appears in the proteome of that tumor
    merge(proteome, by.x = c("Tumor","Transcript_partner"), by.y = c("Tumor","Transcript"))

# Left-merge the interaction annotation onto the Pfam rows and put the columns
# in their output order.
pfams <- merge(pfams, ppi.split, all.x = TRUE) %>%
    select(Tumor:Tumor_transcript,Feature_type,Feature_id:Tumor_isoform_order,GeneId_partner,
           Symbol_partner,Transcript_partner,Pfam_id_partner,Effect_on_interaction) %>%
    # remove cases where a domain is mapped but is not in the isoform-specific region:
    # the test is evaluated once into a temporary flag, every partner column is
    # blanked where the flag holds, and the flag is dropped again
    mutate(blank_partner = Effect_on_interaction %in% c("Gain","Loss") & Observation == "No_change",
           GeneId_partner = ifelse(blank_partner, NA, GeneId_partner),
           Symbol_partner = ifelse(blank_partner, NA, as.character(Symbol_partner)),
           Transcript_partner = ifelse(blank_partner, NA, Transcript_partner),
           Pfam_id_partner = ifelse(blank_partner, NA, Pfam_id_partner),
           Effect_on_interaction = ifelse(blank_partner, NA, Effect_on_interaction)) %>%
    select(-blank_partner) %>%
    unique

########################
#       ProSite        #
########################
# ProSite results joined to the per-tumor switches and laid out with the same
# columns as the Pfam table; the interaction-partner columns are all NA.
prosites <- read_tsv("../data/structural_analysis/prosite_analysis.tsv") %>%
    # add tumor annotation
    merge(switches.split) %>%
    select(Tumor, GeneId, Symbol, Normal_transcript, Tumor_transcript, Feature,
           Observation = What,
           Normal_isoform_order = normalReps,
           Tumor_isoform_order = tumorReps) %>%
    separate(col = Feature, into = c("Feature_id", "Feature_name"), sep = "\\|") %>%
    # build the final column set, in output order, in a single step
    transmute(Tumor, GeneId, Symbol, Normal_transcript, Tumor_transcript,
              Feature_type = "ProSite",
              Feature_id, Feature_name,
              Observation = ifelse(Observation == "Nothing", "No_change", Observation),
              Normal_isoform_order, Tumor_isoform_order,
              GeneId_partner = NA, Symbol_partner = NA, Transcript_partner = NA,
              Pfam_id_partner = NA, Effect_on_interaction = NA)

########################
#        IUPRED        #
########################
# Significant IUPRED results joined to the per-tumor switches. The feature
# name is "<StartPos>_<EndPos>"; id, isoform-order and partner columns are NA.
iupreds <- read_tsv("../data/structural_analysis/iupred_analysis.tsv") %>%
    filter(Significant == 1) %>%
    # add tumor annotation
    merge(switches.split) %>%
    # build the final column set, in output order, in a single step
    transmute(Tumor, GeneId, Symbol, Normal_transcript, Tumor_transcript,
              Feature_type = "IUPRED",
              Feature_id = NA,
              Feature_name = paste(StartPos, EndPos, sep = "_"),
              Observation = ifelse(What == "Nothing", "No_change", What),
              Normal_isoform_order = NA, Tumor_isoform_order = NA,
              GeneId_partner = NA, Symbol_partner = NA, Transcript_partner = NA,
              Pfam_id_partner = NA, Effect_on_interaction = NA)

########################
#        ANCHOR        #
########################
# Significant ANCHOR results joined to the per-tumor switches. The feature
# name is "<StartPos>_<EndPos>"; id, isoform-order and partner columns are NA.
anchors <- read_tsv("../data/structural_analysis/anchor_analysis.tsv") %>%
    filter(Significant == 1) %>%
    # add tumor annotation
    merge(switches.split) %>%
    # build the final column set, in output order, in a single step
    transmute(Tumor, GeneId, Symbol, Normal_transcript, Tumor_transcript,
              Feature_type = "ANCHOR",
              Feature_id = NA,
              Feature_name = paste(StartPos, EndPos, sep = "_"),
              Observation = ifelse(What == "Nothing", "No_change", What),
              Normal_isoform_order = NA, Tumor_isoform_order = NA,
              GeneId_partner = NA, Symbol_partner = NA, Transcript_partner = NA,
              Pfam_id_partner = NA, Effect_on_interaction = NA)

# Stack the four feature tables pairwise from left to right (Pfam, ProSite,
# IUPRED, ANCHOR), sort by tumor and write the result.
list(pfams, prosites, iupreds, anchors) %>%
    Reduce(rbind, .) %>%
    arrange(Tumor) %>%
    write_tsv("../results/supplementary_files/supplementary_file_4.tsv")

“attributes are not identical across measure variables; they will be dropped”

In [6]:
# Supplementary File 5
# Three gene-set test tables, each tagged with the kind of gene set it holds.
canonical_patways <- mutate(read_tsv("../results/networks/canonical_patways_test.tsv"),
                            Geneset_type = "Pathway")
complexes <- mutate(read_tsv("../results/networks/complexes_test.tsv"),
                    Geneset_type = "Complex")
mrna <- mutate(read_tsv("../results/networks/mrna_test.tsv"),
               Geneset_type = "mRNA_regulation")

# Stack the tables pairwise from left to right, select and rename the output
# columns, and replace spaces in gene-set names with underscores.
list(canonical_patways, complexes, mrna) %>%
    Reduce(rbind, .) %>%
    select(Geneset_type, geneset:p, adjp, OR, eOR, switched) %>%
    rename(Geneset = geneset,
           Number_drivers = NumDrivers,
           Switched_genes = switched) %>%
    mutate(Geneset = gsub(" ", "_", Geneset, fixed = TRUE)) %>%
    write_tsv("../results/supplementary_files/supplementary_file_5.tsv")

In [7]:
# Supplementary File 6
# Annotated modules with spaces in the gene-set names replaced by underscores.
modules <- read_tsv("../results/networks/annotated_modules.tsv") %>%
    mutate(Geneset = gsub(" ", "_", Geneset, fixed = TRUE))

# write the table as a separate step; `modules` stays available afterwards
write_tsv(modules, "../results/supplementary_files/supplementary_file_6.tsv")