In [5]:
suppressMessages({
        library(tidyverse)
})

In [21]:
## Reading the data
# Directory that holds one joined variant table per gene.
joined_tables_dir <- "../results/biostatistics/joined_tables"

# Read the joined table of a single gene.
#
# `show_col_types = FALSE` silences readr's column-specification message, so
# the call no longer has to be wrapped in suppressMessages()/suppressWarnings().
# Parsing warnings therefore stay visible instead of being silently discarded.
#
# @param gene Gene symbol used as the file stem (e.g. "APC" -> "APC.tsv").
# @return The tibble returned by read_tsv() for that file.
read_gene_table <- function(gene) {
    read_tsv(
        file.path(joined_tables_dir, paste0(gene, ".tsv")),
        show_col_types = FALSE
    )
}

apc <- read_gene_table("APC")
braf <- read_gene_table("BRAF")
kras <- read_gene_table("KRAS")
pik3ca <- read_gene_table("PIK3CA")


In [34]:
## Processing to get the values that we want

# Reduce a joined gene table to one (sample, symbol) pair per row.
#
# The `sample` column is split at "_" into exactly two pieces. The first piece
# is kept as `sample`; the second piece (previously captured as `chr` and then
# removed) is dropped directly by naming it NA. As before, a `sample` value
# that does not split into exactly two pieces makes separate_wider_delim()
# raise an error.
#
# @param gene_table Table with at least the columns `sample` and `symbol`.
# @return A tibble with the columns `sample` (text before "_") and `symbol`.
extract_sample_symbol <- function(gene_table) {
    gene_table %>%
        select("sample", "symbol") %>%
        separate_wider_delim(sample, delim = "_", names = c("sample", NA))
}

df_apc <- extract_sample_symbol(apc)
df_braf <- extract_sample_symbol(braf)
df_kras <- extract_sample_symbol(kras)
df_pik3ca <- extract_sample_symbol(pik3ca)


In [49]:
## Joining the data together
# Number of rows per sample/gene combination across the four gene tables.
# count() returns an ungrouped tibble and emits no message, so neither an
# ungroup() nor a suppressMessages()/suppressWarnings() wrapper is needed.
df_genes <- bind_rows(df_apc, df_braf, df_kras, df_pik3ca) %>%
    count(sample, symbol)

# Add the sample/gene combinations that never occur, with a count of 0, so
# that every observed sample has one row for every observed gene.
# complete() keeps `sample` and `symbol` as character columns, which avoids
# the factor-vs-character key mismatch of an expand.grid() + left_join().
df_genes_processed <- df_genes %>%
    complete(sample, symbol, fill = list(n = 0L))

In [69]:
## Plotting the data
# Fill colour per gene. The names also define the order of the bars within a
# sample and of the legend entries, so colours and levels cannot drift apart.
gene_fill <- c(PIK3CA = "white", APC = "gray", BRAF = "black", KRAS = "orange")

# Dodged bar chart: one group of bars per sample, one bar per gene, bar height
# equal to the count `n`.
# NOTE(review): the title speaks of "genes" while the y axis counts variants;
# the label text is left untouched here, confirm the intended wording.
final_plot <- df_genes_processed %>%
    mutate(
        # Samples in sorted order along the x axis.
        sample = factor(sample, levels = sort(unique(df_genes$sample))),
        # Symbols not listed in gene_fill become NA.
        symbol = factor(symbol, levels = names(gene_fill))
    ) %>%
    ggplot(aes(sample, n, fill = symbol)) +
    geom_bar(
        stat = "identity",
        color = "black",
        position = position_dodge()
    ) +
    scale_fill_manual(values = gene_fill) +
    # NOTE(review): with limits fixed at 0-100, any bar with n > 100 falls
    # outside the scale and is presumably dropped from the plot; confirm that
    # no count exceeds 100 or widen the limits.
    scale_y_continuous(
        expand = expansion(0),
        limits = c(0, 100),
        breaks = seq(0, 100, 25)
    ) +
    labs(
        title = "Distribution of the number of genes per samples",
        x = "Samples",
        y = "Number of variants"
    ) +
    theme_classic() +
    theme(
        plot.title = element_text(hjust = .5, size = 16, face = "bold"),
        axis.title = element_text(face = "bold", size = 14),
        axis.text = element_text(color = "black", size = 10),
        legend.position = "top"
    )

# Argument names are spelled out in full (`filename`, `height`) instead of
# relying on R's partial argument matching (`file`, `heigh`).
ggsave(
    filename = "../results/biostatistics/plots/final_plot.png",
    plot = final_plot,
    height = 5,
    width = 10
)


In [42]:
# Interactive help lookup for ggsave(); it assigns nothing and is not used by
# the cells above.
# NOTE(review): looks like a leftover from checking the ggsave() arguments -
# consider removing it before running the notebook non-interactively.
?ggsave()