In [None]:
library(tidyverse)
library(bedtoolsr)

In [None]:
# Recode the `Chr` column of `df` from chromosome labels to numeric codes.
#
# The code of a label is its position in `chrom_labels` below
# (Chr01_Occ -> 1, Chr01_Pall -> 2, ..., Chr08_Pall -> 16). Any value that is
# not one of the 16 listed labels becomes NA. All other columns are returned
# unchanged.
remap_chr_names <- function(df) {
    chrom_labels <- c(
        'Chr01_Occ', 'Chr01_Pall',
        'Chr02_Occ', 'Chr02_Pall',
        'Chr03_Occ', 'Chr03_Pall',
        'Chr04_Occ', 'Chr04_Pall',
        'Chr05_Occ', 'Chr05_Pall',
        'Chr06_Occ', 'Chr06_Pall',
        'Chr07_Occ', 'Chr07_Pall',
        'Chr08_Occ', 'Chr08_Pall'
    )

    # match() yields integer positions; convert to double so the column type
    # is the same as a numeric recode.
    mutate(df, Chr = as.numeric(match(Chr, chrom_labels)))
}

In [None]:
# Reusable ggplot2 theme fragment: transparent fills for the panel, plot and
# legend backgrounds, and no major/minor grid lines. Add it last to a plot so
# it overrides the base theme.
transp_theme <- local({
    # Transparent fill with the default outline settings
    clear_fill <- element_rect(fill = "transparent")
    # Transparent fill with the outline colour explicitly set to NA
    clear_fill_no_outline <- element_rect(fill = "transparent", colour = NA_character_)

    theme(
        panel.background = clear_fill_no_outline,
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.background = clear_fill_no_outline,
        legend.background = clear_fill,
        legend.box.background = clear_fill,
        legend.key = clear_fill
    )
})

In [None]:
# Load the headerless, tab-separated table of windows (Chr, start, end, mapq).
#
# Column types are declared explicitly rather than guessed, and "." in the
# file is read as NA, so `mapq` is numeric straight from the parser.
# Chromosome labels are then recoded with remap_chr_names(); rows whose label
# is not recognised (Chr = NA) are dropped. `winCenter` is the midpoint of
# each window.
#
# The resulting column order (Chr, start, end, mapq, winCenter) matters:
# a later cell selects columns of the bedtools intersection by position.
mapq <- read_delim(
    snakemake@input[["mapq"]],
    delim = "\t",
    col_names = c("Chr", "start", "end", "mapq"),
    col_types = cols(
        Chr = col_character(),
        start = col_double(),
        end = col_double(),
        mapq = col_double()
    ),
    na = c("", "NA", ".")
) %>%
    remap_chr_names() %>%
    filter(!is.na(Chr)) %>%
    mutate(winCenter = start + ((end - start) / 2))

In [None]:
# Display the number of rows (windows) currently held in `mapq`
nrow(mapq)

In [None]:
# Notebook display size for the plots that follow
options(repr.plot.width = 20, repr.plot.height = 6)

# Per-chromosome x offset: the running total of the largest window centre of
# every preceding chromosome (0 for the first one), so chromosomes can be laid
# out end to end on a single axis.
data_cum <- mapq %>%
    group_by(Chr) %>%
    summarise(max_winCenter = max(winCenter)) %>%
    mutate(winCenter_add = lag(cumsum(max_winCenter), default = 0)) %>%
    dplyr::select(-max_winCenter)

# Attach the offset to every window, compute its cumulative position, and tag
# chromosomes alternately 'One' / 'Two' (odd / even code) for two-tone
# colouring. Codes outside 1..16 get NA.
mapq_mod <- mapq %>%
    inner_join(data_cum, by = "Chr") %>%
    mutate(
        winCenter_cum = winCenter + winCenter_add,
        chrom_cat = rep(c('One', 'Two'), times = 8)[Chr]
    )

# Axis tick position per chromosome: mean cumulative position of its windows,
# computed on the full table (before subsampling).
axis_set <- mapq_mod %>%
    group_by(Chr) %>%
    summarize(center = mean(winCenter_cum))

# Keep a random 10% of the windows for plotting
mapq_mod <- sample_frac(mapq_mod, 0.1)

In [None]:
# Manhattan-style plot of MapQ along the concatenated chromosomes.
#
# Points are the windows in `mapq_mod`, coloured in two alternating shades by
# `chrom_cat`; a red loess curve (span 0.1) is fitted separately for each
# chromosome. The x axis is labelled with the chromosome codes in `axis_set`,
# placed at each chromosome's mean cumulative position.
mapq_manhat <- ggplot(mapq_mod, aes(x = winCenter_cum, y = mapq)) +
    geom_point(aes(fill = chrom_cat, color = chrom_cat),
               shape = 21, alpha = 0.5, size = 0.25) +
    geom_smooth(aes(group = Chr), method = "loess", color = "red", span = 0.1) +
    # `labels` spelled out in full; the abbreviated `label` only worked
    # through partial argument matching.
    scale_x_continuous(labels = axis_set$Chr, breaks = axis_set$center) +
    scale_y_continuous(expand = c(0, 0), breaks = seq(0, 60, 10)) +
    # Zoom the y axis to 0-60 without dropping data from the smoother
    coord_cartesian(ylim = c(0, 60)) +
    scale_fill_manual(values = c("black", "grey40")) +
    scale_color_manual(values = c("black", "grey40")) +
    ylab("Phred-scaled MapQ") + xlab('Chromosomes') +
    theme_classic() +
    theme(
        legend.position = "none",
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size = 16),
        axis.title = element_text(size = 20)) +
    transp_theme

# Show the plot in the notebook, then write it to the Snakemake output path
mapq_manhat
ggsave(filename = snakemake@output[["mapq_manhat"]], plot = mapq_manhat,
       height = 6, width = 20, device = "pdf", dpi = 600, units = "in")

In [None]:
# Read the GFF3 annotation and move the coordinate columns (seqid, start, end)
# to the front; the remaining columns keep their order.
gff <- ape::read.gff(snakemake@input[['gff']], GFF3 = TRUE)
gff <- dplyr::select(gff, seqid, start, end, everything())

# One row per 'gene' feature with columns Chr, start, end, gene_id, gene.
# `gene_id` and `gene` are pulled out of the attributes string (NA when the
# pattern is absent), and chromosome labels are recoded with remap_chr_names().
genes <- gff %>%
    filter(type == 'gene') %>%
    mutate(
        gene_id = str_extract(attributes, pattern = '(?<=ID\\=)ACLI19_g\\d+(?=;)'),
        gene = str_extract(attributes, pattern = '(?<=gene=)\\w+(?=;)')
    ) %>%
    dplyr::select(Chr = seqid, start, end, gene_id, gene) %>%
    remap_chr_names()

In [None]:
# Intersect genes with the MapQ windows, reporting both the gene record (-wa)
# and the overlapping window record (-wb) for every overlap.
#
# The result columns are positional (V1, V2, ...). With `genes` holding
# Chr, start, end, gene_id, gene and `mapq` holding Chr, start, end, mapq,
# winCenter, V1-V4 are the gene's Chr/start/end/gene_id and V9 is the window's
# mapq. NOTE(review): these positions silently shift if either input gains or
# loses a column.
gene_mapq <- bt.intersect(genes, mapq, wa = TRUE, wb = TRUE) %>%
    dplyr::select(Chr = V1, start = V2, end = V3, gene_id = V4, mapq = V9)

In [None]:
# Mean MapQ per gene (one row per Chr/start/end/gene_id), labelled by
# subgenome: odd chromosome codes are "Occ", all others "Pall".
#
# The grouping is dropped right after summarising so that the result is a
# plain tibble and the following mutate() is evaluated once, not per group.
#
# NOTE(review): mean() is called without na.rm, so a gene overlapping any
# window with a missing mapq gets an NA mean. Confirm this is intended.
mean_gene_mapq <- gene_mapq %>%
    group_by(Chr, start, end, gene_id) %>%
    summarise(mean_mapq = mean(mapq)) %>%
    ungroup() %>%
    mutate(sg = ifelse(Chr %in% c(1, 3, 5, 7, 9, 11, 13, 15), "Occ", "Pall"))

In [None]:
# Histogram of per-gene mean MapQ (bins of width 2), with the two `sg` groups
# drawn side by side and a dashed reference line at MapQ = 30.
# Alternatives previously tried and left disabled:
#   geom_bar(position = "dodge")
#   scale_x_binned(breaks = seq(0, 60, by = 2))
gene_mapq_hist <- mean_gene_mapq %>%
    ggplot(aes(x = mean_mapq, color = sg, fill = sg)) +
    geom_histogram(binwidth = 2, position = position_dodge()) +
    geom_vline(xintercept = 30, color = "black", linetype = "dashed") +
    scale_x_continuous(breaks = seq(0, 60, 10)) +
    scale_fill_manual(values = c("#2a9d8f", "#f4a261")) +
    scale_color_manual(values = c("#2a9d8f", "#f4a261")) +
    labs(x = "Mean phred-scaled MapQ", y = "Number of genes") +
    theme_classic() +
    theme(
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 17),
        legend.text = element_text(size = 13),
        legend.title = element_text(size = 15)
    ) +
    transp_theme

# Show the plot in the notebook
gene_mapq_hist

# Write the figure to the Snakemake output path
ggsave(
    filename = snakemake@output[["gene_mapq_hist"]],
    plot = gene_mapq_hist,
    device = "pdf",
    width = 20,
    height = 6,
    units = "in",
    dpi = 600
)