In [None]:
library(tidyverse)
source("scripts/r/functions_objects.R")

In [None]:
# Recode the `chrom` column of `df` from subgenome-specific chromosome labels
# (e.g. 'Chr01_Occ', 'Chr01_Pall') to numeric codes 1-16. Labels that are not
# in the lookup below become NA. All other columns are returned unchanged.
remap_chr_names <- function(df){
    # The position of a label in this vector is the numeric code it receives
    chrom_labels <- c('Chr01_Occ', 'Chr01_Pall',
                      'Chr02_Occ', 'Chr02_Pall',
                      'Chr03_Occ', 'Chr03_Pall',
                      'Chr04_Occ', 'Chr04_Pall',
                      'Chr05_Occ', 'Chr05_Pall',
                      'Chr06_Occ', 'Chr06_Pall',
                      'Chr07_Occ', 'Chr07_Pall',
                      'Chr08_Occ', 'Chr08_Pall')
    # match() yields integer positions (NA when absent); store as double
    df %>%
        mutate(chrom = as.numeric(match(chrom, chrom_labels)))
}

In [None]:
# Load the headerless, tab-delimited file giving the chromosome and position
# of every site, in the order the sites appear across the splits
site_order <- suppressMessages(
    read_delim(
        snakemake@input[["site_order"]],
        col_names = c("chrom", "pos"),
        delim = "\t"
    )
)

# Load BayPass contrast statistic output and combine it with the site order.
# The column-wise combination works because the files are in the same order
# by default. Q-values are then estimated using FDR and outliers are flagged
# at the configured q-value cutoff.

# Read one whitespace-delimited contrast output file at `path`, silencing
# readr's column-specification messages.
load_contrast_output <- function(path){
    suppressMessages(read_table(path))
}

# Q-value threshold below which a site is called an outlier
qval_cut <- snakemake@params[["qval_cut"]]

# Stack all contrast files row-wise, attach site coordinates column-wise,
# convert log10(1/p) back to p-values, apply FDR correction and flag outliers
cont_df <- purrr::map_dfr(snakemake@input[["cont_out"]], load_contrast_output) %>%
    bind_cols(site_order) %>%
    arrange(chrom, pos) %>%
    rename("log10_1_pval" = "log10(1/pval)") %>%
    mutate(
        pval = 10^(-1 * log10_1_pval),
        qval = p.adjust(pval, method = "fdr"),
        is_outlier = ifelse(-log10(qval) >= -log10(qval_cut), 1, 0)
    ) %>%
    dplyr::select(
        chrom, pos,
        M_C2, SD_C2, C2_std,
        log10_1_pval, pval, qval,
        is_outlier
    ) %>%
    remap_chr_names()

In [None]:
# Show the smallest and largest -log10(q-value) across all sites
range(-log10(cont_df[["qval"]]))

In [None]:
# Write only the sites flagged as outliers to a tab-delimited file
write_delim(
    dplyr::filter(cont_df, is_outlier == 1),
    snakemake@output[["c2_outliers"]],
    delim = "\t"
)

In [None]:
# Density histogram of the contrast p-values.
# Bin edges come from base hist() (about 50 "pretty" breaks) and are passed
# to ggplot explicitly, so no separate `bins` argument is needed: `breaks`
# overrides `bins` in geom_histogram().
brks <- hist(cont_df$pval, plot = FALSE, breaks = 50)$breaks

# NOTE(review): `..density..` is deprecated in recent ggplot2 releases in
# favour of `after_stat(density)`; switch once the ggplot2 version of the
# environment is confirmed to support it.
# `my_theme` is not defined in this notebook - presumably it comes from the
# sourced functions_objects.R; verify.
c2_pval_hist <- cont_df %>%
    ggplot(aes(x = pval)) +
    geom_histogram(aes(y = ..density..), color = "black", fill = "grey", breaks = brks) +
    scale_x_continuous(breaks = seq(0, 1, 0.2)) +
    # Reference line at density 1, the height of a uniform density on [0, 1]
    geom_hline(yintercept = 1, color = "red", linetype = "dashed") +
    ylab("Density") + xlab("P-value") +
    my_theme
c2_pval_hist

ggsave(filename = snakemake@output[["c2_pval_hist"]], plot = c2_pval_hist, device = 'pdf',
       width = 8, height = 8, units = 'in', dpi = 600)

In [None]:
# Size of inline plots rendered by the notebook
options(repr.plot.width = 20, repr.plot.height = 6)

# Setting up cumulative genome-wide x-axis:
# for each chromosome, `pos_add` is the summed maximum position of all
# preceding chromosomes (0 for the first one)
data_cum <- cont_df %>%
    group_by(chrom) %>%
    summarise(max_pos = max(pos)) %>%
    mutate(pos_add = lag(cumsum(max_pos), default = 0)) %>%
    dplyr::select(chrom, pos_add)

# Add the genome-wide coordinate and an alternating category used to colour
# neighbouring chromosomes differently. `chrom` is numeric here (codes 1-16
# from remap_chr_names()), so odd codes are 'One' and even codes are 'Two';
# a missing `chrom` gives a missing category.
df_mod <- cont_df %>%
    inner_join(data_cum, by = "chrom") %>%
    mutate(pos_cum = pos + pos_add,
           chrom_cat = if_else(chrom %% 2 == 1, 'One', 'Two'))

# Mid-point of each chromosome on the cumulative axis, used for tick labels
axis_set <- df_mod %>%
    group_by(chrom) %>%
    summarize(center = mean(pos_cum))

# Get outliers to plot as separate layer
c2_outliers <- df_mod %>% filter(is_outlier == 1)
not_outlier <- df_mod %>% filter(is_outlier == 0)

In [None]:
# Genome-wide Manhattan plot of -log10(q-value).
# Non-outlier sites are drawn first in alternating colours per chromosome;
# outliers are overlaid as larger red points. The dashed line marks the
# q-value cutoff.
# `my_theme` is not defined in this notebook - presumably it comes from the
# sourced functions_objects.R; verify.
# NOTE(review): the y-axis is fixed to [0, 4]; sites with -log10(q) above 4
# would fall outside the visible panel - confirm against the observed range.
c2_manhat <- ggplot() +
        geom_point(data = not_outlier, shape = 21, alpha = 0.4, size = 1,
                   aes(x = pos_cum, y = -log10(qval), fill = chrom_cat, color = chrom_cat),
                   show.legend = FALSE) +
        geom_point(data = c2_outliers, shape = 21, alpha = 1, size = 3, color = "red", fill = "red",
                   aes(x = pos_cum, y = -log10(qval))) +
        geom_hline(yintercept = -log10(qval_cut), color = "grey40", linetype = "dashed") +
        # One tick per chromosome, placed at its centre on the cumulative axis
        scale_x_continuous(labels = axis_set$chrom, breaks = axis_set$center) +
        scale_y_continuous(expand = c(0,0), breaks = seq(0, 4, 1)) +
        coord_cartesian(ylim = c(0, 4)) +
        scale_fill_manual(values = c("black", "grey40")) +
        scale_color_manual(values = c("black", "grey40")) +
        ylab(expression(-log[10] * "(q-value)")) + xlab('Chromosomes') +
        theme_classic() +
        my_theme +
        theme(legend.position = "none")
# The plot expression ends above; print it as a separate statement
c2_manhat

ggsave(filename = snakemake@output[["c2_manhat_pdf"]], plot = c2_manhat,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")
ggsave(filename = snakemake@output[["c2_manhat_png"]], plot = c2_manhat,
       height = 8, width = 20, device = "png", dpi = 600, units = "in")