In [None]:
library(tidyverse)
source("scripts/r/functions_objects.R")

In [None]:
# Read a single ngsParalog output file into a tibble.
#
# The file at `path` is parsed by `read_table()` as five headerless columns
# (chrom, pos, neg_null_log_lik, neg_alt_log_lik, LRT). A `pval` column is
# then appended: half of the upper-tail probability of `LRT` under a
# chi-square distribution with 1 degree of freedom.
load_paralog_file <- function(path){
    paralog_cols <- c("chrom", "pos", "neg_null_log_lik", "neg_alt_log_lik", "LRT")
    # Only the file read is wrapped, so just the reader's messages are silenced
    raw_table <- suppressMessages(read_table(path, col_names = paralog_cols))
    mutate(raw_table, pval = 0.5 * pchisq(LRT, df = 1, lower.tail = FALSE))
}

# Read every file listed under the "para" Snakemake input, stack the tables
# row-wise, then pass the result through remap_chr_names() (not defined in
# this section; presumably provided by the sourced functions_objects.R)
para_paths <- snakemake@input[["para"]]
para_df <- remap_chr_names(purrr::map_dfr(para_paths, load_paralog_file))

In [None]:
# Sites whose LRT is at or above the 99th percentile are treated as outliers
# (is_outlier = 1); every other site gets is_outlier = 0
quant <- quantile(para_df$LRT, probs = 0.99)
para_df <- mutate(para_df, is_outlier = ifelse(LRT >= quant, 1, 0))

# Number of sites in each outlier class
tally(group_by(para_df, is_outlier))

In [None]:
# Notebook display size for the figures drawn below
options(repr.plot.width = 20, repr.plot.height = 6)

# Cumulative genome-wide x-axis: each chromosome gets an offset equal to the
# summed maximum positions of the chromosomes that precede it in the grouped
# summary (the first one gets 0)
chrom_extent <- summarise(group_by(para_df, chrom), max_pos = max(pos))
data_cum <- chrom_extent %>%
    mutate(pos_add = lag(cumsum(max_pos), default = 0)) %>%
    dplyr::select(-max_pos)

# Chromosome codes 1, 3, ..., 15 are labelled 'One' and 2, 4, ..., 16 'Two' so
# that the two categories can be given alternating colours in the plot; any
# other code is left as NA
odd_chroms <- as.character(seq(1, 15, by = 2))
even_chroms <- as.character(seq(2, 16, by = 2))

df_mod <- inner_join(para_df, data_cum, by = "chrom") %>%
    mutate(pos_cum = pos + pos_add,
           chrom_cat = case_when(chrom %in% odd_chroms ~ 'One',
                                 chrom %in% even_chroms ~ 'Two'))

# Mean cumulative position per chromosome, used to place the x-axis labels
axis_set <- summarize(group_by(df_mod, chrom), center = mean(pos_cum))

# Split the sites so outliers can be drawn as their own layer
outliers <- filter(df_mod, is_outlier == 1)
not_outlier <- filter(df_mod, is_outlier == 0)

In [None]:
# Manhattan-style plot of the likelihood ratio against cumulative genome
# position. Outlier sites are drawn in red; the remaining sites are coloured
# black / grey40 according to `chrom_cat`. The dashed horizontal line marks the
# outlier threshold `quant`.
manhat <- ggplot() +
    geom_point(data = outliers, shape = 21, alpha = 0.2, size = 1, color = "red", fill = "red",
               aes(x = pos_cum, y = LRT)) +
    # The stray empty argument that used to sit before show.legend was removed:
    # it was silently matched to a positional formal and had no effect
    geom_point(data = not_outlier, shape = 21, alpha = 0.2, size = 1,
               aes(x = pos_cum, y = LRT, fill = chrom_cat, color = chrom_cat),
               show.legend = FALSE) +
    geom_hline(yintercept = quant, color = "grey40", linetype = "dashed") +
    # One tick per chromosome, placed at its mean cumulative position;
    # `labels` is spelled out in full rather than relying on partial matching
    scale_x_continuous(labels = axis_set$chrom, breaks = axis_set$center) +
    scale_y_continuous(breaks = seq(0, 250000, 50000)) +
    # Displayed y-range is fixed to 0-262000
    coord_cartesian(ylim = c(0, 262000)) +
    scale_fill_manual(values = c("black", "grey40")) +
    scale_color_manual(values = c("black", "grey40")) +
    ylab("Likelihood ratio") + xlab('Chromosome') +
    theme_classic() +
    # `my_theme` is not defined in this section (presumably comes from the
    # sourced functions_objects.R)
    my_theme +
    theme(legend.position = "none")
manhat

# Save the figure to the path given by the "manhat" Snakemake output
ggsave(snakemake@output[["manhat"]], plot = manhat, device = "pdf",
       width = 20, height = 8, dpi = 600, units = "in")

In [None]:
# Write one chromosome's sites to a tab-delimited file.
#
# `df` is expected to hold rows for a single chromosome code (it is applied to
# the per-chromosome pieces produced by `group_split(chrom)`). The code is
# translated to its file-name label and the table is written to
# "<out>/<label>_filtered.sites", where <out> is `snakemake@params[["out"]]`.
# A code that is not in the lookup table yields an NA label.
write_sites <- function(df){
    chrom_labels <- c('1' = 'Chr01_Occ',
                      '2' = 'Chr01_Pall',
                      '3' = 'Chr02_Occ',
                      '4' = 'Chr02_Pall',
                      '5' = 'Chr03_Occ',
                      '6' = 'Chr03_Pall',
                      '7' = 'Chr04_Occ',
                      '8' = 'Chr04_Pall',
                      '9' = 'Chr05_Occ',
                      '10' = 'Chr05_Pall',
                      '11' = 'Chr06_Occ',
                      '12' = 'Chr06_Pall',
                      '13' = 'Chr07_Occ',
                      '14' = 'Chr07_Pall',
                      '15' = 'Chr08_Occ',
                      '16' = 'Chr08_Pall')
    chrom_id <- unique(pull(df, chrom))
    # Index by name (not position), so convert the code to character first
    chrom_label <- unname(chrom_labels[as.character(chrom_id)])
    outpath <- sprintf("%s/%s_filtered.sites", snakemake@params[["out"]], chrom_label)
    write_delim(df, outpath, delim="\t")
}

# Keep only the non-outlier sites and write one file per chromosome
non_outlier_sites <- filter(para_df, is_outlier == 0)
purrr::walk(group_split(non_outlier_sites, chrom), write_sites)