In [None]:
library(tidyverse)
source("scripts/r/functions_objects.R")

In [None]:
# Load the frequency-based association table (tab-delimited) from the
# Snakemake input, keep rows with Frequency <= 0.5, and flag outliers.
# q-values are computed on the rows that survive the filter; method "fdr" is
# p.adjust()'s alias for the Benjamini-Hochberg correction.
# NOTE(review): the score table is additionally filtered on LRT != -999;
# confirm that this table never carries that value.
asso_freq <- snakemake@input[["freq"]] %>%
    read_delim(delim = "\t") %>%
    filter(Frequency <= 0.5) %>%
    mutate(qval = p.adjust(P, method = "fdr")) %>%
    # 1 = q-value at or below 0.05, 0 = otherwise
    mutate(is_outlier = ifelse(qval <= 0.05, 1, 0)) %>%
    rename(chrom = Chromosome) %>%
    remap_chr_names()
asso_freq

In [None]:
# Density histogram of the raw P-values from the frequency-based test.
# Bin edges come from base hist() and are handed to ggplot explicitly.
brks <- hist(asso_freq$P, plot = FALSE, breaks = 50)$breaks
freq_pval_hist <- asso_freq %>%
    ggplot(aes(x = P)) +
    # `breaks` fully determines the binning, so no separate `bins` is passed;
    # after_stat(density) replaces the deprecated `..density..` notation.
    geom_histogram(aes(y = after_stat(density)), breaks = brks,
                   color = "black", fill = "grey") +
    scale_x_continuous(breaks = seq(0, 1, 0.2)) +
    # Reference line at density 1, the height of a uniform density on [0, 1]
    geom_hline(yintercept = 1, color = "red", linetype = "dashed") +
    ylab("Density") + xlab("P-value") +
    my_theme
freq_pval_hist

ggsave(filename = snakemake@output[["freq_pval_hist"]], plot = freq_pval_hist, device = "pdf",
       width = 8, height = 8, units = "in", dpi = 600)

In [None]:
# QQ plot of the observed LRT statistics (frequency-based test) against the
# quantiles of a chi-squared distribution with 1 degree of freedom.
freq_pval_qq <- ggplot(asso_freq, aes(sample = LRT)) +
    stat_qq(distribution = stats::qchisq, dparams = list(df = 1)) +
    stat_qq_line(distribution = stats::qchisq, dparams = list(df = 1)) +
    labs(x = bquote(chi[1]^2), y = "LRT") +
    my_theme
freq_pval_qq

# Save the QQ plot as an 8 x 8 inch PDF at the Snakemake output path.
ggsave(
    filename = snakemake@output[["freq_pval_qq"]],
    plot = freq_pval_qq,
    device = "pdf",
    width = 8,
    height = 8,
    units = "in",
    dpi = 600
)

In [None]:
# Tally the number of sites per outlier flag (is_outlier is 1 or 0).
count(asso_freq, is_outlier)

In [None]:
# Write only the rows flagged as outliers (is_outlier == 1) to the Snakemake
# output path as a tab-delimited file.
write_delim(
    filter(asso_freq, is_outlier == 1),
    snakemake@output[["freq_outliers"]],
    delim = "\t"
)

In [None]:
# Wide notebook display for the genome-wide plot
options(repr.plot.width = 20, repr.plot.height = 6)

# Setting up cumulative genome-wide x-axis: each chromosome is shifted by the
# summed maximum positions of all chromosomes that precede it.
# NOTE(review): "precede" follows the sort order group_by() applies to
# `chrom`; if `chrom` is a character column, "10" sorts before "2" -- confirm
# what remap_chr_names() returns.
data_cum <- asso_freq %>%
    group_by(chrom) %>%
    summarise(max_pos = max(Position)) %>%
    mutate(pos_add = lag(cumsum(max_pos), default = 0)) %>%
    dplyr::select(-max_pos)

# Attach the per-chromosome offset, compute the cumulative position, and
# assign a two-level category ('One' for chromosomes 1, 3, ..., 15 and 'Two'
# for 2, 4, ..., 16) used to alternate point shading. Any other `chrom`
# value gets NA.
df_mod <- asso_freq %>%
    inner_join(data_cum, by = "chrom") %>%
    mutate(
        pos_cum = Position + pos_add,
        chrom_cat = case_when(
            chrom %in% as.character(seq(1, 15, by = 2)) ~ "One",
            chrom %in% as.character(seq(2, 16, by = 2)) ~ "Two"
        )
    )

# Axis tick position for each chromosome: mean cumulative position of its sites
axis_set <- df_mod %>%
    group_by(chrom) %>%
    summarise(center = mean(pos_cum))

# Get outliers to plot as separate layer
outliers <- filter(df_mod, is_outlier == 1)
not_outlier <- filter(df_mod, is_outlier == 0)

In [None]:
# Genome-wide Manhattan plot of -log10(q-value) for the frequency-based test.
# Non-outlier sites are drawn first, shaded by `chrom_cat`; outlier sites are
# overplotted in red as a separate layer.
freq_manhat <- ggplot() +
        geom_point(data = not_outlier, shape = 21, alpha = 0.4, size = 1,
                   aes(x = pos_cum, y = -log10(qval), fill = chrom_cat, color = chrom_cat),
                   show.legend = FALSE) +
        geom_point(data = outliers, shape = 21, alpha = 1, size = 1, color = "red", fill = "red",
                   aes(x = pos_cum, y = -log10(qval))) +
        # Dashed line at the q = 0.05 cutoff
        geom_hline(yintercept = -log10(0.05), color = "grey40", linetype = "dashed") +
        # One tick per chromosome; `labels` is spelled out in full rather than
        # relying on partial argument matching of `label`
        scale_x_continuous(labels = axis_set$chrom, breaks = axis_set$center) +
        # scale_y_continuous(expand = c(0,0), breaks = seq(0, 4, 1)) +
        # coord_cartesian(ylim = c(0, 4)) +
        scale_fill_manual(values = c("black", "grey40")) +
        scale_color_manual(values = c("black", "grey40")) +
        ylab(expression(-log[10] * "(q-value)")) + xlab('Chromosomes') +
        theme_classic() +
        my_theme +
        theme(legend.position = "none")
freq_manhat

ggsave(filename = snakemake@output[["freq_manhat"]], plot = freq_manhat,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

In [None]:
# Load the score-based association table (tab-delimited) from the Snakemake
# input. Rows are kept only when Frequency <= 0.5 and LRT is not -999
# (presumably a sentinel for sites that could not be tested -- confirm against
# the upstream tool). q-values are computed on the surviving rows; method
# "fdr" is p.adjust()'s alias for the Benjamini-Hochberg correction.
asso_score <- snakemake@input[["score"]] %>%
    read_delim(delim = "\t") %>%
    filter(Frequency <= 0.5, LRT != -999) %>%
    mutate(qval = p.adjust(P, method = "fdr")) %>%
    # 1 = q-value at or below 0.05, 0 = otherwise
    mutate(is_outlier = ifelse(qval <= 0.05, 1, 0)) %>%
    rename(chrom = Chromosome) %>%
    remap_chr_names()
asso_score

In [None]:
# Square notebook display for the diagnostic plots
options(repr.plot.width = 8, repr.plot.height = 8)

# Density histogram of the raw P-values from the score-based test.
# Bin edges come from base hist() and are handed to ggplot explicitly.
brks <- hist(asso_score$P, plot = FALSE, breaks = 50)$breaks
score_pval_hist <- asso_score %>%
    ggplot(aes(x = P)) +
    # `breaks` fully determines the binning, so no separate `bins` is passed;
    # after_stat(density) replaces the deprecated `..density..` notation.
    geom_histogram(aes(y = after_stat(density)), breaks = brks,
                   color = "black", fill = "grey") +
    scale_x_continuous(breaks = seq(0, 1, 0.2)) +
    # Reference line at density 1, the height of a uniform density on [0, 1]
    geom_hline(yintercept = 1, color = "red", linetype = "dashed") +
    ylab("Density") + xlab("P-value") +
    my_theme
score_pval_hist

ggsave(filename = snakemake@output[["score_pval_hist"]], plot = score_pval_hist, device = "pdf",
       width = 8, height = 8, units = "in", dpi = 600)

In [None]:
# QQ plot of the observed LRT statistics (score-based test) against the
# quantiles of a chi-squared distribution with 1 degree of freedom.
score_pval_qq <- ggplot(asso_score, aes(sample = LRT)) +
    stat_qq(distribution = stats::qchisq, dparams = list(df = 1)) +
    stat_qq_line(distribution = stats::qchisq, dparams = list(df = 1)) +
    labs(x = bquote(chi[1]^2), y = "LRT") +
    my_theme
score_pval_qq

# Save the QQ plot as an 8 x 8 inch PDF at the Snakemake output path.
ggsave(
    filename = snakemake@output[["score_pval_qq"]],
    plot = score_pval_qq,
    device = "pdf",
    width = 8,
    height = 8,
    units = "in",
    dpi = 600
)

In [None]:
# Tally the number of sites per outlier flag (is_outlier is 1 or 0).
count(asso_score, is_outlier)

In [None]:
# Write only the rows flagged as outliers (is_outlier == 1) to the Snakemake
# output path as a tab-delimited file.
write_delim(
    filter(asso_score, is_outlier == 1),
    snakemake@output[["score_outliers"]],
    delim = "\t"
)

In [None]:
# Wide notebook display for the genome-wide plot
options(repr.plot.width = 20, repr.plot.height = 6)

# Setting up cumulative genome-wide x-axis: each chromosome is shifted by the
# summed maximum positions of all chromosomes that precede it.
# NOTE(review): "precede" follows the sort order group_by() applies to
# `chrom`; if `chrom` is a character column, "10" sorts before "2" -- confirm
# what remap_chr_names() returns.
data_cum <- asso_score %>%
    group_by(chrom) %>%
    summarise(max_pos = max(Position)) %>%
    mutate(pos_add = lag(cumsum(max_pos), default = 0)) %>%
    dplyr::select(chrom, pos_add)

# Attach the per-chromosome offset, compute the cumulative position, and
# assign a two-level category ('One' for chromosomes 1, 3, ..., 15 and 'Two'
# for 2, 4, ..., 16) used to alternate point shading. Any other `chrom`
# value gets NA.
df_mod <- asso_score %>%
    inner_join(data_cum, by = "chrom") %>%
    mutate(pos_cum = Position + pos_add) %>%
    mutate(chrom_cat = case_when(
        chrom %in% as.character(seq(1, 15, by = 2)) ~ "One",
        chrom %in% as.character(seq(2, 16, by = 2)) ~ "Two"
    ))

# Axis tick position for each chromosome: mean cumulative position of its sites
axis_set <- df_mod %>%
  group_by(chrom) %>%
  summarize(center = mean(pos_cum))

# Get outliers to plot as separate layer
outliers <- df_mod %>% filter(is_outlier == 1)

# Only a random 1% of the non-outlier sites is kept for plotting. The RNG seed
# is fixed so the subsample -- and therefore the saved figure -- is identical
# across pipeline runs.
set.seed(42)
not_outlier <- df_mod %>% filter(is_outlier == 0) %>% sample_frac(0.01)

In [None]:
# Genome-wide Manhattan plot of -log10(q-value) for the score-based test.
# The rows in `not_outlier` are drawn first, shaded by `chrom_cat`; outlier
# sites are overplotted in red as a separate layer.
score_manhat <- ggplot() +
        geom_point(data = not_outlier, shape = 21, alpha = 0.4, size = 1,
                   aes(x = pos_cum, y = -log10(qval), fill = chrom_cat, color = chrom_cat),
                   show.legend = FALSE) +
        geom_point(data = outliers, shape = 21, alpha = 1, size = 1, color = "red", fill = "red",
                   aes(x = pos_cum, y = -log10(qval))) +
        # Dashed line at the q = 0.05 cutoff
        geom_hline(yintercept = -log10(0.05), color = "grey40", linetype = "dashed") +
        # One tick per chromosome; `labels` is spelled out in full rather than
        # relying on partial argument matching of `label`
        scale_x_continuous(labels = axis_set$chrom, breaks = axis_set$center) +
        # scale_y_continuous(expand = c(0,0), breaks = seq(0, 4, 1)) +
        # coord_cartesian(ylim = c(0, 4)) +
        scale_fill_manual(values = c("black", "grey40")) +
        scale_color_manual(values = c("black", "grey40")) +
        ylab(expression(-log[10] * "(q-value)")) + xlab('Chromosomes') +
        theme_classic() +
        my_theme +
        theme(legend.position = "none")
score_manhat

ggsave(filename = snakemake@output[["score_manhat"]], plot = score_manhat,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")