In [None]:
library(tidyverse)
source("scripts/r/functions_objects.R")

In [None]:
# Load the tab-delimited table found at the first Snakemake input and
# display it.
asso_freq <- read_delim(
    file = snakemake@input[[1]],
    delim = "\t"
)
asso_freq

In [None]:
# Histogram (100 bins) of the P column of asso_freq.
# my_theme is not defined in this notebook; presumably it comes from the
# sourced scripts/r/functions_objects.R -- confirm.
ggplot(asso_freq, aes(x = P)) +
    geom_histogram(bins = 100, colour = "black", fill = "grey") +
    labs(x = "P-value") +
    my_theme

In [None]:
# Show the rows of asso_freq whose Frequency exceeds 0.51.
filter(asso_freq, Frequency > 0.51)

In [None]:
# Q-Q plot of the LRT column against the quantiles of a chi-squared
# distribution with one degree of freedom, with a reference line.
ggplot(asso_freq, aes(sample = LRT)) +
    stat_qq(distribution = stats::qchisq, dparams = list(df = 1)) +
    stat_qq_line(distribution = stats::qchisq, dparams = list(df = 1)) +
    labs(x = bquote(chi[1]^2), y = "LRT") +
    my_theme

In [None]:
# Adjust P with p.adjust(method = "fdr"), flag rows with an adjusted value
# of at most 0.05 (1 = flagged, 0 = not), and count rows per flag value.
asso_freq %>%
    mutate(
        qval = p.adjust(P, method = "fdr"),
        is_outlier = ifelse(qval <= 0.05, 1, 0)
    ) %>%
    group_by(is_outlier) %>%
    summarise(n = n())

In [None]:
# Load the tab-delimited table found at the second Snakemake input and add
# passes_filter: 0 where beta is NaN, 1 otherwise.
# NOTE(review): is.nan() is FALSE for NA, so a beta parsed as NA would be
# marked as passing -- confirm the input encodes failed sites as NaN.
asso_lg <- mutate(
    read_delim(file = snakemake@input[[2]], delim = "\t"),
    passes_filter = ifelse(is.nan(beta), 0, 1)
)
asso_lg

In [None]:
# Number of rows of asso_lg for each value of passes_filter.
summarise(group_by(asso_lg, passes_filter), n = n())

In [None]:
# Density-scaled histogram (100 bins) of P for the rows of asso_lg with
# passes_filter == 1.
# after_stat(density) replaces the `..density..` dot-dot notation, which
# ggplot2 deprecated in 3.4.0 and now warns about.
asso_lg %>%
    filter(passes_filter == 1) %>%
    ggplot(aes(x = P)) +
        geom_histogram(
            aes(y = after_stat(density)),
            bins = 100, color = "black", fill = "grey"
        ) +
        xlab("P-value") +
        my_theme

In [None]:
# Q-Q plot of LRT against chi-squared (1 df) quantiles, restricted to the
# rows of asso_lg with passes_filter == 1.
ggplot(filter(asso_lg, passes_filter == 1), aes(sample = LRT)) +
    stat_qq(distribution = stats::qchisq, dparams = list(df = 1)) +
    stat_qq_line(distribution = stats::qchisq, dparams = list(df = 1)) +
    labs(x = bquote(chi[1]^2), y = "LRT") +
    my_theme

In [None]:
# Among rows with passes_filter == 1: adjust P with p.adjust(method = "fdr"),
# flag adjusted values of at most 0.05, and count rows per flag value.
asso_lg %>%
    filter(passes_filter == 1) %>%
    mutate(
        qval = p.adjust(P, method = "fdr"),
        is_outlier = ifelse(qval <= 0.05, 1, 0)
    ) %>%
    group_by(is_outlier) %>%
    summarise(n = n())

In [None]:
# List the rows with passes_filter == 1 whose adjusted P value
# (p.adjust, method = "fdr") is at most 0.05.
asso_lg %>%
    filter(passes_filter == 1) %>%
    mutate(
        qval = p.adjust(P, method = "fdr"),
        is_outlier = ifelse(qval <= 0.05, 1, 0)
    ) %>%
    filter(is_outlier == 1)

In [None]:
# Histogram (50 bins) of beta for the rows of asso_lg with
# passes_filter == 1.
ggplot(filter(asso_lg, passes_filter == 1), aes(x = beta)) +
    geom_histogram(bins = 50, colour = "black", fill = "white") +
    my_theme

In [None]:
# Keep the rows with passes_filter == 1 and split the "high_WT/HE/HO"
# column on "/" into three numeric columns: WT, HE and HO.
asso_lg_with_gts <- asso_lg %>%
    filter(passes_filter == 1) %>%
    rename(GTs = `high_WT/HE/HO`) %>%
    separate(GTs, into = c("WT", "HE", "HO"), sep = "/") %>%
    mutate(across(c(WT, HE, HO), as.numeric))

In [None]:
# Show the rows of asso_lg_with_gts whose Frequency exceeds 0.5.
filter(asso_lg_with_gts, Frequency > 0.5)

In [None]:
# Histogram (50 bins) of the Frequency column.
ggplot(asso_lg_with_gts, aes(x = Frequency)) +
    geom_histogram(bins = 50, colour = "black", fill = "grey") +
    labs(x = "Allele frequency", y = "Number of sites") +
    my_theme

In [None]:
# Histogram (50 bins) of the per-row total WT + HE + HO.
ggplot(
    mutate(asso_lg_with_gts, num_gts = WT + HE + HO),
    aes(x = num_gts)
) +
    geom_histogram(bins = 50, colour = "black", fill = "grey") +
    labs(x = "Number of genotypes", y = "Number of sites") +
    my_theme

In [None]:
# Histogram (50 bins) of the WT column.
ggplot(asso_lg_with_gts, aes(x = WT)) +
    geom_histogram(bins = 50, colour = "black", fill = "grey") +
    labs(x = "# HOM REFs", y = "Number of sites") +
    my_theme

In [None]:
# Histogram (50 bins) of the HE column.
ggplot(asso_lg_with_gts, aes(x = HE)) +
    geom_histogram(bins = 50, colour = "black", fill = "grey") +
    labs(x = "# HETs", y = "Number of sites") +
    my_theme

In [None]:
# Histogram (50 bins) of the HO column.
ggplot(asso_lg_with_gts, aes(x = HO)) +
    geom_histogram(bins = 50, colour = "black", fill = "grey") +
    labs(x = "# HOM ALTs", y = "Number of sites") +
    my_theme

In [None]:
# Count the rows of `df` in which at least two of the WT, HE and HO columns
# are greater than or equal to `thresh`.
#
# thresh: minimum value a column must reach to count towards the filter.
# df:     data frame with WT, HE and HO columns.
#
# Returns a one-row data frame with columns num_sites (the number of rows
# that pass) and thresh (the threshold that was applied).
num_passing_filter <- function(thresh, df) {
    df %>%
        # A comparison that evaluates to NA never keeps a row on its own;
        # the row is kept only if some pair of columns is definitely
        # at or above the threshold.
        filter(
            (WT >= thresh & HE >= thresh) |
                (WT >= thresh & HO >= thresh) |
                (HE >= thresh & HO >= thresh)
        ) %>%
        summarise(num_sites = n()) %>%
        mutate(thresh = thresh)
}


# Apply num_passing_filter() to every integer threshold from 10 to 50,
# stack the one-row results, and plot the number of passing sites against
# the threshold.
my_seq <- 10:50
my_seq %>%
    purrr::map(num_passing_filter, asso_lg_with_gts) %>%
    bind_rows() %>%
    ggplot(aes(x = thresh, y = num_sites)) +
    geom_point(size = 2.5) +
    geom_line(linewidth = 1) +
    labs(x = "minHigh threshold", y = "Number of sites") +
    my_theme

In [None]:
# Set passes_filter to 1 for rows in which at least two of WT, HE and HO
# are >= thresh (20), and to 0 for every other row (including rows where
# the comparison evaluates to NA).
thresh <- 20
asso_lg_with_gts %>%
    mutate(passes_filter = case_when(
        (WT >= thresh & HE >= thresh) |
            (WT >= thresh & HO >= thresh) |
            (HE >= thresh & HO >= thresh) ~ 1,
        TRUE ~ 0
    ))