In [None]:
library(tidyverse)
source("scripts/r/functions_objects.R")

In [None]:
# Load the file giving the chromosome and position order for each split.
# The file is tab-delimited and read without a header row; the two columns
# are named `chrom` and `pos` here.
site_order <- suppressMessages(
    read_delim(
        file = snakemake@input[["site_order"]],
        delim = "\t",
        col_names = c("chrom", "pos")
    )
)

In [None]:
# Load one XtX summary file and tag its rows with city and split.
#
# path: path to a whitespace-delimited summary table with a header row.
#       The file name is parsed for metadata: the text before the last
#       "_split" becomes `city`, and the text between "split" and
#       "_summary" becomes `split`.
#
# Returns a data frame with columns XtXst, XtX_log10_pval, XtX_pval,
# city and split (city/split are NA if the file name does not match).
load_xtx_output <- function(path){
    file_name <- basename(path)
    city_name <- str_extract(file_name, pattern = ".*(?=_split)")
    split_id <- str_extract(file_name, pattern = "(?<=split).*(?=_summary)")

    # "log10.1.pval." is presumably read.table's syntactic rewrite of a
    # "log10(1/pval)" header -- TODO confirm against the input files.
    raw <- suppressMessages(read.table(path, header = TRUE))

    raw %>%
        rename(XtX_log10_pval = "log10.1.pval.") %>%
        # Back-transform -log10(p) to the raw p-value
        mutate(XtX_pval = 10^(-XtX_log10_pval)) %>%
        dplyr::select(XtXst, XtX_log10_pval, XtX_pval) %>%
        mutate(city = city_name, split = split_id)
}

# Read every XtX summary file and stack the results row-wise, in the order
# the files are listed in the Snakemake input.
xtx_df <- purrr::map_dfr(snakemake@input[["pi_xtx"]], load_xtx_output)

In [None]:
# Load one C2 contrast summary file and tag its rows with city and split.
#
# path: path to a whitespace-delimited summary table with a header row.
#       The file name is parsed for metadata: the text before the last
#       "_split" becomes `city`, and the text between "split" and
#       "_summary" becomes `split`.
#
# Returns a data frame with columns C2_std, C2_log10_pval, C2_pval,
# city and split (city/split are NA if the file name does not match).
load_cont_output <- function(path){
    file_name <- basename(path)
    city_name <- str_extract(file_name, pattern = ".*(?=_split)")
    split_id <- str_extract(file_name, pattern = "(?<=split).*(?=_summary)")

    # "log10.1.pval." is presumably read.table's syntactic rewrite of a
    # "log10(1/pval)" header -- TODO confirm against the input files.
    raw <- suppressMessages(read.table(path, header = TRUE))

    raw %>%
        rename(C2_log10_pval = "log10.1.pval.") %>%
        # Back-transform -log10(p) to the raw p-value
        mutate(C2_pval = 10^(-C2_log10_pval)) %>%
        dplyr::select(C2_std, C2_log10_pval, C2_pval) %>%
        mutate(city = city_name, split = split_id)
}

# Read every C2 contrast summary file and stack the results row-wise, in the
# order the files are listed in the Snakemake input.
c2_df <- purrr::map_dfr(snakemake@input[["cont_out"]], load_cont_output)

In [None]:
# Attach chromosome/position to the XtX results, one city at a time.
# The columns are bound by row position, so this relies on each city's rows
# being in the same order (and number) as `site_order`.
xtx_df_withPos <- xtx_df %>%
    group_split(city) %>%
    purrr::map(~ bind_cols(.x, site_order)) %>%
    bind_rows() %>%
    arrange(city, chrom, pos)

In [None]:
# Attach chromosome/position to the C2 results, one city at a time.
# The columns are bound by row position, so this relies on each city's rows
# being in the same order (and number) as `site_order`.
c2_df_withPos <- c2_df %>%
    group_split(city) %>%
    purrr::map(~ bind_cols(.x, site_order)) %>%
    bind_rows() %>%
    arrange(city, chrom, pos)

In [None]:
# Quick look at the first rows of the positioned C2 table
c2_df_withPos %>% head()

In [None]:
# Histogram of C2 p-values for a single city (Toronto), on the density scale.
test <- c2_df_withPos %>% filter(city == "Toronto")

# Take the bin edges from base hist() (`breaks = 50` is only a suggestion to
# hist(), so the actual number of bins may differ) and reuse them in ggplot.
brks <- hist(test$C2_pval, plot = FALSE, breaks = 50)$breaks

test %>% 
    ggplot(aes(x = C2_pval)) +
    # `breaks` overrides `bins` in geom_histogram, so only `breaks` is given.
    # NOTE: `..density..` is deprecated as of ggplot2 3.4.0 in favour of
    # after_stat(density); kept here so older pinned ggplot2 versions still work.
    geom_histogram(aes(y = ..density..), color = "black", fill = "grey", breaks = brks) +
    scale_x_continuous(breaks = seq(0, 1, 0.2)) +
    # Density of 1 is what uniformly distributed p-values on [0, 1] would give
    geom_hline(yintercept = 1, color = "red", linetype = "dashed") +
    ylab("Density") + xlab("P-value") +
    my_theme

In [None]:
# Significance threshold supplied through the Snakemake params
qval_cut <- snakemake@params[["qval_cut"]]

# Adjust the C2 p-values and flag sites at or below the threshold.
# NOTE(review): `C2_qval` is a Bonferroni-adjusted p-value, not an FDR
# q-value, despite its name -- confirm this is the intended correction.
# NOTE(review): this overwrites `test` in place, so re-running the cell
# applies remap_chr_names() (defined outside this file) a second time.
test <- test %>%
    mutate(C2_qval = p.adjust(C2_pval, method = "bonferroni")) %>%
    mutate(is_outlier = ifelse(C2_qval <= qval_cut, 1, 0)) %>%
    dplyr::select(chrom, pos, C2_std, C2_pval, C2_qval, is_outlier) %>%
    remap_chr_names()

In [None]:
# Number of sites flagged as outliers (1) versus not (0)
summarise(group_by(test, is_outlier), n = n())

In [None]:
# Quick look at the first rows of the per-city outlier table
test %>% head()

In [None]:
# Wide figure size for notebook output
options(repr.plot.width = 20, repr.plot.height = 6)

# Set up the cumulative genome-wide x-axis: each chromosome is shifted by the
# summed maximum positions of the chromosomes that precede it.
# NOTE(review): summarise() orders groups by `chrom`; if `chrom` is character
# rather than numeric, that order is alphabetical ("1", "10", ..., "2") --
# confirm the type returned by remap_chr_names().
data_cum <- test %>%
    group_by(chrom) %>%
    summarise(max_pos = max(pos)) %>%
    transmute(chrom, pos_add = lag(cumsum(max_pos), default = 0))

# Add the cumulative position and a two-level category that alternates
# between odd ('One') and even ('Two') chromosomes 1-16; any other
# chromosome label gets NA.
df_mod <- test %>%
    inner_join(data_cum, by = "chrom") %>%
    mutate(pos_cum = pos + pos_add,
           chrom_cat = case_when(
               chrom %in% as.character(seq(1, 15, by = 2)) ~ 'One',
               chrom %in% as.character(seq(2, 16, by = 2)) ~ 'Two'))

# Axis tick position for each chromosome: mean cumulative position of its sites
axis_set <- summarize(group_by(df_mod, chrom), center = mean(pos_cum))

# Outliers are plotted as their own layer; non-outliers are randomly thinned
# to 1% of rows (no seed is set, so the subsample differs between runs).
outliers <- filter(df_mod, is_outlier == 1)
not_outlier <- sample_frac(filter(df_mod, is_outlier == 0), size = 0.01)

In [None]:
# Genome-wide Manhattan plot of -log10(C2_qval) against cumulative position.
# Non-outlier sites are drawn first, shaded by alternating chromosome
# category; outliers are overlaid in red. The dashed line marks the
# significance threshold `qval_cut`.
manhat <- ggplot() +
        # Background layer: (subsampled) non-outlier sites
        geom_point(data = not_outlier, shape = 21, alpha = 0.4, size = 1, 
                   aes(x = pos_cum, y = -log10(C2_qval), fill = chrom_cat, color = chrom_cat),
                   show.legend = FALSE) +
        # Foreground layer: outlier sites
        geom_point(data = outliers, shape = 21, alpha = 1, size = 1, color = "red", fill = "red",
                   aes(x = pos_cum, y = -log10(C2_qval))) +
        geom_hline(yintercept = -log10(qval_cut), color = "grey40", linetype = "dashed") +
        # One tick per chromosome, placed at the chromosome's mean cumulative position
        scale_x_continuous(labels = axis_set$chrom, breaks = axis_set$center) +
        # scale_y_continuous(expand = c(0,0), breaks = seq(0, 4, 1)) +
        # coord_cartesian(ylim = c(0, 4)) +
        # Unnamed values are assigned to the chrom_cat levels in order
        scale_fill_manual(values = c("black", "grey40")) + 
        scale_color_manual(values = c("black", "grey40")) + 
        ylab(expression(-log[10] * "(q-value)")) + xlab('Chromosomes') +
        theme_classic() +
        my_theme +
        theme(legend.position = "none")
manhat

# ggsave(filename = snakemake@output[["c2_manhat_pdf"]], plot = manhat, 
#        height = 8, width = 20, device = "pdf", dpi = 600, units = "in")
# ggsave(filename = snakemake@output[["c2_manhat_png"]], plot = manhat, 
#        height = 8, width = 20, device = "png", dpi = 600, units = "in")