## Setup

In [None]:
# Load required packages
library(tidyverse)

In [None]:
# Read the tab-delimited Fst table whose path Snakemake supplies under the
# `all_fsts` input key.
all_fst_df <- snakemake@input[["all_fsts"]] %>%
  read_delim(delim = "\t")

In [None]:
# Notebook display size for inline plots (inches).
options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter of site-based ARG Fst (x) against Pixy's Hudson Fst (y), keeping
# only rows where `no_snps` is at least 20, with a linear-model trend line.
arg_site_gt_cor_plot <- ggplot(
  data = filter(all_fst_df, no_snps >= 20),
  mapping = aes(x = arg_site_fst, y = avg_hudson_fst)
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", linewidth = 1, color = "blue") +
  labs(
    x = "Site-based Fst from tskit (i.e. ARGs)",
    y = "Hudson's Fst from Pixy (i.e. VCFs)"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_site_gt_cor"]],
  plot = arg_site_gt_cor_plot,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Evaluate the output path registered under `arg_site_gt_cor` (the same key
# passed to ggsave for the site-vs-Pixy scatter) so the notebook displays it.
# NOTE(review): presumably a debugging aid; confirm whether it is still needed.
snakemake@output[["arg_site_gt_cor"]]

In [None]:
# Scatter of branch-based ARG Fst (x) against Pixy's Hudson Fst (y), keeping
# only rows where `no_snps` is at least 20, with a linear-model trend line.
arg_branch_gt_cor_plot <- ggplot(
  data = filter(all_fst_df, no_snps >= 20),
  mapping = aes(x = arg_branch_fst, y = avg_hudson_fst)
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", linewidth = 1, color = "blue") +
  labs(
    x = "Branch-based Fst from tskit (i.e. ARGs)",
    y = "Hudson's Fst from Pixy (i.e. VCFs)"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_branch_gt_cor"]],
  plot = arg_branch_gt_cor_plot,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Scatter of site-based ARG Fst (x) against ANGSD's Hudson Fst (y), keeping
# only rows where `no_snps` is at least 20, with a linear-model trend line.
arg_site_sfs_cor_plot <- ggplot(
  data = filter(all_fst_df, no_snps >= 20),
  mapping = aes(x = arg_site_fst, y = sfs_hudson_fst)
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", linewidth = 1, color = "blue") +
  labs(
    x = "Site-based Fst from tskit (i.e. ARGs)",
    y = "Hudson's Fst from ANGSD (i.e. VCFs)"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_site_sfs_cor"]],
  plot = arg_site_sfs_cor_plot,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Scatter of branch-based ARG Fst (x) against ANGSD's Hudson Fst (y), keeping
# only rows where `no_snps` is at least 20, with a linear-model trend line.
# NOTE(review): the variable is named `..._fst_cor_plot` while the output key
# it is saved under is `arg_branch_sfs_cor`; consider renaming for consistency.
arg_branch_fst_cor_plot <- ggplot(
  data = filter(all_fst_df, no_snps >= 20),
  mapping = aes(x = arg_branch_fst, y = sfs_hudson_fst)
) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", linewidth = 1, color = "blue") +
  labs(
    x = "Branch-based Fst from tskit (i.e. ARGs)",
    y = "Hudson's Fst from ANGSD (i.e. VCFs)"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_branch_sfs_cor"]],
  plot = arg_branch_fst_cor_plot,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Create dataframe with Fst correlation coefficients: one row per `regionID`,
# one column per (ARG statistic, reference estimator) pairing. Only rows where
# `no_snps` is at least 20 contribute.
#
# `use = "na.or.complete"` drops incomplete pairs exactly like "complete.obs",
# but returns NA instead of raising an error when a region has no complete
# pairs, so a single all-missing region cannot abort the whole summary.
all_fst_cors <- all_fst_df %>%
  filter(no_snps >= 20) %>%
  group_by(regionID) %>%
  summarize(
    branch_gt_cor = cor(
      arg_branch_fst, avg_hudson_fst,
      use = "na.or.complete"
    ),
    site_gt_cor = cor(
      arg_site_fst, avg_hudson_fst,
      use = "na.or.complete"
    ),
    branch_sfs_cor = cor(
      arg_branch_fst, sfs_hudson_fst,
      use = "na.or.complete"
    ),
    site_sfs_cor = cor(
      arg_site_fst, sfs_hudson_fst,
      use = "na.or.complete"
    )
  )

In [None]:
# Histogram (30 bins) of the per-region correlation between branch-based ARG
# Fst and Pixy's Fst, taken from the `branch_gt_cor` column.
arg_branch_gt_cor_hist <- ggplot(
  data = all_fst_cors,
  mapping = aes(x = branch_gt_cor)
) +
  geom_histogram(bins = 30, color = "black", fill = "red") +
  labs(
    x = "ARG branch Fst vs. Pixy GT Fst correlation",
    y = "Number of 1Mb regions"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_branch_gt_hist"]],
  plot = arg_branch_gt_cor_hist,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Histogram (30 bins) of the per-region correlation between site-based ARG
# Fst and Pixy's Fst, taken from the `site_gt_cor` column.
arg_site_gt_cor_hist <- ggplot(
  data = all_fst_cors,
  mapping = aes(x = site_gt_cor)
) +
  geom_histogram(bins = 30, color = "black", fill = "red") +
  labs(
    x = "ARG site Fst vs. Pixy GT Fst correlation",
    y = "Number of 1Mb regions"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_site_gt_hist"]],
  plot = arg_site_gt_cor_hist,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Histogram (30 bins) of the per-region correlation between branch-based ARG
# Fst and ANGSD's Fst, taken from the `branch_sfs_cor` column.
arg_branch_sfs_cor_hist <- ggplot(
  data = all_fst_cors,
  mapping = aes(x = branch_sfs_cor)
) +
  geom_histogram(bins = 30, color = "black", fill = "red") +
  labs(
    x = "ARG branch Fst vs. ANGSD Fst correlation",
    y = "Number of 1Mb regions"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_branch_sfs_hist"]],
  plot = arg_branch_sfs_cor_hist,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)

In [None]:
# Histogram (30 bins) of the per-region correlation between site-based ARG
# Fst and ANGSD's Fst, taken from the `site_sfs_cor` column.
# The x-axis label previously read "ANGSD GT Fst"; the "GT" wording belongs to
# the Pixy comparison, so the label now matches the branch-based ANGSD
# histogram ("ANGSD Fst").
arg_site_sfs_cor_hist <- all_fst_cors %>%
  ggplot(aes(x = site_sfs_cor)) +
  geom_histogram(bins = 30, color = "black", fill = "red") +
  labs(
    x = "ARG site Fst vs. ANGSD Fst correlation",
    y = "Number of 1Mb regions"
  ) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 15)
  )

# Write the figure as a 6 x 6 inch PDF to the Snakemake-declared output path.
ggsave(
  filename = snakemake@output[["arg_site_sfs_hist"]],
  plot = arg_site_sfs_cor_hist,
  device = "pdf",
  width = 6,
  height = 6,
  units = "in",
  dpi = 300
)