In [None]:
library(tidyverse)

In [None]:
# Load the tab-delimited table given as the Snakemake "regions" input and
# preview its first rows.
regions <- read_delim(file = snakemake@input[["regions"]], delim = "\t")
regions %>% head()

In [None]:
# Read every file listed under the Snakemake "arg_fst" input as tab-delimited
# text and stack the resulting tables row-wise into one data frame.
arg_fst_df <- purrr::map_dfr(
    snakemake@input[["arg_fst"]],
    read_delim,
    delim = "\t"
)

In [None]:
# Preview the first rows of the combined ARG Fst table.
arg_fst_df %>% head()

In [None]:
# Average fst per (regionID, type), spread the types into one column each
# (prefixed "fst_", e.g. the fst_site / fst_branch columns plotted below) and
# attach the matching row of `regions` by regionID.
mean_arg_fst_df <- arg_fst_df %>%
    group_by(regionID, type) %>%
    summarise(mean = mean(fst)) %>%
    # summarise() only removes the last grouping level, so the result would
    # otherwise stay grouped by regionID and every later whole-table
    # operation would have to remember to ungroup first.
    ungroup() %>%
    pivot_wider(
        names_from = "type",
        values_from = "mean",
        names_prefix = "fst_"
    ) %>%
    left_join(regions, by = "regionID")
head(mean_arg_fst_df)

In [None]:
# Scatter plot of per-region site Fst (x) against branch Fst (y), overlaid
# with a linear-model fit.
ggplot(data = mean_arg_fst_df, mapping = aes(x = fst_site, y = fst_branch)) +
    geom_point(colour = "black", fill = "black", size = 3) +
    geom_smooth(method = "lm", colour = "blue") +
    labs(x = "Site Fst", y = "Branch Fst") +
    theme_classic() +
    theme(
        axis.title = element_text(size = 18),
        axis.text = element_text(size = 15)
    )

In [None]:
# Read the tab-delimited table given as the Snakemake "gt_fst" input and add
# its columns to the per-region summary, matching rows on regionID.
gt_fst <- read_delim(file = snakemake@input[["gt_fst"]], delim = "\t")
mean_arg_fst_df <- left_join(mean_arg_fst_df, gt_fst, by = "regionID")

In [None]:
# Scatter plot of per-region ARG branch Fst (x) against the gt_fst_weighted
# column (y), overlaid with a linear-model fit.
ggplot(
    data = mean_arg_fst_df,
    mapping = aes(x = fst_branch, y = gt_fst_weighted)
) +
    geom_point(colour = "black", fill = "black", size = 3) +
    geom_smooth(method = "lm", colour = "blue") +
    labs(x = "ARG Branch Fst", y = "VCFtools GT Fst") +
    theme_classic() +
    theme(
        axis.title = element_text(size = 18),
        axis.text = element_text(size = 15)
    )

In [None]:
# Correlation test (cor.test default method) between the gt_fst_mean column
# and the ARG branch Fst across regions.
# NOTE(review): the scatter plot above uses gt_fst_weighted on its axis while
# this test uses gt_fst_mean -- confirm which of the two columns is intended.
cor.test(mean_arg_fst_df$gt_fst_mean, mean_arg_fst_df$fst_branch)

In [None]:
# Preview the first file of the Snakemake "sfs_fst" input. It is read as a
# headerless tab-delimited table with explicit column names, and the third
# column (x) is dropped.
sfs_fst <- read_delim(
    file = snakemake@input[["sfs_fst"]][1],
    delim = "\t",
    col_names = c("Chr", "Pos", "x", "Fst")
) %>%
    dplyr::select(-x)
sfs_fst %>% head()

In [None]:
#' Mean SFS-based Fst inside each ARG region covered by one Fst file.
#'
#' @param path Path to a tab-delimited file read without a header as four
#'   columns: Chr, Pos, x (discarded) and Fst.
#' @param mean_arg_fst_df Data frame of regions; must contain the columns
#'   regionID, Chr, start and end. Any grouping is removed before use.
#' @return A list with one single-row tibble per region whose Chr occurs in
#'   the file. Each tibble has the columns Fst (mean of Fst over positions
#'   with start <= Pos <= end on the region's chromosome) and regionID.
calculate_mean_sfs_fst <- function(path, mean_arg_fst_df){
    # Summarise the Fst values that fall inside a single region. Local names
    # are prefixed with "region_" so they cannot be mistaken for columns of
    # `sfs_fst_df` inside the data-masked filter() call.
    get_region_fst <- function(arg_region_df, sfs_fst_df){
        region <- arg_region_df[["regionID"]]
        region_chr <- arg_region_df[["Chr"]]
        region_start <- arg_region_df[["start"]]
        region_end <- arg_region_df[["end"]]
        sfs_fst_df %>%
            # Match on chromosome as well as position so that a file holding
            # several chromosomes does not mix their positions together.
            filter(
                Chr %in% region_chr,
                Pos >= region_start,
                Pos <= region_end
            ) %>%
            summarise(Fst = mean(Fst)) %>%
            mutate(regionID = region)
    }

    sfs_fst_df <- read_delim(
        path,
        delim = "\t",
        col_names = c("Chr", "Pos", "x", "Fst")
    ) %>%
        dplyr::select(-x)

    # Chromosome name(s) present in this file; printed as a progress marker.
    file_chrs <- unique(sfs_fst_df[["Chr"]])
    print(file_chrs)
    flush.console()

    mean_arg_fst_df %>%
        ungroup() %>%
        # %in% (rather than ==) stays correct when the file contains zero or
        # more than one chromosome name.
        filter(Chr %in% file_chrs) %>%
        group_split(regionID) %>%
        purrr::map(get_region_fst, sfs_fst_df = sfs_fst_df)
}

# Run calculate_mean_sfs_fst() on every file of the Snakemake "sfs_fst" input
# and row-bind the per-region results into a single table.
sfs_fst_df <- purrr::map_dfr(
    snakemake@input[["sfs_fst"]],
    calculate_mean_sfs_fst,
    mean_arg_fst_df = mean_arg_fst_df
)

In [None]:
# sfs_fst_df <- sfs_fst_df %>% rename("regionID" = "region")
# Attach the per-region SFS Fst means and plot them (x) against ARG branch
# Fst (y); points are coloured by the `direction` column, with a linear fit.
left_join(mean_arg_fst_df, sfs_fst_df, by = "regionID") %>%
    ggplot(mapping = aes(x = Fst, y = fst_branch)) +
        geom_point(aes(color = direction, fill = direction), size = 3) +
        geom_smooth(method = "lm", colour = "blue") +
        labs(x = "SFS Fst", y = "ARG Branch Fst") +
        theme_classic() +
        theme(
            axis.title = element_text(size = 18),
            axis.text = element_text(size = 15)
        )

In [None]:
# Show the row(s) holding the largest branch Fst; grouping is removed first so
# the maximum is taken over the whole table.
dplyr::filter(ungroup(mean_arg_fst_df), fst_branch == max(fst_branch))