In [130]:
library(tidyverse)
library(fs)

In [139]:
# Lookup table of unique (city, pop, site) combinations from the sample sheet.
# `pop` is cast to character so it can be joined against population IDs that
# are parsed out of BAM file names as strings.
habitat <- suppressMessages(
    read_delim('../../sequencing-prep/resources/low1_sampleSheet.txt', delim = '\t')
) %>%
    dplyr::select(all_of(c('city', 'pop', 'site'))) %>%
    mutate(across(pop, as.character)) %>%
    distinct()

In [277]:
#' Summarise pairwise relatedness by comparison type for one city
#'
#' The city name is taken from the first directory component of `path`. Only
#' that directory component is used: the relatedness table and the BAM list
#' are then read from fixed, city-specific locations under `../results/`,
#' regardless of the file name in `path`.
#'
#' The `a` and `b` columns of the relatedness table are matched against the
#' zero-based row position of each entry in the BAM list. Each sample's
#' population is parsed from its BAM file name, and its site code is looked up
#' in the global `habitat` table (columns `city`, `pop`, `site`).
#'
#' @param path Relative path whose first directory component is the city name
#'   (e.g. `<city>/<file>.out`).
#' @return An ungrouped tibble with one row per comparison type and the
#'   columns `comparison`, `city`, `n`, `mean_rab` and `se` (standard error of
#'   `rab`). Pairs that match none of the defined categories (for example
#'   because a population or site is missing) are labelled `'Error'`.
compare_relatedness <- function(path){

    # First directory component of the path is the city.
    city <- str_split(path_dir(path), pattern = '/', simplify = TRUE)[1,1]

    # Pairwise relatedness: sample indices `a`, `b` and the estimate `rab`.
    relate_df <- suppressMessages(
        read_delim(sprintf(
            '../results/population_structure/ngsrelate/%s/%s_4fold_ngsrelate_maf0.05.out', city, city),
                   delim = '\t')
    ) %>%
        dplyr::select(a, b, rab) %>%
        mutate(city = city)

    # One row per BAM, in list order. `order` is the zero-based position used
    # to match the `a`/`b` indices; seq_len() keeps this valid for an empty list.
    bams_df <- suppressMessages(
        read_table(sprintf('../results/program_resources/bam_lists/by_city/%s/%s_bams.list', city, city),
                   col_names = 'bam')
    ) %>%
        mutate(city = city,
               pop = str_extract(basename(bam), pattern = '(?<=_)(\\d+)(?=_\\d+)'),
               order = seq_len(n()) - 1) %>%
        dplyr::select(-bam) %>%
        left_join(habitat, by = c('city', 'pop'))

    # Attach population and site to each member of the pair with keyed joins,
    # so the pairing never depends on row order.
    relate_df %>%
        left_join(dplyr::select(bams_df, city, a = order, pop_a = pop, site_a = site),
                  by = c('a', 'city')) %>%
        left_join(dplyr::select(bams_df, city, b = order, pop_b = pop, site_b = site),
                  by = c('b', 'city')) %>%
        mutate(comparison = case_when((pop_a == pop_b) & (site_a == 'u') ~ 'within-pop_urban',
                                      (pop_a == pop_b) & (site_a == 'r') ~ 'within-pop_rural',
                                      (pop_a != pop_b) & (site_a == site_b) ~ 'between-pop_within-habitat',
                                      (pop_a != pop_b) & (site_a != site_b) ~ 'between-pop_between-habitat',
                                      TRUE ~ 'Error')) %>%
        group_by(comparison, city) %>%
        # Drop grouping so callers receive a plain tibble (and no regroup message).
        summarise(n = n(),
                  mean_rab = mean(rab),
                  se = sd(rab) / sqrt(n),
                  .groups = 'drop')

}

In [278]:
# Root directory holding one sub-directory of relatedness results per city.
inpath <- '../results/population_structure/ngsrelate/'

# `pattern` is a regular expression, not a glob. It is anchored to the exact
# file suffix that compare_relatedness() reads, so each city is processed once
# even if other files containing "out" sit in the same directory.
# Paths are returned relative to `inpath`, i.e. they start with the city name.
relate_df <- list.files(inpath, pattern = '_4fold_ngsrelate_maf0\\.05\\.out$', recursive = TRUE) %>%
    map_dfr(., compare_relatedness)

# relate_df

In [270]:
# Dodged bar chart of mean within-population relatedness per city, split by
# urban vs. rural populations, with +/- one standard error bars.
relate_df %>%
    filter(comparison %in% c('within-pop_urban', 'within-pop_rural')) %>%
    ggplot(mapping = aes(x = city, y = mean_rab, fill = comparison)) +
    geom_col(colour = 'black', position = position_dodge(width = 1)) +
    geom_errorbar(
        mapping = aes(ymin = mean_rab - se, ymax = mean_rab + se),
        width = 0.15,
        position = position_dodge(width = 1)
    ) +
    labs(x = 'city', y = 'Mean pairwise relatedness') +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [279]:
# Create an empty file at the first output path of the `snakemake` object.
# NOTE(review): presumably a sentinel marking this notebook's rule as
# complete for Snakemake; confirm against the workflow definition.
file.create(snakemake@output[[1]])