In [None]:
library(tidyverse)
source("scripts/r/baypass_utils.R")
source("scripts/r/functions_objects.R")

In [None]:
#' Read one omega matrix file and tag it with the seed and split in its path.
#'
#' The seed and split are parsed from the first `seed<digits>_` and
#' `split<digits>_` tokens found in `path`. Both are kept as character
#' strings (or `NA` when the token is absent).
#'
#' @param path Path to a whitespace-delimited matrix file without a header.
#' @return A one-row tibble with columns `seed`, `split`, and `mat`, where
#'   `mat` is a list-column holding the table read from `path`.
load_omega_mat <- function(path){
    # `\\d+` for both ids: a single `\\d` would fail to match (and return NA)
    # for any seed with more than one digit, e.g. "seed12_".
    seed <- str_extract(path, "(?<=seed)(\\d+)(?=_)")
    split <- str_extract(path, "(?<=split)(\\d+)(?=_)")

    # Column-spec messages from readr are noise when many files are loaded.
    mat <- suppressMessages(read_table(path, col_names = FALSE))

    tibble(seed = seed, split = split, mat = list(mat))
}

# Load every file listed under the Snakemake `omega_mat` input and row-bind
# the resulting one-row tibbles into a single table.
mat_df <- purrr::map_dfr(snakemake@input[["omega_mat"]], load_omega_mat)

In [None]:
#' Compute pairwise FMD between all splits that belong to one seed.
#'
#' Every unordered pair of split ids present in `data_df` is compared with
#' `fmd.dist()` (not defined in this file; presumably provided by one of the
#' sourced scripts -- confirm).
#'
#' @param data_df Data frame for a single seed with columns `seed`, `split`
#'   (integer-like ids, possibly stored as character) and `mat` (list-column
#'   of matrix-like tables).
#' @return A tibble with one row per pair of splits and columns `n1`, `n2`
#'   (integer split ids, `n1 < n2`), `fmd`, and `seed`. If fewer than two
#'   distinct splits are available, a zero-row tibble with the same columns
#'   is returned and a warning is raised.
calculate_fmd_within_seeds <- function(data_df){
    seed <- unique(data_df$seed)

    # Build pairs from the split ids that actually exist rather than assuming
    # they are exactly 0..(n - 1); comparing as integers also tolerates
    # zero-padded ids such as "01".
    split_int <- as.integer(data_df$split)
    split_ids <- sort(unique(split_int))

    # combn() errors when asked for pairs from fewer than two elements (and
    # treats a lone positive integer as seq_len()), so handle this up front.
    if (length(split_ids) < 2) {
        warning("Fewer than two splits for seed ",
                paste(seed, collapse = ", "),
                "; no pairwise FMD computed.", call. = FALSE)
        return(tibble(n1 = integer(), n2 = integer(),
                      fmd = double(), seed = seed[0]))
    }

    # Matrix for a given split id; the first match is used if an id repeats.
    get_mat <- function(split_id){
        as.matrix(data_df$mat[[which(split_int == split_id)[1]]])
    }

    # Sorted ids make combn() emit pairs ordered by (n1, n2).
    pair_list <- utils::combn(split_ids, 2, simplify = FALSE)

    fmd_df <- purrr::map_dfr(pair_list, function(pair){
        tibble(n1 = pair[1],
               n2 = pair[2],
               fmd = fmd.dist(get_mat(pair[1]), get_mat(pair[2])))
    }) %>%
        mutate(seed = seed)

    return(fmd_df)
}

# Split the loaded matrices by seed, compute the pairwise FMD table for each
# seed, and row-bind the per-seed tables.
fmd_within_seeds <- purrr::map_dfr(
    group_split(mat_df, seed),
    calculate_fmd_within_seeds
)

In [None]:
# Per-seed summary statistics of the pairwise FMD values (printed as the
# cell output).
reframe(
    group_by(fmd_within_seeds, seed),
    mean = mean(fmd),
    sd = sd(fmd),
    median = median(fmd),
    min = min(fmd),
    max = max(fmd)
)

In [None]:
# Boxplot of pairwise FMD values, one box per seed. `my_theme` is not defined
# in this file (presumably comes from a sourced script).
fmd_box <- ggplot(fmd_within_seeds, aes(x = seed, y = fmd)) +
    geom_boxplot() +
    labs(x = "Random seed",
         y = "Förstner and Moonen Distance (FMD)") +
    my_theme

# Write the figure to the path given by the Snakemake `fmd_box` output.
ggsave(
    filename = snakemake@output[["fmd_box"]],
    plot = fmd_box,
    device = "pdf",
    width = 8,
    height = 8,
    units = "in",
    dpi = 600
)