In [23]:
library(tidyverse)
library(wesanderson)

In [17]:
# Sample order from ANGSD.
# The file is read as tab-delimited with no header row, so readr assigns
# default column names (the first column is referenced later as `X1`).
# readr's parsing messages are silenced.
angsd_sample_order_path <- '../results/program_resources/angsd_allFinalSamples_order.txt'
sample_order <- suppressMessages(read_delim(
  file = angsd_sample_order_path,
  delim = '\t',
  col_names = FALSE
))

In [18]:
# Load the sample sheet, keep only the samples listed in the ANGSD order file,
# and sort the rows into that same order.
# Later cells attach these rows to the PCAngsd eigenvectors and the NGSadmix
# proportions purely by position (bind_cols), so the row order here has to
# follow the ANGSD sample order rather than whatever order the sheet is in.
# NOTE(review): assumes the covariance matrix / .qopt rows follow the order in
# `sample_order$X1` -- confirm against the ANGSD bam list.
ss_inpath <- '../resources/sequencedPlants_phenotypesHabitat.txt'
samples <- suppressMessages(
        read_delim(ss_inpath, col_names = TRUE, delim = '\t') %>%
        filter(Sample %in% sample_order$X1) %>%
        # match() gives each sample's position in the ANGSD list
        arrange(match(Sample, sample_order$X1))
)

In [4]:
# Load covariance matrix.
# Read as a space-delimited table without a header row; readr's parsing
# messages are silenced.
covMat_path <- '../results/population_structure/pcangsd/allSamples_allChroms_4fold_maf0.05_pcangsd.cov'
covMat <- suppressMessages(read_delim(
  file = covMat_path,
  delim = ' ',
  col_names = FALSE
))

In [20]:
# Eigendecomposition of the covariance matrix.
eigenvectors <- eigen(covMat)

# Keep the first four eigenvectors as PC1-PC4 and attach the sample sheet
# column-wise. bind_cols() pairs rows by position, so `samples` must be in the
# same row order as the covariance matrix.
eigen_df <- eigenvectors$vectors[, 1:4] %>%
  as.data.frame() %>%
  setNames(c('PC1', 'PC2', 'PC3', 'PC4')) %>%
  bind_cols(samples)

In [21]:
# Print the importance-of-components summary of a PCA run on `covMat`.
# NOTE(review): prcomp() is applied to the covariance matrix itself, not to the
# genotype data; confirm these proportions are the intended source for the
# variance percentages quoted in the PCA axis labels (the eigenvalues of
# `covMat` would be the alternative).
covMat %>%
  prcomp() %>%
  summary()

In [97]:
# Three discrete colours from the FantasticFox1 palette for the Habitat scale.
cols <- wes_palette(name = "FantasticFox1", n = 3, type = 'discrete')

# PC1 vs PC2 scatter; Habitat is mapped to both point colour and point shape.
# The variance percentages in the axis labels are hard-coded.
pca_plot <- ggplot(
  data = eigen_df,
  mapping = aes(x = PC1, y = PC2, color = Habitat, shape = Habitat)
) +
  geom_point(size = 3) +
  scale_color_manual(values = cols) +
  labs(x = 'PC1 (3.4%)', y = 'PC2 (2.2%)') +
  # scale_x_continuous(breaks = seq(-0.10, 0.10, 0.10)) +
  theme_classic() +
  theme(
    legend.position = 'top',
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13)
  )
pca_plot

In [89]:
# Save the PCA figure as an 8 x 8 inch PDF at the first entry of
# `snakemake@output`, printing the destination path first.
outpath <- snakemake@output[[1]]
print(outpath)
ggsave(
  filename = outpath,
  plot = pca_plot,
  device = 'pdf',
  width = 8,
  height = 8,
  units = 'in',
  dpi = 600
)

In [64]:
# Best K by Evanno is 6. Load logs for K = 6

#' Extract the seed and the reported likelihood from one NGSadmix log file.
#'
#' The seed is parsed from the file name (`...seed<digits>.log`); the
#' likelihood is the negative decimal that follows `like=` inside the log.
#'
#' @param path Path to a single NGSadmix `.log` file.
#' @return A one-row data.frame with numeric columns `seed` and `like`.
#'   Either value is `NA` when it cannot be parsed.
load_ngsadmix_log <- function(path) {
  # First match of `pattern` in the string `text` as a number, NA if no match.
  extract_number <- function(text, pattern) {
    if (is.na(text)) {
      return(NA_real_)
    }
    hit <- regmatches(text, regexpr(pattern, text, perl = TRUE))
    if (length(hit) == 0) NA_real_ else as.numeric(hit[[1]])
  }

  like_pattern <- '(?<=like=)-[0-9]+\\.[0-9]+'
  seed <- extract_number(basename(path), '(?<=seed)[0-9]+(?=\\.log)')

  log_lines <- readLines(path)

  # Line 9 is where the likelihood is expected. If it is not there (shorter or
  # longer header, extra lines), scan the whole log instead of returning NA,
  # taking the last line that carries a likelihood.
  like <- extract_number(log_lines[9], like_pattern)
  if (is.na(like)) {
    candidates <- grep(like_pattern, log_lines, perl = TRUE, value = TRUE)
    if (length(candidates) > 0) {
      like <- extract_number(candidates[[length(candidates)]], like_pattern)
    }
  }

  data.frame(seed = seed, like = like)
}

In [65]:
# Collect seed and likelihood from every NGSadmix log in the K = 6 directory
# into one data frame (one row per log file).
ngsadmix_inpath <- '../results/population_structure/ngsadmix/K6/'
# list.files() interprets `pattern` as a regular expression, not a shell glob,
# so anchor on a literal ".log" suffix.
like_df <- list.files(ngsadmix_inpath, pattern = '\\.log$', full.names = TRUE) %>%
    map_dfr(., load_ngsadmix_log)

In [66]:
# Display the per-seed likelihood table.
like_df

In [67]:
# Show the best run. The parsed `like` values are negative log-likelihoods as
# reported by NGSadmix, so the best-fitting run is the one with the highest
# (least negative) value, not the lowest. Logs whose likelihood could not be
# parsed (NA) are ignored.
# NOTE(review): confirm the seed shown here matches the seed hard-coded in the
# .qopt path that is loaded next.
like_df %>%
    filter(like == max(like, na.rm = TRUE))

In [71]:
# Read the admixture proportions (.qopt) for K = 6, seed 6, attach the sample
# sheet by row position, and reshape to long format: one row per sample and
# ancestry column, with the proportion in `Probs` rounded to 3 decimals.
admix_results <- '../results/population_structure/ngsadmix/K6/ngsadmix_4fold_maf0.05_K6_seed6.qopt'
admix <- admix_results %>%
  read_delim(delim = " ", col_names = FALSE) %>%
  # X7 is discarded; presumably an empty column from a trailing space -- verify
  dplyr::select(-X7) %>%
  bind_cols(samples) %>%
  pivot_longer(cols = X1:X6, names_to = 'name', values_to = 'Probs') %>%
  mutate(Probs = round(Probs, digits = 3))
head(admix)

In [94]:
# Six colours interpolated from the Darjeeling1 palette for the fill scale.
cols <- wes_palette(name = "Darjeeling1", n = 6, type = 'continuous')

# Stacked ancestry bar plot: one bar per sample, filled by ancestry column,
# with samples grouped into facets by Habitat (in order of first appearance).
# NOTE(review): expand_scale() and the `size` argument for bar outlines are
# deprecated in newer ggplot2 releases (expansion() / `linewidth`); kept as-is
# because the ggplot2 version of the target environment is not known here.
admix_plot <- admix %>%
  ggplot(aes(x = factor(Sample), y = Probs, fill = factor(name))) +
  geom_col(color = "gray", size = 0.1) +
  facet_grid(
    ~fct_inorder(Habitat),
    switch = "x",
    scales = "free",
    space = "free"
  ) +
  scale_fill_manual(values = cols) +
  scale_x_discrete(expand = expand_scale(add = 1)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(title = "K=6", x = "", y = "Ancestry") +
  theme_minimal() +
  theme(
    legend.position = 'none',
    panel.grid = element_blank(),
    panel.spacing.x = unit(0.1, "lines"),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 12),
    # Per-sample labels are hidden
    axis.text.x = element_blank(),
    strip.text.x = element_text(size = 12)
  )
admix_plot

In [95]:
# Save the admixture figure as a 14 x 6 inch PDF at the second entry of
# `snakemake@output`, printing the destination path first.
outpath <- snakemake@output[[2]]
print(outpath)
ggsave(
  filename = outpath,
  plot = admix_plot,
  device = 'pdf',
  width = 14,
  height = 6,
  units = 'in',
  dpi = 600
)