# Setup

In [None]:
# Load packages
library(tidyverse)
library(bedtoolsr)

In [None]:
# Read the tab-delimited windowed Fst and XP-nSL tables supplied by Snakemake.
# In the Fst table the Chr/start/end columns are moved to the front.
fst_win_df <- dplyr::select(
    read_delim(snakemake@input[['fst_win']], delim = '\t'),
    Chr, start, end, everything()
)
xpnsl_win_df <- snakemake@input[['xpnsl_win']] %>%
    read_delim(delim = '\t')

# Load raw XP-nSL scores
#
# load_xpnsl_norm(path, type = NULL)
#   path: path to a tab-delimited file with a header row containing at least
#         the columns `pos` and `normxpehh`. The part of the file name that
#         precedes `_Urban` is used as the chromosome name.
#   type: unused; kept (now optional) so existing calls keep working.
#   Returns a data frame with the columns Chr, pos and norm_xpnsl.
#   Stops with an error if no chromosome name can be derived from the file name.
load_xpnsl_norm <- function(path, type = NULL){

    chrom_name <- str_extract(basename(path), pattern = '.+(?=_Urban)')
    # Fail loudly instead of silently tagging every row with an NA chromosome,
    # which would make the rows vanish from later `Chr == ...` filters
    if (anyNA(chrom_name)) {
        stop("Cannot derive a chromosome name from '", basename(path),
             "': expected some text followed by '_Urban'", call. = FALSE)
    }

    suppressMessages(read_delim(path, delim = '\t', col_names = TRUE)) %>%
        mutate(Chr = chrom_name) %>%
        dplyr::select(Chr, pos, normxpehh) %>%
        rename('norm_xpnsl' = 'normxpehh')

}

# Read every raw XP-nSL file listed by Snakemake and row-bind them into one table
xpnsl_raw <- purrr::map_dfr(snakemake@input[['xpnsl_raw']], load_xpnsl_norm)

# Load Top10 selected regions
top10 <- snakemake@input[['top_ten']] %>%
    read_delim(delim = '\t')

In [None]:
# Preview the first rows of the windowed Fst table
head(fst_win_df)

In [None]:
# Preview the first rows of the windowed XP-nSL table
head(xpnsl_win_df)

In [None]:
# Preview the first rows of the combined raw XP-nSL scores
head(xpnsl_raw)

In [None]:
# Preview the first rows of the Top10 selected regions table
head(top10)

# Manhattan plots

## Genome-wide

### Fst

In [None]:
# Setting up cumulative genome-wide x-axis
# Offset of each chromosome = summed maximum window centers of all chromosomes
# listed before it (0 for the first one)
data_cum <- fst_win_df %>%
    group_by(Chr) %>%
    summarise(max_WinCenter = max(WinCenter)) %>%
    transmute(Chr, WinCenter_add = lag(cumsum(max_WinCenter), default = 0))

# Shift every window center by the offset of its chromosome
fst_win_df_mod <- inner_join(fst_win_df, data_cum, by = "Chr") %>%
    mutate(WinCenter_cum = WinCenter + WinCenter_add)

# Mean cumulative position per chromosome; used as x-axis break positions
axis_set <- fst_win_df_mod %>%
    group_by(Chr) %>%
    summarise(center = mean(WinCenter_cum))

# Fst outlier windows (flagged with fst_outlier == 1) and the genome-wide
# 99th percentile of windowed Fst
fst_outliers <- filter(fst_win_df_mod, fst_outlier == 1)
fst_quant_filt <- quantile(fst_win_df[['fst']], probs = 0.99)

# Generate Manhattan plot
# Non-outlier windows are drawn in two shades keyed on `chrom_cat`; outlier
# windows are overlaid in '#F21A00' and the dashed line marks `fst_quant_filt`.
manhat_plot_fst <- fst_win_df_mod %>%
    filter(fst_outlier != 1) %>%
    # 'One' = Chr01-Chr08 with the `_Occ` suffix, 'Two' = Chr01-Chr08 with the
    # `_Pall` suffix; any other chromosome name is left as NA
    mutate(chrom_cat = case_when(
        Chr %in% c('Chr01_Occ', 'Chr02_Occ', 'Chr03_Occ', 'Chr04_Occ',
                   'Chr05_Occ', 'Chr06_Occ', 'Chr07_Occ', 'Chr08_Occ') ~ 'One',
        Chr %in% c('Chr01_Pall', 'Chr02_Pall', 'Chr03_Pall', 'Chr04_Pall',
                   'Chr05_Pall', 'Chr06_Pall', 'Chr07_Pall', 'Chr08_Pall') ~ 'Two'
    )) %>%
    ggplot(aes(x = WinCenter_cum, y = fst)) +
        geom_point(shape = 21, alpha = 0.5, size = 3, aes(fill = chrom_cat, color = chrom_cat)) +
        geom_point(data = fst_outliers, shape = 21, alpha = 1, size = 3, color = '#F21A00', fill = '#F21A00') +
        geom_hline(yintercept = fst_quant_filt, color = "grey40", linetype = "dashed") +
        # `labels` spelled out in full (was `label`, which only worked through
        # partial argument matching)
        scale_x_continuous(labels = axis_set$Chr, breaks = axis_set$center) +
        scale_y_continuous(expand = c(0,0)) +
        coord_cartesian(ylim = c(0, 0.05)) +
        scale_fill_manual(values = c("black", "grey40")) +
        scale_color_manual(values = c("black", "grey40")) +
        ylab('Fst') + xlab('') +
        theme_classic() +
        theme(
            legend.position = "none",
            panel.border = element_blank(),
            panel.grid.major.x = element_blank(),
            panel.grid.minor.x = element_blank(),
            axis.text = element_text(size=16),
            axis.title = element_text(size=20),
            axis.text.x = element_text(angle = 45, hjust = 1)
          )
# Size of the inline notebook rendering (the saved PDF uses its own dimensions)
options(repr.plot.width = 20, repr.plot.height = 6)
manhat_plot_fst

ggsave(filename = snakemake@output[["fst_manhat"]], plot = manhat_plot_fst,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

### XP-nSL

In [None]:
# Setting up cumulative genome-wide x-axis
# Offset of each chromosome = summed maximum window centers of all chromosomes
# listed before it (0 for the first one)
data_cum <- xpnsl_win_df %>%
    group_by(Chr) %>%
    summarise(max_winCenter = max(winCenter)) %>%
    transmute(Chr, winCenter_add = lag(cumsum(max_winCenter), default = 0))

# Shift every window center by the offset of its chromosome
xpnsl_win_df_mod <- inner_join(xpnsl_win_df, data_cum, by = "Chr") %>%
    mutate(winCenter_cum = winCenter + winCenter_add)

# Mean cumulative position per chromosome; used as x-axis break positions
axis_set <- xpnsl_win_df_mod %>%
    group_by(Chr) %>%
    summarise(center = mean(winCenter_cum))

# XP-nSL outlier windows (all, and split by `direction`) plus the genome-wide
# 1st and 99th percentiles of the windowed `mean` score
all_xpnsl_outliers <- filter(xpnsl_win_df_mod, direction != 'Not outlier')
xpnsl_score_quant_filt <- quantile(xpnsl_win_df_mod[['mean']], probs = c(0.01, 0.99))
urban_xpnsl_outliers <- filter(xpnsl_win_df_mod, direction == 'Urban sel')
rural_xpnsl_outliers <- filter(xpnsl_win_df_mod, direction == 'Rural sel')

# Keep only the non-outlier windows and tag each one for colouring:
# 'One' = Chr01-Chr08 with the `_Occ` suffix, 'Two' = Chr01-Chr08 with the
# `_Pall` suffix; any other chromosome name is left as NA.
# Note: this overwrites `xpnsl_win_df_mod` with the filtered table.
xpnsl_win_df_mod <- xpnsl_win_df_mod %>%
    filter(direction == 'Not outlier') %>%
    mutate(chrom_cat = case_when(
        Chr %in% c('Chr01_Occ', 'Chr02_Occ', 'Chr03_Occ', 'Chr04_Occ',
                   'Chr05_Occ', 'Chr06_Occ', 'Chr07_Occ', 'Chr08_Occ') ~ 'One',
        Chr %in% c('Chr01_Pall', 'Chr02_Pall', 'Chr03_Pall', 'Chr04_Pall',
                   'Chr05_Pall', 'Chr06_Pall', 'Chr07_Pall', 'Chr08_Pall') ~ 'Two'
    ))

# Genome-wide XP-nSL Manhattan plot: non-outlier windows in two shades keyed on
# `chrom_cat`, all outlier windows overlaid in '#F21A00', and dashed lines at
# the two values of `xpnsl_score_quant_filt`.
xpnsl_manhat <- ggplot() +
        geom_point(data = xpnsl_win_df_mod, shape = 21, alpha = 0.5, size = 3,
                   aes(x = winCenter_cum, y = mean, fill = chrom_cat, color = chrom_cat)) +
        geom_point(data = all_xpnsl_outliers, shape = 21, alpha = 1, size = 3, color = '#F21A00', fill = '#F21A00',
                   aes(x = winCenter_cum, y = mean)) +
        geom_hline(yintercept = xpnsl_score_quant_filt, color = "grey40", linetype = "dashed") +
        # `labels` spelled out in full (was `label`, which only worked through
        # partial argument matching)
        scale_x_continuous(labels = axis_set$Chr, breaks = axis_set$center) +
        scale_y_continuous(expand = c(0,0)) +
        coord_cartesian(ylim = c(-6, 5)) +
        scale_fill_manual(values = c("black", "grey40")) +
        scale_color_manual(values = c("black", "grey40")) +
        ylab('Normalized XP-nSL') + xlab('Chromosomes') +
        theme_classic() +
        theme(
            legend.position = "none",
            panel.border = element_blank(),
            panel.grid.major.x = element_blank(),
            panel.grid.minor.x = element_blank(),
            axis.text = element_text(size=16),
            axis.title = element_text(size=20),
            axis.text.x = element_text(angle = 45, hjust = 1)
          )
xpnsl_manhat

ggsave(filename = snakemake@output[["xpnsl_manhat"]], plot = xpnsl_manhat,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

## Zoomed in on top hits

### Positive selection in urban populations

In [None]:
# Show coordinates and window size of the `top10` rows flagged 'Urban sel'
dplyr::select(filter(top10, direction == 'Urban sel'), Chr, start, end, win_size)

In [None]:
# Row(s) holding the highest normalized XP-nSL score across all loaded files
max_xpnsl <- filter(xpnsl_raw, norm_xpnsl == max(norm_xpnsl))
max_xpnsl

#### Chr04_Occ

In [None]:
# Top10 'Urban sel' region(s) on Chr04_Occ
region_df <- top10 %>%
    filter(direction == 'Urban sel', Chr == 'Chr04_Occ') %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 500000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}

# Coordinates of the Fst outlier windows; later zoomed-plot cells reuse this
fst_outliers <- fst_win_df %>%
    filter(fst_outlier == 1) %>%
    dplyr::select(Chr, start, end)
# Fst outlier windows overlapping the selected region(s); the first three
# output columns (V1-V3) are renamed back to Chr/start/end
fst_wins <- bt.intersect(fst_outliers, region_df, wa = TRUE) %>%
    rename('Chr' = 'V1', 'start' = 'V2', 'end' = 'V3')

# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2, blue segments = overlapping Fst outlier windows
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 3, fill = 'black', alpha = 0.3) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 04_Occ (position in Mbp)') + ylab('Normalized XP-nSL score') +
    geom_segment(data = fst_wins, aes(x = start, xend = end), y = -3, yend = -3,
                 color = 'blue', linewidth = 1) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 500000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr04_Occ_urb"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

#### Chr05_Occ

In [None]:
# Top10 'Urban sel' region(s) on Chr05_Occ
region_df <- top10 %>%
    filter(direction == 'Urban sel', Chr == 'Chr05_Occ') %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 50000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}

# Fst outlier windows overlapping the selected region(s); the first three
# output columns (V1-V3) are renamed back to Chr/start/end.
# `fst_outliers` comes from an earlier cell.
fst_wins <- bt.intersect(fst_outliers, region_df, wa = TRUE) %>%
    rename('Chr' = 'V1', 'start' = 'V2', 'end' = 'V3')

# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2, blue segments = overlapping Fst outlier windows,
# yellow diamond = row(s) in `max_xpnsl`
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 4, fill = 'black', alpha = 0.3) +
    # NOTE(review): `max_xpnsl` is the genome-wide maximum and is not filtered
    # to this chromosome/window; presumably it lies inside it - confirm
    geom_point(data = max_xpnsl, shape = 23, size = 5, fill = 'yellow', alpha = 1) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 05_Occ (position in Mbp)') + ylab('Normalized XP-nSL score') +
    geom_segment(data = fst_wins, aes(x = start, xend = end), y = -2.5, yend = -2.5,
                 color = 'blue', linewidth = 1) +
    coord_cartesian(ylim = c(-2.9, 6.5)) +
    scale_y_continuous(breaks = seq(-2, 6, 2)) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 50000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr05_Occ_urb"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

### Positive selection in rural habitats

In [None]:
# Show coordinates and window size of the `top10` rows flagged 'Rural sel'
dplyr::select(filter(top10, direction == 'Rural sel'), Chr, start, end, win_size)

In [None]:
# Row(s) holding the lowest normalized XP-nSL score across all loaded files
min_xpnsl <- filter(xpnsl_raw, norm_xpnsl == min(norm_xpnsl))
min_xpnsl

#### Chr04_Occ

In [None]:
# Top10 'Rural sel' region(s) on Chr04_Occ
region_df <- top10 %>%
    filter(direction == 'Rural sel', Chr == 'Chr04_Occ') %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 150000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}

# Fst outlier windows overlapping the selected region(s); the first three
# output columns (V1-V3) are renamed back to Chr/start/end.
# `fst_outliers` comes from an earlier cell.
fst_wins <- bt.intersect(fst_outliers, region_df, wa = TRUE) %>%
    rename('Chr' = 'V1', 'start' = 'V2', 'end' = 'V3')

# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2, blue segments = overlapping Fst outlier windows
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 4, fill = 'black', alpha = 0.3) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 04_Occ (position in Mbp)') + ylab('Normalized XP-nSL score') +
    geom_segment(data = fst_wins, aes(x = start, xend = end), y = -6, yend = -6,
                 color = 'blue', linewidth = 1) +
    coord_cartesian(ylim = c(-6.5, 2.1)) +
    scale_y_continuous(breaks = seq(-6, 2, 2)) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 50000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr04_Occ_rur"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

#### Chr05_Pall

In [None]:
# Top10 'Rural sel' region(s) on Chr05_Pall
region_df <- top10 %>%
    filter(direction == 'Rural sel', Chr == 'Chr05_Pall') %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 50000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}

# Fst outlier windows overlapping the selected region(s); the first three
# output columns (V1-V3) are renamed back to Chr/start/end.
# `fst_outliers` comes from an earlier cell.
fst_wins <- bt.intersect(fst_outliers, region_df, wa = TRUE) %>%
    rename('Chr' = 'V1', 'start' = 'V2', 'end' = 'V3')

# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2, blue segments = overlapping Fst outlier windows
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 4, fill = 'black', alpha = 0.3) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 05_Pall (position in Mbp)') + ylab('Normalized XP-nSL score') +
    geom_segment(data = fst_wins, aes(x = start, xend = end), y = -6, yend = -6,
                 color = 'blue', linewidth = 1) +
    coord_cartesian(ylim = c(-6.5, 2.1)) +
    scale_y_continuous(breaks = seq(-6, 2, 2)) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 50000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr05_Pall_rur"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

In [None]:
# Within the selected region itself (no buffer), show the row(s) with the
# lowest normalized XP-nSL score
region_raw_df %>%
    filter(Chr == chrom, pos >= min_pos, pos <= max_pos) %>%
    filter(norm_xpnsl == min(norm_xpnsl))

#### Chr07_Occ

In [None]:
# Top10 'Rural sel' region(s) on Chr07_Occ
region_df <- top10 %>%
    filter(direction == 'Rural sel', Chr == 'Chr07_Occ') %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 100000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}

# Fst outlier windows overlapping the selected region(s); the first three
# output columns (V1-V3) are renamed back to Chr/start/end.
# `fst_outliers` comes from an earlier cell.
fst_wins <- bt.intersect(fst_outliers, region_df, wa = TRUE) %>%
    rename('Chr' = 'V1', 'start' = 'V2', 'end' = 'V3')

# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2, blue segments = overlapping Fst outlier windows
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 4, fill = 'black', alpha = 0.3) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 07_Occ (position in Mbp)') + ylab('Normalized XP-nSL score') +
    geom_segment(data = fst_wins, aes(x = start, xend = end), y = -6, yend = -6,
                 color = 'blue', linewidth = 1) +
    coord_cartesian(ylim = c(-6.5, 2.1)) +
    scale_y_continuous(breaks = seq(-6, 2, 2)) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 50000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr07_Occ_rur"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

#### Chr08_Pall (First)

In [None]:
# Top10 'Rural sel' region(s) on Chr08_Pall with a window size of 150000
region_df <- top10 %>%
    filter(direction == 'Rural sel', Chr == 'Chr08_Pall', win_size == 150000) %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 300000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}

# Fst outlier windows overlapping the selected region(s); the first three
# output columns (V1-V3) are renamed back to Chr/start/end.
# `fst_outliers` comes from an earlier cell.
fst_wins <- bt.intersect(fst_outliers, region_df, wa = TRUE) %>%
    rename('Chr' = 'V1', 'start' = 'V2', 'end' = 'V3')

# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2, blue segments = overlapping Fst outlier windows,
# yellow diamond = row(s) in `min_xpnsl`
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 4, fill = 'black', alpha = 0.3) +
    # NOTE(review): `min_xpnsl` is the genome-wide minimum and is not filtered
    # to this chromosome/window; presumably it lies inside it - confirm
    geom_point(data = min_xpnsl, shape = 23, size = 5, fill = 'yellow', alpha = 1) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 08_Pall (position in Mbp)') + ylab('Normalized XP-nSL score') +
    geom_segment(data = fst_wins, aes(x = start, xend = end), y = -9, yend = -9,
                 color = 'blue', linewidth = 1) +
    coord_cartesian(ylim = c(-9, 3)) +
    scale_y_continuous(breaks = seq(-8, 2, 2)) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 100000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr08_Pall_rur1"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")

#### Chr08_Pall (Second)

In [None]:
# Top10 'Rural sel' region(s) on Chr08_Pall with a window size of 50000
region_df <- top10 %>%
    filter(direction == 'Rural sel', Chr == 'Chr08_Pall', win_size == 50000) %>%
    dplyr::select(Chr, start, end)

# Chromosome name, outer bounds of the region(s) and flanking distance to plot
chrom <- unique(region_df[['Chr']])
min_pos <- min(region_df[['start']])
max_pos <- max(region_df[['end']])
buffer <- 100000

# Raw XP-nSL scores inside the region(s) plus `buffer` on either side
region_raw_df <- xpnsl_raw %>%
    filter(Chr == chrom, pos >= min_pos - buffer, pos <= max_pos + buffer)

# Axis-label helper: divide positions by 1e6 and format the result with two
# decimal places (e.g. 1500000 -> "1.50")
formatter1e6 <- function(x) {
    sprintf('%#.2f', x / 1e6)
}


# Raw XP-nSL scores around the region: shaded band = selected region(s),
# dashed lines at -2 and 2. No Fst outlier segments are drawn in this plot.
manhat_plot <- ggplot(region_raw_df, aes(x = pos, y = norm_xpnsl)) +
    geom_rect(data = region_df, aes(xmin = start, xmax = end), ymin = -Inf, ymax = Inf,
              fill = "black", alpha = 0.3, inherit.aes = FALSE) +
    geom_point(shape = 21, size = 4, fill = 'black', alpha = 0.3) +
    geom_hline(yintercept = c(-2, 2), color = "grey40", linetype = "dashed") +
    xlab('Chromosome 08_Pall (position in Mbp)') + ylab('Normalized XP-nSL score') +
    coord_cartesian(ylim = c(-5.5, 3)) +
    scale_y_continuous(breaks = seq(-4, 2, 2)) +
    scale_x_continuous(breaks = seq(min_pos - buffer, max_pos + buffer, 50000),
                       labels = formatter1e6) +
    theme_classic() +
    theme(
        panel.border = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.text = element_text(size=16),
        axis.title = element_text(size=20)
      )
manhat_plot

ggsave(filename = snakemake@output[["chr08_Pall_rur2"]], plot = manhat_plot,
       height = 8, width = 20, device = "pdf", dpi = 600, units = "in")