In [130]:
library(MASS)
library(tidyverse)
library(car)
library(emmeans)
library(wesanderson)
library(ggrepel)
library(fs)

# Pairwise urban-rural pi and Fst within cities

In this notebook, we'll examine urban-rural differences in diversity (theta_pi) and urban-rural Fst across all 26 cities. 

## Description of the data

- Low coverage individuals have been removed from these analyses, so we're using the samples that are part of the *finalSamples_lowCovRemoved* sample set from the previous analyses. 
- All analyses were performed using genome-wide 4-fold degenerate sites.

The basic workflow is as follows:

1. Generate the Site Allele Frequency (SAF) likelihood distribution for each habitat within each city. 

    - Used the same filters as previously, the most important of which is that sites are only included if 50% of individuals have data. Remember, this is 50% of individuals _within_ a habitat; this would correspond to 5 individuals if none have been removed due to low coverage. 

2. To estimate diversity, generate the folded, one-dimensional SFS from the SAF file in step 1 and estimate diversity separately in urban and rural habitats
3. To estimate Fst, generate the folded, two-dimensional joint SFS of urban-rural habitats and estimate Fst. This uses only sites that are shared between both populations (i.e., the intersection of the two SAF files). 

    - For comparison, I estimated Fst using both Weir and Cockerham (1984) and Hudson (1992).

## Pairwise diversity

### Load diversity data

In [3]:
# Load the theta estimates for one city/habitat file and summarise them.
#
# Args:
#   path:     File path relative to `base_dir`. Its directory part is used as
#             the city name; the first 'r' or 'u' that follows an underscore
#             in the file name is used as the habitat.
#   base_dir: Prefix pasted in front of `path`. Defaults to the global
#             `inpath`, which the function previously read implicitly.
#
# Returns:
#   A one-row tibble with city, habitat, tp_scaled and tw_scaled (tP and tW
#   divided by nSites, averaged over the rows of the file) and total_sites
#   (sum of nSites).
load_pairwise_diversity <- function(path, base_dir = inpath){
    
    # Get city and site names from file
    city <- dirname(path)
    # `[ru]` rather than `[r|u]`: inside a character class `|` is a literal
    # pipe, not an alternation
    site <- str_extract(basename(path), pattern = '(?<=_)[ru]')
    
    full_path <- paste0(base_dir, path)
    df <- suppressMessages(read_delim(full_path, delim = '\t')) %>% 
        mutate(tp_scaled = tP / nSites,
               tw_scaled = tW / nSites,
               city = city,
               habitat = site) %>% 
        dplyr::select(city, habitat, tp_scaled, tw_scaled, nSites) %>% 
        group_by(city, habitat) %>% 
        # Mean across chromosomes
        summarise(tp_scaled = mean(tp_scaled),
                  tw_scaled = mean(tw_scaled),
                  total_sites = sum(nSites),
                  .groups = 'drop')
    return(df)
    
}

In [4]:
# Load the theta estimates of every city/habitat and stack them into a
# single dataframe.
# `pattern` is a regular expression, not a glob, so match the literal
# '.pestPG' suffix explicitly.
inpath <- '../results/angsd/summary_stats/thetas/by_city/'
div_df <- list.files(inpath, pattern = '\\.pestPG$', recursive = TRUE) %>% 
    map_dfr(., load_pairwise_diversity) 

In [5]:
# Preview the first rows of the stacked diversity table
head(div_df, n = 6)

In [6]:
# Mean and min/max number of sites behind the diversity estimates
print(mean(div_df[['total_sites']]))
print(range(div_df[['total_sites']]))

In [7]:
# Rows with the largest and smallest number of sites
filter(
    div_df,
    total_sites == max(total_sites) | total_sites == min(total_sites)
)

### Plot differences in diversity

In [8]:
# One row per city: habitat-specific columns get an `_r` / `_u` suffix, and
# tp_diff holds the urban minus rural difference in theta_pi
div_df_wide <- mutate(
    pivot_wider(div_df,
                names_from = habitat,
                values_from = c(tp_scaled, tw_scaled, total_sites)),
    tp_diff = tp_scaled_u - tp_scaled_r
)

In [9]:
# Distribution of the urban - rural difference in theta_pi across cities
div_diff_hist <- ggplot(div_df_wide, aes(x = tp_diff)) +
    geom_histogram(bins = 13, fill = 'white', color = 'black') +
    labs(x = "Urban - rural difference in diversity",
         y = "Number of cities") +
    theme_classic()
div_diff_hist

## Fst

### Load Fst dataframe

In [10]:
# Load per-site Fst numerators/denominators for one city and collapse them
# into a single weighted Fst estimate.
#
# Args:
#   path:     File path relative to `base_dir`. The first path component is
#             the Fst type directory and the second is the city.
#   base_dir: Prefix pasted in front of `path`. Defaults to the global
#             `inpath`, which the function previously read implicitly.
#
# Returns:
#   A one-row tibble with num_sum, denom_sum, fst, nSites, fst_type
#   ('wc' when the type directory is 'fst0', otherwise 'hudson') and city.
load_fst <- function(path, base_dir = inpath){
    
    # Get Fst type and city from filenames
    dir <- str_split(path, pattern = '/')
    fst_dir <- dir[[1]][[1]]
    city <- dir[[1]][[2]]
    # Scalar decision, so plain if/else rather than vectorised ifelse()
    fst_label <- if (fst_dir == 'fst0') 'wc' else 'hudson'
    
    full_path <- paste0(base_dir, path)
    colnames <- c('chrom', 'pos', 'num', 'denom')
    df <- suppressMessages(read_delim(full_path, delim = '\t', col_names = colnames)) %>% 
        
        # Cap numerators at 0 if negative 
        # https://github.com/ANGSD/angsd/issues/309
        # Does not affect overall pattern
        mutate(num = pmax(num, 0)) %>% 
        
        # Estimate weighted Fst as ratio of averages
        # https://github.com/ANGSD/angsd/issues/61
        summarise(num_sum = sum(num),
                  denom_sum = sum(denom),
                  fst = num_sum / denom_sum,
                  nSites = n()) %>% 
        mutate(fst_type = fst_label,
               city = city)
    
    return(df)
    
}

In [11]:
# Count the samples listed in one city/habitat file.
#
# Args:
#   path:     File path relative to `base_dir`. Its directory part is used as
#             the city name; the first 'r' or 'u' that follows an underscore
#             in the file name is used as the habitat.
#   base_dir: Prefix pasted in front of `path`. Defaults to the global
#             `inpath`, which the function previously read implicitly.
#
# Returns:
#   A one-row data.frame with city, habitat and n (number of rows read from
#   the file).
get_num_samples <- function(path, base_dir = inpath){
    
    city <- dirname(path)
    # `[ru]` rather than `[r|u]`: inside a character class `|` is a literal
    # pipe, not an alternation
    habitat <- str_extract(basename(path), pattern = '(?<=_)[ru]')  
    
    full_path <- paste0(base_dir, path)
    df <- suppressMessages(read_table(full_path, col_names = FALSE)) 
    nsamples <- nrow(df)
    
    df <- data.frame(city = city, habitat = habitat, n = nsamples)
    return(df)
    
}

In [12]:
# Merged df with sample size info: one row per city, one column per habitat.
# `pattern` is a regular expression, not a glob, so match the literal
# '.list' suffix explicitly.
inpath <- '../results/program_resources/bam_lists/by_city/'
sample_size_df <- list.files(inpath, pattern = '\\.list$', recursive = TRUE) %>% 
    map_dfr(., get_num_samples) %>% 
    pivot_wider(names_from = habitat, values_from = n)
head(sample_size_df)

In [13]:
# Merge Fst dataframes
# `pattern` is a regular expression, not a glob, so match the literal
# '_readable.fst' suffix explicitly.
inpath <- '../results/angsd/summary_stats/fst/'
fst_df <- list.files(inpath, pattern = '_readable\\.fst$', recursive = TRUE) %>% 
    map_dfr(., load_fst)

In [14]:
# Attach rural (r) and urban (u) sample sizes to each Fst estimate and derive
# the larger, smaller, difference and mean of the two.
# The mean is computed with vectorised arithmetic instead of rowwise() so the
# result is a plain (non-rowwise) tibble.
fst_df_withSampleSize <- fst_df %>% 
    left_join(., sample_size_df, by = 'city') %>% 
    mutate(nmax = pmax(r, u),
           nmin = pmin(r, u),
           ndiff = nmax - nmin,
           nmean = (r + u) / 2)

In [15]:
# Preview the Fst table with sample sizes attached
head(fst_df_withSampleSize, n = 6)

In [16]:
# Kyoto has a single rural sample, so exclude it
fst_df_withSampleSize_noKyoto <- filter(fst_df_withSampleSize, city != 'Kyoto')

In [17]:
# Mean number of sites for Hudson's Fst (without Kyoto)
mean(
    pull(
        filter(fst_df_withSampleSize_noKyoto, fst_type == 'hudson'),
        nSites
    )
)

In [18]:
# Hudson Fst rows (without Kyoto) with the most and the fewest sites
fst_df_withSampleSize_noKyoto %>% 
    ungroup() %>% 
    filter(fst_type == 'hudson') %>% 
    filter(nSites == max(nSites) | nSites == min(nSites))

### WC vs. Hudson's Fst

In [19]:
# Pair the two Fst estimators by city (one row per city, one column per
# estimator) instead of relying on both subsets sharing the same row order.
fst_by_type <- fst_df_withSampleSize %>% 
    ungroup() %>% 
    dplyr::select(city, fst_type, fst) %>% 
    pivot_wider(names_from = fst_type, values_from = fst)
wc <- fst_by_type[['wc']]
hudson <- fst_by_type[['hudson']]

In [20]:
# Scatterplot of Weir & Cockerham vs. Hudson Fst with a 1:1 reference line.
# Built with ggplot() on an explicit data frame; qplot() is deprecated.
wc_vs_hudson <- ggplot(data.frame(wc = wc, hudson = hudson), 
                       aes(x = wc, y = hudson)) + 
    geom_point(size = 2, alpha = 0.5) +
    geom_abline(slope = 1, intercept = 0) +
    xlab('Weir and Cockerham Fst') + ylab('Hudson Fst') +
    theme_classic()
wc_vs_hudson

In [21]:
# Write the WC vs. Hudson scatterplot to the first Snakemake output
outpath <- snakemake@output[[1]]
print(outpath)
ggsave(
    filename = outpath,
    plot = wc_vs_hudson,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 9,
    dpi = 300
)

### Dependence of Fst on sample size

In [22]:
# Fst against the smaller of the two habitat sample sizes, coloured by
# estimator, with a loess trend per estimator
fst_by_ss <- fst_df_withSampleSize %>% 
    ggplot(aes(x = nmin, y = fst, color = fst_type)) + 
    geom_point(alpha = 0.5, size = 2) +
    geom_smooth(se = FALSE, method = 'loess') + 
    labs(x = 'Minimum sample size', y = 'Fst') +
    theme_classic()
fst_by_ss

In [23]:
# Write the Fst vs. sample size plot to the second Snakemake output
outpath <- snakemake@output[[2]]
print(outpath)
ggsave(
    filename = outpath,
    plot = fst_by_ss,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 9,
    dpi = 300
)

#### My take

- WC Fst generally higher than Hudson, as expected based on Bhatia (2013)
- Highest Fst estimates occur for cities where sample sizes are lowest, suggesting these estimates may be biased upward
    - Downsampling Toronto data will help us resolve this
- That being said, Fst estimates are generally low for seemingly reliable estimates. Often Fst < 0.05

## Euclidean distance from PCA

- Estimate Euclidean distance between urban and rural centroids by city

In [24]:
# Euclidean distance between the points (x1, y1) and (x2, y2).
# Arguments may be vectors; the distance is computed element-wise.
euclidean <- function(x1, y1, x2, y2){
    
    dx <- x1 - x2
    dy <- y1 - y2
    sqrt(dx^2 + dy^2)
}

In [25]:
# Sample sheet with habitat info; keep only the descriptive columns
habitat_info <- dplyr::select(
    suppressMessages(
        read_delim('../../sequencing-prep/resources/low1_sampleSheet.txt', 
                   delim = '\t')
    ),
    continent, range, city, pop, individual, site, sample
)

In [26]:
# Covariance matrix from PCAngsd (space-delimited, no header row)
cov_mat <- as.matrix(
    suppressMessages(
        read_delim(
            '../results/population_structure/pcangsd/highErrorRemoved_4fold_maf0.05_pcangsd.cov', 
            delim = ' ', col_names = FALSE)
    )
)

# Sample order from ANGSD, annotated with continent and habitat data
samples <- suppressMessages(
    read_table(
        '../results/program_resources/angsd_highErrorRemoved_order.txt', 
        col_names = FALSE)
)
samples <- samples %>% 
    rename('sample' = 'X1') %>% 
    left_join(., habitat_info, by = 'sample')

In [27]:
# Eigendecomposition of the covariance matrix; keep the first two
# eigenvectors as PC1/PC2 and bind the sample metadata alongside them
eigenvectors <- eigen(cov_mat)
eigen_df <- as.data.frame(eigenvectors$vectors)[, c('V1', 'V2')]
names(eigen_df) <- c('PC1', 'PC2')
eigen_df <- eigen_df %>% 
    bind_cols(., samples) %>% 
    mutate(sample_set = 'highErrorRemoved')

In [28]:
# Mean PC1/PC2 position (centroid) of each habitat within each city, then the
# Euclidean distance between the urban and rural centroids.
# `.groups = 'drop'` stops the result (and everything joined onto it later)
# from staying grouped by city.
euc_dist_df <- eigen_df %>% 
    group_by(city, site) %>% 
    summarise(x = mean(PC1),
              y = mean(PC2),
              .groups = 'drop') %>% 
    pivot_wider(names_from = site, values_from = c(x, y)) %>% 
    mutate(distance = euclidean(x_u, y_u, x_r, y_r)) %>% 
    dplyr::select(city, distance)

In [29]:
# Preview the per-city centroid distances
head(euc_dist_df, n = 6)

## Fst vs. Euclidean distance

In [30]:
# Pair Hudson's Fst with the centroid distance by city. The two tables are
# built from different inputs, so pulling each column separately would only
# line up if both happened to list the same cities in the same order.
fst_dist_df <- fst_df %>% 
    filter(fst_type == 'hudson') %>% 
    dplyr::select(city, fst) %>% 
    inner_join(., euc_dist_df %>% ungroup(), by = 'city')
fst <- fst_dist_df[['fst']]
dist <- fst_dist_df[['distance']]

In [31]:
# Scatterplot of Hudson's Fst against urban-rural centroid distance.
# Built with ggplot() on an explicit data frame; qplot() is deprecated.
fst_by_eucl <- ggplot(data.frame(fst = fst, dist = dist), 
                      aes(x = fst, y = dist)) + 
    geom_point(size = 2, alpha = 0.5) +
#     geom_abline(slope = 1, intercept = 0) +
    xlab("Hudson's Fst") + ylab('Euclidean distance') +
    theme_classic()
fst_by_eucl

In [32]:
# Write the Fst vs. Euclidean distance plot to the third Snakemake output
outpath <- snakemake@output[[3]]
print(outpath)
ggsave(
    filename = outpath,
    plot = fst_by_eucl,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 9,
    dpi = 300
)

In [33]:
# Pearson correlation between Fst and centroid distance, all points included
cor(x = fst, y = dist, method = 'pearson')

In [34]:
# Keep only the pairs whose Fst is below 0.1 (drops the large-Fst outliers)
dist_highDrop <- dist[fst < 0.1]
fst_highDrop <- fst[fst < 0.1]

In [35]:
# Same scatterplot as above, restricted to the pairs with Fst < 0.1.
# Built with ggplot() on an explicit data frame; qplot() is deprecated.
fst_by_eucl_highDrop <- ggplot(data.frame(fst_highDrop = fst_highDrop, 
                                          dist_highDrop = dist_highDrop), 
                               aes(x = fst_highDrop, y = dist_highDrop)) + 
    geom_point(size = 2, alpha = 0.5) +
#     geom_abline(slope = 1, intercept = 0) +
    xlab("Hudson's Fst") + ylab('Euclidean distance') +
    theme_classic()
fst_by_eucl_highDrop

In [36]:
# Correlation test once the large-Fst outliers have been removed
cor.test(x = fst_highDrop, y = dist_highDrop)

## Models

In [41]:
# Slopes and p-values of the clines, one row per city
betaLog <- dplyr::select(
    suppressMessages(
        read_csv('../../phenotypic-analyses/analysis/supplementary-tables/allCities_logisticReg_coefs.csv')
    ),
    city, betaLog, pvalLog
)
head(betaLog)

In [102]:
# One row per city combining centroid distance, Hudson's Fst, diversity
# (wide format) and cline slope/p-value, plus two derived labels:
#   sigLog:       'Yes'/'No' for pvalLog < 0.05
#   significance: direction of a significant cline, or 'Not significant'
# ungroup() guards against grouping inherited from euc_dist_df.
df_allStats <- euc_dist_df %>% 
    ungroup() %>% 
    left_join(., fst_df %>% filter(fst_type == 'hudson'), by = 'city') %>% 
    left_join(., div_df_wide, by = 'city') %>% 
    left_join(., betaLog, by = 'city') %>% 
    mutate(sigLog = ifelse(pvalLog < 0.05, 'Yes', 'No'),
           # A city without a p-value has an unknown cline status; keep it NA
           # rather than letting it fall through to 'Not significant'
           significance = case_when(is.na(sigLog) ~ NA_character_,
                                    sigLog == 'Yes' & betaLog < 0 ~ 'Significantly negative',
                                    sigLog == 'Yes' & betaLog > 0 ~ 'Significantly positive',
                                    TRUE ~ 'Not significant'))

### Does pi differ by habitat or city?

In [44]:
# ANOVA of theta_pi with city and habitat as additive terms
pi_mod <- aov(formula = tp_scaled ~ city + habitat, data = div_df)
summary(pi_mod)

In [45]:
# Least-squares means of pi in each habitat
emmeans(object = pi_mod, specs = 'habitat')

In [46]:
# Mean, sample size and standard error of pi per habitat, computed from the
# raw data rather than from the model
summarise(
    group_by(div_df, habitat),
    mean = round(mean(tp_scaled), 4),
    n = n(),
    se = round(sd(tp_scaled) / sqrt(n), 6)
)

### Pi by habitat and clines (sig vs. ns)

In [51]:
# Diversity estimates annotated with whether the city's cline is significant
div_df_mod <- mutate(
    left_join(div_df, betaLog, by = 'city'),
    sig = ifelse(pvalLog < 0.05, 'Yes', 'No')
)

In [52]:
# Model used to get least-squares means by habitat and cline significance
pi_mod_sig <- aov(formula = tp_scaled ~ habitat + sig, data = div_df_mod)
summary(pi_mod_sig)

In [54]:
# Get least-squares means of each habitat within each significance class
emmeans(object = pi_mod_sig, specs = 'habitat', by = 'sig')

In [56]:
# Mean, sample size and standard error of pi per habitat and significance
# class, computed from the raw data rather than from the model
summarise(
    group_by(div_df_mod, habitat, sig),
    mean = round(mean(tp_scaled), 4),
    n = n(),
    se = round(sd(tp_scaled) / sqrt(n), 6)
)

### Does the strength of clines predict mean diversity?

- Model above suggests diversity is higher in cities with clines?
- Is this a real result?
- Does mean diversity across cities vary with the strength of clines?

In [57]:
# Mean theta_pi per city (averaged over habitats) with the cline statistics
div_df_mean <- left_join(
    summarise(group_by(div_df, city), tp_scaled = mean(tp_scaled)),
    betaLog,
    by = 'city'
)

In [58]:
# Regress mean city diversity on cline slope
lm(tp_scaled ~ betaLog, data = div_df_mean) %>% 
    summary()

### Does difference in neutral diversity predict HCN clines?

In [59]:
# Inspect the cline slopes that enter the models below
df_allStats[['betaLog']]

In [60]:
# Cline slope as a function of the urban - rural difference in pi
div_mod <- aov(formula = betaLog ~ tp_diff, data = df_allStats)
summary(div_mod)

### Does difference in pi differ between cities with and without clines?

In [64]:
# Urban - rural difference in pi as a function of cline significance
tpDiff_by_sig_mod <- aov(formula = tp_diff ~ sigLog, data = df_allStats)
summary(tpDiff_by_sig_mod)

In [65]:
# Least-squares means of the difference in pi per significance class
emmeans(object = tpDiff_by_sig_mod, specs = 'sigLog')

### Does Fst predict HCN?

In [106]:
# Does Fst predict HCN?
# Remove Kyoto since Fst likely biased by low sample size (N = 1)
df_allStats_noKyoto <- filter(df_allStats, city != 'Kyoto')
fst_mod <- aov(formula = betaLog ~ fst, data = df_allStats_noKyoto)
summary(fst_mod)

### Does Fst differ between cities with and without clines?

In [67]:
# Overall mean Fst (without Kyoto), number of cities and standard error
summarise(
    ungroup(df_allStats_noKyoto),
    meanFst = mean(fst),
    n = n(),
    se = sd(fst) / sqrt(n)
)

In [68]:
# Fst as a function of cline significance (replaces the earlier `fst_mod`)
fst_mod <- aov(formula = fst ~ sigLog, data = df_allStats_noKyoto)
summary(fst_mod)

In [69]:
# Least-squares means of Fst per significance class
emmeans(object = fst_mod, specs = 'sigLog')

In [71]:
# Mean, sample size and standard error of Fst per significance class,
# computed from the raw data rather than from the model
summarise(
    group_by(df_allStats_noKyoto, sigLog),
    mean = round(mean(fst), 4),
    n = n(),
    se = round(sd(fst) / sqrt(n), 4)
)

### Does euclidean distance predict HCN?

In [72]:
# Cline slope as a function of urban-rural centroid distance
dist_mod <- aov(formula = betaLog ~ distance, data = df_allStats)
summary(dist_mod)

## Figure 4 for main text

- Figure 4 will illustrate some of the (null) results from the models above

In [73]:
# Shared plotting theme: blank panel, black axis lines, large axis text and
# a vertical legend placed above the plot
ng1 <- theme(
    aspect.ratio = 0.7,
    # Panel
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    # Axes
    axis.line.x = element_line(color = "black", size = 1),
    axis.line.y = element_line(color = "black", size = 1),
    axis.ticks = element_line(color = "black"),
    axis.text = element_text(color = "black", size = 15),
    axis.title = element_text(color = "black", size = 1),
    axis.title.y = element_text(vjust = 2, size = 17),
    axis.title.x = element_text(vjust = 0.1, size = 17),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    # Facet strips
    strip.text.x = element_text(size = 10, colour = "black", face = "bold"),
    strip.background = element_rect(colour = "black"),
    # Legend
    legend.position = "top",
    legend.direction = "vertical",
    legend.text = element_text(size = 17),
    legend.key = element_rect(fill = "white"),
    legend.title = element_text(size = 17),
    legend.key.size = unit(1.0, "cm")
)

### Figure 4A

- Histogram showing urban and rural theta pi

In [74]:
# Habitat colours taken from the Darjeeling1 palette; `cols` is ordered
# urban first, rural second
pal <- wes_palette(name = 'Darjeeling1', n = 5, type = 'discrete')
rural_col <- pal[2]
urban_col <- pal[4]
cols <- c(urban_col, rural_col)

In [98]:
# Figure 4A: overlaid histograms of theta_pi in rural and urban habitats,
# with a dashed vertical line at each habitat's mean.
rural_div_df <- div_df %>% filter(habitat == 'r')
urban_div_df <- div_df %>% filter(habitat == 'u')
alpha <- 0.75
pi_by_habitat <- ggplot() +
    geom_histogram(data = rural_div_df, 
                   bins = 26, 
                   aes(x = tp_scaled, fill = habitat), 
                   color = 'black',
                   alpha = alpha) +
    geom_histogram(data = urban_div_df, 
                   bins = 26, 
                   aes(x = tp_scaled, fill = habitat), 
                   color = 'black', 
                   alpha = alpha) +
    # rev(cols) is (rural, urban), matching the habitat levels 'r', 'u'
    scale_fill_manual(values = rev(cols), labels = c('Rural', 'Urban')) +
    geom_vline(xintercept = mean(rural_div_df %>% pull(tp_scaled)), color = rural_col, linetype = 'dashed') +
    geom_vline(xintercept = mean(urban_div_df %>% pull(tp_scaled)), color = urban_col, linetype = 'dashed') +
    ylab('Number of cities') + xlab('Pairwise nucleotide diversity') +
    scale_y_continuous(breaks = seq(from = 0, to = 7, by = 1)) +
    # Breaks span the displayed window (previously started at -0.017)
    scale_x_continuous(breaks = seq(from = 0.017, to = 0.023, by = 0.001)) +
    coord_cartesian(xlim = c(0.017, 0.023)) +
    ng1
pi_by_habitat

In [99]:
# Write Figure 4A to the fourth Snakemake output
outpath <- snakemake@output[[4]]
print(outpath)
ggsave(
    filename = outpath,
    plot = pi_by_habitat,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 8,
    dpi = 600,
    useDingbats = FALSE
)

### Figure 4B

- Slope of HCN cline vs. difference in pi

In [100]:
# Fill colours keyed by cline class. Naming the values ties each colour to
# its class even if one class is absent from the data being plotted.
pal <- c('Not significant' = "#909090", 
         'Significantly negative' = "#FF0000", 
         'Significantly positive' = "#046C9A")

In [103]:
# Figure 4B: slope of HCN clines against the difference in pi, with a linear
# fit. tp_diff is computed as urban minus rural, so the axis label says so
# (it previously read 'Rural-urban ... pariwise').
slope_by_PIdiff <- ggplot(df_allStats, aes(x = tp_diff, y = betaLog)) +
    geom_point(size = 5, aes(fill = significance), shape = 21) +
    geom_smooth(method = 'lm', color = 'black', size = 1.5) +
    scale_fill_manual(values = pal) +
    ylab('Slope of HCN clines (log-odds)') + xlab('Urban-rural difference in pairwise nucleotide diversity') +
    ng1
slope_by_PIdiff

In [104]:
# Write Figure 4B to the fifth Snakemake output
outpath <- snakemake@output[[5]]
print(outpath)
ggsave(
    filename = outpath,
    plot = slope_by_PIdiff,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 8,
    dpi = 600,
    useDingbats = FALSE
)

### Figure 4C

- Histogram of Fst by significant cline

In [113]:
# Figure 4C: overlaid histograms of Fst, one layer per cline class, added in
# the order listed below
alpha <- 0.6
fst_by_cline <- ggplot(df_allStats_noKyoto, aes(x = fst)) +
    lapply(
        c('Not significant', 'Significantly negative', 'Significantly positive'),
        function(cline_class){
            geom_histogram(data = df_allStats_noKyoto %>% filter(significance == cline_class),
                           bins = 25, 
                           aes(fill = significance), 
                           color = 'black', 
                           alpha = alpha)
        }
    ) +
    scale_fill_manual(values = pal) +
    coord_cartesian(xlim = c(0.01, 0.13)) +
    scale_x_continuous(breaks = seq(from = 0.01, to = 0.13, by = 0.02)) +
    labs(x = 'Urban-rural Fst', y = 'Number of cities') +
#     scale_y_continuous(breaks = seq(from = 0, to = 7, by = 1)) +
    ng1
fst_by_cline

In [114]:
# Write Figure 4C to the sixth Snakemake output
outpath <- snakemake@output[[6]]
print(outpath)
ggsave(
    filename = outpath,
    plot = fst_by_cline,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 8,
    dpi = 600,
    useDingbats = FALSE
)

### Figure 4D

- Slope of clines vs. Fst

In [115]:
# Figure 4D: slope of HCN clines against Hudson's Fst (without Kyoto), with a
# linear fit
slope_by_fst <- df_allStats_noKyoto %>% 
    ggplot(aes(x = fst, y = betaLog)) +
    geom_point(aes(fill = significance), shape = 21, size = 5) +
    geom_smooth(method = 'lm', size = 1.5, color = 'black') +
    scale_fill_manual(values = pal) +
    scale_x_continuous(breaks = seq(from = 0.01, to = 0.13, by = 0.02)) +
    labs(x = "Hudson's Fst", y = 'Slope of HCN clines (log-odds)') +
    ng1
slope_by_fst

In [116]:
# Write Figure 4D to the seventh Snakemake output
outpath <- snakemake@output[[7]]
print(outpath)
ggsave(
    filename = outpath,
    plot = slope_by_fst,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 8,
    dpi = 600,
    useDingbats = FALSE
)

## Figure SX 

- PCA figure with urban/rural centroids

In [117]:
# Mean PC1/PC2 position (centroid) of each habitat within each city, in long
# format for plotting. `.groups = 'drop'` returns an ungrouped tibble, as in
# load_pairwise_diversity().
euc_dist_df_forPlot <- eigen_df %>% 
    group_by(city, site) %>% 
    summarise(x = mean(PC1),
              y = mean(PC2),
              .groups = 'drop')
head(euc_dist_df_forPlot)

In [118]:
# Urban and rural centroids in PC space: a line joins the two centroids of
# each city, and each city is labelled at its rural centroid
pca_centroids <- euc_dist_df_forPlot %>% 
    ggplot(aes(x = x, y = y, fill = site, shape = site)) +
    geom_line(aes(group = city), alpha = 0.4, size = 0.35) +
    geom_point(size = 3.5) +
    scale_fill_manual(values = rev(cols), labels = c('Rural', 'Urban')) +
    scale_shape_manual(values = c(21, 24), labels = c('Rural', 'Urban')) +
    scale_x_continuous(breaks = seq(from = -0.08, to = 0.06, by = 0.02)) +
    scale_y_continuous(breaks = seq(from = -0.05, to = 0.15, by = 0.03)) +
    labs(x = "PC1 (18%)", y = 'PC2 (7%)') + 
    geom_label_repel(
        data = euc_dist_df_forPlot %>% filter(site == 'r'),
        aes(label = city),
        color = 'black',
        fill = 'white', 
        size = 3,
        arrow = arrow(length = unit(0.03, "npc"), type = "closed", ends = "last"),
        nudge_y = 0.1,
        segment.size  = 0.3
    ) +
    ng1
pca_centroids

In [119]:
# Write the PCA centroid figure to the eighth Snakemake output
outpath <- snakemake@output[[8]]
print(outpath)
ggsave(
    filename = outpath,
    plot = pca_centroids,
    device = 'pdf',
    units = 'in',
    width = 8,
    height = 8,
    dpi = 600,
    useDingbats = FALSE
)

## Single-city SFS

- Plot 4fold SFS for single cities, out of curiosity

In [176]:
# Load a single-line ("wide") SFS file and reshape it to one row per
# frequency class.
#
# Args:
#   path:     File path relative to `base_dir`. The first directory component
#             is used as the city; the second '_'-separated token of the file
#             name is used as the site.
#   base_dir: Prefix pasted (with a '/') in front of `path`. Defaults to the
#             global `inpath`, which the function previously read implicitly.
#
# Returns:
#   A data.frame with num_sites, maf (0-based position of the class in the
#   file), city, site and prop_sites (num_sites divided by their sum).
#   Classes with zero sites are dropped.
load_wide_sfs <- function(path, base_dir = inpath){
  
    # Get name of folder with parameter combinations
    dir <- str_split(path_dir(path), '/', simplify = TRUE)
    name <- basename(path)
    site <- str_split(name, simplify = TRUE, pattern = '_')[1,2]
    city <- dir[1]

    # Read in SFS
    full_path <- paste0(base_dir, '/', path)
    sfs <- suppressMessages(read_delim(full_path, delim= ' ', col_names = FALSE)) %>% 
        t() %>% 
        as.data.frame() %>% 
        rename('num_sites' = 'V1') %>% 
        # Index the classes BEFORE dropping empty ones, so that a zero in the
        # middle of the spectrum cannot shift the index of later classes
        mutate(maf = seq_len(n()) - 1) %>% 
        filter(num_sites != 0) %>%  #  folded SFS so samples > # of samples will be 0
        mutate(city = city,
               site = site,
               prop_sites = num_sites / sum(num_sites))
    return(sfs)
}

In [177]:
# Load every 4fold SFS file and stack them.
# `pattern` is a regular expression, so the dot is escaped to match a literal
# '.' rather than any character.
inpath <- '../results/angsd/sfs/by_city/'
sfs_df <- list.files(inpath, pattern = '4fold\\.sfs', recursive = TRUE) %>% 
    map_dfr(., load_wide_sfs)

In [178]:
# Keep the three cities whose SFS will be plotted
sfs_df_subset <- filter(sfs_df, city %in% c('Toronto', 'Quito', 'Albuquerque'))

In [180]:
# Bar plot of the SFS (classes 1 to 25) for each selected city and site
# NOTE(review): no fill aesthetic is mapped, so scale_fill_manual() has no
# visible effect here -- confirm whether bars were meant to be filled by site
sfs_plot <- sfs_df_subset %>% 
    filter(maf != 0, maf <= 25) %>%
    ggplot(aes(x = maf, y = prop_sites)) + 
    geom_bar(stat = 'identity', width = .70, color = 'black') + 
    facet_grid(city ~ site) +
    labs(x = 'Minor allele frequency', y = 'Proportion of sites') +
    scale_fill_manual(values = cols) +
    scale_x_continuous(breaks = seq(1, 25, 6)) +
    scale_y_continuous(breaks = seq(0, 0.13, 0.02)) + 
    theme_classic() + 
    theme(axis.text = element_text(size = 13),
          axis.title = element_text(size = 15))
sfs_plot