In [None]:
library(tidyverse)
library(topGO)

# Setup

In [None]:
# Read the gene -> GO term mapping file supplied by the Snakemake rule
# (input 'all_genes') into a named list keyed by gene ID.
gene_id_to_go <- readMappings(file = snakemake@input[["all_genes"]])

In [None]:
# Read the table of selected (top-hit) genes from the Snakemake input
# 'top_ten_genes' and preview its first rows.
top_hits <- snakemake@input[["top_ten_genes"]] %>%
    read_delim()
head(top_hits)

# GO enrichment analysis

## Selection in urban habitats

In [None]:
# IDs of the top-hit genes whose `direction` is 'Urban'.
top_genes_urban <- pull(filter(top_hits, direction == "Urban"), gene_id)

In [None]:
# Gene universe: every gene present in the gene -> GO mapping.
gene_names <- names(gene_id_to_go)
# Named 0/1 factor over the universe: 1 = gene is an urban top hit, 0 = not.
gene_list_urban <- gene_names %>%
    is.element(top_genes_urban) %>%
    as.integer() %>%
    factor() %>%
    setNames(gene_names)
str(gene_list_urban)

### Biological Process

In [None]:
# Build the topGOdata object for urban-selected genes, Biological Process
# ontology. Annotations come from the custom gene -> GO list via
# annFUN.gene2GO; nodeSize = 5 is the minimum-annotated-genes threshold
# passed to topGO.
go_data_urban_BP <- new(
    "topGOdata",
    ontology = "BP",
    description = "GO enrichment analysis of all urban-selected genes",
    allGenes = gene_list_urban,
    annot = annFUN.gene2GO,
    gene2GO = gene_id_to_go,
    nodeSize = 5
)
go_data_urban_BP

In [None]:
# Fisher test with the weight01 algorithm on the urban BP GO data.
go_urban_fisher_BP <- go_data_urban_BP %>%
    runTest(algorithm = "weight01", statistic = "fisher")
go_urban_fisher_BP

In [None]:
# Number of GO terms with a test score (p-value) below 0.05.
# sum() over the logical vector replaces the subset-then-length idiom, and
# na.rm = TRUE keeps any missing score from being counted as significant.
num_sig <- sum(score(go_urban_fisher_BP) < 0.05, na.rm = TRUE)
# Summarise the significant terms, tagged with habitat and ontology.
# topNodes = 0 is not a safe way to ask GenTable() for "no rows": it appears
# to index rows with `1:topNodes`, and 1:0 is c(1, 0), which would return one
# non-significant term (NOTE(review): confirm against the installed topGO).
# So request at least one row and trim to num_sig afterwards; head(n = 0)
# yields an empty table that keeps its columns for the later bind_rows().
go_urban_tbl_BP <- GenTable(go_data_urban_BP, Pval = go_urban_fisher_BP, topNodes = max(num_sig, 1L)) %>%
    head(n = num_sig) %>%
    mutate(Selection = 'Urban', Ontology = 'BP')
go_urban_tbl_BP

In [None]:
# List the GO IDs of the significant urban BP terms.
dplyr::pull(dplyr::select(go_urban_tbl_BP, GO.ID, Pval), GO.ID)

### Molecular Function

In [None]:
# Build the topGOdata object for urban-selected genes, Molecular Function
# ontology. Annotations come from the custom gene -> GO list via
# annFUN.gene2GO; nodeSize = 5 is the minimum-annotated-genes threshold
# passed to topGO.
go_data_urban_MF <- new(
    "topGOdata",
    ontology = "MF",
    description = "GO enrichment analysis of all urban-selected genes",
    allGenes = gene_list_urban,
    annot = annFUN.gene2GO,
    gene2GO = gene_id_to_go,
    nodeSize = 5
)
go_data_urban_MF

In [None]:
# Fisher test with the weight01 algorithm on the urban MF GO data.
go_urban_fisher_MF <- go_data_urban_MF %>%
    runTest(algorithm = "weight01", statistic = "fisher")
go_urban_fisher_MF

In [None]:
# Number of GO terms with a test score (p-value) below 0.05.
# sum() over the logical vector replaces the subset-then-length idiom, and
# na.rm = TRUE keeps any missing score from being counted as significant.
num_sig <- sum(score(go_urban_fisher_MF) < 0.05, na.rm = TRUE)
# Summarise the significant terms, tagged with habitat and ontology.
# topNodes = 0 is not a safe way to ask GenTable() for "no rows": it appears
# to index rows with `1:topNodes`, and 1:0 is c(1, 0), which would return one
# non-significant term (NOTE(review): confirm against the installed topGO).
# So request at least one row and trim to num_sig afterwards; head(n = 0)
# yields an empty table that keeps its columns for the later bind_rows().
go_urban_tbl_MF <- GenTable(go_data_urban_MF, Pval = go_urban_fisher_MF, topNodes = max(num_sig, 1L)) %>%
    head(n = num_sig) %>%
    mutate(Selection = 'Urban', Ontology = 'MF')
go_urban_tbl_MF

## Selection in rural habitats

In [None]:
# IDs of the top-hit genes whose `direction` is 'Rural'.
all_genes_rural <- pull(filter(top_hits, direction == "Rural"), gene_id)

In [None]:
# Named 0/1 factor over the gene universe (`gene_names`):
# 1 = gene is a rural top hit, 0 = not.
gene_list_rural <- gene_names %>%
    is.element(all_genes_rural) %>%
    as.integer() %>%
    factor() %>%
    setNames(gene_names)
str(gene_list_rural)

### Biological Process

In [None]:
# Build the topGOdata object for rural-selected genes, Biological Process
# ontology. Annotations come from the custom gene -> GO list via
# annFUN.gene2GO; nodeSize = 5 is the minimum-annotated-genes threshold
# passed to topGO.
go_data_rural_BP <- new(
    "topGOdata",
    ontology = "BP",
    description = "GO enrichment analysis of all rural-selected genes",
    allGenes = gene_list_rural,
    annot = annFUN.gene2GO,
    gene2GO = gene_id_to_go,
    nodeSize = 5
)
go_data_rural_BP

In [None]:
# Fisher test with the weight01 algorithm on the rural BP GO data.
go_rural_fisher_BP <- go_data_rural_BP %>%
    runTest(algorithm = "weight01", statistic = "fisher")
go_rural_fisher_BP

In [None]:
# Number of GO terms with a test score (p-value) below 0.05.
# sum() over the logical vector replaces the subset-then-length idiom, and
# na.rm = TRUE keeps any missing score from being counted as significant.
num_sig <- sum(score(go_rural_fisher_BP) < 0.05, na.rm = TRUE)
# Summarise the significant terms, tagged with habitat and ontology.
# topNodes = 0 is not a safe way to ask GenTable() for "no rows": it appears
# to index rows with `1:topNodes`, and 1:0 is c(1, 0), which would return one
# non-significant term (NOTE(review): confirm against the installed topGO).
# So request at least one row and trim to num_sig afterwards; head(n = 0)
# yields an empty table that keeps its columns for the later bind_rows().
go_rural_tbl_BP <- GenTable(go_data_rural_BP, Pval = go_rural_fisher_BP, topNodes = max(num_sig, 1L)) %>%
    head(n = num_sig) %>%
    mutate(Selection = 'Rural', Ontology = 'BP')
go_rural_tbl_BP

In [None]:
# List the GO IDs of the significant rural BP terms.
dplyr::pull(dplyr::select(go_rural_tbl_BP, GO.ID, Pval), GO.ID)

### Molecular Function

In [None]:
# Build the topGOdata object for rural-selected genes, Molecular Function
# ontology. Annotations come from the custom gene -> GO list via
# annFUN.gene2GO; nodeSize = 5 is the minimum-annotated-genes threshold
# passed to topGO.
go_data_rural_MF <- new(
    "topGOdata",
    ontology = "MF",
    description = "GO enrichment analysis of all rural-selected genes",
    allGenes = gene_list_rural,
    annot = annFUN.gene2GO,
    gene2GO = gene_id_to_go,
    nodeSize = 5
)
go_data_rural_MF

In [None]:
# Fisher test with the weight01 algorithm on the rural MF GO data.
go_rural_fisher_MF <- go_data_rural_MF %>%
    runTest(algorithm = "weight01", statistic = "fisher")
go_rural_fisher_MF

In [None]:
# Number of GO terms with a test score (p-value) below 0.05.
# sum() over the logical vector replaces the subset-then-length idiom, and
# na.rm = TRUE keeps any missing score from being counted as significant.
num_sig <- sum(score(go_rural_fisher_MF) < 0.05, na.rm = TRUE)
# Summarise the significant terms, tagged with habitat and ontology.
# topNodes = 0 is not a safe way to ask GenTable() for "no rows": it appears
# to index rows with `1:topNodes`, and 1:0 is c(1, 0), which would return one
# non-significant term (NOTE(review): confirm against the installed topGO).
# So request at least one row and trim to num_sig afterwards; head(n = 0)
# yields an empty table that keeps its columns for the later bind_rows().
go_rural_tbl_MF <- GenTable(go_data_rural_MF, Pval = go_rural_fisher_MF, topNodes = max(num_sig, 1L)) %>%
    head(n = num_sig) %>%
    mutate(Selection = 'Rural', Ontology = 'MF')
go_rural_tbl_MF

# Output

In [None]:
# Stack the four per-habitat / per-ontology result tables into one table,
# in the order urban BP, urban MF, rural BP, rural MF.
all_go_results <- list(
    go_urban_tbl_BP,
    go_urban_tbl_MF,
    go_rural_tbl_BP,
    go_rural_tbl_MF
) %>%
    bind_rows()
all_go_results

In [None]:
# Write the combined GO results as a tab-delimited file to the Snakemake
# output 'all_go_res'.
write_delim(
    all_go_results,
    file = snakemake@output[["all_go_res"]],
    delim = "\t"
)