# Pathway analysis of metagenomic data


In [None]:
# Install BiocManager and the Bioconductor packages used below, but only when
# they are not already available, so re-running this cell does not trigger a
# full reinstall. requireNamespace() checks availability without attaching.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

bioc_packages <- c(
  "clusterProfiler",
  "org.Hs.eg.db",
  "org.EcK12.eg.db",
  "KEGGREST"
)
is_installed <- vapply(
  bioc_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  BiocManager::install(bioc_packages[!is_installed])
}

library(clusterProfiler)
library(KEGGREST)
library(org.Hs.eg.db)
library(org.EcK12.eg.db)
#keytypes(org.EcK12.eg.db)

# Import the metagenomics data.
# The file is tab-separated despite its .csv extension, hence read.delim()
# (tab separator and header row are its defaults).
metagenomics <- read.delim(file = "../Data/ecs_relab_split.csv")
head(metagenomics)

## KEGG pathway over-representation analysis

In [None]:
# Map the EC numbers in the Gene.Family column to Entrez gene IDs using the
# E. coli K-12 annotation database.
gene <- clusterProfiler::bitr(
  metagenomics$Gene.Family,
  fromType = "ENZYME",
  toType   = "ENTREZID",
  OrgDb    = org.EcK12.eg.db
)

# Prefix every Entrez ID (second column of `gene`) with the "ncbi-geneid:"
# database tag, then ask KEGG for the corresponding "eco" gene identifiers.
geneList <- sprintf("ncbi-geneid:%s", gene[, 2])
geneList <- keggConv("eco", geneList)

In [None]:
# Inspect the Entrez -> KEGG mapping. keggConv() returns a named character
# vector: the names are the "ncbi-geneid:<id>" queries and the values are the
# matching "eco:<gene>" KEGG identifiers.
head(geneList)
geneList[2]

# geneList is a one-dimensional vector, so two-index subsetting
# (`geneList[, 2]`) stops with "incorrect number of dimensions". Take the
# values themselves and drop the "eco:" organism prefix, because
# enrichKEGG(keyType = "kegg") matches against bare KEGG gene IDs.
kegg_ids <- unique(sub("^eco:", "", unname(geneList)))

# Over-representation test of the converted genes against E. coli K-12 (eco)
# KEGG pathways.
kk <- enrichKEGG(
  gene         = kegg_ids,
  organism     = "eco",
  keyType      = "kegg",
  pvalueCutoff = 0.05
)
head(kk)

## KEGG pathway gene set enrichment analysis

In [None]:
# gseKEGG() needs a ranked gene list: a numeric vector of scores, named by
# gene ID and sorted in decreasing order. Sorting the Entrez IDs themselves
# (as character strings) carries no ranking information.
#
# NOTE(review): the score used here is the mean over every numeric column of
# `metagenomics`, on the assumption that those columns are per-sample relative
# abundances -- confirm, or substitute the statistic you actually want to rank
# by (e.g. a fold change between groups).
abundance_cols <- vapply(metagenomics, is.numeric, logical(1))
stopifnot(any(abundance_cols))
ec_score <- rowMeans(
  metagenomics[, abundance_cols, drop = FALSE],
  na.rm = TRUE
)

# Carry each EC number's score over to the Entrez IDs it maps to
# (column 1 of `gene` is the EC number, column 2 the Entrez ID).
mapped_score <- ec_score[match(gene[, 1], metagenomics$Gene.Family)]
has_score <- !is.na(mapped_score)

# A gene annotated with several EC numbers keeps its highest score, so every
# Entrez ID appears exactly once in the ranked list.
entrez_score <- vapply(
  split(mapped_score[has_score], gene[has_score, 2]),
  max,
  numeric(1)
)
geneList <- sort(entrez_score, decreasing = TRUE)
stopifnot(length(geneList) > 0)

# Use the K-12 organism code ("eco") to match org.EcK12.eg.db and the
# over-representation analysis, and declare that the names are NCBI (Entrez)
# gene IDs rather than KEGG gene IDs.
# minGSSize = 120 restricts the test to gene sets with at least 120 genes.
kk2 <- gseKEGG(
  geneList     = geneList,
  organism     = "eco",
  keyType      = "ncbi-geneid",
  minGSSize    = 120,
  pvalueCutoff = 0.05,
  verbose      = FALSE
)
head(kk2)