# Pathway analysis of metagenomic data


In [None]:
if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install("clusterProfiler")
BiocManager::install("org.Hs.eg.db")

library(clusterProfiler)
library(org.Hs.eg.db)
# keytypes(org.Hs.eg.db)

# Import the metagenomics data
metagenomics <- read.csv(file = '../Data/ecs_relab_split.csv', sep = '\t', header = TRUE)
head (metagenomics)

## KEGG pathway over-representation analysis

In [50]:
# Convert KEGG IDs to Entrez IDs for enrichment
gene <- clusterProfiler::bitr(metagenomics$Gene.Family,fromType = "ENZYME",toType = "ENTREZID",OrgDb = org.Hs.eg.db)

kk <- enrichKEGG(gene         = gene[,2],
                 organism     = 'hsa',
                 pvalueCutoff = 0.05)
head(kk)

'select()' returned 1:many mapping between keys and columns

"73.68% of input gene IDs are fail to map..."


Unnamed: 0_level_0,ID,Description,GeneRatio,BgRatio,pvalue,p.adjust,qvalue,geneID,Count
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<int>
hsa00190,hsa00190,Oxidative phosphorylation,128/1468,134/8142,1.285031e-88,4.1121e-86,1.650251e-86,1537/4519/7381/7384/7385/7386/7388/10975/27089/29796/440567/6389/6390/6391/6392/4535/4536/4537/4538/4539/4540/4541/4694/4695/4696/4697/4698/4700/4701/4702/4704/4705/4706/4707/4708/4709/4710/4711/4712/4713/4714/4715/4716/4717/4718/4719/4720/4722/4723/4724/4725/4726/4728/4729/4731/51079/54539/55967/56901/126328/374291/1327/1329/1337/1339/1340/1345/1346/1347/1349/1350/1351/4512/4513/4514/9167/9377/84701/125965/170712/341947/5464/27068/64077/479/495/496/498/506/509/513/514/515/516/517/518/521/522/523/525/526/527/528/529/533/534/535/537/539/4508/4509/8992/9114/9296/9550/9551/10312/10476/10632/23545/50617/51382/51606/90423/127124/155066/245972/245973,128
hsa05208,hsa05208,Chemical carcinogenesis - reactive oxygen species,164/1468,223/8142,5.894539e-76,9.431263e-74,3.784915e-74,873/8644/10327/1537/4519/7381/7384/7385/7386/7388/10975/27089/29796/440567/847/4025/6647/6648/1109/1645/1646/6389/6390/6391/6392/4535/4536/4537/4538/4539/4540/4541/4694/4695/4696/4697/4698/4700/4701/4702/4704/4705/4706/4707/4708/4709/4710/4711/4712/4713/4714/4715/4716/4717/4718/4719/4720/4722/4723/4724/4725/4726/4728/4729/4731/51079/54539/55967/56901/126328/374291/1327/1329/1337/1339/1340/1345/1346/1347/1349/1350/1351/4512/4513/4514/9167/9377/84701/125965/170712/341947/2938/2939/2940/2941/2944/2946/2947/2948/2949/2952/2953/4257/4258/4259/9446/119391/221357/5290/5291/5293/1956/4233/25/27/5747/6714/5580/207/208/369/673/5170/5894/10000/1432/5594/5595/5599/5600/5601/5602/5603/6300/4217/9020/5604/5605/5609/6416/52/5770/5781/5795/5728/5337/5338/2053/2052/498/506/509/513/514/515/516/517/518/522/539/4508/4509/10476/10105,164
hsa01200,hsa01200,Carbon metabolism,106/1468,115/8142,7.492651e-69,7.992161e-67,3.20738e-67,128/4190/4191/4200/4199/10873/3419/3420/3421/3417/3418/5226/9563/2539/26227/51179/54363/847/2597/4329/5160/5161/5162/4967/55753/51/8310/6389/6390/6391/6392/35/2746/2747/2731/1738/275/6470/6472/7086/8277/84076/6888/1737/1743/38/39/1431/2805/2806/2875/84706/189/29968/5211/5213/5214/3098/3099/3101/80201/26007/2645/132158/5313/5315/5230/5232/5631/5634/221823/25796/26275/2203/8789/283871/5723/226/229/230/2023/2026/2027/1892/2271/48/50/10993/6120/729020/84693/7167/22934/2821/5223/5224/441531/4594/55902/84532/8801/8802/8803/5091/5095/5096,106
hsa05415,hsa05415,Diabetic cardiomyopathy,143/1468,203/8142,4.493304e-62,3.594643e-60,1.442587e-60,2539/1537/4519/7381/7384/7385/7386/7388/10975/27089/29796/440567/4846/2597/5160/5161/5162/6389/6390/6391/6392/4535/4536/4537/4538/4539/4540/4541/4694/4695/4696/4697/4698/4700/4701/4702/4704/4705/4706/4707/4708/4709/4710/4711/4712/4713/4714/4715/4716/4717/4718/4719/4720/4722/4723/4724/4725/4726/4728/4729/4731/51079/54539/55967/56901/126328/374291/2936/1327/1329/1337/1339/1340/1345/1346/1347/1349/1350/1351/4512/4513/4514/9167/9377/84701/125965/170712/341947/2997/2998/2673/9945/5290/5291/5293/3643/5578/5579/5580/5582/5590/815/816/817/818/207/208/10000/1432/5599/5600/5601/5602/5603/6300/7046/7048/5499/5500/5501/5728/5330/5331/5332/23236/498/506/509/513/514/515/516/517/518/522/539/4508/4509/10476/487/488/489/10105,143
hsa00230,hsa00230,Purine metabolism,96/1468,128/8142,1.088605e-45,6.967072e-44,2.7959959999999996e-44,3614/3615/7498/6240/6241/50484/2766/51292/2618/471/5471/4860/353/3251/1716/132/9060/9061/1633/50808/203/204/205/26289/122481/4830/4831/4832/4833/10201/29922/654364/2987/5631/5634/221823/4907/22978/30833/56953/84618/93034/5136/5137/5138/5139/5140/5141/5142/5143/5144/5150/5151/5153/8622/10846/27115/50940/5167/5169/55821/9615/100/270/271/272/58497/11164/53343/56985/84284/318/3704/2272/954/953/956/377841/10606/158/107/108/109/111/112/113/114/115/55811/196883/5236/55276/159/122622/8833/5198,96
hsa01230,hsa01230,Biosynthesis of amino acids,69/1468,75/8142,7.939766e-45,4.234542e-43,1.699389e-43,3419/3420/3421/3417/3418/26227/5053/2597/5832/5831/29920/65263/4548/6470/6472/5009/7086/8277/84076/6888/162417/1431/4143/4144/27430/2805/2806/2875/84706/586/587/29968/5211/5213/5214/5313/5315/5230/5232/5631/5634/221823/5723/95/383/384/226/229/230/2023/2026/2027/875/48/50/10993/435/1491/6120/729020/7167/22934/5223/5224/441531/2752/445/440/5091,69


## KEGG pathway gene set enrichment analysis

In [54]:
geneList <- sort(gene[,2], decreasing = TRUE)

kk2 <- gseKEGG(geneList     = geneList,
               organism     = 'hsa',
               minGSSize    = 120,
               pvalueCutoff = 0.05,
               verbose      = FALSE)
head(kk2)

--> Expected input gene ID: 113612,223,9365,7360,223,2203



ERROR: Error in check_gene_id(geneList, geneSets): --> No gene can be mapped....
