In [1]:
diffgene = read.table('./3.enrichment/DEG_geneid_allcomapre.txt',fill = T,header = T)

In [2]:
alldiffgene = data.frame('KO.CLPvsWT.CLP' = c(diffgene[,1],diffgene[,2]),
                         'KO.NCvsWT.NC' = c(diffgene[,3],diffgene[,4]),
                         'WT.CLPvsWT.NC' = c(diffgene[,5],diffgene[,6]),
                         'KO.CLPvsKO.NC' = c(diffgene[,7],diffgene[,8])
                        
                        )

In [8]:
enrichment<-function(species,outDir,geneList){
  library(clusterProfiler)
  library(org.Hs.eg.db)
  library(ggplot2)
  library(dplyr)
  library(rjson)
  library(jsonlite)
  if(species=="hsa" | species=="mmu"){
  species.org=switch(species,
                     "hsa"=org.Hs.eg.db,
                     "mmu"=org.Mm.eg.db,
  )
  species.kegg=switch(species,
                      "hsa"="/Business/psn_company/sc01/local/KEGG.db/hsa/",
                      "mmu"="/Business/psn_company/sc01/local/KEGG.db/mmu/",
  )
  library(KEGG.db,lib.loc=species.kegg)
  if(species=="hsa"){
    EG2symbol=toTable(org.Hs.egSYMBOL)
  }else if(species=="mmu"){
    EG2symbol=toTable(org.Mm.egSYMBOL)}
  #EG2symbol=toTable(paste0(species.org,"SYMBOL"))
  if("TRUE" %in% unique(geneList%in%EG2symbol$symbol)){
    gene.ncbi=bitr(geneList, fromType = "SYMBOL",
                   toType = "ENTREZID",
                   OrgDb = species.org)
    
    processGO(geneList,species.org,outDir)
    processKEGG(gene.ncbi[,2],species,outDir)
    processREACTOME(gene.ncbi[,2],species,outDir)
  }}else{
    
    json_dic<-read_json("/data/script/ref.json")
    pathwayInfo_KEGG_file <- json_dic[[species]][['kegg_desc']]
    pathwayInfo_GO_file <- json_dic[[species]][['go_desc']]
    
    #make term2gene and term2name mapping, name also called Description
    prepareMapping<-function(pathwayInfo,tag="term2gene",header=T){
      mat = pathwayInfo
      mat=switch(tag,
                 term2gene=mat[,c(2,1)],
                 term2name=mat[,c(2,3)])
      return(mat)
    }
    
    
    pathwayInfo_KEGG = read.delim(pathwayInfo_KEGG_file, header=F, sep="\t",check.names=F,quote="",colClasses = "character")
    term2gene=prepareMapping(pathwayInfo=pathwayInfo_KEGG,tag="term2gene")
    term2name=prepareMapping(pathwayInfo=pathwayInfo_KEGG,tag="term2name")
    kegg_enrichment<-enricher(geneList,
                              TERM2GENE = term2gene, 
                              TERM2NAME = term2name, 
                              pAdjustMethod = "BH",
                              minGSSize=1 ,maxGSSize=100000,
                              qvalueCutoff = 1, pvalueCutoff=1)
    
    if(!is.null(kegg_enrichment)){ #判断是否为空
      res=kegg_enrichment@result
      res$Total=apply(res,1,function(x){getBGnumber(x[4])})
      df=data.frame( PathwayID=res$ID,
                     Pathway=res$Description,
                     List=res$Count,
                     Total=res$Total,
                     Pvalue=res$pvalue,
                     adjustPvalue=res$p.adjust,
                     Gene=res$geneID
      )
      write.table(df,file=paste(outDir,"/","KEGG_enrichment.xls",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
      if(nrow(df) < 20){out_df <- df[seq(1,nrow(df),1),]}else{out_df <- df[seq(1,20,1),]}
      out_df$Pathway=factor(out_df$Pathway,levels=unique(out_df$Pathway))
      out_df$Pathway=substring(out_df$Pathway,1,50)
      out_df<-out_df[!duplicated(out_df[,"Pathway"]),]
      out_df$rich=out_df$List / out_df$Total
      out_df$Number=out_df$List
      q<-qplot(rich,Pathway,data=out_df,colour=Pvalue,size=Number,main="KEGG Enrichment")+
        scale_colour_gradient(low="red",high="green",limits=c(0,1))+
        theme(panel.background=element_rect(fill="white",color="black"),panel.grid.major=element_line(color="grey80",linetype="dotted"))+theme_bw() 
      ggsave(paste(outDir,"KEGG.richfactor.pdf",sep="/"),width = 10, height = 10)
      ggsave(paste(outDir,"KEGG.richfactor.png",sep="/"),width = 10, height = 10)
      
      out_df$Pathway <- factor(out_df$Pathway, levels = out_df$Pathway)
      p<- ggplot(data = out_df) +
        geom_col(aes(x = Pathway, y = -log10(as.numeric(Pvalue)),fill=-log10(as.numeric(Pvalue))),width =0.8) +
        scale_color_brewer(type="seq",palette="Dark2")+
        theme( strip.text.y = element_text(angle = 0),axis.text.x=element_text(size=10,angle=80,hjust=1)) +
        ylab("-log10(P-value)")
      ggsave(paste(outDir,"KEGG_enrichment_pvalue_barplot.pdf",sep="/"), width = 10, height = 10)
      ggsave(paste(outDir,"KEGG_enrichment_pvalue_barplot.png",sep="/"), width = 10, height = 10)
      
    }
    
    pathwayInfo_GO = read.delim(pathwayInfo_GO_file, header=F, sep="\t",check.names=F,quote="",colClasses = "character")
    term2gene=prepareMapping(pathwayInfo=pathwayInfo_GO,tag="term2gene")
    term2name=prepareMapping(pathwayInfo=pathwayInfo_GO,tag="term2name")
    
    go_enrichment<-enricher(geneList,
                            TERM2GENE = term2gene, 
                            TERM2NAME = term2name, 
                            pAdjustMethod = "BH",
                            minGSSize=1 ,maxGSSize=100000,
                            qvalueCutoff = 1, pvalueCutoff=1)
    if(!is.null(go_enrichment)){
      res=go_enrichment@result
      res$Total=apply(res,1,function(x){getBGnumber(x[4])})
      df=data.frame( PathwayID=res$ID,
                     Pathway=res$Description,
                     List=res$Count,
                     Total=res$Total,
                     Pvalue=res$pvalue,
                     adjustPvalue=res$p.adjust,
                     Gene=res$geneID
      )
      write.table(df,file=paste(outDir,"/","GO_enrichment.xls",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
      if(nrow(df) < 20){out_df <- df[seq(1,nrow(df),1),]}else{out_df <- df[seq(1,20,1),]}
      out_df$Pathway=factor(out_df$Pathway,levels=unique(out_df$Pathway))
      out_df$Pathway=substring(out_df$Pathway,1,50)
      out_df<-out_df[!duplicated(out_df[,"Pathway"]),]
      out_df$rich=out_df$List / out_df$Total
      out_df$Number=out_df$List
      q<-qplot(rich,Pathway,data=out_df,colour=Pvalue,size=Number,main="GO Enrichment")+
        scale_colour_gradient(low="red",high="green",limits=c(0,1))+
        theme(panel.background=element_rect(fill="white",color="black"),panel.grid.major=element_line(color="grey80",linetype="dotted"))
      
        ggsave(paste(outDir,"GO.richfactor.pdf",sep="/"),width = 10, height = 10)
      ggsave(paste(outDir,"GO.richfactor.png",sep="/"),width = 10, height = 10)
      
      out_df$Pathway <- factor(out_df$Pathway, levels = out_df$Pathway)
      p<- ggplot(data = out_df) +
        geom_col(aes(x = Pathway, y = -log10(as.numeric(Pvalue)),fill=-log10(as.numeric(Pvalue))),width =0.8) +
        scale_color_brewer(type="seq",palette="Dark2")+
        theme( strip.text.y = element_text(angle = 0),axis.text.x=element_text(size=10,angle=80,hjust=1)) +
        ylab("-log10(P-value)")
      ggsave(paste(outDir,"GO_enrichment_pvalue_barplot.pdf",sep="/"), width = 10, height = 10)
      ggsave(paste(outDir,"GO_enrichment_pvalue_barplot.png",sep="/"), width = 10, height = 10)
    }
  }}


processGO<- function(geneList,species.org,outDir){
  en <- enrichGO(gene       = geneList,
                 OrgDb         = species.org,
                 keyType       = 'SYMBOL',
                 ont           = "ALL",
                 minGSSize     = 1 ,
                 maxGSSize     = 100000 ,
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 1,
                 qvalueCutoff  = 1)
  if(!is.null(en)){
    res=en@result
    res$Total=apply(res,1,function(x){getBGnumber(x[5])})
    df=data.frame( Category=res$ONTOLOGY,
                   GO=res$ID,
                   Term=res$Description,
                   List=res$Count,
                   Total=res$Total,
                   Pvalue=res$pvalue,
                   adjustPvalue=res$p.adjust,
                   Gene=res$geneID
    )
    df<-df[order(df$Pvalue),]
    write.table(df,file=paste(outDir,"/","GO_enrichment.xls",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
    
    if(nrow(df) < 20){out_df <- df[seq(1,nrow(df),1),]}else{out_df <- df[seq(1,20,1),]}
    out_df$Term=factor(out_df$Term,levels=unique(out_df$Term))
    out_df$Term=substring(out_df$Term,1,50)
    out_df<-out_df[!duplicated(out_df[,"Term"]),]
    out_df$rich=out_df$List / out_df$Total
    out_df$Number=out_df$List
    q<-qplot(rich,Term,data=out_df,colour=Pvalue,size=Number,main="GO Enrichment")+
      scale_colour_gradient(low="red",high="green",limits=c(0,1))+
      theme(panel.background=element_rect(
          fill="white",color="black"),
            panel.grid.major=element_line(color="grey80",linetype="dotted"))+theme_bw() 
    ggsave(paste(outDir,"GO.richfactor.pdf",sep="/"),width = 10, height = 10)
    ggsave(paste(outDir,"GO.richfactor.png",sep="/"),width = 10, height = 10)
    
    out_df=out_df[order(out_df$Category),]
    
    out_df$Category <- factor(out_df$Category, levels = unique(out_df$Category))
    out_df$Term <- factor(out_df$Term, levels = out_df$Term)
    p<- ggplot(data = out_df) +
      geom_col(aes(x = Term, y = -log10(as.numeric(Pvalue)), fill = Category),width =0.8) +
      scale_color_brewer(type="seq",palette="Dark2")+
      theme( strip.text.y = element_text(angle = 0),axis.text.x=element_text(size=10,angle=80,hjust=1)) +
      ylab("-log10(P-value)")
    ggsave(paste(outDir,"GO_enrichment_pvalue_barplot.pdf",sep="/"), width = 10, height = 10)
    ggsave(paste(outDir,"GO_enrichment_pvalue_barplot.png",sep="/"), width = 10, height = 10)
    
    library(topGO)
    #ifor(catergory in c("CC","BP",'MF')){
     # ego <- enrichGO(gene       = geneList,
      #                OrgDb         = species.org,
       #               keyType       = 'SYMBOL',
        #              ont           = catergory,
         #             minGSSize     = 1 ,
          #            maxGSSize     = 100000 ,
           #           pAdjustMethod = "BH",
            #          pvalueCutoff  = 1,
             #         qvalueCutoff  = 1)
      #pdf(paste(outDir,"/","GODAG_",catergory,"_top10",".pdf",sep=""))
      #plotGOgraph(ego)
      #dev.off()
      #png(paste(outDir,"/","GODAG_",catergory,"_top10",".png",sep=""))
      #plotGOgraph(ego)
      #dev.off()
      
    #}
  }
}


processKEGG<- function(geneList,species,outDir){
  species.kegg=switch(species,
                      "hsa"="/Business/psn_company/sc01/local/KEGG.db/hsa/",
                      "mmu"="/Business/psn_company/sc01/local/KEGG.db/mmu/",
  )
  library(KEGG.db,lib.loc=species.kegg)
  en <- enrichKEGG(gene     = geneList,
                   organism      = species,
                   pAdjustMethod = "BH",
                   pvalueCutoff  = 1,
                   minGSSize     = 1 ,
                   maxGSSize     = 100000 ,
                   qvalueCutoff  = 1,
                   use_internal_data =T)
  
  kegg_level=read.table("/Business/psn_company/sc01/local/KEGG.db/pathway_level",header=F,sep="\t",stringsAsFactors=F,colClasses="character",row.names=1)
  #if(length(unique(en@result$p.adjust<0.05))>1)
  if(!is.null(en)){
    res=en@result
    res$Total=apply(res,1,function(x){getBGnumber(x[4])})
    df=data.frame( Category=kegg_level[gsub(species,"",res$ID),1] ,
                   PathwayID=res$ID,
                   Pathway=res$Description,
                   List=res$Count,
                   Total=res$Total,
                   Pvalue=res$pvalue,
                   adjustPvalue=res$p.adjust,
                   Gene=res$geneID
    )
    write.table(df,file=paste(outDir,"/","KEGG_enrichment.xls",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
    if(nrow(df) < 20){out_df <- df[seq(1,nrow(df),1),]}else{out_df <- df[seq(1,20,1),]}
    out_df$Pathway=factor(out_df$Pathway,levels=unique(out_df$Pathway))
    out_df$Pathway=substring(out_df$Pathway,1,50)
    out_df<-out_df[!duplicated(out_df[,"Pathway"]),]
    out_df$rich=out_df$List / out_df$Total
    out_df$Number=out_df$List
    q<-qplot(rich,Pathway,data=out_df,colour=Pvalue,size=Number,main="KEGG Enrichment")+
      scale_colour_gradient(low="red",high="green",limits=c(0,1))+
      theme(panel.background=element_rect(
          fill="white",color="black"),
            panel.grid.major=element_line(color="grey80",linetype="dotted"))+theme_bw() 
    ggsave(paste(outDir,"KEGG.richfactor.pdf",sep="/"),width = 10, height = 10)
    ggsave(paste(outDir,"KEGG.richfactor.png",sep="/"),width = 10, height = 10)
    
    out_df=out_df[order(out_df$Category),]
    out_df$Category <- factor(out_df$Category, levels = unique(out_df$Category))
    out_df$Pathway <- factor(out_df$Pathway, levels = out_df$Pathway)
    p<- ggplot(data = out_df) +
      geom_col(aes(x = Pathway, y = -log10(as.numeric(Pvalue)), fill = Category),width =0.8) +
      scale_color_brewer(type="seq",palette="Dark2")+
      theme( strip.text.y = element_text(angle = 0),axis.text.x=element_text(size=10,angle=80,hjust=1)) +
      ylab("-log10(P-value)")
    ggsave(paste(outDir,"KEGG_enrichment_pvalue_barplot.pdf",sep="/"), width = 10, height = 10)
    ggsave(paste(outDir,"KEGG_enrichment_pvalue_barplot.png",sep="/"), width = 10, height = 10)
    
  }}

processREACTOME <- function(geneList,species,outDir){
  library(ReactomePA)
  species.reactome=switch(species,
                          'hsa'='human',
                          'mmu'='mouse',
  )
  en <- enrichPathway(gene       = geneList,
                      organism      = species.reactome,
                      pAdjustMethod = "BH",
                      pvalueCutoff  = 1,
                      minGSSize     = 1 ,
                      maxGSSize     = 100000 ,
                      qvalueCutoff  = 1)
  #   if(length(unique(en@result$p.adjust<0.05))>1){
  if(!is.null(en)){
    res=en@result
    res$Total=apply(res,1,function(x){getBGnumber(x[4])})
    df=data.frame( PathwayID=res$ID,
                   Pathway=res$Description,
                   List=res$Count,
                   Total=res$Total,
                   Pvalue=res$pvalue,
                   adjustPvalue=res$p.adjust,
                   Gene=res$geneID
    )
    write.table(df,file=paste(outDir,"/","Reactome_enrichment.xls",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
    
    #Plot
    if(nrow(df) < 20){out_df <- df[seq(1,nrow(df),1),]}else{out_df <- df[seq(1,20,1),]}
    out_df$Pathway=factor(out_df$Pathway,levels=unique(out_df$Pathway))
    out_df$Pathway=substring(out_df$Pathway,1,50)
    out_df<-out_df[!duplicated(out_df[,"Pathway"]),]
    out_df$rich=out_df$List / out_df$Total
    out_df$Number=out_df$List
    q<-qplot(rich,Pathway,data=out_df,colour=Pvalue,size=Number,main="Reactome Enrichment")+
      scale_colour_gradient(low="red",high="green",limits=c(0,1))+
      theme(panel.background=element_rect(
          fill="white",color="black"),
            panel.grid.major=element_line(color="grey80",linetype="dotted"))+theme_bw() 
    
      ggsave(paste(outDir,"Reactome.richfactor.pdf",sep="/"),width = 10, height = 10)
    ggsave(paste(outDir,"Reactome.richfactor.png",sep="/"),width = 10, height = 10)
    
    
    p<- ggplot(data = out_df) +
      geom_col(aes(y = Pathway, x = -log10(as.numeric(Pvalue))),width =0.8) +
      theme( strip.text.y = element_text(angle = 0),axis.text.x=element_text(size=10,angle=80,hjust=1)) +
      xlab("-log10(P-value)")
    ggsave(paste(outDir,"Reactome_enrichment_pvalue_barplot.pdf",sep="/"), width = 10, height = 10)
    ggsave(paste(outDir,"Reactome_enrichment_pvalue_barplot.png",sep="/"), width = 10, height = 10)
    
  }}

richfactor<-function(en){
  total <- apply(en,1,function(x){getBGnumber(x[4])})
  return(en$Count/total)
}
getBGnumber <- function(ratio,split="/"){
  list<-strsplit(ratio, split = split)[[1]]
  return(as.numeric(list[1]))
}

In [15]:
for (i in colnames(alldiffgene)){
    print(i)
    genelist = alldiffgene[,i]
    outdir = paste0("3-2.enrichment",'/',i)
    if(dir.exists(outdir)){
      print("dir exists")
    }else{
      dir.create(outdir)
    }
    enrichment('minji',outdir,genelist)
}

[1] "KO.CLPvsWT.CLP"


“[1m[22m`qplot()` was deprecated in ggplot2 3.4.0.”


[1] "KO.NCvsWT.NC"
[1] "WT.CLPvsWT.NC"
[1] "KO.CLPvsKO.NC"


In [5]:
class(alldiffgene[,1])

In [6]:
outdir = paste0("3-2.enrichment",'/',1)
    if(dir.exists(outdir)){
      print("dir exists")
    }else{
      dir.create(outdir)
    }

In [9]:
enrichment('minji',outdir,alldiffgene[,1])

“package ‘clusterProfiler’ was built under R version 4.2.2”


Registered S3 methods overwritten by 'treeio':
  method              from    
  MRCA.phylo          tidytree
  MRCA.treedata       tidytree
  Nnode.treedata      tidytree
  Ntip.treedata       tidytree
  ancestor.phylo      tidytree
  ancestor.treedata   tidytree
  child.phylo         tidytree
  child.treedata      tidytree
  full_join.phylo     tidytree
  full_join.treedata  tidytree
  groupClade.phylo    tidytree
  groupClade.treedata tidytree
  groupOTU.phylo      tidytree
  groupOTU.treedata   tidytree
  is.rooted.treedata  tidytree
  nodeid.phylo        tidytree
  nodeid.treedata     tidytree
  nodelab.phylo       tidytree
  nodelab.treedata    tidytree
  offspring.phylo     tidytree
  offspring.treedata  tidytree
  parent.phylo        tidytree
  parent.treedata     tidytree
  root.treedata       tidytree
  rootnode.phylo      tidytree
  sibling.phylo       tidytree

clusterProfiler v4.6.0  For help: https://yulab-smu.t