# Gene term enrichment analysis

In [1]:
suppressMessages({
    library(clusterProfiler)
    library(org.Hs.eg.db)
    library(enrichplot)
    library(pathview)})

In [2]:
# Build a ranked, named vector of log fold changes.
#
# Args:
#   dft:     table with a 'logFC' column and an identifier column.
#   keytype: name of the identifier column used for the vector names
#            (default 'EntrezID').
#
# Returns: the 'logFC' values named by `keytype`, sorted from largest to
#          smallest. Rows with an NA in ANY column are removed first.
extract_geneList <- function(dft, keytype='EntrezID'){
    complete_rows <- na.exclude(dft)
    fold_changes <- setNames(as.vector(complete_rows[, 'logFC']),
                             as.vector(complete_rows[, keytype]))
    sort(fold_changes, decreasing=TRUE)
}


# Collect the identifiers of genes that pass an adjusted p-value cutoff.
#
# Args:
#   dft:     table with an 'adj.P.Val' column and an identifier column.
#   keytype: name of the identifier column to return (default 'EntrezID').
#   alpha:   inclusive cutoff applied to 'adj.P.Val' (default 0.05).
#
# Returns: vector of identifiers (in table order) whose 'adj.P.Val' is
#          <= alpha. Rows with an NA in ANY column are removed first.
extract_geneSet <- function(dft, keytype='EntrezID', alpha=0.05){
    # Check columns explicitly: subset()'s non-standard evaluation would
    # otherwise fall back to a same-named variable in an enclosing
    # environment when 'adj.P.Val' is absent from the table.
    required <- c('adj.P.Val', keytype)
    missing_cols <- setdiff(required, colnames(dft))
    if (length(missing_cols) > 0) {
        stop("extract_geneSet: missing column(s): ",
             paste(missing_cols, collapse=", "), call. = FALSE)
    }
    complete_rows <- na.exclude(dft)
    # No NA can remain after na.exclude(), so the mask is strictly TRUE/FALSE.
    significant <- complete_rows[, 'adj.P.Val'] <= alpha
    as.vector(complete_rows[significant, keytype])
}


In [3]:
# Render one KEGG pathway diagram coloured by the supplied gene values.
#
# Args:
#   geneList:  named vector passed to pathview() as `gene.data`.
#   pathwayID: pathway identifier passed to pathview() as `pathway.id`.
#
# Returns: whatever pathview() returns. Species is fixed to 'hsa' and the
#          gene/compound limits are both fixed to 0.5.
run_pathview <- function(geneList, pathwayID){
    fixed_limits <- list(gene=0.5, cpd=0.5)
    pathview(gene.data = geneList,
             pathway.id = pathwayID,
             species = 'hsa',
             limit = fixed_limits)
}

## Genes

In [4]:
# Read the differential expression table and derive the ranked logFC vector
# (named by the default 'EntrezID' column of extract_geneList).
inputfile <- "../../_m/genes/diffExpr_maleVfemale_full.txt"
zz <- read.delim(inputfile)
geneList1 <- extract_geneList(dft = zz)

In [5]:
# Step 1: translate the Entrez IDs to UniProt and Ensembl IDs, then attach the
# differential expression table via the Ensembl ID.
aa_uniprot <- merge(
    x = bitr(names(geneList1), fromType = "ENTREZID",
             toType = c("UNIPROT", "ENSEMBL"), OrgDb = "org.Hs.eg.db"),
    y = zz,
    by.x = "ENSEMBL", by.y = "ensemblID"
)
# Step 2: translate UniProt IDs to KEGG IDs and attach the table from step 1.
# NOTE(review): the ID mappings can be one-to-many, so the merged table may
# contain repeated genes — confirm this is acceptable downstream.
aa_kegg <- merge(
    x = bitr_kegg(aa_uniprot[["UNIPROT"]], fromType = "uniprot",
                  toType = "kegg", organism = "hsa"),
    y = aa_uniprot,
    by.x = "uniprot", by.y = "UNIPROT"
)

'select()' returned 1:many mapping between keys and columns

“10.52% of input gene IDs are fail to map...”
Reading KEGG annotation online:


“24.38% of input gene IDs are fail to map...”


In [6]:
# Ranked logFC vector and significant-gene set, both keyed by the 'kegg' column.
geneList2 <- extract_geneList(aa_kegg, keytype = 'kegg')
gene2 <- extract_geneSet(aa_kegg, keytype = 'kegg')

### Pathview plotting

In [7]:
# Draw pathway hsa04020 using the KEGG-keyed logFC values.
run_pathview(geneList = geneList2, pathwayID = "hsa04020")

Info: Downloading xml files for hsa04020, 1/1 pathways..

Info: Downloading png files for hsa04020, 1/1 pathways..

'select()' returned 1:1 mapping between keys and columns

Info: Working in directory /ceph/users/jbenja13/github_projects/sex_differences_sz/differential_expression/caudate/dopamine_system/_m

Info: Writing image file hsa04020.pathview.png



In [8]:
# Only hsa04728 is rendered; the neighbouring pathway IDs are kept disabled.
# run_pathview(geneList2, "hsa04724")
# run_pathview(geneList2, "hsa04725")
# run_pathview(geneList2, "hsa04726")
# run_pathview(geneList2, "hsa04727")
run_pathview(geneList = geneList2, pathwayID = "hsa04728")

Info: Downloading xml files for hsa04728, 1/1 pathways..

Info: Downloading png files for hsa04728, 1/1 pathways..

'select()' returned 1:1 mapping between keys and columns

Info: Working in directory /ceph/users/jbenja13/github_projects/sex_differences_sz/differential_expression/caudate/dopamine_system/_m

Info: Writing image file hsa04728.pathview.png



In [9]:
# Only hsa04713 is rendered; the other pathway IDs are kept disabled.
# run_pathview(geneList2, "hsa04010")
run_pathview(geneList = geneList2, pathwayID = "hsa04713")
# run_pathview(geneList2, "hsa04927")
# run_pathview(geneList2, "hsa04710")

Info: Downloading xml files for hsa04713, 1/1 pathways..

Info: Downloading png files for hsa04713, 1/1 pathways..

'select()' returned 1:1 mapping between keys and columns

Info: Working in directory /ceph/users/jbenja13/github_projects/sex_differences_sz/differential_expression/caudate/dopamine_system/_m

Info: Writing image file hsa04713.pathview.png



## Session Info

In [10]:
# Reproducibility record: current timestamp, process timings, and the
# loaded package versions.
Sys.time()
proc.time()
# Set the output width to 120 columns before printing the session table.
options(width = 120)
sessioninfo::session_info()

[1] "2021-07-10 10:27:29 EDT"

   user  system elapsed 
 18.891   1.688  27.501 

─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.0.3 (2020-10-10)
 os       Arch Linux                  
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2021-07-10                  

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package         * version  date       lib source        
 AnnotationDbi   * 1.52.0   2020-10-27 [1] Bioconductor  
 assertthat        0.2.1    2019-03-21 [1] CRAN (R 4.0.2)
 base64enc         0.1-3    2015-07-28 [1] CRAN (R 4.0.2)
 Biobase         * 2.50.0   2020-10-27 [1] Bioconductor  
 BiocGenerics    * 0.36.1   2021-04-16 [1] Bioconductor  
 BiocManager       1.30