add new file

GangLiLab · Sep 6, 2021 · 804b66b · 804b66b
1 parent 1a8e245
commit 804b66b
Show file tree

Hide file tree

Showing 16 changed files with 137 additions and 84 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: genekitr
 Type: Package
 Title: Gene Analysis Toolkit in R
-Version: 0.1.5
+Version: 0.2.0
 Authors@R: c(person("Yunze", "Liu", email = "jieandze1314@gmail.com", role = c("aut", "cre"), 
             comment = c(ORCID = "0000-0002-7414-8556")))
 Maintainer: Yunze Liu <jieandze1314@gmail.com>

diff --git a/R/data.R b/R/data.R
@@ -1,3 +1,13 @@
+#' Datasets
+#' geneList is an entrez gene list in decreasing order of fold change value
+#'
+#'
+#' @name Datasets
+#' @aliases geneList
+#' @docType data
+#' @keywords datasets
+NULL
+
 #' Datasets
 #' msig_species contains msigdb species information
 #'

diff --git a/R/genGO.R b/R/genGO.R
@@ -14,19 +14,21 @@
 #' @param universe Background genes. If missing, all genes in
 #'   orgdb will be used as background.
 #' @param ... Other arguments passed to the `enrichGO` function.
-#' @importFrom dplyr  %>% mutate
-#' @importFrom clusterProfiler enrichGO
+#' @importFrom dplyr select filter pull mutate %>%
 #' @importFrom stringr str_split
+#' @importFrom clusterProfiler enrichGO
 #'
 #' @return A `data.frame` containing gene ratio and fold enrichment.
 #' @export
 #'
 #' @examples
-#' data(geneList, package="DOSE")
+#' \donttest{
+#' data(geneList, package="genekitr")
 #' id <- names(geneList)[1:100]
 #' ego <- genGO(id, org = 'human',ont = 'mf',pvalueCutoff = 0.05,
 #'   qvalueCutoff = 0.1 ,use_symbol = TRUE)
 #' head(ego)
+#' }
 genGO <- function(id,
                   org,
                   ont,
@@ -40,11 +42,9 @@ genGO <- function(id,
                   ...){
 
   #--- args ---#
-  options(rstudio.connectionObserver.errorsSuppressed = TRUE)
   stopifnot(is.character(id))
   if (missing(universe)) universe <- NULL
 
-  org_bk = org
   org = mapBiocOrg(tolower(org))
   pkg=paste0("org.", org, ".eg.db")
   keyType = .gentype(id, org)
@@ -65,10 +65,10 @@ genGO <- function(id,
   }
 
   if( use_symbol){
-    info = genInfo(id,org)
+    info = genInfo(id,org,unique = T) %>% na.omit()
     new_geneID = stringr::str_split(ego$geneID,'\\/') %>%
       lapply(., function(x) {
-        info[x,'symbol']
+        info %>% dplyr::filter(input_id %in% x) %>% dplyr::pull(symbol)
       }) %>% sapply(., paste0, collapse = "/")
     new_ego =  ego %>% as.data.frame() %>%
       dplyr::mutate(geneID = new_geneID) %>% calcFoldEnrich()

diff --git a/R/genGSEA.R b/R/genGSEA.R
@@ -7,34 +7,34 @@
 #'   'C4','C5','C6','C7','C8','H'.
 #' @param subcategory MSigDB sub-collection abbreviation, choose from
 #'   `msig_category`.
-#' @param use_symbol Logical to set result gene id as gene symbol, default is TRUE.
 #' @param minGSSize Minimal size of each geneSet for analyzing, default is 10.
 #' @param maxGSSize Maximal size of each geneSet for analyzing, default is 500.
 #' @param pvalueCutoff Adjusted pvalue cutoff, default is 0.05.
 #' @param ... Other arguments passed to the `GSEA` function.
-#' @importFrom dplyr select
+#' @importFrom dplyr select filter pull mutate %>%
+#' @importFrom stringr str_split
 #' @importFrom clusterProfiler GSEA
 #'
 #' @return A `data.frame`.
 #' @export
 #'
 #' @examples
-#' data(geneList, package="DOSE")
+#' \donttest{
+#' data(geneList, package="genekitr")
 #' genGSEA(genelist = geneList,org = 'human', category='C3',
-#'   subcategory = 'TFT:GTRD',use_symbol = FALSE)
+#'   subcategory = 'TFT:GTRD')
+#' }
 
 genGSEA <- function(genelist,
                     org,
                     category = c('C1','C2','C3','C4','C5','C6','C7','C8','H'),
                     subcategory = NULL,
-                    use_symbol = TRUE,
                     minGSSize = 10,
                     maxGSSize = 500,
                     pvalueCutoff = 0.05,
                     ...){
 
   #--- args ---#
-  options(rstudio.connectionObserver.errorsSuppressed = TRUE)
   category = match.arg(category)
 
   stopifnot(
@@ -46,8 +46,7 @@ genGSEA <- function(genelist,
   if (is.unsorted(rev(genelist))) stop("genelist should be a decreasing sorted vector...")
 
   #--- codes ---#
-  org.bk = org
-  geneset <- getMsigdb(org.bk, category, subcategory)
+  geneset <- getMsigdb(org, category, subcategory)
 
   # use entrez id or symbol
   if (any(names(genelist) %in% geneset$gene_symbol)) {
@@ -66,19 +65,20 @@ genGSEA <- function(genelist,
                                                  pvalueCutoff, verbose=F,
                                                  ...))
 
-  if( use_symbol){
-    info = genInfo(names(genelist),org)
-    new_geneID = stringr::str_split(egmt$core_enrichment,'\\/') %>%
-      lapply(., function(x) {
-        info[x,'symbol']
-      }) %>% sapply(., paste0, collapse = "/")
-    new_egmt =  egmt %>% as.data.frame() %>%
-      dplyr::mutate(core_enrichment = new_geneID)
-
-  }else{
-    new_egmt = egmt %>% as.data.frame()
-  }
-
+  # if( use_symbol){
+  #   info = genInfo(names(genelist),org,unique = T) %>% na.omit()
+  #   new_geneID = stringr::str_split(egmt$geneID,'\\/') %>%
+  #     lapply(., function(x) {
+  #       info %>% dplyr::filter(input_id %in% x) %>% dplyr::pull(symbol)
+  #     }) %>% sapply(., paste0, collapse = "/")
+  #   new_egmt =  egmt %>% as.data.frame() %>%
+  #     dplyr::mutate(core_enrichment = new_geneID)
+  #
+  # }else{
+  #   new_egmt = egmt %>% as.data.frame()
+  # }
+  #
+  new_egmt = egmt %>% as.data.frame()
   return(new_egmt)
 
 }
diff --git a/R/genInfo.R b/R/genInfo.R
@@ -11,18 +11,18 @@
 #' @export
 #'
 #' @examples
+#' \donttest{
 #' # input id contains fake id and one-to-many match id
 #' x <- genInfo(id = c(
 #'   "MCM10", "CDC20", "S100A9", "MMP1", "BCC7",
 #'   "FAKEID", "TP53", "HBD", "NUDT10"
 #' ), org = "hg", unique = FALSE)
 #' head(x)
+#' }
 genInfo <- function(id,
                     org,
                     unique = FALSE) {
   #--- args ---#
-  options(rstudio.connectionObserver.errorsSuppressed = TRUE)
-
   org <- mapBiocOrg(tolower(org))
   keytype <- .gentype(id, org) %>% tolower()
 

diff --git a/R/genKEGG.R b/R/genKEGG.R
@@ -23,10 +23,12 @@
 #' @export
 #'
 #' @examples
-#' data(geneList, package="DOSE")
+#' \donttest{
+#' data(geneList, package="genekitr")
 #' id <- names(geneList)[1:100]
 #' keg <- genKEGG(id, org = 'human')
 #' head(keg)
+#' }
 
 genKEGG <- function(id,
                     org,
@@ -40,22 +42,20 @@ genKEGG <- function(id,
                     ...){
 
   #--- args ---#
-  options(rstudio.connectionObserver.errorsSuppressed = TRUE)
   stopifnot(is.character(id))
   if (missing(universe)) universe <- NULL
 
-  org.bk = org
   org = mapKeggOrg(tolower(org))
   keyType = .gentype(id, org)
 
   if(! keyType %in% c('ENTREZID') ) {
     message(paste0(keyType), ' gene will be mapped to entrez id')
-    trans_id = suppressMessages(transId(id,'entrezid',org.bk)) %>% stringi::stri_remove_na()
+    trans_id = suppressMessages(transId(id,'entrezid',org)) %>% stringi::stri_remove_na()
   }else{
     trans_id = id
   }
 
-  info = genInfo(trans_id,org) %>% dplyr::mutate(entrezid := rownames(.))
+  info = genInfo(trans_id,org,unique = T) %>% na.omit()
 
   #--- codes ---#
   keg <- suppressMessages(
@@ -71,19 +71,13 @@ genKEGG <- function(id,
   if( use_symbol ){
     new_geneID = stringr::str_split(keg$geneID,'\\/') %>%
       lapply(., function(x) {
-        info[x,'symbol']
+        info %>% dplyr::filter(input_id %in% x) %>% dplyr::pull(symbol)
       }) %>% sapply(., paste0, collapse = "/")
     new_keg =  keg %>% as.data.frame() %>%
       dplyr::mutate(geneID = new_geneID) %>% calcFoldEnrich()
 
   }else{
-    new_geneID = stringr::str_split(keg$geneID,'\\/') %>%
-      lapply(., function(x) {
-        info %>%
-          dplyr::filter(entrezid %in% x) %>% dplyr::pull(tolower(keyType))
-      }) %>% sapply(., paste0, collapse = "/")
-    new_keg =  keg %>% as.data.frame() %>%
-      dplyr::mutate(geneID = new_geneID) %>% calcFoldEnrich()
+    new_keg =  keg %>% as.data.frame()  %>% calcFoldEnrich()
   }
 
   return(new_keg)

diff --git a/R/transID.R b/R/transID.R
@@ -11,17 +11,18 @@
 #' @export
 #'
 #' @examples
+#' \donttest{
 #' transId(
 #'   id = c("Cyp2c23", "Fhit", "Gal3st2b", "Trp53", "Tp53"),
 #'   trans_to = "ensembl", org = "mouse", unique = TRUE)
 #' # input id contains fake id and one-to-many match id
 #' transId(
 #'   id = c("MMD2", "HBD", "RNR1", "TEC", "BCC7", "FAKEID", "TP53"),
 #'   trans_to = "entrez", org = "hg", unique = FALSE)
+#' }
 transId <- function(id, trans_to, org, unique = TRUE) {
 
   #--- args ---#
-  options(rstudio.connectionObserver.errorsSuppressed = TRUE)
   org <- mapBiocOrg(tolower(org))
   keytype <- .gentype(id, org)
   from <- tolower(keytype)

diff --git a/README.md b/README.md
@@ -106,7 +106,8 @@ remotes::install_github("GangLiLab/genekitr", build_vignettes = TRUE, dependenci
 
 ##### 数据分析（Analyse）
 
-- [ ] 设置自己的示例数据，like：`data(geneList, package="genekitr")`
+- [x] 设置自己的示例数据，like：`data(geneList, package="genekitr")`
+  数据来自`airway` ，使用`DESeq2`进行差异分析
 - [x] 富集分析先将基因id全部转成entrez id，然后再根据需要利用 `transId()`  进行转换，达到`setReadable`的目的
 
 ##### 可视化（Visualize）

diff --git a/data/geneList.rda b/data/geneList.rda
diff --git a/man/Datasets.Rd b/man/Datasets.Rd
diff --git a/man/genGO.Rd b/man/genGO.Rd
diff --git a/man/genGSEA.Rd b/man/genGSEA.Rd
diff --git a/man/genInfo.Rd b/man/genInfo.Rd
diff --git a/man/genKEGG.Rd b/man/genKEGG.Rd
diff --git a/man/transId.Rd b/man/transId.Rd