Skip to content

Commit

Permalink
add new file
Browse files Browse the repository at this point in the history
  • Loading branch information
reedliu committed Sep 6, 2021
1 parent 1a8e245 commit 804b66b
Show file tree
Hide file tree
Showing 16 changed files with 137 additions and 84 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: genekitr
Type: Package
Title: Gene Analysis Toolkit in R
Version: 0.1.5
Version: 0.2.0
Authors@R: c(person("Yunze", "Liu", email = "jieandze1314@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-7414-8556")))
Maintainer: Yunze Liu <jieandze1314@gmail.com>
Expand Down
10 changes: 10 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
#' Datasets
#' geneList: Entrez gene list sorted by decreasing fold change value
#'
#'
#' @name Datasets
#' @aliases geneList
#' @docType data
#' @keywords datasets
NULL

#' Datasets
#' msig_species contains msigdb species information
#'
Expand Down
14 changes: 7 additions & 7 deletions R/genGO.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,21 @@
#' @param universe Background genes. If missing, all genes in the
#' orgdb will be used as background.
#' @param ... Other arguments to the `enrichGO` function.
#' @importFrom dplyr %>% mutate
#' @importFrom clusterProfiler enrichGO
#' @importFrom dplyr select filter pull mutate %>%
#' @importFrom stringr str_split
#' @importFrom clusterProfiler enrichGO
#'
#' @return A `data.frame` containing gene ratio and fold enrichment.
#' @export
#'
#' @examples
#' data(geneList, package="DOSE")
#' \donttest{
#' data(geneList, package="genekitr")
#' id <- names(geneList)[1:100]
#' ego <- genGO(id, org = 'human',ont = 'mf',pvalueCutoff = 0.05,
#' qvalueCutoff = 0.1 ,use_symbol = TRUE)
#' head(ego)
#' }
genGO <- function(id,
org,
ont,
Expand All @@ -40,11 +42,9 @@ genGO <- function(id,
...){

#--- args ---#
options(rstudio.connectionObserver.errorsSuppressed = TRUE)
stopifnot(is.character(id))
if (missing(universe)) universe <- NULL

org_bk = org
org = mapBiocOrg(tolower(org))
pkg=paste0("org.", org, ".eg.db")
keyType = .gentype(id, org)
Expand All @@ -65,10 +65,10 @@ genGO <- function(id,
}

if( use_symbol){
info = genInfo(id,org)
info = genInfo(id,org,unique = T) %>% na.omit()
new_geneID = stringr::str_split(ego$geneID,'\\/') %>%
lapply(., function(x) {
info[x,'symbol']
info %>% dplyr::filter(input_id %in% x) %>% dplyr::pull(symbol)
}) %>% sapply(., paste0, collapse = "/")
new_ego = ego %>% as.data.frame() %>%
dplyr::mutate(geneID = new_geneID) %>% calcFoldEnrich()
Expand Down
42 changes: 21 additions & 21 deletions R/genGSEA.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,34 @@
#' 'C4','C5','C6','C7','C8','H'.
#' @param subcategory MSigDB sub-collection abbreviation, choose from
#' `msig_category`.
#' @param use_symbol Logical to set result gene id as gene symbol, default is TRUE.
#' @param minGSSize Minimal size of each geneSet for analyzing, default is 10.
#' @param maxGSSize Maximal size of each geneSet for analyzing, default is 500.
#' @param pvalueCutoff Adjusted pvalue cutoff, default is 0.05.
#' @param ... Other arguments to the `GSEA` function.
#' @importFrom dplyr select
#' @importFrom dplyr select filter pull mutate %>%
#' @importFrom stringr str_split
#' @importFrom clusterProfiler GSEA
#'
#' @return A `data.frame`.
#' @export
#'
#' @examples
#' data(geneList, package="DOSE")
#' \donttest{
#' data(geneList, package="genekitr")
#' genGSEA(genelist = geneList,org = 'human', category='C3',
#' subcategory = 'TFT:GTRD',use_symbol = FALSE)
#' subcategory = 'TFT:GTRD')
#' }

genGSEA <- function(genelist,
org,
category = c('C1','C2','C3','C4','C5','C6','C7','C8','H'),
subcategory = NULL,
use_symbol = TRUE,
minGSSize = 10,
maxGSSize = 500,
pvalueCutoff = 0.05,
...){

#--- args ---#
options(rstudio.connectionObserver.errorsSuppressed = TRUE)
category = match.arg(category)

stopifnot(
Expand All @@ -46,8 +46,7 @@ genGSEA <- function(genelist,
if (is.unsorted(rev(genelist))) stop("genelist should be a decreasing sorted vector...")

#--- codes ---#
org.bk = org
geneset <- getMsigdb(org.bk, category, subcategory)
geneset <- getMsigdb(org, category, subcategory)

# use entrez id or symbol
if (any(names(genelist) %in% geneset$gene_symbol)) {
Expand All @@ -66,19 +65,20 @@ genGSEA <- function(genelist,
pvalueCutoff, verbose=F,
...))

if( use_symbol){
info = genInfo(names(genelist),org)
new_geneID = stringr::str_split(egmt$core_enrichment,'\\/') %>%
lapply(., function(x) {
info[x,'symbol']
}) %>% sapply(., paste0, collapse = "/")
new_egmt = egmt %>% as.data.frame() %>%
dplyr::mutate(core_enrichment = new_geneID)

}else{
new_egmt = egmt %>% as.data.frame()
}

# if( use_symbol){
# info = genInfo(names(genelist),org,unique = T) %>% na.omit()
# new_geneID = stringr::str_split(egmt$geneID,'\\/') %>%
# lapply(., function(x) {
# info %>% dplyr::filter(input_id %in% x) %>% dplyr::pull(symbol)
# }) %>% sapply(., paste0, collapse = "/")
# new_egmt = egmt %>% as.data.frame() %>%
# dplyr::mutate(core_enrichment = new_geneID)
#
# }else{
# new_egmt = egmt %>% as.data.frame()
# }
#
new_egmt = egmt %>% as.data.frame()
return(new_egmt)

}
4 changes: 2 additions & 2 deletions R/genInfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
#' @export
#'
#' @examples
#' \donttest{
#' # input ids include a fake id and ids with one-to-many matches
#' x <- genInfo(id = c(
#' "MCM10", "CDC20", "S100A9", "MMP1", "BCC7",
#' "FAKEID", "TP53", "HBD", "NUDT10"
#' ), org = "hg", unique = FALSE)
#' head(x)
#' }
genInfo <- function(id,
org,
unique = FALSE) {
#--- args ---#
options(rstudio.connectionObserver.errorsSuppressed = TRUE)

org <- mapBiocOrg(tolower(org))
keytype <- .gentype(id, org) %>% tolower()

Expand Down
20 changes: 7 additions & 13 deletions R/genKEGG.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
#' @export
#'
#' @examples
#' data(geneList, package="DOSE")
#' \donttest{
#' data(geneList, package="genekitr")
#' id <- names(geneList)[1:100]
#' keg <- genKEGG(id, org = 'human')
#' head(keg)
#' }

genKEGG <- function(id,
org,
Expand All @@ -40,22 +42,20 @@ genKEGG <- function(id,
...){

#--- args ---#
options(rstudio.connectionObserver.errorsSuppressed = TRUE)
stopifnot(is.character(id))
if (missing(universe)) universe <- NULL

org.bk = org
org = mapKeggOrg(tolower(org))
keyType = .gentype(id, org)

if(! keyType %in% c('ENTREZID') ) {
message(paste0(keyType), ' gene will be mapped to entrez id')
trans_id = suppressMessages(transId(id,'entrezid',org.bk)) %>% stringi::stri_remove_na()
trans_id = suppressMessages(transId(id,'entrezid',org)) %>% stringi::stri_remove_na()
}else{
trans_id = id
}

info = genInfo(trans_id,org) %>% dplyr::mutate(entrezid := rownames(.))
info = genInfo(trans_id,org,unique = T) %>% na.omit()

#--- codes ---#
keg <- suppressMessages(
Expand All @@ -71,19 +71,13 @@ genKEGG <- function(id,
if( use_symbol ){
new_geneID = stringr::str_split(keg$geneID,'\\/') %>%
lapply(., function(x) {
info[x,'symbol']
info %>% dplyr::filter(input_id %in% x) %>% dplyr::pull(symbol)
}) %>% sapply(., paste0, collapse = "/")
new_keg = keg %>% as.data.frame() %>%
dplyr::mutate(geneID = new_geneID) %>% calcFoldEnrich()

}else{
new_geneID = stringr::str_split(keg$geneID,'\\/') %>%
lapply(., function(x) {
info %>%
dplyr::filter(entrezid %in% x) %>% dplyr::pull(tolower(keyType))
}) %>% sapply(., paste0, collapse = "/")
new_keg = keg %>% as.data.frame() %>%
dplyr::mutate(geneID = new_geneID) %>% calcFoldEnrich()
new_keg = keg %>% as.data.frame() %>% calcFoldEnrich()
}

return(new_keg)
Expand Down
3 changes: 2 additions & 1 deletion R/transID.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
#' @export
#'
#' @examples
#' \donttest{
#' transId(
#' id = c("Cyp2c23", "Fhit", "Gal3st2b", "Trp53", "Tp53"),
#' trans_to = "ensembl", org = "mouse", unique = TRUE)
#' # input ids include a fake id and ids with one-to-many matches
#' transId(
#' id = c("MMD2", "HBD", "RNR1", "TEC", "BCC7", "FAKEID", "TP53"),
#' trans_to = "entrez", org = "hg", unique = FALSE)
#' }
transId <- function(id, trans_to, org, unique = TRUE) {

#--- args ---#
options(rstudio.connectionObserver.errorsSuppressed = TRUE)
org <- mapBiocOrg(tolower(org))
keytype <- .gentype(id, org)
from <- tolower(keytype)
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ remotes::install_github("GangLiLab/genekitr", build_vignettes = TRUE, dependenci

##### 数据分析(Analyse)

- [ ] 设置自己的示例数据,like:`data(geneList, package="genekitr")`
- [x] 设置自己的示例数据,like:`data(geneList, package="genekitr")`
数据来自`airway` ,使用`DESeq2`进行差异分析
- [x] 富集分析先将基因id全部转成entrez id,然后再根据需要利用 `transId()` 进行转换,达到`setReadable`的目的

##### 可视化(Visualize)
Expand Down
Binary file added data/geneList.rda
Binary file not shown.
6 changes: 5 additions & 1 deletion man/Datasets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/genGO.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 4 additions & 5 deletions man/genGSEA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/genInfo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/genKEGG.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/transId.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 804b66b

Please sign in to comment.