Skip to content

Commit

Permalink
add new file
Browse files Browse the repository at this point in the history
  • Loading branch information
reedliu committed Sep 6, 2021
1 parent 419ef79 commit 1a8e245
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 20 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Imports:
Suggests:
BiocManager,
cowplot,
DOSE,
data.table,
easyPubMed,
futile.logger,
Expand Down
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ importFrom(clusterProfiler,GSEA)
importFrom(clusterProfiler,enrichGO)
importFrom(clusterProfiler,enrichKEGG)
importFrom(dplyr,"%>%")
importFrom(dplyr,add_row)
importFrom(dplyr,all_of)
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
Expand All @@ -30,7 +29,6 @@ importFrom(dplyr,na_if)
importFrom(dplyr,pull)
importFrom(dplyr,relocate)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,slice_head)
importFrom(dplyr,summarize)
importFrom(ggplot2,aes)
Expand Down
40 changes: 28 additions & 12 deletions R/genInfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@
#'
#' @param id Gene id (symbol, ensembl or entrez id) or uniprot id.
#' @param org Species name from `biocOrg_name`, both full name and short name are fine.
#' @param unique Logical to keep only one unique mapped ID, default is FALSE.
#' @param unique Logical to keep only one matched ID, default is FALSE.
#' @importFrom stringr str_detect
#' @importFrom dplyr %>% filter relocate select mutate mutate_all na_if slice add_row
#' @importFrom dplyr %>% filter relocate select mutate mutate_all na_if
#' @importFrom tidyr unnest
#'
#' @return A `data.frame`.
#' @export
#'
#' @examples
#' # input id contains fake id and one-to-many match id
#' x <- genInfo(id = c(
#' "MCM10", "CDC20", "S100A9", "MMP1", "BCC7",
#' "FAKEID", "TP53", "HBD", "NUDT10"),
#' org = "hg", unique = FALSE)
#' "FAKEID", "TP53", "HBD", "NUDT10"
#' ), org = "hg", unique = FALSE)
#' head(x)
genInfo <- function(id,
org,
Expand All @@ -36,17 +37,19 @@ genInfo <- function(id,
tmp1 <- data.frame(input_id = id)
tmp2 <- all %>% dplyr::filter(eval(parse(text = keytype)) %in% id)

tmp3 =tmp2 %>% dplyr::select(-c('symbol','uniprot')) %>% apply(.,1,is.na)

## keep every input id, even if it has no matching info
# only symbol id needs to consider alias
if (keytype != "symbol") {
gene_info <- merge(tmp1, tmp2, by.x = "input_id", by.y = keytype, all.x = T)
} else {
} else if(any(apply(tmp3, 2, sum) == nrow(tmp3))){
# if any matched row is NA in every column other than symbol and uniprot
tmp2 <- tmp2 %>% dplyr::filter_at(dplyr::vars(-symbol, -uniprot), dplyr::all_vars(!is.na(.)))
gene_info <- merge(tmp1, tmp2, by.x = "input_id", by.y = keytype, all.x = T) %>%
dplyr::arrange(id) %>%
dplyr::mutate(symbol = dplyr::case_when(input_id %in% tmp2$symbol ~ input_id)) %>%
dplyr::relocate(symbol, .after = input_id)

# check whether the input id appears among the aliases (only for input ids without a symbol match)
all_alias <- data.frame(all_alias = paste(all$ncbi_alias,all$ensembl_alias, sep = "; "))
check_row <- which(is.na(gene_info$symbol))
Expand All @@ -60,11 +63,25 @@ genInfo <- function(id,
gene_info = gene_info %>% dplyr::slice(-i)
}
}
}else{
gene_info <- merge(tmp1, tmp2, by.x = "input_id", by.y = keytype, all.x = T) %>%
dplyr::mutate(symbol = dplyr::case_when(input_id %in% all$symbol ~ input_id)) %>%
dplyr::relocate(symbol, .after = input_id)

# check whether the input id appears among the aliases (only for input ids without a symbol match)
all_alias <- data.frame(all_alias = paste(all$ncbi_alias, all$ensembl_alias, sep = "; "))
check_row <- which(is.na(gene_info$symbol))
for (i in check_row) {
alias_row <- which(stringr::str_detect(all_alias[, 1], paste0("\\b", gene_info[i, 1], "\\b")))
if (length(alias_row) != 0) {
# no symbol match, but the id matches an alias
gene_info[i, 2:ncol(gene_info)] <- all[alias_row, ]
}
}
}

# one-to-many match
check_n <- table(gene_info$input_id)
tomany_id <- names(check_n)[check_n > 1]
tomany_id <- names(table(gene_info$input_id))[table(gene_info$input_id) > 1]
tomany_id <- tomany_id[!tomany_id %in% id[duplicated(id)]]
if (length(tomany_id) > 0 & length(tomany_id) < 3 & !unique) {
message(paste0(
Expand All @@ -78,9 +95,8 @@ genInfo <- function(id,
))
}

# if keep unique, choose row with minimum NA;
# if NA number is identical, then choose the smallest entrezid
if (unique) {
# if unique is requested, keep the row with the fewest NA values
if (unique & length(tomany_id) != 0) {
sub = gene_info %>% dplyr::filter(input_id %in% tomany_id)
other = gene_info %>% dplyr::filter(!input_id %in% tomany_id)

Expand All @@ -98,7 +114,7 @@ genInfo <- function(id,
gene_info = rbind(other, sub[uniq_order,])
gene_info = gene_info[match(id,gene_info$input_id),]

} else {
} else{
id = factor(id,ordered = T,levels = unique(id))
gene_info$input_id = factor(gene_info$input_id,ordered = T,levels = unique(id))
gene_info = gene_info[order(gene_info$input_id),]
Expand Down
4 changes: 2 additions & 2 deletions R/transID.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' transId(
#' id = c("Cyp2c23", "Fhit", "Gal3st2b", "Trp53", "Tp53"),
#' trans_to = "ensembl", org = "mouse", unique = TRUE)
#' # input id contains duplicates,fake id and one-to-many match id
#' # input id contains fake id and one-to-many match id
#' transId(
#' id = c("MMD2", "HBD", "RNR1", "TEC", "BCC7", "FAKEID", "TP53"),
#' trans_to = "entrez", org = "hg", unique = FALSE)
Expand Down Expand Up @@ -48,7 +48,7 @@ transId <- function(id, trans_to, org, unique = TRUE) {
message('\n',percent, " genes are mapped from ", from, " to ", trans_to)
if (n_new != length(id)) {
message(paste0(
"Non matched ID are marked as NA",
"Non-matched ID are marked as NA",
'...\nMaybe use "na.omit()" for downstream analysis'
))
}
Expand Down
7 changes: 4 additions & 3 deletions man/genInfo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/transId.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1a8e245

Please sign in to comment.