Skip to content

Commit

Permalink
default asEnrichdat to enrichment analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
reedliu committed Sep 10, 2021
1 parent a629428 commit 0e6cbb8
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 145 deletions.
124 changes: 124 additions & 0 deletions R/asEnrichdat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#' Adjust dataframe for enrichment plot
#'
#' Renames the columns of an enrichment-analysis result so that it contains
#' `Description`, `Count`, `FoldEnrich`/`GeneRatio` and
#' `pvalue`/`qvalue`/`p.adjust`, and derives `Count` and `GeneRatio` when
#' they are not present.
#'
#' Columns are recognised from their lower-cased names after brackets,
#' spaces, hyphens and underscores have been removed. When no name matches,
#' the description and gene columns are guessed from the cell contents.
#' An error is raised when no description, count or (for non-GSEA input)
#' fold-enrichment column can be identified.
#'
#' @param enrich_df dataframe of enrichment analysis result.
#'
#' @importFrom stringr str_remove_all str_split str_remove
#' @importFrom dplyr mutate pull
#' @return A `data.frame` with standardised column names; `Count` and
#'   `GeneRatio` are appended when they had to be computed.
#' @export

as.enrichdat <- function(enrich_df) {
  # Implemented with base R only; the @importFrom tags above are kept so the
  # generated NAMESPACE does not change.

  # Convert "k/n" strings (or plain numbers) to numeric. Replaces
  # eval(parse(text = ...)), which would execute arbitrary cell contents.
  ratio_to_numeric <- function(x) {
    if (is.numeric(x)) {
      return(x)
    }
    pieces <- strsplit(as.character(x), "/", fixed = TRUE)
    vapply(pieces, function(p) {
      if (length(p) == 2L) {
        as.numeric(p[1]) / as.numeric(p[2])
      } else if (length(p) == 1L) {
        as.numeric(p)
      } else {
        NA_real_
      }
    }, numeric(1))
  }

  # Column header carrying a list size in brackets, e.g. "upload_1 (200)".
  # At most four digits so that a five-digit size is not matched
  # (presumably the reference list -- TODO confirm). Digits include 0,
  # otherwise sizes such as 200 would never match.
  panther_pattern <- "\\([0-9]{1,4}\\)"

  n_col <- ncol(enrich_df)

  ## get lower case colnames
  strip <- c("\\(", "\\)", " ", "-", "_")
  to_check <- gsub(
    paste(strip, collapse = "|"), "",
    tolower(colnames(enrich_df))
  )

  ## find description col
  is_description <- grepl("description", to_check)
  if (any(is_description)) {
    colnames(enrich_df)[is_description] <- "Description"
  } else {
    # Guess: a column in the first half whose every cell contains a word.
    is_text <- apply(
      enrich_df, 2,
      function(x) all(grepl("[A-Za-z]{3,}", x))
    )
    text_idx <- which(is_text)
    front <- text_idx[text_idx < (n_col / 2)]
    if (length(front) > 0) {
      # Use the same threshold for the test and the rename, and rename only
      # the first candidate so column names stay unique.
      colnames(enrich_df)[front[1]] <- "Description"
    } else {
      stop("Not found description column!")
    }
  }

  ## find count col
  # if finds genes col, calc gene num as count; else find another col as count
  is_count <- grepl("count", to_check)
  if (any(is_count)) {
    colnames(enrich_df)[is_count] <- "Count"
  } else {
    is_gene <- apply(
      enrich_df, 2,
      function(x) {
        all(grepl("[A-Za-z]{3,}|\\/|,", x) & !grepl("tags|list", x))
      }
    )
    gene_idx <- which(is_gene)
    back <- gene_idx[gene_idx > (n_col / 2)]
    is_panther <- grepl(panther_pattern, colnames(enrich_df))
    if (length(back) > 0) {
      colnames(enrich_df)[back] <- "geneID"
      gene_lists <- strsplit(as.character(enrich_df[["geneID"]]), ",|\\/")
      enrich_df$Count <- lengths(gene_lists)
    } else if (any(is_panther)) {
      enrich_df$Count <- as.numeric(enrich_df[[which(is_panther)[1]]])
    } else {
      stop('Please rename the gene count column as "Count"!')
    }
  }

  ## find FoldEnrich col
  # GSEA result has no FoldEnrich, need to exclude
  is_gsea <- any(grepl("\\benrichmentscore\\b", to_check)) &&
    any(grepl("\\bleadingedge\\b", to_check))
  if (!is_gsea) {
    is_fold <- grepl("foldenrich|enrichment", to_check)
    if (any(is_fold)) {
      colnames(enrich_df)[is_fold] <- "FoldEnrich"
    } else {
      stop('Please rename the fold enrichment column as "FoldEnrich"!')
    }
  }

  ## find GeneRatio col
  is_ratio <- grepl("generatio", to_check)
  is_setsize <- grepl("setsize", to_check)
  # Re-evaluated here because columns may have been renamed above.
  is_panther <- grepl(panther_pattern, colnames(enrich_df))
  if (any(is_ratio)) {
    colnames(enrich_df)[is_ratio] <- "GeneRatio"
    enrich_df$GeneRatio <- ratio_to_numeric(enrich_df[["GeneRatio"]])
  } else if (any(is_setsize)) {
    # gsea
    colnames(enrich_df)[is_setsize] <- "setSize"
    enrich_df$GeneRatio <- as.numeric(enrich_df[["Count"]]) /
      as.numeric(enrich_df[["setSize"]])
  } else if (any(is_panther)) {
    # panther result: the list size is embedded in the column header
    header <- colnames(enrich_df)[is_panther][1]
    setsize <- as.numeric(sub("\\)", "", sub(".*\\(", "", header)))
    enrich_df$GeneRatio <- as.numeric(enrich_df[["Count"]]) / setsize
  } else {
    # Fall back to a column holding one repeated value (e.g. a list total).
    # NOTE(review): any constant numeric column qualifies, including a
    # constant Count or p-value column -- confirm this heuristic is wanted.
    is_const <- apply(enrich_df, 2, function(x) length(unique(x)) == 1)
    if (any(is_const)) {
      const_vals <- vapply(
        enrich_df[1, is_const, drop = FALSE],
        function(v) suppressWarnings(as.numeric(as.character(v))),
        numeric(1)
      )
      # Drop non-numeric and zero constants, then use the smallest one.
      const_vals <- const_vals[!is.na(const_vals) & const_vals != 0]
      if (length(const_vals) > 0) {
        enrich_df$GeneRatio <- as.numeric(enrich_df[["Count"]]) /
          min(const_vals)
      }
    }
  }

  ## find pvalue col
  # The p.adjust step below runs afterwards and overrides this rename for
  # "corrected p-value" columns, so the order of these steps matters.
  if (any(grepl("pvalue", to_check))) {
    colnames(enrich_df)[grepl("pvalue", to_check)] <- "pvalue"
  } else if (any(grepl("\\buncorrectedpvalue\\b", to_check))) {
    colnames(enrich_df)[grepl("\\buncorrectedpvalue\\b", to_check)] <- "pvalue"
  }

  ## find p.adjust col
  if (any(grepl("p.adjust", to_check))) {
    colnames(enrich_df)[grepl("p.adjust", to_check)] <- "p.adjust"
  } else if (any(grepl("\\bcorrectedpvalue\\b", to_check))) {
    colnames(enrich_df)[grepl("\\bcorrectedpvalue\\b", to_check)] <- "p.adjust"
  }

  ## find qvalue col
  is_fdr <- grepl("fdr", to_check) & !grepl("fdrrate", to_check)
  if (any(grepl("qvalue", to_check))) {
    colnames(enrich_df)[grepl("qvalue", to_check)] <- "qvalue"
  } else if (any(is_fdr)) {
    # Rename with the same mask that was tested, so "fdr rate" columns are
    # left alone.
    colnames(enrich_df)[is_fdr] <- "qvalue"
  }

  enrich_df
}
6 changes: 4 additions & 2 deletions R/genGO.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,13 @@ genGO <- function(id,
new_ego <- ego %>%
as.data.frame() %>%
dplyr::mutate(geneID = new_geneID) %>%
calcFoldEnrich()
calcFoldEnrich() %>%
as.enrichdat()
} else {
new_ego <- ego %>%
as.data.frame() %>%
calcFoldEnrich()
calcFoldEnrich() %>%
as.enrichdat()
}

return(new_ego)
Expand Down
2 changes: 1 addition & 1 deletion R/genGSEA.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,6 @@ genGSEA <- function(genelist,
# new_egmt = egmt %>% as.data.frame()
# }
#
new_egmt <- egmt %>% as.data.frame()
new_egmt <- egmt %>% as.data.frame() %>% as.enrichdat()
return(new_egmt)
}
6 changes: 4 additions & 2 deletions R/genKEGG.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ genKEGG <- function(id,
new_keg <- keg %>%
as.data.frame() %>%
dplyr::mutate(geneID = new_geneID) %>%
calcFoldEnrich()
calcFoldEnrich() %>%
as.enrichdat()
} else {
new_keg <- keg %>%
as.data.frame() %>%
calcFoldEnrich()
calcFoldEnrich() %>%
as.enrichdat()
}

return(new_keg)
Expand Down
3 changes: 2 additions & 1 deletion R/ploTheme.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#' @importFrom ggplot2 theme_bw theme margin unit element_text element_rect element_line
#' @examples
#' library(ggplot2)
#' ggplot(mtcars, aes(x=wt, y=mpg))+ geom_point()+ plot_theme(theme_type = 'bw')
#' ggplot(mtcars, aes(x=wt, y=mpg))+ geom_point()+
#' plot_theme(theme_type = 'bw', font_type = 'Times', border_thick = 2)
#' @export
plot_theme <- function(theme_type = c('bw','classic'),
main_text_size = 14,
Expand Down
145 changes: 11 additions & 134 deletions R/plotEnrichDot.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' @param enrich_df `data.frame` of enrichment analysis result .
#' @param xlab_type X-axis label type, one of 'GeneRatio','Count','FoldEnrich'.
#' @param legend_by Stats legend type, one of "pvalue", "p.adjust", "qvalue".
#' @param legend_type Stats legend type, one of "pvalue", "p.adjust", "qvalue".
#' @param low_color Legend color for low pvalue or qvalue, default is "red".
#' @param high_color Legend color for high pvalue or qvalue, default is "blue".
#' @param show_item Numeric, select top N rows to show, default is 10.
Expand All @@ -27,12 +27,13 @@
#' org = "human", ont = "mf", pvalueCutoff = 0.05,
#' qvalueCutoff = 0.1, use_symbol = FALSE
#' )
#' plotEnrichDot(ego)
#' plotEnrichDot(ego,remove_grid = T, main_text_size = 8,
#' legend_text_size = 6,border_thick = 1.5)
#' }
#'
plotEnrichDot <- function(enrich_df,
xlab_type = c("FoldEnrich", "GeneRatio", "Count"),
legend_by = c("p.adjust", "pvalue", "qvalue"),
legend_type = c("p.adjust", "pvalue", "qvalue"),
low_color = "red",
high_color = "blue",
show_item = 10,
Expand All @@ -43,7 +44,7 @@ plotEnrichDot <- function(enrich_df,
#--- args ---#
stopifnot(is.numeric(show_item))
xlab_type <- match.arg(xlab_type)
legend_by <- match.arg(legend_by)
legend_type <- match.arg(legend_type)

types <- c("GeneRatio", "Count", "FoldEnrich")
legends <- c("p.adjust", "pvalue", "qvalue")
Expand All @@ -54,9 +55,9 @@ plotEnrichDot <- function(enrich_df,
paste(intersect(colnames(enrich_df), types), collapse = " | ")
)
}
if (!legend_by %in% colnames(enrich_df)) {
if (!legend_type %in% colnames(enrich_df)) {
stop(
legend_by, " not included in this dataframe, try: ",
legend_type, " not included in this dataframe, try: ",
paste(intersect(colnames(enrich_df), legends), collapse = " | ")
)
}
Expand All @@ -65,8 +66,8 @@ plotEnrichDot <- function(enrich_df,
xlab_title <- ifelse(xlab_type == "FoldEnrich", "Fold Enrichment",
ifelse(xlab_type == "GeneRatio", "Gene Ratio", "Count")
)
legend_title <- ifelse(legend_by == "pvalue", "Pvalue",
ifelse(legend_by == "p.adjust", "P.adjust", "FDR")
legend_title <- ifelse(legend_type == "pvalue", "Pvalue",
ifelse(legend_type == "p.adjust", "P.adjust", "FDR")
)

if (show_item <= nrow(enrich_df)) {
Expand All @@ -83,7 +84,7 @@ plotEnrichDot <- function(enrich_df,
#--- plot ---#
p <- ggplot(enrich_df, aes(x = eval(parse(text = xlab_type)), y = Description)) +
geom_point(aes(
color = eval(parse(text = legend_by)),
color = eval(parse(text = legend_type)),
size = Count
)) +
scale_color_continuous(
Expand All @@ -92,7 +93,7 @@ plotEnrichDot <- function(enrich_df,
labels = function(x) format(x, scientific = T)
) +
xlab(xlab_title) +
labs(color = legend_by)+
labs(color = legend_type)+
xlim(xlim_left,xlim_right)+
plot_theme(...)

Expand All @@ -114,127 +115,3 @@ text_wraper <- function(width) {
}


##' Adjust dataframe for enrichment plot
##'
##' Renames the columns of an enrichment-analysis result so that it contains
##' `Description`, `Count`, `FoldEnrich`/`GeneRatio` and
##' `pvalue`/`qvalue`/`p.adjust`, and derives `Count` and `GeneRatio` when
##' they are not present.
##'
##' Columns are recognised from their lower-cased names after brackets,
##' spaces, hyphens and underscores have been removed. When no name matches,
##' the description and gene columns are guessed from the cell contents.
##' An error is raised when no description, count or (for non-GSEA input)
##' fold-enrichment column can be identified.
##'
##' @param enrich_df dataframe of enrichment analysis result.
##'
##' @importFrom stringr str_remove_all str_split str_remove
##' @importFrom dplyr mutate pull
##' @return A `data.frame` with standardised column names; `Count` and
##'   `GeneRatio` are appended when they had to be computed.
##' @export

as.enrichdat <- function(enrich_df) {
  # Implemented with base R only; the @importFrom tags above are kept so the
  # generated NAMESPACE does not change.

  # Convert "k/n" strings (or plain numbers) to numeric. Replaces
  # eval(parse(text = ...)), which would execute arbitrary cell contents.
  ratio_to_numeric <- function(x) {
    if (is.numeric(x)) {
      return(x)
    }
    pieces <- strsplit(as.character(x), "/", fixed = TRUE)
    vapply(pieces, function(p) {
      if (length(p) == 2L) {
        as.numeric(p[1]) / as.numeric(p[2])
      } else if (length(p) == 1L) {
        as.numeric(p)
      } else {
        NA_real_
      }
    }, numeric(1))
  }

  # Column header carrying a list size in brackets, e.g. "upload_1 (200)".
  # At most four digits so that a five-digit size is not matched
  # (presumably the reference list -- TODO confirm). Digits include 0,
  # otherwise sizes such as 200 would never match.
  panther_pattern <- "\\([0-9]{1,4}\\)"

  n_col <- ncol(enrich_df)

  ## get lower case colnames
  strip <- c("\\(", "\\)", " ", "-", "_")
  to_check <- gsub(
    paste(strip, collapse = "|"), "",
    tolower(colnames(enrich_df))
  )

  ## find description col
  is_description <- grepl("description", to_check)
  if (any(is_description)) {
    colnames(enrich_df)[is_description] <- "Description"
  } else {
    # Guess: a column in the first half whose every cell contains a word.
    is_text <- apply(
      enrich_df, 2,
      function(x) all(grepl("[A-Za-z]{3,}", x))
    )
    text_idx <- which(is_text)
    front <- text_idx[text_idx < (n_col / 2)]
    if (length(front) > 0) {
      # Use the same threshold for the test and the rename, and rename only
      # the first candidate so column names stay unique.
      colnames(enrich_df)[front[1]] <- "Description"
    } else {
      stop("Not found description column!")
    }
  }

  ## find count col
  # if finds genes col, calc gene num as count; else find another col as count
  is_count <- grepl("count", to_check)
  if (any(is_count)) {
    colnames(enrich_df)[is_count] <- "Count"
  } else {
    is_gene <- apply(
      enrich_df, 2,
      function(x) {
        all(grepl("[A-Za-z]{3,}|\\/|,", x) & !grepl("tags|list", x))
      }
    )
    gene_idx <- which(is_gene)
    back <- gene_idx[gene_idx > (n_col / 2)]
    is_panther <- grepl(panther_pattern, colnames(enrich_df))
    if (length(back) > 0) {
      colnames(enrich_df)[back] <- "geneID"
      gene_lists <- strsplit(as.character(enrich_df[["geneID"]]), ",|\\/")
      enrich_df$Count <- lengths(gene_lists)
    } else if (any(is_panther)) {
      enrich_df$Count <- as.numeric(enrich_df[[which(is_panther)[1]]])
    } else {
      stop('Please rename the gene count column as "Count"!')
    }
  }

  ## find FoldEnrich col
  # GSEA result has no FoldEnrich, need to exclude
  is_gsea <- any(grepl("\\benrichmentscore\\b", to_check)) &&
    any(grepl("\\bleadingedge\\b", to_check))
  if (!is_gsea) {
    is_fold <- grepl("foldenrich|enrichment", to_check)
    if (any(is_fold)) {
      colnames(enrich_df)[is_fold] <- "FoldEnrich"
    } else {
      stop('Please rename the fold enrichment column as "FoldEnrich"!')
    }
  }

  ## find GeneRatio col
  is_ratio <- grepl("generatio", to_check)
  is_setsize <- grepl("setsize", to_check)
  # Re-evaluated here because columns may have been renamed above.
  is_panther <- grepl(panther_pattern, colnames(enrich_df))
  if (any(is_ratio)) {
    colnames(enrich_df)[is_ratio] <- "GeneRatio"
    enrich_df$GeneRatio <- ratio_to_numeric(enrich_df[["GeneRatio"]])
  } else if (any(is_setsize)) {
    # gsea
    colnames(enrich_df)[is_setsize] <- "setSize"
    enrich_df$GeneRatio <- as.numeric(enrich_df[["Count"]]) /
      as.numeric(enrich_df[["setSize"]])
  } else if (any(is_panther)) {
    # panther result: the list size is embedded in the column header
    header <- colnames(enrich_df)[is_panther][1]
    setsize <- as.numeric(sub("\\)", "", sub(".*\\(", "", header)))
    enrich_df$GeneRatio <- as.numeric(enrich_df[["Count"]]) / setsize
  } else {
    # Fall back to a column holding one repeated value (e.g. a list total).
    # NOTE(review): any constant numeric column qualifies, including a
    # constant Count or p-value column -- confirm this heuristic is wanted.
    is_const <- apply(enrich_df, 2, function(x) length(unique(x)) == 1)
    if (any(is_const)) {
      const_vals <- vapply(
        enrich_df[1, is_const, drop = FALSE],
        function(v) suppressWarnings(as.numeric(as.character(v))),
        numeric(1)
      )
      # Drop non-numeric and zero constants, then use the smallest one.
      const_vals <- const_vals[!is.na(const_vals) & const_vals != 0]
      if (length(const_vals) > 0) {
        enrich_df$GeneRatio <- as.numeric(enrich_df[["Count"]]) /
          min(const_vals)
      }
    }
  }

  ## find pvalue col
  # The p.adjust step below runs afterwards and overrides this rename for
  # "corrected p-value" columns, so the order of these steps matters.
  if (any(grepl("pvalue", to_check))) {
    colnames(enrich_df)[grepl("pvalue", to_check)] <- "pvalue"
  } else if (any(grepl("\\buncorrectedpvalue\\b", to_check))) {
    colnames(enrich_df)[grepl("\\buncorrectedpvalue\\b", to_check)] <- "pvalue"
  }

  ## find p.adjust col
  if (any(grepl("p.adjust", to_check))) {
    colnames(enrich_df)[grepl("p.adjust", to_check)] <- "p.adjust"
  } else if (any(grepl("\\bcorrectedpvalue\\b", to_check))) {
    colnames(enrich_df)[grepl("\\bcorrectedpvalue\\b", to_check)] <- "p.adjust"
  }

  ## find qvalue col
  is_fdr <- grepl("fdr", to_check) & !grepl("fdrrate", to_check)
  if (any(grepl("qvalue", to_check))) {
    colnames(enrich_df)[grepl("qvalue", to_check)] <- "qvalue"
  } else if (any(is_fdr)) {
    # Rename with the same mask that was tested, so "fdr rate" columns are
    # left alone.
    colnames(enrich_df)[is_fdr] <- "qvalue"
  }

  enrich_df
}
1 change: 1 addition & 0 deletions R/utils_tool.R
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,4 @@ calcFoldEnrich <- function(df) {
}
return(df)
}

2 changes: 1 addition & 1 deletion man/as.enrichdat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0e6cbb8

Please sign in to comment.