/
fix_celltype_names.R
33 lines (33 loc) · 1.17 KB
/
fix_celltype_names.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#' Fix celltype names
#'
#' Make sure celltypes don't contain characters that could interfere with
#' downstream analyses. For example, the R package
#' \href{https://github.com/neurogenomics/MAGMA_Celltyping}{MAGMA.Celltyping}
#' cannot have spaces in celltype names because spaces are used as a delimiter
#' in later steps.
#'
#' Every match of \code{replace_chars} is replaced with "_", and any run of
#' consecutive underscores is then collapsed into a single "_".
#'
#' @param celltypes Character vector of celltype names.
#' If \code{NULL}, \code{NULL} is returned.
#' @param replace_chars Regex string of characters to replace
#' with "_" when renaming columns.
#' @param make_unique Make all entries unique. Entries that are
#' identical after cleaning get a numeric suffix joined with "_"
#' (e.g. "a_b", "a_b_1", "a_b_2"), so that the suffix itself does not
#' reintroduce a "." into the names.
#' @returns Fixed celltype names, as a character vector of the same length
#' as \code{celltypes}.
#'
#' @export
#' @examples
#' ct <- c("microglia", "astrocytes", "Pyramidal SS")
#' ct_fixed <- fix_celltype_names(celltypes = ct)
fix_celltype_names <- function(celltypes,
                               replace_chars = "[-]|[.]|[ ]|[//]|[\\/]",
                               make_unique = TRUE) {
  if (is.null(celltypes)) {
    return(NULL)
  }
  #### Swap every problematic character for "_" ####
  celltypes <- gsub(replace_chars, "_", celltypes)
  #### Remove repeating "_" ####
  celltypes <- gsub("[_]+", "_", celltypes)
  #### Make sure all are unique ####
  if (isTRUE(make_unique)) {
    # make.unique() joins its numeric suffix with "." by default, which
    # would put back one of the characters stripped above; use "_" instead.
    celltypes <- make.unique(celltypes, sep = "_")
  }
  celltypes
}