Skip to content

Commit

Permalink
Merge pull request #40 from SingerLab/isd
Browse files Browse the repository at this point in the history
version update, new f(x) and name changes
  • Loading branch information
RodrigoGM committed May 26, 2023
2 parents aca8507 + cca6eb0 commit cb3d661
Show file tree
Hide file tree
Showing 24 changed files with 354 additions and 83 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: gac
Version: 0.0.9034
Version: 0.0.9035
Date: 2023-05-23
Title: Genetic Analysis of Cells
Authors@R: person("Rodrigo Gularte Merida",
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(addGeneInfo)
export(addInfo)
export(addPheno)
export(addQC)
export(add_in_silico_root)
export(avg_num_alleles_per_locus)
export(binary.X)
export(binary.cnr)
Expand All @@ -19,7 +20,7 @@ export(doKSpectral)
export(estimate_joint_effects)
export(excludeCells)
export(expand2genes)
export(exportCNR)
export(export_cnr)
export(export_pval_igv)
export(genotype_vdj)
export(get_alteration_frequencies)
Expand Down Expand Up @@ -61,7 +62,7 @@ export(setBrayClusters)
export(setKcc)
export(split_cnr)
export(subsetCNR)
export(summaryCNR)
export(summary_cnr)
export(sync_cnr)
export(vdjBrayClust)
export(vdjHeatmap)
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# gac 0.0.9035
* added function to add an in-silico root cell and clone

* bin.id checks between chrmoInfo and gene.index

* standardizing function names, renamed as export_cnr and summary_cnr

* pull_gene_details now pulls information from a gene list, rather than regions

# gac 0.0.9034
* added functions to order bins and genes, and are implemented by default in sync_cnr

Expand Down
3 changes: 2 additions & 1 deletion R/addCells.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ addCells <- function(cnr, newX, newY, newqc, newYe = NULL, do.clean = TRUE, ...)
cnr[["qc"]] <- qc
cnr[["cells"]] <- newCells
}

cnr <- sync_cnr(cnr)

return(cnr)
}
12 changes: 10 additions & 2 deletions R/addInfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ addInfo <- function(cnr, df) {
#'
#' @param sort wether to sort the ouput object, default is FALSE
#'
#' @param full.sync logical, sync cnr. If TRUE function syncs cells and chromosomes
#' when FASLE, only sync cells. default TRUE
#'
#' @param chromosome.order order of chromosomes for full.sync
#'
#' @param ... additional parameters passed to merge
#'
#' @return
Expand All @@ -66,18 +71,21 @@ addInfo <- function(cnr, df) {
#' @examples
#'#' data(cnr)
#'
#' fakePval <- data.frame(pval = runif(5000))
#' fakePval <- data.frame(hgnc.symbol = cnr$gene.index$hgnc.symbol, pval = runif(nrow(cnr$gene.index)))
#'
#' cnr <- addGeneInfo(cnr, df = fakePval)
#'
#' head(cnr$gene.index)
#'
#' @export
addGeneInfo <- function(cnr, df, sort = FALSE, ...) {
addGeneInfo <- function(cnr, df, sort = FALSE, full.sync = TRUE,
chromosome.order = c(1:22, "X", "Y", "MT"), ...) {

gInfo <- merge(cnr$gene.index, df, sort = sort, ...)

cnr[["gene.index"]] <- gInfo

cnr <- sync_cnr(cnr)

return(cnr)
}
2 changes: 2 additions & 0 deletions R/addQC.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ addQC <- function(cnr, df, ...) {
}

cnr[["qc"]] <- QC

cnr <- sync_cnr(cnr)

return(cnr)
}
95 changes: 95 additions & 0 deletions R/add_in_silico_root.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@

#' build an in-silico root cell and clone profile
#'
#' By default cell is a female diploid cell
#'
#' @param cnr a cnr
#'
#' @param cell.name name of root cell. default "diploid"
#'
#' @param female logical, weather to build a female or male
#' cell and clone, default is female. If female is NULL, no sex chromosomes
#' correction is performed, all bins will have copy number equal base.ploidy
#'
#' @param base.ploidy integer, base ploidy (2, 4, 6, 8), default 2, for diploid cell
#'
#' @return
#'
#' Append an in-silico diplod (or polyploid) cell to the cnr. By default a female
#' human genome is created. If FALSE, a male human genome is constructed. NULL will
#' not peform a gender correction and the entire genome will be set to the base ploidy.
#'
#' In tetraploid males copy number is half of the base.ploidy e.g. for 4n : X = 2, Y = 2.
#'
#' @examples
#'
#' data(cnr)
#'
#' cnr <- add_in_silico_root(cnr)
#'
#' head(cnr$Y)
#'
#' @importFrom assertthat assert_that
#' @export
add_in_silico_root <- function(cnr, cell.name = "diploid",
female = TRUE, base.ploidy = 2L) {
##
if(!is.integer(base.ploidy)) {
base.ploidy <- as.integer(round(base.ploidy))
}
##
assertthat::assert_that(nrow(cnr$X) == nrow(cnr$chromInfo))
assertthat::assert_that(ncol(cnr$genes) == nrow(cnr$gene.index))
##
dX <- data.frame(rep(base.ploidy, times = nrow(cnr$chromInfo)))
names(dX) <- cell.name
dG <- data.frame(rep(base.ploidy, times = nrow(cnr$gene.index)))
names(dG) <- cell.name
##
if(!is.null(female)) {
if(female) {
dX[cnr$chromInfo$bin.chrom == "Y", cell.name] <- 0
dG[cnr$gene.index$chrom == "Y", cell.name] <- 0
} else {
dX[cnr$chromInfo$bin.chrom %in% c("X", "Y"), cell.name] <- base.ploidy / 2
dG[cnr$gene.index$chrom %in% c("X", "Y"), cell.name] <- base.ploidy / 2
}
}
##
if(! cell.name %in% colnames(cnr$X)) {
cnr$X <- cbind(cnr$X, dX)
} else {
warning(paste(cell.name, "exists in X"))
}
if(! cell.name %in% colnames(cnr$genes)) {
cnr$genes[cell.name, ] <- as.integer(dG[,1])
} else {
warning(paste(cell.name, "exists in genes"))
}
##
if(length(cnr$DDRC.df)) {
if(! cell.name %in% colnames(cnr$DDRC.df)) {
cnr$DDRC.df <- cbind(cnr$DDRC.df, dX)
} else {
warning(paste(cell.name, "exists in DDRC.df"))
}
}
##
if(length(cnr$DDRC.g)) {
if(! cell.name %in% colnames(cnr$DDRC.df)) {
cnr$DDRC.g <- cbind(cnr$DDRC.g, dG)
} else {
warning(paste(cell.name, "exists in DDRC.g"))
}
}
##
cnr$Y[cell.name, ] <- NA
cnr$Y[cell.name, "cellID"] <- cell.name
cnr$qc[cell.name, "cellID"] <- cell.name
cnr$qc[cell.name, "qc.status"] <- "PASS"

cnr$cells <- colnames(cnr$X)

return(cnr)
}

24 changes: 18 additions & 6 deletions R/buildCNR.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
#' or integer copy number. If `TRUE` data is an untransformed segment ratio.
#' If `FALSE` data is integer copy number
#'
#' @param full.sync sync cnr tables to preseve cell order, and sort chromInfo
#' and gene.index based on chromsome and start position
#'
#' @param chromosome.order chromosome order. Default is a human genome primary
#' assembly: 1:22, X, Y, and MT.
#'
#' @param ... parameters passed to roundCNR
#'
#' @return
Expand Down Expand Up @@ -71,7 +77,8 @@
#'
#' @export
buildCNR <- function(X, Y, qc, chromInfo, exprs = NULL, gene.index,
bulk = FALSE, ...) {
bulk = FALSE, full.sync = TRUE,
chromosome.order=c(1:22, "X", "Y", "MT"), ...) {

## chose if data is ratio or integer CN
if(bulk) {
Expand Down Expand Up @@ -105,9 +112,13 @@ buildCNR <- function(X, Y, qc, chromInfo, exprs = NULL, gene.index,
cnr[["Y"]] <- Y[colnames(cnr[["X"]]), ]
cnr[["qc"]] <- qc[colnames(cnr[["X"]]), ]

chromInfo <- cbind(bin.id = 1:nrow(chromInfo), chromInfo)
assertthat::assert_that(nrow(chromInfo) == max(gene.index$bin.id),
msg = "number of rows chromInfo does not match gene.index bin.id, please correct the gene.index$bin.id")

cnr[["chromInfo"]] <- chromInfo
cnr[["gene.index"]] <- gene.index
rownames(cnr$gene.index) <- cnr$gene.index$hgnc.symbol
rownames(cnr[["gene.index"]]) <- gene.index$hgnc.symbol

## if expression matrix is available, add it here
## must have rownames as cellID/sampleID
Expand All @@ -117,15 +128,16 @@ buildCNR <- function(X, Y, qc, chromInfo, exprs = NULL, gene.index,
assertthat::assert_that(all(rownames(exprs) %in% colnames(cnr[["X"]])))
assertthat::assert_that(all(colnames(exprs) %in%
colnames(cnr[["gene.index"]]$hgnc.symbol)))

cnr[["exprs"]] <- exprs[colnames(cnr[["X"]]), ]
}

cnr[["cells"]] <- colnames(cnr[["X"]])
cnr[["bulk"]] <- bulk

cnr <- sync_cnr(cnr)

cnr <- sync_cnr(cnr, full.sync = full.sync,
chromosome.order=c(1:22, "X", "Y", "MT"))

return(cnr)

} ## end buildCNR
7 changes: 3 additions & 4 deletions R/cnr.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#'
#' @format An object class list containg a rounded CNR
#'
#' * Input
#' \itemize{
#'
#' \item X, An integer matrix of bins x n.cells containing copy number
Expand Down Expand Up @@ -59,14 +58,15 @@
#' ...
#' }
#'
#' * Output
#' @return
#'
#' \itemize{
#' \item cdb, pairwise cell dissimilarity using Bray-Curtis
#'
#' \item hcdb, heirarchical clustering of cells based
#'
#' \item phylo, cell phyogenetic tree. Analysis is produced with
#' \code{\link[ape]}. Default is "balanced minimum evolution"
#' \code{ape}. Default is "balanced minimum evolution"
#'
#' \item tree.height, height cutoff of the tree, set as intersection between
#' the total number of multi-cell clusters and one-cell clusters
Expand Down Expand Up @@ -95,7 +95,6 @@
#' \item DDRC.dist, bray curtis disimilarity of clones
#'
#' \item DDRC.phylo, phylogenetic analysis of clones
#'
#' }
#'
#' @docType data
Expand Down
6 changes: 3 additions & 3 deletions R/exportCNR.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
#'\dontrun{
#' data(cnr)
#'
#' exportCNR(cnr, outdir = "cnr_out/")
#' export_cnr(cnr, outdir = "cnr_out/")
#'}
#'
#' @importFrom utils write.table
#' @importFrom ape write.tree
#'
#' @export
exportCNR <- function(cnr, outdir = ".", ...) {
export_cnr <- function(cnr, outdir = ".", ...) {

if(! dir.exists(outdir) ) {
dir.create(outdir, recursive = TRUE)
Expand Down Expand Up @@ -83,4 +83,4 @@ exportCNR <- function(cnr, outdir = ".", ...) {
...)
}

} # end exportCNR
} # end export_cnr
48 changes: 28 additions & 20 deletions R/gene_lookups.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,17 @@ get_gene_details <- function(cnr, chrom = 12, start = 69200804, end = 69246466)
} ## get_gene_details


#' Pull gene details for a genomic region
#' Pull gene details for a set of genes
#'
#' This function subsets the gene index for a genomic region of interest.
#' This function subsets the gene index for a given set of genes
#'
#' @param cnr a cnr bundle
#'
#' @param coord genomic region in ensembl format
#' @param genes a list of genes
#'
#' @param show.columns columns of gene.index to show
#'
#' @param identifier gene identifier hgnc.symbol or ensembl_gene_id. default hgnc.symbol
#'
#' @return
#'
Expand All @@ -162,30 +166,34 @@ get_gene_details <- function(cnr, chrom = 12, start = 69200804, end = 69246466)
#' @examples
#'
#' data(cnr)
#' coord <- "12:69200804:69246466"
#'
#' pull_gene_details(cnr)
#'
#' pull_gene_details(cnr, coord = "4:82351690:138565783")
#' pull_gene_details(cnr,
#' genes = c("JUN", "MDM2", "CDK4"),
#' show.columns = c("hgnc.symbol", "bin.id", "gene_biotype"))
#'
#' pull_gene_details(cnr, coord = coord)
#' pull_gene_details(cnr,
#' genes = c("ENSG00000177606", "ENSG00000135446", "ENSG00000135679"),
#' identifier = "ensembl_gene_id",
#' show.columns = c("hgnc.symbol", "bin.id", "gene_biotype"))
#'
#' coords <- c("1:170120554:172941951",
#' "12:69200804:69246466")
#'
#' do.call(rbind, lapply(coords, function(rr)
#' pull_gene_details(cnr, coord = rr)))
#'
#' @importFrom assertthat assert_that
#' @export
pull_gene_details <- function(cnr, coord = "12:69200804:69246466") {
pull_gene_details <- function(cnr, genes = c("MDM2", "CDK4"),
show.columns = NULL,
identifier = "hgnc.symbol") {

seqname <- unlist(strsplit(coord, split = ":"))[1]
start <- as.numeric(unlist(strsplit(coord, split = ":"))[2])
end <- as.numeric(unlist(strsplit(coord, split = ":"))[3])
assertthat::assert_that(all(genes %in% cnr$gene.index[, identifier]))

assertthat::assert_that(start < end)

gene.details <- get_gene_details(cnr, chrom = seqname,
start = start,
end = end)
idx <- cnr$gene.index[, identifier] %in% genes

if(is.null(show.columns)) {
gene.details <- cnr$gene.index[idx, ]
} else {
gene.details <- cnr$gene.index[idx, show.columns]
}

return(gene.details)
} ## end pull gene details
Expand Down
2 changes: 1 addition & 1 deletion R/split_cnr.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#'
#' cnrL <- split_cnr(cnr, split.by = "category1")
#'
#' lapply(cnrL, summaryCNR)
#' lapply(cnrL, summary_cnr)
#'
#' @export
split_cnr <- function(cnr, split.by) {
Expand Down
Loading

0 comments on commit cb3d661

Please sign in to comment.