From cca6eb0121b488520823603bd1b1eb7cfb4a0000 Mon Sep 17 00:00:00 2001
From: Rodrigo Gularte Merida <gularter@mskcc.org>
Date: Thu, 25 May 2023 21:08:21 -0400
Subject: [PATCH] continued to standardized names, added fx to add in-silico
 cells e.g. diploid change fx operation in `pull_gene_details`

---
 DESCRIPTION                           |  2 +-
 NAMESPACE                             |  5 +-
 NEWS.md                               |  9 +++
 R/addCells.R                          |  3 +-
 R/addInfo.R                           | 12 +++-
 R/addQC.R                             |  2 +
 R/add_in_silico_root.R                | 95 +++++++++++++++++++++++++++
 R/buildCNR.R                          | 24 +++++--
 R/cnr.R                               |  7 +-
 R/exportCNR.R                         |  6 +-
 R/gene_lookups.R                      | 48 ++++++++------
 R/split_cnr.R                         |  2 +-
 R/summaryCNR.R                        |  9 +--
 R/sync_cnr.R                          |  4 +-
 cran-comments.md                      | 13 +++-
 man/addGeneInfo.Rd                    | 16 ++++-
 man/add_in_silico_root.Rd             | 38 +++++++++++
 man/buildCNR.Rd                       | 19 +++++-
 man/cnr.Rd                            | 69 +++++++++++++++----
 man/{exportCNR.Rd => export_cnr.Rd}   |  8 +--
 man/pull_gene_details.Rd              | 32 +++++----
 man/split_cnr.Rd                      |  2 +-
 man/{summaryCNR.Rd => summary_cnr.Rd} |  8 +--
 vignettes/getting_started.Rmd         |  4 +-
 24 files changed, 354 insertions(+), 83 deletions(-)
 create mode 100644 R/add_in_silico_root.R
 create mode 100644 man/add_in_silico_root.Rd
 rename man/{exportCNR.Rd => export_cnr.Rd} (84%)
 rename man/{summaryCNR.Rd => summary_cnr.Rd} (80%)

diff --git a/DESCRIPTION b/DESCRIPTION
index 38c2a4c..c779285 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: gac
-Version: 0.0.9034
+Version: 0.0.9035
 Date: 2023-05-23
 Title: Genetic Analysis of Cells
 Authors@R: person("Rodrigo Gularte Merida",
diff --git a/NAMESPACE b/NAMESPACE
index a6bdc6f..a696475 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ export(addGeneInfo)
 export(addInfo)
 export(addPheno)
 export(addQC)
+export(add_in_silico_root)
 export(avg_num_alleles_per_locus)
 export(binary.X)
 export(binary.cnr)
@@ -19,7 +20,7 @@ export(doKSpectral)
 export(estimate_joint_effects)
 export(excludeCells)
 export(expand2genes)
-export(exportCNR)
+export(export_cnr)
 export(export_pval_igv)
 export(genotype_vdj)
 export(get_alteration_frequencies)
@@ -61,7 +62,7 @@ export(setBrayClusters)
 export(setKcc)
 export(split_cnr)
 export(subsetCNR)
-export(summaryCNR)
+export(summary_cnr)
 export(sync_cnr)
 export(vdjBrayClust)
 export(vdjHeatmap)
diff --git a/NEWS.md b/NEWS.md
index b31de83..7b8912a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,12 @@
+# gac 0.0.9035
+* added function to add an in-silico root cell and clone
+
+* bin.id checks between chromInfo and gene.index
+
+* standardizing function names, renamed as export_cnr and summary_cnr
+
+* pull_gene_details now pulls information from a gene list, rather than regions
+
 # gac 0.0.9034
 * added functions to order bins and genes, and are implemented by default in sync_cnr
 
diff --git a/R/addCells.R b/R/addCells.R
index 8ceece6..d8173d9 100644
--- a/R/addCells.R
+++ b/R/addCells.R
@@ -116,7 +116,8 @@ addCells <- function(cnr, newX, newY, newqc, newYe = NULL, do.clean = TRUE, ...)
         cnr[["qc"]] <- qc
         cnr[["cells"]] <- newCells
     }
+
     cnr <- sync_cnr(cnr)
-    
+
     return(cnr)
 }
diff --git a/R/addInfo.R b/R/addInfo.R
index 629edf0..ad22ea7 100644
--- a/R/addInfo.R
+++ b/R/addInfo.R
@@ -57,6 +57,11 @@ addInfo <- function(cnr, df) {
 #'
 #' @param sort wether to sort the ouput object, default is FALSE
 #'
+#' @param full.sync logical, sync cnr. If TRUE, the function syncs cells and
+#' chromosomes; if FALSE, it only syncs cells.  Default TRUE
+#'
+#' @param chromosome.order order of chromosomes for full.sync
+#'
 #' @param ... additional parameters passed to merge
 #'
 #' @return
@@ -66,18 +71,21 @@ addInfo <- function(cnr, df) {
 #' @examples
 #'#' data(cnr)
 #'
-#' fakePval <- data.frame(pval = runif(5000))
+#' fakePval <- data.frame(hgnc.symbol = cnr$gene.index$hgnc.symbol, pval = runif(nrow(cnr$gene.index)))
 #'
 #' cnr <- addGeneInfo(cnr, df = fakePval)
 #' 
 #' head(cnr$gene.index)
 #'
 #' @export
-addGeneInfo <- function(cnr, df, sort = FALSE, ...) {
+addGeneInfo <- function(cnr, df, sort = FALSE, full.sync = TRUE,
+                        chromosome.order = c(1:22, "X", "Y", "MT"), ...) {
     
     gInfo <- merge(cnr$gene.index, df, sort = sort,  ...)
     
     cnr[["gene.index"]] <- gInfo
+    cnr <- sync_cnr(cnr, full.sync = full.sync,
+                    chromosome.order = chromosome.order)
     
     return(cnr)
 }
diff --git a/R/addQC.R b/R/addQC.R
index c1d35dd..feeb7bc 100644
--- a/R/addQC.R
+++ b/R/addQC.R
@@ -31,6 +31,8 @@ addQC <- function(cnr, df, ...) {
     }
     
     cnr[["qc"]] <- QC
+
+    cnr <- sync_cnr(cnr)
     
     return(cnr)
 }
diff --git a/R/add_in_silico_root.R b/R/add_in_silico_root.R
new file mode 100644
index 0000000..375991f
--- /dev/null
+++ b/R/add_in_silico_root.R
@@ -0,0 +1,95 @@
+
+#' build an in-silico root cell and clone profile
+#'
+#' By default cell is a female diploid cell
+#'
+#' @param cnr a cnr
+#'
+#' @param cell.name name of root cell. default "diploid"
+#' 
+#' @param female logical, whether to build a female or male
+#'  cell and clone, default is female. If female is NULL, no sex chromosome
+#' correction is performed, all bins will have copy number equal to base.ploidy
+#'
+#' @param base.ploidy integer, base ploidy (2, 4, 6, 8), default 2, for diploid cell
+#'
+#' @return
+#'
+#' Appends an in-silico diploid (or polyploid) cell to the cnr.  By default a female
+#' human genome is created.  If FALSE, a male human genome is constructed.  NULL will
+#' not perform a sex chromosome correction and the entire genome will be set to the base ploidy.
+#'
+#' In males, X and Y copy number is half of the base.ploidy, e.g. for 4n: X = 2, Y = 2.
+#' 
+#' @examples
+#'
+#' data(cnr)
+#'
+#' cnr <- add_in_silico_root(cnr)
+#'
+#' head(cnr$Y)
+#'
+#' @importFrom assertthat assert_that
+#' @export
+add_in_silico_root <- function(cnr, cell.name = "diploid",
+                               female = TRUE, base.ploidy = 2L) {
+    ## coerce base.ploidy to an integer, rounding first
+    if(!is.integer(base.ploidy)) {
+        base.ploidy <- as.integer(round(base.ploidy))
+    }
+    ## bins (rows of X) and genes (columns of genes) must match their indexes
+    assertthat::assert_that(nrow(cnr$X) == nrow(cnr$chromInfo))
+    assertthat::assert_that(ncol(cnr$genes) == nrow(cnr$gene.index))
+    ## one-column profiles named after the root cell: dX for bins, dG for genes
+    dX <- data.frame(rep(base.ploidy, times = nrow(cnr$chromInfo)))
+    names(dX) <- cell.name
+    dG <- data.frame(rep(base.ploidy, times = nrow(cnr$gene.index)))
+    names(dG) <- cell.name
+    ## sex chromosome correction; skipped entirely when female is NULL
+    if(!is.null(female)) {
+        if(female) {
+            dX[cnr$chromInfo$bin.chrom == "Y", cell.name] <- 0
+            dG[cnr$gene.index$chrom == "Y", cell.name] <- 0
+        } else {
+            dX[cnr$chromInfo$bin.chrom %in% c("X", "Y"), cell.name] <- base.ploidy / 2
+            dG[cnr$gene.index$chrom %in% c("X", "Y"), cell.name] <- base.ploidy / 2
+        }
+    }
+    ## cells are columns of X but rows of genes; warn if the cell already exists
+    if(! cell.name %in% colnames(cnr$X)) {
+        cnr$X <- cbind(cnr$X, dX)
+    } else {
+        warning(paste(cell.name, "exists in X"))
+    }
+    if(! cell.name %in% rownames(cnr$genes)) {
+        cnr$genes[cell.name, ] <- as.integer(dG[,1])
+    } else {
+        warning(paste(cell.name, "exists in genes"))
+    }
+    ## append the bin profile to DDRC.df only when that table is present
+    if(length(cnr$DDRC.df)) {
+        if(! cell.name %in% colnames(cnr$DDRC.df)) {
+            cnr$DDRC.df <- cbind(cnr$DDRC.df, dX)
+        } else {
+            warning(paste(cell.name, "exists in DDRC.df"))
+        }
+    }
+    ## append the gene profile to DDRC.g; the guard checks DDRC.g itself
+    if(length(cnr$DDRC.g)) {
+        if(! cell.name %in% colnames(cnr$DDRC.g)) {
+            cnr$DDRC.g <- cbind(cnr$DDRC.g, dG)
+        } else {
+            warning(paste(cell.name, "exists in DDRC.g"))
+        }
+    }
+    ## add the root cell to Y (all other columns NA) and mark it PASS in qc
+    cnr$Y[cell.name, ] <- NA
+    cnr$Y[cell.name, "cellID"] <- cell.name
+    cnr$qc[cell.name, "cellID"] <- cell.name
+    cnr$qc[cell.name, "qc.status"] <- "PASS"
+
+    cnr$cells <- colnames(cnr$X)
+
+    return(cnr)
+}
+
diff --git a/R/buildCNR.R b/R/buildCNR.R
index 1f259c3..83b5438 100644
--- a/R/buildCNR.R
+++ b/R/buildCNR.R
@@ -32,6 +32,12 @@
 #'   or integer copy number.  If `TRUE` data is an untransformed segment ratio.
 #'   If `FALSE` data is integer copy number
 #'
+#' @param full.sync sync cnr tables to preserve cell order, and sort chromInfo
+#'  and gene.index based on chromosome and start position
+#'
+#' @param chromosome.order chromosome order. Default is a human genome primary
+#'  assembly: 1:22, X, Y, and MT.  
+#' 
 #' @param ... parameters passed to roundCNR
 #'
 #' @return
@@ -71,7 +77,8 @@
 #' 
 #' @export
 buildCNR <- function(X, Y, qc, chromInfo, exprs = NULL, gene.index,
-                     bulk = FALSE, ...) {
+                     bulk = FALSE, full.sync = TRUE,
+                     chromosome.order=c(1:22, "X", "Y", "MT"), ...) {
 
     ## chose if data is ratio or integer CN
     if(bulk) {
@@ -105,9 +112,13 @@ buildCNR <- function(X, Y, qc, chromInfo, exprs = NULL, gene.index,
     cnr[["Y"]] <- Y[colnames(cnr[["X"]]), ]
     cnr[["qc"]] <- qc[colnames(cnr[["X"]]), ]
 
+    chromInfo <- cbind(bin.id = seq_len(nrow(chromInfo)), chromInfo)
+    assertthat::assert_that(nrow(chromInfo) == max(gene.index$bin.id),
+                            msg = "number of rows chromInfo does not match gene.index bin.id, please correct the gene.index$bin.id")
+    
     cnr[["chromInfo"]] <- chromInfo
     cnr[["gene.index"]] <- gene.index
-    rownames(cnr$gene.index) <- cnr$gene.index$hgnc.symbol
+    rownames(cnr[["gene.index"]]) <- gene.index$hgnc.symbol
     
     ## if expression matrix is available, add it here
     ## must have rownames as cellID/sampleID
@@ -117,15 +128,16 @@ buildCNR <- function(X, Y, qc, chromInfo, exprs = NULL, gene.index,
         assertthat::assert_that(all(rownames(exprs) %in% colnames(cnr[["X"]])))
         assertthat::assert_that(all(colnames(exprs) %in%
                                     colnames(cnr[["gene.index"]]$hgnc.symbol)))
-                                    
+        
         cnr[["exprs"]] <- exprs[colnames(cnr[["X"]]), ]
     }
     
     cnr[["cells"]] <- colnames(cnr[["X"]])
     cnr[["bulk"]] <- bulk
-
-    cnr <- sync_cnr(cnr)
+    
+    cnr <- sync_cnr(cnr, full.sync = full.sync,
+                    chromosome.order = chromosome.order)
     
     return(cnr)
-
+    
 } ## end buildCNR
diff --git a/R/cnr.R b/R/cnr.R
index bd17a3b..1d1b023 100644
--- a/R/cnr.R
+++ b/R/cnr.R
@@ -14,7 +14,6 @@
 #'
 #' @format An object class list containg a rounded CNR
 #'
-#' * Input 
 #' \itemize{
 #' 
 #'   \item X, An integer matrix of bins x n.cells containing copy number
@@ -59,14 +58,15 @@
 #'   ...
 #' }
 #'
-#' * Output
+#' @return
+#' 
 #' \itemize{
 #'   \item cdb, pairwise cell dissimilarity using Bray-Curtis
 #'
 #'   \item hcdb, heirarchical clustering of cells based
 #' 
 #'   \item phylo, cell phyogenetic tree.  Analysis is produced with
-#'     \code{\link[ape]}. Default is "balanced minimum evolution"
+#'     \code{ape}. Default is "balanced minimum evolution"
 #' 
 #'   \item tree.height, height cutoff of the tree, set as intersection between
 #'      the total number of multi-cell clusters and one-cell clusters
@@ -95,7 +95,6 @@
 #'   \item DDRC.dist, bray curtis disimilarity of clones
 #' 
 #'   \item DDRC.phylo, phylogenetic analysis of clones
-#' 
 #' }
 #' 
 #' @docType data
diff --git a/R/exportCNR.R b/R/exportCNR.R
index f6d9b30..a30af45 100644
--- a/R/exportCNR.R
+++ b/R/exportCNR.R
@@ -17,14 +17,14 @@
 #'\dontrun{
 #' data(cnr)
 #' 
-#' exportCNR(cnr, outdir = "cnr_out/")
+#' export_cnr(cnr, outdir = "cnr_out/")
 #'}
 #'
 #' @importFrom utils write.table
 #' @importFrom ape write.tree
 #' 
 #' @export
-exportCNR <- function(cnr, outdir = ".", ...) {
+export_cnr <- function(cnr, outdir = ".", ...) {
 
     if(! dir.exists(outdir) ) {
         dir.create(outdir, recursive = TRUE)
@@ -83,4 +83,4 @@ exportCNR <- function(cnr, outdir = ".", ...) {
                         ...)
     }
     
-} # end exportCNR
+} # end export_cnr
diff --git a/R/gene_lookups.R b/R/gene_lookups.R
index 8cb3df9..f05c489 100755
--- a/R/gene_lookups.R
+++ b/R/gene_lookups.R
@@ -147,13 +147,17 @@ get_gene_details <- function(cnr, chrom = 12, start = 69200804, end = 69246466)
 } ## get_gene_details
 
     
-#' Pull gene details for a genomic region
+#' Pull gene details for a set of genes
 #'
-#' This function subsets the gene index for a genomic region of interest.
+#' This function subsets the gene index for a given set of genes
 #'
 #' @param cnr a cnr bundle
 #'
-#' @param coord genomic region in ensembl format
+#' @param genes a list of genes
+#'
+#' @param show.columns columns of gene.index to show
+#'
+#' @param identifier gene identifier hgnc.symbol or ensembl_gene_id. default hgnc.symbol
 #'
 #' @return
 #'
@@ -162,30 +166,34 @@ get_gene_details <- function(cnr, chrom = 12, start = 69200804, end = 69246466)
 #' @examples
 #'
 #' data(cnr)
-#' coord <- "12:69200804:69246466"
+#'
+#' pull_gene_details(cnr)
 #' 
-#' pull_gene_details(cnr, coord = "4:82351690:138565783")
+#' pull_gene_details(cnr,
+#'   genes = c("JUN", "MDM2", "CDK4"),
+#'   show.columns = c("hgnc.symbol", "bin.id", "gene_biotype"))
 #'
-#' pull_gene_details(cnr, coord = coord)
+#' pull_gene_details(cnr,
+#'   genes = c("ENSG00000177606", "ENSG00000135446", "ENSG00000135679"),
+#'   identifier = "ensembl_gene_id",
+#'   show.columns = c("hgnc.symbol", "bin.id", "gene_biotype"))
 #'
-#' coords <- c("1:170120554:172941951",
-#'           "12:69200804:69246466")
-#' 
-#' do.call(rbind, lapply(coords, function(rr)
-#'                      pull_gene_details(cnr, coord = rr)))
 #' 
+#' @importFrom assertthat assert_that
 #' @export
-pull_gene_details <- function(cnr, coord = "12:69200804:69246466") {
+pull_gene_details <- function(cnr, genes = c("MDM2", "CDK4"),
+                              show.columns = NULL,
+                              identifier = "hgnc.symbol") {
 
-    seqname <- unlist(strsplit(coord, split = ":"))[1]
-    start <- as.numeric(unlist(strsplit(coord, split = ":"))[2])
-    end <- as.numeric(unlist(strsplit(coord, split = ":"))[3])
+    assertthat::assert_that(all(genes %in% cnr$gene.index[, identifier]))
 
-    assertthat::assert_that(start < end)
-
-    gene.details <- get_gene_details(cnr, chrom = seqname,
-                                     start = start,
-                                     end = end)
+    idx <- cnr$gene.index[, identifier] %in% genes
+    
+    if(is.null(show.columns)) {
+        gene.details <- cnr$gene.index[idx, ]
+    } else {
+        gene.details <- cnr$gene.index[idx, show.columns, drop = FALSE]
+    }
 
     return(gene.details)
 } ## end pull gene details
diff --git a/R/split_cnr.R b/R/split_cnr.R
index b28981c..ed1e910 100644
--- a/R/split_cnr.R
+++ b/R/split_cnr.R
@@ -15,7 +15,7 @@
 #'
 #' cnrL <- split_cnr(cnr, split.by = "category1")
 #'
-#' lapply(cnrL, summaryCNR)
+#' lapply(cnrL, summary_cnr)
 #' 
 #' @export
 split_cnr <- function(cnr, split.by) {
diff --git a/R/summaryCNR.R b/R/summaryCNR.R
index 4f2674c..453c40f 100644
--- a/R/summaryCNR.R
+++ b/R/summaryCNR.R
@@ -13,12 +13,12 @@
 #'
 #' data(cnr)
 #'
-#' summaryCNR(cnr)
+#' summary_cnr(cnr)
 #'
 #' @importFrom dplyr tibble
 #' 
 #' @export
-summaryCNR <- function(cnr, detailed = FALSE, display = FALSE) {
+summary_cnr<- function(cnr, detailed = FALSE, display = FALSE) {
 
     ncells(cnr, detailed = detailed, display = FALSE)
     npheno(cnr, detailed = detailed, display = FALSE)
@@ -47,11 +47,12 @@ summaryCNR <- function(cnr, detailed = FALSE, display = FALSE) {
 #' @param detailed logical, detailed summary
 #'
 #' @param display display cell ID's
-#' 
+#'
 #' @importFrom assertthat assert_that
 #' 
 #' @export
-ncells <- function(cnr, detailed = FALSE, display = FALSE) {
+ncells <- function(cnr, detailed = FALSE,
+                   display = FALSE) {
 
     nxc <- ncol(cnr$X)
     nyc <- nrow(cnr$Y)
diff --git a/R/sync_cnr.R b/R/sync_cnr.R
index 8b7605b..8670256 100644
--- a/R/sync_cnr.R
+++ b/R/sync_cnr.R
@@ -50,9 +50,9 @@ sync_cnr <- function(cnr, cell.order = NULL, full.sync = TRUE,
 
         cell.order <- cnr$Y$cellID
         rownames(cnr$Y) <- cnr$Y$cellID
-
+        
     }
-
+    
     if(!is.null(cnr$exprs)) {
         assertthat::assert_that(all(rownames(cnr$exprs) %in% cell.order))
         assertthat::assert_that(all(cell.order %in% rownames(cnr$exprs)))
diff --git a/cran-comments.md b/cran-comments.md
index cd5080e..415c51d 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,10 +1,21 @@
 ## Test environments
-* local R installation, R 4.1.0
+* local R installation, R version 4.2.2 (2022-10-31)
 * ubuntu 16.04 (on travis-ci), R 4.1.0
 * CentOS7 Institutional HPC Cluster
 
 
 ## R CMD check results
+## Thu May 25 21:07:10 EDT 2023
+── R CMD check results ──────────────────────────────────────────────── gac 0.0.9035 ────
+Duration: 3h 42.5s
+
+❯ checking installed package size ... NOTE
+    installed size is  6.0Mb
+    sub-directories of 1Mb or more:
+      data   4.5Mb
+
+0 errors ✔ | 0 warnings ✔ | 1 note ✖
+
 ## Tue May 23 15:33:07 EDT 2023
 ── R CMD check results ────────────────────────────────────────────────────── gac 0.0.9034 ────
 Duration: 19m 50.1s
diff --git a/man/addGeneInfo.Rd b/man/addGeneInfo.Rd
index 5de15d6..2906a28 100644
--- a/man/addGeneInfo.Rd
+++ b/man/addGeneInfo.Rd
@@ -4,7 +4,14 @@
 \alias{addGeneInfo}
 \title{Adds Gene Information to the gene.index}
 \usage{
-addGeneInfo(cnr, df, sort = FALSE, ...)
+addGeneInfo(
+  cnr,
+  df,
+  sort = FALSE,
+  full.sync = TRUE,
+  chromosome.order = c(1:22, "X", "Y", "MT"),
+  ...
+)
 }
 \arguments{
 \item{cnr}{a cnr bundle}
@@ -15,6 +22,11 @@ analyses e.g. p-values, genetic effects, etc to the genes}
 
 \item{sort}{wether to sort the ouput object, default is FALSE}
 
+\item{full.sync}{logical, sync cnr. If TRUE, the function syncs cells and
+chromosomes; if FALSE, it only syncs cells.  Default TRUE}
+
+\item{chromosome.order}{order of chromosomes for full.sync}
+
 \item{...}{additional parameters passed to merge}
 }
 \value{
@@ -27,7 +39,7 @@ The function adds information to the gene.index.#'
 \examples{
 #' data(cnr)
 
-fakePval <- data.frame(pval = runif(5000))
+fakePval <- data.frame(hgnc.symbol = cnr$gene.index$hgnc.symbol, pval = runif(nrow(cnr$gene.index)))
 
 cnr <- addGeneInfo(cnr, df = fakePval)
 
diff --git a/man/add_in_silico_root.Rd b/man/add_in_silico_root.Rd
new file mode 100644
index 0000000..f49f676
--- /dev/null
+++ b/man/add_in_silico_root.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_in_silico_root.R
+\name{add_in_silico_root}
+\alias{add_in_silico_root}
+\title{build an in-silico root cell and clone profile}
+\usage{
+add_in_silico_root(cnr, cell.name = "diploid", female = TRUE, base.ploidy = 2L)
+}
+\arguments{
+\item{cnr}{a cnr}
+
+\item{cell.name}{name of root cell. default "diploid"}
+
+\item{female}{logical, whether to build a female or male
+ cell and clone, default is female. If female is NULL, no sex chromosome
+correction is performed, all bins will have copy number equal to base.ploidy}
+
+\item{base.ploidy}{integer, base ploidy (2, 4, 6, 8), default 2, for diploid cell}
+}
+\value{
+Appends an in-silico diploid (or polyploid) cell to the cnr.  By default a female
+human genome is created.  If FALSE, a male human genome is constructed.  NULL will
+not perform a sex chromosome correction and the entire genome will be set to the base ploidy.
+
+In males, X and Y copy number is half of the base.ploidy, e.g. for 4n: X = 2, Y = 2.
+}
+\description{
+By default cell is a female diploid cell
+}
+\examples{
+
+data(cnr)
+
+cnr <- add_in_silico_root(cnr)
+
+head(cnr$Y)
+
+}
diff --git a/man/buildCNR.Rd b/man/buildCNR.Rd
index 5c7123d..a31ae64 100644
--- a/man/buildCNR.Rd
+++ b/man/buildCNR.Rd
@@ -4,7 +4,18 @@
 \alias{buildCNR}
 \title{Build a CNR bundle (Copy Number, --Rounded)}
 \usage{
-buildCNR(X, Y, qc, chromInfo, exprs = NULL, gene.index, bulk = FALSE, ...)
+buildCNR(
+  X,
+  Y,
+  qc,
+  chromInfo,
+  exprs = NULL,
+  gene.index,
+  bulk = FALSE,
+  full.sync = TRUE,
+  chromosome.order = c(1:22, "X", "Y", "MT"),
+  ...
+)
 }
 \arguments{
 \item{X}{bin or common segment copy number data.  Can be in `numeric`
@@ -30,6 +41,12 @@ expression matrix. Must have cellID as rownames. default is NULL.}
 or integer copy number.  If `TRUE` data is an untransformed segment ratio.
 If `FALSE` data is integer copy number}
 
+\item{full.sync}{sync cnr tables to preserve cell order, and sort chromInfo
+and gene.index based on chromosome and start position}
+
+\item{chromosome.order}{chromosome order. Default is a human genome primary
+assembly: 1:22, X, Y, and MT.}
+
 \item{...}{parameters passed to roundCNR}
 }
 \value{
diff --git a/man/cnr.Rd b/man/cnr.Rd
index 0de24c6..e9def06 100644
--- a/man/cnr.Rd
+++ b/man/cnr.Rd
@@ -17,24 +17,23 @@ algorithm) (Baslan et al 2012.), and implemented on Ginkgo, or from hmmCopy.
 These are upstream analyses to the package.
 
 For mutations in single-cells, the cnq can be a binary incidence (0,1) matrix
-representing presence or absence of specific mutations, or a ternary (0,1,2)
-representing genotypes as the number of alternate allele copies
+ representing presence or absence of specific mutations, or a ternary (0,1,2)
+ representing genotypes as the number of alternate allele copies
 
-Unlike a standard rounding, we noticed that estimates for deletions and losses
-don't follow standard numeric rounding. We set a the thresholds of < 0.2
-(average quantal estimate of the Y chromosome in females) for deletions
-(i.e. 0 copies); between 0.2 and 1.2 for losses (i.e. 1 copy); between
-1.2 and 2.5 for 2 copies, and everything else standard numeric rounding.
+We noticed that estimates for deletions and losses don't follow standard
+ numeric rounding. We set a the thresholds of < 0.2 (average quantal estimate
+ of the Y chromosome in females) for deletions (i.e. 0 copies); between 0.2
+ and 1.2 for losses (i.e. 1 copy); between 1.2 and 2.5 for 2 copies, and
+ everything else standard numeric rounding.
 
   \item genes, gene copy number interpolation from bins.  The genes matrix
 is an interpolated, transposed, expansion of bins. The expansion is
 constructed internally using the expand2genes function.
 
-
   \item Y, phenotypic data of single-cells, contains cells as rows, and
 phentypes in columns. Phenotypes can be information about individual samples,
-or if same-cell methods were used, the RNA expression from the same cell.
-#' 
+or if same-cell methods were used, the RNA expression from the same cell.  
+
   \item qc, quality control metrics. This matrix contains additional metadata
 that is technical, e.g. number of reads, MAPD estimates, and the PASS/FAIL
 qc.status for individual cells.  contains cells as rows and metadata as columns
@@ -43,6 +42,12 @@ qc.status for individual cells.  contains cells as rows and metadata as columns
 
  \item gene.index, table to map bins to genes
 
+ \item cells, a list of cells
+
+ \item bulk, logical, weather the data is bulk DNA or cells.  If TRUE, data
+will not be rounded and it's assumed is log ratio data.  If FALSE, data is
+considered as single-cell and copy numbers are considered as integer copy
+number.  Estimates are rounded to the nearest integer (see above).
   ...
 }
 }
@@ -52,10 +57,52 @@ qc.status for individual cells.  contains cells as rows and metadata as columns
 \usage{
 data(cnr)
 }
+\value{
+\itemize{
+  \item cdb, pairwise cell dissimilarity using Bray-Curtis
+
+  \item hcdb, heirarchical clustering of cells based
+
+  \item phylo, cell phyogenetic tree.  Analysis is produced with
+    \code{ape}. Default is "balanced minimum evolution"
+
+  \item tree.height, height cutoff of the tree, set as intersection between
+     the total number of multi-cell clusters and one-cell clusters
+
+  \item ccp, output from ConsensusClusterPlus, the first element is a color map.
+     Elements 2:(40) contain 4 outputs: consensusMatrix, consensusTree,
+     consensusClass, ml, and clrs.  Each element corresponds to k in k-means
+     clustering
+
+  \item kStats, spectral analysis of consensus clustering
+
+  \item eigenVals, spectral analysis eigen values
+
+  \item optK optimumn k-parameter (kCC), and stable K (sK)
+
+  \item cluster_heterogeneity, summary metrics of clones/clusters
+
+  \item uclust, unique clusters with 3 or more cells
+
+  \item DDRC.df, bin pseudobulk profiles of each clone (or other chosen group)
+
+  \item DDRC.g, gene pseudobulk profiles
+
+  \item vdj.cells list of vdj.cells produced by \code{genotype_vdj}
+
+  \item DDRC.dist, bray curtis disimilarity of clones
+
+  \item DDRC.phylo, phylogenetic analysis of clones
+}
+}
 \description{
 The cnr object is a list of four relational matrices. The bins, genes,
 annotation, qc, chromInfo, and gene.index.  The structure is inspired by
-Scanpy's AnnData to which cleverly integrates complex data into a simple
+Scanpy's AnnData which cleverly integrates complex data into a simple
 architecture.
+
+The object stores processing results required for data exploration,
+visualization and genetic analyses.  Specifically, the distance matrix,
+phylogenetic analysis, pseudobulk, and heterogeneity analysis.
 }
 \keyword{datasets}
diff --git a/man/exportCNR.Rd b/man/export_cnr.Rd
similarity index 84%
rename from man/exportCNR.Rd
rename to man/export_cnr.Rd
index a981f89..92afea3 100644
--- a/man/exportCNR.Rd
+++ b/man/export_cnr.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/exportCNR.R
-\name{exportCNR}
-\alias{exportCNR}
+\name{export_cnr}
+\alias{export_cnr}
 \title{export CNR to flat text files}
 \usage{
-exportCNR(cnr, outdir = ".", ...)
+export_cnr(cnr, outdir = ".", ...)
 }
 \arguments{
 \item{cnr}{a cnr}
@@ -26,7 +26,7 @@ export CNR to flat text files
 \dontrun{
 data(cnr)
 
-exportCNR(cnr, outdir = "cnr_out/")
+export_cnr(cnr, outdir = "cnr_out/")
 }
 
 }
diff --git a/man/pull_gene_details.Rd b/man/pull_gene_details.Rd
index bb2e7e8..24415dd 100644
--- a/man/pull_gene_details.Rd
+++ b/man/pull_gene_details.Rd
@@ -2,34 +2,44 @@
 % Please edit documentation in R/gene_lookups.R
 \name{pull_gene_details}
 \alias{pull_gene_details}
-\title{Pull gene details for a genomic region}
+\title{Pull gene details for a set of genes}
 \usage{
-pull_gene_details(cnr, coord = "12:69200804:69246466")
+pull_gene_details(
+  cnr,
+  genes = c("MDM2", "CDK4"),
+  show.columns = NULL,
+  identifier = "hgnc.symbol"
+)
 }
 \arguments{
 \item{cnr}{a cnr bundle}
 
-\item{coord}{genomic region in ensembl format}
+\item{genes}{a list of genes}
+
+\item{show.columns}{columns of gene.index to show}
+
+\item{identifier}{gene identifier hgnc.symbol or ensembl_gene_id. default hgnc.symbol}
 }
 \value{
 Returns the subset of the `gene.index` table for the genomic region.
 }
 \description{
-This function subsets the gene index for a genomic region of interest.
+This function subsets the gene index for a given set of genes
 }
 \examples{
 
 data(cnr)
-coord <- "12:69200804:69246466"
 
-pull_gene_details(cnr, coord = "4:82351690:138565783")
+pull_gene_details(cnr)
 
-pull_gene_details(cnr, coord = coord)
+pull_gene_details(cnr,
+  genes = c("JUN", "MDM2", "CDK4"),
+  show.columns = c("hgnc.symbol", "bin.id", "gene_biotype"))
 
-coords <- c("1:170120554:172941951",
-          "12:69200804:69246466")
+pull_gene_details(cnr,
+  genes = c("ENSG00000177606", "ENSG00000135446", "ENSG00000135679"),
+  identifier = "ensembl_gene_id",
+  show.columns = c("hgnc.symbol", "bin.id", "gene_biotype"))
 
-do.call(rbind, lapply(coords, function(rr)
-                     pull_gene_details(cnr, coord = rr)))
 
 }
diff --git a/man/split_cnr.Rd b/man/split_cnr.Rd
index 5692eb4..079b5ce 100644
--- a/man/split_cnr.Rd
+++ b/man/split_cnr.Rd
@@ -25,6 +25,6 @@ data(cnr)
 
 cnrL <- split_cnr(cnr, split.by = "category1")
 
-lapply(cnrL, summaryCNR)
+lapply(cnrL, summary_cnr)
 
 }
diff --git a/man/summaryCNR.Rd b/man/summary_cnr.Rd
similarity index 80%
rename from man/summaryCNR.Rd
rename to man/summary_cnr.Rd
index 83fb0e2..25360b5 100644
--- a/man/summaryCNR.Rd
+++ b/man/summary_cnr.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/summaryCNR.R
-\name{summaryCNR}
-\alias{summaryCNR}
+\name{summary_cnr}
+\alias{summary_cnr}
 \title{summary of cnr bundle}
 \usage{
-summaryCNR(cnr, detailed = FALSE, display = FALSE)
+summary_cnr(cnr, detailed = FALSE, display = FALSE)
 }
 \arguments{
 \item{cnr}{a cnr bundle}
@@ -23,6 +23,6 @@ summary of cnr bundle
 
 data(cnr)
 
-summaryCNR(cnr)
+summary_cnr(cnr)
 
 }
diff --git a/vignettes/getting_started.Rmd b/vignettes/getting_started.Rmd
index 1e3487c..eaec0bf 100644
--- a/vignettes/getting_started.Rmd
+++ b/vignettes/getting_started.Rmd
@@ -183,14 +183,14 @@ cnr <- buildCNR(X = X, Y= Y, qc = qc, chromInfo = chromInfo, gene.index = grch37
 ## example for changing loss threshold in round CNR
 cnr2 <- buildCNR(X = X, Y= Y, qc = qc, chromInfo = chromInfo, gene.index = grch37.genes.5k, loss = 1.5) 
 
-summaryCNR(cnr)
+summary_cnr(cnr)
 ```
 
 ### View what data is present in the cnr
 ### make sure numbers of cells match in all objects
 ```{r data}
 data(cnr)
-summaryCNR(cnr)
+summary_cnr(cnr)
 ```
 
 GAC makes use of [ComplexHeatmap](https://jokergoo.github.io/ComplexHeatmap-reference/book/).  ComplexHeatmap is one of the most powerful visualization tools available for R.  To use the richness of ComplexHeatmap, the `HeatmapCNR` function attempts to minimize the total number of presets.  Please visit the ComplexHeatmap documentation to take advantage of its potential.