Second Commit to Bioc Revisions

RussBainer · Aug 4, 2016 · 127ee47 · 127ee47
1 parent a3216d6
commit 127ee47
Show file tree

Hide file tree

Showing 20 changed files with 206 additions and 108 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: gCrisprTools
 Type: Package
 Title: Suite of Functions for Pooled Crispr Screen QC and Analysis
-Version: 0.99.5
-Date: 2016-08-01
+Version: 1.2.49
+Date: 2016-08-04
 Author: Russell Bainer, Dariusz Ratman, Pete Haverty, Steve Lianoglou
 Maintainer: Russell Bainer <bainer.russell@gene.com>
 Description: Set of tools for evaluating pooled high-throughput screening experiments, 
@@ -53,4 +53,4 @@ biocViews:
     RNASeq, 
     Regression, 
     Software,
-    Visualization
+    Visualization
diff --git a/NAMESPACE b/NAMESPACE
@@ -38,12 +38,8 @@ import(BiocParallel)
 import(PANTHER.db)
 import(RobustRankAggreg)
 import(ggplot2)
-import(grDevices)
-import(graphics)
 import(limma)
 import(parallel)
-import(stats)
-import(utils)
 importFrom(grDevices,colorRampPalette)
 importFrom(grDevices,rgb)
 importFrom(graphics,abline)

diff --git a/R/GenePvalsFromEset.R b/R/GenePvalsFromEset.R
@@ -24,7 +24,7 @@
 ##' @author Russell Bainer
 ##' @examples data('fit')
 ##' data('ann')
-##' output <- ct.generateResults(fit, ann)
+##' output <- ct.generateResults(fit, ann, permutations = 10)
 ##' head(output)
 ##' @export
 

diff --git a/R/GeneratePRC.R b/R/GeneratePRC.R
@@ -25,7 +25,7 @@
 ct.PRC <-
   function(summaryDF,
            target.list,
-           stat = c("enrich.p", "deplete.p", "enrich.fc", "deplete.fc"), 
+           stat = c("enrich.p", "deplete.p", "enrich.fc", "deplete.fc", "enrich.rho", "deplete.rho"), 
            plot.it = TRUE) {
 
 
@@ -56,15 +56,19 @@ ct.PRC <-
          enrich.p = (summaryDF[(summaryDF$geneID %in% present),"Target-level Enrichment P"]), 
          deplete.p = (summaryDF[(summaryDF$geneID %in% present),"Target-level Depletion P"]), 
          enrich.fc = (-summaryDF[(summaryDF$geneID %in% present),"Median log2 Fold Change"]), 
-         deplete.fc = (summaryDF[(summaryDF$geneID %in% present),"Median log2 Fold Change"])
-        )   
+         deplete.fc = (summaryDF[(summaryDF$geneID %in% present),"Median log2 Fold Change"]),
+         enrich.rho = (summaryDF[(summaryDF$geneID %in% present),"RhoRank_enrich"]),
+         deplete.rho = (summaryDF[(summaryDF$geneID %in% present),"RhoRank_deplete"])
+    )   
     #Extract the appropriate stat. 
     values <- switch(stat, 
         enrich.p = sort(summaryDF[,"Target-level Enrichment P"]), 
         deplete.p = sort(summaryDF[,"Target-level Depletion P"]), 
         enrich.fc = sort(-summaryDF[,"Median log2 Fold Change"]), 
-        deplete.fc = sort(summaryDF[,"Median log2 Fold Change"])
-        )
+        deplete.fc = sort(summaryDF[,"Median log2 Fold Change"]),
+        enrich.rho = sort(summaryDF[,"RhoRank_enrich"]), 
+        deplete.rho = sort(summaryDF[,"RhoRank_deplete"])
+    )
 
     out <- list()
     out$precision <- c(1, unlist(lapply(unique(values), function(x){sum(targvals <= x, na.rm = TRUE)/sum(values <= x, na.rm= TRUE)})), 0)
@@ -74,7 +78,9 @@ ct.PRC <-
                      enrich.p = ct.targetSetEnrichment(summaryDF, target.list, enrich = TRUE),
                      deplete.p =  ct.targetSetEnrichment(summaryDF, target.list, enrich = FALSE),
                      enrich.fc =  ct.targetSetEnrichment(summaryDF, target.list, enrich = TRUE),
-                     deplete.fc =  ct.targetSetEnrichment(summaryDF, target.list, enrich = FALSE)
+                     deplete.fc =  ct.targetSetEnrichment(summaryDF, target.list, enrich = FALSE),
+                     enrich.rho = ct.targetSetEnrichment(summaryDF, target.list, enrich = TRUE),
+                     deplete.rho = ct.targetSetEnrichment(summaryDF, target.list, enrich = FALSE)
     )
     out <- c(out, enrich)
 

diff --git a/R/GenerateROC.R b/R/GenerateROC.R
@@ -12,7 +12,7 @@
 ##' @param target.list A character vector containing the names of the targets to be tested. Only targets contained in the \code{geneID} 
 ##' column of the provided \code{summaryDF} are considered.
 ##' @param stat The statistic to use when ordering the genes. Must be one of \code{"enrich.p"}, \code{"deplete.p"}, \code{"enrich.fc"}, 
-##' or \code{"deplete.fc"}. 
+##' \code{"deplete.fc"}, \code{"enrich.rho"}, or \code{"deplete.rho"}. 
 ##' @param condense Logical indicating whether the returned x and y coordinates should be "condensed", returning only the points at which 
 ##' the detected proportion of \code{target.list} changes. If set to \code{FALSE}, the returned \code{x} and \code{y} vectors will explicitly
 ##' indicate the curve value at every position (useful for performing curve arithmetic downstream).   
@@ -28,7 +28,7 @@
 ct.ROC <-
   function(summaryDF,
            target.list,
-           stat = c("enrich.p", "deplete.p", "enrich.fc", "deplete.fc"),
+           stat = c("enrich.p", "deplete.p", "enrich.fc", "deplete.fc", "enrich.rho", "deplete.rho"),
            condense = TRUE, 
            plot.it = TRUE) {
 
@@ -60,16 +60,20 @@ ct.ROC <-
          enrich.p = (summaryDF[(summaryDF$geneID %in% present),"Target-level Enrichment P"]), 
          deplete.p = (summaryDF[(summaryDF$geneID %in% present),"Target-level Depletion P"]), 
          enrich.fc = (-summaryDF[(summaryDF$geneID %in% present),"Median log2 Fold Change"]), 
-         deplete.fc = (summaryDF[(summaryDF$geneID %in% present),"Median log2 Fold Change"])
-        )   
+         deplete.fc = (summaryDF[(summaryDF$geneID %in% present),"Median log2 Fold Change"]),
+         enrich.rho = (summaryDF[(summaryDF$geneID %in% present),"RhoRank_enrich"]),
+         deplete.rho = (summaryDF[(summaryDF$geneID %in% present),"RhoRank_deplete"])
+    )   
 
     #Extract the appropriate stat for the curve 
     values <- switch(stat, 
         enrich.p = sort(summaryDF[,"Target-level Enrichment P"]), 
         deplete.p = sort(summaryDF[,"Target-level Depletion P"]), 
         enrich.fc = sort(-summaryDF[,"Median log2 Fold Change"]), 
-        deplete.fc = sort(summaryDF[,"Median log2 Fold Change"])
-        )
+        deplete.fc = sort(summaryDF[,"Median log2 Fold Change"]),
+        enrich.rho = sort(summaryDF[,"RhoRank_enrich"]), 
+        deplete.rho = sort(summaryDF[,"RhoRank_deplete"])
+    )
 
 
     out <- list()

diff --git a/R/RRAalpha.R b/R/RRAalpha.R
@@ -39,7 +39,6 @@ ct.numcores <- function()  {
 ##' betaScores((testp))
 ##' ct.alphaBeta(testp)  
 ##' @export
-
 ct.alphaBeta <- function(p.in){ 
   p.in <- na.omit(p.in)
   n <- length(p.in)  

diff --git a/R/Raw_read_densities.R b/R/Raw_read_densities.R
@@ -21,7 +21,7 @@
 ##' 
 ##' ct.rawCountDensities(es, sk)
 ##' @export
-ct.rawCountDensities <- function(eset, sampleKey){
+ct.rawCountDensities <- function(eset, sampleKey = NULL){
 
   if(class(eset) != "ExpressionSet"){stop(paste(deparse(substitute(eset)), "is not an ExpressionSet."))}
 
@@ -37,7 +37,7 @@ ct.rawCountDensities <- function(eset, sampleKey){
   densities <- apply(e.dat, 2, density)
 
   y <- c(0, max(unlist(lapply(densities, function(dens){max(dens$y)}))))
-  x <- c(0,  max(ceiling(unlist(lapply(densities, function(dens){max(dens$x)})))));  
+  x <- c(0, max(ceiling(unlist(lapply(densities, function(dens){max(dens$x)})))));  
   plot(x[1], y[1], 
        xlim = x, ylim = y, 
        xlab = "gRNA Read Counts (Raw)", 

diff --git a/R/ViewGuides.R b/R/ViewGuides.R
@@ -88,7 +88,7 @@ ct.viewGuides <- function(gene, fit, ann, type = "geneSymbol", contrast.term = N
 
   #Find the gRNAs targeting the gene from the annotation, and order them
   options(warn=-1)
-  ann <- ct.prepareAnnotation(ann, fit)
+  ann <- ct.prepareAnnotation(ann, fit, controls = FALSE)
   options(warn=0)
 
   if(!(sum(ann[,type] %in% gene))){stop(paste(gene, "is not present in the annotation file."))}

diff --git a/R/ViewTopTargets.R b/R/ViewTopTargets.R
@@ -1,20 +1,20 @@
 ##' @title Display the log2 fold change estimates and associated standard deviations of the guides targeting the top 
 ##' candidates in a crispr screen
-##' @description This is a function for displaying the top candidates from a crispr screen, using the information summarized 
-##' in the corresponding \code{fit} and the output from \code{ct.generateResults()}. The results are plotted as a 
-##' classed scatterplot of the top \code{ntargets} upregulated and downregulated targets in the screen, as indicated by the 
-##' relevant p-values contained in the \code{summaryDF}. The fold change and standard deviation estimates for each gRNA targeting each target
-##' (extracted from the \code{coefficients} and \code{stdev.unscaled} slot of \code{fit}) are plotted on the y axis. Targets are 
-##' selected on the basis of their gene-level P-values; in the case of ties, they are ranked on the basis of the median gRNA 
-##' fold change. 
+##' @description This is a function for displaying candidates from a crispr screen, using the information summarized 
+##' in the corresponding \code{fit} and the output from \code{ct.generateResults()}. The fold change and standard deviation 
+##' estimates for each gRNA associated with each target (extracted from the \code{coefficients} and \code{stdev.unscaled} slot 
+##' of \code{fit}) are plotted on the y axis. Targets are selected on the basis of their gene-level enrichment or depletion 
+##' P-values; in the case of ties, they are ranked on the basis of their corresponding Rho statistics. 
 ##' @param fit An object of class \code{MArrayLM} containing, at minimum, a \code{coefficents} slot with coefficients from the comparison, 
 ##' and a \code{stdev.unscaled} slot with the corresponding standard deviation of the coefficent estimates. The \code{row.names} attribute 
 ##' should ideally match that which is found in \code{annotation}.
 ##' @param summaryDF A data.frame summarizing the results of the screen, returned by the function \code{\link{ct.generateResults}}. 
 ##' @param annotation An annotation file for the experiment, usually extracted with \code{ep.load.annot()} in ExpressionPlot. gRNAs are annotated by 
 ##' row, and must minimally contain a column \code{geneSymbol}. 
-##' @param ntargets The number of targets to display. 
-##' @param enrich Logical indicating whether to display guides that are enriched (default) or depleted within the screen. 
+##' @param targets Either the number of top targets to display, or a list of \code{geneSymbol}s contained in the \code{geneSymbol} 
+##' slot of the \code{annotation} object. 
+##' @param enrich Logical indicating whether to display guides that are enriched (default) or depleted within the screen. If a vector of 
+##' \code{geneSymbol}s is specified, this controls the left-to-right ordering of the corresponding gRNAs. 
 ##' @param contrast.term If a fit object with multiple coefficients is passed in, a string indiating the coefficient of interest.   
 ##' @return An image on the default device indicating each gRNA's log2 fold change and the unscaled standard deviation of the effect estimate, 
 ##' derived from the \code{MArrayLM} object.
@@ -27,7 +27,7 @@
 ##' ct.topTargets(fit, resultsDF, ann) 
 ##' @export
 
-ct.topTargets <- function(fit, summaryDF, annotation, ntargets = 10, enrich = TRUE, contrast.term = NULL){
+ct.topTargets <- function(fit, summaryDF, annotation, targets = 10, enrich = TRUE, contrast.term = NULL){
   current.graphic.params <- par(no.readonly = TRUE)
   on.exit(suppressWarnings(par(current.graphic.params)))
 
@@ -42,14 +42,14 @@ ct.topTargets <- function(fit, summaryDF, annotation, ntargets = 10, enrich = TR
     #Test input: 
     #testing
     if(class(fit) != "MArrayLM"){stop(paste(deparse(substitute(eset)), "is not an MArrayLM."))}
-    if(ntargets <= 0){stop("Please specify a positive number of targets to display.")}
+
     if(!setequal(row.names(annotation), row.names(fit))){
       warning("row.names of the fit object and the annotation file are not identical. Using the intersection only.")
         grnas <- intersect(row.names(fit), row.names(annotation))
         fit <- fit[grnas,]
         annotation <- annotation[grnas,]
     }
-    
+
   if(!(enrich %in% c(TRUE, FALSE))){
     stop('enrich must be either TRUE or FALSE.')
   }
@@ -63,21 +63,32 @@ ct.topTargets <- function(fit, summaryDF, annotation, ntargets = 10, enrich = TR
     summaryDF$geneSymbol <- as.character(summaryDF$geneSymbol)
     summaryDF <- summaryDF[with(summaryDF, 
                                order(summaryDF[,"Target-level Enrichment P"], 
-                                      -summaryDF[,"RhoRank_enrich"], 
+                                      summaryDF[,"RhoRank_enrich"], 
                                       summaryDF[,"geneSymbol"], 
                                       -summaryDF[,"gRNA Log2 Fold Change"])),]   
-    plottitle <- paste0("Top ", ntargets, " Enriched Targets")
+    plottitle <- "Enriched Targets"
 
     if(enrich == FALSE){
       summaryDF <- summaryDF[with(summaryDF, 
                                   order(summaryDF[,"Target-level Depletion P"], 
                                         summaryDF[,"RhoRank_deplete"], 
                                         summaryDF[,"geneSymbol"], 
                                         summaryDF[,"gRNA Log2 Fold Change"])),]   
-      plottitle <- paste0("Top ", ntargets, " Depleted Targets")
+      plottitle <- "Depleted Targets"
     }
 
-    toptargets <- unique(summaryDF$geneSymbol)[1:ntargets]
+    if(is.character(targets)){
+      toptargets <- intersect(targets, annotation$geneSymbol)
+      ntargets <- length(toptargets)
+      plottitle <- ''
+    } else {
+      if((length(targets) != 1) | !is.numeric(targets)){stop('"targets" must be specified as a single number or a vector of elements in the geneSymbol column of the annotation object.')}
+      ntargets <- targets
+      plottitle <- paste('Top', ntargets, plottitle)
+      toptargets <- unique(summaryDF$geneSymbol)[1:ntargets]
+    } 
+    if(ntargets <= 0){stop("No valid targets were specified.")}
+
     targetrows <- row.names(summaryDF)[(summaryDF$geneSymbol %in% toptargets)]
     nguides <- unlist(lapply(toptargets, function(x){sum(summaryDF$geneSymbol %in% x, na.rm = TRUE)}))
 

diff --git a/R/gCrisprTools.R b/R/gCrisprTools.R
@@ -13,19 +13,15 @@ NULL
 ##' @import parallel
 ##' @import BiocParallel
 ##' @import PANTHER.db
-##' @import grDevices
-##' @import graphics
-##' @import stats 
-##' @import utils
-NULL
-
 ##' @importFrom grDevices colorRampPalette rgb
 ##' @importFrom graphics abline axis barplot layout legend lines mtext par plot points polygon segments hist
 ##' @importFrom stats density lm median na.omit p.adjust pbeta phyper predict pt smooth.spline spline
 ##' @importFrom utils capture.output getFromNamespace 
 ##' @importFrom rmarkdown render draft 
 NULL
 
+
+
 ##' @name es
 ##' @aliases es
 ##' @docType data

diff --git a/inst/gCrisprTools_package_design.pptx b/inst/gCrisprTools_package_design.pptx
diff --git a/inst/rmarkdown/templates/CRISPR_QC_report/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/CRISPR_QC_report/skeleton/skeleton.Rmd
@@ -60,6 +60,29 @@ ct.rawCountDensities(eset = params$eset, sampleKey = params$sampleKey)
 <br/>
 <br/>
 
+#####GC Content and gRNA Abundance
+
+GC content can influence PCR efficiency, and a strong relationship between GC content and gRNA abundance may be evidence of poor viral library quality or sample preparation in Crispr screens. Ideally, there should be no clear relationship between GC content and a gRNA's measured abundance. Keep in mind that some libraries may contain important subsets of gRNAs that have systematic differences in GC content (e.g., nontargeting controls) which may affect this.  
+
+```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap="Figure 3: GC content and gRNA Abundance"}
+if (is.null(params$sampleKey)) {
+  ct.GCbias(data.obj = params$eset, 
+            ann = params$annotation, 
+            lib.size = params$lib.size
+            )
+} else {
+   ct.GCbias(data.obj = params$eset, 
+            ann = params$annotation, 
+            lib.size = params$lib.size,
+            sampleKey = params$sampleKey
+            )
+}
+```
+
+<br/>
+<br/>
+<br/>
+
 ###Section 2: Library-level Distortion of gRNA Abundances
 
 Pooled CRISPR screens are fundamentally dynamic experiments, in which the
@@ -84,7 +107,7 @@ may have failed if library-level distortion is not readily apparent. The
 locations of nontargeting control guides are indicated as diamonds along each
 distribution.
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap= "Figure 3: Ranked gRNA Distributions by Replicate"}
+```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap= "Figure 4: Ranked gRNA Distributions by Replicate"}
 ct.gRNARankByReplicate(
   eset = params$eset, 
   sampleKey = params$sampleKey, 
@@ -109,7 +132,7 @@ of reads that the top *N* targets represent (*Y* axis), we can tell if the
 treatment libraries essentially only contain cassettes targeting a small number
 of genes, or if most of the cells survive the selection step.
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap= "Figure 4: Per-Replicate gRNA Cumulative Distributions"}
+```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap= "Figure 5: Per-Replicate gRNA Cumulative Distributions"}
 ct.guideCDF(
   eset = params$eset,
   sampleKey = params$sampleKey,
@@ -132,7 +155,7 @@ necessarily critical that the relative abundances of nontargeting gRNAs remain
 superficially identical across treatments, but extremely large changes may
 indicate that individual gRNA levels are not likely to be well estimated.
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap="Figure 5: Nontargeting Control Behavior Before and After Median Scaling"}
+```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=10, fig.width=10, fig.cap="Figure 6: Nontargeting Control Behavior Before and After Median Scaling"}
 tryCatch(
   ct.viewControls(
     eset = params$eset,
@@ -176,7 +199,7 @@ the samples are not of the expected shape it may be necessary to rerun the filte
 with different parameter choices (see `ct.filterReads()` manual page for details). 
 
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=20, fig.width=10, fig.cap="Figure 6: gRNA Density Before and After Filtering with the Default Parameters"}
+```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.height=20, fig.width=10, fig.cap="Figure 7: gRNA Density Before and After Filtering with the Default Parameters"}
 filt.eset <- ct.filterReads(
   eset = params$eset,
   trim = params$trim,