Export helper functions tidyTranscripts, tidyExons, and tidyIntrons

Also document them. Note that before being exported these functions were named .tidy_transcripts, .tidy_exons, and .tidy_introns, respectively.
Bioconductor · Dec 5, 2018 · 0b596c1
1 parent f21fb9a
commit 0b596c1
Show file tree

Hide file tree

Showing 4 changed files with 84 additions and 8 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: GenomicFeatures
 Title: Tools for making and manipulating transcript centric annotations
-Version: 1.35.3
+Version: 1.35.4
 Encoding: UTF-8
 Author: M. Carlson, H. Pagès, P. Aboyoun, S. Falcon, M. Morgan,
 	D. Sarkar, M. Lawrence, V. Obenchain

diff --git a/NAMESPACE b/NAMESPACE
@@ -90,6 +90,7 @@ export(
   transcriptLengths,
 
   ## exonicParts.R:
+  tidyTranscripts, tidyExons, tidyIntrons,
   exonicParts, intronicParts,
 
   ## disjointExons.R:

diff --git a/R/exonicParts.R b/R/exonicParts.R
@@ -7,13 +7,17 @@
 ###
 
 
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### 3 helper functions used internally by exonicParts() and intronicParts()
+###
+
 ### Return a GRanges object with 1 range per transcript and metadata columns
 ### tx_id, tx_name, and gene_id.
 ### If 'drop.geneless' is FALSE (the default) then the transcripts are
 ### returned in the same order as with transcripts(), which is expected
 ### to be by transcript id (tx_id). Otherwise they are ordered first by
 ### gene id (gene_id), then by transcript id.
-.tidy_transcripts <- function(txdb, drop.geneless=FALSE)
+tidyTranscripts <- function(txdb, drop.geneless=FALSE)
 {
     tx <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
     mcols(tx)$gene_id <- as.character(mcols(tx)$gene_id)
@@ -51,9 +55,9 @@
 ### by transcript id (tx_id), then by exon rank (exon_rank). Otherwise they
 ### are ordered first by gene id (gene_id), then by transcript id, and then
 ### by exon rank.
-.tidy_exons <- function(txdb, drop.geneless=FALSE)
+tidyExons <- function(txdb, drop.geneless=FALSE)
 {
-    tx <- .tidy_transcripts(txdb, drop.geneless=drop.geneless)
+    tx <- tidyTranscripts(txdb, drop.geneless=drop.geneless)
     ex_by_tx <- .exons_by_txids(txdb, mcols(tx)$tx_id)
 
     ans <- unlist(ex_by_tx, use.names=FALSE)
@@ -67,9 +71,9 @@
 ### If 'drop.geneless' is FALSE (the default) then the introns are ordered
 ### by transcript id (tx_id). Otherwise they are ordered first by gene id
 ### (gene_id), then by transcript id.
-.tidy_introns <- function(txdb, drop.geneless=FALSE)
+tidyIntrons <- function(txdb, drop.geneless=FALSE)
 {
-    tx <- .tidy_transcripts(txdb, drop.geneless=drop.geneless)
+    tx <- tidyTranscripts(txdb, drop.geneless=drop.geneless)
     ex_by_tx <- .exons_by_txids(txdb, mcols(tx)$tx_id)
 
     introns_by_tx <- psetdiff(tx, ex_by_tx)
@@ -99,14 +103,19 @@
     ans
 }
 
+
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+### exonicParts() and intronicParts()
+###
+
 ### Return a disjoint and strictly sorted GRanges object with 1 range per
 ### exonic part and with metadata columns tx_id, tx_name, gene_id, exon_id,
 ### exon_name, and exon_rank.
 exonicParts <- function(txdb, linked.to.single.gene.only=FALSE)
 {
     if (!isTRUEorFALSE(linked.to.single.gene.only))
         stop("'linked.to.single.gene.only' must be TRUE or FALSE")
-    ex <- .tidy_exons(txdb, drop.geneless=linked.to.single.gene.only)
+    ex <- tidyExons(txdb, drop.geneless=linked.to.single.gene.only)
     .break_in_parts(ex, linked.to.single.gene.only)
 }
 
@@ -116,7 +125,7 @@ intronicParts <- function(txdb, linked.to.single.gene.only=FALSE)
 {
     if (!isTRUEorFALSE(linked.to.single.gene.only))
         stop("'linked.to.single.gene.only' must be TRUE or FALSE")
-    introns <- .tidy_introns(txdb, drop.geneless=linked.to.single.gene.only)
+    introns <- tidyIntrons(txdb, drop.geneless=linked.to.single.gene.only)
     .break_in_parts(introns, linked.to.single.gene.only)
 }
 
diff --git a/man/exonicParts.Rd b/man/exonicParts.Rd
@@ -1,5 +1,8 @@
 \name{exonicParts}
 
+\alias{tidyTranscripts}
+\alias{tidyExons}
+\alias{tidyIntrons}
 \alias{exonicParts}
 \alias{intronicParts}
 
@@ -15,6 +18,11 @@
 \usage{
 exonicParts(txdb, linked.to.single.gene.only=FALSE)
 intronicParts(txdb, linked.to.single.gene.only=FALSE)
+
+## 3 helper functions used internally by exonicParts() and intronicParts():
+tidyTranscripts(txdb, drop.geneless=FALSE)
+tidyExons(txdb, drop.geneless=FALSE)
+tidyIntrons(txdb, drop.geneless=FALSE)
 }
 
 \arguments{
@@ -40,6 +48,33 @@ intronicParts(txdb, linked.to.single.gene.only=FALSE)
             the set of exonic (or intronic) parts obtained previously.
     }
   }
+  \item{drop.geneless}{
+    If \code{FALSE} (the default), then all the transcripts (or exons, or
+    introns) are extracted from the \link{TxDb} object.
+
+    If \code{TRUE}, then only the transcripts (or exons, or introns) that
+    are linked to a gene are extracted from the \link{TxDb} object.
+
+    Note that \code{drop.geneless} also affects the order in which the
+    features are returned:
+    \itemize{
+      \item Transcripts: If \code{drop.geneless} is \code{FALSE} then
+            transcripts are returned in the same order as with
+            \code{\link{transcripts}}, which is expected to be by
+            internal transcript id (\code{tx_id}).
+            Otherwise they are ordered first by gene id (\code{gene_id}),
+            then by internal transcript id.
+      \item Exons: If \code{drop.geneless} is \code{FALSE} then exons are
+            ordered first by internal transcript id (\code{tx_id}),
+            then by exon rank (\code{exon_rank}).
+            Otherwise they are ordered first by gene id (\code{gene_id}),
+            then by internal transcript id, and then by exon rank.
+      \item Introns: If \code{drop.geneless} is \code{FALSE} then introns
+            are ordered by internal transcript id (\code{tx_id}).
+            Otherwise they are ordered first by gene id (\code{gene_id}),
+            then by internal transcript id.
+    }
+  }
 }
 
 \value{
@@ -51,6 +86,19 @@ intronicParts(txdb, linked.to.single.gene.only=FALSE)
   \code{intronicParts} returns a disjoint and strictly sorted
   \link[GenomicRanges]{GRanges} object with 1 range per intronic part
   and with metadata columns \code{tx_id}, \code{tx_name}, and \code{gene_id}.
+
+  \code{tidyTranscripts} returns a \link[GenomicRanges]{GRanges} object
+  with 1 range per transcript and with metadata columns \code{tx_id},
+  \code{tx_name}, and \code{gene_id}.
+
+  \code{tidyExons} returns a \link[GenomicRanges]{GRanges} object
+  with 1 range per exon and with metadata columns \code{tx_id},
+  \code{tx_name}, \code{gene_id}, \code{exon_id}, \code{exon_name},
+  and \code{exon_rank}.
+
+  \code{tidyIntrons} returns a \link[GenomicRanges]{GRanges} object
+  with 1 range per intron and with metadata columns \code{tx_id},
+  \code{tx_name}, and \code{gene_id}.
 }
 
 \note{
@@ -158,6 +206,24 @@ stopifnot(identical(
     lengths(mcols(intronic_parts1)$gene_id) == 1L,
     intronic_parts1 \%within\% intronic_parts2
 ))
+
+## ---------------------------------------------------------------------
+## Helper functions
+## ---------------------------------------------------------------------
+
+tidyTranscripts(txdb)                      # Ordered by 'tx_id'.
+tidyTranscripts(txdb, drop.geneless=TRUE)  # Ordered first by 'gene_id',
+                                           # then by 'tx_id'.
+
+tidyExons(txdb)                            # Ordered first by 'tx_id',
+                                           # then by 'exon_rank'.
+tidyExons(txdb, drop.geneless=TRUE)        # Ordered first by 'gene_id',
+                                           # then by 'tx_id',
+                                           # then by 'exon_rank'.
+
+tidyIntrons(txdb)                          # Ordered by 'tx_id'.
+tidyIntrons(txdb, drop.geneless=TRUE)      # Ordered first by 'gene_id',
+                                           # then by 'tx_id'.
 }
 
 \keyword{manip}