Skip to content

Commit

Permalink
Export helper functions tidyTranscripts, tidyExons, and tidyIntrons
Browse files Browse the repository at this point in the history
Also document them.

Note that before being exported these functions were named
.tidy_transcripts, .tidy_exons, and .tidy_introns, respectively.
  • Loading branch information
hpages committed Dec 5, 2018
1 parent f21fb9a commit 0b596c1
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 8 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: GenomicFeatures
Title: Tools for making and manipulating transcript centric annotations
Version: 1.35.3
Version: 1.35.4
Encoding: UTF-8
Author: M. Carlson, H. Pagès, P. Aboyoun, S. Falcon, M. Morgan,
D. Sarkar, M. Lawrence, V. Obenchain
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -90,6 +90,7 @@ export(
transcriptLengths,

## exonicParts.R:
tidyTranscripts, tidyExons, tidyIntrons,
exonicParts, intronicParts,

## disjointExons.R:
Expand Down
23 changes: 16 additions & 7 deletions R/exonicParts.R
Expand Up @@ -7,13 +7,17 @@
###


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### 3 helper functions used internally by exonicParts() and intronicParts()
###

### Return a GRanges object with 1 range per transcript and metadata columns
### tx_id, tx_name, and gene_id.
### If 'drop.geneless' is FALSE (the default) then all the transcripts are
### returned, in the same order as with transcripts(), which is expected
### to be by transcript id (tx_id). If it is TRUE then the transcripts that
### are not linked to a gene are dropped and the remaining ones are ordered
### first by gene id (gene_id), then by transcript id.
.tidy_transcripts <- function(txdb, drop.geneless=FALSE)
tidyTranscripts <- function(txdb, drop.geneless=FALSE)
{
tx <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
mcols(tx)$gene_id <- as.character(mcols(tx)$gene_id)
Expand Down Expand Up @@ -51,9 +55,9 @@
### by transcript id (tx_id), then by exon rank (exon_rank). Otherwise they
### are ordered first by gene id (gene_id), then by transcript id, and then
### by exon rank.
.tidy_exons <- function(txdb, drop.geneless=FALSE)
tidyExons <- function(txdb, drop.geneless=FALSE)
{
tx <- .tidy_transcripts(txdb, drop.geneless=drop.geneless)
tx <- tidyTranscripts(txdb, drop.geneless=drop.geneless)
ex_by_tx <- .exons_by_txids(txdb, mcols(tx)$tx_id)

ans <- unlist(ex_by_tx, use.names=FALSE)
Expand All @@ -67,9 +71,9 @@
### If 'drop.geneless' is FALSE (the default) then all the introns are
### returned, ordered by transcript id (tx_id). If it is TRUE then the
### introns that are not linked to a gene are dropped and the remaining
### ones are ordered first by gene id (gene_id), then by transcript id.
.tidy_introns <- function(txdb, drop.geneless=FALSE)
tidyIntrons <- function(txdb, drop.geneless=FALSE)
{
tx <- .tidy_transcripts(txdb, drop.geneless=drop.geneless)
tx <- tidyTranscripts(txdb, drop.geneless=drop.geneless)
ex_by_tx <- .exons_by_txids(txdb, mcols(tx)$tx_id)

introns_by_tx <- psetdiff(tx, ex_by_tx)
Expand Down Expand Up @@ -99,14 +103,19 @@
ans
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### exonicParts() and intronicParts()
###

### Return a disjoint and strictly sorted GRanges object with 1 range per
### exonic part and with metadata columns tx_id, tx_name, gene_id, exon_id,
### exon_name, and exon_rank.
exonicParts <- function(txdb, linked.to.single.gene.only=FALSE)
{
if (!isTRUEorFALSE(linked.to.single.gene.only))
stop("'linked.to.single.gene.only' must be TRUE or FALSE")
ex <- .tidy_exons(txdb, drop.geneless=linked.to.single.gene.only)
ex <- tidyExons(txdb, drop.geneless=linked.to.single.gene.only)
.break_in_parts(ex, linked.to.single.gene.only)
}

Expand All @@ -116,7 +125,7 @@ intronicParts <- function(txdb, linked.to.single.gene.only=FALSE)
{
if (!isTRUEorFALSE(linked.to.single.gene.only))
stop("'linked.to.single.gene.only' must be TRUE or FALSE")
introns <- .tidy_introns(txdb, drop.geneless=linked.to.single.gene.only)
introns <- tidyIntrons(txdb, drop.geneless=linked.to.single.gene.only)
.break_in_parts(introns, linked.to.single.gene.only)
}

66 changes: 66 additions & 0 deletions man/exonicParts.Rd
@@ -1,5 +1,8 @@
\name{exonicParts}

\alias{tidyTranscripts}
\alias{tidyExons}
\alias{tidyIntrons}
\alias{exonicParts}
\alias{intronicParts}

Expand All @@ -15,6 +18,11 @@
\usage{
exonicParts(txdb, linked.to.single.gene.only=FALSE)
intronicParts(txdb, linked.to.single.gene.only=FALSE)

## 3 helper functions used internally by exonicParts() and intronicParts():
tidyTranscripts(txdb, drop.geneless=FALSE)
tidyExons(txdb, drop.geneless=FALSE)
tidyIntrons(txdb, drop.geneless=FALSE)
}

\arguments{
Expand All @@ -40,6 +48,33 @@ intronicParts(txdb, linked.to.single.gene.only=FALSE)
the set of exonic (or intronic) parts obtained previously.
}
}
\item{drop.geneless}{
If \code{FALSE} (the default), then all the transcripts (or exons, or
introns) get extracted from the \link{TxDb} object.

If \code{TRUE}, then only the transcripts (or exons, or introns) that
are linked to a gene get extracted from the \link{TxDb} object.

Note that \code{drop.geneless} also impacts the order in which the
features are returned:
\itemize{
\item Transcripts: If \code{drop.geneless} is \code{FALSE} then
transcripts are returned in the same order as with
\code{\link{transcripts}}, which is expected to be by
internal transcript id (\code{tx_id}).
Otherwise they are ordered first by gene id (\code{gene_id}),
then by internal transcript id.
\item Exons: If \code{drop.geneless} is \code{FALSE} then exons are
ordered first by internal transcript id (\code{tx_id}),
then by exon rank (\code{exon_rank}).
Otherwise they are ordered first by gene id (\code{gene_id}),
then by internal transcript id, and then by exon rank.
\item Introns: If \code{drop.geneless} is \code{FALSE} then introns
are ordered by internal transcript id (\code{tx_id}).
Otherwise they are ordered first by gene id (\code{gene_id}),
then by internal transcript id.
}
}
}

\value{
Expand All @@ -51,6 +86,19 @@ intronicParts(txdb, linked.to.single.gene.only=FALSE)
\code{intronicParts} returns a disjoint and strictly sorted
\link[GenomicRanges]{GRanges} object with 1 range per intronic part
and with metadata columns \code{tx_id}, \code{tx_name}, and \code{gene_id}.

\code{tidyTranscripts} returns a \link[GenomicRanges]{GRanges} object
with 1 range per transcript and with metadata columns \code{tx_id},
\code{tx_name}, and \code{gene_id}.

\code{tidyExons} returns a \link[GenomicRanges]{GRanges} object
with 1 range per exon and with metadata columns \code{tx_id},
\code{tx_name}, \code{gene_id}, \code{exon_id}, \code{exon_name},
and \code{exon_rank}.

\code{tidyIntrons} returns a \link[GenomicRanges]{GRanges} object
with 1 range per intron and with metadata columns \code{tx_id},
\code{tx_name}, and \code{gene_id}.
}

\note{
Expand Down Expand Up @@ -158,6 +206,24 @@ stopifnot(identical(
lengths(mcols(intronic_parts1)$gene_id) == 1L,
intronic_parts1 \%within\% intronic_parts2
))

## ---------------------------------------------------------------------
## Helper functions
## ---------------------------------------------------------------------

tidyTranscripts(txdb) # Ordered by 'tx_id'.
tidyTranscripts(txdb, drop.geneless=TRUE) # Ordered first by 'gene_id',
# then by 'tx_id'.

tidyExons(txdb) # Ordered first by 'tx_id',
# then by 'exon_rank'.
tidyExons(txdb, drop.geneless=TRUE) # Ordered first by 'gene_id',
# then by 'tx_id',
# then by 'exon_rank'.

tidyIntrons(txdb) # Ordered by 'tx_id'.
tidyIntrons(txdb, drop.geneless=TRUE) # Ordered first by 'gene_id',
# then by 'tx_id'.
}

\keyword{manip}

0 comments on commit 0b596c1

Please sign in to comment.