Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added bedTracks function (incl. tests) #39

Merged
merged 2 commits into from
Mar 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ Unreleased
------------
NEW FEATURES

* new bedTracks()/writeBedTracks() functions generating BED6-style
GRangesList/writing BED files for circRNA browser tracks

IMPROVEMENTS AND BUG FIXES


Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Imports:
IRanges,
RColorBrewer,
RMySQL,
rtracklayer,
S4Vectors,
stringr,
SummarizedExperiment
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(annotateCircs)
export(annotateFlanks)
export(annotateHostGenes)
export(annotateJunctions)
export(bedTracks)
export(circLinRatio)
export(getStudiesList)
export(gtf2sqlite)
Expand All @@ -18,6 +19,7 @@ export(resTable)
export(summarizeCircs)
export(testCoordinateIndexing)
export(uniqReadsQC)
export(writeBedTracks)
import(AnnotationHub)
import(DBI)
import(RMySQL)
Expand All @@ -29,6 +31,7 @@ import(stringr)
importFrom(AnnotationDbi,loadDb)
importFrom(AnnotationDbi,saveDb)
importFrom(GenomicFeatures,makeTxDbFromGRanges)
importFrom(GenomicRanges,GRangesList)
importFrom(GenomicRanges,makeGRangesFromDataFrame)
importFrom(GenomicRanges,reduce)
importFrom(GenomicRanges,resize)
Expand All @@ -45,3 +48,5 @@ importFrom(data.table,set)
importFrom(data.table,setnames)
importFrom(hash,hash)
importFrom(hash,keys)
importFrom(rtracklayer,export)
importFrom(rtracklayer,score)
3 changes: 2 additions & 1 deletion R/circus.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
#' @importFrom biomaRt useMart useDataset getBM
#' @importFrom data.table data.table rbindlist dcast.data.table set setnames fread
#' @import DBI
#' @importFrom GenomicRanges makeGRangesFromDataFrame resize reduce
#' @importFrom GenomicRanges makeGRangesFromDataFrame resize reduce GRangesList
#' @importFrom GenomicFeatures makeTxDbFromGRanges
#' @import ggplot2
#' @importFrom hash hash keys
#' @importFrom IRanges findOverlaps
#' @import methods
#' @importFrom RColorBrewer brewer.pal
#' @import RMySQL
#' @importFrom rtracklayer export score
#' @import S4Vectors
#' @import stringr
#' @import SummarizedExperiment
Expand Down
201 changes: 201 additions & 0 deletions R/exportData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# ---------------------------------------------------------------------------- #
#' bedTracks
#'
#' Export circRNA information from given \code{SummarizedExperiment} object to
#' \code{BED6}-style \code{GRangesList}.
#' This is useful to generate genome browser tracks showing circRNA candidates
#' per sample.
#'
#'
#' @param se \code{SummarizedExperiment} object with circRNA information
#' @param score \code{character} vector (only first element will be used)
#'              naming the assay to use as \code{BED} score (\code{NULL} to
#'              omit BED score and use `.` as a placeholder instead)
#' @param min.score \code{numeric} vector (only first element will be used)
#'                  with minimal score a circRNA must have to be included in
#'                  the \code{BED} output for a given sample (\code{NULL} to
#'                  include all circRNAs for all samples, even if they were
#'                  not detected in that sample); defaults to \code{1} if a
#'                  \code{score} is given and to \code{NULL} otherwise
#' @param max.score \code{numeric} vector (only first element will be used)
#'                  with maximal score to be used in \code{BED} output (higher
#'                  scores will be truncated) (\code{NULL} to keep scores
#'                  unlimited, ignoring the \code{BED} format definition);
#'                  defaults to \code{1000} if a \code{score} is given and to
#'                  \code{NULL} otherwise
#'
#' @return returns a GRangesList object with BED6 circRNA data per sample
#'
#'
#' @docType methods
#' @rdname bedTracks-methods
#'
#' @export
setGeneric("bedTracks",
           function(se,
                    score = "circ.uniq",
                    # score limits only make sense if there is a score, so
                    # both defaults collapse to NULL when `score` is NULL
                    min.score = if (is.null(score)) NULL else 1,
                    max.score = if (is.null(score)) NULL else 1000)
             standardGeneric("bedTracks"))

#' @aliases bedTracks,RangedSummarizedExperiment-method
#' @rdname bedTracks-methods
setMethod("bedTracks",
          signature("RangedSummarizedExperiment"),
          definition = function(se, score, min.score, max.score) {

  # check input: every option is used as a scalar (or NULL), so only the
  # first entry of a longer vector is kept
  if (length(score) > 1) {
    warning("length(score) > 1; only first entry will be used")
    score <- score[1]
  }
  if (length(min.score) > 1) {
    warning("length(min.score) > 1; only first entry will be used")
    min.score <- min.score[1]
  }
  if (length(max.score) > 1) {
    warning("length(max.score) > 1; only first entry will be used")
    max.score <- max.score[1]
  }

  # fall back to score-less output if the requested assay does not exist
  if (!is.null(score)) {
    if (!(score %in% names(assays(se)))) {
      warning(paste0("no assay named '", score, "'",
                     "; BED output will be generated without scores"))
      score <- NULL
    }
  }

  # without a score there is nothing to filter on or to truncate, so both
  # limits are dropped (each with its own warning)
  if (is.null(score) && !is.null(min.score)) {
    warning("no BED score defined; circRNAs will not be filtered")
    min.score <- NULL
  }
  if (is.null(score) && !is.null(max.score)) {
    warning("no BED score defined; BED scores will not be truncated")
    # reset the upper limit (not the lower one) so the truncation step below
    # is skipped and the `.` placeholders are left untouched
    max.score <- NULL
  }

  # get circRNA coordinates
  ranges <- rowRanges(se)

  # drop metadata columns
  mcols(ranges) <- NULL

  # get score matrix (circRNA x sample)
  if (!is.null(score)) {
    scores <- assays(se)[[score]]

  # use `.` as score if no score assay was specified (1 x sample)
  } else {
    message("no BED score defined; using `.` as placeholder")
    scores <- matrix(rep(".", ncol(se)), nrow = 1)
    colnames(scores) <- colnames(se)
  }

  # setup GRangesList with circRNA coordinates scored per sample
  sample.names <- colnames(scores)
  ranges <- lapply(sample.names,
                   function(sample.name) {
                     mcols(ranges)$score <- scores[, sample.name]
                     ranges
                   })
  names(ranges) <- sample.names
  ranges <- GRangesList(ranges)

  # filter out circRNA not passing the minimal score (per sample)
  if (!is.null(min.score)) {
    ranges <- endoapply(ranges,
                        function(gr) {
                          subset(gr,
                                 rtracklayer::score(gr) >= min.score)
                        })
  }

  # truncate score at given maximum
  if (!is.null(max.score)) {
    ranges <- endoapply(ranges,
                        function(gr) {
                          too.high <- rtracklayer::score(gr) > max.score
                          mcols(gr)$score[too.high] <- max.score
                          gr
                        })
  }

  # return GRangesList
  return(ranges)
})


# ---------------------------------------------------------------------------- #
#' writeBedTracks
#'
#' Write circRNA information from given \code{BED6}-style \code{GRangesList} (or
#' \code{SummarizedExperiment} object) to \code{BED} files (one per sample).
#' These can be loaded as genome browser tracks showing circRNA candidates.
#'
#'
#' @param circs \code{GRangesList} or \code{SummarizedExperiment} object with
#'              circRNA information
#' @param out.prefix \code{character} vector (only first element will be used)
#'                   specifying the prefix to use for output files (before the
#'                   sample name)
#' @param out.suffix \code{character} vector (only first element will be used)
#'                   specifying the suffix to use for output files (after the
#'                   sample name)
#' @param seqlevels.style \code{character} vector (only first element will be
#'                        used) specifying the seqlevels style to use for the
#'                        output BED files (\code{NULL} to keep the seqlevels
#'                        style unchanged)
#' @param ... named arguments defined above to be passed on from
#'            \code{SummarizedExperiment}-method to \code{GRangesList}-method
#'
#' @return None; called for its side effect of writing one \code{BED} file
#'         per sample
#'
#'
#' @docType methods
#' @rdname writeBedTracks-methods
#'
#' @export
setGeneric("writeBedTracks",
           function(circs, ...)
             standardGeneric("writeBedTracks"))

#' @aliases writeBedTracks,GRangesList-method
#' @rdname writeBedTracks-methods
setMethod("writeBedTracks",
          signature("GRangesList"),
          definition = function(circs,
                                out.prefix = "ciRcus_",
                                out.suffix = ".bed",
                                seqlevels.style = "UCSC") {

  # check input: every option is used as a scalar, so only the first entry
  # of a longer vector is kept (a vector prefix/suffix would otherwise yield
  # several file names for a single sample)
  if (length(out.prefix) > 1) {
    warning("length(out.prefix) > 1; only first entry will be used")
    out.prefix <- out.prefix[1]
  }
  if (length(out.suffix) > 1) {
    warning("length(out.suffix) > 1; only first entry will be used")
    out.suffix <- out.suffix[1]
  }
  if (length(seqlevels.style) > 1) {
    warning(paste("length(seqlevels.style) > 1;",
                  "only first entry will be used"))
    seqlevels.style <- seqlevels.style[1]
  }

  # adjust seqlevels style (skipped for NULL or if the requested style is
  # already among the styles reported for the input)
  if (!is.null(seqlevels.style) &&
      !(seqlevels.style %in% seqlevelsStyle(circs))) {
    message(paste0("Changing seqlevels style to '", seqlevels.style,
                   "' for BED track output file",
                   if (length(circs) > 1) "s" else "", "."))
    seqlevelsStyle(circs) <- seqlevels.style
  }

  # export GRanges to BED files one sample at a time; the file name is
  # <out.prefix><sample name><out.suffix>
  for (sample.name in names(circs)) {
    export(circs[[sample.name]],
           con = paste0(out.prefix, sample.name, out.suffix),
           format = "BED")
  }

  # called for its side effect only
  invisible(NULL)
})

#' @aliases writeBedTracks,RangedSummarizedExperiment-method
#' @rdname writeBedTracks-methods
setMethod(
  "writeBedTracks",
  signature("RangedSummarizedExperiment"),
  definition = function(circs, ...) {

    # first turn the SummarizedExperiment into a BED6-style GRangesList
    # (using the bedTracks() defaults) ...
    tracks <- bedTracks(circs)

    # ... then hand over to the GRangesList method, forwarding all
    # file-naming/seqlevels options unchanged
    writeBedTracks(tracks, ...)
  }
)
42 changes: 42 additions & 0 deletions man/bedTracks-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions man/writeBedTracks-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading