Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ export(schema)
export(selected_ranges)
export(set_max_chunk_size)
export(set_return_as_preference)
export(tdb_collect)
export(tdb_filter)
export(tdb_select)
export(tile)
export(tile_order)
export(tiledb_array)
Expand Down Expand Up @@ -195,7 +198,6 @@ export(tiledb_query_status)
export(tiledb_query_submit)
export(tiledb_query_submit_async)
export(tiledb_query_type)
export(tiledb_schema_get_dim_attr_status)
export(tiledb_schema_get_names)
export(tiledb_schema_get_types)
export(tiledb_set_context)
Expand Down Expand Up @@ -295,6 +297,9 @@ exportMethods(schema)
exportMethods(selected_ranges)
exportMethods(set_max_chunk_size)
exportMethods(show)
exportMethods(tdb_collect)
exportMethods(tdb_filter)
exportMethods(tdb_select)
exportMethods(tile)
exportMethods(tile_order)
exportMethods(tiledb_ndim)
Expand Down
3 changes: 2 additions & 1 deletion R/ArraySchema.R
Original file line number Diff line number Diff line change
Expand Up @@ -497,10 +497,11 @@ tiledb_schema_get_types <- function(sch) {

##' Get Dimension or Attribute Status
##'
##' Note that this function is an unexported internal function.
##'
##' @param sch A TileDB Schema object
##' @return An integer vector where each element corresponds to a schema entry,
##' and a value of one signals dimension and a value of two an attribute.
##' @export
tiledb_schema_get_dim_attr_status <- function(sch) {
stopifnot(`Argument must be a schema` = is(sch, "tiledb_array_schema"))
dom <- tiledb::domain(sch)
Expand Down
36 changes: 28 additions & 8 deletions R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,22 @@ tiledb_query_condition_combine <- function(lhs, rhs, op) {
#'
#' @param expr An expression that is understood by the TileDB grammar for
#' query conditions.
#' @param ta An optional tiledb_array object that the query condition is applied to
#' @param debug A boolean toogle to enable more verbose operations, defaults
#' to 'FALSE'.
#' @param strict A boolean toogle to, if set, errors if a non-existing attribute is selected
#' or filtered on, defaults to 'TRUE'; if 'FALSE' a warning is shown by execution proceeds.
#' @return A `tiledb_query_condition` object
#' @export
parse_query_condition <- function(expr, debug=FALSE) {
parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE) {
.hasArray <- !is.null(ta) && is(ta, "tiledb_array")
if (.hasArray && length(ta@sil) == 0) ta@sil <- .fill_schema_info_list(ta@uri)
.isComparisonOperator <- function(x) as.character(x) %in% c(">", ">=", "<", "<=", "==", "!=")
.isBooleanOperator <- function(x) as.character(x) %in% c("&&", "||", "!")
.isAscii <- function(x) grepl("^[[:alnum:]_]+$", x)
.isInteger <- function(x) grepl("^[[:digit:]]+$", as.character(x))
.isDouble <- function(x) grepl("^[[:digit:]\\.]+$", as.character(x)) && length(grepRaw(".", as.character(x), fixed = TRUE, all = TRUE)) == 1
.errorFunction <- if (strict) stop else warning
.getType <- function(x) {
if (isTRUE(.isInteger(x))) "INT32"
else if (isTRUE(.isDouble(x))) "FLOAT64"
Expand All @@ -126,7 +132,7 @@ parse_query_condition <- function(expr, debug=FALSE) {
`&&` = "AND",
`||` = "OR",
`!` = "NOT")
.makeExpr <- function(x) {
.makeExpr <- function(x, debug=FALSE) {
if (is.symbol(x)) {
stop("Unexpected symbol in expression: ", format(x))
} else if (.isBooleanOperator(x[1])) {
Expand All @@ -140,20 +146,34 @@ parse_query_condition <- function(expr, debug=FALSE) {
.mapBoolToCharacter(as.character(x[1])))

} else if (.isComparisonOperator(x[1])) {
if (debug) cat(" [",as.character(x[2]),"] ",
as.character(x[1]), " (aka ", .mapOpToCharacter(as.character(x[1])), ")",
" [",as.character(x[3]), "] ", .getType(x[3]), "\n", sep="")
op <- as.character(x[1])
attr <- as.character(x[2])
ch <- as.character(x[3])
dtype <- .getType(ch)
tiledb_query_condition_init(attr = as.character(x[2]), # still need to check again schema
if (.hasArray) {
ind <- match(attr, ta@sil$names)
if (!is.finite(ind)) {
.errorFunction("No attibute '", attr, "' present.", call. = FALSE)
return(NULL)
}
if (ta@sil$status[ind] != 2) {
.errorFunction("Argument '", attr, "' is not an attribute.", call. = FALSE)
return(NULL)
}
dtype <- ta@sil$types[ind]
}
if (debug) cat(" [", attr,"] ",
op, " (aka ", .mapOpToCharacter(op), ")",
" [",ch, "] ", dtype, "\n", sep="")
tiledb_query_condition_init(attr = attr,
value = if (dtype == "ASCII") ch else as.numeric(ch),
dtype = dtype,
op = .mapOpToCharacter(as.character(x[1])))
op = .mapOpToCharacter(op))
} else {
stop("Unexpected token in expression: ", format(x))
}
}

e <- substitute(expr)
.makeExpr(e)
.makeExpr(e, debug)
}
91 changes: 91 additions & 0 deletions R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
#' @slot query_statistics A logical value, defaults to \sQuote{FALSE}; if \sQuote{TRUE} the
#' query statistics are returned (as a JSON string) via the attribute
#' \sQuote{query_statistics} of the return object.
#' @slot sil An optional and internal list object with schema information, used for
#' parsing queries.
#' @slot ptr External pointer to the underlying implementation
#' @exportClass tiledb_array
setClass("tiledb_array",
Expand All @@ -72,6 +74,7 @@ setClass("tiledb_array",
timestamp_end = "POSIXct",
return_as = "character",
query_statistics = "logical",
sil = "list",
ptr = "externalptr"))

#' Constructs a tiledb_array object backed by a persisted tiledb array uri
Expand Down Expand Up @@ -115,6 +118,8 @@ setClass("tiledb_array",
#' @param query_statistics optional A logical value, defaults to \sQuote{FALSE}; if \sQuote{TRUE} the
#' query statistics are returned (as a JSON string) via the attribute
#' \sQuote{query_statistics} of the return object.
#' @param sil optional A list, by default empty to store schema information when query objects are
#' parsed.
#' @param ctx optional tiledb_ctx
#' @return tiledb_array object
#' @export
Expand All @@ -136,6 +141,7 @@ tiledb_array <- function(uri,
timestamp_end = as.POSIXct(double(), origin="1970-01-01"),
return_as = get_return_as_preference(),
query_statistics = FALSE,
sil = list(),
ctx = tiledb_get_context()) {
query_type = match.arg(query_type)
if (!is(ctx, "tiledb_ctx"))
Expand Down Expand Up @@ -206,6 +212,7 @@ tiledb_array <- function(uri,
timestamp_end = timestamp_end,
return_as = return_as,
query_statistics = query_statistics,
sil = sil,
ptr = array_xptr)
}

Expand Down Expand Up @@ -1610,3 +1617,87 @@ setReplaceMethod("query_statistics",
validObject(x)
x
})


## piped query support


#' @rdname generics
#' @export
setGeneric("tdb_filter", function(x, ...) standardGeneric("tdb_filter"))

#' Filter from array for query via logical conditions
#'
#' @param x A tiledb_array object as first argument, permitting piping
#' @param ... One or more expressions that are parsed as query_condition objects
#' @param strict A boolean toogle to, if set, errors if a non-existing attribute is selected
#' or filtered on, defaults to 'TRUE'; if 'FALSE' a warning is shown by execution proceeds.
#' @return The tiledb_array object, permitting piping
#' @export
setMethod("tdb_filter", signature("tiledb_array"), function(x, ..., strict=TRUE) {
qc <- parse_query_condition(..., ta=x, debug=FALSE, strict=strict)
if (is.null(qc))
return(x)
if (isTRUE(x@query_condition@init)) { # if prior qc exists, combine by AND
x@query_condition <- tiledb_query_condition_combine(x@query_condition, qc, "AND")
} else { # else just assign
x@query_condition <- qc
}
x
})

#' @rdname generics
#' @export
setGeneric("tdb_select", function(x, ...) standardGeneric("tdb_select"))

#' Select attributes from array for query
#'
#' @param x A tiledb_array object as first argument, permitting piping
#' @param ... One or more attributes of the query
#' @return The tiledb_array object, permitting piping
#' @export
setMethod("tdb_select", signature("tiledb_array"), function(x, ...) {
if (length(x@sil) == 0) x@sil <- .fill_schema_info_list(x@uri)
## helper with a nod to data.table and its name_dots
names_from_dots <- function(...) {
dot_sub <- as.list(substitute(list(...)))[-1L]
vnames <- character(length(dot_sub))
notnamed <- vnames == ""
syms <- sapply(dot_sub, is.symbol) # save the deparse() in most cases of plain symbol
for (i in which(notnamed)) {
tmp <- if (syms[i]) as.character(dot_sub[[i]]) else deparse(dot_sub[[i]])[1L]
if (tmp == make.names(tmp)) vnames[i] <- tmp
}
vnames
}

vec <- names_from_dots(...)
ind <- match(vec, x@sil$names) # match against schema names
ind <- ind[x@sil$status[ind] == 2L] # allow only attributes (where status == 2)
newvec <- na.omit(x@sil$names[ ind ]) # and create subset (filtering NA for wrong entry)
x@attrs <- newvec
x
})

#' @rdname generics
#' @export
setGeneric("tdb_collect", function(x, ...) standardGeneric("tdb_collect"))

#' Collect the query results to finalize piped expression
#'
#' @param x A tiledb_array object as first argument, permitting piping
#' @param ... Ignored
#' @return The object returning from a tiledb_array query (the type of which can be
#' set via the return preference mechanism, see the help for \code{"["} accessor)
#' @export
setMethod("tdb_collect", signature("tiledb_array"), function(x, ...) {
x[]
})

# unexported helper
.fill_schema_info_list <- function(uri) {
sch <- schema(uri)
list(names=tiledb_schema_get_names(sch),
types=tiledb_schema_get_types(sch),
status=tiledb_schema_get_dim_attr_status(sch))
}
14 changes: 14 additions & 0 deletions inst/tinytest/test_tiledbarray.R
Original file line number Diff line number Diff line change
Expand Up @@ -1409,3 +1409,17 @@ arr <- tiledb_array(uri)
expect_false(query_statistics(arr)) # as not set
query_statistics(arr) <- TRUE
expect_true(query_statistics(arr))


## piped filtering and selection
uri <- tempfile()
fromDataFrame(penguins, uri, sparse = TRUE, col_index=1:2)
res <- tiledb_array(uri, return_as="data.frame") |>
tdb_filter(body_mass_g >= 5500) |>
tdb_filter(bill_length_mm > 50) |>
tdb_select(body_mass_g, bill_length_mm, year, sex) |>
tdb_collect()
expect_equal(dim(res), c(14,6))
expect_true(min(res$body_mass_g) >= 5500)
expect_true(min(res$bill_length_mm) > 50)
expect_equal(colnames(res), c("species", "island", "body_mass_g", "bill_length_mm", "year", "sex"))
12 changes: 11 additions & 1 deletion man/generics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/parse_query_condition.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/tdb_collect-tiledb_array-method.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/tdb_filter-tiledb_array-method.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/tdb_select-tiledb_array-method.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/tiledb_array-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading