From df33b80fa2cf06599fcea3cbc4a2fa3e9cb58c89 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 14:21:52 -0500 Subject: [PATCH 01/10] Tweak to ensure range is set on dense array too --- R/TileDBArray.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index e8566e25df..5d06e5ae2d 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -653,9 +653,9 @@ setMethod("[", "tiledb_array", if (is.null(x@selected_ranges[[k]])) { #cat("Adding null dim", k, "on", dimtypes[k], "\n") vec <- .map2integer64(nonemptydom[[k]], dimtypes[k]) - if (vec[1] != 0 && vec[2] != 0) { # corner case of A[] on empty array - qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) - rangeunset <- FALSE + if (vec[1] != 0 || vec[2] != 0) { # corner case of A[] on empty array + qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) + rangeunset <- FALSE } } else if (is.null(nrow(x@selected_ranges[[k]]))) { #cat("Adding nrow null dim", k, "on", dimtypes[k], "\n") From e0a6bc4ec991a48841f3380d581861958700ad58 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 14:22:22 -0500 Subject: [PATCH 02/10] Enhanced query conditions for dense arrays, better int64 treatment --- R/QueryCondition.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/R/QueryCondition.R b/R/QueryCondition.R index 74128d28f2..9ecb9b0de9 100644 --- a/R/QueryCondition.R +++ b/R/QueryCondition.R @@ -67,7 +67,11 @@ tiledb_query_condition_init <- function(attr, value, dtype, op, qc = tiledb_quer "Argument 'dtype' must be character" = is.character(dtype), "Argument 'op' must be character" = is.character(op)) op <- match.arg(op, c("LT", "LE", "GT", "GE", "EQ", "NE")) - ## maybe check dtype too + ## if dtype is INT64 or UINT64 but the class of value does not yet inherit from integer64, cast + if (grepl("INT64", dtype) && !inherits(value, "integer64")) { + value <- bit64::as.integer64(value) + #message("QCI ", attr, ", ", value, ", ", class(value)[1], ", ", dtype, ", ", op) + } libtiledb_query_condition_init(qc@ptr, attr, value, dtype, op) qc@init <- TRUE invisible(qc) @@ -106,9 +110,11 @@ tiledb_query_condition_combine <- function(lhs, rhs, op) { #' to 'FALSE'. #' @param strict A boolean toogle to, if set, errors if a non-existing attribute is selected #' or filtered on, defaults to 'TRUE'; if 'FALSE' a warning is shown by execution proceeds. +#' @param use_int64 A boolean toggle to switch to \code{integer64} if \code{integer} is seen, +#' default is false to remain as a default four-byte \code{int} #' @return A `tiledb_query_condition` object #' @export -parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE) { +parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_int64=FALSE) { .hasArray <- !is.null(ta) && is(ta, "tiledb_array") if (.hasArray && length(ta@sil) == 0) ta@sil <- .fill_schema_info_list(ta@uri) .isComparisonOperator <- function(x) as.character(x) %in% c(">", ">=", "<", "<=", "==", "!=") @@ -117,8 +123,8 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE) { .isInteger <- function(x) grepl("^[[:digit:]]+$", as.character(x)) .isDouble <- function(x) grepl("^[[:digit:]\\.]+$", as.character(x)) && length(grepRaw(".", as.character(x), fixed = TRUE, all = TRUE)) == 1 .errorFunction <- if (strict) stop else warning - .getType <- function(x) { - if (isTRUE(.isInteger(x))) "INT32" + .getType <- function(x, use_int64=FALSE) { + if (isTRUE(.isInteger(x))) { if (use_int64) "INT64" else "INT32" } else if (isTRUE(.isDouble(x))) "FLOAT64" else "ASCII" } @@ -150,7 +156,7 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE) { op <- as.character(x[1]) attr <- as.character(x[2]) ch <- as.character(x[3]) - dtype <- .getType(ch) + dtype <- .getType(ch, use_int64) if (.hasArray) { ind <- match(attr, ta@sil$names) if (!is.finite(ind)) { From e10d25c673cb5000fe665255c20cd7b06e10bf44 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 14:23:02 -0500 Subject: [PATCH 03/10] Manual page update --- man/parse_query_condition.Rd | 11 ++++++++++- man/tiledb_filter.Rd | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/man/parse_query_condition.Rd b/man/parse_query_condition.Rd index 7b03c23cdc..23329df391 100644 --- a/man/parse_query_condition.Rd +++ b/man/parse_query_condition.Rd @@ -4,7 +4,13 @@ \alias{parse_query_condition} \title{Create a 'tiledb_query_condition' object from an expression} \usage{ -parse_query_condition(expr, ta = NULL, debug = FALSE, strict = TRUE) +parse_query_condition( + expr, + ta = NULL, + debug = FALSE, + strict = TRUE, + use_int64 = FALSE +) } \arguments{ \item{expr}{An expression that is understood by the TileDB grammar for @@ -17,6 +23,9 @@ to 'FALSE'.} \item{strict}{A boolean toogle to, if set, errors if a non-existing attribute is selected or filtered on, defaults to 'TRUE'; if 'FALSE' a warning is shown by execution proceeds.} + +\item{use_int64}{A boolean toggle to switch to \code{integer64} if \code{integer} is seen, +default is false to remain as a default four-byte \code{int}} } \value{ A \code{tiledb_query_condition} object diff --git a/man/tiledb_filter.Rd b/man/tiledb_filter.Rd index 2e43bcc102..d09d57b40f 100644 --- a/man/tiledb_filter.Rd +++ b/man/tiledb_filter.Rd @@ -31,6 +31,7 @@ Available filters: \item "CHECKSUM_MD5" \item "CHECKSUM_SHA256" \item "DICTIONARY" +\item "SCALE_FLOAT" (TileDB 2.11.0 or later) } } \details{ From 7af95c0400cdabcdc1af350b75328b7086c6592f Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 14:23:14 -0500 Subject: [PATCH 04/10] Roll micro release to .1 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7cb07ed586..cf055c30f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tiledb Type: Package -Version: 0.14.1 +Version: 0.14.1.1 Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")), person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre")) From 77d970f9d5b0c8d3ba805bc52c678825ec2fcc01 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 16:33:11 -0500 Subject: [PATCH 05/10] Add a dense array with query condition example --- R/QueryCondition.R | 10 ++++++++++ man/parse_query_condition.Rd | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/R/QueryCondition.R b/R/QueryCondition.R index 9ecb9b0de9..ec3e698487 100644 --- a/R/QueryCondition.R +++ b/R/QueryCondition.R @@ -113,6 +113,16 @@ tiledb_query_condition_combine <- function(lhs, rhs, op) { #' @param use_int64 A boolean toggle to switch to \code{integer64} if \code{integer} is seen, #' default is false to remain as a default four-byte \code{int} #' @return A `tiledb_query_condition` object +#' @examples +#' \dontshow{ctx <- tiledb_ctx(limitTileDBCores())} +#' \dontrun{ +#' uri <- "mem://airquality" # change to on-disk for persistence +#' fromDataFrame(airquality, uri, col_index=c("Month", "Day")) # dense array +#' head(tiledb_array(uri, return_as="data.frame")[]) # reordered columns +#' head(tiledb_array(uri, return_as="data.frame", extended=FALSE)[]) # no dims +#' tiledb_array(uri, return_as="data.frame", extended=FALSE, +#' query_condition=parse_query_condition(Temp > 90))[] +#' } #' @export parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_int64=FALSE) { .hasArray <- !is.null(ta) && is(ta, "tiledb_array") diff --git a/man/parse_query_condition.Rd b/man/parse_query_condition.Rd index 23329df391..f066d9006c 100644 --- a/man/parse_query_condition.Rd +++ b/man/parse_query_condition.Rd @@ -34,3 +34,14 @@ A \code{tiledb_query_condition} object The grammar for query conditions is at present constraint to six operators and three boolean types. } +\examples{ +\dontshow{ctx <- tiledb_ctx(limitTileDBCores())} +\dontrun{ +uri <- "mem://airquality" +fromDataFrame(airquality, uri, col_index=c("Month", "Day")) # dense array +head(tiledb_array(uri, return_as="data.frame")[]) # reordered columns +head(tiledb_array(uri, return_as="data.frame", extended=FALSE)[]) # no dims +tiledb_array(uri, return_as="data.frame", extended=FALSE, + query_condition=parse_query_condition(Temp > 90))[] +} +} From ece0f56b32a7fad82199d2dbf82eef0113bd47c5 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 16:33:35 -0500 Subject: [PATCH 06/10] Small fix (and update) to example --- inst/examples/quickstart_dense_memfs.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/inst/examples/quickstart_dense_memfs.R b/inst/examples/quickstart_dense_memfs.R index b2c2d0ebfe..caf26d6047 100644 --- a/inst/examples/quickstart_dense_memfs.R +++ b/inst/examples/quickstart_dense_memfs.R @@ -5,7 +5,7 @@ # # The MIT License # -# Copyright (c) 2018-2021 TileDB, Inc. +# Copyright (c) 2018-2022 TileDB, Inc.edd # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -80,10 +80,9 @@ write_array_via_query <- function(uri) { } write_array_via_query_piped <- function(uri) { - stopifnot(requireNamespace("magrittr", quietly=TRUE)) - library(magrittr) data <- 1:16 - qry <- tiledb_array(uri = uri, "WRITE") + arr <- tiledb_array(uri = uri) + qry <- tiledb_query(arr, "WRITE") qry |> tiledb_query_set_layout("ROW_MAJOR") |> # also default, transpose if COL_MAJOR tiledb_query_set_buffer("a", data) |> From c4c48941b74d5ab345368a7557dde4991e380b19 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 16:44:08 -0500 Subject: [PATCH 07/10] Add a test for query condition on dense array --- inst/tinytest/test_querycondition.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/inst/tinytest/test_querycondition.R b/inst/tinytest/test_querycondition.R index 03be26aa79..cb6116cc48 100644 --- a/inst/tinytest/test_querycondition.R +++ b/inst/tinytest/test_querycondition.R @@ -345,3 +345,10 @@ for (col in c("int8", "uint8", "int16", "uint16", "int32", "uint32", arr <- tiledb_array(tmp, return_as="data.frame", query_condition = qc) expect_equal( NROW(arr[]), 10) # ten rows if we restrict to 'value' > 10 } + +## test on dense array (without dims) and query condition +uri <- tempfile() +fromDataFrame(airquality, uri, col_index=c("Month", "Day")) # dense array +res <- tiledb_array(uri, return_as="data.frame", extended=FALSE, + query_condition=parse_query_condition(Temp > 90))[] +expect_equal(NROW(res), 14) From 5712bbce8aa06a053344f94885645e624359e2f0 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 17:02:46 -0500 Subject: [PATCH 08/10] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 6c58cb0c97..8e1c4ce89d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ ## Improvements +* Support for query conditions has been extended to dense arrays (#447) + ## Bug Fixes ## Build and Test Systems From 1724974fbb0528e74a7cc85980a21dbc293cc0c5 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 14 Jul 2022 19:17:01 -0500 Subject: [PATCH 09/10] Shorten example to essence --- R/QueryCondition.R | 3 +-- man/parse_query_condition.Rd | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/R/QueryCondition.R b/R/QueryCondition.R index ec3e698487..c0ee4e832a 100644 --- a/R/QueryCondition.R +++ b/R/QueryCondition.R @@ -118,8 +118,7 @@ tiledb_query_condition_combine <- function(lhs, rhs, op) { #' \dontrun{ #' uri <- "mem://airquality" # change to on-disk for persistence #' fromDataFrame(airquality, uri, col_index=c("Month", "Day")) # dense array -#' head(tiledb_array(uri, return_as="data.frame")[]) # reordered columns -#' head(tiledb_array(uri, return_as="data.frame", extended=FALSE)[]) # no dims +#' ## query condition on dense array requires extended=FALSE #' tiledb_array(uri, return_as="data.frame", extended=FALSE, #' query_condition=parse_query_condition(Temp > 90))[] #' } diff --git a/man/parse_query_condition.Rd b/man/parse_query_condition.Rd index f066d9006c..92e2dec468 100644 --- a/man/parse_query_condition.Rd +++ b/man/parse_query_condition.Rd @@ -37,10 +37,9 @@ and three boolean types. \examples{ \dontshow{ctx <- tiledb_ctx(limitTileDBCores())} \dontrun{ -uri <- "mem://airquality" +uri <- "mem://airquality" # change to on-disk for persistence fromDataFrame(airquality, uri, col_index=c("Month", "Day")) # dense array -head(tiledb_array(uri, return_as="data.frame")[]) # reordered columns -head(tiledb_array(uri, return_as="data.frame", extended=FALSE)[]) # no dims +## query condition on dense array requires extended=FALSE tiledb_array(uri, return_as="data.frame", extended=FALSE, query_condition=parse_query_condition(Temp > 90))[] } From ae2f909e8079b8ace60ef6e9e15ac6928b12b86c Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Fri, 15 Jul 2022 09:46:28 -0500 Subject: [PATCH 10/10] Clean up fat-fingered typo in example [ci skip] --- inst/examples/quickstart_dense_memfs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/examples/quickstart_dense_memfs.R b/inst/examples/quickstart_dense_memfs.R index caf26d6047..0681f2b78c 100644 --- a/inst/examples/quickstart_dense_memfs.R +++ b/inst/examples/quickstart_dense_memfs.R @@ -5,7 +5,7 @@ # # The MIT License # -# Copyright (c) 2018-2022 TileDB, Inc.edd +# Copyright (c) 2018-2022 TileDB, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal