diff --git a/DESCRIPTION b/DESCRIPTION index 7c8c9bbb42..d8e1bf555e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tiledb Type: Package -Version: 0.21.1.1 +Version: 0.21.1.2 Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")), person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre")) diff --git a/NAMESPACE b/NAMESPACE index fd16e9079d..3fa8300d19 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -222,6 +222,7 @@ export(tiledb_query_alloc_buffer_ptr_char) export(tiledb_query_buffer_alloc_ptr) export(tiledb_query_condition) export(tiledb_query_condition_combine) +export(tiledb_query_condition_create) export(tiledb_query_condition_init) export(tiledb_query_condition_set_use_enumeration) export(tiledb_query_create_buffer_ptr) diff --git a/NEWS.md b/NEWS.md index cacff2a8f5..ac9f1d0ecc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ * This release of the R package builds against [TileDB 2.17.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.17.1), and has also been tested against earlier releases as well as the development version (#593) +## Improvements + +* Set conditions are supported in query condition expressions (#597) + ## Bug Fixes * The DESCRIPTION file now correctly refers to macOS 10.14 (#596) diff --git a/R/QueryCondition.R b/R/QueryCondition.R index 6028419928..514d2c9b2b 100644 --- a/R/QueryCondition.R +++ b/R/QueryCondition.R @@ -200,7 +200,7 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_i op, " (aka ", .mapOpToCharacter(op), ")", " [",ch, "] ", dtype, "\n", sep="") - ## take care of factor (aka "enum" case) and set the daat type to ASCII + ## take care of factor (aka "enum" case) and set the data type to ASCII if (dtype == "INT32" && is_enum) { if (debug) cat(" [factor column] ", ch, " ", attr, " ", dtype, " --> ASCII", " ", is_enum, "\n") dtype <- "ASCII" @@ -238,7 +238,31 @@ parse_query_condition 
<- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_i
 tiledb_query_condition_set_use_enumeration <- function(qc, use_enum, ctx = tiledb_get_context()) {
     stopifnot("Argument 'qc' must be a query condition object" = is(qc, "tiledb_query_condition"),
               "Argument 'use_enum' must be logical" = is.logical(use_enum),
-              "The argument must be a ctx object" = is(ctx, "tiledb_ctx"),
+              "The 'ctx' argument must be a context object" = is(ctx, "tiledb_ctx"),
               "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
     libtiledb_query_condition_set_use_enumeration(ctx@ptr, qc@ptr, use_enum)
 }
+
+#' create a query condition for vector 'IN' and 'NOT_IN' operations
+#'
+#' Uses \sQuote{IN} and \sQuote{NOT_IN} operators on given attribute
+#' @param name A character value with the scheme attribute name
+#' @param values A vector with the given values, supported types are integer, double,
+#' integer64 and character
+#' @param op (optional) A character value with the chosen set operation, this must be one of
+#' \sQuote{IN} or \sQuote{NOT_IN}; default to \sQuote{IN}
+#' @param ctx (optional) A TileDB Ctx object; if not supplied the default
+#' context object is retrieved
+#' @return A query condition object is returned
+#' @export
+tiledb_query_condition_create <- function(name, values, op = "IN", ctx = tiledb_get_context()) {
+    stopifnot("Argument 'name' must be a single character value" = is.character(name) && length(name) == 1L,
+              "Argument 'values' must be int, double, int64 or char" =
+                  (is.numeric(values) || bit64::is.integer64(values) || is.character(values)),
+              "Argument 'op' must be one of 'IN' or 'NOT_IN'" = length(op) == 1L && op %in% c("IN", "NOT_IN"),
+              "The 'ctx' argument must be a context object" = is(ctx, "tiledb_ctx"),
+              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
+    ptr <- libtiledb_query_condition_create(ctx@ptr, name, values, op)
+    qc <- new("tiledb_query_condition", ptr = ptr, init = TRUE)
+    invisible(qc)
+}
diff --git a/R/RcppExports.R
b/R/RcppExports.R
index d034f5c409..6ae634bc2b 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -810,6 +810,10 @@ libtiledb_query_condition_set_use_enumeration <- function(ctx, cond, use_enumera
     invisible(.Call(`_tiledb_libtiledb_query_condition_set_use_enumeration`, ctx, cond, use_enumeration))
 }
 
+libtiledb_query_condition_create <- function(ctx, name, vec, cond_op_string) {
+    .Call(`_tiledb_libtiledb_query_condition_create`, ctx, name, vec, cond_op_string)
+}
+
 libtiledb_zip_coords_numeric <- function(coords, coord_length) {
     .Call(`_tiledb_libtiledb_zip_coords_numeric`, coords, coord_length)
 }
diff --git a/inst/tinytest/test_querycondition.R b/inst/tinytest/test_querycondition.R
index 7a084cedbb..b51e4c9133 100644
--- a/inst/tinytest/test_querycondition.R
+++ b/inst/tinytest/test_querycondition.R
@@ -439,3 +439,51 @@ arr <- tiledb_array(uri, extended=FALSE, return_as="data.frame")
 qc <- parse_query_condition(datetime > "2023-01-05 00:00:00" && date <= "2023-01-10", ta=arr)
 query_condition(arr) <- qc
 if (!isWindows) expect_equal(nrow(arr[]), 5)
+
+## Test minimal version
+if (tiledb_version(TRUE) < "2.17.0") exit_file("Remainder needs 2.17.* or later")
+uri <- tempfile()
+fromDataFrame(penguins, uri)
+
+## Int in and not in
+qc <- tiledb_query_condition_create("year", c(2009L, 2007L), "IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+expect_true(nrow(res) > 0)   # all() on an empty result is TRUE, so rule out a vacuous pass
+expect_true(all(res$year != 2008L))
+
+qc <- tiledb_query_condition_create("year", c(2009L, 2007L), "NOT_IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+expect_true(nrow(res) > 0)   # all() on an empty result is TRUE, so rule out a vacuous pass
+expect_true(all(res$year == 2008L))
+
+## Double
+qc <- tiledb_query_condition_create("bill_length_mm", c(32.1,33.1,33.5), "IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+expect_true(all(res$bill_length_mm <= 33.5))
+expect_equal(nrow(res), 3)
+
+## Character (automagically converted from factor)
+qc <- tiledb_query_condition_create("island", c("Biscoe", "Dream"), "IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+tt <- table(res$island)
+expect_equal(tt[["Biscoe"]], 168)
+expect_equal(tt[["Dream"]], 124)
+
+qc <- tiledb_query_condition_create("island", c("Biscoe", "Dream"), "NOT_IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+tt <- table(res$island)
+expect_equal(tt[["Torgersen"]], 52)
+
+## int64
+df <- data.frame(ind=1:10, val=as.integer64(1:10))
+uri <- tempfile()
+fromDataFrame(df, uri)
+qc <- tiledb_query_condition_create("val", as.integer64(6:10), "IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+expect_equal(nrow(res), 5)   # values 6..10 of the ten rows written above
+expect_true(all(res$val >= as.integer64(6)))
+
+qc <- tiledb_query_condition_create("val", as.integer64(6:10), "NOT_IN")
+res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
+expect_equal(nrow(res), 5)   # values 1..5 of the ten rows written above
+expect_true(all(res$val <= as.integer64(5)))
diff --git a/man/tiledb_query_condition_create.Rd b/man/tiledb_query_condition_create.Rd
new file mode 100644
index 0000000000..76c6a04453
--- /dev/null
+++ b/man/tiledb_query_condition_create.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/QueryCondition.R
+\name{tiledb_query_condition_create}
+\alias{tiledb_query_condition_create}
+\title{create a query condition for vector 'IN' and 'NOT_IN' operations}
+\usage{
+tiledb_query_condition_create(
+  name,
+  values,
+  op = "IN",
+  ctx = tiledb_get_context()
+)
+}
+\arguments{
+\item{name}{A character value with the scheme attribute name}
+
+\item{values}{A vector with the given values, supported types are integer, double,
+integer64 and character}
+
+\item{op}{(optional) A character value with the chosen set operation, this must be one of
+\sQuote{IN} or \sQuote{NOT_IN}; default to \sQuote{IN}}
+
+\item{ctx}{(optional) A TileDB Ctx object; if not supplied the default
+context object is retrieved}
+}
+\value{
+A query condition object is returned
+}
+\description{
+Uses \sQuote{IN} and \sQuote{NOT_IN} operators on given
attribute
+}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 0132431873..6e8dd50a76 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -2388,6 +2388,20 @@ BEGIN_RCPP
     return R_NilValue;
 END_RCPP
 }
+// libtiledb_query_condition_create
+XPtr<tiledb::QueryCondition> libtiledb_query_condition_create(XPtr<tiledb::Context> ctx, const std::string& name, SEXP vec, const std::string& cond_op_string);
+RcppExport SEXP _tiledb_libtiledb_query_condition_create(SEXP ctxSEXP, SEXP nameSEXP, SEXP vecSEXP, SEXP cond_op_stringSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
+    Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP);
+    Rcpp::traits::input_parameter< SEXP >::type vec(vecSEXP);
+    Rcpp::traits::input_parameter< const std::string& >::type cond_op_string(cond_op_stringSEXP);
+    rcpp_result_gen = Rcpp::wrap(libtiledb_query_condition_create(ctx, name, vec, cond_op_string));
+    return rcpp_result_gen;
+END_RCPP
+}
 // libtiledb_zip_coords_numeric
 NumericVector libtiledb_zip_coords_numeric(List coords, R_xlen_t coord_length);
 RcppExport SEXP _tiledb_libtiledb_zip_coords_numeric(SEXP coordsSEXP, SEXP coord_lengthSEXP) {
@@ -3643,6 +3657,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_tiledb_libtiledb_query_condition_init", (DL_FUNC) &_tiledb_libtiledb_query_condition_init, 5},
     {"_tiledb_libtiledb_query_condition_combine", (DL_FUNC) &_tiledb_libtiledb_query_condition_combine, 3},
     {"_tiledb_libtiledb_query_condition_set_use_enumeration", (DL_FUNC) &_tiledb_libtiledb_query_condition_set_use_enumeration, 3},
+    {"_tiledb_libtiledb_query_condition_create", (DL_FUNC) &_tiledb_libtiledb_query_condition_create, 4},
     {"_tiledb_libtiledb_zip_coords_numeric", (DL_FUNC) &_tiledb_libtiledb_zip_coords_numeric, 2},
     {"_tiledb_libtiledb_zip_coords_integer", (DL_FUNC) &_tiledb_libtiledb_zip_coords_integer, 2},
     {"_tiledb_libtiledb_create_group", (DL_FUNC) &_tiledb_libtiledb_create_group, 2},
diff
--git a/src/libtiledb.cpp b/src/libtiledb.cpp
index ed938cb24b..cb811763fc 100644
--- a/src/libtiledb.cpp
+++ b/src/libtiledb.cpp
@@ -3762,6 +3762,12 @@ const char* _tiledb_query_condition_op_to_string(tiledb_query_condition_op_t op)
             return "EQ";
         case TILEDB_NE:
             return "NE";
+#if TILEDB_VERSION >= TileDB_Version(2,17,0)
+        case TILEDB_IN:
+            return "IN";
+        case TILEDB_NOT_IN:
+            return "NOT_IN";
+#endif
         default:
             Rcpp::stop("Unknown condition op (%d)", op);
     }
@@ -3780,6 +3786,12 @@ tiledb_query_condition_op_t _tiledb_query_string_to_condition_op(const std::stri
         return TILEDB_EQ;
     } else if (opstr == "NE") {
         return TILEDB_NE;
+#if TILEDB_VERSION >= TileDB_Version(2,17,0)
+    } else if (opstr == "IN") {
+        return TILEDB_IN;
+    } else if (opstr == "NOT_IN") {
+        return TILEDB_NOT_IN;
+#endif
     } else {
         Rcpp::stop("Unknown TileDB op string '%s'", opstr.c_str());
     }
@@ -3893,6 +3905,39 @@ void libtiledb_query_condition_set_use_enumeration(XPtr<tiledb::Context> ctx,
 #endif
 }
 
+// [[Rcpp::export]]
+XPtr<tiledb::QueryCondition>
+libtiledb_query_condition_create(XPtr<tiledb::Context> ctx, const std::string& name,
+                                 SEXP vec, const std::string& cond_op_string) {
+    check_xptr_tag<tiledb::Context>(ctx);
+#if TILEDB_VERSION >= TileDB_Version(2,17,0)
+    tiledb_query_condition_op_t op = _tiledb_query_string_to_condition_op(cond_op_string);
+    // dispatch on the R type of 'vec': int, int64-as-double, double, and character
+    if (TYPEOF(vec) == INTSXP) {
+        std::vector<int32_t> iv = Rcpp::as<std::vector<int32_t>>(vec);
+        auto qc = tiledb::QueryConditionExperimental::create(*ctx.get(), name, iv, op);
+        return make_xptr<tiledb::QueryCondition>(new tiledb::QueryCondition(qc));
+    } else if (TYPEOF(vec) == REALSXP) {
+        if (Rcpp::isInteger64(vec)) {
+            std::vector<int64_t> iv64 = Rcpp::fromInteger64(Rcpp::NumericVector(vec));
+            auto qc = tiledb::QueryConditionExperimental::create(*ctx.get(), name, iv64, op);
+            return make_xptr<tiledb::QueryCondition>(new tiledb::QueryCondition(qc));
+        } else {
+            std::vector<double> dv = Rcpp::as<std::vector<double>>(vec);
+            auto qc = tiledb::QueryConditionExperimental::create(*ctx.get(), name, dv, op);
+            return make_xptr<tiledb::QueryCondition>(new tiledb::QueryCondition(qc));
+        }
+    } else if (TYPEOF(vec) == STRSXP) {
+        std::vector<std::string> sv = Rcpp::as<std::vector<std::string>>(vec);
+        auto qc = tiledb::QueryConditionExperimental::create(*ctx.get(), name, sv, op);
+        return make_xptr<tiledb::QueryCondition>(new tiledb::QueryCondition(qc));
+    } else {
+        Rcpp::stop("No support (yet) for type '%s'.", Rcpp::type2name(vec));
+    }
+#endif
+    return make_xptr<tiledb::QueryCondition>(R_NilValue);
+}
+
 /**
  * Array helper functions