Skip to content

Commit

Permalink
Support 'IN' and 'NOT_IN' set operations for query conditions (#597)
Browse files Browse the repository at this point in the history
* Support 'IN' and 'NOT_IN' set operations for query conditions

* Use Rcpp::type2name, fix one typo

* Update NEWS, roll micro release [ci skip]

* Prefer tiledb_query_condition_create() as top-level R function
  • Loading branch information
eddelbuettel committed Sep 29, 2023
1 parent 211bcea commit ea8b912
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tiledb
Type: Package
Version: 0.21.1.1
Version: 0.21.1.2
Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays
Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")),
person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre"))
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ export(tiledb_query_alloc_buffer_ptr_char)
export(tiledb_query_buffer_alloc_ptr)
export(tiledb_query_condition)
export(tiledb_query_condition_combine)
export(tiledb_query_condition_create)
export(tiledb_query_condition_init)
export(tiledb_query_condition_set_use_enumeration)
export(tiledb_query_create_buffer_ptr)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

* This release of the R package builds against [TileDB 2.17.1](https://github.com/TileDB-Inc/TileDB/releases/tag/2.17.1), and has also been tested against earlier releases as well as the development version (#593)

## Improvements

* Set conditions are supported in query condition expressions (#597)

## Bug Fixes

* The DESCRIPTION file now correctly refers to macOS 10.14 (#596)
Expand Down
28 changes: 26 additions & 2 deletions R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_i
op, " (aka ", .mapOpToCharacter(op), ")",
" [",ch, "] ", dtype, "\n", sep="")

## take care of factor (aka "enum" case) and set the data type to ASCII
if (dtype == "INT32" && is_enum) {
if (debug) cat(" [factor column] ", ch, " ", attr, " ", dtype, " --> ASCII", " ", is_enum, "\n")
dtype <- "ASCII"
Expand Down Expand Up @@ -238,7 +238,31 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_i
tiledb_query_condition_set_use_enumeration <- function(qc, use_enum, ctx = tiledb_get_context()) {
    ## Validate every argument up front. The context is checked exactly once so
    ## that the message reported for a bad 'ctx' names the offending argument.
    stopifnot("Argument 'qc' must be a query condition object" = is(qc, "tiledb_query_condition"),
              "Argument 'use_enum' must be logical" = is.logical(use_enum),
              "The 'ctx' argument must be a context object" = is(ctx, "tiledb_ctx"),
              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
    ## Hand the 'ptr' slots of the context and the query condition, together
    ## with the flag, to the compiled routine
    libtiledb_query_condition_set_use_enumeration(ctx@ptr, qc@ptr, use_enum)
}

#' Create a query condition for vector 'IN' and 'NOT_IN' operations
#'
#' Uses \sQuote{IN} and \sQuote{NOT_IN} operators on given attribute
#' @param name A single character value with the name of the attribute
#' @param values A vector with the given values, supported types are integer, double,
#' integer64 and character
#' @param op (optional) A single character value with the chosen set operation, this
#' must be one of \sQuote{IN} or \sQuote{NOT_IN}; default to \sQuote{IN}
#' @param ctx (optional) A TileDB Ctx object; if not supplied the default
#' context object is retrieved
#' @return A query condition object is returned (invisibly)
#' @export
tiledb_query_condition_create <- function(name, values, op = "IN", ctx = tiledb_get_context()) {
    stopifnot("Argument 'name' must be character" = is.character(name),
              ## the compiled routine takes exactly one name, so reject vectors here
              "Argument 'name' must be a single value" = length(name) == 1L,
              "Argument 'values' must be int, double, int64 or char" =
                  (is.numeric(values) || bit64::is.integer64(values) || is.character(values)),
              ## a zero-length 'op' would make '%in%' return logical(0), which
              ## stopifnot() accepts, hence the explicit type and length checks
              "Argument 'op' must be one of 'IN' or 'NOT_IN'" =
                  (is.character(op) && length(op) == 1L && op %in% c("IN", "NOT_IN")),
              "The 'ctx' argument must be a context object" = is(ctx, "tiledb_ctx"),
              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
    ## internal function of this package, so no 'tiledb:::' qualification is needed
    ptr <- libtiledb_query_condition_create(ctx@ptr, name, values, op)
    ## wrap the returned pointer in the S4 class and mark it as initialised
    qc <- new("tiledb_query_condition", ptr = ptr, init = TRUE)
    invisible(qc)
}
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,10 @@ libtiledb_query_condition_set_use_enumeration <- function(ctx, cond, use_enumera
invisible(.Call(`_tiledb_libtiledb_query_condition_set_use_enumeration`, ctx, cond, use_enumeration))
}

## Thin R-level entry point for the compiled routine registered as
## '_tiledb_libtiledb_query_condition_create'; all four arguments are passed
## through unchanged and the value from .Call() is returned as is.
libtiledb_query_condition_create <- function(ctx, name, vec, cond_op_string) {
    native_result <- .Call(`_tiledb_libtiledb_query_condition_create`,
                           ctx, name, vec, cond_op_string)
    native_result
}

## Thin R-level entry point for the compiled routine registered as
## '_tiledb_libtiledb_zip_coords_numeric'; both arguments are passed through
## unchanged and the value from .Call() is returned as is.
libtiledb_zip_coords_numeric <- function(coords, coord_length) {
    native_result <- .Call(`_tiledb_libtiledb_zip_coords_numeric`,
                           coords, coord_length)
    native_result
}
Expand Down
44 changes: 44 additions & 0 deletions inst/tinytest/test_querycondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,47 @@ arr <- tiledb_array(uri, extended=FALSE, return_as="data.frame")
## Combined datetime / date condition on the array opened above; five rows are
## expected (the check is skipped on Windows)
qc <- parse_query_condition(datetime > "2023-01-05 00:00:00" && date <= "2023-01-10", ta=arr)
query_condition(arr) <- qc
if (!isWindows) expect_equal(nrow(arr[]), 5)

## Test minimal version
## Everything below exercises tiledb_query_condition_create() and is skipped
## when the TileDB library is older than 2.17.0
if (tiledb_version(TRUE) < "2.17.0") exit_file("Remainder needs 2.17.* or later")
## Write 'penguins' (defined outside this excerpt) to a fresh temporary array
uri <- tempfile()
fromDataFrame(penguins, uri)

## Int in and not in
## 'IN' on 2009 and 2007: no returned row may have year 2008
## NOTE(review): all() is vacuously TRUE for an empty result; consider also
## asserting nrow(res) > 0
qc <- tiledb_query_condition_create("year", c(2009L, 2007L), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(all(res$year != "2008"))

## 'NOT_IN' on the same set: every returned row must have year 2008
qc <- tiledb_query_condition_create("year", c(2009L, 2007L), "NOT_IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(all(res$year == "2008"))

## Double
## Three exact values are requested and exactly three rows are expected back
qc <- tiledb_query_condition_create("bill_length_mm", c(32.1,33.1,33.5), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(all(res$bill_length_mm <= 33.5))
expect_equal(nrow(res), 3)

## Character (automagically converted from factor)
## 'IN' keeps the two named islands; the counts per island are checked
qc <- tiledb_query_condition_create("island", c("Biscoe", "Dream"), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
tt <- table(res$island)
expect_equal(tt[["Biscoe"]], 168)
expect_equal(tt[["Dream"]], 124)

## 'NOT_IN' on the same two islands; the count for 'Torgersen' is checked
qc <- tiledb_query_condition_create("island", c("Biscoe", "Dream"), "NOT_IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
tt <- table(res$island)
expect_equal(tt[["Torgersen"]], 52)

## int64
## Small ten-row frame with an integer64 column, written to a new array
df <- data.frame(ind=1:10, val=as.integer64(1:10))
uri <- tempfile()
fromDataFrame(df, uri)
## 'IN' on 6..10: all returned values must be at least 6
qc <- tiledb_query_condition_create("val", as.integer64(6:10), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(all(res$val >= as.integer64(6)))

## 'NOT_IN' on 6..10: all returned values must be at most 5
qc <- tiledb_query_condition_create("val", as.integer64(6:10), "NOT_IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(all(res$val <= as.integer64(5)))
31 changes: 31 additions & 0 deletions man/tiledb_query_condition_create.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,20 @@ BEGIN_RCPP
return R_NilValue;
END_RCPP
}
// libtiledb_query_condition_create
// Glue between R's .Call() interface and the C++ function declared below:
// each incoming SEXP is converted to the C++ parameter type, the function is
// called, and its result is wrapped back into a SEXP for R.
XPtr<tiledb::QueryCondition> libtiledb_query_condition_create(XPtr<tiledb::Context> ctx, const std::string& name, SEXP vec, const std::string& cond_op_string);
RcppExport SEXP _tiledb_libtiledb_query_condition_create(SEXP ctxSEXP, SEXP nameSEXP, SEXP vecSEXP, SEXP cond_op_stringSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert the four SEXP arguments to the types of the C++ signature
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP);
// 'vec' stays a raw SEXP; it is passed on without conversion here
Rcpp::traits::input_parameter< SEXP >::type vec(vecSEXP);
Rcpp::traits::input_parameter< const std::string& >::type cond_op_string(cond_op_stringSEXP);
// Call the implementation and wrap the returned XPtr for R
rcpp_result_gen = Rcpp::wrap(libtiledb_query_condition_create(ctx, name, vec, cond_op_string));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_zip_coords_numeric
NumericVector libtiledb_zip_coords_numeric(List coords, R_xlen_t coord_length);
RcppExport SEXP _tiledb_libtiledb_zip_coords_numeric(SEXP coordsSEXP, SEXP coord_lengthSEXP) {
Expand Down Expand Up @@ -3643,6 +3657,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledb_libtiledb_query_condition_init", (DL_FUNC) &_tiledb_libtiledb_query_condition_init, 5},
{"_tiledb_libtiledb_query_condition_combine", (DL_FUNC) &_tiledb_libtiledb_query_condition_combine, 3},
{"_tiledb_libtiledb_query_condition_set_use_enumeration", (DL_FUNC) &_tiledb_libtiledb_query_condition_set_use_enumeration, 3},
{"_tiledb_libtiledb_query_condition_create", (DL_FUNC) &_tiledb_libtiledb_query_condition_create, 4},
{"_tiledb_libtiledb_zip_coords_numeric", (DL_FUNC) &_tiledb_libtiledb_zip_coords_numeric, 2},
{"_tiledb_libtiledb_zip_coords_integer", (DL_FUNC) &_tiledb_libtiledb_zip_coords_integer, 2},
{"_tiledb_libtiledb_create_group", (DL_FUNC) &_tiledb_libtiledb_create_group, 2},
Expand Down
45 changes: 45 additions & 0 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3762,6 +3762,12 @@ const char* _tiledb_query_condition_op_to_string(tiledb_query_condition_op_t op)
return "EQ";
case TILEDB_NE:
return "NE";
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
case TILEDB_IN:
return "IN";
case TILEDB_NOT_IN:
return "NOT_IN";
#endif
default:
Rcpp::stop("Unknown condition op (%d)", op);
}
Expand All @@ -3780,6 +3786,12 @@ tiledb_query_condition_op_t _tiledb_query_string_to_condition_op(const std::stri
return TILEDB_EQ;
} else if (opstr == "NE") {
return TILEDB_NE;
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
} else if (opstr == "IN") {
return TILEDB_IN;
} else if (opstr == "NOT_IN") {
return TILEDB_NOT_IN;
#endif
} else {
Rcpp::stop("Unknown TileDB op string '%s'", opstr.c_str());
}
Expand Down Expand Up @@ -3893,6 +3905,39 @@ void libtiledb_query_condition_set_use_enumeration(XPtr<tiledb::Context> ctx,
#endif
}

// Builds a set-membership query condition on field 'name' from the R vector
// 'vec' and the operator named by 'cond_op_string', returning it as an
// external pointer. Supported R inputs are integer, double, integer64 (stored
// as double) and character vectors; any other type raises an R error. When
// compiled against TileDB older than 2.17.0 only the context tag is checked
// and an external pointer built from R_NilValue is returned.
// [[Rcpp::export]]
XPtr<tiledb::QueryCondition>
libtiledb_query_condition_create(XPtr<tiledb::Context> ctx, const std::string& name,
                                 SEXP vec, const std::string& cond_op_string) {
    check_xptr_tag<tiledb::Context>(ctx);
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
    // Translate the operator string before looking at 'vec'
    const tiledb_query_condition_op_t cond_op = _tiledb_query_string_to_condition_op(cond_op_string);

    // Copies a freshly created condition into a heap object owned by an XPtr
    auto as_xptr = [](const tiledb::QueryCondition& cond) {
        return make_xptr<tiledb::QueryCondition>(new tiledb::QueryCondition(cond));
    };

    // Dispatch on the R storage type of 'vec'
    switch (TYPEOF(vec)) {
    case INTSXP: {
        std::vector<int32_t> ints = Rcpp::as<std::vector<int32_t>>(vec);
        return as_xptr(tiledb::QueryConditionExperimental::create<int32_t>(*ctx.get(), name, ints, cond_op));
    }
    case REALSXP: {
        // integer64 values arrive as doubles and need their own conversion
        if (Rcpp::isInteger64(vec)) {
            std::vector<int64_t> int64s = Rcpp::fromInteger64(Rcpp::NumericVector(vec));
            return as_xptr(tiledb::QueryConditionExperimental::create<int64_t>(*ctx.get(), name, int64s, cond_op));
        }
        std::vector<double> doubles = Rcpp::as<std::vector<double>>(vec);
        return as_xptr(tiledb::QueryConditionExperimental::create<double>(*ctx.get(), name, doubles, cond_op));
    }
    case STRSXP: {
        std::vector<std::string> strings = Rcpp::as<std::vector<std::string>>(vec);
        return as_xptr(tiledb::QueryConditionExperimental::create(*ctx.get(), name, strings, cond_op));
    }
    default:
        Rcpp::stop("No support (yet) for type '%s'.", Rcpp::type2name(vec));
    }
#endif
    return make_xptr<tiledb::QueryCondition>(R_NilValue);
}


/**
* Array helper functions
Expand Down

0 comments on commit ea8b912

Please sign in to comment.