Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support 'IN' and 'NOT_IN' set operations for query conditions #597

Merged
merged 4 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ export(tiledb_query_buffer_alloc_ptr)
export(tiledb_query_condition)
export(tiledb_query_condition_combine)
export(tiledb_query_condition_init)
export(tiledb_query_condition_set_comparison)
export(tiledb_query_condition_set_use_enumeration)
export(tiledb_query_create_buffer_ptr)
export(tiledb_query_create_buffer_ptr_char)
Expand Down
26 changes: 25 additions & 1 deletion R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,31 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_i
tiledb_query_condition_set_use_enumeration <- function(qc, use_enum, ctx = tiledb_get_context()) {
    ## Validate all arguments up front. 'ctx' is checked exactly once, with the
    ## message that names the offending argument (a second, vaguer check of the
    ## same condition would always shadow it).
    stopifnot("Argument 'qc' must be a query condition object" = is(qc, "tiledb_query_condition"),
              "Argument 'use_enum' must be logical" = is.logical(use_enum),
              "The 'ctx' argument must be a context object" = is(ctx, "tiledb_ctx"),
              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
    ## Pass the external pointers held by the S4 objects to the compiled routine
    libtiledb_query_condition_set_use_enumeration(ctx@ptr, qc@ptr, use_enum)
}

#' Perform set operations via query condition
#'
#' Uses \sQuote{IN} and \sQuote{NOT_IN} operators on given attribute
#' @param name A character value with the schema attribute name
#' @param values A vector with the given values; supported types are integer, double,
#' integer64 and character
#' @param op (optional) A character value with the chosen set operation, this must be one of
#' \sQuote{IN} or \sQuote{NOT_IN}; defaults to \sQuote{IN}
#' @param ctx (optional) A TileDB Ctx object; if not supplied the default
#' context object is retrieved
#' @return A query condition object is returned (invisibly)
#' @export
tiledb_query_condition_set_comparison <- function(name, values, op = "IN", ctx = tiledb_get_context()) {
    stopifnot("Argument 'name' must be character" = is.character(name),
              "Argument 'values' must be int, double, int64 or char" =
                  (is.numeric(values) || bit64::is.integer64(values) || is.character(values)),
              "Argument 'op' must be one of 'IN' or 'NOT_IN'" = op %in% c("IN", "NOT_IN"),
              "The 'ctx' argument must be a context object" = is(ctx, "tiledb_ctx"),
              "This function needs TileDB 2.17.0 or later" = tiledb_version(TRUE) >= "2.17.0")
    ## The compiled helper lives in this package's own namespace, so it is
    ## called directly rather than through the ':::' accessor.
    ptr <- libtiledb_query_condition_create(ctx@ptr, name, values, op)
    ## Wrap the external pointer in the S4 class, flagged as already initialised
    qc <- new("tiledb_query_condition", ptr = ptr, init = TRUE)
    invisible(qc)
}
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,10 @@ libtiledb_query_condition_set_use_enumeration <- function(ctx, cond, use_enumera
invisible(.Call(`_tiledb_libtiledb_query_condition_set_use_enumeration`, ctx, cond, use_enumeration))
}

# Thin R-side shim: forwards its four arguments unchanged to the compiled
# routine registered as `_tiledb_libtiledb_query_condition_create` and returns
# whatever that routine returns.
# NOTE(review): RcppExports.R is normally regenerated by Rcpp::compileAttributes();
# confirm before relying on hand-written comments here.
libtiledb_query_condition_create <- function(ctx, name, vec, cond_op_string) {
.Call(`_tiledb_libtiledb_query_condition_create`, ctx, name, vec, cond_op_string)
}

# Thin R-side shim: forwards `coords` and `coord_length` unchanged to the
# compiled routine registered as `_tiledb_libtiledb_zip_coords_numeric`.
libtiledb_zip_coords_numeric <- function(coords, coord_length) {
.Call(`_tiledb_libtiledb_zip_coords_numeric`, coords, coord_length)
}
Expand Down
44 changes: 44 additions & 0 deletions inst/tinytest/test_querycondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,47 @@ arr <- tiledb_array(uri, extended=FALSE, return_as="data.frame")
qc <- parse_query_condition(datetime > "2023-01-05 00:00:00" && date <= "2023-01-10", ta=arr)
query_condition(arr) <- qc
if (!isWindows) expect_equal(nrow(arr[]), 5)

## Set-membership conditions ('IN' / 'NOT_IN') need TileDB 2.17.0 or later
if (tiledb_version(TRUE) < "2.17.0") exit_file("Remainder needs 2.17.* or later")
uri <- tempfile()
fromDataFrame(penguins, uri)

## Int in and not in
## (row-count guards: all() over zero rows is vacuously TRUE, so an empty
## result would otherwise let the value checks pass silently)
qc <- tiledb_query_condition_set_comparison("year", c(2009L, 2007L), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(nrow(res) > 0)
expect_true(all(res$year != 2008L))

qc <- tiledb_query_condition_set_comparison("year", c(2009L, 2007L), "NOT_IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(nrow(res) > 0)
expect_true(all(res$year == 2008L))

## Double
qc <- tiledb_query_condition_set_comparison("bill_length_mm", c(32.1,33.1,33.5), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_true(all(res$bill_length_mm <= 33.5))
expect_equal(nrow(res), 3)

## Character (automagically converted from factor)
qc <- tiledb_query_condition_set_comparison("island", c("Biscoe", "Dream"), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
tt <- table(res$island)
expect_equal(tt[["Biscoe"]], 168)
expect_equal(tt[["Dream"]], 124)

qc <- tiledb_query_condition_set_comparison("island", c("Biscoe", "Dream"), "NOT_IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
tt <- table(res$island)
expect_equal(tt[["Torgersen"]], 52)

## int64
df <- data.frame(ind=1:10, val=as.integer64(1:10))
uri <- tempfile()
fromDataFrame(df, uri)
qc <- tiledb_query_condition_set_comparison("val", as.integer64(6:10), "IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_equal(nrow(res), 5)              # exactly the values 6..10 of the 10 rows written
expect_true(all(res$val >= as.integer64(6)))

qc <- tiledb_query_condition_set_comparison("val", as.integer64(6:10), "NOT_IN")
res <- tiledb_array(uri, return_as="data.frame", query_condition=qc)[]
expect_equal(nrow(res), 5)              # exactly the values 1..5 of the 10 rows written
expect_true(all(res$val <= as.integer64(5)))
31 changes: 31 additions & 0 deletions man/tiledb_query_condition_set_comparison.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,20 @@ BEGIN_RCPP
return R_NilValue;
END_RCPP
}
// libtiledb_query_condition_create
// .Call entry point for libtiledb_query_condition_create(): converts the four
// incoming SEXP arguments to the C++ parameter types, invokes the function and
// wraps its result back into a SEXP via Rcpp::wrap.
// NOTE(review): RcppExports.cpp is normally regenerated by
// Rcpp::compileAttributes(); confirm before hand-editing this glue.
XPtr<tiledb::QueryCondition> libtiledb_query_condition_create(XPtr<tiledb::Context> ctx, const std::string& name, SEXP vec, const std::string& cond_op_string);
RcppExport SEXP _tiledb_libtiledb_query_condition_create(SEXP ctxSEXP, SEXP nameSEXP, SEXP vecSEXP, SEXP cond_op_stringSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP);
Rcpp::traits::input_parameter< SEXP >::type vec(vecSEXP);
Rcpp::traits::input_parameter< const std::string& >::type cond_op_string(cond_op_stringSEXP);
rcpp_result_gen = Rcpp::wrap(libtiledb_query_condition_create(ctx, name, vec, cond_op_string));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_zip_coords_numeric
NumericVector libtiledb_zip_coords_numeric(List coords, R_xlen_t coord_length);
RcppExport SEXP _tiledb_libtiledb_zip_coords_numeric(SEXP coordsSEXP, SEXP coord_lengthSEXP) {
Expand Down Expand Up @@ -3643,6 +3657,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledb_libtiledb_query_condition_init", (DL_FUNC) &_tiledb_libtiledb_query_condition_init, 5},
{"_tiledb_libtiledb_query_condition_combine", (DL_FUNC) &_tiledb_libtiledb_query_condition_combine, 3},
{"_tiledb_libtiledb_query_condition_set_use_enumeration", (DL_FUNC) &_tiledb_libtiledb_query_condition_set_use_enumeration, 3},
{"_tiledb_libtiledb_query_condition_create", (DL_FUNC) &_tiledb_libtiledb_query_condition_create, 4},
{"_tiledb_libtiledb_zip_coords_numeric", (DL_FUNC) &_tiledb_libtiledb_zip_coords_numeric, 2},
{"_tiledb_libtiledb_zip_coords_integer", (DL_FUNC) &_tiledb_libtiledb_zip_coords_integer, 2},
{"_tiledb_libtiledb_create_group", (DL_FUNC) &_tiledb_libtiledb_create_group, 2},
Expand Down
45 changes: 45 additions & 0 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3762,6 +3762,12 @@ const char* _tiledb_query_condition_op_to_string(tiledb_query_condition_op_t op)
return "EQ";
case TILEDB_NE:
return "NE";
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
case TILEDB_IN:
return "IN";
case TILEDB_NOT_IN:
return "NOT_IN";
#endif
default:
Rcpp::stop("Unknown condition op (%d)", op);
}
Expand All @@ -3780,6 +3786,12 @@ tiledb_query_condition_op_t _tiledb_query_string_to_condition_op(const std::stri
return TILEDB_EQ;
} else if (opstr == "NE") {
return TILEDB_NE;
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
} else if (opstr == "IN") {
return TILEDB_IN;
} else if (opstr == "NOT_IN") {
return TILEDB_NOT_IN;
#endif
} else {
Rcpp::stop("Unknown TileDB op string '%s'", opstr.c_str());
}
Expand Down Expand Up @@ -3893,6 +3905,39 @@ void libtiledb_query_condition_set_use_enumeration(XPtr<tiledb::Context> ctx,
#endif
}

// Build a set-membership query condition on attribute `name` from the R
// vector `vec`, using the operator named by `cond_op_string`.
//
// The element type handed to TileDB follows the R storage type of `vec`:
//   INTSXP               -> int32_t
//   REALSXP (integer64)  -> int64_t
//   REALSXP (otherwise)  -> double
//   STRSXP               -> std::string
// Any other storage type raises an R error.
//
// When compiled against a TileDB older than 2.17.0 no condition is built and
// an external pointer wrapping R_NilValue is returned instead.
// [[Rcpp::export]]
XPtr<tiledb::QueryCondition>
libtiledb_query_condition_create(XPtr<tiledb::Context> ctx, const std::string& name,
                                 SEXP vec, const std::string& cond_op_string) {
    check_xptr_tag<tiledb::Context>(ctx);
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
    const tiledb_query_condition_op_t op = _tiledb_query_string_to_condition_op(cond_op_string);
    tiledb::Context& context = *ctx.get();

    // Every branch hands back a heap-allocated copy of the condition it built
    auto to_xptr = [](const tiledb::QueryCondition& cond) {
        return make_xptr<tiledb::QueryCondition>(new tiledb::QueryCondition(cond));
    };

    switch (TYPEOF(vec)) {
    case INTSXP: {
        std::vector<int32_t> ints = Rcpp::as<std::vector<int32_t>>(vec);
        return to_xptr(tiledb::QueryConditionExperimental::create<int32_t>(context, name, ints, op));
    }
    case REALSXP: {
        // integer64 values arrive as REALSXP payloads and must be told apart
        // from ordinary doubles
        if (Rcpp::isInteger64(vec)) {
            std::vector<int64_t> int64s = Rcpp::fromInteger64(Rcpp::NumericVector(vec));
            return to_xptr(tiledb::QueryConditionExperimental::create<int64_t>(context, name, int64s, op));
        }
        std::vector<double> doubles = Rcpp::as<std::vector<double>>(vec);
        return to_xptr(tiledb::QueryConditionExperimental::create<double>(context, name, doubles, op));
    }
    case STRSXP: {
        std::vector<std::string> strings = Rcpp::as<std::vector<std::string>>(vec);
        return to_xptr(tiledb::QueryConditionExperimental::create(context, name, strings, op));
    }
    default:
        Rcpp::stop("No support (yet) for type '%s'.", Rf_type2char(TYPEOF(vec)));
    }
#endif
    return make_xptr<tiledb::QueryCondition>(R_NilValue);
}


/**
* Array helper functions
Expand Down
Loading