Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend enumeration support to non-string levels #609

Merged
merged 2 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tiledb
Type: Package
Version: 0.21.1.12
Version: 0.21.1.13
Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays
Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")),
person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre"))
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

* R objects can be (de-)serialized to and from VFS paths (#608)

* Enumeration support has been extended to some cases only supported by Arrow (#609)

## Bug Fixes

* The DESCRIPTION file now correctly refers to macOS 10.14 (#596)
Expand Down
4 changes: 2 additions & 2 deletions R/ArraySchemaEvolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ tiledb_array_schema_evolution_drop_enumeration <- function(object, attrname) {

#' Evolve an Array Schema by adding an empty Enumeration
#'
#' @param asc An ArraySchemaEvolution object
#' @param ase An ArraySchemaEvolution object
#' @param enum_name A character value with the Enumeration name
#' @param type_str A character value with the TileDB type, defaults to \sQuote{ASCII}
#' @param cell_val_num An integer with number values per cell, defaults to \code{NA_integer_} to
Expand All @@ -137,7 +137,7 @@ tiledb_array_schema_evolution_add_enumeration_empty <- function(ase, enum_name,
ordered = FALSE,
ctx = tiledb_get_context()) {
stopifnot("Argument 'ase' must be an Array Schema Evolution object" =
is(object, "tiledb_array_schema_evolution"),
is(ase, "tiledb_array_schema_evolution"),
"Argument 'enum_name' must be character" = is.character(enum_name),
"Argument 'type_str' must be character" = is.character(type_str),
"Argument 'cell_val_num' must be integer" = is.integer(cell_val_num),
Expand Down
26 changes: 25 additions & 1 deletion R/Attribute.R
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,30 @@ tiledb_attribute_set_enumeration_name <- function(attr, enum_name, ctx = tiledb_
#' @export
tiledb_attribute_is_ordered_enumeration_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arr' argument must be an external pointer" = is(arrptr, "externalptr"))
"The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
libtiledb_attribute_is_ordered_enumeration(ctx@ptr, attr@ptr, arrptr)
}

# internal function to access enumeration data type
#' @noRd
tiledb_attribute_get_enumeration_type <- function(attr, arr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arr' argument must be an array" = is(arr, "tiledb_array"))
libtiledb_attribute_get_enumeration_type(ctx@ptr, attr@ptr, arr@ptr)
}

# internal function to access enumeration data type
#' @noRd
tiledb_attribute_get_enumeration_type_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
libtiledb_attribute_get_enumeration_type(ctx@ptr, attr@ptr, arrptr)
}

# internal function to get (non-string) enumeration vector
#' @noRd
tiledb_attribute_get_enumeration_vector_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
libtiledb_attribute_get_enumeration_vector(ctx@ptr, attr@ptr, arrptr)
}
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,14 @@ libtiledb_attribute_has_enumeration <- function(ctx, attr) {
.Call(`_tiledb_libtiledb_attribute_has_enumeration`, ctx, attr)
}

libtiledb_attribute_get_enumeration_type <- function(ctx, attr, arr) {
.Call(`_tiledb_libtiledb_attribute_get_enumeration_type`, ctx, attr, arr)
}

libtiledb_attribute_get_enumeration_vector <- function(ctx, attr, arr) {
.Call(`_tiledb_libtiledb_attribute_get_enumeration_vector`, ctx, attr, arr)
}

libtiledb_attribute_get_enumeration <- function(ctx, attr, arr) {
.Call(`_tiledb_libtiledb_attribute_get_enumeration`, ctx, attr, arr)
}
Expand Down
22 changes: 18 additions & 4 deletions R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -619,8 +619,18 @@ setMethod("[", "tiledb_array",
ordered_dict <- dictionaries
for (ii in seq_along(dictionaries)) {
if (isTRUE(alldictionary[ii])) {
dictionaries[[ii]] <- tiledb_attribute_get_enumeration_ptr(attrs[[allnames[ii]]], arrptr)
ordered_dict[[ii]] <- tiledb_attribute_is_ordered_enumeration_ptr(attrs[[allnames[ii]]], arrptr)
attr <- attrs[[allnames[ii]]]
tpstr <- tiledb_attribute_get_enumeration_type_ptr(attr, arrptr)
if (tpstr %in% c("ASCII", "UTF8")) {
dictionaries[[ii]] <- tiledb_attribute_get_enumeration_ptr(attr, arrptr)
} else if (tpstr %in% c("FLOAT32", "FLOAT64", "BOOL",
"UINT8", "UINT16", "UINT32", "UINT64",
"INT8", "INT16", "INT32", "INT64")) {
dictionaries[[ii]] <- tiledb_attribute_get_enumeration_vector_ptr(attr, arrptr)
} else {
stop("Unsupported enumeration vector payload of type '%s'", tpstr, call. = FALSE)
}
ordered_dict[[ii]] <- tiledb_attribute_is_ordered_enumeration_ptr(attr, arrptr)
attr(dictionaries[[ii]], "ordered") <- ordered_dict[[ii]]
}
}
Expand Down Expand Up @@ -1015,8 +1025,12 @@ setMethod("[", "tiledb_array",
if (min(col, na.rm=TRUE) == 2 && max(col, na.rm=TRUE) == length(dct) + 1)
col <- col - 1L

attr(col, "levels") <- dct
attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
if (inherits(dct, "character")) {
attr(col, "levels") <- dct
attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
} else {
col <- dct[col]
}
}
col
}
Expand Down
4 changes: 2 additions & 2 deletions man/tiledb_array_schema_evolution_add_enumeration_empty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,32 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// libtiledb_attribute_get_enumeration_type
Rcpp::String libtiledb_attribute_get_enumeration_type(XPtr<tiledb::Context> ctx, XPtr<tiledb::Attribute> attr, XPtr<tiledb::Array> arr);
RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration_type(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Attribute> >::type attr(attrSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Array> >::type arr(arrSEXP);
rcpp_result_gen = Rcpp::wrap(libtiledb_attribute_get_enumeration_type(ctx, attr, arr));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_attribute_get_enumeration_vector
SEXP libtiledb_attribute_get_enumeration_vector(XPtr<tiledb::Context> ctx, XPtr<tiledb::Attribute> attr, XPtr<tiledb::Array> arr);
RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration_vector(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Attribute> >::type attr(attrSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Array> >::type arr(arrSEXP);
rcpp_result_gen = Rcpp::wrap(libtiledb_attribute_get_enumeration_vector(ctx, attr, arr));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_attribute_get_enumeration
std::vector<std::string> libtiledb_attribute_get_enumeration(XPtr<tiledb::Context> ctx, XPtr<tiledb::Attribute> attr, XPtr<tiledb::Array> arr);
RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
Expand Down Expand Up @@ -3594,6 +3620,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledb_libtiledb_attribute_set_nullable", (DL_FUNC) &_tiledb_libtiledb_attribute_set_nullable, 2},
{"_tiledb_libtiledb_attribute_get_nullable", (DL_FUNC) &_tiledb_libtiledb_attribute_get_nullable, 1},
{"_tiledb_libtiledb_attribute_has_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_has_enumeration, 2},
{"_tiledb_libtiledb_attribute_get_enumeration_type", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration_type, 3},
{"_tiledb_libtiledb_attribute_get_enumeration_vector", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration_vector, 3},
{"_tiledb_libtiledb_attribute_get_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration, 3},
{"_tiledb_libtiledb_attribute_set_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_set_enumeration, 3},
{"_tiledb_libtiledb_attribute_is_ordered_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_is_ordered_enumeration, 3},
Expand Down
63 changes: 63 additions & 0 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1588,6 +1588,68 @@ bool libtiledb_attribute_has_enumeration(XPtr<tiledb::Context> ctx,
return res;
}

// [[Rcpp::export]]
Rcpp::String libtiledb_attribute_get_enumeration_type(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
XPtr<tiledb::Array> arr) {

check_xptr_tag<tiledb::Context>(ctx);
check_xptr_tag<tiledb::Attribute>(attr);
check_xptr_tag<tiledb::Array>(arr);
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
auto enmrname = tiledb::AttributeExperimental::get_enumeration_name(*ctx.get(), *attr.get());
if (enmrname == std::nullopt) {
Rcpp::stop("No enumeration name for attribute");
}
auto enmr = tiledb::ArrayExperimental::get_enumeration(*ctx.get(), *arr.get(), enmrname.value());
if (enmr.ptr() == nullptr) {
Rcpp::stop("No enumeration for given attribute.");
}
Rcpp::String res = Rcpp::as<Rcpp::String>(Rcpp::wrap(_tiledb_datatype_to_string(enmr.type())));
#else
Rcpp::String res = Rcpp::as<Rcpp::String>(NA_STRING);
#endif
return res;
}

// [[Rcpp::export]]
SEXP libtiledb_attribute_get_enumeration_vector(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
XPtr<tiledb::Array> arr) {
check_xptr_tag<tiledb::Context>(ctx);
check_xptr_tag<tiledb::Attribute>(attr);
check_xptr_tag<tiledb::Array>(arr);
SEXP res = R_NilValue;
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
auto enmrname = tiledb::AttributeExperimental::get_enumeration_name(*ctx.get(), *attr.get());
if (enmrname == std::nullopt) {
Rcpp::stop("No enumeration name for attribute");
}
auto enmr = tiledb::ArrayExperimental::get_enumeration(*ctx.get(), *arr.get(), enmrname.value());
if (enmr.ptr() == nullptr) {
Rcpp::stop("No enumeration for given attribute.");
}
auto dtype = enmr.type();
if (dtype == TILEDB_FLOAT32 || dtype == TILEDB_FLOAT64) {
auto v = enmr.as_vector<double>();
res = Rcpp::wrap(v);
} else if (dtype == TILEDB_INT8 || dtype == TILEDB_INT16 || dtype == TILEDB_INT32 ||
dtype == TILEDB_UINT8 || dtype == TILEDB_UINT16 || dtype == TILEDB_UINT32) {
auto v = enmr.as_vector<int32_t>();
res = Rcpp::wrap(v);
} else if (dtype == TILEDB_INT64 || dtype == TILEDB_UINT64) {
auto v = enmr.as_vector<int64_t>();
res = Rcpp::toInteger64(v);
} else if (dtype == TILEDB_BOOL) {
auto v = enmr.as_vector<bool>();
res = Rcpp::wrap(v);
} else {
Rcpp::stop("Unsupported non-string type '%s'", _tiledb_datatype_to_string(dtype));
}
#endif
return res;
}

// [[Rcpp::export]]
std::vector<std::string> libtiledb_attribute_get_enumeration(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
Expand All @@ -1610,6 +1672,7 @@ std::vector<std::string> libtiledb_attribute_get_enumeration(XPtr<tiledb::Contex
return res;
}


// [[Rcpp::export]]
XPtr<tiledb::Attribute> libtiledb_attribute_set_enumeration(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
Expand Down