Skip to content

Commit

Permalink
Extend enumeration support to non-string levels (#609)
Browse files Browse the repository at this point in the history
* Extend enumeration support to non-string levels

While rare to impossible in plain R, this is possible from Arrow as in the SOMA project

* Update NEWS and roll micro release [ci skip]
  • Loading branch information
eddelbuettel committed Nov 2, 2023
1 parent 21a1224 commit b1f7c7a
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tiledb
Type: Package
Version: 0.21.1.12
Version: 0.21.1.13
Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays
Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")),
person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre"))
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

* R objects can be (de-)serialized to and from VFS paths (#608)

* Enumeration support has been extended to some cases only supported by Arrow (#609)

## Bug Fixes

* The DESCRIPTION file now correctly refers to macOS 10.14 (#596)
Expand Down
4 changes: 2 additions & 2 deletions R/ArraySchemaEvolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ tiledb_array_schema_evolution_drop_enumeration <- function(object, attrname) {

#' Evolve an Array Schema by adding an empty Enumeration
#'
#' @param asc An ArraySchemaEvolution object
#' @param ase An ArraySchemaEvolution object
#' @param enum_name A character value with the Enumeration name
#' @param type_str A character value with the TileDB type, defaults to \sQuote{ASCII}
#' @param cell_val_num An integer with number values per cell, defaults to \code{NA_integer_} to
Expand All @@ -137,7 +137,7 @@ tiledb_array_schema_evolution_add_enumeration_empty <- function(ase, enum_name,
ordered = FALSE,
ctx = tiledb_get_context()) {
stopifnot("Argument 'ase' must be an Array Schema Evolution object" =
is(object, "tiledb_array_schema_evolution"),
is(ase, "tiledb_array_schema_evolution"),
"Argument 'enum_name' must be character" = is.character(enum_name),
"Argument 'type_str' must be character" = is.character(type_str),
"Argument 'cell_val_num' must be integer" = is.integer(cell_val_num),
Expand Down
26 changes: 25 additions & 1 deletion R/Attribute.R
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,30 @@ tiledb_attribute_set_enumeration_name <- function(attr, enum_name, ctx = tiledb_
#' @export
tiledb_attribute_is_ordered_enumeration_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arr' argument must be an external pointer" = is(arrptr, "externalptr"))
"The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
libtiledb_attribute_is_ordered_enumeration(ctx@ptr, attr@ptr, arrptr)
}

# internal function to access enumeration data type
#' @noRd
tiledb_attribute_get_enumeration_type <- function(attr, arr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arr' argument must be an array" = is(arr, "tiledb_array"))
libtiledb_attribute_get_enumeration_type(ctx@ptr, attr@ptr, arr@ptr)
}

# internal function to access enumeration data type
#' @noRd
tiledb_attribute_get_enumeration_type_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
libtiledb_attribute_get_enumeration_type(ctx@ptr, attr@ptr, arrptr)
}

# internal function to get (non-string) enumeration vector
#' @noRd
tiledb_attribute_get_enumeration_vector_ptr <- function(attr, arrptr, ctx = tiledb_get_context()) {
stopifnot("The 'attr' argument must be an attribute" = is(attr, "tiledb_attr"),
"The 'arrptr' argument must be an external pointer" = is(arrptr, "externalptr"))
libtiledb_attribute_get_enumeration_vector(ctx@ptr, attr@ptr, arrptr)
}
8 changes: 8 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,14 @@ libtiledb_attribute_has_enumeration <- function(ctx, attr) {
.Call(`_tiledb_libtiledb_attribute_has_enumeration`, ctx, attr)
}

libtiledb_attribute_get_enumeration_type <- function(ctx, attr, arr) {
.Call(`_tiledb_libtiledb_attribute_get_enumeration_type`, ctx, attr, arr)
}

libtiledb_attribute_get_enumeration_vector <- function(ctx, attr, arr) {
.Call(`_tiledb_libtiledb_attribute_get_enumeration_vector`, ctx, attr, arr)
}

libtiledb_attribute_get_enumeration <- function(ctx, attr, arr) {
.Call(`_tiledb_libtiledb_attribute_get_enumeration`, ctx, attr, arr)
}
Expand Down
22 changes: 18 additions & 4 deletions R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -619,8 +619,18 @@ setMethod("[", "tiledb_array",
ordered_dict <- dictionaries
for (ii in seq_along(dictionaries)) {
if (isTRUE(alldictionary[ii])) {
dictionaries[[ii]] <- tiledb_attribute_get_enumeration_ptr(attrs[[allnames[ii]]], arrptr)
ordered_dict[[ii]] <- tiledb_attribute_is_ordered_enumeration_ptr(attrs[[allnames[ii]]], arrptr)
attr <- attrs[[allnames[ii]]]
tpstr <- tiledb_attribute_get_enumeration_type_ptr(attr, arrptr)
if (tpstr %in% c("ASCII", "UTF8")) {
dictionaries[[ii]] <- tiledb_attribute_get_enumeration_ptr(attr, arrptr)
} else if (tpstr %in% c("FLOAT32", "FLOAT64", "BOOL",
"UINT8", "UINT16", "UINT32", "UINT64",
"INT8", "INT16", "INT32", "INT64")) {
dictionaries[[ii]] <- tiledb_attribute_get_enumeration_vector_ptr(attr, arrptr)
} else {
stop("Unsupported enumeration vector payload of type '%s'", tpstr, call. = FALSE)
}
ordered_dict[[ii]] <- tiledb_attribute_is_ordered_enumeration_ptr(attr, arrptr)
attr(dictionaries[[ii]], "ordered") <- ordered_dict[[ii]]
}
}
Expand Down Expand Up @@ -1015,8 +1025,12 @@ setMethod("[", "tiledb_array",
if (min(col, na.rm=TRUE) == 2 && max(col, na.rm=TRUE) == length(dct) + 1)
col <- col - 1L

attr(col, "levels") <- dct
attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
if (inherits(dct, "character")) {
attr(col, "levels") <- dct
attr(col, "class") <- if (ord) c("ordered", "factor") else "factor"
} else {
col <- dct[col]
}
}
col
}
Expand Down
4 changes: 2 additions & 2 deletions man/tiledb_array_schema_evolution_add_enumeration_empty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,32 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// libtiledb_attribute_get_enumeration_type
Rcpp::String libtiledb_attribute_get_enumeration_type(XPtr<tiledb::Context> ctx, XPtr<tiledb::Attribute> attr, XPtr<tiledb::Array> arr);
RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration_type(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Attribute> >::type attr(attrSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Array> >::type arr(arrSEXP);
rcpp_result_gen = Rcpp::wrap(libtiledb_attribute_get_enumeration_type(ctx, attr, arr));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_attribute_get_enumeration_vector
SEXP libtiledb_attribute_get_enumeration_vector(XPtr<tiledb::Context> ctx, XPtr<tiledb::Attribute> attr, XPtr<tiledb::Array> arr);
RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration_vector(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtr<tiledb::Context> >::type ctx(ctxSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Attribute> >::type attr(attrSEXP);
Rcpp::traits::input_parameter< XPtr<tiledb::Array> >::type arr(arrSEXP);
rcpp_result_gen = Rcpp::wrap(libtiledb_attribute_get_enumeration_vector(ctx, attr, arr));
return rcpp_result_gen;
END_RCPP
}
// libtiledb_attribute_get_enumeration
std::vector<std::string> libtiledb_attribute_get_enumeration(XPtr<tiledb::Context> ctx, XPtr<tiledb::Attribute> attr, XPtr<tiledb::Array> arr);
RcppExport SEXP _tiledb_libtiledb_attribute_get_enumeration(SEXP ctxSEXP, SEXP attrSEXP, SEXP arrSEXP) {
Expand Down Expand Up @@ -3594,6 +3620,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledb_libtiledb_attribute_set_nullable", (DL_FUNC) &_tiledb_libtiledb_attribute_set_nullable, 2},
{"_tiledb_libtiledb_attribute_get_nullable", (DL_FUNC) &_tiledb_libtiledb_attribute_get_nullable, 1},
{"_tiledb_libtiledb_attribute_has_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_has_enumeration, 2},
{"_tiledb_libtiledb_attribute_get_enumeration_type", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration_type, 3},
{"_tiledb_libtiledb_attribute_get_enumeration_vector", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration_vector, 3},
{"_tiledb_libtiledb_attribute_get_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_get_enumeration, 3},
{"_tiledb_libtiledb_attribute_set_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_set_enumeration, 3},
{"_tiledb_libtiledb_attribute_is_ordered_enumeration", (DL_FUNC) &_tiledb_libtiledb_attribute_is_ordered_enumeration, 3},
Expand Down
63 changes: 63 additions & 0 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1588,6 +1588,68 @@ bool libtiledb_attribute_has_enumeration(XPtr<tiledb::Context> ctx,
return res;
}

// [[Rcpp::export]]
Rcpp::String libtiledb_attribute_get_enumeration_type(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
XPtr<tiledb::Array> arr) {

check_xptr_tag<tiledb::Context>(ctx);
check_xptr_tag<tiledb::Attribute>(attr);
check_xptr_tag<tiledb::Array>(arr);
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
auto enmrname = tiledb::AttributeExperimental::get_enumeration_name(*ctx.get(), *attr.get());
if (enmrname == std::nullopt) {
Rcpp::stop("No enumeration name for attribute");
}
auto enmr = tiledb::ArrayExperimental::get_enumeration(*ctx.get(), *arr.get(), enmrname.value());
if (enmr.ptr() == nullptr) {
Rcpp::stop("No enumeration for given attribute.");
}
Rcpp::String res = Rcpp::as<Rcpp::String>(Rcpp::wrap(_tiledb_datatype_to_string(enmr.type())));
#else
Rcpp::String res = Rcpp::as<Rcpp::String>(NA_STRING);
#endif
return res;
}

// [[Rcpp::export]]
SEXP libtiledb_attribute_get_enumeration_vector(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
XPtr<tiledb::Array> arr) {
check_xptr_tag<tiledb::Context>(ctx);
check_xptr_tag<tiledb::Attribute>(attr);
check_xptr_tag<tiledb::Array>(arr);
SEXP res = R_NilValue;
#if TILEDB_VERSION >= TileDB_Version(2,17,0)
auto enmrname = tiledb::AttributeExperimental::get_enumeration_name(*ctx.get(), *attr.get());
if (enmrname == std::nullopt) {
Rcpp::stop("No enumeration name for attribute");
}
auto enmr = tiledb::ArrayExperimental::get_enumeration(*ctx.get(), *arr.get(), enmrname.value());
if (enmr.ptr() == nullptr) {
Rcpp::stop("No enumeration for given attribute.");
}
auto dtype = enmr.type();
if (dtype == TILEDB_FLOAT32 || dtype == TILEDB_FLOAT64) {
auto v = enmr.as_vector<double>();
res = Rcpp::wrap(v);
} else if (dtype == TILEDB_INT8 || dtype == TILEDB_INT16 || dtype == TILEDB_INT32 ||
dtype == TILEDB_UINT8 || dtype == TILEDB_UINT16 || dtype == TILEDB_UINT32) {
auto v = enmr.as_vector<int32_t>();
res = Rcpp::wrap(v);
} else if (dtype == TILEDB_INT64 || dtype == TILEDB_UINT64) {
auto v = enmr.as_vector<int64_t>();
res = Rcpp::toInteger64(v);
} else if (dtype == TILEDB_BOOL) {
auto v = enmr.as_vector<bool>();
res = Rcpp::wrap(v);
} else {
Rcpp::stop("Unsupported non-string type '%s'", _tiledb_datatype_to_string(dtype));
}
#endif
return res;
}

// [[Rcpp::export]]
std::vector<std::string> libtiledb_attribute_get_enumeration(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
Expand All @@ -1610,6 +1672,7 @@ std::vector<std::string> libtiledb_attribute_get_enumeration(XPtr<tiledb::Contex
return res;
}


// [[Rcpp::export]]
XPtr<tiledb::Attribute> libtiledb_attribute_set_enumeration(XPtr<tiledb::Context> ctx,
XPtr<tiledb::Attribute> attr,
Expand Down

0 comments on commit b1f7c7a

Please sign in to comment.