From 9a62525025a9c5cb37b7c947b04e01c30f0912da Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 18 May 2022 15:29:48 -0500 Subject: [PATCH 1/3] adding BOOL --- DESCRIPTION | 2 +- R/DataFrame.R | 2 ++ src/libtiledb.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++--- src/libtiledb.h | 2 ++ src/nullable.cpp | 17 +++++++++++++++ 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 84346be19f..b45225a8e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tiledb Type: Package -Version: 0.13.0 +Version: 0.13.0.1 Title: Universal Storage Engine for Sparse and Dense Multidimensional Arrays Authors@R: c(person("TileDB, Inc.", role = c("aut", "cph")), person("Dirk", "Eddelbuettel", email = "dirk@tiledb.com", role = "cre")) diff --git a/R/DataFrame.R b/R/DataFrame.R index 798caf615c..bafe46d180 100644 --- a/R/DataFrame.R +++ b/R/DataFrame.R @@ -192,6 +192,8 @@ fromDataFrame <- function(obj, uri, col_index=NULL, sparse=TRUE, allows_dups=spa tp <- "DATETIME_NS" else if (cl == "integer64") tp <- "INT64" + else if (cl == "logical") + tp <- "BOOL" else stop("Currently unsupported type: ", cl) if (debug) { diff --git a/src/libtiledb.cpp b/src/libtiledb.cpp index f71827f221..6ff489128f 100644 --- a/src/libtiledb.cpp +++ b/src/libtiledb.cpp @@ -95,6 +95,10 @@ const char* _tiledb_datatype_to_string(tiledb_datatype_t dtype) { #if TILEDB_VERSION >= TileDB_Version(2,7,0) case TILEDB_BLOB: return "BLOB"; +#endif +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + case TILEDB_BOOL: + return "BOOL"; #endif default: Rcpp::stop("unknown tiledb_datatype_t (%d)", dtype); @@ -157,6 +161,10 @@ tiledb_datatype_t _string_to_tiledb_datatype(std::string typestr) { #if TILEDB_VERSION >= TileDB_Version(2,7,0) } else if (typestr == "BLOB") { return TILEDB_BLOB; +#endif +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + } else if (typestr == "BOOL") { + return TILEDB_BOOL; #endif } else { Rcpp::stop("Unknown TileDB type '%s'", typestr.c_str()); @@ -216,6 +224,10 @@ std::string tiledb_datatype_R_type(std::string datatype) { return "DATETIME_US"; case TILEDB_DATETIME_NS: return "DATETIME_NS"; +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + case TILEDB_BOOL: + return "BOOL"; +#endif default: Rcpp::stop("unknown tiledb_datatype_t (%d)", dtype); } @@ -1364,11 +1376,18 @@ XPtr libtiledb_attribute(XPtr ctx, attr_dtype == TILEDB_INT8 || attr_dtype == TILEDB_UINT8 ) { attr = make_xptr(new tiledb::Attribute(*ctx.get(), name, attr_dtype)); +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + } else if (attr_dtype == TILEDB_BOOL) { + attr = make_xptr(new tiledb::Attribute(*ctx.get(), name, attr_dtype)); +#endif } else { Rcpp::stop("Only integer ((U)INT{8,16,32,64}), logical (INT32), real (FLOAT{32,64}), " - "Date (DATEIME_DAY), Datetime (DATETIME_{SEC,MS,US}), " - "nanotime (DATETIME_NS) and character (CHAR,ASCII) attributes " - "are supported -- seeting %s which is not", type.c_str()); + "Date (DATEIME_DAY), Datetime (DATETIME_{SEC,MS,US}), nanotime (DATETIME_NS), " +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + "logical (BOOL), " +#endif + "and character (CHAR,ASCII) attributes are supported " + "-- seeing %s which is not", type.c_str()); } attr->set_filter_list(*fltrlst); #if TILEDB_VERSION >= TileDB_Version(2,2,0) @@ -2775,6 +2794,10 @@ XPtr libtiledb_query_buffer_alloc_ptr(std::string domaintype, #if TILEDB_VERSION >= TileDB_Version(2,7,0) } else if (domaintype == "BLOB") { buf->size = sizeof(int8_t); +#endif +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + } else if (domaintype == "BOOL") { + buf->size = sizeof(uint8_t); #endif } else if (domaintype == "INT64" || domaintype == "UINT64" || @@ -2928,6 +2951,18 @@ XPtr libtiledb_query_buffer_assign_ptr(XPtr buf, std:: x[i] = static_cast(v[i]); } std::memcpy(buf->vec.data(), &(x[0]), buf->ncells*buf->size); +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + } else if (dtype == "BOOL") { + LogicalVector v(vec); + auto n = v.length(); + std::vector x(n); + for (auto i=0; i(v[i]); + } + std::memcpy(buf->vec.data(), &(x[0]), buf->ncells*buf->size); + if (buf->nullable) + getValidityMapFromLogical(v, buf->validity_map); +#endif } else { Rcpp::stop("Assignment to '%s' currently unsupported.", dtype.c_str()); } @@ -3100,6 +3135,19 @@ RObject libtiledb_query_get_buffer_ptr(XPtr buf, bool asint64 = fal // if (buf->nullable) // setValidityMapForRaw(out, buf->validity_map); return out; +#endif +#if TILEDB_VERSION >= TileDB_Version(2,10,0) + } else if (dtype == "BOOL") { + size_t n = buf->ncells; + std::vector uintvec(n); + std::memcpy(uintvec.data(), buf->vec.data(), n*buf->size); + Rcpp::LogicalVector out(buf->ncells); + for (size_t i=0; i(uintvec[i]); // logical is int32_t internally + } + if (buf->nullable) + setValidityMapForLogical(out, buf->validity_map); + return out; #endif } else { Rcpp::stop("Unsupported type '%s'", dtype.c_str()); diff --git a/src/libtiledb.h b/src/libtiledb.h index 477c9862d1..b65ea39c9c 100644 --- a/src/libtiledb.h +++ b/src/libtiledb.h @@ -65,6 +65,8 @@ void getValidityMapFromNumeric(Rcpp::NumericVector & vec, std::vector & void setValidityMapForNumeric(Rcpp::NumericVector & vec, const std::vector & map); void getValidityMapFromInt64(Rcpp::NumericVector & vec, std::vector & map); void setValidityMapForInt64(std::vector & vec, const std::vector & map); +void getValidityMapFromLogical(Rcpp::LogicalVector & vec, std::vector & map); +void setValidityMapForLogical(Rcpp::LogicalVector & vec, const std::vector & map); // type and size helper tiledb_datatype_t _string_to_tiledb_datatype(std::string typestr); diff --git a/src/nullable.cpp b/src/nullable.cpp index b9c4bbfbe3..1563312353 100644 --- a/src/nullable.cpp +++ b/src/nullable.cpp @@ -84,3 +84,20 @@ void setValidityMapForInt64(std::vector & vec, const std::vector & map) { + if (static_cast(vec.size()) != map.size()) + Rcpp::stop("Unequal length between vector (%d) and map (%d) in int getter.", vec.size(), map.size()); + + for (auto i=0; i < vec.size(); i++) + map[i] = (vec[i] == NA_LOGICAL) ? 0 : 1; +} + +void setValidityMapForLogical(Rcpp::LogicalVector & vec, const std::vector & map) { + if (static_cast(vec.size()) != map.size()) + Rcpp::stop("Unequal length between vector (%d) and map (%d) in int setter.", vec.size(), map.size()); + + for (auto i=0; i < vec.size(); i++) + if (map[i] == 0) + vec[i] = NA_LOGICAL; +} From f2beab7caf56e8c70c67eac8bca90e478b79fb88 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 19 May 2022 17:49:09 -0500 Subject: [PATCH 2/3] additional BOOL support --- R/DataFrame.R | 2 +- R/Query.R | 9 ++++++++- src/libtiledb.cpp | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/R/DataFrame.R b/R/DataFrame.R index bafe46d180..149b958052 100644 --- a/R/DataFrame.R +++ b/R/DataFrame.R @@ -193,7 +193,7 @@ fromDataFrame <- function(obj, uri, col_index=NULL, sparse=TRUE, allows_dups=spa else if (cl == "integer64") tp <- "INT64" else if (cl == "logical") - tp <- "BOOL" + tp <- if (tiledb_version(TRUE) >= "2.10.0") "BOOL" else "INT32" else stop("Currently unsupported type: ", cl) if (debug) { diff --git a/R/Query.R b/R/Query.R index d823b69c99..c1250f8245 100644 --- a/R/Query.R +++ b/R/Query.R @@ -114,7 +114,14 @@ tiledb_query_set_buffer <- function(query, attr, buffer) { stopifnot(`Argument 'query' must be a tiledb_query object` = is(query, "tiledb_query"), `Argument 'attr' must be character_variable` = is.character(attr), `Argument 'buffer' must be integer, numeric or logical` = is.numeric(buffer) || is.logical(buffer)) - libtiledb_query_set_buffer(query@ptr, attr, buffer) + if (is.numeric(buffer) || tiledb_version(TRUE) < "2.10.0") { + libtiledb_query_set_buffer(query@ptr, attr, buffer) + } else { # logical now maps to BOOL which is a uint8_t, we need a different approach + nr <- NROW(buffer) + bufptr <- libtiledb_query_buffer_alloc_ptr("BOOL", nr, FALSE) + bufptr <- libtiledb_query_buffer_assign_ptr(bufptr, "BOOL", buffer, FALSE) + query@ptr <- libtiledb_query_set_buffer_ptr(query@ptr, attr, bufptr) + } invisible(query) } diff --git a/src/libtiledb.cpp b/src/libtiledb.cpp index 6ff489128f..fa1e0b8395 100644 --- a/src/libtiledb.cpp +++ b/src/libtiledb.cpp @@ -2579,7 +2579,7 @@ XPtr libtiledb_query_set_buffer(XPtr query, #endif return query; } else if (TYPEOF(buffer) == LGLSXP) { - LogicalVector vec(buffer); + LogicalVector vec(buffer); // note that it is really an int at the element storage #if TILEDB_VERSION >= TileDB_Version(2,4,0) query->set_data_buffer(attr, vec.begin(), vec.length()); #else From 20b611538d42bd83a43059040dd0b0a085b9984f Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Thu, 19 May 2022 18:05:49 -0500 Subject: [PATCH 3/3] unit tests --- inst/tinytest/test_attr.R | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/inst/tinytest/test_attr.R b/inst/tinytest/test_attr.R index 780b5f30e6..d1f7e01065 100644 --- a/inst/tinytest/test_attr.R +++ b/inst/tinytest/test_attr.R @@ -221,3 +221,40 @@ newarr <- tiledb_array(uri, as.data.frame=TRUE) chk <- newarr[] expect_equal(df[,1:10], chk[,1:10]) expect_equivalent(as.numeric(df[,11]), chk[,11]) # we currently return uint64_t as numeric + + +## BOOL support added in 2.10.0 +if (tiledb_version(TRUE) < "2.10.0") exit_file("Remainder needs 2.10.* or later") + +uri <- tempfile() +if (dir.exists(uri)) unlink(uri, recursive=TRUE) + +## high-level +D <- data.frame(key=c(2L,4L,6L,8L), val=c(TRUE,FALSE,NA,TRUE)) +fromDataFrame(D, uri, col_index=1) +arr <- tiledb_array(uri, return_as="data.frame") +res <- arr[] +attr(res, "query_status") <- NULL +expect_equal(D, res) + + +## lower-level testing tiledb_query_set_buffer +if (dir.exists(uri)) unlink(uri, recursive=TRUE) +v <- D[, "val"] +v[3] <- TRUE # without nullable for simplicity +dim <- tiledb_dim(name = "dim", domain = c(0L, 3L), type = "INT32") +sch <- tiledb_array_schema(domain = tiledb_domain(dim), + attrs = tiledb_attr("val", type = "BOOL")) +tiledb_array_create(uri, sch) +arr <- tiledb_array(uri) +qry <- tiledb_query(arr, "WRITE") +qry <- tiledb_query_set_buffer(qry, "val", v) +qry <- tiledb_query_submit(qry) +qry <- tiledb_query_finalize(qry) +expect_equal(tiledb_query_status(qry), "COMPLETE") + +arr2 <- tiledb_array(uri, return_as="data.frame") +res2 <- arr2[0:3] +print(res2) +attr(res2, "query_status") <- NULL +expect_equal(v, res2[,"val"])