From dd3f3d35821ac5b67b14d9cbe850491b83811a16 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Tue, 20 Jul 2021 16:03:31 -0500 Subject: [PATCH 1/5] add start and end timestamp to consolidate and vacuum helpers --- R/TileDBArray.R | 72 ++++++++++++++++++++++++++++++++-------- man/array_consolidate.Rd | 19 ++++++++--- man/array_vacuum.Rd | 16 ++++++--- 3 files changed, 85 insertions(+), 22 deletions(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index dafb94db62..236c811666 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -1180,33 +1180,77 @@ setReplaceMethod("datetimes_as_int64", #' Consolidate fragments of a TileDB Array #' -#' This function invokes a consolidation operation. Parameters can be set via -#' an option configuration object. +#' This function invokes a consolidation operation. Parameters affecting the operation +#' can be set via an optional configuration object. Start and end timestamps can also be +#' set directly. #' @param uri A character value with the URI of a TileDB Array +#' @param start_time An optional timestamp value, if missing config default is used +#' @param end_time An optional timestamp value, if missing config default is used #' @param cfg An optional TileDB Configuration object #' @param ctx An option TileDB Context object -#' @return \code{NULL} is returned invisibly +#' @return The result of the underlying library call is returned. 
#' @export -array_consolidate <- function(uri, cfg = NULL, ctx = tiledb_get_context()) { - libtiledb_array_consolidate(ctx = ctx@ptr, uri = uri, - # C++ code has Nullable and can instantiate but needs S4 XPtr - cfgptr = if (is.null(cfg)) cfg else cfg@ptr) +array_consolidate <- function(uri, cfg = NULL, + start_time, end_time, + ctx = tiledb_get_context()) { + if (is.null(cfg)) { + cfg <- tiledb_config() + } + + if (!missing(start_time)) { + stopifnot(`start_time must be datetime object` = inherits(start_time, "POSIXt"), + `the 'bit64' package is required` = requireNamespace("bit64", quietly=TRUE)) + cfg["sm.consolidation.timestamp_start"] = as.character(bit64::as.integer64(nanotime::as.nanotime(start_time))) + } + + if (!missing(end_time)) { + stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt")) + cfg["sm.consolidation.timestamp_end"] = as.character(bit64::as.integer64(nanotime::as.nanotime(end_time))) + } + + ctx <- tiledb_ctx(cfg) + + libtiledb_array_consolidate(ctx = ctx@ptr, uri = uri, cfgptr = cfg@ptr) } -#' After consolidation, remove consilidated fragments of a TileDB Array +#' After consolidation, remove consolidated fragments of a TileDB Array #' #' This function can remove fragments following a consolidation step. Note that vacuuming #' should \emph{not} be run if one intends to use the TileDB \emph{time-traveling} feature -#' of opening arrays at particular timestamps +#' of opening arrays at particular timestamps. +#' +#' Parameters affecting the operation can be set via an optional configuration object. +#' Start and end timestamps can also be set directly. 
+#' #' @param uri A character value with the URI of a TileDB Array +#' @param start_time An optional timestamp value, if missing config default is used +#' @param end_time An optional timestamp value, if missing config default is used #' @param cfg An optional TileDB Configuration object #' @param ctx An option TileDB Context object -#' @return \code{NULL} is returned invisibly +#' @return The result of the underlying library call is returned. #' @export -array_vacuum <- function(uri, cfg = NULL, ctx = tiledb_get_context()) { - libtiledb_array_vacuum(ctx = ctx@ptr, uri = uri, - # C++ code has Nullable and can instantiate but needs S4 XPtr - cfgptr = if (is.null(cfg)) cfg else cfg@ptr) +array_vacuum <- function(uri, cfg = NULL, + start_time, end_time, + ctx = tiledb_get_context()) { + + if (is.null(cfg)) { + cfg <- tiledb_config() + } + + if (!missing(start_time)) { + stopifnot(`start_time must be datetime object` = inherits(start_time, "POSIXt"), + `the 'bit64' package is required` = requireNamespace("bit64", quietly=TRUE)) + cfg["sm.consolidation.timestamp_start"] = as.character(bit64::as.integer64(nanotime::as.nanotime(start_time))) + } + + if (!missing(end_time)) { + stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt")) + cfg["sm.consolidation.timestamp_end"] = as.character(bit64::as.integer64(nanotime::as.nanotime(end_time))) + } + + ctx <- tiledb_ctx(cfg) + + libtiledb_array_vacuum(ctx = ctx@ptr, uri = uri, cfgptr = cfg@ptr) } #' Get the non-empty domain from a TileDB Array by index diff --git a/man/array_consolidate.Rd b/man/array_consolidate.Rd index 5dd6ae8976..26b56eb315 100644 --- a/man/array_consolidate.Rd +++ b/man/array_consolidate.Rd @@ -4,19 +4,30 @@ \alias{array_consolidate} \title{Consolidate fragments of a TileDB Array} \usage{ -array_consolidate(uri, cfg = NULL, ctx = tiledb_get_context()) +array_consolidate( + uri, + cfg = NULL, + start_time, + end_time, + ctx = tiledb_get_context() +) } \arguments{ \item{uri}{A character 
value with the URI of a TileDB Array} \item{cfg}{An optional TileDB Configuration object} +\item{start_time}{An optional timestamp value, if missing config default is used} + +\item{end_time}{An optional timestamp value, if missing config default is used} + \item{ctx}{An option TileDB Context object} } \value{ -\code{NULL} is returned invisibly +The result of the underlying library call is returned. } \description{ -This function invokes a consolidation operation. Parameters can be set via -an option configuration object. +This function invokes a consolidation operation. Parameters affecting the operation +can be set via an optional configuration object. Start and end timestamps can also be +set directly. } diff --git a/man/array_vacuum.Rd b/man/array_vacuum.Rd index 4803dd0fef..a4c8a83591 100644 --- a/man/array_vacuum.Rd +++ b/man/array_vacuum.Rd @@ -2,22 +2,30 @@ % Please edit documentation in R/TileDBArray.R \name{array_vacuum} \alias{array_vacuum} -\title{After consolidation, remove consilidated fragments of a TileDB Array} +\title{After consolidation, remove consolidated fragments of a TileDB Array} \usage{ -array_vacuum(uri, cfg = NULL, ctx = tiledb_get_context()) +array_vacuum(uri, cfg = NULL, start_time, end_time, ctx = tiledb_get_context()) } \arguments{ \item{uri}{A character value with the URI of a TileDB Array} \item{cfg}{An optional TileDB Configuration object} +\item{start_time}{An optional timestamp value, if missing config default is used} + +\item{end_time}{An optional timestamp value, if missing config default is used} + \item{ctx}{An option TileDB Context object} } \value{ -\code{NULL} is returned invisibly +The result of the underlying library call is returned. } \description{ This function can remove fragments following a consolidation step. Note that vacuuming should \emph{not} be run if one intends to use the TileDB \emph{time-traveling} feature -of opening arrays at particular timestamps +of opening arrays at particular timestamps. 
+} +\details{ +Parameters affecting the operation can be set via an optional configuration object. +Start and end timestamps can also be set directly. } From 3a451c0f07f0cc42d873726d441baf35f865f428 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Tue, 20 Jul 2021 17:45:24 -0500 Subject: [PATCH 2/5] simple tests for consolidate and vacuum --- R/TileDBArray.R | 4 ++-- inst/tinytest/test_tiledbarray.R | 13 +++++++++++++ man/array_consolidate.Rd | 2 +- man/array_vacuum.Rd | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index 236c811666..0b1d0a547a 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -1188,7 +1188,7 @@ setReplaceMethod("datetimes_as_int64", #' @param end_time An optional timestamp value, if missing config default is used #' @param cfg An optional TileDB Configuration object #' @param ctx An option TileDB Context object -#' @return The result of the underlying library call is returned. +#' @return NULL is returned invisibly #' @export array_consolidate <- function(uri, cfg = NULL, start_time, end_time, @@ -1227,7 +1227,7 @@ array_consolidate <- function(uri, cfg = NULL, #' @param end_time An optional timestamp value, if missing config default is used #' @param cfg An optional TileDB Configuration object #' @param ctx An option TileDB Context object -#' @return The result of the underlying library call is returned. 
+#' @return NULL is returned invisibly #' @export array_vacuum <- function(uri, cfg = NULL, start_time, end_time, diff --git a/inst/tinytest/test_tiledbarray.R b/inst/tinytest/test_tiledbarray.R index d9e469d86a..bc5bb261d9 100644 --- a/inst/tinytest/test_tiledbarray.R +++ b/inst/tinytest/test_tiledbarray.R @@ -1216,3 +1216,16 @@ chk <- tiledb_array(uri = uri, as.data.frame=TRUE) res <- chk[] expect_equal(dim(res), c(100,6)) expect_equal(colnames(res), c("rows", "cols", "time", "a", "b", "c")) + +## consolidate +expect_equal(array_consolidate(uri), NULL) +expect_error(array_consolidate(uri, start_time="abc")) # not a datetime +expect_error(array_consolidate(uri, end_time="def")) # not a datetime +now <- Sys.time() +expect_equal(array_consolidate(uri, start_time=now-60, end_time=now), NULL) + +## vacuum +expect_equal(array_vacuum(uri), NULL) +expect_error(array_vacuum(uri, start_time="abc")) # not a datetime +expect_error(array_vacuum(uri, end_time="def")) # not a datetime +expect_equal(array_vacuum(uri, start_time=now-60, end_time=now), NULL) diff --git a/man/array_consolidate.Rd b/man/array_consolidate.Rd index 26b56eb315..b64638e12e 100644 --- a/man/array_consolidate.Rd +++ b/man/array_consolidate.Rd @@ -24,7 +24,7 @@ array_consolidate( \item{ctx}{An option TileDB Context object} } \value{ -The result of the underlying library call is returned. +NULL is returned invisibly } \description{ This function invokes a consolidation operation. Parameters affecting the operation diff --git a/man/array_vacuum.Rd b/man/array_vacuum.Rd index a4c8a83591..5ea47bde2d 100644 --- a/man/array_vacuum.Rd +++ b/man/array_vacuum.Rd @@ -18,7 +18,7 @@ array_vacuum(uri, cfg = NULL, start_time, end_time, ctx = tiledb_get_context()) \item{ctx}{An option TileDB Context object} } \value{ -The result of the underlying library call is returned. +NULL is returned invisibly } \description{ This function can remove fragments following a consolidation step. 
Note that vacuuming From 3f37b40e6b2dd085d893902a2ad2886f1b8a41f8 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 21 Jul 2021 06:05:21 -0500 Subject: [PATCH 3/5] test for TileDB 2.3.0 or later (and bit64 ensured via nanotime) --- R/TileDBArray.R | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index 0b1d0a547a..22a8be3fbd 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -1199,13 +1199,16 @@ array_consolidate <- function(uri, cfg = NULL, if (!missing(start_time)) { stopifnot(`start_time must be datetime object` = inherits(start_time, "POSIXt"), - `the 'bit64' package is required` = requireNamespace("bit64", quietly=TRUE)) - cfg["sm.consolidation.timestamp_start"] = as.character(bit64::as.integer64(nanotime::as.nanotime(start_time))) + `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") + start_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(start_time)) + cfg["sm.consolidation.timestamp_start"] = as.character(start_time_int64) } if (!missing(end_time)) { - stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt")) - cfg["sm.consolidation.timestamp_end"] = as.character(bit64::as.integer64(nanotime::as.nanotime(end_time))) + stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt"), + `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") + end_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(end_time)) + cfg["sm.consolidation.timestamp_end"] = as.character(end_time_int64) } ctx <- tiledb_ctx(cfg) @@ -1239,13 +1242,16 @@ array_vacuum <- function(uri, cfg = NULL, if (!missing(start_time)) { stopifnot(`start_time must be datetime object` = inherits(start_time, "POSIXt"), - `the 'bit64' package is required` = requireNamespace("bit64", quietly=TRUE)) - cfg["sm.consolidation.timestamp_start"] = as.character(bit64::as.integer64(nanotime::as.nanotime(start_time))) + `TileDB 2.3.0 or later is 
required` = tiledb_version(TRUE) >= "2.3.0") + start_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(start_time)) + cfg["sm.consolidation.timestamp_start"] = as.character(start_time_int64) } if (!missing(end_time)) { - stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt")) - cfg["sm.consolidation.timestamp_end"] = as.character(bit64::as.integer64(nanotime::as.nanotime(end_time))) + stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt"), + `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") + end_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(end_time)) + cfg["sm.consolidation.timestamp_end"] = as.character(end_time_int64) } ctx <- tiledb_ctx(cfg) From 476cdfd5e2e4311d3ab8f0e5617d1035f4abc87a Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 21 Jul 2021 06:08:04 -0500 Subject: [PATCH 4/5] remove check for bit64 guaranteed to be present via nanotime --- R/VFS.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/VFS.R b/R/VFS.R index 37d914fa40..17cb9b4cee 100644 --- a/R/VFS.R +++ b/R/VFS.R @@ -443,7 +443,6 @@ tiledb_vfs_write <- function(fh, vec, ctx = tiledb_get_context()) { #' @return The binary file content is returned as an integer vector. 
#' @export tiledb_vfs_read <- function(fh, offset, nbytes, ctx = tiledb_get_context()) { - if (!requireNamespace("bit64", quietly=TRUE)) stop("The 'bit64' package is needed.") if (missing(offset)) offset <- bit64::as.integer64(0) if (missing(nbytes)) stop("nbytes currently a required parameter") stopifnot(fh_argument=is(fh, "externalptr"), From 1aa48eb7e2c146e2d7d12c8f6a94e990bdfda5fd Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Wed, 21 Jul 2021 07:27:27 -0500 Subject: [PATCH 5/5] timestamps as milliseconds since epoch --- R/TileDBArray.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index 22a8be3fbd..2a1b6e67f2 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -1200,14 +1200,14 @@ array_consolidate <- function(uri, cfg = NULL, if (!missing(start_time)) { stopifnot(`start_time must be datetime object` = inherits(start_time, "POSIXt"), `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") - start_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(start_time)) + start_time_int64 <- bit64::as.integer64(as.numeric(start_time) * 1000) cfg["sm.consolidation.timestamp_start"] = as.character(start_time_int64) } if (!missing(end_time)) { stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt"), `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") - end_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(end_time)) + end_time_int64 <- bit64::as.integer64(as.numeric(end_time) * 1000) cfg["sm.consolidation.timestamp_end"] = as.character(end_time_int64) } @@ -1243,14 +1243,14 @@ array_vacuum <- function(uri, cfg = NULL, if (!missing(start_time)) { stopifnot(`start_time must be datetime object` = inherits(start_time, "POSIXt"), `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") - start_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(start_time)) + start_time_int64 <- bit64::as.integer64(as.numeric(start_time) * 
1000) cfg["sm.consolidation.timestamp_start"] = as.character(start_time_int64) } if (!missing(end_time)) { stopifnot(`end_time must be datetime object` = inherits(end_time, "POSIXt"), `TileDB 2.3.0 or later is required` = tiledb_version(TRUE) >= "2.3.0") - end_time_int64 <- bit64::as.integer64(nanotime::as.nanotime(end_time)) + end_time_int64 <- bit64::as.integer64(as.numeric(end_time) * 1000) cfg["sm.consolidation.timestamp_end"] = as.character(end_time_int64) }