diff --git a/NAMESPACE b/NAMESPACE index 111de2a472..988cf30602 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,12 +45,14 @@ export(filter_list) export(fromDataFrame) export(fromMatrix) export(fromSparseMatrix) +export(get_allocation_size_preference) export(get_return_as_preference) export(has_attribute) export(is.anonymous) export(is.integral) export(is.sparse) export(limitTileDBCores) +export(load_allocation_size_preference) export(load_return_as_preference) export(max_chunk_size) export(name) @@ -64,9 +66,11 @@ export(return.array) export(return.data.frame) export(return.matrix) export(return_as) +export(save_allocation_size_preference) export(save_return_as_preference) export(schema) export(selected_ranges) +export(set_allocation_size_preference) export(set_max_chunk_size) export(set_return_as_preference) export(tdb_collect) diff --git a/R/ArraySchema.R b/R/ArraySchema.R index a5f0045b8a..17e0e9e023 100644 --- a/R/ArraySchema.R +++ b/R/ArraySchema.R @@ -783,7 +783,7 @@ tiledb_schema_object <- function(array) { #' Describe a TileDB array schema via code to create it #' -#' @param array A TileDB Array object +#' @param arr A TileDB Array object #' @return Nothing is returned as the function is invoked for the side effect #' of printing the schema via a sequence of R instructions to re-create it. #' @export diff --git a/R/TileDBArray.R b/R/TileDBArray.R index 52ea3d44a5..05c9bd28cb 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2021 TileDB Inc. +# Copyright (c) 2017-2022 TileDB Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -517,6 +517,11 @@ setMethod("[", "tiledb_array", allnullable <- attrnullable } + ## A preference can be set in a local per-user configuration file; if no value + ## is set a fallback from the TileDB config object is used. 
Note that this memory + ## budget (currently, at least) applies only to character columns. + memory_budget <- get_allocation_size_preference() + if (length(enckey) > 0) { if (length(tstamp) > 0) { arrptr <- libtiledb_array_open_at_with_key(ctx@ptr, uri, "READ", enckey, tstamp) @@ -647,7 +652,6 @@ setMethod("[", "tiledb_array", ## get results (shmem variant) getResultShmem <- function(buf, name, varnum) { #, resrv, qryptr) { if (is.na(varnum)) { - ##vec <- libtiledb_query_result_buffer_elements_vec(qryptr, name) vec <- length_from_vlcbuf(buf) libtiledb_query_get_buffer_var_char(buf, vec[1], vec[2])[,1] } else { @@ -684,7 +688,7 @@ setMethod("[", "tiledb_array", getBuffer <- function(name, type, varnum, nullable, resrv, qryptr, arrptr) { if (is.na(varnum)) { if (type %in% c("CHAR", "ASCII", "UTF8")) { - buf <- libtiledb_query_buffer_var_char_alloc_direct(resrv, resrv*8, nullable) + buf <- libtiledb_query_buffer_var_char_alloc_direct(resrv, memory_budget, nullable) qryptr <- libtiledb_query_set_buffer_var_char(qryptr, name, buf) buf } else { diff --git a/R/Utils.R b/R/Utils.R index 94d5da9484..efd2fd3fec 100644 --- a/R/Utils.R +++ b/R/Utils.R @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2021 TileDB Inc. +# Copyright (c) 2017-2022 TileDB Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -79,11 +79,86 @@ get_return_as_preference <- function() .pkgenv[["return_as"]] ##' @rdname save_return_as_preference ##' @export set_return_as_preference <- function(value = c("asis", "array", "matrix", "data.frame", - "data.table", "tibble")) { + "data.table", "tibble")) { value <- match.arg(value) .pkgenv[["return_as"]] <- value } + +##' Save (or load) allocation size default preference in an optional +##' config file +##' +##' When retrieving data from sparse arrays, allocation sizes cannot +##' be determined \emph{ex ante} as the degree of sparsity is unknown. +##' A configuration value can aid in providing an allocation size +##' value. These functions let the user store such a value for +##' retrieval by the package code. The preference will be encoded in +##' a configuration file as R (version 4.0.0 or later) allows user- +##' and package-specific configuration files. These helper functions +##' set and retrieve the value, respectively, or retrieve the cached +##' value from the package environment where it is set at package +##' load. +##' +##' The value will be stored as a character value and reparsed so +##' \sQuote{1e6} and \sQuote{1000000} are equivalent, and the fixed +##' (but adjustable) number of digits for numerical precision +##' \emph{used for formatting} will impact the writing. This should +##' have no effect on standard allocation sizes. +##' +##' Note that this memory budget (currently, at least) applies only to +##' character columns. A fallback value of 50mb is used if no user +##' value is set. +##' +##' @note This function requires R version 4.0.0 or later to utilise the per-user +##' config directory accessor function. For older R versions, a fallback from the +##' TileDB configuration object is used. 
+##' @title Store allocation size preference +##' @param value A numeric value with the desired allocation size (in bytes). +##' @return For the setter, \code{TRUE} is returned invisibly but the function is invoked for the +##' side effect of storing the value. For the getters, the value as a numeric. +##' @export +save_allocation_size_preference <- function(value) { + stopifnot(`This function relies on R version 4.0.0 or later.` = getRversion() >= "4.0.0", # numeric_version comparison; R.version.string would compare lexically + `The 'value' has to be numeric` = is.numeric(value)) + + cfgdir <- tools::R_user_dir(packageName()) + if (!dir.exists(cfgdir)) dir.create(cfgdir, recursive = TRUE) # parent directories may not exist yet + fname <- file.path(cfgdir, "config.dcf") + con <- file(fname, "w+") + on.exit(close(con), add = TRUE) # release the connection even if the write fails + cat("allocation_size:", value, "\n", file=con) + set_allocation_size_preference(value) + invisible(TRUE) +} + +##' @rdname save_allocation_size_preference +##' @export +load_allocation_size_preference <- function() { + value <- 50 * 1024 * 1024 # fallback value is 50mb + cfgfile <- .defaultConfigFile() # but check config file + if (cfgfile != "" && file.exists(cfgfile)) { + cfg <- read.dcf(cfgfile) + if ("allocation_size" %in% colnames(cfg)) + value <- as.numeric(cfg[[1, "allocation_size"]]) + } + set_allocation_size_preference(value) + value +} + +##' @rdname save_allocation_size_preference +##' @export +get_allocation_size_preference <- function() .pkgenv[["allocation_size"]] + +##' @rdname save_allocation_size_preference +##' @export +set_allocation_size_preference <- function(value) { + stopifnot(`The 'value' has to be numeric` = is.numeric(value)) + .pkgenv[["allocation_size"]] <- value +} + + + + is.scalar <- function(x, typestr) { (typeof(x) == typestr) && is.atomic(x) && length(x) == 1L } diff --git a/R/zzz.R b/R/zzz.R index be005bd646..3352ed8748 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2021 TileDB Inc. +# Copyright (c) 2017-2022 TileDB Inc. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -49,6 +49,9 @@ ## set a preference for data.frame conversion for tiledb_array and [] access .pkgenv[["return_as"]] <- load_return_as_preference() + + ## set a preference for allocation size defaults + .pkgenv[["allocation_size"]] <- load_allocation_size_preference() } .onAttach <- function(libname, pkgName) { diff --git a/inst/tinytest/test_dimsubset.R b/inst/tinytest/test_dimsubset.R index 7155800f1e..49502fed57 100644 --- a/inst/tinytest/test_dimsubset.R +++ b/inst/tinytest/test_dimsubset.R @@ -17,6 +17,7 @@ dir.create(tmp <- tempfile()) library(nycflights13) +set_allocation_size_preference(1e8) dom <- tiledb_domain(dims = c(tiledb_dim("carrier", NULL, NULL, "ASCII"), tiledb_dim("origin", NULL, NULL, "ASCII"), tiledb_dim("dest", NULL, NULL, "ASCII"), diff --git a/man/describe.Rd b/man/describe.Rd index 4e5a01c2a0..6454b81a65 100644 --- a/man/describe.Rd +++ b/man/describe.Rd @@ -7,7 +7,7 @@ describe(arr) } \arguments{ -\item{array}{A TileDB Array object} +\item{arr}{A TileDB Array object} } \value{ Nothing is returned as the function is invoked for the side effect diff --git a/man/save_allocation_size_preference.Rd b/man/save_allocation_size_preference.Rd new file mode 100644 index 0000000000..9a8eaa5aaa --- /dev/null +++ b/man/save_allocation_size_preference.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Utils.R +\name{save_allocation_size_preference} +\alias{save_allocation_size_preference} +\alias{load_allocation_size_preference} +\alias{get_allocation_size_preference} +\alias{set_allocation_size_preference} +\title{Store allocation size preference} +\usage{ +save_allocation_size_preference(value) + +load_allocation_size_preference() + +get_allocation_size_preference() + +set_allocation_size_preference(value) +} +\arguments{ +\item{value}{A 
numeric value with the desired allocation size (in bytes).} +} +\value{ +For the setter, \code{TRUE} is returned invisibly but the function is invoked for the +side effect of storing the value. For the getters, the value as a numeric. +} +\description{ +Save (or load) allocation size default preference in an optional +config file +} +\details{ +When retrieving data from sparse arrays, allocation sizes cannot +be determined \emph{ex ante} as the degree of sparsity is unknown. +A configuration value can aid in providing an allocation size +value. These functions let the user store such a value for +retrieval by the package code. The preference will be encoded in +a configuration file as R (version 4.0.0 or later) allows user- +and package-specific configuration files. These helper functions +set and retrieve the value, respectively, or retrieve the cached +value from the package environment where it is set at package +load. + +The value will be stored as a character value and reparsed so +\sQuote{1e6} and \sQuote{1000000} are equivalent, and the fixed +(but adjustable) number of digits for numerical precision +\emph{used for formatting} will impact the writing. This should +have no effect on standard allocation sizes. + +Note that this memory budget (currently, at least) applies only to +character columns. A fallback value of 50mb is used if no user +value is set. +} +\note{ +This function requires R version 4.0.0 or later to utilise the per-user +config directory accessor function. For older R versions, a fallback from the +TileDB configuration object is used. +}