Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 48 additions & 14 deletions R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,37 @@ setValidity("tiledb_array", function(object) {

})

## Internal helper function to map DATETIME_* data to the internal representation (where
## we mostly follow NumPy). An example is DATETIME_YEAR where the current year (2021) is
## encoded as the offset relative to the _year_ of the epoch, i.e. 51. When an R user submits
## a date type as a min or max value for a range, if would likely be as.Date("2021-01-01")
## which, being an R date, has an internal representation of _days_ since the epoch, i.e.
## as.numeric(as.Date("2021-01-01")) yields 18628.
##
## We also convert to integer64 because that is
.mapDatetime2integer64 <- function(val, dtype) {
## in case it is not a datetime type, or already an int64, return unchanged
if (!grepl("^DATETIME_", dtype) || inherits(val, "integer64"))
return(val)

val <- switch(dtype,
"DATETIME_YEAR" = as.numeric(strftime(val, "%Y")) - 1970,
"DATETIME_MONTH" = 12*(as.numeric(strftime(val, "%Y")) - 1970) + as.numeric(strftime(val, "%m")) - 1,
"DATETIME_WEEK" = as.numeric(val)/7,
"DATETIME_DAY" = as.numeric(val),
"DATETIME_HR" = as.numeric(val)/3600,
"DATETIME_MIN" = as.numeric(val)/60,
"DATETIME_SEC" = as.numeric(val),
"DATETIME_MS" = as.numeric(val) * 1e3,
"DATETIME_US" = as.numeric(val) * 1e6,
"DATETIME_NS" = as.numeric(val),
"DATETIME_PS" = as.numeric(val) * 1e3,
"DATETIME_FS" = as.numeric(val) * 1e6,
"DATETIME_AS" = as.numeric(val) * 1e9)
bit64::as.integer64(val)
}


#' Returns a TileDB array, allowing for specific subset ranges.
#'
#' Heterogenous domains are supported, including timestamps and characters.
Expand Down Expand Up @@ -417,19 +448,20 @@ setMethod("[", "tiledb_array",
(length(x@selected_ranges) >= 1 && is.null(x@selected_ranges[[1]])))) {
## domain values can currently be eg (0,0) rather than a flag, so check explicitly
#domdim <- domain(dimensions(dom)[[1]])
if (nonemptydom[[1]][1] != nonemptydom[[1]][2]) # || nonemptydom[[1]][1] > domdim[1])
qryptr <- libtiledb_query_add_range_with_type(qryptr, 0, dimtypes[1],
nonemptydom[[1]][1], nonemptydom[[1]][2])
if (nonemptydom[[1]][1] != nonemptydom[[1]][2]) { # || nonemptydom[[1]][1] > domdim[1])
vec <- .mapDatetime2integer64(nonemptydom[[1]], dimtypes[1])
qryptr <- libtiledb_query_add_range_with_type(qryptr, 0, dimtypes[1], vec[1], vec[2])
rangeunset <- FALSE
}
}
## if we have is, use it
## if we have it, use it
if (!is.null(i)) {
##if (!identical(eval(is[[1]]),list)) stop("The row argument must be a list.")
if (length(i) == 0) stop("No content to parse in row argument.")
for (ii in 1:length(i)) {
el <- i[[ii]]
qryptr <- libtiledb_query_add_range_with_type(qryptr, 0, dimtypes[1],
min(eval(el)), max(eval(el)))
vec <- .mapDatetime2integer64(c(min(eval(el)), max(eval(el))), dimtypes[1])
qryptr <- libtiledb_query_add_range_with_type(qryptr, 0, dimtypes[1], vec[1], vec[2])
}
rangeunset <- FALSE
}
Expand All @@ -442,9 +474,10 @@ setMethod("[", "tiledb_array",
## domain values can currently be eg (0,0) rather than a flag, so check explicitly
#domdim <- domain(dimensions(dom)[[2]])
if (nonemptydom[[2]][1] != nonemptydom[[2]][2]) # || nonemptydom[[2]][1] > domdim[1])
if (nonemptydom[[2]][1] != nonemptydom[[2]][2])
qryptr <- libtiledb_query_add_range_with_type(qryptr, 1, dimtypes[2],
nonemptydom[[2]][1], nonemptydom[[2]][2])
if (nonemptydom[[2]][1] != nonemptydom[[2]][2]) {
vec <- .mapDatetime2integer64(nonemptydom[[2]], dimtypes[2])
qryptr <- libtiledb_query_add_range_with_type(qryptr, 1, dimtypes[2], vec[1], vec[2])
}
rangeunset <- FALSE
}
}
Expand All @@ -455,18 +488,20 @@ setMethod("[", "tiledb_array",
if (length(j) == 0) stop("No content to parse in col argument.")
for (ii in 1:length(j)) {
el <- j[[ii]]
qryptr <- libtiledb_query_add_range_with_type(qryptr, 1, dimtypes[2],
min(eval(el)), max(eval(el)))
vec <- .mapDatetime2integer64(c(min(eval(el)), max(eval(el))), dimtypes[2])
qryptr <- libtiledb_query_add_range_with_type(qryptr, 1, dimtypes[2], vec[1], vec[2])
rangeunset <- FALSE
}
}

## if ranges selected, use those
for (k in seq_len(length(x@selected_ranges))) {
if (!is.null(x@selected_ranges[[k]])) {
#cat("Adding non-zero dim", k, "\n")
m <- x@selected_ranges[[k]]
for (i in seq_len(nrow(m))) {
qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], m[i,1], m[i,2])
vec <- .mapDatetime2integer64(c(m[i,1], m[i,2]), dimtypes[k])
qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2])
}
rangeunset <- FALSE
}
Expand Down Expand Up @@ -515,7 +550,6 @@ setMethod("[", "tiledb_array",
ressizes <- mapply(getEstimatedSize, allnames, allvarnum, allnullable, alltypes,
MoreArgs=list(qryptr=qryptr), SIMPLIFY=TRUE)
resrv <- max(1, ressizes) # ensure >0 for correct handling of zero-length outputs

## allocate and set buffers
getBuffer <- function(name, type, varnum, nullable, resrv, qryptr, arrptr) {
if (is.na(varnum)) {
Expand Down Expand Up @@ -803,7 +837,7 @@ setMethod("[<-", "tiledb_array",
}
} else {
nr <- NROW(value[[i]])
#cat("Alloc buf", i, " ", colnam, ":", alltypes[i], "nr:", nr, "null:", allnullable[i], "\n")
#cat("Alloc buf", i, " ", colnam, ":", alltypes[i], "nr:", nr, "null:", allnullable[i], "asint64:", asint64, "\n")
buflist[[i]] <- libtiledb_query_buffer_alloc_ptr(arrptr, alltypes[i], nr, allnullable[i])
buflist[[i]] <- libtiledb_query_buffer_assign_ptr(buflist[[i]], alltypes[i], value[[i]], asint64)
qryptr <- libtiledb_query_set_buffer_ptr(qryptr, colnam, buflist[[i]])
Expand Down
81 changes: 61 additions & 20 deletions inst/tinytest/test_dim.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ dimtypes <- c("ASCII", # Variable length string
"DATETIME_FS", # femtosecond
"DATETIME_AS" # attosecond
)

for (dtype in dimtypes) {
if (tiledb_vfs_is_dir(uri)) {
tiledb_vfs_remove_dir(uri)
Expand Down Expand Up @@ -168,41 +167,40 @@ for (dtype in dimtypes) {

arr <- tiledb_array(uri, as.data.frame=TRUE)
dvec <- switch(dtype,
"ASCII" = LETTERS[1:3],
"ASCII" = LETTERS[1:5],
"INT8" =,
"UINT8" =,
"INT16" =,
"UINT16" =,
"UINT32" =,
"INT32" = 1:3, # sequences are integers
"INT32" = 1:5, # sequences are integers
"INT64" =,
"UINT64" = as.integer64(1:3),
"UINT64" = as.integer64(1:5),
"FLOAT32" =,
"FLOAT64" = as.numeric(1:3),
"DATETIME_YEAR" = c(as.Date("2020-01-01"), as.Date("2021-01-01"), as.Date("2022-01-01")),
"DATETIME_MONTH" = c(as.Date("2020-01-01"), as.Date("2020-02-01"), as.Date("2020-03-01")),

"DATETIME_WEEK" = c(as.Date("2020-01-01"), as.Date("2020-01-08"), as.Date("2020-01-15")),
"DATETIME_DAY" = as.Date("2020-01-01") + 0:2,
"DATETIME_HR" = as.POSIXct("2020-01-01 00:00:00") + (0:2)*3600,
"DATETIME_MIN" = as.POSIXct("2020-01-01 00:00:00") + (0:2)*3600,
"DATETIME_SEC" = as.POSIXct("2020-01-01 00:00:00") + (0:2)*3600,
"DATETIME_MS" = as.POSIXct("2000-01-01 00:00:00") + (0:2)*3600 + rep(0.001,3),
## POSIXct can do a bit less than 1 microsec so we set it to 2 on purpose
"DATETIME_US" = as.POSIXct("2000-01-01 00:00:00") + (0:2)*3600 + rep(0.000002,3),
"FLOAT64" = as.numeric(1:5),
"DATETIME_YEAR" = c(as.Date("2020-01-01"), as.Date("2021-01-01"), as.Date("2022-01-01"), as.Date("2023-01-01"), as.Date("2024-01-01")),
"DATETIME_MONTH" = c(as.Date("2020-01-01"), as.Date("2020-02-01"), as.Date("2020-03-01"), as.Date("2020-04-01"), as.Date("2020-05-01")),
"DATETIME_WEEK" = c(as.Date("2020-01-01"), as.Date("2020-01-08"), as.Date("2020-01-15"), as.Date("2020-01-22"), as.Date("2020-01-29")),
"DATETIME_DAY" = as.Date("2020-01-01") + 0:4,
"DATETIME_HR" = as.POSIXct("2020-01-01 00:00:00") + (0:4)*3600,
"DATETIME_MIN" = as.POSIXct("2020-01-01 00:00:00") + (0:4)*3600,
"DATETIME_SEC" = as.POSIXct("2020-01-01 00:00:00") + (0:4)*3600,
"DATETIME_MS" = as.POSIXct("2000-01-01 00:00:00") + (0:4)*3600 + rep(0.001,5),
## POSIXct can do a bit less than 1 microsec so we drop one level
"DATETIME_US" = as.POSIXct("2000-01-01 00:00:00") + (0:4)*3600 + rep(0.00001,5),
"DATETIME_NS" =,
"DATETIME_PS" =,
"DATETIME_FS" =,
"DATETIME_AS" = as.nanotime("1970-01-01T00:00:00.000000001+00:00") + (0:2)*1e9
"DATETIME_AS" = as.nanotime("1970-01-01T00:00:00.000000001+00:00") + (0:4)*1e9
)
avec <- 10^(1:3)
avec <- 10^(1:5)
data <- data.frame(row = dvec, attr = avec)
arr[] <- data

arr2 <- tiledb_array(uri, as.data.frame=TRUE)
readdata <- arr2[]
expect_equal(data[,-1], readdata[,-1])

if (dtype == "UINT64") readdata[,1] <- as.integer64(readdata[,1]) # return doubles here
expect_equal(data, readdata)
if (grepl("^DATETIME", dtype)) {
## check for default date(time) type
expect_false(class(readdata) == "integer64")
Expand All @@ -214,4 +212,47 @@ for (dtype in dimtypes) {
expect_true(class(arr2[][,"row"]) == "integer64")
}

## subset tests
arr3 <- tiledb_array(uri, as.data.frame=TRUE)
if (dtype %in% c("DATETIME_YEAR", "DATETIME_MONTH", "DATETIME_WEEK", "DATETIME_DAY")) {
scaleDate <- function(val, dtype) {
val <- switch(dtype,
"DATETIME_YEAR" = as.numeric(strftime(val, "%Y")) - 1970,
"DATETIME_MONTH" = 12*(as.numeric(strftime(val, "%Y")) - 1970) + as.numeric(strftime(val, "%m")) - 1,
"DATETIME_WEEK" = as.numeric(val)/7,
"DATETIME_DAY" = as.numeric(val))
}
selected_ranges(arr3) <- list(cbind(as.integer64(scaleDate(data[2, "row"], dtype)),
as.integer64(scaleDate(data[4, "row"], dtype))))
} else if (dtype %in% c("DATETIME_HR", "DATETIME_MIN", "DATETIME_SEC",
"DATETIME_MS", "DATETIME_US")) {
scaleDatetime <- function(val, dtype) {
val <- switch(dtype,
"DATETIME_HR" = as.numeric(val)/3600,
"DATETIME_MIN" = as.numeric(val)/60,
"DATETIME_SEC" = as.numeric(val),
"DATETIME_MS" = as.numeric(val) * 1e3,
"DATETIME_US" = as.numeric(val) * 1e6
)
}
selected_ranges(arr3) <- list(cbind(as.integer64(scaleDatetime(data[2, "row"], dtype)),
as.integer64(scaleDatetime(data[4, "row"], dtype))))
} else if (dtype %in% c("DATETIME_NS", "DATETIME_PS", "DATETIME_FS", "DATETIME_AS")) {
scaleDatetime <- function(val, dtype) {
val <- switch(dtype,
"DATETIME_NS" = as.integer64(val),
"DATETIME_PS" = as.integer64(val) * 1e3,
"DATETIME_FS" = as.integer64(val) * 1e6,
"DATETIME_AS" = as.integer64(val) * 1e9
)
}
selected_ranges(arr3) <- list(cbind(as.integer64(scaleDatetime(data[2, "row"], dtype)),
as.integer64(scaleDatetime(data[4, "row"], dtype))))
} else {
selected_ranges(arr3) <- list(cbind(data[2, "row"], data[4, "row"]))
}
readdata <- arr3[]
if (dtype == "UINT64") readdata[,1] <- as.integer64(readdata[,1]) # return doubles here
expect_equivalent(data[2:4,], readdata, info=dtype) # equivalent as not type consistent (int <-> numeric)
expect_equal(NROW(readdata), 3L)
}
7 changes: 4 additions & 3 deletions inst/tinytest/test_tiledbarray.R
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,10 @@ tiledb_array_create(tmp, sch)

x <- tiledb_array(uri = tmp, as.data.frame=TRUE)
df <- data.frame(d1=integer(0), d2=integer(0), val=numeric(0))
## FIXME: cannot currently write zero-length data.frame x[] <- df
val <- x[]
expect_equal(nrow(val), 0L)
## cannot currently write (corner-case) zero-length data.frame via <-
#x[] <- df
#val <- x[]
#expect_equal(nrow(val), 0L)

x[] <- data.frame(d1=1, d2=1, val=1)
selected_ranges(x) <- list(cbind(2,2), cbind(2,2))
Expand Down
29 changes: 20 additions & 9 deletions src/libtiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2980,7 +2980,7 @@ XPtr<tiledb::Query> libtiledb_query_add_range_with_type(XPtr<tiledb::Query> quer
if (strides == R_NilValue) {
query->add_range(uidx, start, end);
} else {
uint64_t stride = static_cast<uint64_t>(makeScalarInteger64(as<double>(strides)));
uint64_t stride = makeScalarInteger64(as<double>(strides));
query->add_range(uidx, start, end, stride);
}
} else if (typestr == "UINT32") {
Expand Down Expand Up @@ -3028,15 +3028,26 @@ XPtr<tiledb::Query> libtiledb_query_add_range_with_type(XPtr<tiledb::Query> quer
uint8_t stride = as<uint16_t>(strides);
query->add_range(uidx, start, end, stride);
}
} else if (typestr == "DATETIME_YEAR" ||
} else if (typestr == "DATETIME_YEAR" ||
typestr == "DATETIME_MONTH" ||
typestr == "DATETIME_WEEK" ||
typestr == "DATETIME_DAY" ||
typestr == "DATETIME_HR" ||
typestr == "DATETIME_MIN" ||
typestr == "DATETIME_SEC" ||
typestr == "DATETIME_MS" ||
typestr == "DATETIME_US" ||
typestr == "DATETIME_WEEK" ||
typestr == "DATETIME_DAY" ||
typestr == "DATETIME_HR" ||
typestr == "DATETIME_MIN" ||
typestr == "DATETIME_SEC" ||
typestr == "DATETIME_MS" ||
typestr == "DATETIME_US" ) {
//int64_t start = date_to_int64(as<Date>(starts), _string_to_tiledb_datatype(typestr));
int64_t start = makeScalarInteger64(as<double>(starts));
//int64_t end = date_to_int64(as<Date>(ends), _string_to_tiledb_datatype(typestr));
int64_t end = makeScalarInteger64(as<double>(ends));
if (strides == R_NilValue) {
query->add_range(uidx, start, end);
} else {
int64_t stride = as<int64_t>(strides);
query->add_range(uidx, start, end, stride);
}
} else if (
typestr == "DATETIME_NS" ||
typestr == "DATETIME_FS" ||
typestr == "DATETIME_PS" ||
Expand Down