From ce6a63268cab3fda2ef09c3ac25f6f6c7c7465df Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Mon, 3 May 2021 10:31:43 -0500 Subject: [PATCH 1/4] also set non-empty domains on dimension greater than two --- R/TileDBArray.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index f0faa514d8..b96e9a5a8a 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -504,7 +504,12 @@ setMethod("[", "tiledb_array", qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) } rangeunset <- FALSE + } else if (k > 2) { # cases 1 and 2 covered above in 'i' and 'j' case + #cat("Adding null dim", k, "\n") + vec <- .mapDatetime2integer64(nonemptydom[[k]], dimtypes[k]) + qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) } + } ## retrieve est_result_size From 167463c2fe64fb78f76ffb73e2a829bd71fc1e48 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Mon, 3 May 2021 12:11:08 -0500 Subject: [PATCH 2/4] expand named sublist to full list for selected_ranges --- R/TileDBArray.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index b96e9a5a8a..dbd4affae7 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -442,6 +442,18 @@ setMethod("[", "tiledb_array", ## ranges seem to interfere with the byte/element adjustment below so set up toggle rangeunset <- TRUE + ## expand a shorter-but-named selected_ranges list + if ( (length(x@selected_ranges) < length(dimnames)) + && (!is.null(names(x@selected_ranges))) ) { + fulllist <- vector(mode="list", length=length(dimnames)) + ind <- match(names(x@selected_ranges), dimnames) + if (any(is.na(ind))) stop("Name for selected ranges does not match dimension names.") + for (ii in seq_len(length(ind))) { + fulllist[[ ind[ii] ]] <- x@selected_ranges[[ii]] + } + x@selected_ranges <- fulllist + } + ## set default range(s) on first dimension if nothing is specified if (is.null(i) && (length(x@selected_ranges) == 0 || @@ -504,7 +516,7 @@ setMethod("[", "tiledb_array", qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) } rangeunset <- FALSE - } else if (k > 2) { # cases 1 and 2 covered above in 'i' and 'j' case + } else { # cases 1 and 2 covered above in 'i' and 'j' case #cat("Adding null dim", k, "\n") vec <- .mapDatetime2integer64(nonemptydom[[k]], dimtypes[k]) qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) From f7b0afe8c3d0f5aec0716556baeaf9379f0a7c22 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Mon, 3 May 2021 13:49:19 -0500 Subject: [PATCH 3/4] refine when null dim domain may be set --- R/TileDBArray.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/TileDBArray.R b/R/TileDBArray.R index dbd4affae7..880dfefa12 100644 --- a/R/TileDBArray.R +++ b/R/TileDBArray.R @@ -516,10 +516,11 @@ setMethod("[", "tiledb_array", qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) } rangeunset <- FALSE - } else { # cases 1 and 2 covered above in 'i' and 'j' case + } else if (k > 2) { # cases 1 and 2 covered above in 'i' and 'j' case #cat("Adding null dim", k, "\n") vec <- .mapDatetime2integer64(nonemptydom[[k]], dimtypes[k]) qryptr <- libtiledb_query_add_range_with_type(qryptr, k-1, dimtypes[k], vec[1], vec[2]) + rangeunset <- FALSE } } From ae76bc12c41a85b126facbbc93f483af300daaf4 Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Tue, 4 May 2021 10:08:07 -0500 Subject: [PATCH 4/4] additional test file (using nycflights13 data package) --- inst/tinytest/test_dimsubset.R | 100 +++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 inst/tinytest/test_dimsubset.R diff --git a/inst/tinytest/test_dimsubset.R b/inst/tinytest/test_dimsubset.R new file mode 100644 index 0000000000..61afc5d2c5 --- /dev/null +++ b/inst/tinytest/test_dimsubset.R @@ -0,0 +1,100 @@ + +## this test file has an implicit dependency on package 'nycflights13' + +library(tinytest) +library(tiledb) + +isOldWindows <- Sys.info()[["sysname"]] == "Windows" && grepl('Windows Server 2008', osVersion) +if (isOldWindows) exit_file("skip this file on old Windows releases") + +if (!requireNamespace("nycflights13", quietly=TRUE)) exit_file("Needed 'nycflights13' package missing") + +ctx <- tiledb_ctx(limitTileDBCores()) + +if (tiledb_version(TRUE) < "2.0.0") exit_file("TileDB Array types required TileDB 2.0.* or greater") + +op <- options() +options(stringsAsFactors=FALSE) # accomodate R 3.* +dir.create(tmp <- tempfile()) + +library(nycflights13) + +dom <- tiledb_domain(dims = c(tiledb_dim("carrier", NULL, NULL, "ASCII"), + tiledb_dim("origin", NULL, NULL, "ASCII"), + tiledb_dim("dest", NULL, NULL, "ASCII"), + tiledb_dim("time_hour", + c(as.POSIXct("2012-01-01 00:00:00"), + as.POSIXct("2014-12-31 23:59:99")), 1000, "DATETIME_SEC"))) + +sch <- tiledb_array_schema(dom, + attrs <- c(tiledb_attr("year", type = "INT32"), + tiledb_attr("month", type = "INT32"), + tiledb_attr("day", type = "INT32"), + tiledb_attr("dep_time", type = "INT32", nullable = TRUE), + tiledb_attr("sched_dep_time", type = "INT32"), + tiledb_attr("dep_delay", type = "FLOAT64", nullable = TRUE), + tiledb_attr("arr_time", type = "INT32"), + tiledb_attr("sched_arr_time", type = "INT32"), + tiledb_attr("arr_delay", type = "FLOAT64", nullable = TRUE), + tiledb_attr("flight", type = "INT32", nullable = TRUE), + tiledb_attr("tailnum", type = "ASCII", ncells = NA, nullable = TRUE), + tiledb_attr("air_time", type = "FLOAT64", nullable = TRUE), + tiledb_attr("distance", type = "FLOAT64"), + tiledb_attr("hour", type = "FLOAT64"), + tiledb_attr("minute", type = "FLOAT64")), + sparse = TRUE, + allows_dups = TRUE) +res <- tiledb_array_create(tmp, sch) + +arr <- tiledb_array(res) +## we reorder the data.frame / tibble on the fly, and yes there are a number of ways to do this +arr[] <- list(carrier = flights$carrier, + origin = flights$origin, + dest = flights$dest, + time_hour = flights$time_hour, + year = flights$year, + month = flights$month, + day = flights$day, + dep_time = flights$dep_time, + sched_dep_time = flights$sched_dep_time, + dep_delay = flights$dep_delay, + arr_time = flights$arr_time, + sched_arr_time = flights$sched_arr_time, + arr_delay = flights$arr_delay, + flight = flights$flight, + tailnum = flights$tailnum, + air_time = flights$air_time, + distance = flights$distance, + hour = flights$hour, + minute = flights$minute) + +newarr <- tiledb_array(tmp, as.data.frame=TRUE) +dat <- newarr[] +expect_equal(nrow(dat), nrow(flights)) +## compare some columns, as we re-order comparing all trickers +expect_equal(dat$carrier, sort(as.character(flights$carrier))) +expect_equal(table(dat$origin), table(flights$origin)) + +selected_ranges(newarr) <- list(cbind("AA","AA"), + cbind("JFK","JFK"), + cbind("BOS", "BOS"), + NULL) +dat <- newarr[] +expect_equal(unique(dat$carrier), "AA") +expect_equal(unique(dat$origin), "JFK") +expect_equal(unique(dat$dest), "BOS") + +selected_ranges(newarr) <- list(dest = cbind("BOS", "BOS"), origin = cbind("LGA", "LGA")) +dat <- newarr[] +expect_equal(unique(dat$dest), "BOS") +expect_equal(unique(dat$origin), "LGA") + +selected_ranges(newarr) <- list(origin = cbind("JFK", "JFK"), carrier = cbind("AA", "AA")) +dat <- newarr[] +expect_equal(unique(dat$carrier), "AA") +expect_equal(unique(dat$origin), "JFK") + +selected_ranges(newarr) <- list(dest = cbind("BOS", "BOS"), origin = cbind("JFK", "LGA")) +dat <- newarr[] +expect_equal(unique(dat$origin), c("JFK", "LGA")) +expect_equal(unique(dat$dest), "BOS")