Skip to content

Commit

Permalink
Generalize query condition parsing to non-INT32 index columns (#614)
Browse files Browse the repository at this point in the history
* Generalize query condition parsing to non-INT32 index columns

* Add test for non-int32 columns and parsing query conditions

* Update NEWS [ci skip]
  • Loading branch information
eddelbuettel committed Nov 6, 2023
1 parent 0cdbed0 commit c0bb18a
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
# Ongoing development
# ongoing development

* This release of the R package builds against [TileDB 2.17.4](https://github.com/TileDB-Inc/TileDB/releases/tag/2.17.4), and has also been tested against earlier releases as well as the development version (#611)

## Improvements

* Query condition parsing now supports `factor` index columns other than the standard `integer` type (#614)

## Documentation

Expand Down
2 changes: 1 addition & 1 deletion R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ parse_query_condition <- function(expr, ta=NULL, debug=FALSE, strict=TRUE, use_i
" [",ch, "] ", dtype, "\n", sep="")

## take care of factor (aka "enum" case) and set the data type to ASCII
if (dtype == "INT32" && is_enum) {
if (dtype %in% c("INT8", "INT16", "INT32", "INT64", "UINT8", "UINT16", "UINT32", "UINT64") && is_enum) {
if (debug) cat(" [factor column] ", ch, " ", attr, " ", dtype, " --> ASCII", " ", is_enum, "\n")
dtype <- "ASCII"
}
Expand Down
35 changes: 35 additions & 0 deletions inst/tinytest/test_arrayschemaevolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,38 @@ ase <- tiledb_array_schema_evolution_extend_enumeration(ase, arr, "an_enum", c("
tiledb_array_schema_evolution_array_evolve(ase, uri)
arr <- tiledb_array(uri, return_as="data.frame")[]
expect_equal(levels(arr[, "b"]), c("red", "green", "blue", "orange"))


## -- testing query condition on non int32 columns
## Create a sparse array whose enumerated (factor) attribute 'fct' uses the
## integer index type 'coltype', write ten rows, and check that conditions
## built by parse_query_condition() select the expected rows.
##
## @param coltype character, TileDB integer type name used for the 'fct'
##   attribute, e.g. "INT8" or "UINT16"
## @return called for its side effects (the recorded expectations)
run_int_col_test <- function(coltype) {
  uri <- tempfile()
  ## a TileDB array on local storage is a directory, and unlink() only removes
  ## directories when recursive = TRUE; registering the cleanup via on.exit()
  ## also covers the case where a call below fails
  on.exit(unlink(uri, recursive = TRUE), add = TRUE)

  enums <- c("blue", "green", "red")
  dom <- tiledb_domain(dims = tiledb_dim(name = "dim", domain = c(0L, 100L), tile = 10L, type = "INT32"))
  attrs <- c(tiledb_attr(name = "fct", type = coltype, enumeration = enums),
             tiledb_attr(name = "dbl", type = "FLOAT64"))
  schema <- tiledb_array_schema(domain = dom, attrs = attrs, sparse = TRUE, enumerations = list(fct = enums))
  tiledb_array_create(uri, schema)

  ## fixed seed so that the per-level row counts asserted below are reproducible
  set.seed(42)
  ## 'fct' is written as zero-based positions into 'enums' (hence the "- 1")
  df <- data.frame(dim = 1:10, fct = sample(seq_along(enums), 10, replace = TRUE) - 1, dbl = rnorm(10))
  arr <- tiledb_array(uri)
  arr[] <- df

  ## equality against each level
  res <- tiledb_array(uri, return_as = "data.table", query_condition = parse_query_condition(fct == blue, arr))[]
  expect_equal(nrow(res), 5)

  res <- tiledb_array(uri, return_as = "data.table", query_condition = parse_query_condition(fct == green, arr))[]
  expect_equal(nrow(res), 3)

  res <- tiledb_array(uri, return_as = "data.table", query_condition = parse_query_condition(fct == red, arr))[]
  expect_equal(nrow(res), 2)

  ## inequality is the complement of the 'blue' rows
  res <- tiledb_array(uri, return_as = "data.table", query_condition = parse_query_condition(fct != blue, arr))[]
  expect_equal(nrow(res), 5)

  ## an ordering comparison on the factor column is expected to signal an error
  expect_error(tiledb_array(uri, return_as = "data.table", query_condition = parse_query_condition(fct > blue, arr))[])
}
## run the check once per integer index type under test; a plain loop is used
## because only the side effects (the recorded expectations) matter and no
## collected result is needed
for (coltype in c("INT8", "INT16", "INT32", "UINT8", "UINT16", "UINT32")) {
  run_int_col_test(coltype)
}

0 comments on commit c0bb18a

Please sign in to comment.