diff --git a/R/DataFrame.R b/R/DataFrame.R
index c5780868f6..a062659c2c 100644
--- a/R/DataFrame.R
+++ b/R/DataFrame.R
@@ -177,7 +177,7 @@ fromDataFrame <- function(obj, uri, col_index=NULL, sparse=FALSE, allows_dups=sp
         else if (cl == "numeric")
             tp <- "FLOAT64"
         else if (cl == "character")
-            tp <- "CHAR"
+            tp <- "ASCII"
         else if (cl == "Date")
             tp <- "DATETIME_DAY"
         else if (cl == "POSIXct" || cl == "POSIXlt")
@@ -193,14 +193,14 @@ fromDataFrame <- function(obj, uri, col_index=NULL, sparse=FALSE, allows_dups=sp
         }
         tiledb_attr(colnames(obj)[ind],
                     type = tp,
-                    ncells = ifelse(tp=="CHAR",NA_integer_,1),
+                    ## string columns are variable-length, which is signalled by an NA cell count
+                    ncells = if (tp %in% c("CHAR", "ASCII")) NA_integer_ else 1,
                     filter_list = filterlist,
                     nullable = any(is.na(col)))
     }
     cols <- seq_len(dims[2])
     if (!is.null(col_index)) cols <- cols[-col_index]
     attributes <- sapply(cols, makeAttr)
-
     schema <- tiledb_array_schema(dom, attrs = attributes,
                                   cell_order = cell_order, tile_order = tile_order,
                                   sparse=sparse, capacity=capacity)
diff --git a/R/QueryCondition.R b/R/QueryCondition.R
index ec15a0695f..b4e9847849 100644
--- a/R/QueryCondition.R
+++ b/R/QueryCondition.R
@@ -107,7 +107,16 @@ tiledb_query_condition_combine <- function(lhs, rhs, op) {
 parse_query_condition <- function(expr, debug=FALSE) {
     .isComparisonOperator <- function(x) as.character(x) %in% c(">", ">=", "<", "<=", "==", "!=")
     .isBooleanOperator <- function(x) as.character(x) %in% c("&&", "||", "!")
+    ## a token is a string if it is purely alphabetic or if it does not parse as a number
+    .isAscii <- function(x) grepl("^[a-zA-Z_]*$", x) || is.na(suppressWarnings(as.numeric(x)))
     .isInteger <- function(x) as.character(as.integer(x)) == x
+    .isDouble <- function(x) as.character(as.numeric(x)) == x && grepl("[\\.]", as.character(x))
+    ## map a right-hand-side token to the type name handed to tiledb_query_condition_init()
+    .getType <- function(x) {
+        if (.isAscii(as.character(x))) "ASCII"
+        else if (.isDouble(as.character(x))) "FLOAT64"
+        else "INT32"
+    }
     .mapOpToCharacter <- function(x) switch(x,
                                             `>` = "GT",
                                             `>=` = "GE",
@@ -135,12 +144,12 @@ parse_query_condition <- function(expr, debug=FALSE) {
         } else if (.isComparisonOperator(x[1])) {
             if (debug) cat(" [",as.character(x[2]),"] ",
                            as.character(x[1]), " (aka ", .mapOpToCharacter(as.character(x[1])), ")",
-                           " [",as.character(x[3]), "]",
-                           if (.isInteger(as.character(x[3]))) " int" else " float",
-                           "\n", sep="")
+                           " [",as.character(x[3]), "] ", .getType(x[3]), "\n", sep="")
+            ch <- as.character(x[3])
+            dtype <- .getType(ch)
             tiledb_query_condition_init(attr = as.character(x[2]), # still need to check again schema
-                                        value = as.numeric(as.character(x[3])),
-                                        dtype = if (.isInteger(as.character(x[3]))) "INT32" else "FLOAT64",
+                                        value = if (dtype == "ASCII") ch else as.numeric(ch),
+                                        dtype = dtype,
                                         op = .mapOpToCharacter(as.character(x[1])))
         } else {
             stop("Unexpected token in expression: ", format(x))
diff --git a/inst/tinytest/test_querycondition.R b/inst/tinytest/test_querycondition.R
index 374120eaa7..9624b88af1 100644
--- a/inst/tinytest/test_querycondition.R
+++ b/inst/tinytest/test_querycondition.R
@@ -94,7 +94,7 @@ expect_equal(nrow(ndf), 10)
 tiledb_array_close(arr)
 rm(qry)
 
-## check b == 115.5 (yes, yes, yes, we know EQ dicey on floats; can remove this if it croaks)
+## check b == 115.5 (yes, yes, yes, we know EQ is dicey on floats; can remove this if it croaks)
 qry <- tiledb_query(arr, "READ")
 rows <- integer(20)
 cola <- integer(20)
@@ -159,6 +159,7 @@ res <- arr2[]
 expect_equal(NROW(res), 34L)
 expect_true(all(res$bill_length_mm < 40))
 expect_true(all(res$year == 2009))
+
 unlink(uri, recursive=TRUE)
 
 ## parse query condition support
@@ -176,3 +177,21 @@ res <- arrwithqc2[]
 expect_equal(NROW(res), 34L)
 expect_true(all(res$bill_length_mm < 40))
 expect_true(all(res$year == 2009))
+
+unlink(uri, recursive=TRUE)
+
+## qc and string_ascii
+uri <- tempfile()
+fromDataFrame(na.omit(penguins), uri, sparse=TRUE)
+qc3 <- parse_query_condition(sex == "male")
+arrwithqc3 <- tiledb_array(uri, as.data.frame=TRUE, query_condition=qc3)
+res <- arrwithqc3[]
+expect_equal(NROW(res), 168L)
+expect_true(all(res$sex == "male"))
+
+qc <- tiledb_query_condition_init("sex", "female", "ASCII", "EQ")
+arrwithqc <- tiledb_array(uri, as.data.frame=TRUE, query_condition=qc)
+res <- arrwithqc[]
+expect_equal(NROW(res), 165L)
+expect_true(all(res$sex == "female"))
+unlink(uri, recursive=TRUE)
diff --git a/src/libtiledb.cpp b/src/libtiledb.cpp
index 12a60a2179..f2fea0995a 100644
--- a/src/libtiledb.cpp
+++ b/src/libtiledb.cpp
@@ -1277,7 +1277,7 @@ XPtr libtiledb_attribute(XPtr ctx,
   } else if (attr_dtype == TILEDB_FLOAT32) {
     using DType = tiledb::impl::tiledb_to_type<TILEDB_FLOAT32>::type;
     attr = XPtr<tiledb::Attribute>(new tiledb::Attribute(tiledb::Attribute::create<DType>(*ctx.get(), name)), false);
-  } else if (attr_dtype == TILEDB_CHAR || attr_dtype == TILEDB_STRING_ASCII) {
+  } else if (attr_dtype == TILEDB_CHAR) {
     using DType = tiledb::impl::tiledb_to_type<TILEDB_CHAR>::type;
     attr = XPtr<tiledb::Attribute>(new tiledb::Attribute(tiledb::Attribute::create<DType>(*ctx.get(), name)), false);
     uint64_t num = static_cast<uint64_t>(ncells);
@@ -1285,6 +1285,14 @@ XPtr libtiledb_attribute(XPtr ctx,
       num = TILEDB_VAR_NUM; // R's NA is different from TileDB's NA
     }
     attr->set_cell_val_num(num);
+  } else if (attr_dtype == TILEDB_STRING_ASCII) {
+    // built from the datatype enum directly rather than through a DType alias
+    attr = XPtr<tiledb::Attribute>(new tiledb::Attribute(*ctx.get(), name, TILEDB_STRING_ASCII), false);
+    uint64_t num = static_cast<uint64_t>(ncells);
+    if (ncells == R_NaInt) {
+      num = TILEDB_VAR_NUM; // R's NA is different from TileDB's NA
+    }
+    attr->set_cell_val_num(num);
   } else if (attr_dtype == TILEDB_DATETIME_YEAR ||
              attr_dtype == TILEDB_DATETIME_MONTH ||
              attr_dtype == TILEDB_DATETIME_WEEK ||
@@ -3295,8 +3303,11 @@ void libtiledb_query_condition_init(XPtr query_cond,
     double v = as<double>(condition_value);
     uint64_t cond_val_size = sizeof(double);
     query_cond->init(attr_name, (void*) &v, cond_val_size, op);
+  } else if (cond_val_type == "ASCII") {
+    std::string v = as<std::string>(condition_value);
+    query_cond->init(attr_name, v, op);
   } else {
-    Rcpp::stop("Currently unsupport type: %s", cond_val_type);
+    Rcpp::stop("Currently unsupported type: %s", cond_val_type);
   }
 #endif
 }