From a3e77094f0e395a943fc42b5e7a70d321718faa0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Jul 2022 10:45:48 -0400 Subject: [PATCH] Bug Fix: Wrong results when using OR condition with nullable attributes (#3308) (#3367) Fixed the bug on nullable attributes by refactoring the query condition so that nullable attributes are processed at the same time as the cell values. I also wrote test cases that test on complex query conditions with nullable attributes. TYPE:BUG DESC: Bug Fix: Wrong results when using OR condition with nullable attributes Co-authored-by: Abigale Kim --- test/regression/CMakeLists.txt | 1 + test/regression/targets/sc-18836.cc | 114 ++++++ tiledb/sm/query/query_condition.cc | 375 +++++++++++++------ tiledb/sm/query/query_condition.h | 48 ++- tiledb/sm/query/test/unit_query_condition.cc | 320 +++++++++++++++- 5 files changed, 736 insertions(+), 122 deletions(-) create mode 100644 test/regression/targets/sc-18836.cc diff --git a/test/regression/CMakeLists.txt b/test/regression/CMakeLists.txt index 415c7c7b0a6..6414e9b3114 100644 --- a/test/regression/CMakeLists.txt +++ b/test/regression/CMakeLists.txt @@ -30,6 +30,7 @@ find_package(Catch_EP REQUIRED) set(SOURCES targets/sc-12024.cc targets/sc-15387.cc + targets/sc-18836.cc ) if (TILEDB_SERIALIZATION) diff --git a/test/regression/targets/sc-18836.cc b/test/regression/targets/sc-18836.cc new file mode 100644 index 00000000000..24c058fa2bf --- /dev/null +++ b/test/regression/targets/sc-18836.cc @@ -0,0 +1,114 @@ +#include +#include +#include +#include + +#include "catch.hpp" + +using namespace tiledb; + +float a2_fill_value = 0.0f; +const std::string array_name = "cpp_qc_nullable_array"; + +void create_array(){ + Context ctx; + + // Create dimension + auto dim = tiledb::Dimension::create(ctx, "dim", {{1, 9}}, 2); + + Domain domain(ctx); + domain.add_dimension(dim); + + Attribute a1(ctx, "a1", TILEDB_INT32); + a1.set_nullable(true); + Attribute a2(ctx, "a2", TILEDB_FLOAT32); + a2.set_fill_value(&a2_fill_value, sizeof(float)); + + ArraySchema schema(ctx, TILEDB_DENSE); + schema.set_domain(domain); + schema.add_attribute(a1).add_attribute(a2); + + Array::create(array_name, schema); + + // Prepare some data for the array + std::vector data1 = {8, 9, 10, 11, 12, 13, 14, 15, 16}; + std::vector a1_validity_buf = {0, 1, 1, 1, 1, 0, 1, 1, 0}; + + std::vector data2 = {13.2f, 14.1f, 14.2f, 15.1f, 15.2f, 15.3f, 16.1f, 18.3f, 19.1f}; + + // Set the subarray to write into + std::vector subarray = {1, 9}; + + // Open array for writing + Array array(ctx, array_name, TILEDB_WRITE); + + Query query(ctx, array); + query.set_layout(TILEDB_ROW_MAJOR) + .set_buffer("a1", data1) + .set_validity_buffer("a1", a1_validity_buf) + .set_buffer("a2", data2) + .set_subarray(subarray); + + query.submit(); + array.close(); +} + +TEST_CASE("Query Condition CPP API: Query Condition OR with nullable attributes (sc-18836)", "[QueryCondition]"){ + Context ctx; + VFS vfs(ctx); + + if (vfs.is_dir(array_name)) + vfs.remove_dir(array_name); + + create_array(); + + // Prepare the array for reading + Array array(ctx, array_name, TILEDB_READ); + + const std::vector subarray = {1, 9}; + + // Prepare the vectors that will hold the results + std::vector a1_buffer(9); + std::vector a2_buffer(9); + std::vector a1_validity_buf(9); + + // Prepare the query + Query query(ctx, array, TILEDB_READ); + query.set_subarray(subarray) + .set_layout(TILEDB_ROW_MAJOR) + .set_buffer("a1", a1_buffer) + .set_validity_buffer("a1", a1_validity_buf) + .set_buffer("a2", a2_buffer); + + QueryCondition qc1(ctx); + float val = 15.1f; + qc1.init("a2", &val, sizeof(float), TILEDB_EQ); + QueryCondition qc2(ctx); + qc2.init("a1", nullptr, 0, TILEDB_EQ); + QueryCondition qc(ctx); + + SECTION("Ordering q1, q2") { + qc = qc1.combine(qc2, TILEDB_OR); + } + + SECTION("Ordering q2, q1") { + qc = qc2.combine(qc1, TILEDB_OR); + } + + query.set_condition(qc); + query.submit(); + array.close(); + + // Print out the results. + int m = std::numeric_limits::min(); + std::vector a1_expected = {8, m, m, 11, m, 13, m, m, 16}; + std::vector a2_expected = {13.2f, 0.0f, 0.0f, 15.1f, 0.0f, 15.3f, 0.0f, 0.0f, 19.1f}; + auto result_num = (int)query.result_buffer_elements()["a1"].second; + for (int r = 0; r < result_num; r++) { + CHECK(a1_buffer[r] == a1_expected[r]); + CHECK(a2_buffer[r] == a2_expected[r]); + } + + if (vfs.is_dir(array_name)) + vfs.remove_dir(array_name); +} \ No newline at end of file diff --git a/tiledb/sm/query/query_condition.cc b/tiledb/sm/query/query_condition.cc index 2c45771d5c4..afa322ef881 100644 --- a/tiledb/sm/query/query_condition.cc +++ b/tiledb/sm/query/query_condition.cc @@ -44,6 +44,7 @@ #include #include #include +#include using namespace tiledb::common; @@ -432,6 +433,7 @@ void QueryCondition::apply_ast_node( cell_size, condition_value_content, condition_value_size); + result_cell_bitmap[starting_index + c] = combination_op( result_cell_bitmap[starting_index + c], (uint8_t)cmp); ++c; @@ -891,6 +893,7 @@ void QueryCondition::apply_ast_node_dense( const uint64_t src_cell, const uint64_t stride, const bool var_size, + const bool nullable, CombinationOp combination_op, span result_buffer) const { const std::string& field_name = node->get_field_name(); @@ -900,6 +903,11 @@ void QueryCondition::apply_ast_node_dense( // Get the nullable buffer. const auto tile_tuple = result_tile->tile_tuple(field_name); + uint8_t* buffer_validity = nullptr; + if (nullable) { + const auto& tile_validity = std::get<2>(*tile_tuple); + buffer_validity = static_cast(tile_validity.data()) + src_cell; + } if (var_size) { // Get var data buffer and tile offsets buffer. @@ -933,7 +941,11 @@ void QueryCondition::apply_ast_node_dense( cell_value, cell_size, condition_value_content, condition_value_size); // Set the value. - result_buffer[c] = combination_op(result_buffer[c], (uint8_t)cmp); + bool buffer_validity_val = buffer_validity == nullptr ? + true : + buffer_validity[start + c * stride] != 0; + result_buffer[c] = + combination_op(result_buffer[c], (uint8_t)cmp && buffer_validity_val); } } else { // Get the fixed size data buffers. @@ -954,7 +966,11 @@ void QueryCondition::apply_ast_node_dense( cell_value, cell_size, condition_value_content, condition_value_size); // Set the value. - result_buffer[c] = combination_op(result_buffer[c], (uint8_t)cmp); + bool buffer_validity_val = buffer_validity == nullptr ? + true : + buffer_validity[start + c * stride] != 0; + result_buffer[c] = + combination_op(result_buffer[c], (uint8_t)cmp && buffer_validity_val); } } } @@ -967,6 +983,7 @@ void QueryCondition::apply_ast_node_dense( const uint64_t src_cell, const uint64_t stride, const bool var_size, + const bool nullable, CombinationOp combination_op, span result_buffer) const { switch (node->get_op()) { @@ -978,6 +995,7 @@ void QueryCondition::apply_ast_node_dense( src_cell, stride, var_size, + nullable, combination_op, result_buffer); break; @@ -989,6 +1007,7 @@ void QueryCondition::apply_ast_node_dense( src_cell, stride, var_size, + nullable, combination_op, result_buffer); break; @@ -1000,6 +1019,7 @@ void QueryCondition::apply_ast_node_dense( src_cell, stride, var_size, + nullable, combination_op, result_buffer); break; @@ -1011,6 +1031,7 @@ void QueryCondition::apply_ast_node_dense( src_cell, stride, var_size, + nullable, combination_op, result_buffer); break; @@ -1022,6 +1043,7 @@ void QueryCondition::apply_ast_node_dense( src_cell, stride, var_size, + nullable, combination_op, result_buffer); break; @@ -1033,6 +1055,7 @@ void QueryCondition::apply_ast_node_dense( src_cell, stride, var_size, + nullable, combination_op, result_buffer); break; @@ -1061,165 +1084,186 @@ void QueryCondition::apply_ast_node_dense( const bool nullable = attribute->nullable(); // Process the validity buffer now. - if (nullable) { + if (nullable && node->get_condition_value_view().content() == nullptr) { const auto tile_tuple = result_tile->tile_tuple(node->get_field_name()); const auto& tile_validity = std::get<2>(*tile_tuple); const auto buffer_validity = static_cast(tile_validity.data()) + src_cell; // Null values can only be specified for equality operators. - if (node->get_condition_value_view().content() == nullptr) { - if (node->get_op() == QueryConditionOp::NE) { - for (uint64_t c = 0; c < result_buffer.size(); ++c) { - result_buffer[c] *= buffer_validity[start + c * stride] != 0; - } - } else { - for (uint64_t c = 0; c < result_buffer.size(); ++c) { - result_buffer[c] *= buffer_validity[start + c * stride] == 0; - } + if (node->get_op() == QueryConditionOp::NE) { + for (uint64_t c = 0; c < result_buffer.size(); ++c) { + result_buffer[c] = combination_op( + buffer_validity[start + c * stride] != 0, result_buffer[c]); } - return; } else { - // Turn off bitmap values for null cells. for (uint64_t c = 0; c < result_buffer.size(); ++c) { - result_buffer[c] *= buffer_validity[start + c * stride] != 0; + result_buffer[c] = combination_op( + buffer_validity[start + c * stride] == 0, result_buffer[c]); } } + return; } switch (attribute->type()) { - case Datatype::INT8: - return apply_ast_node_dense( + case Datatype::INT8: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); + } break; case Datatype::BOOL: - case Datatype::UINT8: - return apply_ast_node_dense( + case Datatype::UINT8: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::INT16: - return apply_ast_node_dense( + } break; + case Datatype::INT16: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::UINT16: - return apply_ast_node_dense( + } break; + case Datatype::UINT16: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::INT32: - return apply_ast_node_dense( + } break; + case Datatype::INT32: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::UINT32: - return apply_ast_node_dense( + } break; + case Datatype::UINT32: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::INT64: - return apply_ast_node_dense( + } break; + case Datatype::INT64: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::UINT64: - return apply_ast_node_dense( + } break; + case Datatype::UINT64: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::FLOAT32: - return apply_ast_node_dense( + } break; + case Datatype::FLOAT32: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::FLOAT64: - return apply_ast_node_dense( + } break; + case Datatype::FLOAT64: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::STRING_ASCII: - return apply_ast_node_dense( + } break; + case Datatype::STRING_ASCII: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); - case Datatype::CHAR: + } break; + case Datatype::CHAR: { if (var_size) { - return apply_ast_node_dense( + apply_ast_node_dense( + node, + result_tile, + start, + src_cell, + stride, + var_size, + nullable, + combination_op, + result_buffer); + } else { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); } - return apply_ast_node_dense( - node, - result_tile, - start, - src_cell, - stride, - var_size, - combination_op, - result_buffer); + } break; case Datatype::DATETIME_YEAR: case Datatype::DATETIME_MONTH: case Datatype::DATETIME_WEEK: @@ -1232,16 +1276,18 @@ void QueryCondition::apply_ast_node_dense( case Datatype::DATETIME_NS: case Datatype::DATETIME_PS: case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return apply_ast_node_dense( + case Datatype::DATETIME_AS: { + apply_ast_node_dense( node, result_tile, start, src_cell, stride, var_size, + nullable, combination_op, result_buffer); + } break; case Datatype::ANY: case Datatype::BLOB: case Datatype::STRING_UTF8: @@ -1553,7 +1599,8 @@ template < typename T, QueryConditionOp Op, typename BitmapType, - typename CombinationOp> + typename CombinationOp, + typename nullable> void QueryCondition::apply_ast_node_sparse( const tdb_unique_ptr& node, ResultTile& result_tile, @@ -1564,6 +1611,14 @@ void QueryCondition::apply_ast_node_sparse( const void* condition_value_content = node->get_condition_value_view().content(); const size_t condition_value_size = node->get_condition_value_view().size(); + uint8_t* buffer_validity = nullptr; + + // Check if the combination op = OR and the attribute is nullable. + if constexpr ( + std::is_same_v> && nullable::value) { + const auto& tile_validity = std::get<2>(*tile_tuple); + buffer_validity = static_cast(tile_validity.data()); + } if (var_size) { // Get var data buffer and tile offsets buffer. @@ -1594,8 +1649,15 @@ void QueryCondition::apply_ast_node_sparse( const bool cmp = BinaryCmp::cmp( cell_value, cell_size, condition_value_content, condition_value_size); - // Set the value. - result_bitmap[c] = combination_op(result_bitmap[c], cmp); + // Set the value, casing on whether the combination op = OR and the + // attribute is nullable. + if constexpr ( + std::is_same_v> && nullable::value) { + result_bitmap[c] = + combination_op(result_bitmap[c], cmp && (buffer_validity[c] != 0)); + } else { + result_bitmap[c] = combination_op(result_bitmap[c], cmp); + } } } else { // Get the fixed size data buffers. @@ -1614,13 +1676,24 @@ void QueryCondition::apply_ast_node_sparse( const bool cmp = BinaryCmp::cmp( cell_value, cell_size, condition_value_content, condition_value_size); - // Set the value. - result_bitmap[c] = combination_op(result_bitmap[c], cmp); + // Set the value, casing on whether the combination op = OR and the + // attribute is nullable. + if constexpr ( + std::is_same_v> && nullable::value) { + result_bitmap[c] = + combination_op(result_bitmap[c], cmp && (buffer_validity[c] != 0)); + } else { + result_bitmap[c] = combination_op(result_bitmap[c], cmp); + } } } } -template +template < + typename T, + typename BitmapType, + typename CombinationOp, + typename nullable> void QueryCondition::apply_ast_node_sparse( const tdb_unique_ptr& node, ResultTile& result_tile, @@ -1629,28 +1702,52 @@ void QueryCondition::apply_ast_node_sparse( std::vector& result_bitmap) const { switch (node->get_op()) { case QueryConditionOp::LT: - apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse< + T, + QueryConditionOp::LT, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); break; case QueryConditionOp::LE: - apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse< + T, + QueryConditionOp::LE, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); break; case QueryConditionOp::GT: - apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse< + T, + QueryConditionOp::GT, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); break; case QueryConditionOp::GE: - apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse< + T, + QueryConditionOp::GE, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); break; case QueryConditionOp::EQ: - apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse< + T, + QueryConditionOp::EQ, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); break; case QueryConditionOp::NE: - apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse< + T, + QueryConditionOp::NE, + BitmapType, + CombinationOp, + nullable>(node, result_tile, var_size, combination_op, result_bitmap); break; default: throw std::runtime_error( @@ -1658,6 +1755,23 @@ void QueryCondition::apply_ast_node_sparse( } } +template +void QueryCondition::apply_ast_node_sparse( + const tdb_unique_ptr& node, + ResultTile& result_tile, + const bool var_size, + const bool nullable, + CombinationOp combination_op, + std::vector& result_bitmap) const { + if (nullable) { + apply_ast_node_sparse( + node, result_tile, var_size, combination_op, result_bitmap); + } else { + apply_ast_node_sparse( + node, result_tile, var_size, combination_op, result_bitmap); + } +} + template void QueryCondition::apply_ast_node_sparse( const tdb_unique_ptr& node, @@ -1686,22 +1800,26 @@ void QueryCondition::apply_ast_node_sparse( const auto tile_tuple = result_tile.tile_tuple(node->get_field_name()); const auto& tile_validity = std::get<2>(*tile_tuple); const auto buffer_validity = static_cast(tile_validity.data()); - - // Null values can only be specified for equality operators. const auto cell_num = result_tile.cell_num(); + if (node->get_condition_value_view().content() == nullptr) { + // Null values can only be specified for equality operators. if (node->get_op() == QueryConditionOp::NE) { for (uint64_t c = 0; c < cell_num; c++) { - result_bitmap[c] *= buffer_validity[c] != 0; + result_bitmap[c] = + combination_op(buffer_validity[c] != 0, result_bitmap[c]); } } else { for (uint64_t c = 0; c < cell_num; c++) { - result_bitmap[c] *= buffer_validity[c] == 0; + result_bitmap[c] = + combination_op(buffer_validity[c] == 0, result_bitmap[c]); } } return; - } else { - // Turn off bitmap values for null cells. + } else if constexpr (std::is_same_v< + CombinationOp, + std::multiplies>) { + // When the combination op is AND, turn off bitmap values for null cells. for (uint64_t c = 0; c < cell_num; c++) { result_bitmap[c] *= buffer_validity[c] != 0; } @@ -1709,46 +1827,69 @@ void QueryCondition::apply_ast_node_sparse( } switch (type) { - case Datatype::INT8: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::UINT8: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::INT16: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::UINT16: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::INT32: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::UINT32: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::INT64: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::UINT64: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::FLOAT32: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::FLOAT64: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::STRING_ASCII: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); - case Datatype::CHAR: + case Datatype::INT8: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::UINT8: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::INT16: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::UINT16: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::INT32: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::UINT32: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::INT64: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::UINT64: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::FLOAT32: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::FLOAT64: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::STRING_ASCII: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; + case Datatype::CHAR: { if (var_size) { - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + apply_ast_node_sparse( + node, + result_tile, + var_size, + nullable, + combination_op, + result_bitmap); + } else { + apply_ast_node_sparse( + node, + result_tile, + var_size, + nullable, + combination_op, + result_bitmap); } - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + } break; case Datatype::DATETIME_YEAR: case Datatype::DATETIME_MONTH: case Datatype::DATETIME_WEEK: @@ -1761,9 +1902,10 @@ void QueryCondition::apply_ast_node_sparse( case Datatype::DATETIME_NS: case Datatype::DATETIME_PS: case Datatype::DATETIME_FS: - case Datatype::DATETIME_AS: - return apply_ast_node_sparse( - node, result_tile, var_size, combination_op, result_bitmap); + case Datatype::DATETIME_AS: { + apply_ast_node_sparse( + node, result_tile, var_size, nullable, combination_op, result_bitmap); + } break; case Datatype::ANY: case Datatype::BLOB: case Datatype::STRING_UTF8: @@ -1821,7 +1963,8 @@ void QueryCondition::apply_tree_sparse( result_bitmap); } - // Handle the cl'(q, a) case + // Handle the cl'(q, a) case. + // This cases on whether the combination op = OR. } else if constexpr (std::is_same_v>) { std::vector combination_op_bitmap(result_bitmap_size, 1); diff --git a/tiledb/sm/query/query_condition.h b/tiledb/sm/query/query_condition.h index ca9ddb6a0e1..883ad4494ea 100644 --- a/tiledb/sm/query/query_condition.h +++ b/tiledb/sm/query/query_condition.h @@ -245,10 +245,11 @@ class QueryCondition { * @param var_size The attribute is var sized or not. * @param nullable The attribute is nullable or not. * @param fill_value The fill value for the cells. + * @param combination_op The combination op. * @param result_cell_bitmap The input cell bitmap. * @return The filtered cell slabs. */ - template + template void apply_ast_node( const tdb_unique_ptr& node, uint64_t stride, @@ -256,7 +257,7 @@ class QueryCondition { const bool nullable, const ByteVecValue& fill_value, const std::vector& result_cell_slabs, - ConditionOp condition_op, + CombinationOp combination_op, std::vector& result_cell_bitmap) const; /** @@ -267,6 +268,7 @@ class QueryCondition { * @param var_size The attribute is var sized or not. * @param nullable The attribute is nullable or not. * @param fill_value The fill value for the cells. + * @param combination_op The combination op. * @param result_cell_bitmap The input cell bitmap. * @return Status, filtered cell slabs. */ @@ -288,6 +290,7 @@ class QueryCondition { * @param node The value node to apply. * @param array_schema The current array schema. * @param stride The stride between cells. + * @param combination_op The combination op. * @param result_cell_bitmap The input cell bitmap. * @return Status, filtered cell slabs. */ @@ -306,10 +309,11 @@ class QueryCondition { * * @param node The node to apply. * @param array_schema The array schema associated with `result_cell_slabs`. + * @param stride The stride between cells. + * @param combination_op The combination op. * @param result_cell_bitmap A bitmap representation of cell slabs to filter. * Mutated to remove cell slabs that do not meet the criteria in this query * condition. - * @param stride The stride between cells. * @return Filtered cell slabs. */ template > @@ -331,6 +335,7 @@ class QueryCondition { * @param src_cell The cell offset in the source tile. * @param stride The stride between cells. * @param var_size The attribute is var sized or not. + * @param combination_op The combination op. * @param result_buffer The result buffer. */ template @@ -341,6 +346,7 @@ class QueryCondition { const uint64_t src_cell, const uint64_t stride, const bool var_size, + const bool nullable, CombinationOp combination_op, span result_buffer) const; @@ -353,6 +359,7 @@ class QueryCondition { * @param src_cell The cell offset in the source tile. * @param stride The stride between cells. * @param var_size The attribute is var sized or not. + * @param combination_op The combination op. * @param result_buffer The result buffer. * @return Status. */ @@ -364,6 +371,7 @@ class QueryCondition { const uint64_t src_cell, const uint64_t stride, const bool var_size, + const bool nullable, CombinationOp combination_op, span result_buffer) const; @@ -377,6 +385,7 @@ class QueryCondition { * @param start The start cell. * @param src_cell The cell offset in the source tile. * @param stride The stride between cells. + * @param combination_op The combination op. * @param result_buffer The result buffer. * @return Status. */ @@ -400,6 +409,7 @@ class QueryCondition { * @param start The start cell. * @param src_cell The cell offset in the source tile. * @param stride The stride between cells. + * @param combination_op The combination op. * @param result_buffer The buffer to use for results. * @return Void. */ @@ -421,13 +431,15 @@ class QueryCondition { * @param node The node to apply. * @param result_tile The result tile to get the cells from. * @param var_size The attribute is var sized or not. + * @param combination_op The combination op. * @param result_bitmap The result bitmap. */ template < typename T, QueryConditionOp Op, typename BitmapType, - typename CombinationOp> + typename CombinationOp, + typename nullable> void apply_ast_node_sparse( const tdb_unique_ptr& node, ResultTile& result_tile, @@ -436,19 +448,43 @@ class QueryCondition { std::vector& result_bitmap) const; /** - * Applies a value node on a sparse result tile. + * Applies a value node on a sparse result tile, + * templated on the nullable operator. * * @param node The node to apply. * @param result_tile The result tile to get the cells from. * @param var_size The attribute is var sized or not. + * @param combination_op The combination op. * @param result_bitmap The result bitmap. * @return Status. */ + template < + typename T, + typename BitmapType, + typename CombinationOp, + typename nullable> + void apply_ast_node_sparse( + const tdb_unique_ptr& node, + ResultTile& result_tile, + const bool var_size, + CombinationOp combination_op, + std::vector& result_bitmap) const; + + /** + * Applies a value node on a sparse result tile. + * + * @param node The node to apply. + * @param result_tile The result tile to get the cells from. + * @param var_size The attribute is var sized or not. + * @param combination_op The combination op. + * @param result_bitmap The result bitmap. + */ template void apply_ast_node_sparse( const tdb_unique_ptr& node, ResultTile& result_tile, const bool var_size, + const bool nullable, CombinationOp combination_op, std::vector& result_bitmap) const; @@ -459,6 +495,7 @@ class QueryCondition { * @param node The node to apply. * @param array_schema The current array schema. * @param result_tile The result tile to get the cells from. + * @param combination_op The combination op. * @param result_bitmap The result bitmap. * @return Status. */ @@ -476,6 +513,7 @@ class QueryCondition { * @param node The node to apply. * @param array_schema The array schema. * @param result_tile The result tile to get the cells from. + * @param combination_op The combination op. * @param result_bitmap The bitmap to use for results. * @return Void. */ diff --git a/tiledb/sm/query/test/unit_query_condition.cc b/tiledb/sm/query/test/unit_query_condition.cc index c81557515db..fea926323d2 100644 --- a/tiledb/sm/query/test/unit_query_condition.cc +++ b/tiledb/sm/query/test/unit_query_condition.cc @@ -3790,7 +3790,7 @@ void populate_string_test_params_vector( const std::string& field_name, ResultTile* result_tile, std::vector& tp_vec) { - // Construct basic query condition "foo < "eve". + // Construct basic query condition `foo < "eve"`. { char e[] = "eve"; QueryCondition qc; @@ -4117,6 +4117,324 @@ TEST_CASE( } } +/** + * @brief Function that takes a selection of QueryConditions, with their + * expected results, and combines them together. This function is + * exclusively for nullable query conditions. + * + * @param field_name The field name of the query condition. + * @param result_tile The result tile of the array we're running the query on. + * @param tp_vec The vector that stores the test parameter structs. + */ +void populate_nullable_test_params_vector( + const std::string& field_name, + ResultTile* result_tile, + std::vector& tp_vec) { + // Construct basic query condition `foo = null`. + { + QueryCondition qc; + REQUIRE(qc.init(std::string(field_name), nullptr, 0, QueryConditionOp::EQ) + .ok()); + uint64_t cell_count = 5; + std::vector expected_bitmap = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + std::vector expected_slabs = {{result_tile, 0, 1}, + {result_tile, 2, 1}, + {result_tile, 4, 1}, + {result_tile, 6, 1}, + {result_tile, 8, 1}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct basic query condition `foo != null`. + { + QueryCondition qc; + REQUIRE(qc.init(std::string(field_name), nullptr, 0, QueryConditionOp::NE) + .ok()); + uint64_t cell_count = 5; + std::vector expected_bitmap = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + std::vector expected_slabs = {{result_tile, 1, 1}, + {result_tile, 3, 1}, + {result_tile, 5, 1}, + {result_tile, 7, 1}, + {result_tile, 9, 1}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct basic query condition `foo > 2`. + { + QueryCondition qc; + float val = 2.0f; + REQUIRE(qc.init( + std::string(field_name), + &val, + sizeof(float), + QueryConditionOp::GT) + .ok()); + uint64_t cell_count = 4; + std::vector expected_bitmap = {0, 0, 0, 1, 0, 1, 0, 1, 0, 1}; + std::vector expected_slabs = {{result_tile, 3, 1}, + {result_tile, 5, 1}, + {result_tile, 7, 1}, + {result_tile, 9, 1}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct query condition `foo < 2 || foo > 4`. + { + QueryCondition qc1; + float val = 2.0f; + float val1 = 4.0f; + REQUIRE(qc1.init( + std::string(field_name), + &val, + sizeof(float), + QueryConditionOp::LT) + .ok()); + + QueryCondition qc2; + REQUIRE(qc2.init( + std::string(field_name), + &val1, + sizeof(float), + QueryConditionOp::GT) + .ok()); + QueryCondition qc; + REQUIRE(qc1.combine(qc2, QueryConditionCombinationOp::OR, &qc).ok()); + uint64_t cell_count = 3; + std::vector expected_bitmap = {0, 1, 0, 1, 0, 0, 0, 0, 0, 1}; + std::vector expected_slabs = { + {result_tile, 1, 1}, {result_tile, 3, 1}, {result_tile, 9, 1}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct query condition `foo > 4 || foo = null`. + { + QueryCondition qc1; + float val = 4.0f; + REQUIRE(qc1.init( + std::string(field_name), + &val, + sizeof(float), + QueryConditionOp::GT) + .ok()); + + QueryCondition qc2; + REQUIRE(qc2.init(std::string(field_name), nullptr, 0, QueryConditionOp::EQ) + .ok()); + QueryCondition qc; + REQUIRE(qc1.combine(qc2, QueryConditionCombinationOp::OR, &qc).ok()); + uint64_t cell_count = 7; + std::vector expected_bitmap = {1, 0, 1, 1, 1, 0, 1, 0, 1, 1}; + std::vector expected_slabs = {{result_tile, 0, 1}, + {result_tile, 2, 3}, + {result_tile, 6, 1}, + {result_tile, 8, 2}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct query condition `foo = null || foo > 4`. + { + QueryCondition qc1; + float val = 4.0f; + REQUIRE(qc1.init( + std::string(field_name), + &val, + sizeof(float), + QueryConditionOp::GT) + .ok()); + + QueryCondition qc2; + REQUIRE(qc2.init(std::string(field_name), nullptr, 0, QueryConditionOp::EQ) + .ok()); + QueryCondition qc; + REQUIRE(qc2.combine(qc1, QueryConditionCombinationOp::OR, &qc).ok()); + uint64_t cell_count = 7; + std::vector expected_bitmap = {1, 0, 1, 1, 1, 0, 1, 0, 1, 1}; + std::vector expected_slabs = {{result_tile, 0, 1}, + {result_tile, 2, 3}, + {result_tile, 6, 1}, + {result_tile, 8, 2}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct basic query condition `foo != null || foo > 4`. + { + QueryCondition qc1; + float val = 4.0f; + REQUIRE(qc1.init( + std::string(field_name), + &val, + sizeof(float), + QueryConditionOp::GT) + .ok()); + + QueryCondition qc2; + REQUIRE(qc2.init(std::string(field_name), nullptr, 0, QueryConditionOp::NE) + .ok()); + QueryCondition qc; + REQUIRE(qc2.combine(qc1, QueryConditionCombinationOp::OR, &qc).ok()); + uint64_t cell_count = 5; + std::vector expected_bitmap = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + std::vector expected_slabs = {{result_tile, 1, 1}, + {result_tile, 3, 1}, + {result_tile, 5, 1}, + {result_tile, 7, 1}, + {result_tile, 9, 1}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } + + // Construct basic query condition `foo > 4 || foo != null`. + { + QueryCondition qc1; + float val = 4.0f; + REQUIRE(qc1.init( + std::string(field_name), + &val, + sizeof(float), + QueryConditionOp::GT) + .ok()); + + QueryCondition qc2; + REQUIRE(qc2.init(std::string(field_name), nullptr, 0, QueryConditionOp::NE) + .ok()); + QueryCondition qc; + REQUIRE(qc1.combine(qc2, QueryConditionCombinationOp::OR, &qc).ok()); + uint64_t cell_count = 5; + std::vector expected_bitmap = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; + std::vector expected_slabs = {{result_tile, 1, 1}, + {result_tile, 3, 1}, + {result_tile, 5, 1}, + {result_tile, 7, 1}, + {result_tile, 9, 1}}; + TestParams tp( + std::move(qc), + cell_count, + std::move(expected_bitmap), + std::move(expected_slabs)); + tp_vec.push_back(tp); + } +} + +TEST_CASE( + "QueryCondition: Test combinations, nullable attributes", + "[QueryCondition][combinations][nullable]") { + // Setup. + const std::string field_name = "foo"; + const uint64_t cells = 10; + const Datatype type = Datatype::FLOAT32; + + // Initialize the array schema. + ArraySchema array_schema; + Attribute attr(field_name, type); + REQUIRE(attr.set_nullable(true).ok()); + REQUIRE(array_schema.add_attribute(tdb::make_shared(HERE(), &attr)) + .ok()); + Domain domain; + Dimension dim("dim1", Datatype::UINT32); + uint32_t bounds[2] = {1, cells}; + Range range(bounds, 2 * sizeof(uint32_t)); + REQUIRE(dim.set_domain(range).ok()); + REQUIRE( + domain + .add_dimension(tdb::make_shared(HERE(), &dim)) + .ok()); + REQUIRE( + array_schema.set_domain(make_shared(HERE(), &domain)) + .ok()); + + // Initialize the result tile. + ResultTile result_tile(0, 0, array_schema); + result_tile.init_attr_tile(field_name); + ResultTile::TileTuple* const tile_tuple = result_tile.tile_tuple(field_name); + Tile* const tile = &std::get<0>(*tile_tuple); + + // Initialize and populate the data tile. + REQUIRE(tile->init_unfiltered( + constants::format_version, + type, + cells * sizeof(float), + sizeof(float), + 0) + .ok()); + + std::vector values = { + 3.4, 1.3, 2.2, 4.5, 2.8, 2.1, 1.7, 3.3, 1.9, 4.2}; + REQUIRE(tile->write(values.data(), 0, cells * sizeof(float)).ok()); + + Tile* const tile_validity = &std::get<2>(*tile_tuple); + REQUIRE(tile_validity + ->init_unfiltered( + constants::format_version, + constants::cell_validity_type, + 10 * constants::cell_validity_size, + constants::cell_validity_size, + 0) + .ok()); + + std::vector validity(cells); + for (uint64_t i = 0; i < cells; ++i) { + validity[i] = i % 2; + } + REQUIRE( + tile_validity->write(validity.data(), 0, cells * sizeof(uint8_t)).ok()); + + std::vector tp_vec; + populate_nullable_test_params_vector(field_name, &result_tile, tp_vec); + + SECTION("Validate apply.") { + for (auto& elem : tp_vec) { + validate_qc_apply(elem, cells, array_schema, result_tile); + } + } + + SECTION("Validate apply_sparse.") { + for (auto& elem : tp_vec) { + validate_qc_apply_sparse(elem, cells, array_schema, result_tile); + } + } + + SECTION("Validate apply_dense.") { + for (auto& elem : tp_vec) { + validate_qc_apply_dense(elem, cells, array_schema, result_tile); + } + } +} + TEST_CASE( "QueryCondition: Test empty/null strings sparse", "[QueryCondition][empty_string][null_string][sparse]") {