Skip to content

Commit

Permalink
Use sparse global ordered reader for unordered queries with no dups.
Browse files Browse the repository at this point in the history
This change modifies the sparse global order reader to also process
unordered queries made on arrays without duplicates. Because the order of
the returned data does not matter for such queries, the constraint that
global order reads support only one range can be lifted for them.

This also fixes an issue in bitmap computation for multiplicities,
where the relevant ranges were not computed properly for count bitmaps.

---
TYPE: IMPROVEMENT
DESC: Use sparse global ordered reader for unordered queries with no dups.
  • Loading branch information
KiterLuc committed May 24, 2022
1 parent 8bb17c9 commit d1abd13
Show file tree
Hide file tree
Showing 19 changed files with 545 additions and 220 deletions.
23 changes: 23 additions & 0 deletions test/src/helpers.cc
Expand Up @@ -1199,6 +1199,24 @@ std::string get_commit_dir(std::string array_dir) {
return array_dir + "/" + tiledb::sm::constants::array_commits_dir_name;
}

/**
 * Checks how many times each value occurs in a buffer against a vector of
 * expected counts. Results are reported through `CHECK` assertions.
 *
 * @tparam T Integral type of the values in the buffer.
 * @param vals Buffer holding the values to tally.
 * @param num Number of values to read from `vals`.
 * @param expected Expected number of occurrences per value, indexed by value:
 *     `expected[v]` is how many times the value `v` should appear. Every
 *     value in `vals` must therefore lie in `[0, expected.size())`.
 */
template <class T>
void check_counts(T* vals, uint64_t num, std::vector<uint64_t> expected) {
  const uint64_t num_buckets = expected.size();
  std::vector<uint64_t> counts(num_buckets);
  for (uint64_t i = 0; i < num; i++) {
    // The upper bound is compared as uint64_t rather than casting the bucket
    // count down to T, which would truncate or wrap whenever T cannot
    // represent `expected.size()`.
    const bool non_negative = vals[i] >= 0;
    const auto idx = static_cast<uint64_t>(vals[i]);
    CHECK(vals[i] >= 0);
    CHECK(idx < num_buckets);

    // Out-of-range values are reported above and left out of the tally so
    // they can never index outside of `counts`.
    if (non_negative && idx < num_buckets) {
      counts[idx]++;
    }
  }

  for (uint64_t i = 0; i < num_buckets; i++) {
    CHECK(counts[i] == expected[i]);
  }
}

template void check_subarray<int8_t>(
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
template void check_subarray<uint8_t>(
Expand Down Expand Up @@ -1567,6 +1585,11 @@ template void read_array<double>(
tiledb_layout_t layout,
const QueryBuffers& buffers);

// Explicit instantiations of check_counts for int32_t and uint64_t values.
template void check_counts<int32_t>(
int32_t* vals, uint64_t num, std::vector<uint64_t> expected);
template void check_counts<uint64_t>(
uint64_t* vals, uint64_t num, std::vector<uint64_t> expected);

} // End of namespace test

} // End of namespace tiledb
6 changes: 6 additions & 0 deletions test/src/helpers.h
Expand Up @@ -692,6 +692,12 @@ std::string get_fragment_dir(std::string array_dir);
*/
std::string get_commit_dir(std::string array_dir);

/**
 * Checks the number of occurrences of each value in a buffer against a
 * vector of expected counts.
 *
 * @tparam T The type of the values in the buffer.
 * @param vals The buffer of values to check.
 * @param num The number of values in `vals`.
 * @param expected The expected counts, indexed by value: `expected[v]` is
 *     the number of times the value `v` is expected to occur in `vals`.
 */
template <class T>
void check_counts(T* vals, uint64_t num, std::vector<uint64_t> expected);

} // End of namespace test

} // End of namespace tiledb
Expand Down
96 changes: 41 additions & 55 deletions test/src/unit-capi-sparse_array.cc
Expand Up @@ -3278,29 +3278,11 @@ TEST_CASE_METHOD(
REQUIRE(rc == TILEDB_OK);
REQUIRE(status == TILEDB_COMPLETED);

CHECK(a1[0] == 1);
CHECK(a1[1] == 2);
CHECK(a1[2] == 5);
CHECK(a1[3] == 6);
CHECK(a1[4] == 7);
CHECK(a1[5] == 1);
CHECK(a1[6] == 2);
check_counts(a1, 7, {0, 2, 2, 0, 0, 1, 1, 1});
CHECK(a1_size == 7 * sizeof(int));
check_counts(coords_dim1, 7, {0, 4, 0, 2, 1});
check_counts(coords_dim2, 7, {0, 0, 3, 1, 3});
CHECK(coords_size == 7 * sizeof(uint64_t));
CHECK(coords_dim1[0] == 1);
CHECK(coords_dim2[0] == 2);
CHECK(coords_dim1[1] == 1);
CHECK(coords_dim2[1] == 4);
CHECK(coords_dim1[2] == 4);
CHECK(coords_dim2[2] == 2);
CHECK(coords_dim1[3] == 3);
CHECK(coords_dim2[3] == 3);
CHECK(coords_dim1[4] == 3);
CHECK(coords_dim2[4] == 4);
CHECK(coords_dim1[5] == 1);
CHECK(coords_dim2[5] == 2);
CHECK(coords_dim1[6] == 1);
CHECK(coords_dim2[6] == 4);

// Close array
CHECK(tiledb_array_close(ctx, array) == TILEDB_OK);
Expand Down Expand Up @@ -3376,32 +3358,48 @@ TEST_CASE_METHOD(
REQUIRE(rc == TILEDB_OK);
REQUIRE(status == TILEDB_INCOMPLETE);

CHECK(a1_size == 2 * sizeof(int));
CHECK(a1[0] == 1);
CHECK(a1[1] == 2);
CHECK(coords_size == 2 * sizeof(uint64_t));
CHECK(coords_dim1[0] == 1);
CHECK(coords_dim2[0] == 2);
CHECK(coords_dim1[1] == 1);
CHECK(coords_dim2[1] == 4);
if (use_refactored_sparse_global_order_reader()) {
CHECK(a1_size == 3 * sizeof(int));
check_counts(a1, 3, {0, 1, 1, 0, 0, 1});
CHECK(coords_size == 3 * sizeof(uint64_t));
check_counts(coords_dim1, 3, {0, 2, 0, 0, 1});
check_counts(coords_dim2, 3, {0, 0, 2, 0, 1});
} else {
CHECK(a1_size == 2 * sizeof(int));
CHECK(a1[0] == 1);
CHECK(a1[1] == 2);
CHECK(coords_size == 2 * sizeof(uint64_t));
CHECK(coords_dim1[0] == 1);
CHECK(coords_dim2[0] == 2);
CHECK(coords_dim1[1] == 1);
CHECK(coords_dim2[1] == 4);
}

rc = tiledb_query_submit(ctx, query);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_get_status(ctx_, query, &status);
REQUIRE(rc == TILEDB_OK);
REQUIRE(status == TILEDB_COMPLETED);

CHECK(a1_size == 3 * sizeof(int));
CHECK(a1[0] == 5);
CHECK(a1[1] == 6);
CHECK(a1[2] == 7);
CHECK(coords_size == 3 * sizeof(uint64_t));
CHECK(coords_dim1[0] == 4);
CHECK(coords_dim2[0] == 2);
CHECK(coords_dim1[1] == 3);
CHECK(coords_dim2[1] == 3);
CHECK(coords_dim1[2] == 3);
CHECK(coords_dim2[2] == 4);
if (use_refactored_sparse_global_order_reader()) {
CHECK(a1_size == 2 * sizeof(int));
check_counts(a1, 2, {0, 0, 0, 0, 0, 0, 1, 1});
CHECK(coords_size == 2 * sizeof(uint64_t));
check_counts(coords_dim1, 2, {0, 0, 0, 2});
check_counts(coords_dim2, 2, {0, 0, 0, 1, 1});
} else {
CHECK(a1_size == 3 * sizeof(int));
CHECK(a1[0] == 5);
CHECK(a1[1] == 6);
CHECK(a1[2] == 7);
CHECK(coords_size == 3 * sizeof(uint64_t));
CHECK(coords_dim1[0] == 4);
CHECK(coords_dim2[0] == 2);
CHECK(coords_dim1[1] == 3);
CHECK(coords_dim2[1] == 3);
CHECK(coords_dim1[2] == 3);
CHECK(coords_dim2[2] == 4);
}

// Close array
CHECK(tiledb_array_close(ctx, array) == TILEDB_OK);
Expand Down Expand Up @@ -3478,22 +3476,10 @@ TEST_CASE_METHOD(
REQUIRE(status == TILEDB_COMPLETED);

CHECK(a1_size == 5 * sizeof(int));
CHECK(a1[0] == 1);
CHECK(a1[1] == 2);
CHECK(a1[2] == 5);
CHECK(a1[3] == 6);
CHECK(a1[4] == 7);
check_counts(a1, 5, {0, 1, 1, 0, 0, 1, 1, 1});
CHECK(coords_size == 5 * sizeof(uint64_t));
CHECK(coords_dim1[0] == 1);
CHECK(coords_dim2[0] == 2);
CHECK(coords_dim1[1] == 1);
CHECK(coords_dim2[1] == 4);
CHECK(coords_dim1[2] == 4);
CHECK(coords_dim2[2] == 2);
CHECK(coords_dim1[3] == 3);
CHECK(coords_dim2[3] == 3);
CHECK(coords_dim1[4] == 3);
CHECK(coords_dim2[4] == 4);
check_counts(coords_dim1, 5, {0, 2, 0, 2, 1});
check_counts(coords_dim2, 5, {0, 0, 2, 1, 2});

// Close array
CHECK(tiledb_array_close(ctx, array) == TILEDB_OK);
Expand Down
12 changes: 5 additions & 7 deletions test/src/unit-cppapi-hilbert.cc
Expand Up @@ -377,16 +377,14 @@ TEST_CASE(
CHECK_NOTHROW(query_r.submit());
CHECK(query_r.query_status() == tiledb::Query::Status::COMPLETE);
// check number of results
CHECK(query_r.result_buffer_elements()["a"].second == 6);
uint64_t num = query_r.result_buffer_elements()["a"].second;
CHECK(num == 6);
array_r.close();

// Check results
std::vector<int32_t> c_buff_a = {2, 3, 1, 2, 4, 1, 0};
std::vector<int32_t> c_buff_d1 = {1, 1, 4, 1, 5, 4, 0};
std::vector<int32_t> c_buff_d2 = {3, 1, 2, 3, 4, 2, 0};
CHECK(r_buff_a == c_buff_a);
CHECK(r_buff_d1 == c_buff_d1);
CHECK(r_buff_d2 == c_buff_d2);
check_counts(r_buff_a.data(), num, {0, 2, 2, 1, 1});
check_counts(r_buff_d1.data(), num, {0, 3, 0, 0, 2, 1});
check_counts(r_buff_d2.data(), num, {0, 1, 2, 2, 1});
}

// Remove array
Expand Down
46 changes: 36 additions & 10 deletions test/src/unit-cppapi-subarray.cc
Expand Up @@ -926,16 +926,29 @@ TEST_CASE(
REQUIRE(st == tiledb::Query::Status::INCOMPLETE);
auto result_elts = query.result_buffer_elements();
auto result_num = result_elts["rows"].second;
REQUIRE(result_num == 1);
REQUIRE(data[0] == 'l');

if (use_refactored_sparse_global_order_reader()) {
REQUIRE(result_num == 2);
REQUIRE(data[0] == 'l');
REQUIRE(data[1] == 'l');
} else {
REQUIRE(result_num == 1);
REQUIRE(data[0] == 'l');
}

st = query.submit();
REQUIRE(st == tiledb::Query::Status::COMPLETE);
result_elts = query.result_buffer_elements();
result_num = result_elts["rows"].second;
REQUIRE(result_num == 2);
REQUIRE(data[0] == 'l');
REQUIRE(data[1] == 'm');

if (use_refactored_sparse_global_order_reader()) {
REQUIRE(result_num == 1);
REQUIRE(data[0] == 'm');
} else {
REQUIRE(result_num == 2);
REQUIRE(data[0] == 'l');
REQUIRE(data[1] == 'm');
}

// Close array.
array.close();
Expand Down Expand Up @@ -1013,17 +1026,30 @@ TEST_CASE(
REQUIRE(st == tiledb::Query::Status::INCOMPLETE);
auto result_elts = query.result_buffer_elements();
auto result_num = result_elts["rows"].second;
REQUIRE(result_num == 1);
REQUIRE(data[0] == 'l');

if (use_refactored_sparse_global_order_reader()) {
REQUIRE(result_num == 2);
REQUIRE(data[0] == 'l');
REQUIRE(data[1] == 'l');
} else {
REQUIRE(result_num == 1);
REQUIRE(data[0] == 'l');
}

query.set_subarray(subarray);
st = query.submit();
REQUIRE(st == tiledb::Query::Status::COMPLETE);
result_elts = query.result_buffer_elements();
result_num = result_elts["rows"].second;
REQUIRE(result_num == 2);
REQUIRE(data[0] == 'l');
REQUIRE(data[1] == 'm');

if (use_refactored_sparse_global_order_reader()) {
REQUIRE(result_num == 1);
REQUIRE(data[0] == 'm');
} else {
REQUIRE(result_num == 2);
REQUIRE(data[0] == 'l');
REQUIRE(data[1] == 'm');
}

// Close array.
array.close();
Expand Down
10 changes: 5 additions & 5 deletions test/src/unit-result-coords.cc
Expand Up @@ -58,7 +58,7 @@ struct CResultCoordsFx {
CResultCoordsFx();
~CResultCoordsFx();

GlobalOrderResultTile make_tile_with_num_cells(uint64_t num_cells);
GlobalOrderResultTile<uint8_t> make_tile_with_num_cells(uint64_t num_cells);
};

CResultCoordsFx::CResultCoordsFx() {
Expand Down Expand Up @@ -116,9 +116,9 @@ CResultCoordsFx::~CResultCoordsFx() {
tiledb_vfs_free(&vfs_);
}

GlobalOrderResultTile CResultCoordsFx::make_tile_with_num_cells(
GlobalOrderResultTile<uint8_t> CResultCoordsFx::make_tile_with_num_cells(
uint64_t num_cells) {
GlobalOrderResultTile result_tile(
GlobalOrderResultTile<uint8_t> result_tile(
0, 0, array_->array_->array_schema_latest());
auto tile_tuple = result_tile.tile_tuple(constants::coords);
Tile* const tile = &std::get<0>(*tile_tuple);
Expand All @@ -145,8 +145,8 @@ class Cmp {
}

bool operator()(
const GlobalOrderResultCoords& a,
const GlobalOrderResultCoords& b) const {
const GlobalOrderResultCoords<uint8_t>& a,
const GlobalOrderResultCoords<uint8_t>& b) const {
if (a.pos_ == b.pos_) {
return true;
}
Expand Down
6 changes: 4 additions & 2 deletions test/src/unit-sparse-global-order-reader.cc
Expand Up @@ -451,7 +451,8 @@ TEST_CASE_METHOD(

// Check the internal loop count against expected value.
auto stats =
((sm::SparseGlobalOrderReader*)query->query_->strategy())->stats();
((sm::SparseGlobalOrderReader<uint8_t>*)query->query_->strategy())
->stats();
REQUIRE(stats != nullptr);
auto counters = stats->counters();
REQUIRE(counters != nullptr);
Expand Down Expand Up @@ -646,7 +647,8 @@ TEST_CASE_METHOD(

// Check the internal loop count against expected value.
auto stats =
((sm::SparseGlobalOrderReader*)query->query_->strategy())->stats();
((sm::SparseGlobalOrderReader<uint8_t>*)query->query_->strategy())
->stats();
REQUIRE(stats != nullptr);
auto counters = stats->counters();
REQUIRE(counters != nullptr);
Expand Down

0 comments on commit d1abd13

Please sign in to comment.