diff --git a/tiledb/sm/array/array.h b/tiledb/sm/array/array.h index d9f0c8b6f48..fffcee67387 100644 --- a/tiledb/sm/array/array.h +++ b/tiledb/sm/array/array.h @@ -764,6 +764,7 @@ class Array { * has not been computed or loaded it will be loaded first */ const NDRange non_empty_domain(); + /** * Retrieves the array metadata object that is already loaded. If it's not yet * loaded it will be empty. diff --git a/tiledb/sm/query/readers/sparse_global_order_reader.cc b/tiledb/sm/query/readers/sparse_global_order_reader.cc index e2ddeb8ce51..8eebf95e5e1 100644 --- a/tiledb/sm/query/readers/sparse_global_order_reader.cc +++ b/tiledb/sm/query/readers/sparse_global_order_reader.cc @@ -119,6 +119,8 @@ Status SparseGlobalOrderReader::dowork() { auto timer_se = stats_->start_timer("dowork"); stats_->add_counter("loop_num", 1); + subarray_.reset_default_ranges(); + // Check that the query condition is valid. if (condition_.has_value()) { throw_if_not_ok(condition_->check(array_schema_)); diff --git a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc index e5740481c82..1364d2cedb2 100644 --- a/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc +++ b/tiledb/sm/query/readers/sparse_unordered_with_dups_reader.cc @@ -122,12 +122,13 @@ void SparseUnorderedWithDupsReader::refresh_config() { template Status SparseUnorderedWithDupsReader::dowork() { - // Subarray is not known to be explicitly set until buffers are deserialized - include_coords_ = subarray_.is_set(); - auto timer_se = stats_->start_timer("dowork"); stats_->add_counter("loop_num", 1); + // Subarray is not known to be explicitly set until buffers are deserialized + subarray_.reset_default_ranges(); + include_coords_ = subarray_.is_set(); + // Make sure user didn't request delete timestamps. 
if (buffers_.count(constants::delete_timestamps) != 0) { return logger_->status(Status_SparseUnorderedWithDupsReaderError( diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 4f4d6e62f5e..8cff533f0ec 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -2131,6 +2131,34 @@ void Subarray::add_default_label_ranges(dimension_size_type dim_num) { label_range_subset_.resize(dim_num, nullopt); } +void Subarray::reset_default_ranges() { + // Without a computed non empty domain there is nothing to compare against. + if (array_->non_empty_domain_computed()) { + auto dim_num = array_->array_schema_latest().dim_num(); + auto& domain{array_->array_schema_latest().domain()}; + + // Array::non_empty_domain() is declared as returning the NDRange by value. + // Bind the result once so it outlives the loop: indexing the temporary + // directly would leave a dangling reference to a destroyed Range. + const auto& ned = array_->non_empty_domain(); + + // Process all dimensions one by one. + for (unsigned d = 0; d < dim_num; d++) { + // Only enter the check if there is only one range set on the dimension. + if (!is_default_[d] && range_subset_[d].num_ranges() == 1) { + // If the range set is the same as the non empty domain. + if (ned[d] == range_subset_[d][0]) { + // Reset the default flag and reset the range subset to be default. + is_default_[d] = true; + auto dim{domain.dimension_ptr(d)}; + range_subset_[d] = RangeSetAndSuperset( + dim->type(), dim->domain(), true, coalesce_ranges_); + } + } + } + } +} + void Subarray::compute_range_offsets() { range_offsets_.clear(); diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 4b2fa5563f0..665f0b3a859 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -1306,6 +1306,17 @@ class Subarray { */ void add_default_label_ranges(dimension_size_type dim_num); + /** + * Reset ranges to default if possible before a read operation for sparse + * reads. We have a lot of optimizations in the sparse readers when no ranges + * are specified. Python will set ranges that are equal to the non empty + * domain, which will negate those optimizations. 
When the non empty domain is + * computed for the array, it is cheap to check whether the ranges set + * are equal to the non empty domain. If they are, we can reset them to be + * default. + */ + void reset_default_ranges(); + private: /* ********************************* */ /* PRIVATE DATA TYPES */