diff --git a/src/Interpreters/BloomFilter.cpp b/src/Interpreters/BloomFilter.cpp index 99d7e256cf19..7bf50a0312be 100644 --- a/src/Interpreters/BloomFilter.cpp +++ b/src/Interpreters/BloomFilter.cpp @@ -41,8 +41,15 @@ BloomFilter::BloomFilter(const BloomFilterParameters & params) BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_) : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0) { - assert(size != 0); - assert(hashes != 0); + chassert(size != 0); + chassert(hashes != 0); +} + +void BloomFilter::resize(size_t size_) +{ + size = size_; + words = ((size + sizeof(UnderType) - 1) / sizeof(UnderType)); + filter.resize(words); } bool BloomFilter::find(const char * data, size_t len) diff --git a/src/Interpreters/BloomFilter.h b/src/Interpreters/BloomFilter.h index 1fb9895cc27b..8ebdfd879e62 100644 --- a/src/Interpreters/BloomFilter.h +++ b/src/Interpreters/BloomFilter.h @@ -37,6 +37,7 @@ class BloomFilter /// seed -- random seed for hash functions generation. BloomFilter(size_t size_, size_t hashes_, size_t seed_); + void resize(size_t size_); bool find(const char * data, size_t len); void add(const char * data, size_t len); void clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 419950c30370..be5e7c5a938f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1676,7 +1676,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark) { if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) - granule = reader.read(); + reader.read(granule); auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) @@ -1794,7 +1794,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( { for (size_t i = 0; i < readers.size(); ++i) { - granules[i] = readers[i]->read(); + readers[i]->read(granules[i]); granules_filled = true; } } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index bc16f096be70..fbfa0fd27fc4 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -25,8 +25,8 @@ struct MergeTreeIndexGranuleFullText final : public IMergeTreeIndexGranule bool empty() const override { return !has_elems; } - String index_name; - BloomFilterParameters params; + const String index_name; + const BloomFilterParameters params; std::vector bloom_filters; bool has_elems; diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 7db3aa3a6b13..8355cac8033a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -16,10 +16,11 @@ namespace ErrorCodes } MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_) - : bits_per_row(bits_per_row_), hash_functions(hash_functions_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), bloom_filters(index_columns_) { total_rows = 0; - bloom_filters.resize(index_columns_); + for (size_t column = 0; column < index_columns_; ++column) + bloom_filters[column] = std::make_shared(bits_per_row, hash_functions, 0); } MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( @@ -55,8 +56,6 @@ bool MergeTreeIndexGranuleBloomFilter::empty() const void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) { - if (!empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read data to a non-empty bloom filter index."); if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); @@ -67,7 +66,7 @@ void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr, Merg size_t read_size = bytes_size; for (auto & filter : bloom_filters) { - filter = std::make_shared(bytes_size, hash_functions, 0); + filter->resize(bytes_size); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ read_size = filter->getFilter().size() * sizeof(BloomFilter::UnderType); #endif diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h index 35335f5d0d22..a3434daa5a43 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -22,9 +22,10 @@ class MergeTreeIndexGranuleBloomFilter final : public IMergeTreeIndexGranule const std::vector & getFilters() const { return bloom_filters; } private: + const size_t bits_per_row; + const size_t hash_functions; + size_t total_rows = 0; - size_t bits_per_row; - size_t hash_functions; std::vector bloom_filters; void fillingBloomFilter(BloomFilterPtr & bf, const HashSet & hashes) const; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index f04c20dc50e7..908fc98728e7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -73,11 +73,11 @@ void MergeTreeIndexGranuleInverted::deserializeBinary(ReadBuffer & istr, MergeTr { size_serialization->deserializeBinary(field_rows, istr, {}); size_t filter_size = field_rows.get(); + gin_filter.getFilter().resize(filter_size); if (filter_size == 0) continue; - gin_filter.getFilter().assign(filter_size, {}); istr.readStrict(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinSegmentWithRowIdRangeVector::value_type)); } has_elems = true; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h index 96d12128bb45..86bf7613646e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -24,8 +24,8 @@ struct MergeTreeIndexGranuleInverted final : public IMergeTreeIndexGranule bool empty() const override { return !has_elems; } - String index_name; - GinFilterParameters params; + const String index_name; + const GinFilterParameters params; GinFilters gin_filters; bool has_elems; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 085ccf19aec4..4517a1953621 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -25,8 +25,9 @@ struct MergeTreeIndexGranuleMinMax final : public IMergeTreeIndexGranule bool empty() const override { return hyperrectangle.empty(); } - String index_name; - Block index_sample_block; + const String index_name; + const Block index_sample_block; + std::vector hyperrectangle; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index ab1fd8dfa746..f0b1aacc239f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -69,11 +69,12 @@ void MergeTreeIndexReader::seek(size_t mark) stream->seekToMark(mark); } -MergeTreeIndexGranulePtr MergeTreeIndexReader::read() +void MergeTreeIndexReader::read(MergeTreeIndexGranulePtr & granule) { - auto granule = index->createIndexGranule(); + if (granule == nullptr) + granule = index->createIndexGranule(); + granule->deserializeBinary(*stream->getDataBuffer(), version); - return granule; } } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.h b/src/Storages/MergeTree/MergeTreeIndexReader.h index 799dae154bfd..664c7684b7be 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.h +++ b/src/Storages/MergeTree/MergeTreeIndexReader.h @@ -23,7 +23,7 @@ class MergeTreeIndexReader void seek(size_t mark); - MergeTreeIndexGranulePtr read(); + void read(MergeTreeIndexGranulePtr & granule); private: MergeTreeIndexPtr index; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 7516e6f6984e..022988c24536 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -35,9 +35,10 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule ~MergeTreeIndexGranuleSet() override = default; - String index_name; - size_t max_rows; - Block index_sample_block; + const String index_name; + const size_t max_rows; + const Block index_sample_block; + Block block; }; diff --git a/tests/performance/min_max_index.xml b/tests/performance/min_max_index.xml new file mode 100644 index 000000000000..b7b5d4fb991b --- /dev/null +++ b/tests/performance/min_max_index.xml @@ -0,0 +1,11 @@ + + CREATE TABLE index_test (z UInt32, INDEX i_x (mortonDecode(2, z).1) TYPE minmax, INDEX i_y (mortonDecode(2, z).2) TYPE minmax) ENGINE = MergeTree ORDER BY z + + INSERT INTO index_test SELECT number FROM numbers(0x100000000) WHERE rand() % 3 = 1 + + = 20000 AND mortonDecode(2, z).1 <= 20100 AND mortonDecode(2, z).2 >= 10000 AND mortonDecode(2, z).2 <= 10100 + ]]> + + DROP TABLE IF EXISTS index_test +