Skip to content

Commit

Permalink
Merge pull request #55683 from amosbird/issue-55653
Browse files Browse the repository at this point in the history
Reuse granule during skip index reading
  • Loading branch information
alexey-milovidov committed Oct 29, 2023
2 parents 8b57822 + 602f01f commit 64b6e68
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 25 deletions.
11 changes: 9 additions & 2 deletions src/Interpreters/BloomFilter.cpp
Expand Up @@ -41,8 +41,15 @@ BloomFilter::BloomFilter(const BloomFilterParameters & params)
/// Builds a Bloom filter from an explicit size, hash count and seed.
/// size_ is treated as a byte count: it is divided by sizeof(UnderType), rounding up,
/// to obtain the number of words, and filter is constructed with that many elements of value 0.
BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_)
: size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0)
{
/// A zero size or a zero hash count is treated as a programming error.
assert(size != 0);
assert(hashes != 0);
chassert(size != 0);
chassert(hashes != 0);
}

/// Changes the size of an existing filter to size_ (counted the same way as in the constructor)
/// and resizes the underlying word storage to match, so the object can be reused
/// instead of being constructed again.
void BloomFilter::resize(size_t size_)
{
    size = size_;

    /// Round up to a whole number of UnderType words.
    const auto word_bytes = sizeof(UnderType);
    words = (size + word_bytes - 1) / word_bytes;

    filter.resize(words);
}

bool BloomFilter::find(const char * data, size_t len)
Expand Down
1 change: 1 addition & 0 deletions src/Interpreters/BloomFilter.h
Expand Up @@ -37,6 +37,7 @@ class BloomFilter
/// seed -- random seed for hash functions generation.
BloomFilter(size_t size_, size_t hashes_, size_t seed_);

void resize(size_t size_);
bool find(const char * data, size_t len);
void add(const char * data, size_t len);
void clear();
Expand Down
4 changes: 2 additions & 2 deletions src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
Expand Up @@ -1676,7 +1676,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
{
if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin)
granule = reader.read();
reader.read(granule);

auto ann_condition = std::dynamic_pointer_cast<IMergeTreeIndexConditionApproximateNearestNeighbor>(condition);
if (ann_condition != nullptr)
Expand Down Expand Up @@ -1794,7 +1794,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
{
for (size_t i = 0; i < readers.size(); ++i)
{
granules[i] = readers[i]->read();
readers[i]->read(granules[i]);
granules_filled = true;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/Storages/MergeTree/MergeTreeIndexFullText.h
Expand Up @@ -25,8 +25,8 @@ struct MergeTreeIndexGranuleFullText final : public IMergeTreeIndexGranule

bool empty() const override { return !has_elems; }

String index_name;
BloomFilterParameters params;
const String index_name;
const BloomFilterParameters params;

std::vector<BloomFilter> bloom_filters;
bool has_elems;
Expand Down
9 changes: 4 additions & 5 deletions src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp
Expand Up @@ -16,10 +16,11 @@ namespace ErrorCodes
}

MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row_, size_t hash_functions_, size_t index_columns_)
: bits_per_row(bits_per_row_), hash_functions(hash_functions_)
: bits_per_row(bits_per_row_), hash_functions(hash_functions_), bloom_filters(index_columns_)
{
total_rows = 0;
bloom_filters.resize(index_columns_);
for (size_t column = 0; column < index_columns_; ++column)
bloom_filters[column] = std::make_shared<BloomFilter>(bits_per_row, hash_functions, 0);
}

MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(
Expand Down Expand Up @@ -55,8 +56,6 @@ bool MergeTreeIndexGranuleBloomFilter::empty() const

void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version)
{
if (!empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read data to a non-empty bloom filter index.");
if (version != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version);

Expand All @@ -67,7 +66,7 @@ void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr, Merg
size_t read_size = bytes_size;
for (auto & filter : bloom_filters)
{
filter = std::make_shared<BloomFilter>(bytes_size, hash_functions, 0);
filter->resize(bytes_size);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
read_size = filter->getFilter().size() * sizeof(BloomFilter::UnderType);
#endif
Expand Down
5 changes: 3 additions & 2 deletions src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h
Expand Up @@ -22,9 +22,10 @@ class MergeTreeIndexGranuleBloomFilter final : public IMergeTreeIndexGranule
const std::vector<BloomFilterPtr> & getFilters() const { return bloom_filters; }

private:
const size_t bits_per_row;
const size_t hash_functions;

size_t total_rows = 0;
size_t bits_per_row;
size_t hash_functions;
std::vector<BloomFilterPtr> bloom_filters;

void fillingBloomFilter(BloomFilterPtr & bf, const HashSet<UInt64> & hashes) const;
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/MergeTreeIndexInverted.cpp
Expand Up @@ -73,11 +73,11 @@ void MergeTreeIndexGranuleInverted::deserializeBinary(ReadBuffer & istr, MergeTr
{
size_serialization->deserializeBinary(field_rows, istr, {});
size_t filter_size = field_rows.get<size_t>();
gin_filter.getFilter().resize(filter_size);

if (filter_size == 0)
continue;

gin_filter.getFilter().assign(filter_size, {});
istr.readStrict(reinterpret_cast<char *>(gin_filter.getFilter().data()), filter_size * sizeof(GinSegmentWithRowIdRangeVector::value_type));
}
has_elems = true;
Expand Down
4 changes: 2 additions & 2 deletions src/Storages/MergeTree/MergeTreeIndexInverted.h
Expand Up @@ -24,8 +24,8 @@ struct MergeTreeIndexGranuleInverted final : public IMergeTreeIndexGranule

bool empty() const override { return !has_elems; }

String index_name;
GinFilterParameters params;
const String index_name;
const GinFilterParameters params;
GinFilters gin_filters;
bool has_elems;
};
Expand Down
5 changes: 3 additions & 2 deletions src/Storages/MergeTree/MergeTreeIndexMinMax.h
Expand Up @@ -25,8 +25,9 @@ struct MergeTreeIndexGranuleMinMax final : public IMergeTreeIndexGranule

bool empty() const override { return hyperrectangle.empty(); }

String index_name;
Block index_sample_block;
const String index_name;
const Block index_sample_block;

std::vector<Range> hyperrectangle;
};

Expand Down
7 changes: 4 additions & 3 deletions src/Storages/MergeTree/MergeTreeIndexReader.cpp
Expand Up @@ -69,11 +69,12 @@ void MergeTreeIndexReader::seek(size_t mark)
stream->seekToMark(mark);
}

MergeTreeIndexGranulePtr MergeTreeIndexReader::read()
void MergeTreeIndexReader::read(MergeTreeIndexGranulePtr & granule)
{
auto granule = index->createIndexGranule();
if (granule == nullptr)
granule = index->createIndexGranule();

granule->deserializeBinary(*stream->getDataBuffer(), version);
return granule;
}

}
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/MergeTreeIndexReader.h
Expand Up @@ -23,7 +23,7 @@ class MergeTreeIndexReader

void seek(size_t mark);

MergeTreeIndexGranulePtr read();
void read(MergeTreeIndexGranulePtr & granule);

private:
MergeTreeIndexPtr index;
Expand Down
7 changes: 4 additions & 3 deletions src/Storages/MergeTree/MergeTreeIndexSet.h
Expand Up @@ -35,9 +35,10 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule

~MergeTreeIndexGranuleSet() override = default;

String index_name;
size_t max_rows;
Block index_sample_block;
const String index_name;
const size_t max_rows;
const Block index_sample_block;

Block block;
};

Expand Down
11 changes: 11 additions & 0 deletions tests/performance/min_max_index.xml
@@ -0,0 +1,11 @@
<test>
<!-- Performance test for a table with two minmax skip indexes, one per component of mortonDecode(2, z). -->
<create_query>CREATE TABLE index_test (z UInt32, INDEX i_x (mortonDecode(2, z).1) TYPE minmax, INDEX i_y (mortonDecode(2, z).2) TYPE minmax) ENGINE = MergeTree ORDER BY z</create_query>

<!-- Generates numbers up to 0x100000000 (2^32) and keeps only those for which rand() % 3 = 1. -->
<fill_query>INSERT INTO index_test SELECT number FROM numbers(0x100000000) WHERE rand() % 3 = 1</fill_query>

<!-- The measured query restricts both indexed expressions to a narrow range. -->
<query><![CDATA[
SELECT count() FROM index_test WHERE mortonDecode(2, z).1 >= 20000 AND mortonDecode(2, z).1 <= 20100 AND mortonDecode(2, z).2 >= 10000 AND mortonDecode(2, z).2 <= 10100
]]></query>

<drop_query>DROP TABLE IF EXISTS index_test</drop_query>
</test>

0 comments on commit 64b6e68

Please sign in to comment.