Skip to content

Commit

Permalink
Merge pull request #53616 from wzb5212/skip_idx_bug_fix
Browse files Browse the repository at this point in the history
Fix number of dropped granules in EXPLAIN PLAN index=1
  • Loading branch information
rschu1ze committed Aug 24, 2023
2 parents a190efe + 8365a36 commit 45d924c
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 36 deletions.
26 changes: 5 additions & 21 deletions src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
Expand Up @@ -981,22 +981,21 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
const auto & index_and_condition = skip_indexes.useful_indices[idx];
auto & stat = useful_indices_stat[idx];
stat.total_parts.fetch_add(1, std::memory_order_relaxed);
stat.total_granules.fetch_add(ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);
size_t total_granules = ranges.ranges.getNumberOfMarks();
stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed);

size_t granules_dropped = 0;
ranges.ranges = filterMarksUsingIndex(
index_and_condition.index,
index_and_condition.condition,
part,
ranges.ranges,
settings,
reader_settings,
granules_dropped,
mark_cache.get(),
uncompressed_cache.get(),
log);

stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
stat.granules_dropped.fetch_add(total_granules - ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);
if (ranges.ranges.empty())
stat.parts_dropped.fetch_add(1, std::memory_order_relaxed);
}
Expand All @@ -1010,17 +1009,15 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
auto & stat = merged_indices_stat[idx];
stat.total_parts.fetch_add(1, std::memory_order_relaxed);

size_t total_granules = 0;
size_t granules_dropped = 0;
size_t total_granules = ranges.ranges.getNumberOfMarks();
ranges.ranges = filterMarksUsingMergedIndex(
indices_and_condition.indices, indices_and_condition.condition,
part, ranges.ranges,
settings, reader_settings,
total_granules, granules_dropped,
mark_cache.get(), uncompressed_cache.get(), log);

stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
stat.granules_dropped.fetch_add(total_granules - ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);

if (ranges.ranges.empty())
stat.parts_dropped.fetch_add(1, std::memory_order_relaxed);
Expand Down Expand Up @@ -1578,7 +1575,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log)
Expand Down Expand Up @@ -1648,8 +1644,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
{
// vector of indexes of useful ranges
auto result = ann_condition->getUsefulRanges(granule);
if (result.empty())
++granules_dropped;

for (auto range : result)
{
Expand All @@ -1674,10 +1668,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
result = cache_in_store.store ? gin_filter_condition->mayBeTrueOnGranuleInPart(granule, cache_in_store) : true;

if (!result)
{
++granules_dropped;
continue;
}

MarkRange data_range(
std::max(ranges[i].begin, index_mark * index_granularity),
Expand All @@ -1702,8 +1693,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & total_granules,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log)
Expand Down Expand Up @@ -1760,8 +1749,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
for (auto & reader : readers)
reader->seek(index_range.begin);

total_granules += index_range.end - index_range.begin;

for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
{
if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin)
Expand All @@ -1774,10 +1761,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
}

if (!condition->mayBeTrueOnGranule(granules))
{
++granules_dropped;
continue;
}

MarkRange data_range(
std::max(range.begin, index_mark * index_granularity),
Expand Down
3 changes: 0 additions & 3 deletions src/Storages/MergeTree/MergeTreeDataSelectExecutor.h
Expand Up @@ -93,7 +93,6 @@ class MergeTreeDataSelectExecutor
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log);
Expand All @@ -105,8 +104,6 @@ class MergeTreeDataSelectExecutor
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & total_granules,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log);
Expand Down
8 changes: 4 additions & 4 deletions tests/queries/0_stateless/01786_explain_merge_tree.reference
Expand Up @@ -24,12 +24,12 @@
Name: t_minmax
Description: minmax GRANULARITY 2
Parts: 1/2
Granules: 4/6
Granules: 3/6
Skip
Name: t_set
Description: set GRANULARITY 2
Parts: 1/1
Granules: 2/4
Granules: 2/3
-----------------
"Node Type": "ReadFromMergeTree",
"Description": "default.test_index",
Expand Down Expand Up @@ -68,15 +68,15 @@
"Initial Parts": 2,
"Selected Parts": 1,
"Initial Granules": 6,
"Selected Granules": 4
"Selected Granules": 3
},
{
"Type": "Skip",
"Name": "t_set",
"Description": "set GRANULARITY 2",
"Initial Parts": 1,
"Selected Parts": 1,
"Initial Granules": 4,
"Initial Granules": 3,
"Selected Granules": 2
}
]
Expand Down
8 changes: 4 additions & 4 deletions tests/queries/0_stateless/02354_annoy_index.reference
Expand Up @@ -94,7 +94,7 @@ Expression ((Projection + Before ORDER BY))
Name: annoy_index
Description: annoy GRANULARITY 2
Parts: 0/1
Granules: 2/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
Expand All @@ -110,7 +110,7 @@ Expression (Projection)
Name: annoy_index
Description: annoy GRANULARITY 2
Parts: 1/1
Granules: 4/4
Granules: 2/4
--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---
WHERE type, L2Distance, check that index is used
Expression ((Projection + Before ORDER BY))
Expand All @@ -125,7 +125,7 @@ Expression ((Projection + Before ORDER BY))
Name: annoy_index
Description: annoy GRANULARITY 4
Parts: 0/1
Granules: 3/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
Expand All @@ -141,7 +141,7 @@ Expression (Projection)
Name: annoy_index
Description: annoy GRANULARITY 4
Parts: 1/1
Granules: 4/4
Granules: 1/4
--- Test correctness of Annoy index with > 1 mark
1 [1,0,0,0]
9000 [9000,0,0,0]
Expand Down
8 changes: 4 additions & 4 deletions tests/queries/0_stateless/02354_usearch_index.reference
Expand Up @@ -93,7 +93,7 @@ Expression ((Projection + Before ORDER BY))
Name: usearch_index
Description: usearch GRANULARITY 2
Parts: 0/1
Granules: 2/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
Expand All @@ -109,7 +109,7 @@ Expression (Projection)
Name: usearch_index
Description: usearch GRANULARITY 2
Parts: 1/1
Granules: 4/4
Granules: 2/4
--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---
WHERE type, L2Distance, check that index is used
Expression ((Projection + Before ORDER BY))
Expand All @@ -124,7 +124,7 @@ Expression ((Projection + Before ORDER BY))
Name: usearch_index
Description: usearch GRANULARITY 4
Parts: 0/1
Granules: 3/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
Expand All @@ -140,4 +140,4 @@ Expression (Projection)
Name: usearch_index
Description: usearch GRANULARITY 4
Parts: 1/1
Granules: 4/4
Granules: 1/4
@@ -0,0 +1,28 @@
Expression ((Project names + Projection))
Filter ((WHERE + Change column names to column identifiers))
ReadFromMergeTree (default.test_skip_idx)
Indexes:
Skip
Name: name_idx_g2
Description: minmax GRANULARITY 2
Parts: 1/1
Granules: 2/5
Skip
Name: name_idx_g1
Description: minmax GRANULARITY 1
Parts: 1/1
Granules: 1/2
Expression ((Project names + Projection))
Filter ((WHERE + Change column names to column identifiers))
ReadFromMergeTree (default.test_skip_idx)
Indexes:
Skip
Name: name_idx_g2
Description: minmax GRANULARITY 2
Parts: 1/1
Granules: 2/5
Skip
Name: name_idx_g1
Description: minmax GRANULARITY 1
Parts: 1/1
Granules: 2/2
24 changes: 24 additions & 0 deletions tests/queries/0_stateless/02866_size_of_marks_skip_idx_explain.sql
@@ -0,0 +1,24 @@
-- Tags: no-random-merge-tree-settings

SET optimize_move_to_prewhere = 1;
SET convert_query_to_cnf = 0;
SET optimize_read_in_order = 1;

SET allow_experimental_analyzer = 1; -- slightly different operator names than w/o

DROP TABLE IF EXISTS test_skip_idx;

CREATE TABLE test_skip_idx (
id UInt32,
INDEX name_idx_g2 id TYPE minmax GRANULARITY 2,
INDEX name_idx_g1 id TYPE minmax GRANULARITY 1)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;

INSERT INTO test_skip_idx SELECT number FROM system.numbers LIMIT 5 OFFSET 1;

EXPLAIN indexes = 1 SELECT * FROM test_skip_idx WHERE id < 2;
EXPLAIN indexes = 1 SELECT * FROM test_skip_idx WHERE id < 3;

DROP TABLE test_skip_idx;

0 comments on commit 45d924c

Please sign in to comment.