Skip to content

Commit

Permalink
Merge pull request #53511 from Enmk/fix-ReplacingMergeTree-do_not_mer…
Browse files Browse the repository at this point in the history
…ge_across_partitions_select_final

Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final
  • Loading branch information
alexey-milovidov committed Aug 28, 2023
2 parents cba1d66 + f896c63 commit b7d8ebf
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/Processors/QueryPlan/ReadFromMergeTree.cpp
Expand Up @@ -1038,7 +1038,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
/// MergeTreeReadPool and MergeTreeThreadSelectProcessor for parallel select.
if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final &&
std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
parts_to_merge_ranges[range_index]->data_part->info.level > 0)
parts_to_merge_ranges[range_index]->data_part->info.level > 0
&& data.merging_params.is_deleted_column.empty())
{
sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount();
lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index]));
Expand Down Expand Up @@ -1094,7 +1095,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
/// with level > 0 then we won't postprocess this part
if (settings.do_not_merge_across_partitions_select_final &&
std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
parts_to_merge_ranges[range_index]->data_part->info.level > 0)
parts_to_merge_ranges[range_index]->data_part->info.level > 0 &&
data.merging_params.is_deleted_column.empty())
{
partition_pipes.emplace_back(Pipe::unitePipes(std::move(pipes)));
continue;
Expand Down
@@ -0,0 +1,31 @@
--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685
--- Verify that ReplacingMergeTree properly handles _is_deleted:
--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition.
-- { echoOn }

DROP TABLE IF EXISTS t;
CREATE TABLE t
(
`account_id` UInt64,
`_is_deleted` UInt8,
`_version` UInt64
)
ENGINE = ReplacingMergeTree(_version, _is_deleted)
ORDER BY (account_id);
INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3);
-- Mark the first 100 rows as deleted.
INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2);
-- Put everything in one partition
OPTIMIZE TABLE t FINAL;
SELECT count() FROM t;
1000
SELECT count() FROM t FINAL;
900
-- Both should produce the same number of rows.
-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows,
-- as if no rows were deleted.
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1;
900
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0;
900
DROP TABLE t;
@@ -0,0 +1,32 @@
--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685
--- Verify that ReplacingMergeTree properly handles _is_deleted:
--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition.
-- { echoOn }

DROP TABLE IF EXISTS t;
CREATE TABLE t
(
`account_id` UInt64,
`_is_deleted` UInt8,
`_version` UInt64
)
ENGINE = ReplacingMergeTree(_version, _is_deleted)
ORDER BY (account_id);

INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3);
-- Mark the first 100 rows as deleted.
INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2);

-- Put everything in one partition
OPTIMIZE TABLE t FINAL;

SELECT count() FROM t;
SELECT count() FROM t FINAL;

-- Both should produce the same number of rows.
-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows,
-- as if no rows were deleted.
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1;
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0;

DROP TABLE t;

0 comments on commit b7d8ebf

Please sign in to comment.