Skip to content

Commit

Permalink
Merge pull request #38708 from ClickHouse/backport/22.5/38371
Browse files Browse the repository at this point in the history
Backport #38371 to 22.5: Fix `DISTINCT` with `LIMIT` in distributed queries
  • Loading branch information
CurtizJ committed Jul 3, 2022
2 parents 2509804 + 381552f commit 44520b8
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/Interpreters/InterpreterSelectQuery.cpp
Expand Up @@ -1191,8 +1191,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
query_info.input_order_info ? query_info.input_order_info
: (query_info.projection ? query_info.projection->input_order_info : nullptr));

if (expressions.has_order_by && query.limitLength())
executeDistinct(query_plan, false, expressions.selected_columns, true);
/// pre_distinct = false, because if we have both LIMIT and DISTINCT,
/// we need to merge all streams into one and calculate DISTINCT over the merged stream.
/// Otherwise we could take several equal values from different streams
/// while filling the limit and skip some distinct values.
if (query.limitLength())
executeDistinct(query_plan, false, expressions.selected_columns, false);

if (expressions.hasLimitBy())
{
Expand Down
@@ -0,0 +1,6 @@
-1
1
11
12
13
14
26 changes: 26 additions & 0 deletions tests/queries/0_stateless/02344_distinct_limit_distiributed.sql
@@ -0,0 +1,26 @@
-- Regression test: `SELECT DISTINCT ... LIMIT` over a distributed (remote) read
-- must return every distinct value when the limit exceeds the number of distinct values.
drop table if exists t_distinct_limit;

-- One partition per calendar month of `d`; rows inside a partition are ordered by `d`.
create table t_distinct_limit (d Date, id Int64)
engine = MergeTree partition by toYYYYMM(d) order by d;

-- NOTE(review): presumably set so that the table is read in several parallel streams -- confirm.
set max_threads = 10;

-- Each insert is a separate statement. The data has six distinct ids in total:
--   -1 : 3 x 1e6 duplicate rows (month 2021-12)
--    1 : 2 x 1e6 duplicate rows (month 2022-12)
--   11 : a single row           (month 2022-12)
--   12, 13, 14 : a single row each (month 2023-12)
-- The heavily duplicated ids vastly outnumber the ids that occur only once.
insert into t_distinct_limit select '2021-12-15', -1 from numbers(1e6);
insert into t_distinct_limit select '2021-12-15', -1 from numbers(1e6);
insert into t_distinct_limit select '2021-12-15', -1 from numbers(1e6);
insert into t_distinct_limit select '2022-12-15', 1 from numbers(1e6);
insert into t_distinct_limit select '2022-12-15', 1 from numbers(1e6);
insert into t_distinct_limit select '2022-12-16', 11 from numbers(1);
insert into t_distinct_limit select '2023-12-16', 12 from numbers(1);
insert into t_distinct_limit select '2023-12-16', 13 from numbers(1);
insert into t_distinct_limit select '2023-12-16', 14 from numbers(1);

-- NOTE(review): a small block size presumably makes the duplicated ids arrive in many blocks -- confirm.
set max_block_size = 1024;

-- The inner query reads the same local table through remote() with two addresses and
-- applies DISTINCT with LIMIT 10. Only six distinct ids exist, so the limit must not
-- drop any of them. The outer ORDER BY fixes the row order of the result.
select id from
(
select distinct id from remote('127.0.0.1,127.0.0.2', currentDatabase(),t_distinct_limit) limit 10
)
order by id;

drop table if exists t_distinct_limit;

0 comments on commit 44520b8

Please sign in to comment.