Skip to content

Commit

Permalink
Fix PREWHERE with FINAL (forbid reusing expression calculated on PREW…
Browse files Browse the repository at this point in the history
…HERE step)

In case of FINAL the result of the expression for PREWHERE can be
changed after applying merge algorithm.

Note, analyzer does not has this bug because it simply does not reuse
columns from PREWHERE (they does not passed via
GlobalPlannerContext::createColumnIdentifier())

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
v2: update tests references (see https://s3.amazonaws.com/clickhouse-test-reports/54436/baaa688f40fcd064952f62dc8c62ddc752dd0046/fast_test.html)
  • Loading branch information
azat committed Sep 8, 2023
1 parent 0095124 commit c5918fc
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 9 deletions.
11 changes: 6 additions & 5 deletions src/Interpreters/ExpressionAnalyzer.cpp
Expand Up @@ -1213,11 +1213,12 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
/// Reasons:
/// 1. Remove remove source columns which are used only in prewhere actions during prewhere actions execution.
/// Example: select A prewhere B > 0. B can be removed at prewhere step.
/// 2. Store side columns which were calculated during prewhere actions execution if they are used.
/// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step.
/// 2. Do NOT store side columns which were calculated during prewhere actions execution if they are used.
/// Example: select F(A) prewhere F(A) > 0. F(A) CANNOT be saved from prewhere step,
/// since it can be changed after applying merge algorithm.
/// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN.
ColumnsWithTypeAndName columns = prewhere_actions->getResultColumns();
auto required_columns = prewhere_actions->getRequiredColumns();

NameSet prewhere_input_names;
NameSet unused_source_columns;

Expand All @@ -1228,13 +1229,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
{
if (!prewhere_input_names.contains(column.name))
{
columns.emplace_back(column.type, column.name);
required_columns.emplace_back(NameAndTypePair{column.name, column.type});
unused_source_columns.emplace(column.name);
}
}

chain.steps.emplace_back(
std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(std::make_shared<ActionsDAG>(std::move(columns))));
std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(std::make_shared<ActionsDAG>(std::move(required_columns))));
chain.steps.back()->additional_input = std::move(unused_source_columns);
chain.getLastActions();
chain.addStep();
Expand Down
Expand Up @@ -111,13 +111,13 @@ select * from (explain plan actions = 1 select * from tab where (a + b) * c = 8
select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c;
1 1 1 1
1 1 1 1
select * from (explain plan actions = 1 select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c) where explain ilike '%sort description%';
select * from (explain plan actions = 1 select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c settings optimize_move_to_prewhere=0) where explain ilike '%sort description%';
Prefix sort description: multiply(plus(d, 1), 4) ASC, multiply(plus(a, b), c) ASC
Result sort description: multiply(plus(d, 1), 4) ASC, multiply(plus(a, b), c) ASC
select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b);
2 2 2 2
2 2 2 2
select * from (explain plan actions = 1 select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b)) where explain ilike '%sort description%';
select * from (explain plan actions = 1 select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b) settings optimize_move_to_prewhere=0) where explain ilike '%sort description%';
Prefix sort description: multiply(plus(d, 1), 4) ASC, sin(divide(a, b)) ASC
Result sort description: multiply(plus(d, 1), 4) ASC, sin(divide(a, b)) ASC
-- Wrong order with fixed point
Expand Down
Expand Up @@ -42,10 +42,10 @@ select * from tab where (a + b) * c = 8 order by sin(a / b);
select * from (explain plan actions = 1 select * from tab where (a + b) * c = 8 order by sin(a / b)) where explain ilike '%sort description%';

select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c;
select * from (explain plan actions = 1 select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c) where explain ilike '%sort description%';
select * from (explain plan actions = 1 select * from tab where d + 1 = 2 order by (d + 1) * 4, (a + b) * c settings optimize_move_to_prewhere=0) where explain ilike '%sort description%';

select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b);
select * from (explain plan actions = 1 select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b)) where explain ilike '%sort description%';
select * from (explain plan actions = 1 select * from tab where d + 1 = 3 and (a + b) = 4 and c = 2 order by (d + 1) * 4, sin(a / b) settings optimize_move_to_prewhere=0) where explain ilike '%sort description%';

-- Wrong order with fixed point
select * from tab where (a + b) * c = 8 order by sin(b / a);
Expand Down
Empty file.
9 changes: 9 additions & 0 deletions tests/queries/0_stateless/02872_prewhere_filter.sql
@@ -0,0 +1,9 @@
drop table if exists data;

create table data (key Int, val1 SimpleAggregateFunction(max, Nullable(Int)), val2 SimpleAggregateFunction(min, Int)) engine=AggregatingMergeTree() order by key;
system stop merges data;

insert into data values (1,10,100);
insert into data values (1,20,10);

select key, val1, val2, assumeNotNull(val1) > val2 x1, val1 > val2 x2 from data final prewhere assumeNotNull(val1) > 0 where x1 != x2 settings max_threads=1;

0 comments on commit c5918fc

Please sign in to comment.