Skip to content

Commit

Permalink
Merge pull request #51734 from ClickHouse/backport/23.6/51610
Browse files Browse the repository at this point in the history
Backport #51610 to 23.6: Fix for moving 'IN' conditions to PREWHERE
  • Loading branch information
davenger committed Jul 11, 2023
2 parents 750445d + cc5d82a commit c834992
Show file tree
Hide file tree
Showing 13 changed files with 126 additions and 13 deletions.
3 changes: 3 additions & 0 deletions src/Processors/QueryPlan/Optimizations/Optimizations.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ namespace QueryPlanOptimizations
void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Second pass is used to apply read-in-order and attach a predicate to PK.
void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them.
/// After that it add CreateSetsStep for the subqueries that has not be used in the filters.
void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes);

/// Optimization (first pass) is a function applied to QueryPlan::Node.
/// It can read and update subtree of specified node.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ void optimizePrimaryKeyCondition(const Stack & stack)
else
break;
}

source_step_with_filter->onAddFilterFinish();
}

}
31 changes: 30 additions & 1 deletion src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
optimizePrewhere(stack, nodes);
optimizePrimaryKeyCondition(stack);
enableMemoryBoundMerging(*stack.back().node, nodes);
addPlansForSets(*stack.back().node, nodes);

stack.pop_back();
}
Expand All @@ -178,5 +177,35 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
"No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1");
}

void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
{
Stack stack;
stack.push_back({.node = &root});

while (!stack.empty())
{
/// NOTE: frame cannot be safely used after stack was modified.
auto & frame = stack.back();

/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{
auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
++frame.next_child;
stack.push_back(next_frame);
continue;
}

if (auto * source_step_with_filter = dynamic_cast<SourceStepWithFilter *>(frame.node->step.get()))
{
source_step_with_filter->applyFilters();
}

addPlansForSets(*frame.node, nodes);

stack.pop_back();
}
}

}
}
1 change: 1 addition & 0 deletions src/Processors/QueryPlan/QueryPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett

QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes);

updateDataStreams(*root);
}
Expand Down
9 changes: 3 additions & 6 deletions src/Processors/QueryPlan/ReadFromMergeTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1299,13 +1299,10 @@ static void buildIndexes(
indexes->skip_indexes = std::move(skip_indexes);
}

void ReadFromMergeTree::onAddFilterFinish()
void ReadFromMergeTree::applyFilters()
{
if (!filter_nodes.nodes.empty())
{
auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info);
buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading);
}
auto filter_actions_dag = buildFilterDAG(context, prewhere_info, filter_nodes, query_info);
buildIndexes(indexes, filter_actions_dag, data, context, query_info, metadata_for_reading);
}

MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
Expand Down
2 changes: 1 addition & 1 deletion src/Processors/QueryPlan/ReadFromMergeTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class ReadFromMergeTree final : public SourceStepWithFilter
size_t getNumStreams() const { return requested_num_streams; }
bool isParallelReadingEnabled() const { return read_task_callback != std::nullopt; }

void onAddFilterFinish() override;
void applyFilters() override;

private:
static MergeTreeDataSelectAnalysisResultPtr selectRangesToReadImpl(
Expand Down
3 changes: 2 additions & 1 deletion src/Processors/QueryPlan/SourceStepWithFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ class SourceStepWithFilter : public ISourceStep
filter_dags.push_back(std::move(filter_dag));
}

virtual void onAddFilterFinish() {}
/// Apply filters that can optimize reading from storage.
virtual void applyFilters() {}

protected:
std::vector<ActionsDAGPtr> filter_dags;
Expand Down
3 changes: 3 additions & 0 deletions src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & colu
if (node.isConstant())
return;

if (node.isSubqueryOrSet())
return;

if (!node.isFunction())
{
auto column_name = node.getColumnName();
Expand Down
15 changes: 15 additions & 0 deletions src/Storages/MergeTree/RPNBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,21 @@ bool RPNBuilderTreeNode::isConstant() const
}
}

bool RPNBuilderTreeNode::isSubqueryOrSet() const
{
if (ast_node)
{
return
typeid_cast<const ASTSubquery *>(ast_node) ||
typeid_cast<const ASTTableIdentifier *>(ast_node);
}
else
{
const auto * node_without_alias = getNodeWithoutAlias(dag_node);
return node_without_alias->result_type->getTypeId() == TypeIndex::Set;
}
}

ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const
{
if (!isConstant())
Expand Down
2 changes: 2 additions & 0 deletions src/Storages/MergeTree/RPNBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ class RPNBuilderTreeNode
/// Is node constant
bool isConstant() const;

bool isSubqueryOrSet() const;

/** Get constant as constant column.
* Node must be constant before calling these method, otherwise logical exception is thrown.
*/
Expand Down
4 changes: 2 additions & 2 deletions src/Storages/System/StorageSystemZooKeeper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ class ReadFromSystemZooKeeper final : public SourceStepWithFilter

void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override;

void onAddFilterFinish() override;
void applyFilters() override;

private:
void fillData(MutableColumns & res_columns);
Expand Down Expand Up @@ -421,7 +421,7 @@ static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, Cont
}


void ReadFromSystemZooKeeper::onAddFilterFinish()
void ReadFromSystemZooKeeper::applyFilters()
{
paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper);
}
Expand Down
8 changes: 8 additions & 0 deletions tests/queries/0_stateless/02809_prewhere_and_in.reference
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
PREWHERE a IN
PREWHERE a IN
PREWHERE a IN
PREWHERE a IN
PREWHERE b NOT IN
PREWHERE b NOT IN
PREWHERE b NOT IN
PREWHERE b NOT IN
56 changes: 56 additions & 0 deletions tests/queries/0_stateless/02809_prewhere_and_in.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
DROP TABLE IF EXISTS t_02809;

CREATE TABLE t_02809(a Int64, b Int64, s String)
ENGINE=MergeTree order by tuple()
AS SELECT number, number%10, toString(arrayMap(i-> cityHash64(i*number), range(50))) FROM numbers(10000);

CREATE TABLE t_02809_set(c Int64)
ENGINE=Set()
AS SELECT * FROM numbers(10);

CREATE TABLE t_02809_aux(c Int64)
ENGINE=Memory()
AS SELECT * FROM numbers(10);


SET optimize_move_to_prewhere=1;

-- Queries with 'IN'
SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one)
) WHERE explain LIKE '%WHERE%';

SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN (1,2,3)
) WHERE explain LIKE '%WHERE%';

SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN t_02809_set
) WHERE explain LIKE '%WHERE%';

SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE a IN t_02809_aux
) WHERE explain LIKE '%WHERE%';


-- Queries with 'NOT IN'
SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one)
) WHERE explain LIKE '%WHERE%';

SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN (1,2,3)
) WHERE explain LIKE '%WHERE%';

SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN t_02809_set
) WHERE explain LIKE '%WHERE%';

SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX
SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux
) WHERE explain LIKE '%WHERE%';


DROP TABLE t_02809;
DROP TABLE t_02809_set;
DROP TABLE t_02809_aux;

0 comments on commit c834992

Please sign in to comment.