From 09cd955aa69453f6a272f21cbd41f7ab2ec1e2ba Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 28 Sep 2021 12:46:34 +0300 Subject: [PATCH 1/3] Remove filter column from HAVING when it is not needed. --- .../2025_having_filter_column.reference | 0 .../0_stateless/2025_having_filter_column.sql | 40 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tests/queries/0_stateless/2025_having_filter_column.reference create mode 100644 tests/queries/0_stateless/2025_having_filter_column.sql diff --git a/tests/queries/0_stateless/2025_having_filter_column.reference b/tests/queries/0_stateless/2025_having_filter_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/2025_having_filter_column.sql b/tests/queries/0_stateless/2025_having_filter_column.sql new file mode 100644 index 000000000000..aab419adc160 --- /dev/null +++ b/tests/queries/0_stateless/2025_having_filter_column.sql @@ -0,0 +1,40 @@ +drop table if exists test; + +-- #29010 +CREATE TABLE test +( + d DateTime, + a String, + b UInt64 +) +ENGINE = MergeTree +PARTITION BY toDate(d) +ORDER BY d; + +SELECT * +FROM ( + SELECT + a, + max((d, b)).2 AS value + FROM test + GROUP BY rollup(a) +) +WHERE a <> ''; + +-- the same query, but after syntax optimization +SELECT + a, + value +FROM +( + SELECT + a, + max((d, b)).2 AS value + FROM test + GROUP BY a + WITH ROLLUP + HAVING a != '' +) +WHERE a != ''; + +drop table if exists test; From 1a4ee1f77f95d8e8e4db737a9f88988ed0d666dc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 28 Sep 2021 13:01:47 +0300 Subject: [PATCH 2/3] Remove filter column from HAVING when it is not needed. --- src/Interpreters/ExpressionAnalyzer.cpp | 38 ++++++++++++------- src/Interpreters/ExpressionAnalyzer.h | 9 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 13 ++++--- src/Interpreters/InterpreterSelectQuery.h | 4 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 10 ++++- src/Processors/QueryPlan/TotalsHavingStep.h | 2 + .../Transforms/TotalsHavingTransform.cpp | 21 +++++++++- .../Transforms/TotalsHavingTransform.h | 4 +- 8 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1dd63695ad4d..182553cb91bb 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1481,18 +1481,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( const Settings & settings = context->getSettingsRef(); const ConstStoragePtr & storage = query_analyzer.storage(); - bool finalized = false; - size_t where_step_num = 0; + ssize_t prewhere_step_num = -1; + ssize_t where_step_num = -1; + ssize_t having_step_num = -1; auto finalize_chain = [&](ExpressionActionsChain & chain) { chain.finalize(); - if (!finalized) - { - finalize(chain, where_step_num, query); - finalized = true; - } + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); chain.clear(); }; @@ -1523,6 +1520,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) { + /// Prewhere is always the first one. + prewhere_step_num = 0; prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) @@ -1591,6 +1590,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (query_analyzer.appendHaving(chain, only_types || !second_stage)) { + having_step_num = chain.steps.size() - 1; before_having = chain.getLastActions(); chain.addStep(); } @@ -1691,13 +1691,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( checkActions(); } -void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query) +void ExpressionAnalysisResult::finalize( + const ExpressionActionsChain & chain, + ssize_t & prewhere_step_num, + ssize_t & where_step_num, + ssize_t & having_step_num, + const ASTSelectQuery & query) { - size_t next_step_i = 0; - - if (hasPrewhere()) + if (prewhere_step_num >= 0) { - const ExpressionActionsChain::Step & step = *chain.steps.at(next_step_i++); + const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); prewhere_info->prewhere_actions->projectInput(false); NameSet columns_to_remove; @@ -1710,12 +1713,21 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si } columns_to_remove_after_prewhere = std::move(columns_to_remove); + prewhere_step_num = -1; } - if (hasWhere()) + if (where_step_num >= 0) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; + where_step_num = -1; + } + + if (having_step_num >= 0) + { + having_column_name = query.having()->getColumnName(); + remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second; + having_step_num = -1; } } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 5e3a7af8e9ee..c785b085a570 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -229,6 +229,8 @@ struct ExpressionAnalysisResult ActionsDAGPtr before_where; ActionsDAGPtr before_aggregation; ActionsDAGPtr before_having; + String having_column_name; + bool remove_having_filter = false; ActionsDAGPtr before_window; ActionsDAGPtr before_order_by; ActionsDAGPtr before_limit_by; @@ -274,7 +276,12 @@ struct ExpressionAnalysisResult void removeExtraColumns() const; void checkActions() const; - void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query); + void finalize( + const ExpressionActionsChain & chain, + ssize_t & prewhere_step_num, + ssize_t & where_step_num, + ssize_t & having_step_num, + const ASTSelectQuery & query); }; /// SelectQuery specific ExpressionAnalyzer part. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5fe9948f857f..e5adb03e08ef 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1248,7 +1248,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu { bool final = !query.group_by_with_rollup && !query.group_by_with_cube; executeTotalsAndHaving( - query_plan, expressions.hasHaving(), expressions.before_having, aggregate_overflow_row, final); + query_plan, expressions.hasHaving(), expressions.before_having, expressions.remove_having_filter, aggregate_overflow_row, final); } if (query.group_by_with_rollup) @@ -1262,11 +1262,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu throw Exception( "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING", ErrorCodes::NOT_IMPLEMENTED); - executeHaving(query_plan, expressions.before_having); + executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter); } } else if (expressions.hasHaving()) - executeHaving(query_plan, expressions.before_having); + executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter); } else if (query.group_by_with_totals || query.group_by_with_rollup || query.group_by_with_cube) throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::NOT_IMPLEMENTED); @@ -2133,10 +2133,10 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool } -void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression) +void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) { auto having_step - = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false); + = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter); having_step->setStepDescription("HAVING"); query_plan.addStep(std::move(having_step)); @@ -2144,7 +2144,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( - QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final) + QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final) { const Settings & settings = context->getSettingsRef(); @@ -2153,6 +2153,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving( overflow_row, expression, has_having ? getSelectQuery().having()->getColumnName() : "", + remove_filter, settings.totals_mode, settings.totals_auto_threshold, final); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index aec3b0b8bd38..99c95a8d6248 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -131,8 +131,8 @@ class InterpreterSelectQuery : public IInterpreterUnionOrSelectQuery void executeAggregation( QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final); - void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final); - void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression); + void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final); + void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); /// FIXME should go through ActionsDAG to behave as a proper function void executeWindow(QueryPlan & query_plan); diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index db82538d5a0f..4cac12639a89 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -30,6 +30,7 @@ TotalsHavingStep::TotalsHavingStep( bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_) @@ -38,11 +39,14 @@ TotalsHavingStep::TotalsHavingStep( TotalsHavingTransform::transformHeader( input_stream_.header, actions_dag_.get(), + filter_column_, + remove_filter_, final_), getTraits(!filter_column_.empty())) , overflow_row(overflow_row_) , actions_dag(actions_dag_) , filter_column_name(filter_column_) + , remove_filter(remove_filter_) , totals_mode(totals_mode_) , auto_include_threshold(auto_include_threshold_) , final(final_) @@ -58,6 +62,7 @@ void TotalsHavingStep::transformPipeline(QueryPipeline & pipeline, const BuildQu overflow_row, expression_actions, filter_column_name, + remove_filter, totals_mode, auto_include_threshold, final); @@ -85,7 +90,10 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr void TotalsHavingStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, ' '); - settings.out << prefix << "Filter column: " << filter_column_name << '\n'; + settings.out << prefix << "Filter column: " << filter_column_name; + if (remove_filter) + settings.out << " (removed)"; + settings.out << '\n'; settings.out << prefix << "Mode: " << totalsModeToString(totals_mode, auto_include_threshold) << '\n'; if (actions_dag) diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index bc053c96970b..1ad98a70a01f 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -18,6 +18,7 @@ class TotalsHavingStep : public ITransformingStep bool overflow_row_, const ActionsDAGPtr & actions_dag_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_); @@ -35,6 +36,7 @@ class TotalsHavingStep : public ITransformingStep bool overflow_row; ActionsDAGPtr actions_dag; String filter_column_name; + bool remove_filter; TotalsMode totals_mode; double auto_include_threshold; bool final; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 9724d332f15e..8497b4b0069f 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -28,13 +28,22 @@ void finalizeChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } -Block TotalsHavingTransform::transformHeader(Block block, const ActionsDAG * expression, bool final) +Block TotalsHavingTransform::transformHeader( + Block block, + const ActionsDAG * expression, + const std::string & filter_column_name, + bool remove_filter, + bool final) { if (final) finalizeBlock(block); if (expression) + { block = expression->updateHeader(std::move(block)); + if (remove_filter) + block.erase(filter_column_name); + } return block; } @@ -44,13 +53,15 @@ TotalsHavingTransform::TotalsHavingTransform( bool overflow_row_, const ExpressionActionsPtr & expression_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_) - : ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, final_), true) + : ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, filter_column_, remove_filter_, final_), true) , overflow_row(overflow_row_) , expression(expression_) , filter_column_name(filter_column_) + , remove_filter(remove_filter_) , totals_mode(totals_mode_) , auto_include_threshold(auto_include_threshold_) , final(final_) @@ -67,6 +78,8 @@ TotalsHavingTransform::TotalsHavingTransform( auto totals_header = finalized_header; size_t num_rows = totals_header.rows(); expression->execute(totals_header, num_rows); + if (remove_filter) + totals_header.erase(filter_column_name); outputs.emplace_back(totals_header, this); } else @@ -167,6 +180,8 @@ void TotalsHavingTransform::transform(Chunk & chunk) } expression->execute(finalized_block, num_rows); + if (remove_filter) + finalized_block.erase(filter_column_name); auto columns = finalized_block.getColumns(); ColumnPtr filter_column_ptr = columns[filter_column_pos]; @@ -270,6 +285,8 @@ void TotalsHavingTransform::prepareTotals() size_t num_rows = totals.getNumRows(); auto block = finalized_header.cloneWithColumns(totals.detachColumns()); expression->execute(block, num_rows); + if (remove_filter) + block.erase(filter_column_name); /// Note: after expression totals may have several rows if `arrayJoin` was used in expression. totals = Chunk(block.getColumns(), num_rows); } diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index d42543d311ae..03635054c653 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -28,6 +28,7 @@ class TotalsHavingTransform : public ISimpleTransform bool overflow_row_, const ExpressionActionsPtr & expression_, const std::string & filter_column_, + bool remove_filter_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_); @@ -39,7 +40,7 @@ class TotalsHavingTransform : public ISimpleTransform Status prepare() override; void work() override; - static Block transformHeader(Block block, const ActionsDAG * expression, bool final); + static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final); protected: void transform(Chunk & chunk) override; @@ -55,6 +56,7 @@ class TotalsHavingTransform : public ISimpleTransform bool overflow_row; ExpressionActionsPtr expression; String filter_column_name; + bool remove_filter; TotalsMode totals_mode; double auto_include_threshold; bool final; From ebe4cb0bbb86024e37db833dff1da1be9b351a6e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 28 Sep 2021 15:32:43 +0300 Subject: [PATCH 3/3] Fix tests. --- src/Processors/Transforms/TotalsHavingTransform.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 8497b4b0069f..c475b87e08f6 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -66,9 +66,6 @@ TotalsHavingTransform::TotalsHavingTransform( , auto_include_threshold(auto_include_threshold_) , final(final_) { - if (!filter_column_name.empty()) - filter_column_pos = outputs.front().getHeader().getPositionByName(filter_column_name); - finalized_header = getInputPort().getHeader(); finalizeBlock(finalized_header); @@ -78,12 +75,17 @@ TotalsHavingTransform::TotalsHavingTransform( auto totals_header = finalized_header; size_t num_rows = totals_header.rows(); expression->execute(totals_header, num_rows); + filter_column_pos = totals_header.getPositionByName(filter_column_name); if (remove_filter) totals_header.erase(filter_column_name); outputs.emplace_back(totals_header, this); } else + { + if (!filter_column_name.empty()) + filter_column_pos = finalized_header.getPositionByName(filter_column_name); outputs.emplace_back(finalized_header, this); + } /// Initialize current totals with initial state. current_totals.reserve(header.columns()); @@ -180,11 +182,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) } expression->execute(finalized_block, num_rows); + ColumnPtr filter_column_ptr = finalized_block.getByPosition(filter_column_pos).column; if (remove_filter) finalized_block.erase(filter_column_name); auto columns = finalized_block.getColumns(); - ColumnPtr filter_column_ptr = columns[filter_column_pos]; ConstantFilterDescription const_filter_description(*filter_column_ptr); if (const_filter_description.always_true)