Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid alias duplicates in PASTE JOIN and fix aliases requirement #58654

Merged
merged 19 commits into from Jan 22, 2024
Merged
36 changes: 36 additions & 0 deletions src/Analyzer/Passes/QueryAnalysisPass.cpp
Expand Up @@ -1207,6 +1207,8 @@ class QueryAnalyzer

static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);

static void checkDuplicateTableNamesOrAlias(QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope);

static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);

static void expandGroupByAll(QueryNode & query_tree_node_typed);
Expand Down Expand Up @@ -2243,6 +2245,9 @@ void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodeP
if (table_expression_has_alias)
return;

if (join_node->as<JoinNode &>().getKind() == JoinKind::Paste)
yariks5s marked this conversation as resolved.
Show resolved Hide resolved
return;

auto * query_node = table_expression_node->as<QueryNode>();
auto * union_node = table_expression_node->as<UnionNode>();
if ((query_node && !query_node->getCTEName().empty()) || (union_node && !union_node->getCTEName().empty()))
Expand Down Expand Up @@ -6777,6 +6782,34 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
}
}

/** Reject a PASTE JOIN whose two sides expose projection columns with the same name.
  *
  * The check runs only when the `joined_subquery_requires_alias` setting is enabled and
  * `join_node` is a PASTE join; in every other case the function returns without doing anything.
  * Projection column names are collected from each side that is a QueryNode (subquery).
  * A side of any other node type contributes no names, and if neither side is a QueryNode
  * the check is skipped.
  *
  * Throws LOGICAL_ERROR if no projection column names were collected from the subquery side(s).
  * Throws BAD_ARGUMENTS if the same name occurs more than once among the collected names.
  */
void QueryAnalyzer::checkDuplicateTableNamesOrAlias(QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope)
{
    if (!scope.context->getSettingsRef().joined_subquery_requires_alias)
        return;

    if (join_node->as<JoinNode &>().getKind() != JoinKind::Paste)
        return;

    /// `as<QueryNode>()` yields nullptr when the table expression is not a subquery,
    /// so both pointers must be checked before they are dereferenced.
    const auto * left_query = left_table_expr->as<QueryNode>();
    const auto * right_query = right_table_expr->as<QueryNode>();

    if (!left_query && !right_query)
        return;

    Names column_names;

    if (left_query)
    {
        for (const auto & name_and_type : left_query->getProjectionColumns())
            column_names.push_back(name_and_type.name);
    }

    if (right_query)
    {
        for (const auto & name_and_type : right_query->getProjectionColumns())
            column_names.push_back(name_and_type.name);
    }

    if (column_names.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Names of projection columns cannot be empty");

    /// After sorting, equal names are adjacent, so one linear scan finds any duplicate.
    /// Duplicated names would lead to a broken result, hence the query is rejected.
    std::sort(column_names.begin(), column_names.end());
    if (std::adjacent_find(column_names.begin(), column_names.end()) != column_names.end())
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Name of columns and aliases should be unique for this query (you can add/change aliases so they will not be duplicated). "
            "While processing '{}'", join_node->formatASTForErrorMessage());
}

/// Resolve join node in scope
void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor)
{
Expand All @@ -6788,6 +6821,9 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
resolveQueryJoinTreeNode(join_node_typed.getRightTableExpression(), scope, expressions_visitor);
validateJoinTableExpressionWithoutAlias(join_node, join_node_typed.getRightTableExpression(), scope);

if (!join_node_typed.getLeftTableExpression()->hasAlias() && !join_node_typed.getRightTableExpression()->hasAlias())
checkDuplicateTableNamesOrAlias(join_node, join_node_typed.getLeftTableExpression(), join_node_typed.getRightTableExpression(), scope);

if (join_node_typed.isOnJoinExpression())
{
expressions_visitor.visit(join_node_typed.getJoinExpression());
Expand Down
1 change: 1 addition & 0 deletions src/Parsers/ExpressionElementParsers.cpp
Expand Up @@ -1451,6 +1451,7 @@ const char * ParserAlias::restricted_keywords[] =
"ASOF",
"BETWEEN",
"CROSS",
"PASTE",
Copy link
Member

@vdimir vdimir Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, I recently thought about it and found out that it can be a breaking change, and we need to mention it properly in a changelog, because paste is parsed as a keyword instead of an alias name. We need to consider which queries may stop working if someone uses paste as a subquery name: (SELECT ... ) paste ..., (SELECT ... ) AS paste ... and so on, and explain it in a changelog. Maybe something even already changed when we introduced PASTE join, but for me it became clear only after we found this thing with restricted_keywords

"FINAL",
"FORMAT",
"FROM",
Expand Down
6 changes: 6 additions & 0 deletions tests/queries/0_stateless/02933_paste_join.reference
Expand Up @@ -82,3 +82,9 @@ UInt64
7 2
8 1
9 0
0 0
1 1
2 2
3 3
4 4
5 5
7 changes: 6 additions & 1 deletion tests/queries/0_stateless/02933_paste_join.sql
@@ -1,6 +1,6 @@
select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10)) t2;
select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10) order by a desc) t2;
create table if not exists test (num UInt64) engine=Memory;
create table if not exists test (number UInt64) engine=Memory;
insert into test select number from numbers(6);
insert into test select number from numbers(5);
SELECT * FROM (SELECT 1) t1 PASTE JOIN (SELECT 2) SETTINGS joined_subquery_requires_alias=0;
Expand Down Expand Up @@ -35,3 +35,8 @@ SET max_threads = 2;
select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=10;
select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR }
select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR }

SELECT * FROM (SELECT number FROM test) PASTE JOIN (SELECT number FROM numbers(10) ORDER BY number DESC ) SETTINGS joined_subquery_requires_alias = 1, allow_experimental_analyzer = 1; -- { serverError BAD_ARGUMENTS }
TRUNCATE TABLE test;
INSERT INTO test SELECT number from numbers(6);
SELECT * FROM (SELECT number FROM test) PASTE JOIN (SELECT number FROM numbers(6) ORDER BY number) SETTINGS joined_subquery_requires_alias = 0;