Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid alias duplicates in PASTE JOIN and fix aliases requirement #58654

Merged
merged 19 commits into from Jan 22, 2024
Merged
41 changes: 40 additions & 1 deletion src/Analyzer/Passes/QueryAnalysisPass.cpp
Expand Up @@ -1207,6 +1207,8 @@ class QueryAnalyzer

static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);

static void checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope);

static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);

static void expandGroupByAll(QueryNode & query_tree_node_typed);
Expand Down Expand Up @@ -2249,7 +2251,13 @@ void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodeP
return;

auto table_expression_node_type = table_expression_node->getNodeType();
if (table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION ||
auto * join_typed = join_node->as<JoinNode>();
if (table_expression_node_type == QueryTreeNodeType::QUERY && join_typed->getKind() == JoinKind::Paste)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I already forgot what table_expression_node corresponds to. Why do we check only one node while checkDuplicateTableNamesOrAlias expects left and right expression to be query ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We check only one node each time because we pass into validateJoinTableExpressionWithoutAlias only one argument (either left or right), so we call this function two times for each of the values.

{
checkDuplicateTableNamesOrAlias(join_node, join_typed->getLeftTableExpression(), join_typed->getRightTableExpression(), scope);
return;
}
else if (table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION ||
table_expression_node_type == QueryTreeNodeType::QUERY ||
table_expression_node_type == QueryTreeNodeType::UNION)
throw Exception(ErrorCodes::ALIAS_REQUIRED,
Expand Down Expand Up @@ -6777,6 +6785,36 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
}
}

void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope)
{
Names column_names;
if (!scope.context->getSettingsRef().joined_subquery_requires_alias)
return;

if (join_node->as<JoinNode &>().getKind() != JoinKind::Paste)
return;

auto * left_node = left_table_expr->as<QueryNode>();
auto * right_node = right_table_expr->as<QueryNode>();

if (left_node)
for (const auto & name_and_type : left_node->getProjectionColumns())
column_names.push_back(name_and_type.name);
if (right_node)
for (const auto & name_and_type : right_node->getProjectionColumns())
column_names.push_back(name_and_type.name);

if (column_names.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Names of projection columns cannot be empty");

std::sort(column_names.begin(), column_names.end());
for (size_t i = 0; i < column_names.size() - 1; i++) // Check if there is not any duplicates because it will lead to broken result
yariks5s marked this conversation as resolved.
Show resolved Hide resolved
if (column_names[i] == column_names[i+1])
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Name of columns and aliases should be unique for this query (you can add/change aliases so they will not be duplicated)"
"While processing '{}'", join_node->formatASTForErrorMessage());
}

/// Resolve join node in scope
void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor)
{
Expand Down Expand Up @@ -7130,6 +7168,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
initializeQueryJoinTreeNode(query_node_typed.getJoinTree(), scope);
scope.alias_name_to_table_expression_node.clear();

Names column_names;
yariks5s marked this conversation as resolved.
Show resolved Hide resolved
resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor);
}

Expand Down
1 change: 1 addition & 0 deletions src/Parsers/ExpressionElementParsers.cpp
Expand Up @@ -1451,6 +1451,7 @@ const char * ParserAlias::restricted_keywords[] =
"ASOF",
"BETWEEN",
"CROSS",
"PASTE",
Copy link
Member

@vdimir vdimir Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, I recently though about it and found out that it can be breaking change and we need to mention it properly in a changelog. Because paste is parsed as a keyword instead of alias name. wee need to consider which queries may stop work if someone uses paste as subquery name: (SELECT ... ) paste ..., (SELECT ... ) AS paste ... and so on, and explain it in a changelog. Maybe even something already changed when we introduced PASTE join, but for me it became clear only after we found this thing with restricted_keywords

"FINAL",
"FORMAT",
"FROM",
Expand Down
19 changes: 19 additions & 0 deletions tests/queries/0_stateless/02933_paste_join.reference
Expand Up @@ -82,3 +82,22 @@ UInt64
7 2
8 1
9 0
0 0
1 1
2 2
3 3
4 4
5 5
0 0
1 1
2 2
3 3
4 4
5 5
0 0
1 1
2 2
3 3
4 4
5 5
1 2 3
10 changes: 9 additions & 1 deletion tests/queries/0_stateless/02933_paste_join.sql
@@ -1,6 +1,6 @@
select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10)) t2;
select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10) order by a desc) t2;
create table if not exists test (num UInt64) engine=Memory;
create table if not exists test (number UInt64) engine=Memory;
insert into test select number from numbers(6);
insert into test select number from numbers(5);
SELECT * FROM (SELECT 1) t1 PASTE JOIN (SELECT 2) SETTINGS joined_subquery_requires_alias=0;
Expand Down Expand Up @@ -35,3 +35,11 @@ SET max_threads = 2;
select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=10;
select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR }
select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR }

TRUNCATE TABLE test;
INSERT INTO test SELECT number from numbers(6);
SELECT * FROM (SELECT number FROM test) PASTE JOIN (SELECT number FROM numbers(6) ORDER BY number) SETTINGS joined_subquery_requires_alias = 0;
SELECT * FROM (SELECT number FROM test PASTE JOIN (Select number FROM numbers(7))) PASTE JOIN (SELECT number FROM numbers(6) PASTE JOIN (SELECT number FROM test)) SETTINGS joined_subquery_requires_alias = 0;
SELECT * FROM (SELECT number FROM test PASTE JOIN (SELECT number FROM test PASTE JOIN (Select number FROM numbers(7)))) PASTE JOIN (SELECT number FROM numbers(6) PASTE JOIN (SELECT number FROM test)) SETTINGS joined_subquery_requires_alias = 0;
SELECT * FROM (SELECT 1 AS a) PASTE JOIN (SELECT 2 AS b) PASTE JOIN (SELECT 3 AS c) SETTINGS allow_experimental_analyzer = 1;
SELECT * FROM (SELECT 1 AS a) PASTE JOIN (SELECT 2 AS b) PASTE JOIN (SELECT 3 AS a) SETTINGS allow_experimental_analyzer = 1; -- { serverError AMBIGUOUS_COLUMN_NAME }