diff --git a/dbms/src/Core/Names.h b/dbms/src/Core/Names.h index 5c3384112aef..ff8252084acf 100644 --- a/dbms/src/Core/Names.h +++ b/dbms/src/Core/Names.h @@ -12,5 +12,6 @@ namespace DB using Names = std::vector; using NameSet = std::unordered_set; using NameToNameMap = std::unordered_map; +using NameToNameSetMap = std::unordered_map; } diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index 0a58050339ac..a72784ffd966 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -125,6 +125,21 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (!done_with_join) { + if (subquery.joined_block_actions) + subquery.joined_block_actions->execute(block); + + for (const auto & name_with_alias : subquery.joined_block_aliases) + { + if (block.has(name_with_alias.first)) + { + auto pos = block.getPositionByName(name_with_alias.first); + auto column = block.getByPosition(pos); + block.erase(pos); + column.name = name_with_alias.second; + block.insert(std::move(column)); + } + } + if (!subquery.join->insertFromBlock(block)) done_with_join = true; } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index ebf3ef3aac86..1a00f5c43d79 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -41,6 +41,8 @@ Names ExpressionAction::getNeededColumns() const res.insert(res.end(), array_joined_columns.begin(), array_joined_columns.end()); + res.insert(res.end(), join_key_names_left.begin(), join_key_names_left.end()); + for (const auto & column : projection) res.push_back(column.first); @@ -146,11 +148,14 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column return a; } -ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_) 
+ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join_, + const Names & join_key_names_left, + const NamesAndTypesList & columns_added_by_join_) { ExpressionAction a; a.type = JOIN; - a.join = join_; + a.join = std::move(join_); + a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; return a; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 8da5fe2a2792..993ba772d75a 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -34,11 +34,14 @@ using DataTypePtr = std::shared_ptr; class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; +class ExpressionActions; /** Action on the block. */ struct ExpressionAction { +private: + using ExpressionActionsPtr = std::shared_ptr; public: enum Type { @@ -85,6 +88,7 @@ struct ExpressionAction /// For JOIN std::shared_ptr join; + Names join_key_names_left; NamesAndTypesList columns_added_by_join; /// For PROJECT. @@ -103,7 +107,8 @@ struct ExpressionAction static ExpressionAction project(const NamesWithAliases & projected_columns_); static ExpressionAction project(const Names & projected_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); - static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); + static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, + const NamesAndTypesList & columns_added_by_join_); /// Which columns necessary to perform this action. 
Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 633de1aac111..82287ec38788 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -62,6 +62,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -161,6 +164,34 @@ void removeDuplicateColumns(NamesAndTypesList & columns) } +String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const +{ + return (!alias.empty() ? alias : (database + '.' + table)) + '.'; +} + + +void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const +{ + if (auto identifier = typeid_cast(ast.get())) + { + String prefix = getQualifiedNamePrefix(); + identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end()); + + Names qualifiers; + if (!alias.empty()) + qualifiers.push_back(alias); + else + { + qualifiers.push_back(database); + qualifiers.push_back(table); + } + + for (const auto & qualifier : qualifiers) + identifier->children.emplace_back(std::make_shared(qualifier)); + } +} + + ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & ast_, const Context & context_, @@ -258,112 +289,171 @@ ExpressionAnalyzer::ExpressionAnalyzer( analyzeAggregation(); } - -void ExpressionAnalyzer::translateQualifiedNames() +static DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, + const Context & context) { - String database_name; - String table_name; - String alias; - - if (!select_query || !select_query->tables || select_query->tables->children.empty()) - return; - - ASTTablesInSelectQueryElement & element = static_cast(*select_query->tables->children[0]); - - if (!element.table_expression) /// This is ARRAY JOIN without a table at the left side. 
- return; - - ASTTableExpression & table_expression = static_cast(*element.table_expression); + DatabaseAndTableWithAlias database_and_table_with_alias; if (table_expression.database_and_table_name) { - const ASTIdentifier & identifier = static_cast(*table_expression.database_and_table_name); + const auto & identifier = static_cast(*table_expression.database_and_table_name); - alias = identifier.tryGetAlias(); + database_and_table_with_alias.alias = identifier.tryGetAlias(); if (table_expression.database_and_table_name->children.empty()) { - database_name = context.getCurrentDatabase(); - table_name = identifier.name; + database_and_table_with_alias.database = context.getCurrentDatabase(); + database_and_table_with_alias.table = identifier.name; } else { if (table_expression.database_and_table_name->children.size() != 2) throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - database_name = static_cast(*identifier.children[0]).name; - table_name = static_cast(*identifier.children[1]).name; + database_and_table_with_alias.database = static_cast(*identifier.children[0]).name; + database_and_table_with_alias.table = static_cast(*identifier.children[1]).name; } } else if (table_expression.table_function) { - alias = table_expression.table_function->tryGetAlias(); + database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias(); } else if (table_expression.subquery) { - alias = table_expression.subquery->tryGetAlias(); + database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias(); } else throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); - translateQualifiedNamesImpl(ast, database_name, table_name, alias); + return database_and_table_with_alias; +}; + + +void ExpressionAnalyzer::translateQualifiedNames() +{ + if (!select_query || !select_query->tables || select_query->tables->children.empty()) + return; + + auto & 
element = static_cast(*select_query->tables->children[0]); + + if (!element.table_expression) /// This is ARRAY JOIN without a table at the left side. + return; + + auto & table_expression = static_cast(*element.table_expression); + auto * join = select_query->join(); + + std::vector tables = {getTableNameWithAliasFromTableExpression(table_expression, context)}; + + if (join) + { + const auto & join_table_expression = static_cast(*join->table_expression); + tables.emplace_back(getTableNameWithAliasFromTableExpression(join_table_expression, context)); + } + + translateQualifiedNamesImpl(ast, tables); } -void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String & database_name, const String & table_name, const String & alias) +/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names. +static size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, + const DatabaseAndTableWithAlias & names) { - if (ASTIdentifier * ident = typeid_cast(ast.get())) + size_t num_qualifiers_to_strip = 0; + + auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; + + /// It is compound identifier + if (!identifier.children.empty()) { - if (ident->kind == ASTIdentifier::Column) + size_t num_components = identifier.children.size(); + + /// database.table.column + if (num_components >= 3 + && !names.database.empty() + && get_identifier_name(identifier.children[0]) == names.database + && get_identifier_name(identifier.children[1]) == names.table) { - /// It is compound identifier - if (!ast->children.empty()) + num_qualifiers_to_strip = 2; + } + + /// table.column or alias.column. If num_components > 2, it is like table.nested.column. 
+ if (num_components >= 2 + && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) + || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) + { + num_qualifiers_to_strip = 1; + } + } + + return num_qualifiers_to_strip; +} + + +/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. +/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'. +static void stripIdentifier(ASTPtr & ast, size_t num_qualifiers_to_strip) +{ + ASTIdentifier * identifier = typeid_cast(ast.get()); + + if (!identifier) + throw Exception("ASTIdentifier expected for stripIdentifier", ErrorCodes::LOGICAL_ERROR); + + if (num_qualifiers_to_strip) + { + size_t num_components = identifier->children.size(); + + /// plain column + if (num_components - num_qualifiers_to_strip == 1) + { + String node_alias = identifier->tryGetAlias(); + ast = identifier->children.back(); + if (!node_alias.empty()) + ast->setAlias(node_alias); + } + else + /// nested column + { + identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip); + String new_name; + for (const auto & child : identifier->children) { - size_t num_components = ast->children.size(); - size_t num_qualifiers_to_strip = 0; - - /// database.table.column - if (num_components >= 3 - && !database_name.empty() - && static_cast(*ast->children[0]).name == database_name - && static_cast(*ast->children[1]).name == table_name) - { - num_qualifiers_to_strip = 2; - } + if (!new_name.empty()) + new_name += '.'; + new_name += static_cast(*child.get()).name; + } + identifier->name = new_name; + } + } +} - /// table.column or alias.column. If num_components > 2, it is like table.nested.column. 
- if (num_components >= 2 - && ((!table_name.empty() && static_cast(*ast->children[0]).name == table_name) - || (!alias.empty() && static_cast(*ast->children[0]).name == alias))) - { - num_qualifiers_to_strip = 1; - } - if (num_qualifiers_to_strip) +void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::vector & tables) +{ + if (auto * identifier = typeid_cast(ast.get())) + { + if (identifier->kind == ASTIdentifier::Column) + { + /// Select first table name with max number of qualifiers which can be stripped. + size_t max_num_qualifiers_to_strip = 0; + size_t best_table_pos = 0; + + for (size_t table_pos = 0; table_pos < tables.size(); ++table_pos) + { + const auto & table = tables[table_pos]; + auto num_qualifiers_to_strip = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, table); + + if (num_qualifiers_to_strip > max_num_qualifiers_to_strip) { - /// plain column - if (num_components - num_qualifiers_to_strip == 1) - { - String node_alias = ast->tryGetAlias(); - ast = ast->children.back(); - if (!node_alias.empty()) - ast->setAlias(node_alias); - } - else - /// nested column - { - ident->children.erase(ident->children.begin(), ident->children.begin() + num_qualifiers_to_strip); - String new_name; - for (const auto & child : ident->children) - { - if (!new_name.empty()) - new_name += '.'; - new_name += static_cast(*child.get()).name; - } - ident->name = new_name; - } + max_num_qualifiers_to_strip = num_qualifiers_to_strip; + best_table_pos = table_pos; } } + + stripIdentifier(ast, max_num_qualifiers_to_strip); + + /// In case if column from the joined table are in source columns, change it's name to qualified. 
+ if (best_table_pos && source_columns.contains(ast->getColumnName())) + tables[best_table_pos].makeQualifiedName(ast); } } else if (typeid_cast(ast.get())) @@ -379,19 +469,28 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String if (num_components > 2) throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); - /// database.table.*, table.* or alias.* - if ( (num_components == 2 - && !database_name.empty() - && static_cast(*ident->children[0]).name == database_name - && static_cast(*ident->children[1]).name == table_name) - || (num_components == 0 - && ((!table_name.empty() && ident->name == table_name) - || (!alias.empty() && ident->name == alias)))) + for (const auto & table_names : tables) { - /// Replace to plain asterisk. - ast = std::make_shared(); + /// database.table.*, table.* or alias.* + if ((num_components == 2 + && !table_names.database.empty() + && static_cast(*ident->children[0]).name == table_names.database + && static_cast(*ident->children[1]).name == table_names.table) + || (num_components == 0 + && ((!table_names.table.empty() && ident->name == table_names.table) + || (!table_names.alias.empty() && ident->name == table_names.alias)))) + { + /// Replace to plain asterisk. + ast = std::make_shared(); + } } } + else if (auto * join = typeid_cast(ast.get())) + { + /// Don't translate on_expression here in order to resolve equation parts later. 
+ if (join->using_expression_list) + translateQualifiedNamesImpl(join->using_expression_list, tables); + } else { for (auto & child : ast->children) @@ -400,7 +499,7 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String if (!typeid_cast(child.get()) && !typeid_cast(child.get())) { - translateQualifiedNamesImpl(child, database_name, table_name, alias); + translateQualifiedNamesImpl(child, tables); } } } @@ -527,8 +626,12 @@ void ExpressionAnalyzer::analyzeAggregation() const ASTTablesInSelectQueryElement * join = select_query->join(); if (join) { - if (static_cast(*join->table_join).using_expression_list) - getRootActions(static_cast(*join->table_join).using_expression_list, true, false, temp_actions); + const auto table_join = static_cast(*join->table_join); + if (table_join.using_expression_list) + getRootActions(table_join.using_expression_list, true, false, temp_actions); + if (table_join.on_expression) + for (const auto & key_ast : analyzed_join.key_asts_left) + getRootActions(key_ast, true, false, temp_actions); addJoinAction(temp_actions, true); } @@ -1530,7 +1633,8 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & { NamesAndTypesList temp_columns = source_columns; temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end()); - temp_columns.insert(temp_columns.end(), columns_added_by_join.begin(), columns_added_by_join.end()); + for (const auto & joined_column : analyzed_join.columns_added_by_join) + temp_columns.push_back(joined_column.name_and_type); ExpressionActionsPtr temp_actions = std::make_shared(temp_columns, settings); getRootActions(func->arguments->children.at(0), true, false, temp_actions); @@ -1815,16 +1919,15 @@ const Block & ScopeStack::getSampleBlock() const void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, bool only_consts, ExpressionActionsPtr & actions) { ScopeStack scopes(actions, settings); + 
ProjectionManipulatorPtr projection_manipulator; if (!isThereArrayJoin(ast) && settings.enable_conditional_computation && !only_consts) - { projection_manipulator = std::make_shared(scopes, context); - } else - { projection_manipulator = std::make_shared(scopes); - } + getActionsImpl(ast, no_subqueries, only_consts, scopes, projection_manipulator); + actions = scopes.popLevel(); } @@ -1983,6 +2086,28 @@ bool ExpressionAnalyzer::isThereArrayJoin(const ASTPtr & ast) } } +void ExpressionAnalyzer::getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, bool only_consts, + ExpressionActionsPtr & actions) +{ + ScopeStack scopes(actions, settings); + + ProjectionManipulatorPtr projection_manipulator; + if (!isThereArrayJoin(ast) && settings.enable_conditional_computation && !only_consts) + projection_manipulator = std::make_shared(scopes, context); + else + projection_manipulator = std::make_shared(scopes); + + if (table_join.using_expression_list) + getActionsImpl(table_join.using_expression_list, no_subqueries, only_consts, scopes, projection_manipulator); + else if (table_join.on_expression) + { + for (const auto & ast : analyzed_join.key_asts_left) + getActionsImpl(ast, no_subqueries, only_consts, scopes, projection_manipulator); + } + + actions = scopes.popLevel(); +} + void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack, ProjectionManipulatorPtr projection_manipulator) { @@ -2413,13 +2538,65 @@ bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool on void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only_types) const { if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, columns_added_by_join)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzed_join.key_names_left, + analyzed_join.getColumnsAddedByJoin())); else for (auto & subquery_for_set : subqueries_for_sets) if 
(subquery_for_set.second.join) - actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, columns_added_by_join)); + actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzed_join.key_names_left, + analyzed_join.getColumnsAddedByJoin())); } + +void ExpressionAnalyzer::AnalyzedJoin::createJoinedBlockActions(const ASTSelectQuery * select_query, + const Context & context) +{ + if (!select_query) + return; + + const ASTTablesInSelectQueryElement * join = select_query->join(); + + if (!join) + return; + + const auto & join_params = static_cast(*join->table_join); + + /// Create custom expression list with join keys from right table. + auto expression_list = std::make_shared(); + ASTs & children = expression_list->children; + + if (join_params.on_expression) + for (const auto & join_right_key : key_asts_right) + children.emplace_back(join_right_key); + + NameSet required_columns_set(key_names_right.begin(), key_names_right.end()); + for (const auto & joined_column : columns_added_by_join) + required_columns_set.insert(joined_column.original_name); + + required_columns_set.insert(key_names_right.begin(), key_names_right.end()); + + required_columns_from_joined_table.insert(required_columns_from_joined_table.end(), + required_columns_set.begin(), required_columns_set.end()); + + ExpressionAnalyzer analyzer(expression_list, context, nullptr, columns_from_joined_table, required_columns_from_joined_table); + joined_block_actions = analyzer.getActions(false); + + for (const auto & column_required_from_actions : joined_block_actions->getRequiredColumns()) + if (!required_columns_set.count(column_required_from_actions)) + required_columns_from_joined_table.push_back(column_required_from_actions); +} + + +NamesAndTypesList ExpressionAnalyzer::AnalyzedJoin::getColumnsAddedByJoin() const +{ + NamesAndTypesList result; + for (const auto & joined_column : columns_added_by_join) + result.push_back(joined_column.name_and_type); + + return 
result; +} + + bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) { assertSelect(); @@ -2430,12 +2607,11 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - const ASTTablesInSelectQueryElement & join_element = static_cast(*select_query->join()); - const ASTTableJoin & join_params = static_cast(*join_element.table_join); - const ASTTableExpression & table_to_join = static_cast(*join_element.table_expression); + const auto & join_element = static_cast(*select_query->join()); + const auto & join_params = static_cast(*join_element.table_join); + const auto & table_to_join = static_cast(*join_element.table_expression); - if (join_params.using_expression_list) - getRootActions(join_params.using_expression_list, only_types, false, step.actions); + getActionsFromJoinKeys(join_params, only_types, false, step.actions); /// Two JOINs are not supported with the same subquery, but different USINGs. auto join_hash = join_element.getTreeHash(); @@ -2446,7 +2622,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty /// TODO This syntax does not support specifying a database name. 
if (table_to_join.database_and_table_name) { - auto database_table = getDatabaseAndTableNameFromIdentifier(static_cast(*table_to_join.database_and_table_name)); + const auto & identifier = static_cast(*table_to_join.database_and_table_name); + auto database_table = getDatabaseAndTableNameFromIdentifier(identifier); StoragePtr table = context.tryGetTable(database_table.first, database_table.second); if (table) @@ -2467,14 +2644,10 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.join) { JoinPtr join = std::make_shared( - join_key_names_left, join_key_names_right, + analyzed_join.key_names_left, analyzed_join.key_names_right, analyzed_join.columns_added_by_join_from_right_keys, settings.join_use_nulls, SizeLimits(settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode), join_params.kind, join_params.strictness); - Names required_joined_columns(join_key_names_right.begin(), join_key_names_right.end()); - for (const auto & name_type : columns_added_by_join) - required_joined_columns.push_back(name_type.name); - /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. 
@@ -2483,20 +2656,44 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.source) { ASTPtr table; + if (table_to_join.database_and_table_name) table = table_to_join.database_and_table_name; else table = table_to_join.subquery; - auto interpreter = interpretSubquery(table, context, subquery_depth, required_joined_columns); + auto interpreter = interpretSubquery(table, context, subquery_depth, analyzed_join.required_columns_from_joined_table); subquery_for_set.source = std::make_shared( interpreter->getSampleBlock(), [interpreter]() mutable { return interpreter->execute().in; }); } + /// Alias duplicating columns. + for (const auto & joined_column : analyzed_join.columns_added_by_join) + { + const auto & qualified_name = joined_column.name_and_type.name; + if (joined_column.original_name != qualified_name) + subquery_for_set.joined_block_aliases.emplace_back(joined_column.original_name, qualified_name); + } + + auto sample_block = subquery_for_set.source->getHeader(); + analyzed_join.joined_block_actions->execute(sample_block); + for (const auto & name_with_alias : subquery_for_set.joined_block_aliases) + { + if (sample_block.has(name_with_alias.first)) + { + auto pos = sample_block.getPositionByName(name_with_alias.first); + auto column = sample_block.getByPosition(pos); + sample_block.erase(pos); + column.name = name_with_alias.second; + sample_block.insert(std::move(column)); + } + } + /// TODO You do not need to set this up when JOIN is only needed on remote servers. subquery_for_set.join = join; - subquery_for_set.join->setSampleBlock(subquery_for_set.source->getHeader()); + subquery_for_set.join->setSampleBlock(sample_block); + subquery_for_set.joined_block_actions = analyzed_join.joined_block_actions; } addJoinAction(step.actions, false); @@ -2774,19 +2971,30 @@ void ExpressionAnalyzer::collectUsedColumns() * (Do not assume that they are required for reading from the "left" table). 
*/ NameSet available_joined_columns; - collectJoinedColumns(available_joined_columns, columns_added_by_join); + collectJoinedColumns(available_joined_columns); NameSet required_joined_columns; + + for (const auto & left_key_ast : analyzed_join.key_asts_left) + getRequiredSourceColumnsImpl(left_key_ast, available_columns, required, ignored, {}, required_joined_columns); + getRequiredSourceColumnsImpl(ast, available_columns, required, ignored, available_joined_columns, required_joined_columns); - for (NamesAndTypesList::iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end();) + for (auto it = analyzed_join.columns_added_by_join.begin(); it != analyzed_join.columns_added_by_join.end();) { - if (required_joined_columns.count(it->name)) + if (required_joined_columns.count(it->name_and_type.name)) ++it; else - columns_added_by_join.erase(it++); + analyzed_join.columns_added_by_join.erase(it++); } + analyzed_join.createJoinedBlockActions(select_query, context); + + /// Some columns from right join key may be used in query. This columns will be appended to block during join. + for (const auto & right_key_name : analyzed_join.key_names_right) + if (required_joined_columns.count(right_key_name)) + analyzed_join.columns_added_by_join_from_right_keys.insert(right_key_name); + /// Insert the columns required for the ARRAY JOIN calculation into the required columns list. 
NameSet array_join_sources; for (const auto & result_source : array_join_result_to_source) @@ -2832,7 +3040,163 @@ void ExpressionAnalyzer::collectUsedColumns() throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); } -void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAndTypesList & joined_columns_name_type) + +void ExpressionAnalyzer::collectJoinedColumnsFromJoinOnExpr() +{ + const auto & tables = static_cast(*select_query->tables); + const auto * left_tables_element = static_cast(tables.children.at(0).get()); + const auto * right_tables_element = select_query->join(); + + if (!left_tables_element || !right_tables_element) + return; + + const auto & table_join = static_cast(*right_tables_element->table_join); + if (!table_join.on_expression) + return; + + const auto & left_table_expression = static_cast(*left_tables_element->table_expression); + const auto & right_table_expression = static_cast(*right_tables_element->table_expression); + + auto left_source_names = getTableNameWithAliasFromTableExpression(left_table_expression, context); + auto right_source_names = getTableNameWithAliasFromTableExpression(right_table_expression, context); + + /// Stores examples of columns which are only from one table. + struct TableBelonging + { + const ASTIdentifier * example_only_from_left = nullptr; + const ASTIdentifier * example_only_from_right = nullptr; + }; + + /// Check all identifiers in ast and decide their possible table belonging. + /// Throws if there are two identifiers definitely from different tables. 
+ std::function get_table_belonging; + get_table_belonging = [&](const ASTPtr & ast) -> TableBelonging + { + auto * identifier = typeid_cast(ast.get()); + if (identifier) + { + if (identifier->kind == ASTIdentifier::Column) + { + auto left_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, left_source_names); + auto right_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, right_source_names); + + /// Assume that component from definite table if num_components is greater than for the other table. + if (left_num_components > right_num_components) + return {identifier, nullptr}; + if (left_num_components < right_num_components) + return {nullptr, identifier}; + } + return {}; + } + + TableBelonging table_belonging; + for (const auto & child : ast->children) + { + auto children_belonging = get_table_belonging(child); + if (!table_belonging.example_only_from_left) + table_belonging.example_only_from_left = children_belonging.example_only_from_left; + if (!table_belonging.example_only_from_right) + table_belonging.example_only_from_right = children_belonging.example_only_from_right; + } + + if (table_belonging.example_only_from_left && table_belonging.example_only_from_right) + throw Exception("Invalid columns in JOIN ON section. 
Columns " + + table_belonging.example_only_from_left->getAliasOrColumnName() + " and " + + table_belonging.example_only_from_right->getAliasOrColumnName() + + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + + return table_belonging; + }; + + std::function translate_qualified_names; + translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names) + { + auto * identifier = typeid_cast(ast.get()); + if (identifier) + { + if (identifier->kind == ASTIdentifier::Column) + { + auto num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, source_names); + stripIdentifier(ast, num_components); + } + return; + } + + for (auto & child : ast->children) + translate_qualified_names(child, source_names); + }; + + const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) " + "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]"; + auto throwSyntaxException = [&](const String & msg) + { + throw Exception("Invalid expression for JOIN ON. " + msg + supported_syntax, ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + }; + + /// For equal expression find out corresponding table for each part, translate qualified names and add asts to join keys. 
+ auto add_columns_from_equals_expr = [&](const ASTPtr & expr) + { + auto * func_equals = typeid_cast(expr.get()); + if (!func_equals || func_equals->name != "equals") + throwSyntaxException("Expected equals expression, got " + queryToString(expr) + "."); + + ASTPtr left_ast = func_equals->arguments->children.at(0)->clone(); + ASTPtr right_ast = func_equals->arguments->children.at(1)->clone(); + + auto left_table_belonging = get_table_belonging(left_ast); + auto right_table_belonging = get_table_belonging(right_ast); + + bool can_be_left_part_from_left_table = left_table_belonging.example_only_from_right == nullptr; + bool can_be_left_part_from_right_table = left_table_belonging.example_only_from_left == nullptr; + bool can_be_right_part_from_left_table = right_table_belonging.example_only_from_right == nullptr; + bool can_be_right_part_from_right_table = right_table_belonging.example_only_from_left == nullptr; + + auto add_join_keys = [&](ASTPtr & ast_to_left_table, ASTPtr & ast_to_right_table) + { + translate_qualified_names(ast_to_left_table, left_source_names); + translate_qualified_names(ast_to_right_table, right_source_names); + + analyzed_join.key_asts_left.push_back(ast_to_left_table); + analyzed_join.key_names_left.push_back(ast_to_left_table->getColumnName()); + analyzed_join.key_asts_right.push_back(ast_to_right_table); + analyzed_join.key_names_right.push_back(ast_to_right_table->getAliasOrColumnName()); + }; + + /// Default variant when all identifiers may be from any table. + if (can_be_left_part_from_left_table && can_be_right_part_from_right_table) + add_join_keys(left_ast, right_ast); + else if (can_be_left_part_from_right_table && can_be_right_part_from_left_table) + add_join_keys(right_ast, left_ast); + else + { + auto * left_example = left_table_belonging.example_only_from_left ? 
+ left_table_belonging.example_only_from_left : + left_table_belonging.example_only_from_right; + + auto * right_example = right_table_belonging.example_only_from_left ? + right_table_belonging.example_only_from_left : + right_table_belonging.example_only_from_right; + + auto left_name = queryToString(*left_example); + auto right_name = queryToString(*right_example); + auto expr_name = queryToString(expr); + + throwSyntaxException("In expression " + expr_name + " columns " + left_name + " and " + right_name + + " are from the same table but from different arguments of equal function."); + } + }; + + auto * func = typeid_cast(table_join.on_expression.get()); + if (func && func->name == "and") + { + for (const auto & expr : func->arguments->children) + add_columns_from_equals_expr(expr); + } + else + add_columns_from_equals_expr(table_join.on_expression); +} + +void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns) { if (!select_query) return; @@ -2842,13 +3206,15 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd if (!node) return; - const ASTTableJoin & table_join = static_cast(*node->table_join); - const ASTTableExpression & table_expression = static_cast(*node->table_expression); + const auto & table_join = static_cast(*node->table_join); + const auto & table_expression = static_cast(*node->table_expression); + auto joined_table_name = getTableNameWithAliasFromTableExpression(table_expression, context); Block nested_result_sample; if (table_expression.database_and_table_name) { - auto database_table = getDatabaseAndTableNameFromIdentifier(static_cast(*table_expression.database_and_table_name)); + const auto & identifier = static_cast(*table_expression.database_and_table_name); + auto database_table = getDatabaseAndTableNameFromIdentifier(identifier); const auto & table = context.getTable(database_table.first, database_table.second); nested_result_sample = table->getSampleBlockNonMaterialized(); } @@ -2857,13 
+3223,17 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd const auto & subquery = table_expression.subquery->children.at(0); nested_result_sample = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context); } + analyzed_join.columns_from_joined_table = nested_result_sample.getNamesAndTypesList(); - auto add_name_to_join_keys = [](Names & join_keys, const String & name, const char * where) + auto add_name_to_join_keys = [](Names & join_keys, ASTs & join_asts, const String & name, const ASTPtr & ast) { if (join_keys.end() == std::find(join_keys.begin(), join_keys.end(), name)) + { join_keys.push_back(name); + join_asts.push_back(ast); + } else - throw Exception("Duplicate column " + name + " " + where, ErrorCodes::DUPLICATE_COLUMN); + throw Exception("Duplicate column " + name + " in USING list", ErrorCodes::DUPLICATE_COLUMN); }; if (table_join.using_expression_list) @@ -2871,51 +3241,39 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd auto & keys = typeid_cast(*table_join.using_expression_list); for (const auto & key : keys.children) { - add_name_to_join_keys(join_key_names_left, key->getColumnName(), "in USING list"); - add_name_to_join_keys(join_key_names_right, key->getAliasOrColumnName(), "in USING list"); + add_name_to_join_keys(analyzed_join.key_names_left, analyzed_join.key_asts_left, key->getColumnName(), key); + add_name_to_join_keys(analyzed_join.key_names_right, analyzed_join.key_asts_right, key->getAliasOrColumnName(), key); } } else if (table_join.on_expression) - { - const auto supported_syntax = - "\nSupported syntax: JOIN ON [table.]column = [table.]column [AND [table.]column = [table.]column ...]"; - auto throwSyntaxException = [&](const String & msg) - { - throw Exception("Invalid expression for JOIN ON. 
" + msg + supported_syntax, ErrorCodes::INVALID_JOIN_ON_EXPRESSION); - }; - - auto add_columns_from_equals_expr = [&](const ASTPtr & expr) - { - auto * func_equals = typeid_cast(expr.get()); - if (!func_equals || func_equals->name != "equals") - throwSyntaxException("Expected equals expression, got " + queryToString(expr)); - - String left_name = func_equals->arguments->children.at(0)->getAliasOrColumnName(); - String right_name = func_equals->arguments->children.at(1)->getAliasOrColumnName(); - add_name_to_join_keys(join_key_names_left, left_name, "in JOIN ON expression for left table"); - add_name_to_join_keys(join_key_names_right, right_name, "in JOIN ON expression for right table"); - }; + collectJoinedColumnsFromJoinOnExpr(); - auto * func = typeid_cast(table_join.on_expression.get()); - if (func && func->name == "and") - { - for (auto expr : func->children) - add_columns_from_equals_expr(expr); - } - else - add_columns_from_equals_expr(table_join.on_expression); - } + /// When we use JOIN ON syntax, non_joined_columns are columns from join_key_names_left, + /// because even if a column from join_key_names_right, we may need to join it if it has different name. + /// If we use USING syntax, join_key_names_left and join_key_names_right are almost the same, but we need to use + /// join_key_names_right in order to support aliases in USING list. Example: + /// SELECT x FROM tab1 ANY LEFT JOIN tab2 USING (x as y) - will join column x from tab1 with column y from tab2. + auto & not_joined_columns = table_join.using_expression_list ? analyzed_join.key_names_right : analyzed_join.key_names_left; for (const auto i : ext::range(0, nested_result_sample.columns())) { const auto & col = nested_result_sample.safeGetByPosition(i); - if (join_key_names_right.end() == std::find(join_key_names_right.begin(), join_key_names_right.end(), col.name) - && !joined_columns.count(col.name)) /// Duplicate columns in the subquery for JOIN do not make sense. 
+ if (not_joined_columns.end() == std::find(not_joined_columns.begin(), not_joined_columns.end(), col.name)) { - joined_columns.insert(col.name); + auto name = col.name; + /// Change name for duplicate column from the joined table. + if (source_columns.contains(name)) + name = joined_table_name.getQualifiedNamePrefix() + name; + + if (joined_columns.count(name)) /// Duplicate columns in the subquery for JOIN do not make sense. + continue; + + joined_columns.insert(name); - bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || table_join.kind == ASTTableJoin::Kind::Full); - joined_columns_name_type.emplace_back(col.name, make_nullable ? makeNullable(col.type) : col.type); + bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || + table_join.kind == ASTTableJoin::Kind::Full); + auto type = make_nullable ? makeNullable(col.type) : col.type; + analyzed_join.columns_added_by_join.emplace_back(NameAndTypePair(name, std::move(type)), col.name); } } } @@ -3008,7 +3366,8 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, */ if (!typeid_cast(child.get()) && !typeid_cast(child.get()) - && !typeid_cast(child.get())) + && !typeid_cast(child.get()) + && !typeid_cast(child.get())) getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, ignored_names, available_joined_columns, required_joined_columns); } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 4b8d21daa0f4..084e91ce5581 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -51,6 +52,10 @@ struct SubqueryForSet /// If set, build it from result. SetPtr set; JoinPtr join; + /// Apply these actions to the joined block. + ExpressionActionsPtr joined_block_actions; + /// Rename columns of the joined block according to this list. 
+ NamesWithAliases joined_block_aliases; /// If set, put the result into the table. /// This is a temporary table for transferring to remote servers for distributed query processing. @@ -86,6 +91,19 @@ struct ScopeStack const Block & getSampleBlock() const; }; +struct DatabaseAndTableWithAlias +{ + String database; + String table; + String alias; + + /// "alias." or "database.table." if alias is empty + String getQualifiedNamePrefix() const; + + /// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to its name. + void makeQualifiedName(const ASTPtr & ast) const; +}; + /** Transforms an expression from a syntax tree into a sequence of actions to execute it. * * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. @@ -207,19 +225,57 @@ class ExpressionAnalyzer : private boost::noncopyable PreparedSets prepared_sets; - /// NOTE: So far, only one JOIN per query is supported. + struct AnalyzedJoin + { - /** Query of the form `SELECT expr(x) AS FROM t1 ANY LEFT JOIN (SELECT expr(x) AS k FROM t2) USING k` - * The join is made by column k. - * During the JOIN, - * - in the "right" table, it will be available by alias `k`, since `Project` action for the subquery was executed. - * - in the "left" table, it will be accessible by the name `expr(x)`, since `Project` action has not been executed yet. - * You must remember both of these options. - */ - Names join_key_names_left; - Names join_key_names_right; + /// NOTE: So far, only one JOIN per query is supported. + + /** Query of the form `SELECT expr(x) AS k FROM t1 ANY LEFT JOIN (SELECT expr(x) AS k FROM t2) USING k` + * The join is made by column k. + * During the JOIN, + * - in the "right" table, it will be available by alias `k`, since `Project` action for the subquery was executed. + * - in the "left" table, it will be accessible by the name `expr(x)`, since `Project` action has not been executed yet. 
+ * You must remember both of these options. + * + * Query of the form `SELECT ... from t1 ANY LEFT JOIN (SELECT ... from t2) ON expr(t1 columns) = expr(t2 columns)` + * the expression `expr(t2 columns)` will be added to the subquery. + * It's possible to use the name `expr(t2 columns)`. + */ + Names key_names_left; + Names key_names_right; + ASTs key_asts_left; + ASTs key_asts_right; + + struct JoinedColumn + { + /// Column which will be joined to the block. + NameAndTypePair name_and_type; + /// Original column name from the joined source. + String original_name; + + JoinedColumn(const NameAndTypePair & name_and_type_, const String & original_name_) + : name_and_type(name_and_type_), original_name(original_name_) {} + }; + + using JoinedColumnsList = std::list; + + /// All columns which can be read from joined table. + NamesAndTypesList columns_from_joined_table; + /// Columns which will be used in query to the joined query. + Names required_columns_from_joined_table; + /// Columns which will be added to block, possibly including some columns from right join key. + JoinedColumnsList columns_added_by_join; + /// Such columns will be copied from left join keys during join. + NameSet columns_added_by_join_from_right_keys; + /// Actions which need to be calculated on joined block. + ExpressionActionsPtr joined_block_actions; + + void createJoinedBlockActions(const ASTSelectQuery * select_query, const Context & context); + + NamesAndTypesList getColumnsAddedByJoin() const; + }; - NamesAndTypesList columns_added_by_join; + AnalyzedJoin analyzed_join; using Aliases = std::unordered_map; Aliases aliases; @@ -251,7 +307,9 @@ class ExpressionAnalyzer : private boost::noncopyable /** Find the columns that are obtained by JOIN. */ - void collectJoinedColumns(NameSet & joined_columns, NamesAndTypesList & joined_columns_name_type); + void collectJoinedColumns(NameSet & joined_columns); + /// Parse JOIN ON expression and collect ASTs for joined columns. 
+ void collectJoinedColumnsFromJoinOnExpr(); /** Create a dictionary of aliases. */ @@ -308,6 +366,9 @@ class ExpressionAnalyzer : private boost::noncopyable void getActionsImpl(const ASTPtr & ast, bool no_subqueries, bool only_consts, ScopeStack & actions_stack, ProjectionManipulatorPtr projection_manipulator); + /// If ast is ASTSelectQuery with JOIN, add actions for JOIN key columns. + void getActionsFromJoinKeys(const ASTTableJoin & table_join, bool no_subqueries, bool only_consts, ExpressionActionsPtr & actions); + void getRootActions(const ASTPtr & ast, bool no_subqueries, bool only_consts, ExpressionActionsPtr & actions); void getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries); @@ -354,7 +415,7 @@ class ExpressionAnalyzer : private boost::noncopyable * only one ("main") table is supported. Ambiguity is not detected or resolved. */ void translateQualifiedNames(); - void translateQualifiedNamesImpl(ASTPtr & node, const String & database_name, const String & table_name, const String & alias); + void translateQualifiedNamesImpl(ASTPtr & node, const std::vector & tables); /** Sometimes we have to calculate more columns in SELECT clause than will be returned from query. * This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. 
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 7409af75986d..e49fb089d18a 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -30,11 +30,12 @@ namespace ErrorCodes } -Join::Join(const Names & key_names_left_, const Names & key_names_right_, bool use_nulls_, - const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) +Join::Join(const Names & key_names_left_, const Names & key_names_right_, const NameSet & needed_key_names_right_, + bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) : kind(kind_), strictness(strictness_), key_names_left(key_names_left_), key_names_right(key_names_right_), + needed_key_names_right(needed_key_names_right_), use_nulls(use_nulls_), log(&Logger::get("Join")), limits(limits) @@ -776,6 +777,19 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const if (offsets_to_replicate) for (size_t i = 0; i < existing_columns; ++i) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + + /// Add join key columns from right block if they have different names. 
+ for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !block.has(right_name)) + { + const auto & col = block.getByName(left_name); + block.insert({col.column, col.type, right_name}); + } + } } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index dbaa1b1812b1..38527aa3cec5 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -219,8 +219,8 @@ struct JoinKeyGetterHashed class Join { public: - Join(const Names & key_names_left_, const Names & key_names_right_, bool use_nulls_, - const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_); + Join(const Names & key_names_left_, const Names & key_names_right_, const NameSet & needed_key_names_right_, + bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_); bool empty() { return type == Type::EMPTY; } @@ -361,6 +361,8 @@ class Join const Names key_names_left; /// Names of key columns (columns for equi-JOIN) in "right" table (in the order they appear in USING clause). const Names key_names_right; + /// Names of key columns in the "right" table which should stay in block after join. + const NameSet needed_key_names_right; /// Substitute NULLs for non-JOINed rows. bool use_nulls; diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 49b07db2772e..35d39c5b29ca 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -35,7 +35,7 @@ StorageJoin::StorageJoin( /// NOTE StorageJoin doesn't use join_use_nulls setting. 
- join = std::make_shared(key_names, key_names, false /* use_nulls */, SizeLimits(), kind, strictness); + join = std::make_shared(key_names, key_names, NameSet(), false /* use_nulls */, SizeLimits(), kind, strictness); join->setSampleBlock(getSampleBlock().sortColumns()); restore(); } @@ -48,7 +48,7 @@ void StorageJoin::truncate(const ASTPtr &) Poco::File(path + "tmp/").createDirectories(); increment = 0; - join = std::make_shared(key_names, key_names, false /* use_nulls */, SizeLimits(), kind, strictness); + join = std::make_shared(key_names, key_names, NameSet(), false /* use_nulls */, SizeLimits(), kind, strictness); join->setSampleBlock(getSampleBlock().sortColumns()); }; diff --git a/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference b/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference new file mode 100644 index 000000000000..7a41a90c03f3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00674_join_on_syntax.reference @@ -0,0 +1,53 @@ +joind columns from right table +1 +1 2 +1 2 +1 3 +1 2 3 +join on expression +2 +2 2 +2 3 +1 +2 +1 2 2 3 +1 2 2 3 3 +1 2 2 3 +join on and chain +2 3 +2 3 +2 3 2 3 +1 +1 3 +1 2 2 3 +2 4 +join on aliases +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +1 2 2 3 +join on complex expression +2 3 +2 3 +2 3 +2 3 +2 3 +duplicate column names +{"a1":1,"test.tab1_copy.a1":2} +{"a1":1,"test.tab1_copy.a1":2} +{"a1":1,"copy.a1":2} +{"a1":1,"copy.a1":2} +{"a1":1,"copy.a1":2} diff --git a/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql b/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql new file mode 100644 index 000000000000..7fb60e64d04a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00674_join_on_syntax.sql @@ -0,0 +1,84 @@ +drop table if exists test.tab1; +drop table if exists test.tab2; +drop table if exists test.tab3; +drop table if exists test.tab1_copy; + +create table test.tab1 (a1 Int32, b1 Int32) 
engine = MergeTree order by a1; +create table test.tab2 (a2 Int32, b2 Int32) engine = MergeTree order by a2; +create table test.tab3 (a3 Int32, b3 Int32) engine = MergeTree order by a3; +create table test.tab1_copy (a1 Int32, b1 Int32) engine = MergeTree order by a1; + +insert into test.tab1 values (1, 2); +insert into test.tab2 values (2, 3); +insert into test.tab3 values (2, 3); +insert into test.tab1_copy values (2, 3); + + +select 'joind columns from right table'; +select a1 from test.tab1 any left join test.tab2 on b1 = a2; +select a1, b1 from test.tab1 any left join test.tab2 on b1 = a2; +select a1, a2 from test.tab1 any left join test.tab2 on b1 = a2; +select a1, b2 from test.tab1 any left join test.tab2 on b1 = a2; +select a1, a2, b2 from test.tab1 any left join test.tab2 on b1 = a2; + + +select 'join on expression'; +select b1 from test.tab1 any left join test.tab2 on toInt32(a1 + 1) = a2; +select b1, a2 from test.tab1 any left join test.tab2 on toInt32(a1 + 1) = a2; +select b1, b2 from test.tab1 any left join test.tab2 on toInt32(a1 + 1) = a2; +select a1 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1; +select a2 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1; +select a1, b1, a2, b2 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1; +select a1, b1, a2, b2, a2 + 1 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1; +select a1, b1, a2, b2 from test.tab1 any left join test.tab2 on a1 + 4 = b2 + 2; + + +select 'join on and chain'; +select a2, b2 from test.tab2 any left join test.tab3 on a2 = a3 and b2 = b3; +select a3, b3 from test.tab2 any left join test.tab3 on a2 = a3 and b2 = b3; +select a2, b2, a3, b3 from test.tab2 any left join test.tab3 on a2 = a3 and b2 = b3; +select a1 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1 and a1 + 4 = b2 + 2; +select a1, b2 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1 and a1 + 4 = b2 + 2; +select a1, b1, a2, b2 from test.tab1 any left join test.tab2 on b1 + 1 = a2 
+ 1 and a1 + 4 = b2 + 2; +select a2, b2 + 1 from test.tab1 any left join test.tab2 on b1 + 1 = a2 + 1 and a1 + 4 = b2 + 2; + + +select 'join on aliases'; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on first.b1 = second.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on second.a2 = first.b1; + +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on tab1.b1 = tab2.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on tab2.a2 = tab1.b1; + +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on test.tab1.b1 = test.tab2.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on test.tab2.a2 = test.tab1.b1; + +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on first.b1 = tab2.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on tab2.a2 = first.b1; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on first.b1 = test.tab2.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on test.tab2.a2 = first.b1; + +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on tab1.b1 = second.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on second.a2 = tab1.b1; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on test.tab1.b1 = second.a2; +select a1, a2, b1, b2 from test.tab1 first any left join test.tab2 second on second.a2 = test.tab1.b1; + +select a1, a2, first.b1, second.b2 from test.tab1 first any left join test.tab2 second on b1 = a2; +select a1, a2, tab1.b1, tab2.b2 from test.tab1 first any left join test.tab2 second on b1 = a2; +select a1, a2, test.tab1.b1, test.tab2.b2 from test.tab1 first any left join test.tab2 second on b1 = a2; + + +select 'join on complex expression'; +select a2, b2 from test.tab2 any left join test.tab3 on a2 + b2 = a3 + 
b3; +select a2, b2 from test.tab2 any left join test.tab3 on a3 + tab3.b3 = a2 + b2; +select a2, b2 from test.tab2 second any left join test.tab3 on a3 + b3 = a2 + second.b2; +select a2, b2 from test.tab2 second any left join test.tab3 third on third.a3 + tab3.b3 = tab2.a2 + second.b2; +select a2, b2 from test.tab2 second any left join test.tab3 third on third.a3 + test.tab3.b3 = test.tab2.a2 + second.b2; + +select 'duplicate column names'; +select a1, tab1_copy.a1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +select a1, test.tab1_copy.a1 from test.tab1 any left join test.tab1_copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +select a1, copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +select a1, tab1_copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +select a1, test.tab1_copy.a1 from test.tab1 any left join test.tab1_copy copy on tab1.b1 + 3 = b1 + 2 FORMAT JSONEachRow; +