Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MaterializeMySQL Engine column comments support #25199

Merged
merged 9 commits into from Jun 17, 2021
Merged
51 changes: 42 additions & 9 deletions src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp
Expand Up @@ -67,10 +67,10 @@ static inline String resolveDatabase(
return current_database != replica_clickhouse_database ? "" : replica_clickhouse_database;
}

static inline NamesAndTypesList getColumnsList(ASTExpressionList * columns_define)
static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition)
{
NamesAndTypesList columns_name_and_type;
for (const auto & declare_column_ast : columns_define->children)
for (const auto & declare_column_ast : columns_definition->children)
{
const auto & declare_column = declare_column_ast->as<MySQLParser::ASTDeclareColumn>();

Expand Down Expand Up @@ -117,6 +117,38 @@ static inline NamesAndTypesList getColumnsList(ASTExpressionList * columns_defin
return columns_name_and_type;
}

static ColumnsDescription getColumnsDescriptionFromList(const NamesAndTypesList & columns_name_and_type, const ASTExpressionList * columns_definition)
{
if (columns_name_and_type.size() != columns_definition->children.size())
throw Exception("Columns of different size provided.", ErrorCodes::LOGICAL_ERROR);

ColumnsDescription columns_description;
ColumnDescription column_description;

for (
auto [column_name_and_type, declare_column_ast] = std::tuple{columns_name_and_type.begin(), columns_definition->children.begin()};
column_name_and_type != columns_name_and_type.end();
column_name_and_type++,
declare_column_ast++
Comment on lines +128 to +132
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems a little over-complicated given that column_name_and_type was actually initialized as getColumnsList(columns_definition) a few line before call to this getColumnsDescription.

It could be simpler to first create columns_description (by using implementation of getColumnsList plus one check for comment in column_options - there are already such checks for other options so it will be a more correct place for that, and returning columns_description there instead) and then get columns_name_and_type from columns_description->getAllPhysical().

But I think it is also ok to leave your variant if you really want..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was what I tried initially. Problem for this implementation is modifying type in GetKeys (modifyPrimaryKeysToNonNullable) for ColumnsDescription, since there is no good way (or I didn't found it) to change internal ColumnDescription by iterator.
So in my implementation columns_name_and_type is mostly needed to create final types and names for columns, to later initialize descriptions from them.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm, why there is a problem with getKeys - I was suggesting to only modify getColumnsList to return ColumnsDescription, instead of NameAndTypesList. And on the next line to get columns_with_type_and_name via columns_description->getAllPhysical(). All other methods will still use columns_with_type_and_name. This will simplify the code a lot and there will be no need for getColumnsDescriptionFromList method, which actually does the same thing, which is done in getColumnsList but for other column features.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since getKeys changes columns_with_type_and_name that we generate with getColumnsList, and if we just modify getColumnsList to return ColumnsDescription - we will miss this changes in our descriptions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I agree.
By the way, lets rename getColumnsDescriptionFromList -> createColumnsDescription

)
{
const auto & declare_column = (*declare_column_ast)->as<MySQLParser::ASTDeclareColumn>();
String comment;
if (declare_column->column_options)
if (const auto * options = declare_column->column_options->as<MySQLParser::ASTDeclareOptions>())
if (options->changes.count("comment"))
comment = options->changes.at("comment")->as<ASTLiteral>()->value.safeGet<String>();

column_description.name = column_name_and_type->name;
column_description.type = column_name_and_type->type;
if (!comment.empty())
column_description.comment = std::move(comment);
columns_description.add(column_description);
}

return columns_description;
}

static NamesAndTypesList getNames(const ASTFunction & expr, ContextPtr context, const NamesAndTypesList & columns)
{
if (expr.arguments->children.empty())
Expand Down Expand Up @@ -157,8 +189,8 @@ static NamesAndTypesList modifyPrimaryKeysToNonNullable(const NamesAndTypesList
return non_nullable_primary_keys;
}

static inline std::tuple<NamesAndTypesList, NamesAndTypesList, NamesAndTypesList, NameSet> getKeys(
ASTExpressionList * columns_define, ASTExpressionList * indices_define, ContextPtr context, NamesAndTypesList & columns)
static std::tuple<NamesAndTypesList, NamesAndTypesList, NamesAndTypesList, NameSet> getKeys(
ASTExpressionList * columns_definition, ASTExpressionList * indices_define, ContextPtr context, NamesAndTypesList & columns)
{
NameSet increment_columns;
auto keys = makeASTFunction("tuple");
Expand Down Expand Up @@ -211,7 +243,7 @@ static inline std::tuple<NamesAndTypesList, NamesAndTypesList, NamesAndTypesList
}
}

for (const auto & declare_column_ast : columns_define->children)
for (const auto & declare_column_ast : columns_definition->children)
{
const auto & declare_column = declare_column_ast->as<MySQLParser::ASTDeclareColumn>();

Expand Down Expand Up @@ -393,6 +425,7 @@ ASTs InterpreterCreateImpl::getRewrittenQueries(

NamesAndTypesList columns_name_and_type = getColumnsList(create_defines->columns);
const auto & [primary_keys, unique_keys, keys, increment_columns] = getKeys(create_defines->columns, create_defines->indices, context, columns_name_and_type);
ColumnsDescription columns_description = getColumnsDescriptionFromList(columns_name_and_type, create_defines->columns);

if (primary_keys.empty())
throw Exception("The " + backQuoteIfNeed(mysql_database) + "." + backQuoteIfNeed(create_query.table)
Expand All @@ -415,7 +448,7 @@ ASTs InterpreterCreateImpl::getRewrittenQueries(
/// Add _sign and _version columns.
String sign_column_name = getUniqueColumnName(columns_name_and_type, "_sign");
String version_column_name = getUniqueColumnName(columns_name_and_type, "_version");
columns->set(columns->columns, InterpreterCreateQuery::formatColumns(columns_name_and_type));
columns->set(columns->columns, InterpreterCreateQuery::formatColumns(columns_description));
columns->columns->children.emplace_back(create_materialized_column_declaration(sign_column_name, "Int8", UInt64(1)));
columns->columns->children.emplace_back(create_materialized_column_declaration(version_column_name, "UInt64", UInt64(1)));

Expand Down Expand Up @@ -483,8 +516,8 @@ ASTs InterpreterRenameImpl::getRewrittenQueries(
ASTRenameQuery::Elements elements;
for (const auto & rename_element : rename_query.elements)
{
const auto & to_database = resolveDatabase(rename_element.to.database, mysql_database, mapped_to_database, context);
const auto & from_database = resolveDatabase(rename_element.from.database, mysql_database, mapped_to_database, context);
const auto & to_database = resolveDatabase(rename_element.to.database, mysql_database, mapped_to_database, context);
const auto & from_database = resolveDatabase(rename_element.from.database, mysql_database, mapped_to_database, context);

if ((from_database == mapped_to_database || to_database == mapped_to_database) && to_database != from_database)
throw Exception("Cannot rename with other database for external ddl query.", ErrorCodes::NOT_IMPLEMENTED);
Expand Down Expand Up @@ -647,7 +680,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries(
}
else if (alter_command->type == MySQLParser::ASTAlterCommand::RENAME_TABLE)
{
const auto & to_database = resolveDatabase(alter_command->new_database_name, mysql_database, mapped_to_database, context);
const auto & to_database = resolveDatabase(alter_command->new_database_name, mysql_database, mapped_to_database, context);

if (to_database != mapped_to_database)
throw Exception("Cannot rename with other database for external ddl query.", ErrorCodes::NOT_IMPLEMENTED);
Expand Down
18 changes: 15 additions & 3 deletions src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp
Expand Up @@ -61,7 +61,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType)

EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, test " + test_type + " COMMENT 'test_comment' NOT NULL)", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` " + mapped_type +
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` " + mapped_type + " COMMENT 'test_comment'" +
MATERIALIZEMYSQL_TABLE_COLUMNS + ") ENGINE = "
"ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");

Expand All @@ -75,7 +75,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType)

EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, test " + test_type + " COMMENT 'test_comment' UNSIGNED)", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` Nullable(U" + mapped_type + ")" +
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` Nullable(U" + mapped_type + ")" + " COMMENT 'test_comment'" +
MATERIALIZEMYSQL_TABLE_COLUMNS + ") ENGINE = "
"ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");

Expand All @@ -87,7 +87,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType)

EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, test " + test_type + " COMMENT 'test_comment' UNSIGNED NOT NULL)", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` U" + mapped_type +
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` U" + mapped_type + " COMMENT 'test_comment'" +
MATERIALIZEMYSQL_TABLE_COLUMNS + ") ENGINE = "
"ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");
}
Expand Down Expand Up @@ -223,3 +223,15 @@ TEST(MySQLCreateRewritten, UniqueKeysConvert)
std::string(MATERIALIZEMYSQL_TABLE_COLUMNS) +
") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(id, 18446744073709551) ORDER BY (code, name, tenant_id, id)");
}

TEST(MySQLCreateRewritten, QueryWithColumnComments)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be you can also make integration test in tests/integration/test_materialize_mysql_database/test.py?

{
tryRegisterFunctions();
const auto & context_holder = getContext();

EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, `test` INT COMMENT 'test_comment')", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`key` Int32, `test` Nullable(Int32) COMMENT 'test_comment'" +
std::string(MATERIALIZEMYSQL_TABLE_COLUMNS) +
") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)");
}
2 changes: 2 additions & 0 deletions tests/integration/parallel.json
Expand Up @@ -175,6 +175,8 @@
"test_materialize_mysql_database/test.py::test_system_parts_table[clickhouse_node1]",
"test_materialize_mysql_database/test.py::test_system_tables_table[clickhouse_node0]",
"test_materialize_mysql_database/test.py::test_system_tables_table[clickhouse_node1]",
"test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node0]",
"test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node1]",
"test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node0]",
"test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node1]",
"test_parts_delete_zookeeper/test.py::test_merge_doesnt_work_without_zookeeper",
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/parallel_skip.json
Expand Up @@ -179,6 +179,8 @@
"test_materialize_mysql_database/test.py::test_system_parts_table[clickhouse_node1]",
"test_materialize_mysql_database/test.py::test_system_tables_table[clickhouse_node0]",
"test_materialize_mysql_database/test.py::test_system_tables_table[clickhouse_node1]",
"test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node0]",
"test_materialize_mysql_database/test.py::test_materialize_with_column_comments[clickhouse_node1]",
"test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node0]",
"test_materialize_mysql_database/test.py::test_utf8mb4[clickhouse_node1]",
"test_parts_delete_zookeeper/test.py::test_merge_doesnt_work_without_zookeeper",
Expand Down
Expand Up @@ -843,6 +843,16 @@ def system_tables_test(clickhouse_node, mysql_node, service_name):
clickhouse_node.query("CREATE DATABASE system_tables_test ENGINE = MaterializeMySQL('{}:3306', 'system_tables_test', 'root', 'clickhouse')".format(service_name))
check_query(clickhouse_node, "SELECT partition_key, sorting_key, primary_key FROM system.tables WHERE database = 'system_tables_test' AND name = 'test'", "intDiv(id, 4294967)\tid\tid\n")

def materialize_with_column_comments_test(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS materialize_with_column_comments_test")
clickhouse_node.query("DROP DATABASE IF EXISTS materialize_with_column_comments_test")
mysql_node.query("CREATE DATABASE materialize_with_column_comments_test")
mysql_node.query("CREATE TABLE materialize_with_column_comments_test.test (id int NOT NULL PRIMARY KEY, value VARCHAR(255) COMMENT 'test comment') ENGINE=InnoDB")
clickhouse_node.query("CREATE DATABASE materialize_with_column_comments_test ENGINE = MaterializeMySQL('{}:3306', 'materialize_with_column_comments_test', 'root', 'clickhouse')".format(service_name))
check_query(clickhouse_node, "DESCRIBE TABLE materialize_with_column_comments_test.test", "id\tInt32\t\t\t\t\t\nvalue\tNullable(String)\t\t\ttest comment\t\t\n_sign\tInt8\tMATERIALIZED\t1\t\t\t\n_version\tUInt64\tMATERIALIZED\t1\t\t\t\n")
clickhouse_node.query("DROP DATABASE materialize_with_column_comments_test")
mysql_node.query("DROP DATABASE materialize_with_column_comments_test")

def move_to_prewhere_and_column_filtering(clickhouse_node, mysql_node, service_name):
clickhouse_node.query("DROP DATABASE IF EXISTS cond_on_key_col")
mysql_node.query("DROP DATABASE IF EXISTS cond_on_key_col")
Expand Down
5 changes: 5 additions & 0 deletions tests/integration/test_materialize_mysql_database/test.py
Expand Up @@ -218,6 +218,11 @@ def test_system_tables_table(started_cluster, started_mysql_8_0, started_mysql_5
materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_5_7, "mysql57")
materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_8_0, "mysql80")

@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary])
def test_materialize_with_column_comments(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
materialize_with_ddl.materialize_with_column_comments_test(clickhouse_node, started_mysql_5_7, "mysql57")
materialize_with_ddl.materialize_with_column_comments_test(clickhouse_node, started_mysql_8_0, "mysql80")


@pytest.mark.parametrize(('clickhouse_node'), [node_disable_bytes_settings, node_disable_rows_settings])
def test_mysql_settings(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node):
Expand Down