From 0a80d451d21f1e8cc1314142ab9bf0ceb06d686a Mon Sep 17 00:00:00 2001 From: Eridanus Date: Mon, 3 Oct 2022 00:09:22 +0800 Subject: [PATCH 1/3] Rewrite countDistinctIf with count_distinct_implementation configuration. --- src/Core/Settings.h | 3 ++- src/Core/SettingsChangesHistory.h | 1 + src/Interpreters/TreeRewriter.cpp | 9 +++++++++ .../02456_optimize_rewrite_count_distinct_if.reference | 6 ++++++ .../02456_optimize_rewrite_count_distinct_if.sql | 8 ++++++++ 5 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference create mode 100644 tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 26b6fd56ade7..cb49182c83e5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -29,7 +29,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) /** List of settings: type, name, default value, description, flags * - * This looks rather unconvenient. It is done that way to avoid repeating settings in different places. + * This looks rather inconvenient. It is done that way to avoid repeating settings in different places. * Note: as an alternative, we could implement settings to be completely dynamic in form of map: String -> Field, * but we are not going to do it, because settings is used everywhere as static struct fields. 
* @@ -480,6 +480,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ + M(Bool, optimize_rewrite_count_distinct_if, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \ M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index b78b812da865..419e731b81c4 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -78,6 +78,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history = { + {"22.10", {{"optimize_rewrite_count_distinct_if", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 
2f5bfd009389..e938aa00d0f2 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -103,6 +103,9 @@ using CustomizeCountDistinctVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinct>>, true>; +char countdistinctif[] = "countdistinctif"; +using CustomizeCountDistinctIfVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<countdistinctif>>, true>; + char in[] = "in"; using CustomizeInVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeFunctionsData<in>>, true>; @@ -1424,6 +1427,12 @@ void TreeRewriter::normalize( CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"}; CustomizeIfDistinctVisitor(data_distinct_if).visit(query); + if (settings.optimize_rewrite_count_distinct_if) + { + CustomizeCountDistinctIfVisitor::Data data_count_distinct_if{settings.count_distinct_implementation.toString() + "If"}; + CustomizeCountDistinctIfVisitor(data_count_distinct_if).visit(query); + } + ExistsExpressionVisitor::Data exists; ExistsExpressionVisitor(exists).visit(query); diff --git a/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference b/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference new file mode 100644 index 000000000000..85aab5282d3a --- /dev/null +++ b/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference @@ -0,0 +1,6 @@ +2 +SELECT countDistinctIf(number % 10, (number % 5) = 2) +FROM numbers_mt(100000000) +2 +SELECT uniqExactIf(number % 10, (number % 5) = 2) +FROM numbers_mt(100000000) diff --git a/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql b/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql new file mode 100644 index 000000000000..0b7ab21e0358 --- /dev/null +++ b/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql @@ -0,0 +1,8 @@ +SET optimize_rewrite_count_distinct_if = FALSE; +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); + +SET optimize_rewrite_count_distinct_if = 
TRUE; +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); + From 4ad9898217bcc7d6f9eb9fb65a0b4e2f07d485d6 Mon Sep 17 00:00:00 2001 From: Eridanus <45489268+Eridanus117@users.noreply.github.com> Date: Sat, 28 Jan 2023 04:19:16 +0000 Subject: [PATCH 2/3] fix version --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 63452e2d1711..338021c965fe 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,12 +85,12 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}, + {"optimize_rewrite_count_distinct_if", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.10", {{"optimize_rewrite_count_distinct_if", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, From e4032e7eecbb28fb7d66dda8ea4e8ff3aaf1d9bf Mon Sep 17 00:00:00 2001 From: Eridanus <45489268+Eridanus117@users.noreply.github.com> Date: Sat, 28 Jan 2023 09:43:56 +0000 Subject: [PATCH 3/3] fix test --- ...=> 02541_optimize_rewrite_count_distinct_if.reference} | 4 ++-- ...f.sql => 02541_optimize_rewrite_count_distinct_if.sql} | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) rename tests/queries/0_stateless/{02456_optimize_rewrite_count_distinct_if.reference => 02541_optimize_rewrite_count_distinct_if.reference} (66%) rename tests/queries/0_stateless/{02456_optimize_rewrite_count_distinct_if.sql => 02541_optimize_rewrite_count_distinct_if.sql} (78%) diff --git a/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference b/tests/queries/0_stateless/02541_optimize_rewrite_count_distinct_if.reference similarity index 66% rename from 
tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference rename to tests/queries/0_stateless/02541_optimize_rewrite_count_distinct_if.reference index 85aab5282d3a..6108ea25324c 100644 --- a/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.reference +++ b/tests/queries/0_stateless/02541_optimize_rewrite_count_distinct_if.reference @@ -1,6 +1,6 @@ 2 SELECT countDistinctIf(number % 10, (number % 5) = 2) -FROM numbers_mt(100000000) +FROM numbers_mt(1000000) 2 SELECT uniqExactIf(number % 10, (number % 5) = 2) -FROM numbers_mt(100000000) +FROM numbers_mt(1000000) diff --git a/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql b/tests/queries/0_stateless/02541_optimize_rewrite_count_distinct_if.sql similarity index 78% rename from tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql rename to tests/queries/0_stateless/02541_optimize_rewrite_count_distinct_if.sql index 0b7ab21e0358..ceda9a16f489 100644 --- a/tests/queries/0_stateless/02456_optimize_rewrite_count_distinct_if.sql +++ b/tests/queries/0_stateless/02541_optimize_rewrite_count_distinct_if.sql @@ -1,8 +1,8 @@ SET optimize_rewrite_count_distinct_if = FALSE; -SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); -EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(1000000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(1000000); SET optimize_rewrite_count_distinct_if = TRUE; -SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); -EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(100000000); +SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(1000000); +EXPLAIN SYNTAX SELECT countDistinctIf(number % 10, number % 5 = 2) FROM numbers_mt(1000000);