From f559767d2201e6f3a88a97cb72a5be6646b38585 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 15 Feb 2024 16:56:04 +0100 Subject: [PATCH 1/4] Fix finished_mutations_to_keep=0 for MergeTree (as docs says 0 is to keep everything) Signed-off-by: Azat Khuzhin --- src/Storages/StorageMergeTree.cpp | 5 ++++- .../02994_merge_tree_mutations_cleanup.reference | 4 ++++ .../02994_merge_tree_mutations_cleanup.sql.j2 | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.reference create mode 100644 tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 663e7f435b78..6bb8695d6731 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1479,8 +1479,11 @@ UInt64 StorageMergeTree::getCurrentMutationVersion( size_t StorageMergeTree::clearOldMutations(bool truncate) { - size_t finished_mutations_to_keep = truncate ? 0 : getSettings()->finished_mutations_to_keep; + size_t finished_mutations_to_keep = getSettings()->finished_mutations_to_keep; + if (!truncate && !finished_mutations_to_keep) + return 0; + finished_mutations_to_keep = truncate ? 0 : finished_mutations_to_keep; std::vector mutations_to_delete; { std::lock_guard lock(currently_processing_in_background_mutex); diff --git a/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.reference b/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.reference new file mode 100644 index 000000000000..8a3a458c753e --- /dev/null +++ b/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.reference @@ -0,0 +1,4 @@ +mutations after ALTER for data_rmt 1 +mutations after cleanup for data_rmt 1 +mutations after ALTER for data_mt 1 +mutations after cleanup for data_mt 1 diff --git a/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 b/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 new file mode 100644 index 000000000000..2740b6070c00 --- /dev/null +++ b/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 @@ -0,0 +1,14 @@ +drop table if exists data_rmt; +drop table if exists data_mt; + +create table data_rmt (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', '{table}') order by tuple() settings finished_mutations_to_keep=0, merge_tree_clear_old_parts_interval_seconds=1; +create table data_mt (key Int) engine=MergeTree() order by tuple() settings finished_mutations_to_keep=0, merge_tree_clear_old_parts_interval_seconds=1; + +{% for table in ['data_rmt', 'data_mt'] %} +alter table {{table}} delete where 1; +select 'mutations after ALTER for {{table}}', count() from system.mutations where database = currentDatabase() and table = '{{table}}'; +-- merge_tree_clear_old_parts_interval_seconds=1, but wait few seconds more +select sleep(5) settings function_sleep_max_microseconds_per_block=10e6 format Null; +select 'mutations after cleanup for {{table}}', count() from system.mutations where database = currentDatabase() and table = '{{table}}'; +drop table {{table}}; +{% endfor %} From 6c74ab18c0d42082d49786fddb4612c813f86c0e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 22 Feb 2024 15:10:52 +0100 Subject: [PATCH 2/4] Do not clean mutations for MergeTree on DROP PART (to match ReplicatedMergeTree) Even if this should be done, only related mutations should be cleaned, not all. Signed-off-by: Azat Khuzhin --- src/Storages/StorageMergeTree.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 6bb8695d6731..2345cc8e79de 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1902,8 +1902,6 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt } } - /// Old part objects is needed to be destroyed before clearing them from filesystem. - clearOldMutations(true); clearOldPartsFromFilesystem(); clearEmptyParts(); } @@ -1988,8 +1986,6 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont } } - /// Old parts are needed to be destroyed before clearing them from filesystem. - clearOldMutations(true); clearOldPartsFromFilesystem(); clearEmptyParts(); } From 4bd6d4456e2a6267677941437389558b1eed1402 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Feb 2024 21:25:48 +0100 Subject: [PATCH 3/4] Fix expectations for test_all_projection_files_are_dropped_when_part_is_dropped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since after DROP PART mutations are not cleaned anymore. Here an example: objects_at_the_end = list_objects(cluster) > assert objects_at_the_end == objects_empty_table E AssertionError: assert ['data/evt/iczupcswcatzvjikqwmovahturdht', 'data/oxq/dmkgqbdzeeiwmukogoxawfoxxnrqs'] == ['data/oxq/dmkgqbdzeeiwmukogoxawfoxxnrqs'] E At index 0 diff: 'data/evt/iczupcswcatzvjikqwmovahturdht' != 'data/oxq/dmkgqbdzeeiwmukogoxawfoxxnrqs' E Left contains one more item: 'data/oxq/dmkgqbdzeeiwmukogoxawfoxxnrqs' E Full diff: E [ E + 'data/evt/iczupcswcatzvjikqwmovahturdht', E 'data/oxq/dmkgqbdzeeiwmukogoxawfoxxnrqs', E ] test_replicated_zero_copy_projection_mutation/test.py:155: AssertionError And decoded paths: node1 :) select local_path from system.blob_storage_log where remote_path = 'data/evt/iczupcswcatzvjikqwmovahturdht' SELECT local_path FROM system.blob_storage_log WHERE remote_path = 'data/evt/iczupcswcatzvjikqwmovahturdht' Query id: 9ee5a9c0-c3b7-46ad-82bd-64c8bcbda78d ┌─local_path────────────────────────────────────────────────────────┐ │ store/bce/bcea71c9-35cd-4368-9504-c563253b1964/tmp_mutation_1.txt │ │ store/bce/bcea71c9-35cd-4368-9504-c563253b1964/mutation_1.txt │ └───────────────────────────────────────────────────────────────────┘ Signed-off-by: Azat Khuzhin --- .../test_replicated_zero_copy_projection_mutation/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_replicated_zero_copy_projection_mutation/test.py b/tests/integration/test_replicated_zero_copy_projection_mutation/test.py index 4839919e23dc..e0af85c38874 100644 --- a/tests/integration/test_replicated_zero_copy_projection_mutation/test.py +++ b/tests/integration/test_replicated_zero_copy_projection_mutation/test.py @@ -131,14 +131,13 @@ def test_all_projection_files_are_dropped_when_part_is_dropped( """ ) - objects_empty_table = list_objects(cluster) - node.query( "ALTER TABLE test_all_projection_files_are_dropped ADD projection b_order (SELECT a, b ORDER BY b)" ) node.query( "ALTER TABLE test_all_projection_files_are_dropped MATERIALIZE projection b_order" ) + objects_empty_table = list_objects(cluster) node.query( """ From 3c08e198adb9bb62f1480f94470741ae988b249e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 7 Mar 2024 09:59:19 +0100 Subject: [PATCH 4/4] tests: wait for mutations in 02994_merge_tree_mutations_cleanup.sql.j2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise it fails in private fork CI Co-authored-by: János Benjamin Antal --- .../0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 b/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 index 2740b6070c00..1b9be79dbe4d 100644 --- a/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 +++ b/tests/queries/0_stateless/02994_merge_tree_mutations_cleanup.sql.j2 @@ -5,7 +5,7 @@ create table data_rmt (key Int) engine=ReplicatedMergeTree('/tables/{database}/d create table data_mt (key Int) engine=MergeTree() order by tuple() settings finished_mutations_to_keep=0, merge_tree_clear_old_parts_interval_seconds=1; {% for table in ['data_rmt', 'data_mt'] %} -alter table {{table}} delete where 1; +alter table {{table}} delete where 1 settings mutations_sync = 1; select 'mutations after ALTER for {{table}}', count() from system.mutations where database = currentDatabase() and table = '{{table}}'; -- merge_tree_clear_old_parts_interval_seconds=1, but wait few seconds more select sleep(5) settings function_sleep_max_microseconds_per_block=10e6 format Null;