From ca22e7acfb4162fc39a8865562da9e660dd2e315 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 28 Oct 2021 21:00:33 +0300 Subject: [PATCH 1/2] Use existing local remote_replica_path var in StorageReplicatedMergeTree::dropReplica() --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f4a50f2e5531..acc81c2c2299 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -852,7 +852,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con LOG_INFO(logger, "Removing replica {}, marking it as lost", remote_replica_path); /// Mark itself lost before removing, because the following recursive removal may fail /// and partially dropped replica may be considered as alive one (until someone will mark it lost) - zookeeper->trySet(zookeeper_path + "/replicas/" + replica + "/is_lost", "1"); + zookeeper->trySet(remote_replica_path + "/is_lost", "1"); /// It may left some garbage if replica_path subtree are concurrently modified zookeeper->tryRemoveRecursive(remote_replica_path); if (zookeeper->exists(remote_replica_path)) From 60a411581f3178c963675d6c6b70fb7eb4a5c8c2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 28 Oct 2021 21:00:33 +0300 Subject: [PATCH 2/2] Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error If during removing replica_path from zookeeper, some error occurred (zookeeper goes away), then it may not remove everything from zookeeper. And on DETACH/ATTACH (or server restart, like stress tests does in the analysis from this comment [1]), it will trigger an error: The local set of parts of table test_1.alter_table_4 doesn't look like the set of parts in ZooKeeper: [1]: https://github.com/ClickHouse/ClickHouse/pull/28296#issuecomment-915829943 Fix this, by removing "metadata" at first, and only after this everything else, this will avoid this error, since on ATTACH such table will be marked as read-only. v2: forget to remove remote_replica_path itself v3: fix test_drop_replica by adding a check for remote_replica_path existence --- src/Storages/StorageReplicatedMergeTree.cpp | 37 ++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index acc81c2c2299..63bb8af91487 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -849,12 +849,47 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED); auto remote_replica_path = zookeeper_path + "/replicas/" + replica; + LOG_INFO(logger, "Removing replica {}, marking it as lost", remote_replica_path); /// Mark itself lost before removing, because the following recursive removal may fail /// and partially dropped replica may be considered as alive one (until someone will mark it lost) zookeeper->trySet(remote_replica_path + "/is_lost", "1"); + + /// NOTE: we should check for remote_replica_path existence, + /// since otherwise DROP REPLICA will fail if the replica had been already removed. + if (!zookeeper->exists(remote_replica_path)) + { + LOG_INFO(logger, "Removing replica {} does not exist", remote_replica_path); + return; + } + + /// Analog of removeRecursive(remote_replica_path) + /// but it removes "metadata" firstly. + /// + /// This will allow to mark table as readonly + /// and skip any checks of parts between on-disk and in the zookeeper. + /// + /// Without this removeRecursive() may remove "parts" first + /// and on DETACH/ATTACH (or server restart) it will trigger the following error: + /// + /// "The local set of parts of table X doesn't look like the set of parts in ZooKeeper" + /// + { + Strings children = zookeeper->getChildren(remote_replica_path); + + if (std::find(children.begin(), children.end(), "metadata") != children.end()) + zookeeper->remove(fs::path(remote_replica_path) / "metadata"); + + for (const auto & child : children) + { + if (child != "metadata") + zookeeper->removeRecursive(fs::path(remote_replica_path) / child); + } + + zookeeper->remove(remote_replica_path); + } + /// It may left some garbage if replica_path subtree are concurrently modified - zookeeper->tryRemoveRecursive(remote_replica_path); if (zookeeper->exists(remote_replica_path)) LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);