diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index bad67e494c4c..01e8d5943340 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Backups/WithRetries.h b/src/Backups/WithRetries.h index 3a6e28996b93..f795a963911e 100644 --- a/src/Backups/WithRetries.h +++ b/src/Backups/WithRetries.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Common/ZooKeeper/ZooKeeperRetries.h similarity index 100% rename from src/Storages/MergeTree/ZooKeeperRetries.h rename to src/Common/ZooKeeper/ZooKeeperRetries.h diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 7daf9babf9f4..d33655538757 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace zkutil diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index bc23204e7d39..29f3183be646 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 37fe9074950c..7a2b830b0883 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -426,7 +428,30 @@ void ReadFromSystemZooKeeper::applyFilters() void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) { - zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); + QueryStatusPtr query_status = context->getProcessListElement(); + + const auto & settings = context->getSettingsRef(); + /// Use insert settings for now in order not to introduce new settings. + /// Hopefully insert settings will also be unified and replaced with some generic retry settings. + ZooKeeperRetriesInfo retries_seetings( + settings.insert_keeper_max_retries, + settings.insert_keeper_retry_initial_backoff_ms, + settings.insert_keeper_retry_max_backoff_ms); + + ZooKeeperWithFaultInjection::Ptr zookeeper; + /// Handles reconnects when needed + auto get_zookeeper = [&] () + { + if (!zookeeper || zookeeper->expired()) + { + zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + context->getZooKeeper(), + "", nullptr); + } + return zookeeper; + }; if (paths.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -448,6 +473,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) std::unordered_set added; while (!paths.empty()) { + if (query_status) + query_status->checkTimeLimit(); + list_tasks.clear(); std::vector paths_to_list; while (!paths.empty() && static_cast(list_tasks.size()) < max_inflight_requests) @@ -470,7 +498,10 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) paths_to_list.emplace_back(task.path_corrected); list_tasks.emplace_back(std::move(task)); } - auto list_responses = zookeeper->tryGetChildren(paths_to_list); + + zkutil::ZooKeeper::MultiTryGetChildrenResponse list_responses; + ZooKeeperRetriesControl("", nullptr, retries_seetings, query_status).retryLoop( + [&]() { list_responses = get_zookeeper()->tryGetChildren(paths_to_list); }); struct GetTask { @@ -514,7 +545,9 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) } } - auto get_responses = zookeeper->tryGet(paths_to_get); + zkutil::ZooKeeper::MultiTryGetResponse get_responses; + ZooKeeperRetriesControl("", nullptr, retries_seetings, query_status).retryLoop( + [&]() { get_responses = get_zookeeper()->tryGet(paths_to_get); }); for (size_t i = 0, size = get_tasks.size(); i < size; ++i) { diff --git a/tests/queries/0_stateless/02975_system_zookeeper_retries.reference b/tests/queries/0_stateless/02975_system_zookeeper_retries.reference new file mode 100644 index 000000000000..9a636ba56d04 --- /dev/null +++ b/tests/queries/0_stateless/02975_system_zookeeper_retries.reference @@ -0,0 +1,3 @@ +/keeper api_version +/keeper feature_flags +1 diff --git a/tests/queries/0_stateless/02975_system_zookeeper_retries.sql b/tests/queries/0_stateless/02975_system_zookeeper_retries.sql new file mode 100644 index 000000000000..8b402ec6d65c --- /dev/null +++ b/tests/queries/0_stateless/02975_system_zookeeper_retries.sql @@ -0,0 +1,22 @@ +-- Tags: zookeeper, no-parallel, no-fasttest + +SELECT path, name +FROM system.zookeeper +WHERE path = '/keeper' +ORDER BY path, name +SETTINGS + insert_keeper_retry_initial_backoff_ms = 1, + insert_keeper_retry_max_backoff_ms = 20, + insert_keeper_fault_injection_probability=0.3, + insert_keeper_fault_injection_seed=4, + log_comment='02975_system_zookeeper_retries'; + + +SYSTEM FLUSH LOGS; + +-- Check that there where zk session failures +SELECT ProfileEvents['ZooKeeperHardwareExceptions'] > 0 +FROM system.query_log +WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND log_comment='02975_system_zookeeper_retries' +ORDER BY event_time_microseconds DESC +LIMIT 1;