Skip to content

Commit

Permalink
Wake up the heartbeat thread immediately (#5081)
Browse files Browse the repository at this point in the history
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
  • Loading branch information
liujuncheng and oneflow-ci-bot committed Jun 16, 2021
1 parent 90d3277 commit 203f024
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
7 changes: 5 additions & 2 deletions oneflow/core/control/ctrl_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@ GrpcCtrlClient::GrpcCtrlClient(const ProcessCtx& process_ctx) : process_ctx_(pro
LoadServerRequest request;
LoadServerResponse response;
while (true) {
const auto wait_duration = std::chrono::seconds(sleep_second_dis(gen));
{
std::unique_lock<std::mutex> lck(need_heartbeat_thread_stop_mtx_);
if (need_heartbeat_thread_stop_) { break; }
const bool stopped = need_heartbeat_thread_stop_cv_.wait_for(
lck, wait_duration, [&]() { return need_heartbeat_thread_stop_; });
if (stopped) { break; }
}
for (size_t i = 0; i < rpc_client_.GetStubSize(); ++i) {
grpc::ClientContext client_ctx;
Expand All @@ -51,7 +54,6 @@ GrpcCtrlClient::GrpcCtrlClient(const ProcessCtx& process_ctx) : process_ctx_(pro
&client_ctx, request, &response))
<< "Machine " << i << " lost";
}
std::this_thread::sleep_for(std::chrono::seconds(sleep_second_dis(gen)));
}
});
}
Expand Down Expand Up @@ -118,6 +120,7 @@ void GrpcCtrlClient::StopHeartbeat() {
std::unique_lock<std::mutex> lck(need_heartbeat_thread_stop_mtx_);
already_stopped = need_heartbeat_thread_stop_;
need_heartbeat_thread_stop_ = true;
need_heartbeat_thread_stop_cv_.notify_all();
}
if (!already_stopped) { heartbeat_thread_.join(); }
}
Expand Down
1 change: 1 addition & 0 deletions oneflow/core/rpc/include/grpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class GrpcCtrlClient final : public CtrlClient {
ProcessCtx process_ctx_;
bool need_heartbeat_thread_stop_;
std::mutex need_heartbeat_thread_stop_mtx_;
std::condition_variable need_heartbeat_thread_stop_cv_;
std::thread heartbeat_thread_;
RpcClient rpc_client_;
};
Expand Down

0 comments on commit 203f024

Please sign in to comment.