From ac925b00f4e4df530f19b689130e009b498ee53f Mon Sep 17 00:00:00 2001 From: Davide Faconti Date: Tue, 3 Feb 2026 09:09:52 +0100 Subject: [PATCH] Fix Groot2Publisher destructor infinite loop (#1057) The destructor could hang indefinitely when the ZMQ server thread was waiting on recv() while active_server_ remained true. Changes: - Set active_server_=false before joining threads - Call zmq_context.shutdown() to interrupt blocking recv() - Add try-catch around ZMQ operations to handle context termination - Reorder destructor to remove hooks after threads are joined Includes tests for: - Destructor completion after exception - Destructor with multiple tree nodes attached - Rapid create/destroy cycles Co-Authored-By: Claude Opus 4.5 --- src/loggers/groot2_publisher.cpp | 35 ++++++++++++++++--- tests/gtest_groot2_publisher.cpp | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/loggers/groot2_publisher.cpp b/src/loggers/groot2_publisher.cpp index 22219b732..c7b23a094 100644 --- a/src/loggers/groot2_publisher.cpp +++ b/src/loggers/groot2_publisher.cpp @@ -183,9 +183,15 @@ std::chrono::milliseconds Groot2Publisher::maxHeartbeatDelay() const Groot2Publisher::~Groot2Publisher() { - removeAllHooks(); - + // First, signal threads to stop _p->active_server = false; + + // Shutdown the ZMQ context to unblock any recv() calls immediately. + // This prevents waiting for the recv timeout (100ms) before threads can exit. + // Context shutdown will cause all blocking operations to return with ETERM error. + _p->context.shutdown(); + + // Now join the threads - they should exit quickly if(_p->server_thread.joinable()) { _p->server_thread.join(); @@ -196,6 +202,9 @@ Groot2Publisher::~Groot2Publisher() _p->heartbeat_thread.join(); } + // Remove hooks after threads are stopped to avoid race conditions + removeAllHooks(); + flush(); { @@ -260,9 +269,17 @@ void Groot2Publisher::serverLoop() while(_p->active_server) { zmq::multipart_t requestMsg; - if(!requestMsg.recv(socket) || requestMsg.size() == 0) + try { - continue; + if(!requestMsg.recv(socket) || requestMsg.size() == 0) + { + continue; + } + } + catch(const zmq::error_t&) + { + // Context was terminated or socket error - exit the loop + break; } // this heartbeat will help establishing if Groot is connected or not @@ -490,7 +507,15 @@ void Groot2Publisher::serverLoop() } } // send the reply - reply_msg.send(socket); + try + { + reply_msg.send(socket); + } + catch(const zmq::error_t&) + { + // Context was terminated or socket error - exit the loop + break; + } } } diff --git a/tests/gtest_groot2_publisher.cpp b/tests/gtest_groot2_publisher.cpp index 41a3d1432..86a8e6cfe 100644 --- a/tests/gtest_groot2_publisher.cpp +++ b/tests/gtest_groot2_publisher.cpp @@ -18,6 +18,17 @@ static const char* xml_text = R"( )"; +static const char* xml_text_sequence = R"( + + + + + + + + +)"; + void throwRuntimeError() { BT::BehaviorTreeFactory factory; @@ -46,3 +57,50 @@ TEST(Groot2PublisherTest, EnsureNoInfiniteLoopOnThrow) }, ::testing::ExitedWithCode(EXIT_SUCCESS), ".*"); } + +// Test that destructor completes quickly even after exception +// This test runs multiple times to catch race conditions +TEST(Groot2PublisherTest, DestructorCompletesAfterException) +{ + for(int i = 0; i < 5; i++) + { + BT::BehaviorTreeFactory factory; + factory.registerSimpleAction("ThrowRuntimeError", + [](BT::TreeNode&) -> BT::NodeStatus { + throw BT::RuntimeError("Test exception"); + }); + + auto tree = factory.createTreeFromText(xml_text); + BT::Groot2Publisher publisher(tree, 1667 + i * 2); + EXPECT_THROW(tree.tickExactlyOnce(), BT::RuntimeError); + } +} + +// Test that destructor completes quickly when tree has multiple nodes +TEST(Groot2PublisherTest, DestructorCompletesWithMultipleNodes) +{ + BT::BehaviorTreeFactory factory; + factory.registerSimpleAction("ThrowRuntimeError", [](BT::TreeNode&) -> BT::NodeStatus { + throw BT::RuntimeError("Test exception in sequence"); + }); + + auto tree = factory.createTreeFromText(xml_text_sequence); + BT::Groot2Publisher publisher(tree, 1677); + EXPECT_THROW(tree.tickExactlyOnce(), BT::RuntimeError); +} + +// Test rapid creation and destruction of publishers +TEST(Groot2PublisherTest, RapidCreateDestroy) +{ + for(int i = 0; i < 3; i++) + { + BT::BehaviorTreeFactory factory; + factory.registerSimpleAction( + "ThrowRuntimeError", + [](BT::TreeNode&) -> BT::NodeStatus { throw BT::RuntimeError("Rapid test"); }); + + auto tree = factory.createTreeFromText(xml_text); + BT::Groot2Publisher publisher(tree, 1687 + i * 2); + EXPECT_THROW(tree.tickExactlyOnce(), BT::RuntimeError); + } +}