From d2f10d2d5c0a23b00e6af91adb1fd904c279b0d5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 19 Nov 2025 10:10:58 +1030 Subject: [PATCH] lightningd: fix race with mutual connect. 65dccea5bde4 "pytest: fix flake in test_reconnect_signed" accidentally introduced a bug, where the connect command may not return. If we call "connect" while a connection is still being processed through the peer_connected hooks, we would call peer_channels_cleanup(), which (if the peer has no channels) would free the peer. Then when the peer_connected hook returned, it would look up the peer, see it was gone, and silently return. The connect_succeeded() function was never called, and the connect command was never woken. Signed-off-by: Rusty Russell Changelog-None: bug introduced this release. --- lightningd/connect_control.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/lightningd/connect_control.c b/lightningd/connect_control.c index c2a28522f069..e57bc975a0b2 100644 --- a/lightningd/connect_control.c +++ b/lightningd/connect_control.c @@ -219,21 +219,28 @@ static struct command_result *json_connect(struct command *cmd, /* If we know about peer, see if it's already connected. */ peer = peer_by_id(cmd->ld, &id_addr.id); - if (peer && peer->connected == PEER_CONNECTED) { - log_debug(cmd->ld->log, "Already connected via %s", - fmt_wireaddr_internal(tmpctx, + if (peer) { + switch (peer->connected) { + case PEER_CONNECTED: + log_debug(cmd->ld->log, "Already connected via %s", + fmt_wireaddr_internal(tmpctx, &peer->addr)); - return connect_cmd_succeed(cmd, peer, - peer->connected_incoming, - &peer->addr); + return connect_cmd_succeed(cmd, peer, + peer->connected_incoming, + &peer->addr); + case PEER_DISCONNECTED: + /* When a peer disconnects, we give subds time to clean themselves up + * (this lets connectd ensure they've seen the final messages). But + * now it's going to try to reconnect, we've gotta force them out. 
*/ + peer_channels_cleanup(peer); + break; + case PEER_CONNECTING: + /* Just wait until connection finished. Though we still ask connectd to connect, + * it's going to ignore it. */ + break; + } } - /* When a peer disconnects, we give subds time to clean themselves up - * (this lets connectd ensure they've seen the final messages). But - * now it's going to try to reconnect, we've gotta force them out. */ - if (peer) - peer_channels_cleanup(peer); - subd_send_msg(cmd->ld->connectd, take(towire_connectd_connect_to_peer(NULL, &id_addr.id, addr, true,