Skip to content

Commit 64a017d

Browse files
committed
lightningd: more graceful shutdown.
Be more graceful in shutting down: this should fix the issue where bookkeeper gets upset that its commands are rejected during shutdown, and generally make things more graceful. 1. Stop any new RPC connections. 2. Stop any per-peer daemons (channeld, etc). 3. Shut down plugins. 4. Stop all existing RPC connections. 5. Stop global daemons. 6. Free up peer, channel, HTLC data structures. 7. Close database. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Changelog-Changed: Plugins: RPC operations are now still available during shutdown.
1 parent 697a1c1 commit 64a017d

File tree

10 files changed

+80
-53
lines changed

10 files changed

+80
-53
lines changed

lightningd/gossip_control.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ static void handle_local_channel_update(struct lightningd *ld, const u8 *msg)
125125
* us. */
126126
channel = any_channel_by_scid(ld, &scid, true);
127127
if (!channel) {
128-
log_broken(ld->log, "Local update for bad scid %s",
129-
type_to_string(tmpctx, struct short_channel_id,
130-
&scid));
128+
log_unusual(ld->log, "Local update for bad scid %s",
129+
type_to_string(tmpctx, struct short_channel_id,
130+
&scid));
131131
return;
132132
}
133133

lightningd/jsonrpc.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -933,11 +933,6 @@ parse_request(struct json_connection *jcon, const jsmntok_t tok[])
933933
json_tok_full(jcon->buffer, method));
934934
}
935935

936-
if (jcon->ld->state == LD_STATE_SHUTDOWN) {
937-
return command_fail(c, LIGHTNINGD_SHUTDOWN,
938-
"lightningd is shutting down");
939-
}
940-
941936
rpc_hook = tal(c, struct rpc_command_hook_payload);
942937
rpc_hook->cmd = c;
943938
/* Duplicate since we might outlive the connection */
@@ -1268,8 +1263,21 @@ void jsonrpc_listen(struct jsonrpc *jsonrpc, struct lightningd *ld)
12681263

12691264
if (listen(fd, 128) != 0)
12701265
err(1, "Listening on '%s'", rpc_filename);
1271-
jsonrpc->rpc_listener = io_new_listener(
1272-
ld->rpc_filename, fd, incoming_jcon_connected, ld);
1266+
1267+
/* All conns will be tal children of jsonrpc: good for freeing later! */
1268+
jsonrpc->rpc_listener
1269+
= io_new_listener(jsonrpc, fd, incoming_jcon_connected, ld);
1270+
}
1271+
1272+
void jsonrpc_stop_listening(struct jsonrpc *jsonrpc)
1273+
{
1274+
jsonrpc->rpc_listener = tal_free(jsonrpc->rpc_listener);
1275+
}
1276+
1277+
void jsonrpc_stop_all(struct lightningd *ld)
1278+
{
1279+
/* Closes all conns. */
1280+
ld->jsonrpc = tal_free(ld->jsonrpc);
12731281
}
12741282

12751283
static struct command_result *param_command(struct command *cmd,

lightningd/jsonrpc.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,24 @@ void jsonrpc_setup(struct lightningd *ld);
173173

174174

175175
/**
176-
* Start listeing on ld->rpc_filename.
176+
* Start listening on ld->rpc_filename.
177177
*
178178
* Sets up the listener effectively starting the RPC interface.
179179
*/
180180
void jsonrpc_listen(struct jsonrpc *rpc, struct lightningd *ld);
181181

182+
/**
183+
* Stop listening on ld->rpc_filename.
184+
*
185+
* No new connections are accepted from here on.
186+
*/
187+
void jsonrpc_stop_listening(struct jsonrpc *jsonrpc);
188+
189+
/**
190+
* Kill any remaining JSON-RPC connections.
191+
*/
192+
void jsonrpc_stop_all(struct lightningd *ld);
193+
182194
/**
183195
* Add a new command/method to the JSON-RPC interface.
184196
*

lightningd/lightningd.c

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ static const char *find_daemon_dir(struct lightningd *ld, const char *argv0)
515515
* is an awesome runtime memory usage detector for C and C++ programs). In
516516
* some ways it would be neater not to do this, but it turns out some
517517
* transient objects still need cleaning. */
518-
static void shutdown_subdaemons(struct lightningd *ld)
518+
static void free_all_channels(struct lightningd *ld)
519519
{
520520
struct peer *p;
521521

@@ -529,19 +529,6 @@ static void shutdown_subdaemons(struct lightningd *ld)
529529
* writes, which must be inside a transaction. */
530530
db_begin_transaction(ld->wallet->db);
531531

532-
/* Let everyone shutdown cleanly. */
533-
close(ld->hsm_fd);
534-
/*~ The three "global" daemons, which we shutdown explicitly: we
535-
* give them 10 seconds to exit gracefully before killing them. */
536-
ld->connectd = subd_shutdown(ld->connectd, 10);
537-
ld->gossip = subd_shutdown(ld->gossip, 10);
538-
ld->hsm = subd_shutdown(ld->hsm, 10);
539-
540-
/*~ Closing the hsmd means all other subdaemons should be exiting;
541-
* deal with that cleanly before we start freeing internal
542-
* structures. */
543-
subd_shutdown_remaining(ld);
544-
545532
/* Now we free all the HTLCs */
546533
free_htlcs(ld, NULL);
547534

@@ -579,6 +566,18 @@ static void shutdown_subdaemons(struct lightningd *ld)
579566
db_commit_transaction(ld->wallet->db);
580567
}
581568

569+
static void shutdown_global_subdaemons(struct lightningd *ld)
570+
{
571+
/* Let everyone shutdown cleanly. */
572+
close(ld->hsm_fd);
573+
574+
/*~ The three "global" daemons, which we shutdown explicitly: we
575+
* give them 10 seconds to exit gracefully before killing them. */
576+
ld->connectd = subd_shutdown(ld->connectd, 10);
577+
ld->gossip = subd_shutdown(ld->gossip, 10);
578+
ld->hsm = subd_shutdown(ld->hsm, 10);
579+
}
580+
582581
/*~ Our wallet logic needs to know what outputs we might be interested in. We
583582
* use BIP32 (a.k.a. "HD wallet") to generate keys from a single seed, so we
584583
* keep the maximum-ever-used key index in the db, and add them all to the
@@ -1200,7 +1199,10 @@ int main(int argc, char *argv[])
12001199
assert(io_loop_ret == ld);
12011200
log_debug(ld->log, "io_loop_with_timers: %s", __func__);
12021201

1203-
/* Fail JSON RPC requests and ignore plugin's responses */
1202+
/* Stop *new* JSON RPC requests. */
1203+
jsonrpc_stop_listening(ld->jsonrpc);
1204+
1205+
/* Give permission for things to get destroyed without getting upset. */
12041206
ld->state = LD_STATE_SHUTDOWN;
12051207

12061208
stop_fd = -1;
@@ -1221,13 +1223,24 @@ int main(int argc, char *argv[])
12211223

12221224
/* We're not going to collect our children. */
12231225
remove_sigchild_handler(sigchld_conn);
1224-
shutdown_subdaemons(ld);
12251226

1226-
/* Tell plugins we're shutting down, closes the db. */
1227+
/* Get rid of per-channel subdaemons. */
1228+
subd_shutdown_nonglobals(ld);
1229+
1230+
/* Tell plugins we're shutting down, use force if necessary. */
12271231
shutdown_plugins(ld);
12281232

1229-
/* Cleanup JSON RPC separately: destructors assume some list_head * in ld */
1230-
tal_free(ld->jsonrpc);
1233+
/* Now kill any remaining connections */
1234+
jsonrpc_stop_all(ld);
1235+
1236+
/* Get rid of major subdaemons. */
1237+
shutdown_global_subdaemons(ld);
1238+
1239+
/* Clean up internal peer/channel/htlc structures. */
1240+
free_all_channels(ld);
1241+
1242+
/* Now close database */
1243+
ld->wallet->db = tal_free(ld->wallet->db);
12311244

12321245
/* Clean out our HTLC maps, since they use malloc. */
12331246
htlc_in_map_clear(&ld->htlcs_in);

lightningd/plugin.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -581,11 +581,6 @@ static const char *plugin_response_handle(struct plugin *plugin,
581581
"Received a JSON-RPC response for non-existent request");
582582
}
583583

584-
/* Ignore responses when shutting down */
585-
if (plugin->plugins->ld->state == LD_STATE_SHUTDOWN) {
586-
return NULL;
587-
}
588-
589584
/* We expect the request->cb to copy if needed */
590585
pd = plugin_detect_destruction(plugin);
591586
request->response_cb(plugin->buffer, toks, idtok, request->response_cb_arg);
@@ -2124,9 +2119,6 @@ void shutdown_plugins(struct lightningd *ld)
21242119
{
21252120
struct plugin *p, *next;
21262121

2127-
/* The next io_loop does not need db access, close it. */
2128-
ld->wallet->db = tal_free(ld->wallet->db);
2129-
21302122
/* Tell them all to shutdown; if they care. */
21312123
list_for_each_safe(&ld->plugins->plugins, p, next, list) {
21322124
/* Kill immediately, deletes self from list. */

lightningd/plugin_hook.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ static void plugin_hook_callback(const char *buffer, const jsmntok_t *toks,
167167
tal_del_destructor(last, plugin_hook_killed);
168168
tal_free(last);
169169

170-
if (r->ld->state == LD_STATE_SHUTDOWN) {
170+
/* Actually, if it dies during shutdown, *don't* process result! */
171+
if (!buffer && r->ld->state == LD_STATE_SHUTDOWN) {
171172
log_debug(r->ld->log,
172173
"Abandoning plugin hook call due to shutdown");
173174
return;

lightningd/subd.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -902,15 +902,13 @@ struct subd *subd_shutdown(struct subd *sd, unsigned int seconds)
902902
return tal_free(sd);
903903
}
904904

905-
void subd_shutdown_remaining(struct lightningd *ld)
905+
void subd_shutdown_nonglobals(struct lightningd *ld)
906906
{
907-
struct subd *subd;
907+
struct subd *subd, *next;
908908

909-
/* We give them a second to finish exiting, before we kill
910-
* them in destroy_subd() */
911-
sleep(1);
912-
913-
while ((subd = list_top(&ld->subds, struct subd, list)) != NULL) {
909+
list_for_each_safe(&ld->subds, subd, next, list) {
910+
if (!subd->channel)
911+
continue;
914912
/* Destructor removes from list */
915913
io_close(subd->conn);
916914
}

lightningd/subd.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ struct subd_req *subd_req_(const tal_t *ctx,
211211
void subd_release_channel(struct subd *owner, const void *channel);
212212

213213
/**
214-
* subd_shutdown - try to politely shut down a subdaemon.
214+
* subd_shutdown - try to politely shut down a (global) subdaemon.
215215
* @subd: subd to shutdown.
216216
* @seconds: maximum seconds to wait for it to exit.
217217
*
@@ -225,13 +225,10 @@ void subd_release_channel(struct subd *owner, const void *channel);
225225
struct subd *subd_shutdown(struct subd *subd, unsigned int seconds);
226226

227227
/**
228-
* subd_shutdown_remaining - kill all remaining (per-peer) subds
228+
* subd_shutdown_nonglobals - kill all per-peer subds
229229
* @ld: lightningd
230-
*
231-
* They should already be exiting (since we shutdown hsmd), but
232-
* make sure they have.
233230
*/
234-
void subd_shutdown_remaining(struct lightningd *ld);
231+
void subd_shutdown_nonglobals(struct lightningd *ld);
235232

236233
/* Ugly helper to get full pathname of the current binary. */
237234
const char *find_my_abspath(const tal_t *ctx, const char *argv0);

lightningd/test/run-find_my_abspath.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ void jsonrpc_listen(struct jsonrpc *rpc UNNEEDED, struct lightningd *ld UNNEEDED
117117
/* Generated stub for jsonrpc_setup */
118118
void jsonrpc_setup(struct lightningd *ld UNNEEDED)
119119
{ fprintf(stderr, "jsonrpc_setup called!\n"); abort(); }
120+
/* Generated stub for jsonrpc_stop_all */
121+
void jsonrpc_stop_all(struct lightningd *ld UNNEEDED)
122+
{ fprintf(stderr, "jsonrpc_stop_all called!\n"); abort(); }
123+
/* Generated stub for jsonrpc_stop_listening */
124+
void jsonrpc_stop_listening(struct jsonrpc *jsonrpc UNNEEDED)
125+
{ fprintf(stderr, "jsonrpc_stop_listening called!\n"); abort(); }
120126
/* Generated stub for load_channels_from_wallet */
121127
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld UNNEEDED)
122128
{ fprintf(stderr, "load_channels_from_wallet called!\n"); abort(); }

tests/test_plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2542,7 +2542,7 @@ def test_plugin_shutdown(node_factory):
25422542
l1.rpc.plugin_start(p, dont_shutdown=True)
25432543
l1.rpc.stop()
25442544
l1.daemon.wait_for_logs(['test_libplugin: shutdown called',
2545-
'misc_notifications.py: via lightningd shutdown, datastore failed',
2545+
'misc_notifications.py: .* Connection refused',
25462546
'test_libplugin: failed to self-terminate in time, killing.'])
25472547

25482548

0 commit comments

Comments
 (0)