From 7edd8ce661f86a235ec69e4bd9229853994ec25a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:37:28 +1030 Subject: [PATCH 01/20] devtools: enhance dump-gossipstore to show some details of messages. Not a complete decode, just the highlights (what channel was announced or updated, what node was announced). Signed-off-by: Rusty Russell --- devtools/dump-gossipstore.c | 74 +++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/devtools/dump-gossipstore.c b/devtools/dump-gossipstore.c index 2d69b0637040..eec6e3c16869 100644 --- a/devtools/dump-gossipstore.c +++ b/devtools/dump-gossipstore.c @@ -14,6 +14,61 @@ #define GSTORE_MAJOR 0 #define GSTORE_MINOR 15 +static bool is_channel_announce(const u8 *msg, struct short_channel_id **scid) +{ + secp256k1_ecdsa_signature sig; + u8 *features; + struct bitcoin_blkid chain_hash; + struct node_id node; + struct pubkey key; + + if (fromwire_peektype(msg) != WIRE_CHANNEL_ANNOUNCEMENT) + return false; + + *scid = tal(msg, struct short_channel_id); + if (!fromwire_channel_announcement(msg, msg, &sig, &sig, &sig, &sig, &features, + &chain_hash, *scid, &node, &node, &key, &key)) + *scid = tal_free(*scid); + return true; +} + +static bool is_channel_update(const u8 *msg, struct short_channel_id_dir **scidd) +{ + secp256k1_ecdsa_signature sig; + struct bitcoin_blkid chain_hash; + u32 u32val; + u8 message_flags, channel_flags; + u16 cltv_expiry_delta; + struct amount_msat msat; + + if (fromwire_peektype(msg) != WIRE_CHANNEL_UPDATE) + return false; + + *scidd = tal(msg, struct short_channel_id_dir); + if (fromwire_channel_update(msg, &sig, &chain_hash, &(*scidd)->scid, &u32val, &message_flags, &channel_flags, &cltv_expiry_delta, &msat, &u32val, &u32val, &msat)) + (*scidd)->dir = (channel_flags & ROUTING_FLAGS_DIRECTION); + else + *scidd = tal_free(*scidd); + return true; +} + +static bool is_node_announcement(const u8 *msg, struct node_id **node) +{ + secp256k1_ecdsa_signature sig; + u8 *u8arr; + u32 timestamp; + u8 rgb_color[3], alias[32]; + struct tlv_node_ann_tlvs *tlvs; + + if (fromwire_peektype(msg) != WIRE_NODE_ANNOUNCEMENT) + return false; + + *node = tal(msg, struct node_id); + if (!fromwire_node_announcement(msg, msg, &sig, &u8arr, ×tamp, *node, rgb_color, alias, &u8arr, &tlvs)) + *node = tal_free(*node); + return true; +} + int main(int argc, char *argv[]) { int fd; @@ -64,7 +119,9 @@ int main(int argc, char *argv[]) while (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)) { struct amount_sat sat; - struct short_channel_id scid; + struct short_channel_id scid, *scidptr; + struct short_channel_id_dir *sciddptr; + struct node_id *nodeptr; u16 flags = be16_to_cpu(hdr.flags); u16 msglen = be16_to_cpu(hdr.len); u8 *msg, *inner; @@ -95,17 +152,20 @@ int main(int argc, char *argv[]) if (fromwire_gossip_store_channel_amount(msg, &sat)) { printf("channel_amount: %s\n", fmt_amount_sat(tmpctx, sat)); - } else if (fromwire_peektype(msg) == WIRE_CHANNEL_ANNOUNCEMENT) { - printf("t=%u channel_announcement: %s\n", + } else if (is_channel_announce(msg, &scidptr)) { + printf("t=%u channel_announcement(%s): %s\n", be32_to_cpu(hdr.timestamp), + scidptr ? fmt_short_channel_id(tmpctx, *scidptr) : "?", tal_hex(msg, msg)); - } else if (fromwire_peektype(msg) == WIRE_CHANNEL_UPDATE) { - printf("t=%u channel_update: %s\n", + } else if (is_channel_update(msg, &sciddptr)) { + printf("t=%u channel_update(%s): %s\n", be32_to_cpu(hdr.timestamp), + sciddptr ? fmt_short_channel_id_dir(tmpctx, sciddptr) : "?", tal_hex(msg, msg)); - } else if (fromwire_peektype(msg) == WIRE_NODE_ANNOUNCEMENT) { - printf("t=%u node_announcement: %s\n", + } else if (is_node_announcement(msg, &nodeptr)) { + printf("t=%u node_announcement(%s): %s\n", be32_to_cpu(hdr.timestamp), + nodeptr ? fmt_node_id(tmpctx, nodeptr) : "?", tal_hex(msg, msg)); } else if (fromwire_gossip_store_private_channel_obs(msg, msg, &sat, &inner)) { From c9581e9e382cbf472b8a723b320cf00a3191d1b6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:38:28 +1030 Subject: [PATCH 02/20] pytest: fix bogus test_gossip_store_compact_noappend test. It didn't do anything, since the dev_compact_gossip_store command was removed. When we make it do something, it crashes since old_len is 0: ``` gossipd: gossip_store_compact: bad version gossipd: FATAL SIGNAL 6 (version v25.12rc3-1-g9e6c715-modded) ... gossipd: backtrace: ./stdlib/abort.c:79 (__GI_abort) 0x7119bd8288fe gossipd: backtrace: ./assert/assert.c:96 (__assert_fail_base) 0x7119bd82881a gossipd: backtrace: ./assert/assert.c:105 (__assert_fail) 0x7119bd83b516 gossipd: backtrace: gossipd/gossip_store.c:52 (append_msg) 0x56294de240eb gossipd: backtrace: gossipd/gossip_store.c:358 (gossip_store_compact) 0x56294 gossipd: backtrace: gossipd/gossip_store.c:395 (gossip_store_new) 0x56294de24 gossipd: backtrace: gossipd/gossmap_manage.c:455 (setup_gossmap) 0x56294de255 gossipd: backtrace: gossipd/gossmap_manage.c:488 (gossmap_manage_new) 0x56294 gossipd: backtrace: gossipd/gossipd.c:400 (gossip_init) 0x56294de22de9 ``` Signed-off-by: Rusty Russell --- gossipd/gossip_store.c | 2 +- tests/test_gossip.py | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index eff86c679555..075cb1969633 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -199,7 +199,7 @@ static int gossip_store_compact(struct daemon *daemon, const char *bad; *populated = false; - old_len = 0; + old_len = 1; new_fd = open(GOSSIP_STORE_TEMP_FILENAME, O_RDWR|O_TRUNC|O_CREAT, 0600); if (new_fd < 0) { diff --git a/tests/test_gossip.py b/tests/test_gossip.py index 64980a15ae24..ad3f057cde14 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1294,7 +1294,7 @@ def test_gossip_store_load_amount_truncated(node_factory): # May preceed the Started msg waited for in 'start'. wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store only processed 1 bytes of 445 \(expected 445\)')) wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store: Moving to gossip_store.corrupt')) - wait_for(lambda: l1.daemon.is_in_log(r'gossip_store: Read 0/0/0/0 cannounce/cupdate/nannounce/delete from store in 0 bytes, now 1 bytes \(populated=false\)')) + wait_for(lambda: l1.daemon.is_in_log(r'gossip_store: Read 0/0/0/0 cannounce/cupdate/nannounce/delete from store in 1 bytes, now 1 bytes \(populated=false\)')) assert os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) @@ -1581,8 +1581,8 @@ def test_getroute_exclude(node_factory, bitcoind): l1.rpc.getroute(l4.info['id'], 1, 1, exclude=[chan_l2l3, l5.info['id'], chan_l2l4]) -def setup_gossip_store_test(node_factory, bitcoind): - l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True) +def setup_gossip_store_test(node_factory, bitcoind, opts=None): + l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True, opts=opts) # Now, replace the one channel_update, so it's past the node announcements. l2.rpc.setchannel(l3.info['id'], 20, 1000) @@ -1608,16 +1608,20 @@ def setup_gossip_store_test(node_factory, bitcoind): return l2 -def test_gossip_store_compact_noappend(node_factory, bitcoind): - l2 = setup_gossip_store_test(node_factory, bitcoind) +def test_gossip_store_corrupt(node_factory, bitcoind): + l2 = setup_gossip_store_test(node_factory, bitcoind, opts=[{}, {'broken_log': 'gossipd:.*bad version'}, {}]) + l2.stop() # It should truncate this, not leave junk! - with open(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.tmp'), 'wb') as f: + with open(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store'), 'wb') as f: f.write(bytearray.fromhex("07deadbeef")) + l2.start() - l2.restart() wait_for(lambda: l2.daemon.is_in_log('gossip_store: Read ')) - assert not l2.daemon.is_in_log('gossip_store:.*truncate') + assert l2.daemon.is_in_log('gossip_store_compact: bad version') + + # Will simply overwrite, due to old version. + assert not os.path.exists(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) def test_gossip_store_load_complex(node_factory, bitcoind): From d1e4c6400aaa7138cb9104cc5671702344ab59e8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:39:28 +1030 Subject: [PATCH 03/20] devtools/gossmap-compress: generate better scids. Our poor scid generation clashes badly with simplified hashing (the next patch) leading to l1's startup time when using a generated map moving from 4 seconds to 14 seconds. Under CI it actually timed out several tests. Fixing our fake scids to be more "random" reduces it to 1.5 seconds. Signed-off-by: Rusty Russell --- devtools/gossmap-compress.c | 4 +-- tests/test_askrene.py | 66 ++++++++++++++++++------------------- tests/test_gossip.py | 2 +- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/devtools/gossmap-compress.c b/devtools/gossmap-compress.c index ac995f9115ce..be91e7f051b3 100644 --- a/devtools/gossmap-compress.c +++ b/devtools/gossmap-compress.c @@ -353,7 +353,7 @@ static void write_announce(int outfd, node_id_from_pubkey(&nodeid2, &id1); } /* Use i to avoid clashing scids even if two nodes have > 1 channel */ - if (!mk_short_channel_id(&scid, node1, node2, i & 0xFFFF)) + if (!mk_short_channel_id(&scid, i + node1, node2, i & 0xFFFF)) abort(); msg = towire_channel_announcement(NULL, &vals.sig, &vals.sig, &vals.sig, &vals.sig, @@ -406,7 +406,7 @@ static void write_update(int outfd, memset(&vals, 0, sizeof(vals)); /* Use i to avoid clashing scids even if two nodes have > 1 channel */ - if (!mk_short_channel_id(&scid, node1, node2, i & 0xFFFF)) + if (!mk_short_channel_id(&scid, i + node1, node2, i & 0xFFFF)) abort(); /* If node ids are backward, dir is reversed */ diff --git a/tests/test_askrene.py b/tests/test_askrene.py index ae1c277105de..332d001268e5 100644 --- a/tests/test_askrene.py +++ b/tests/test_askrene.py @@ -760,7 +760,7 @@ def test_getroutes(node_factory): 'next_node_id': nodemap[1], 'amount_msat': 103020, 'delay': 99 + 6 + 6}, - {'short_channel_id_dir': f'1x3x2/{dir13}', + {'short_channel_id_dir': f'3x3x2/{dir13}', 'next_node_id': nodemap[3], 'amount_msat': 102000, 'delay': 99 + 6} @@ -801,7 +801,7 @@ def test_getroutes(node_factory): 'routes': [{'probability_ppm': 900000, 'final_cltv': 99, 'amount_msat': 1000000, - 'path': [{'short_channel_id_dir': f'0x2x3/{dir02}', + 'path': [{'short_channel_id_dir': f'3x2x3/{dir02}', 'next_node_id': nodemap[2], 'amount_msat': 1000001, 'delay': 99 + 6}]}]} @@ -811,11 +811,11 @@ def test_getroutes(node_factory): nodemap[0], nodemap[2], 10000000, - [[{'short_channel_id_dir': f'0x2x1/{dir02}', + [[{'short_channel_id_dir': f'1x2x1/{dir02}', 'next_node_id': nodemap[2], 'amount_msat': 4500004, 'delay': 99 + 6}], - [{'short_channel_id_dir': f'0x2x3/{dir02}', + [{'short_channel_id_dir': f'3x2x3/{dir02}', 'next_node_id': nodemap[2], 'amount_msat': 5500005, 'delay': 99 + 6}]]) @@ -855,7 +855,7 @@ def test_getroutes_single_path(node_factory): [ [ { - "short_channel_id_dir": "1x2x2/1", + "short_channel_id_dir": "3x2x2/1", "next_node_id": nodemap[2], "amount_msat": 10000010, "delay": 99 + 6, @@ -893,7 +893,7 @@ def test_getroutes_single_path(node_factory): "delay": 99 + 6 + 6, }, { - "short_channel_id_dir": "1x2x2/1", + "short_channel_id_dir": "3x2x2/1", "next_node_id": nodemap[2], "amount_msat": 10000010, "delay": 99 + 6, @@ -938,7 +938,7 @@ def test_getroutes_fee_fallback(node_factory): 10000, maxfee_msat=201, paths=[[{'short_channel_id_dir': f'0x1x0/{dir01}'}, - {'short_channel_id_dir': f'1x3x2/{dir13}'}]]) + {'short_channel_id_dir': f'3x3x2/{dir13}'}]]) # maxfee exceeded? lower prob path. check_getroute_paths(l1, @@ -946,8 +946,8 @@ def test_getroutes_fee_fallback(node_factory): nodemap[3], 10000, maxfee_msat=200, - paths=[[{'short_channel_id_dir': f'0x2x1/{dir02}'}, - {'short_channel_id_dir': f'2x3x3/{dir23}'}]]) + paths=[[{'short_channel_id_dir': f'1x2x1/{dir02}'}, + {'short_channel_id_dir': f'5x3x3/{dir23}'}]]) def test_getroutes_auto_sourcefree(node_factory): @@ -1006,7 +1006,7 @@ def test_getroutes_auto_sourcefree(node_factory): 'next_node_id': nodemap[1], 'amount_msat': 102000, 'delay': 99 + 6}, - {'short_channel_id_dir': f'1x3x2/{dir13}', + {'short_channel_id_dir': f'3x3x2/{dir13}', 'next_node_id': nodemap[3], 'amount_msat': 102000, 'delay': 99 + 6} @@ -1071,7 +1071,7 @@ def test_getroutes_maxdelay(node_factory): 'routes': [{'probability_ppm': 999999, 'final_cltv': 99, 'amount_msat': 1000, - 'path': [{'short_channel_id_dir': f'0x1x1/{dir01}', + 'path': [{'short_channel_id_dir': f'1x1x1/{dir01}', 'next_node_id': nodemap[1], 'amount_msat': 1020, 'delay': 139}]}]} @@ -1122,7 +1122,7 @@ def test_getroutes_auto_localchans(node_factory): layers=['auto.localchans'], paths=[[{'short_channel_id_dir': scid21dir, 'amount_msat': 102012, 'delay': 99 + 6 + 6 + 6}, {'short_channel_id_dir': f'0x1x0/{dir01}', 'amount_msat': 102010, 'delay': 99 + 6 + 6}, - {'short_channel_id_dir': f'1x2x1/{dir12}', 'amount_msat': 101000, 'delay': 99 + 6}]]) + {'short_channel_id_dir': f'2x2x1/{dir12}', 'amount_msat': 101000, 'delay': 99 + 6}]]) # This should get self-discount correct check_getroute_paths(l2, @@ -1133,7 +1133,7 @@ def test_getroutes_auto_localchans(node_factory): layers=['auto.localchans', 'auto.sourcefree'], paths=[[{'short_channel_id_dir': scid21dir, 'amount_msat': 102010, 'delay': 99 + 6 + 6}, {'short_channel_id_dir': f'0x1x0/{dir01}', 'amount_msat': 102010, 'delay': 99 + 6 + 6}, - {'short_channel_id_dir': f'1x2x1/{dir12}', 'amount_msat': 101000, 'delay': 99 + 6}]]) + {'short_channel_id_dir': f'2x2x1/{dir12}', 'amount_msat': 101000, 'delay': 99 + 6}]]) def test_fees_dont_exceed_constraints(node_factory): @@ -1296,10 +1296,10 @@ def test_limits_fake_gossmap(node_factory, bitcoind): # Create a layer like auto.localchans would from "spendable" dir01 = direction(nodemap[0], nodemap[1]) spendable = {f'0x1x0/{dir01}': 87718000, - f'0x1x1/{dir01}': 87718000, - f'0x1x2/{dir01}': 186718000, - f'0x1x3/{dir01}': 285718000, - f'0x1x4/{dir01}': 384718000} + f'1x1x1/{dir01}': 87718000, + f'2x1x2/{dir01}': 186718000, + f'3x1x3/{dir01}': 285718000, + f'4x1x4/{dir01}': 384718000} # Sanity check that these exist! for scidd in spendable: @@ -1379,12 +1379,12 @@ def test_max_htlc(node_factory, bitcoind): dir01 = direction(nodemap[0], nodemap[1]) check_route_as_expected(routes['routes'], [[{'short_channel_id_dir': f'0x1x0/{dir01}', 'amount_msat': 1_000_001, 'delay': 10 + 6}], - [{'short_channel_id_dir': f'0x1x1/{dir01}', 'amount_msat': 19_000_019, 'delay': 10 + 6}]]) + [{'short_channel_id_dir': f'1x1x1/{dir01}', 'amount_msat': 19_000_019, 'delay': 10 + 6}]]) # If we can't use channel 2, we fail. l1.rpc.askrene_create_layer('removechan2') l1.rpc.askrene_inform_channel(layer='removechan2', - short_channel_id_dir=f'0x1x1/{dir01}', + short_channel_id_dir=f'1x1x1/{dir01}', amount_msat=1, inform='constrained') @@ -1413,7 +1413,7 @@ def test_min_htlc(node_factory, bitcoind): dir01 = direction(nodemap[0], nodemap[1]) check_route_as_expected(routes['routes'], - [[{'short_channel_id_dir': f'0x1x1/{dir01}', 'amount_msat': 1_000, 'delay': 10 + 6}]]) + [[{'short_channel_id_dir': f'1x1x1/{dir01}', 'amount_msat': 1_000, 'delay': 10 + 6}]]) def test_min_htlc_after_excess(node_factory, bitcoind): @@ -1433,7 +1433,7 @@ def test_min_htlc_after_excess(node_factory, bitcoind): # These were obviously having a bad day at the time of the snapshot: canned_gossmap_badnodes = { - 19: "We could not find a usable set of paths. The shortest path is 103x1x0->0x2134x0->0x333x988->19x333x16169, but 0x2134x0/0 exceeds htlc_maximum_msat ~1000448msat", + 19: "We could not find a usable set of paths. The shortest path is 103x1x0->0x2134x0->988x333x988->16188x333x16169, but 0x2134x0/0 exceeds htlc_maximum_msat ~1000448msat", 53: "We could not find a usable set of paths. The destination has disabled 177 of 177 channels, leaving capacity only 0msat of 4003677000msat.", 69: "We could not find a usable set of paths. The destination has disabled 151 of 151 channels, leaving capacity only 0msat of 9092303000msat.", 72: "We could not find a usable set of paths. The destination has disabled 146 of 146 channels, leaving capacity only 0msat of 1996000000msat.", @@ -2118,7 +2118,7 @@ def test_includefees(node_factory): "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 1022, "delay": 99 + 5, @@ -2145,13 +2145,13 @@ def test_includefees(node_factory): "delay": 99 + 5 + 5 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 1055, "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "2x3x2/0", + "short_channel_id_dir": "4x3x2/0", "next_node_id": nodemap[3], "amount_msat": 1033, "delay": 99 + 5, @@ -2201,7 +2201,7 @@ def test_includefees(node_factory): "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 990, "delay": 99 + 5, @@ -2228,13 +2228,13 @@ def test_includefees(node_factory): "delay": 99 + 5 + 5 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 990, "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "2x3x2/0", + "short_channel_id_dir": "4x3x2/0", "next_node_id": nodemap[3], "amount_msat": 969, "delay": 99 + 5, @@ -2283,7 +2283,7 @@ def test_includefees(node_factory): "delay": 99 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 1022, "delay": 99 + 5, @@ -2310,13 +2310,13 @@ def test_includefees(node_factory): "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 1055, "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "2x3x2/0", + "short_channel_id_dir": "4x3x2/0", "next_node_id": nodemap[3], "amount_msat": 1033, "delay": 99 + 5, @@ -2365,7 +2365,7 @@ def test_includefees(node_factory): "delay": 99 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 1000, "delay": 99 + 5, @@ -2392,13 +2392,13 @@ def test_includefees(node_factory): "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "1x2x1/1", + "short_channel_id_dir": "2x2x1/1", "next_node_id": nodemap[2], "amount_msat": 1000, "delay": 99 + 5 + 5, }, { - "short_channel_id_dir": "2x3x2/0", + "short_channel_id_dir": "4x3x2/0", "next_node_id": nodemap[3], "amount_msat": 979, "delay": 99 + 5, diff --git a/tests/test_gossip.py b/tests/test_gossip.py index ad3f057cde14..d6b03dd02b1d 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -2189,7 +2189,7 @@ def test_generate_gossip_store(node_factory): expected.append({'source': n1, 'destination': n2, - 'short_channel_id': '{}x{}x{}'.format(c.node1, c.node2, chancount), + 'short_channel_id': '{}x{}x{}'.format(c.node1 + chancount, c.node2, chancount), 'direction': expected_dir, 'public': True, 'amount_msat': c.capacity_sats * 1000, From eef10c93e9ad41e4afd2ff86296c43681ac026e1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:40:28 +1030 Subject: [PATCH 04/20] gossmap: reduce load times by 20% It's actually quite quick to load a cache-hot 308,874,377 byte gossip_store (normal -Og build), but perf does show time spent in siphash(), which is a bit overkill here, so drop that: Before: Time to load: 66718983-78037766(7.00553e+07+/-2.8e+06)nsec After: Time to load: 54510433-57991725(5.61457e+07+/-1e+06)nsec We could save maybe 10% more by disabling checksums, but having that assurance is nice. Signed-off-by: Rusty Russell --- common/gossmap.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/common/gossmap.c b/common/gossmap.c index 9d52569d5384..744890bba6f7 100644 --- a/common/gossmap.c +++ b/common/gossmap.c @@ -29,11 +29,7 @@ static bool chanidx_eq_id(const ptrint_t *pidx, struct short_channel_id pidxid = chanidx_id(pidx); return short_channel_id_eq(pidxid, scid); } -static size_t scid_hash(const struct short_channel_id scid) -{ - return siphash24(siphash_seed(), &scid, sizeof(scid)); -} -HTABLE_DEFINE_NODUPS_TYPE(ptrint_t, chanidx_id, scid_hash, chanidx_eq_id, +HTABLE_DEFINE_NODUPS_TYPE(ptrint_t, chanidx_id, short_channel_id_hash, chanidx_eq_id, chanidx_htable); static struct node_id nodeidx_id(const ptrint_t *pidx); @@ -42,9 +38,16 @@ static bool nodeidx_eq_id(const ptrint_t *pidx, const struct node_id id) struct node_id pidxid = nodeidx_id(pidx); return node_id_eq(&pidxid, &id); } +/* You need to spend sats to create a channel to advertize your nodeid, + * so creating clashes is not free: we can be lazy! */ static size_t nodeid_hash(const struct node_id id) { - return siphash24(siphash_seed(), &id, PUBKEY_CMPR_LEN); + size_t val; + size_t off = siphash_seed()->u.u8[0] % 16; + + BUILD_ASSERT(15 + sizeof(val) < sizeof(id.k)); + memcpy(&val, id.k + off, sizeof(val)); + return val; } HTABLE_DEFINE_NODUPS_TYPE(ptrint_t, nodeidx_id, nodeid_hash, nodeidx_eq_id, nodeidx_htable); From c7ab481ad31fc85f3f58d5daae7572e02069b2b6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:41:28 +1030 Subject: [PATCH 05/20] gossip_store: add UUID entry at front of the store. We also put this in the store_ended message, too: so you can tell if the equivalent_offset there really refers to this new entry (or if two or more rewrites have happened). Signed-off-by: Rusty Russell --- common/gossip_store.h | 2 +- connectd/gossip_store.c | 3 ++- devtools/dump-gossipstore.c | 26 +++++++++++++++++++++++++- gossipd/gossip_store.c | 11 +++++++---- gossipd/gossip_store_wire.csv | 4 ++++ tests/test_gossip.py | 2 +- tools/lightning-downgrade.c | 2 +- 7 files changed, 41 insertions(+), 9 deletions(-) diff --git a/common/gossip_store.h b/common/gossip_store.h index 013f8003a2e1..1545e86853fa 100644 --- a/common/gossip_store.h +++ b/common/gossip_store.h @@ -15,7 +15,7 @@ struct gossip_rcvd_filter; /* First byte of file is the version. * * Top three bits mean incompatible change. - * As of this writing, major == 0, minor == 15. + * As of this writing, major == 0, minor == 16. */ #define GOSSIP_STORE_MAJOR_VERSION_MASK 0xE0 #define GOSSIP_STORE_MINOR_VERSION_MASK 0x1F diff --git a/connectd/gossip_store.c b/connectd/gossip_store.c index 60da90f30be6..92298458d53e 100644 --- a/connectd/gossip_store.c +++ b/connectd/gossip_store.c @@ -28,8 +28,9 @@ static size_t reopen_gossip_store(int *gossip_store_fd, const u8 *msg) { u64 equivalent_offset; int newfd; + u8 uuid[32]; - if (!fromwire_gossip_store_ended(msg, &equivalent_offset)) + if (!fromwire_gossip_store_ended(msg, &equivalent_offset, uuid)) status_failed(STATUS_FAIL_GOSSIP_IO, "Bad gossipd GOSSIP_STORE_ENDED msg: %s", tal_hex(tmpctx, msg)); diff --git a/devtools/dump-gossipstore.c b/devtools/dump-gossipstore.c index eec6e3c16869..32b6fcc1d970 100644 --- a/devtools/dump-gossipstore.c +++ b/devtools/dump-gossipstore.c @@ -6,13 +6,27 @@ #include #include #include +#include #include #include #include /* Current versions we support */ #define GSTORE_MAJOR 0 -#define GSTORE_MINOR 15 +#define GSTORE_MINOR 16 + +/* Ended marker for <= 15 */ +static bool fromwire_gossip_store_ended_obs(const void *p, u64 *equivalent_offset) +{ + const u8 *cursor = p; + size_t plen = tal_count(p); + + if (fromwire_u16(&cursor, &plen) != WIRE_GOSSIP_STORE_ENDED) + return false; + *equivalent_offset = fromwire_u64(&cursor, &plen); + return cursor != NULL; +} + static bool is_channel_announce(const u8 *msg, struct short_channel_id **scid) { @@ -127,6 +141,8 @@ int main(int argc, char *argv[]) u8 *msg, *inner; bool deleted, dying, complete; u32 blockheight; + u64 offset; + u8 uuid[32]; deleted = (flags & GOSSIP_STORE_DELETED_BIT); dying = (flags & GOSSIP_STORE_DYING_BIT); @@ -183,6 +199,14 @@ int main(int argc, char *argv[]) printf("dying channel: %s (deadline %u)\n", fmt_short_channel_id(tmpctx, scid), blockheight); + } else if (fromwire_gossip_store_ended(msg, &offset, uuid)) { + printf("gossip store ended: offset %"PRIu64" in uuid %s\n", + offset, tal_hexstr(tmpctx, uuid, sizeof(uuid))); + } else if (fromwire_gossip_store_ended_obs(msg, &offset)) { + printf("gossip store ended (v <= 15): offset %"PRIu64"\n", + offset); + } else if (fromwire_gossip_store_uuid(msg, uuid)) { + printf("uuid %s\n", tal_hexstr(tmpctx, uuid, sizeof(uuid))); } else { printf("Unknown message %u: %s\n", fromwire_peektype(msg), tal_hex(msg, msg)); diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 075cb1969633..32c2ae7a7386 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -20,8 +20,8 @@ #define GOSSIP_STORE_ZOMBIE_BIT_V13 0x1000U #define GOSSIP_STORE_TEMP_FILENAME "gossip_store.tmp" -/* We write it as major version 0, minor version 14 */ -#define GOSSIP_STORE_VER ((0 << 5) | 15) +/* We write it as major version 0, minor version 16 */ +#define GOSSIP_STORE_VER ((0 << 5) | 16) struct gossip_store { /* Back pointer. */ @@ -85,10 +85,11 @@ static bool append_msg(int fd, const u8 *msg, u32 timestamp, u64 *len, * v13 removed private gossip entries * v14 removed zombie and spam flags * v15 added the complete flag + * v16 add uuid field, ended field uuid extension */ static bool can_upgrade(u8 oldversion) { - return oldversion >= 9 && oldversion <= 14; + return oldversion >= 9 && oldversion <= 15; } /* On upgrade, do best effort on private channels: hand them to @@ -355,7 +356,9 @@ static int gossip_store_compact(struct daemon *daemon, /* Create end marker now new file exists. */ if (old_fd != -1) { - append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len), + /* FIXME: real uuid! */ + u8 uuid[32] = {0}; + append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len, uuid), 0, &old_len, NULL); close(old_fd); } diff --git a/gossipd/gossip_store_wire.csv b/gossipd/gossip_store_wire.csv index b0f81c371293..d371f9e83087 100644 --- a/gossipd/gossip_store_wire.csv +++ b/gossipd/gossip_store_wire.csv @@ -23,7 +23,11 @@ msgdata,gossip_store_delete_chan,scid,short_channel_id, msgtype,gossip_store_ended,4105 msgdata,gossip_store_ended,equivalent_offset,u64, +msgdata,gossip_store_ended,uuid,u8,32 msgtype,gossip_store_chan_dying,4106 msgdata,gossip_store_chan_dying,scid,short_channel_id, msgdata,gossip_store_chan_dying,blockheight,u32, + +msgtype,gossip_store_uuid,4107 +msgdata,gossip_store_uuid,uuid,u8,32 diff --git a/tests/test_gossip.py b/tests/test_gossip.py index d6b03dd02b1d..b29f3b97288f 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1207,7 +1207,7 @@ def test_gossip_store_load(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/0 cannounce/cupdate/nannounce/delete from store in 800 bytes, now 778 bytes')) + wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/0 cannounce/cupdate/nannounce/delete from store in 832 bytes, now 778 bytes')) assert not l1.daemon.is_in_log('gossip_store.*truncating') diff --git a/tools/lightning-downgrade.c b/tools/lightning-downgrade.c index 3bf41c6cbff9..e47d43b2d76c 100644 --- a/tools/lightning-downgrade.c +++ b/tools/lightning-downgrade.c @@ -168,7 +168,7 @@ static const char *downgrade_askrene_layers(const tal_t *ctx, struct db *db) static const struct db_version db_versions[] = { { "v25.09", 276, downgrade_askrene_layers, false }, - { "v25.12", 280, NULL, true }, + { "v25.12", 280, NULL, false }, }; static const struct db_version *version_db(const char *version) From 6f135a5d8a477dc47536718622c038dc79fa4c6b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:42:28 +1030 Subject: [PATCH 06/20] common/gossmap: use the UUID record on reopen. Signed-off-by: Rusty Russell --- common/gossmap.c | 103 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 24 deletions(-) diff --git a/common/gossmap.c b/common/gossmap.c index 744890bba6f7..09d9272c47ee 100644 --- a/common/gossmap.c +++ b/common/gossmap.c @@ -641,21 +641,91 @@ static void node_announcement(struct gossmap *map, u64 nann_off) n->nann_off = nann_off; } +/* Mutual recursion */ +static bool map_catchup(struct gossmap *map, bool must_be_clean, bool *changed); + +static void init_map_structs(struct gossmap *map) +{ + /* Since channel_announcement is ~430 bytes, and channel_update is 136, + * node_announcement is 144, and current topology has 35000 channels + * and 10000 nodes, let's assume each channel gets about 750 bytes. + * + * We halve this, since often some records are deleted. */ + map->channels = tal(map, struct chanidx_htable); + chanidx_htable_init_sized(map->channels, map->map_size / 750 / 2); + map->nodes = tal(map, struct nodeidx_htable); + nodeidx_htable_init_sized(map->nodes, map->map_size / 2500 / 2); + + map->num_chan_arr = map->map_size / 750 / 2 + 1; + map->chan_arr = tal_arr(map, struct gossmap_chan, map->num_chan_arr); + map->freed_chans = init_chan_arr(map->chan_arr, 0); + map->num_node_arr = map->map_size / 2500 / 2 + 1; + map->node_arr = tal_arr(map, struct gossmap_node, map->num_node_arr); + map->freed_nodes = init_node_arr(map->node_arr, 0); +} + static bool reopen_store(struct gossmap *map, u64 ended_off) { - int fd; + u64 equivalent_off; + u8 verbyte; + u8 expected_uuid[32], uuid[32]; + struct gossip_hdr ghdr; + bool changed; + + /* This tells us the equivalent offset in new map */ + equivalent_off = map_be64(map, ended_off + 2); + map_copy(map, ended_off + 2 + 8, expected_uuid, sizeof(expected_uuid)); + close(map->fd); - fd = open(map->fname, O_RDONLY); - if (fd < 0) + map->fd = open(map->fname, O_RDONLY); + if (map->fd < 0) err(1, "Failed to reopen %s", map->fname); - /* This tells us the equivalent offset in new map */ - map->map_end = map_be64(map, ended_off + 2); + /* If mmap isn't disabled, we try to mmap if we can. */ + map->map_size = lseek(map->fd, 0, SEEK_END); + if (map->mmap) { + map->mmap = mmap(NULL, map->map_size, PROT_READ, MAP_SHARED, map->fd, 0); + if (map->mmap == MAP_FAILED) + map->mmap = NULL; + } - close(map->fd); - map->fd = fd; + if (map->map_size < 1 + sizeof(ghdr) + 2 + sizeof(uuid)) + errx(1, "Truncated gossip_store len %"PRIu64, map->map_size); + + /* version then ghdr then uuid. */ + verbyte = map_u8(map, 0); + if (GOSSIP_STORE_MAJOR_VERSION(verbyte) != 0 || GOSSIP_STORE_MINOR_VERSION(verbyte) < 16) + errx(1, "Bad gossip_store version %u", verbyte); + + if (map_be16(map, 1 + sizeof(struct gossip_hdr)) != WIRE_GOSSIP_STORE_UUID) + errx(1, "First gossip_store record is not uuid?"); + map_copy(map, 1 + sizeof(struct gossip_hdr) + 2, uuid, sizeof(uuid)); + + /* FIXME: we can skip if uuid is as expected, but we'd have to re-calc all + * our offsets anyway. It's not worth it, given how fast we are. */ + if (memcmp(uuid, expected_uuid, sizeof(uuid)) == 0) { + map->logcb(map->cbarg, LOG_DBG, + "Reopened gossip_store, reduced to offset %"PRIu64, + equivalent_off); + } else { + /* Start from scratch */ + map->logcb(map->cbarg, LOG_INFORM, + "Reopened gossip_store, but we missed some (%s vs %s)", + tal_hexstr(tmpctx, uuid, sizeof(uuid)), + tal_hexstr(tmpctx, expected_uuid, sizeof(expected_uuid))); + } + + tal_free(map->channels); + tal_free(map->nodes); + tal_free(map->chan_arr); + tal_free(map->node_arr); + init_map_structs(map); + map->map_end = 1; map->generation++; - return gossmap_refresh(map); + + /* Now do reload. */ + map_catchup(map, false, &changed); + return changed; } /* Extra sanity check (if it's cheap): does crc match? */ @@ -804,22 +874,7 @@ static bool load_gossip_store(struct gossmap *map, bool must_be_clean) return false; } - /* Since channel_announcement is ~430 bytes, and channel_update is 136, - * node_announcement is 144, and current topology has 35000 channels - * and 10000 nodes, let's assume each channel gets about 750 bytes. - * - * We halve this, since often some records are deleted. */ - map->channels = tal(map, struct chanidx_htable); - chanidx_htable_init_sized(map->channels, map->map_size / 750 / 2); - map->nodes = tal(map, struct nodeidx_htable); - nodeidx_htable_init_sized(map->nodes, map->map_size / 2500 / 2); - - map->num_chan_arr = map->map_size / 750 / 2 + 1; - map->chan_arr = tal_arr(map, struct gossmap_chan, map->num_chan_arr); - map->freed_chans = init_chan_arr(map->chan_arr, 0); - map->num_node_arr = map->map_size / 2500 / 2 + 1; - map->node_arr = tal_arr(map, struct gossmap_node, map->num_node_arr); - map->freed_nodes = init_node_arr(map->node_arr, 0); + init_map_structs(map); map->map_end = 1; return map_catchup(map, must_be_clean, &updated); From 403460f50bc32a43783544eae132fbb71fc01519 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:43:28 +1030 Subject: [PATCH 07/20] gossipd: write uuid record on startup. This is the first record, and ignored by everything else. Signed-off-by: Rusty Russell --- common/gossmap.c | 3 +++ gossipd/gossip_store.c | 35 ++++++++++++++++++++++++++++++++++- tests/test_gossip.py | 12 ++++++------ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/common/gossmap.c b/common/gossmap.c index 09d9272c47ee..69a3dbefe136 100644 --- a/common/gossmap.c +++ b/common/gossmap.c @@ -832,6 +832,9 @@ static bool map_catchup(struct gossmap *map, bool must_be_clean, bool *changed) } else if (type == WIRE_GOSSIP_STORE_CHAN_DYING) { /* We don't really care until it's deleted */ continue; + } else if (type == WIRE_GOSSIP_STORE_UUID) { + /* We handled this reopen, otherwise we don't care. */ + continue; } else { map->logcb(map->cbarg, LOG_BROKEN, "Unknown record %u@%u (size %zu) in gossmap: ignoring", diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 32c2ae7a7386..91c6c28fe5db 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -179,6 +179,18 @@ static bool upgrade_field(u8 oldversion, return true; } +static u8 *new_uuid_record(const tal_t *ctx, int fd, u64 *off) +{ + u8 *uuid = tal_arr(ctx, u8, 32); + + for (size_t i = 0; i < tal_bytelen(uuid); i++) + uuid[i] = pseudorand(256); + append_msg(fd, towire_gossip_store_uuid(tmpctx, uuid), 0, off, NULL); + /* append_msg does not change file offset, so do that now. */ + lseek(fd, 0, SEEK_END); + return uuid; +} + /* Read gossip store entries, copy non-deleted ones. Check basic * validity, but this code is written as simply and robustly as * possible! @@ -198,6 +210,7 @@ static int gossip_store_compact(struct daemon *daemon, struct stat st; struct timemono start = time_mono(); const char *bad; + u8 *uuid = NULL; *populated = false; old_len = 1; @@ -241,6 +254,10 @@ static int gossip_store_compact(struct daemon *daemon, cur_off = old_len = sizeof(oldversion); + /* Make up uuid for old version */ + if (oldversion < 16) + uuid = new_uuid_record(tmpctx, new_fd, total_len); + /* Read everything, write non-deleted ones to new_fd. If something goes wrong, * we end up with truncated store. */ while (read_all(old_fd, &hdr, sizeof(hdr))) { @@ -327,6 +344,13 @@ static int gossip_store_compact(struct daemon *daemon, case WIRE_NODE_ANNOUNCEMENT: nannounces++; break; + case WIRE_GOSSIP_STORE_UUID: + uuid = tal_arr(tmpctx, u8, 32); + if (!fromwire_gossip_store_uuid(msg, uuid)) { + bad = "Corrupt uuid"; + goto badmsg; + } + break; } if (!write_all(new_fd, &hdr, sizeof(hdr)) @@ -348,6 +372,16 @@ static int gossip_store_compact(struct daemon *daemon, } rename_new: + /* If we didn't copy a uuid, do so now. */ + if (!uuid) { + if (lseek(new_fd, 0, SEEK_END) != 1) { + bad = tal_fmt(tmpctx, + "missing uuid in version %u", oldversion); + goto badmsg; + } + uuid = new_uuid_record(tmpctx, new_fd, total_len); + } + if (rename(GOSSIP_STORE_TEMP_FILENAME, GOSSIP_STORE_FILENAME) != 0) { status_failed(STATUS_FAIL_INTERNAL_ERROR, "gossip_store_compact: rename failed: %s", @@ -357,7 +391,6 @@ static int gossip_store_compact(struct daemon *daemon, /* Create end marker now new file exists. */ if (old_fd != -1) { /* FIXME: real uuid! */ - u8 uuid[32] = {0}; append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len, uuid), 0, &old_len, NULL); close(old_fd); diff --git a/tests/test_gossip.py b/tests/test_gossip.py index b29f3b97288f..e6a2bb075241 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1207,7 +1207,7 @@ def test_gossip_store_load(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/0 cannounce/cupdate/nannounce/delete from store in 832 bytes, now 778 bytes')) + wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/0 cannounce/cupdate/nannounce/delete from store in 832 bytes, now 824 bytes')) assert not l1.daemon.is_in_log('gossip_store.*truncating') @@ -1275,13 +1275,13 @@ def test_gossip_store_load_announce_before_update(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/1 cannounce/cupdate/nannounce/delete from store in 950 bytes, now 778 bytes')) + wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/1 cannounce/cupdate/nannounce/delete from store in 982 bytes, now 824 bytes')) assert not l1.daemon.is_in_log('gossip_store.*truncating') def test_gossip_store_load_amount_truncated(node_factory): """Make sure we can read canned gossip store with truncated amount""" - l1 = node_factory.get_node(start=False, broken_log=r'gossip_store only processed 1 bytes of 445 \(expected 445\)|Moving to gossip_store.corrupt|plugin-cln-renepay:.*unable to fetch channel capacity') + l1 = node_factory.get_node(start=False, broken_log=r'gossip_store only processed 47 bytes of 491 \(expected 491\)|Moving to gossip_store.corrupt|plugin-cln-renepay:.*unable to fetch channel capacity') with open(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store'), 'wb') as f: f.write(bytearray.fromhex("0c" # GOSSIP_STORE_VERSION "000001b0" # len @@ -1292,9 +1292,9 @@ def test_gossip_store_load_amount_truncated(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store only processed 1 bytes of 445 \(expected 445\)')) + wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store only processed 47 bytes of 491 \(expected 491\)')) wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store: Moving to gossip_store.corrupt')) - wait_for(lambda: l1.daemon.is_in_log(r'gossip_store: Read 0/0/0/0 cannounce/cupdate/nannounce/delete from store in 1 bytes, now 1 bytes \(populated=false\)')) + wait_for(lambda: l1.daemon.is_in_log(r'gossip_store: Read 0/0/0/0 cannounce/cupdate/nannounce/delete from store in 1 bytes, now 47 bytes \(populated=false\)')) assert os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) @@ -1658,7 +1658,7 @@ def test_gossip_store_load_no_channel_update(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/0/1/0 cannounce/cupdate/nannounce/delete from store in 650 bytes, now 628 bytes')) + wait_for(lambda: l1.daemon.is_in_log('Read 1/0/1/0 cannounce/cupdate/nannounce/delete from store in 682 bytes, now 674 bytes')) assert not os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) From e1ca104a4b56580ed83d59e1593d6a1d8d725fa5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:44:28 +1030 Subject: [PATCH 08/20] common: move gossip_store_wire.csv into common/ from gossipd/ It's used by common/gossip_store.c, which is used by many things other than gossipd. This file belongs in common. Signed-off-by: Rusty Russell --- Makefile | 7 ++++++- common/Makefile | 6 ++---- common/gossip_store.c | 2 +- {gossipd => common}/gossip_store_wire.csv | 0 common/gossmap.c | 2 +- common/test/Makefile | 2 +- common/test/run-route-specific.c | 8 ++++---- common/test/run-route.c | 8 ++++---- connectd/Makefile | 2 +- connectd/gossip_store.c | 2 +- contrib/pyln-client/pyln/client/gossmap.py | 2 +- devtools/Makefile | 5 ----- devtools/convert-gossmap.c | 2 +- devtools/create-gossipstore.c | 2 +- devtools/dump-gossipstore.c | 2 +- devtools/gossmap-compress.c | 2 +- gossipd/Makefile | 1 - gossipd/gossip_store.c | 2 +- gossipd/gossmap_manage.c | 2 +- plugins/Makefile | 2 +- plugins/renepay/test/Makefile | 1 - plugins/renepay/test/common.h | 2 +- plugins/sql.c | 2 +- plugins/test/Makefile | 2 -- plugins/test/run-route-calc.c | 2 +- plugins/test/run-route-overlong.c | 2 +- 26 files changed, 33 insertions(+), 39 deletions(-) rename {gossipd => common}/gossip_store_wire.csv (100%) diff --git a/Makefile b/Makefile index fd6c8aa7a203..fd20d6526137 100644 --- a/Makefile +++ b/Makefile @@ -489,9 +489,14 @@ mkdocs.yml: $(MANPAGES:=.md) # Every single object file. ALL_OBJS := $(ALL_C_SOURCES:.c=.o) +WIREGEN_FILES := $(filter %printgen.h %printgen.c %wiregen.h %wiregen.c, $(ALL_C_HEADERS) $(ALL_C_SOURCES)) + +# Always make wiregen files before any object file +$(ALL_OBJS): $(WIREGEN_FILES) + # We always regen wiregen and printgen files, since SHA256STAMP protects against # spurious rebuilds. -$(filter %printgen.h %printgen.c %wiregen.h %wiregen.c, $(ALL_C_HEADERS) $(ALL_C_SOURCES)): $(FORCE) +$(WIREGEN_FILES): $(FORCE) ifneq ($(TEST_GROUP_COUNT),) PYTEST_OPTS += --test-group=$(TEST_GROUP) --test-group-count=$(TEST_GROUP_COUNT) diff --git a/common/Makefile b/common/Makefile index 333e3321cb6a..419261166fce 100644 --- a/common/Makefile +++ b/common/Makefile @@ -111,7 +111,7 @@ COMMON_SRC_NOGEN := \ common/wire_error.c -COMMON_SRC_GEN := common/status_wiregen.c common/peer_status_wiregen.c common/scb_wiregen.c +COMMON_SRC_GEN := common/status_wiregen.c common/peer_status_wiregen.c common/scb_wiregen.c common/gossip_store_wiregen.c COMMON_HEADERS_NOGEN := $(COMMON_SRC_NOGEN:.c=.h) \ common/closing_fee.h \ @@ -124,7 +124,7 @@ COMMON_HEADERS_NOGEN := $(COMMON_SRC_NOGEN:.c=.h) \ common/jsonrpc_errors.h \ common/overflows.h -COMMON_HEADERS_GEN := common/htlc_state_names_gen.h common/status_wiregen.h common/peer_status_wiregen.h common/scb_wiregen.h +COMMON_HEADERS_GEN := common/htlc_state_names_gen.h common/status_wiregen.h common/peer_status_wiregen.h common/scb_wiregen.h common/gossip_store_wiregen.h COMMON_HEADERS := $(COMMON_HEADERS_GEN) $(COMMON_HEADERS_NOGEN) COMMON_SRC := $(COMMON_SRC_NOGEN) $(COMMON_SRC_GEN) @@ -157,8 +157,6 @@ ALL_C_SOURCES += $(COMMON_SRC) common/htlc_state_names_gen.h: common/htlc_state.h ccan/ccan/cdump/tools/cdump-enumstr ccan/ccan/cdump/tools/cdump-enumstr common/htlc_state.h > $@ -common/gossip_store.o: gossipd/gossip_store_wiregen.h - check-source-bolt: $(COMMON_SRC_NOGEN:%=bolt-check/%) $(COMMON_HEADERS:%=bolt-check/%) check-whitespace: $(COMMON_SRC_NOGEN:%=check-whitespace/%) $(COMMON_HEADERS:%=check-whitespace/%) diff --git a/common/gossip_store.c b/common/gossip_store.c index 3f10dd82b3ce..298988a13bb6 100644 --- a/common/gossip_store.c +++ b/common/gossip_store.c @@ -1,6 +1,6 @@ #include "config.h" #include -#include +#include #include /* We cheat and read first two bytes of message too. */ diff --git a/gossipd/gossip_store_wire.csv b/common/gossip_store_wire.csv similarity index 100% rename from gossipd/gossip_store_wire.csv rename to common/gossip_store_wire.csv diff --git a/common/gossmap.c b/common/gossmap.c index 69a3dbefe136..d0bb948fa689 100644 --- a/common/gossmap.c +++ b/common/gossmap.c @@ -6,12 +6,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include diff --git a/common/test/Makefile b/common/test/Makefile index ab192ce61dc5..988ef151cb85 100644 --- a/common/test/Makefile +++ b/common/test/Makefile @@ -47,7 +47,7 @@ common/test/run-route common/test/run-route-specific common/test/run-route-inflo common/node_id.o \ common/pseudorand.o \ common/route.o \ - gossipd/gossip_store_wiregen.o \ + common/gossip_store_wiregen.o \ wire/fromwire.o \ wire/peer_wiregen.o \ wire/towire.o diff --git a/common/test/run-route-specific.c b/common/test/run-route-specific.c index 6e73f459d0ee..74653e206822 100644 --- a/common/test/run-route-specific.c +++ b/common/test/run-route-specific.c @@ -7,22 +7,22 @@ */ #include "config.h" #include +#include #include #include #include -#include #include +#include +#include #include #include #include #include #include #include -#include -#include #include -#include #include +#include /* AUTOGENERATED MOCKS START */ /* Generated stub for fromwire_bigsize */ diff --git a/common/test/run-route.c b/common/test/run-route.c index 53c7359181de..50f3f3b9b07d 100644 --- a/common/test/run-route.c +++ b/common/test/run-route.c @@ -1,21 +1,21 @@ #include "config.h" #include +#include #include #include #include -#include #include +#include +#include #include #include #include #include #include #include -#include -#include #include -#include #include +#include /* AUTOGENERATED MOCKS START */ /* Generated stub for fromwire_bigsize */ diff --git a/connectd/Makefile b/connectd/Makefile index 33e6e8b6ffb5..4cec6fdde40f 100644 --- a/connectd/Makefile +++ b/connectd/Makefile @@ -35,7 +35,7 @@ ALL_PROGRAMS += lightningd/lightning_websocketd LIGHTNINGD_CONTROL_HEADERS += connectd/connectd_wiregen.h LIGHTNINGD_CONTROL_OBJS += connectd/connectd_wiregen.o -lightningd/lightning_connectd: $(CONNECTD_OBJS) $(HSMD_CLIENT_OBJS) gossipd/gossipd_wiregen.o gossipd/gossip_store_wiregen.o libcommon.a +lightningd/lightning_connectd: $(CONNECTD_OBJS) $(HSMD_CLIENT_OBJS) gossipd/gossipd_wiregen.o libcommon.a lightningd/lightning_websocketd: $(WEBSOCKETD_OBJS) libcommon.a diff --git a/connectd/gossip_store.c b/connectd/gossip_store.c index 92298458d53e..3c1808f63d9a 100644 --- a/connectd/gossip_store.c +++ b/connectd/gossip_store.c @@ -1,11 +1,11 @@ #include "config.h" #include +#include #include #include #include #include #include -#include #include #include #include diff --git a/contrib/pyln-client/pyln/client/gossmap.py b/contrib/pyln-client/pyln/client/gossmap.py index b797f20290ea..920974a4cb9f 100755 --- a/contrib/pyln-client/pyln/client/gossmap.py +++ b/contrib/pyln-client/pyln/client/gossmap.py @@ -18,7 +18,7 @@ GOSSIP_STORE_LEN_PUSH_BIT = 0x4000 GOSSIP_STORE_LEN_COMPLETE_BIT = 0x2000 -# These duplicate constants in lightning/gossipd/gossip_store_wiregen.h +# These duplicate constants in lightning/common/gossip_store_wiregen.h WIRE_GOSSIP_STORE_PRIVATE_CHANNEL = 4104 WIRE_GOSSIP_STORE_PRIVATE_UPDATE = 4102 WIRE_GOSSIP_STORE_DELETE_CHAN = 4103 diff --git a/devtools/Makefile b/devtools/Makefile index b7c4874032e3..3d84e9279ed9 100644 --- a/devtools/Makefile +++ b/devtools/Makefile @@ -20,11 +20,6 @@ $(DEVTOOLS): %: %.o libcommon.a $(DEVTOOLS_TOOL_OBJS): wire/wire.h # Some devtools require extra objects -DEVTOOLS_NEEDS_GOSSIP_STORE := devtools/gossmap-compress devtools/dump-gossipstore devtools/convert-gossmap devtools/create-gossipstore - -$(DEVTOOLS_NEEDS_GOSSIP_STORE): gossipd/gossip_store_wiregen.o -$(DEVTOOLS_NEEDS_GOSSIP_STORE:=.o): gossipd/gossip_store_wiregen.h - devtools/decodemsg: devtools/print_wire.o devtools/decodemsg.o: devtools/print_wire.h diff --git a/devtools/convert-gossmap.c b/devtools/convert-gossmap.c index c5ee23c9dfc9..efc30855cd42 100644 --- a/devtools/convert-gossmap.c +++ b/devtools/convert-gossmap.c @@ -3,8 +3,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/devtools/create-gossipstore.c b/devtools/create-gossipstore.c index 320ad9f37e1a..226324e34f50 100644 --- a/devtools/create-gossipstore.c +++ b/devtools/create-gossipstore.c @@ -4,9 +4,9 @@ #include #include #include +#include #include #include -#include #include #include #include diff --git a/devtools/dump-gossipstore.c b/devtools/dump-gossipstore.c index 32b6fcc1d970..40604c369f29 100644 --- a/devtools/dump-gossipstore.c +++ b/devtools/dump-gossipstore.c @@ -3,9 +3,9 @@ #include #include #include +#include #include #include -#include #include #include #include diff --git a/devtools/gossmap-compress.c b/devtools/gossmap-compress.c index be91e7f051b3..146dad475dae 100644 --- a/devtools/gossmap-compress.c +++ b/devtools/gossmap-compress.c @@ -7,12 +7,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include diff --git a/gossipd/Makefile b/gossipd/Makefile index 1ee7a2756b5e..e92cd1f2b81b 100644 --- a/gossipd/Makefile +++ b/gossipd/Makefile @@ -1,7 +1,6 @@ #! /usr/bin/make GOSSIPD_HEADERS_WSRC := gossipd/gossipd_wiregen.h \ - gossipd/gossip_store_wiregen.h \ gossipd/gossipd.h \ gossipd/gossip_store.h \ gossipd/gossmap_manage.h \ diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 91c6c28fe5db..2c358becec96 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -5,12 +5,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 556104c703d2..4ae675d8d81d 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/plugins/Makefile b/plugins/Makefile index 7ca2622e2276..8a01fa339741 100644 --- a/plugins/Makefile +++ b/plugins/Makefile @@ -231,7 +231,7 @@ plugins/sql-schema_gen.h: $(SQL_SCHEMA_PARTS) ) plugins/sql.o: plugins/sql-schema_gen.h -plugins/sql: $(PLUGIN_SQL_OBJS) $(PLUGIN_LIB_OBJS) gossipd/gossip_store_wiregen.o libcommon.a +plugins/sql: $(PLUGIN_SQL_OBJS) $(PLUGIN_LIB_OBJS) libcommon.a # Generated from PLUGINS definition in plugins/Makefile ALL_C_HEADERS += plugins/list_of_builtin_plugins_gen.h diff --git a/plugins/renepay/test/Makefile b/plugins/renepay/test/Makefile index 2c86948dfe55..f6dcb33cda00 100644 --- a/plugins/renepay/test/Makefile +++ b/plugins/renepay/test/Makefile @@ -11,7 +11,6 @@ $(PLUGIN_RENEPAY_TEST_OBJS): $(PLUGIN_RENEPAY_SRC) plugins/renepay/test/common.h $(PLUGIN_RENEPAY_TEST_PROGRAMS): \ plugins/renepay/dijkstra.o \ plugins/renepay/chan_extra.o \ - gossipd/gossip_store_wiregen.o \ libcommon.a check-renepay: $(PLUGIN_RENEPAY_TEST_PROGRAMS:%=unittest/%) diff --git a/plugins/renepay/test/common.h b/plugins/renepay/test/common.h index 4eea8adc613f..80146873bf1f 100644 --- a/plugins/renepay/test/common.h +++ b/plugins/renepay/test/common.h @@ -3,7 +3,7 @@ #include "config.h" #include #include -#include +#include #include #include #include diff --git a/plugins/sql.c b/plugins/sql.c index 7f8631b000e5..24c3b28c5a40 100644 --- a/plugins/sql.c +++ b/plugins/sql.c @@ -5,13 +5,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/plugins/test/Makefile b/plugins/test/Makefile index 167e0deb1571..5ec83035b651 100644 --- a/plugins/test/Makefile +++ b/plugins/test/Makefile @@ -8,7 +8,6 @@ ALL_C_SOURCES += $(PLUGIN_TEST_SRC) ALL_TEST_PROGRAMS += $(PLUGIN_TEST_PROGRAMS) plugins/test/run-route-overlong: \ - gossipd/gossip_store_wiregen.o \ plugins/channel_hint.o plugins/test/run-route-calc: \ @@ -16,7 +15,6 @@ plugins/test/run-route-calc: \ common/gossmap.o \ common/node_id.o \ common/route.o \ - gossipd/gossip_store_wiregen.o \ plugins/channel_hint.o $(PLUGIN_TEST_PROGRAMS): libcommon.a diff --git a/plugins/test/run-route-calc.c b/plugins/test/run-route-calc.c index de62e98155fc..554965a465cc 100644 --- a/plugins/test/run-route-calc.c +++ b/plugins/test/run-route-calc.c @@ -7,9 +7,9 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/plugins/test/run-route-overlong.c b/plugins/test/run-route-overlong.c index fe2575cb5602..ee5d02ee520f 100644 --- a/plugins/test/run-route-overlong.c +++ b/plugins/test/run-route-overlong.c @@ -3,9 +3,9 @@ #include #include #include +#include #include #include -#include #include #include From fee42848e17e691b8589d79f39b7c6d86019ff20 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:45:28 +1030 Subject: [PATCH 09/20] gossmap: add callback for gossipd to see dying messages. gossmap doesn't care, so gossipd currently has to iterate through the store to find them at startup. Create a callback for gossipd to use instead. Signed-off-by: Rusty Russell --- common/gossmap.c | 78 +++++++++++++++++++++++++---- common/gossmap.h | 17 +++++-- common/test/run-gossmap_canned.c | 3 ++ common/test/run-gossmap_local.c | 3 ++ connectd/test/run-crc32_of_update.c | 4 ++ gossipd/gossmap_manage.c | 4 +- gossipd/test/run-next_block_range.c | 4 ++ 7 files changed, 98 insertions(+), 15 deletions(-) diff --git a/common/gossmap.c b/common/gossmap.c index d0bb948fa689..45ea204b28c7 100644 --- a/common/gossmap.c +++ b/common/gossmap.c @@ -641,8 +641,47 @@ static void node_announcement(struct gossmap *map, u64 nann_off) n->nann_off = nann_off; } +static bool report_dying_cb(struct gossmap *map, + u64 dying_off, + u16 msglen, + void (*dyingcb)(struct short_channel_id scid, + u32 blockheight, + u64 offset, + void *cb_arg), + void *cb_arg) +{ + struct short_channel_id scid; + u32 blockheight; + u8 *msg; + + msg = tal_arr(NULL, u8, msglen); + map_copy(map, dying_off, msg, msglen); + + if (!fromwire_gossip_store_chan_dying(msg, &scid, &blockheight)) { + map->logcb(map->cbarg, + LOG_BROKEN, + "Invalid chan_dying message @%"PRIu64 + "/%"PRIu64": %s", + dying_off, map->map_size, msglen, + tal_hex(tmpctx, msg)); + tal_free(msg); + return false; + } + tal_free(msg); + + dyingcb(scid, blockheight, dying_off, cb_arg); + return true; +} + /* Mutual recursion */ -static bool map_catchup(struct gossmap *map, bool must_be_clean, bool *changed); +static bool map_catchup(struct gossmap *map, + void (*dyingcb)(struct short_channel_id scid, + u32 blockheight, + u64 offset, + void *cb_arg), + void *cb_arg, + bool must_be_clean, + bool *changed); static void init_map_structs(struct gossmap *map) { @@ -724,7 +763,7 @@ static bool reopen_store(struct gossmap *map, u64 ended_off) map->generation++; /* Now do reload. */ - map_catchup(map, false, &changed); + map_catchup(map, NULL, NULL, false, &changed); return changed; } @@ -744,7 +783,14 @@ static bool csum_matches(const struct gossmap *map, } /* Returns false only if must_be_clean is true. */ -static bool map_catchup(struct gossmap *map, bool must_be_clean, bool *changed) +static bool map_catchup(struct gossmap *map, + void (*dyingcb)(struct short_channel_id scid, + u32 blockheight, + u64 offset, + void *cb_arg), + void *cb_arg, + bool must_be_clean, + bool *changed) { size_t reclen, num_bad_cupdates = 0; @@ -830,6 +876,8 @@ static bool map_catchup(struct gossmap *map, bool must_be_clean, bool *changed) /* We absorbed this in add_channel; ignore */ continue; } else if (type == WIRE_GOSSIP_STORE_CHAN_DYING) { + if (dyingcb && !report_dying_cb(map, off, msglen, dyingcb, cb_arg)) + return false; /* We don't really care until it's deleted */ continue; } else if (type == WIRE_GOSSIP_STORE_UUID) { @@ -856,7 +904,13 @@ static bool map_catchup(struct gossmap *map, bool must_be_clean, bool *changed) return true; } -static bool load_gossip_store(struct gossmap *map, bool must_be_clean) +static bool load_gossip_store(struct gossmap *map, + void (*dyingcb)(struct short_channel_id scid, + u32 blockheight, + u64 offset, + void *cb_arg), + void *cb_arg, + bool must_be_clean) { bool updated; @@ -880,7 +934,7 @@ static bool load_gossip_store(struct gossmap *map, bool must_be_clean) init_map_structs(map); map->map_end = 1; - return map_catchup(map, must_be_clean, &updated); + return map_catchup(map, dyingcb, cb_arg, must_be_clean, &updated); } static void destroy_map(struct gossmap *map) @@ -1292,7 +1346,7 @@ bool gossmap_refresh(struct gossmap *map) } } - map_catchup(map, false, &changed); + map_catchup(map, NULL, NULL, false, &changed); return changed; } @@ -1319,7 +1373,11 @@ struct gossmap *gossmap_load_(const tal_t *ctx, enum log_level level, const char *fmt, ...), - void *cbarg) + void (*dyingcb)(struct short_channel_id scid, + u32 blockheight, + u64 offset, + void *cb_arg), + void *cb_arg) { map = tal(ctx, struct gossmap); map->generation = 0; @@ -1331,15 +1389,15 @@ struct gossmap *gossmap_load_(const tal_t *ctx, map->logcb = logcb; else map->logcb = log_stderr; - map->cbarg = cbarg; + map->cbarg = cb_arg; tal_add_destructor(map, destroy_map); - if (!load_gossip_store(map, expected_len != 0)) + if (!load_gossip_store(map, dyingcb, cb_arg, expected_len != 0)) return tal_free(map); if (expected_len != 0 && (map->map_size != map->map_end || map->map_size != expected_len)) { - logcb(cbarg, LOG_BROKEN, + logcb(cb_arg, LOG_BROKEN, "gossip_store only processed %"PRIu64 " bytes of %"PRIu64" (expected %"PRIu64")", map->map_end, map->map_size, expected_len); diff --git a/common/gossmap.h b/common/gossmap.h index 1b60a1fdcc00..0c1a0425772d 100644 --- a/common/gossmap.h +++ b/common/gossmap.h @@ -46,17 +46,21 @@ struct gossmap_chan { enum log_level, \ const char *fmt, \ ...), \ - (cbarg)) + NULL, (cbarg)) /* If we're the author of the gossmap, it should have no redundant records, corruption, etc. * So this fails if that's not the case. */ -#define gossmap_load_initial(ctx, filename, expected_len, logcb, cbarg) \ +#define gossmap_load_initial(ctx, filename, expected_len, logcb, dyingcb, cb_arg) \ gossmap_load_((ctx), (filename), (expected_len), \ - typesafe_cb_postargs(void, void *, (logcb), (cbarg), \ + typesafe_cb_postargs(void, void *, (logcb), (cb_arg), \ enum log_level, \ const char *fmt, \ ...), \ - (cbarg)) + typesafe_cb_preargs(void, void *, (dyingcb), (cb_arg), \ + struct short_channel_id, \ + u32, \ + u64), \ + (cb_arg)) struct gossmap *gossmap_load_(const tal_t *ctx, const char *filename, @@ -65,6 +69,10 @@ struct gossmap *gossmap_load_(const tal_t *ctx, enum log_level level, const char *fmt, ...), + void (*dyingcb)(struct short_channel_id scid, + u32 blockheight, + u64 offset, + void *cb_arg), void *cb_arg); /* Disable mmap. Noop if already disabled. */ @@ -305,4 +313,5 @@ u64 gossmap_lengths(const struct gossmap *map, u64 *total); /* Debugging: connectd wants to enumerate fds */ int gossmap_fd(const struct gossmap *map); + #endif /* LIGHTNING_COMMON_GOSSMAP_H */ diff --git a/common/test/run-gossmap_canned.c b/common/test/run-gossmap_canned.c index fd5af5e00163..8ae7210dc5f3 100644 --- a/common/test/run-gossmap_canned.c +++ b/common/test/run-gossmap_canned.c @@ -19,6 +19,9 @@ bigsize_t fromwire_bigsize(const u8 **cursor UNNEEDED, size_t *max UNNEEDED) bool fromwire_channel_id(const u8 **cursor UNNEEDED, size_t *max UNNEEDED, struct channel_id *channel_id UNNEEDED) { fprintf(stderr, "fromwire_channel_id called!\n"); abort(); } +/* Generated stub for fromwire_gossip_store_chan_dying */ +bool fromwire_gossip_store_chan_dying(const void *p UNNEEDED, struct short_channel_id *scid UNNEEDED, u32 *blockheight UNNEEDED) +{ fprintf(stderr, "fromwire_gossip_store_chan_dying called!\n"); abort(); } /* Generated stub for sciddir_or_pubkey_from_node_id */ bool sciddir_or_pubkey_from_node_id(struct sciddir_or_pubkey *sciddpk UNNEEDED, const struct node_id *node_id UNNEEDED) diff --git a/common/test/run-gossmap_local.c b/common/test/run-gossmap_local.c index b425710ac9b8..0575bcae1131 100644 --- a/common/test/run-gossmap_local.c +++ b/common/test/run-gossmap_local.c @@ -19,6 +19,9 @@ bigsize_t fromwire_bigsize(const u8 **cursor UNNEEDED, size_t *max UNNEEDED) bool fromwire_channel_id(const u8 **cursor UNNEEDED, size_t *max UNNEEDED, struct channel_id *channel_id UNNEEDED) { fprintf(stderr, "fromwire_channel_id called!\n"); abort(); } +/* Generated stub for fromwire_gossip_store_chan_dying */ +bool fromwire_gossip_store_chan_dying(const void *p UNNEEDED, struct short_channel_id *scid UNNEEDED, u32 *blockheight UNNEEDED) +{ fprintf(stderr, "fromwire_gossip_store_chan_dying called!\n"); abort(); } /* Generated stub for sciddir_or_pubkey_from_node_id */ bool sciddir_or_pubkey_from_node_id(struct sciddir_or_pubkey *sciddpk UNNEEDED, const struct node_id *node_id UNNEEDED) diff --git a/connectd/test/run-crc32_of_update.c b/connectd/test/run-crc32_of_update.c index 85521395ff6d..62c3c5fefad0 100644 --- a/connectd/test/run-crc32_of_update.c +++ b/connectd/test/run-crc32_of_update.c @@ -10,12 +10,16 @@ int unused_main(int argc, char *argv[]); #include #include #include +#include #include /* AUTOGENERATED MOCKS START */ /* Generated stub for fromwire_connectd_dev_set_max_scids_encode_size */ bool fromwire_connectd_dev_set_max_scids_encode_size(const void *p UNNEEDED, u32 *max UNNEEDED) { fprintf(stderr, "fromwire_connectd_dev_set_max_scids_encode_size called!\n"); abort(); } +/* Generated stub for fromwire_gossip_store_chan_dying */ +bool fromwire_gossip_store_chan_dying(const void *p UNNEEDED, struct short_channel_id *scid UNNEEDED, u32 *blockheight UNNEEDED) +{ fprintf(stderr, "fromwire_gossip_store_chan_dying called!\n"); abort(); } /* Generated stub for get_gossmap */ struct gossmap *get_gossmap(struct daemon *daemon UNNEEDED) { fprintf(stderr, "get_gossmap called!\n"); abort(); } diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 4ae675d8d81d..cc4c6264d461 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -464,7 +464,9 @@ static bool setup_gossmap(struct gossmap_manage *gm, /* This actually loads it into memory, with strict checks. */ gm->raw_gossmap = gossmap_load_initial(gm, GOSSIP_STORE_FILENAME, expected_len, - gossmap_logcb, daemon); + gossmap_logcb, + NULL, + daemon); if (!gm->raw_gossmap) { gm->gs = tal_free(gm->gs); return false; diff --git a/gossipd/test/run-next_block_range.c b/gossipd/test/run-next_block_range.c index 60b9530a72d1..f4dae400a704 100644 --- a/gossipd/test/run-next_block_range.c +++ b/gossipd/test/run-next_block_range.c @@ -9,6 +9,7 @@ #include #include #include +#include #include /* AUTOGENERATED MOCKS START */ @@ -19,6 +20,9 @@ struct peer *find_peer(struct daemon *daemon UNNEEDED, const struct node_id *id struct peer *first_random_peer(struct daemon *daemon UNNEEDED, struct peer_node_id_map_iter *it UNNEEDED) { fprintf(stderr, "first_random_peer called!\n"); abort(); } +/* Generated stub for fromwire_gossip_store_chan_dying */ +bool fromwire_gossip_store_chan_dying(const void *p UNNEEDED, struct short_channel_id *scid UNNEEDED, u32 *blockheight UNNEEDED) +{ fprintf(stderr, "fromwire_gossip_store_chan_dying called!\n"); abort(); } /* Generated stub for gossmap_manage_get_gossmap */ struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm UNNEEDED) { fprintf(stderr, "gossmap_manage_get_gossmap called!\n"); abort(); } From c9bc7f25721a5e219a9128bf02d585b07cb4083a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 08:46:28 +1030 Subject: [PATCH 10/20] gossmap: keep stats on live/deleted records. This way gossmap_manage can decide when to compact. Signed-off-by: Rusty Russell --- common/gossmap.c | 43 +++++++++++++++++++++++++++++++++++++++++-- common/gossmap.h | 3 +++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/common/gossmap.c b/common/gossmap.c index 45ea204b28c7..693ebd0ef5ac 100644 --- a/common/gossmap.c +++ b/common/gossmap.c @@ -89,6 +89,9 @@ struct gossmap { /* local channel_update messages, if any. */ u8 *local_updates; + /* How many live and dead records? */ + size_t num_live, num_dead; + /* Optional logging callback */ void (*logcb)(void *cbarg, enum log_level level, @@ -320,6 +323,13 @@ static void remove_node(struct gossmap *map, struct gossmap_node *node) u32 nodeidx = gossmap_node_idx(map, node); if (!nodeidx_htable_del(map->nodes, node2ptrint(node))) abort(); + + /* If we had a node_announcement, it's now dead. */ + if (gossmap_node_announced(node)) { + map->num_live--; + map->num_dead++; + } + node->nann_off = map->freed_nodes; free(node->chan_idxs); node->chan_idxs = NULL; @@ -414,6 +424,8 @@ void gossmap_remove_chan(struct gossmap *map, struct gossmap_chan *chan) chan->cann_off = map->freed_chans; chan->plus_scid_off = 0; map->freed_chans = chanidx; + map->num_live--; + map->num_dead++; } void gossmap_remove_node(struct gossmap *map, struct gossmap_node *node) @@ -583,10 +595,17 @@ static bool update_channel(struct gossmap *map, u64 cupdate_off) if (!chan) return ret; + /* Are we replacing an existing one? Then old one is dead. */ + if (gossmap_chan_set(chan, scidd.dir)) + map->num_dead++; + else + map->num_live++; + /* Preserve this */ hc.nodeidx = chan->half[scidd.dir].nodeidx; chan->half[scidd.dir] = hc; chan->cupdate_off[scidd.dir] = cupdate_off; + return ret; } @@ -637,8 +656,15 @@ static void node_announcement(struct gossmap *map, u64 nann_off) feature_len = map_be16(map, nann_off + feature_len_off); map_nodeid(map, nann_off + feature_len_off + 2 + feature_len + 4, &id); - if ((n = gossmap_find_node(map, &id))) + if ((n = gossmap_find_node(map, &id))) { + /* Did this replace old announcement? If so, that's dead. */ + if (gossmap_node_announced(n)) { + map->num_live--; + map->num_dead++; + } n->nann_off = nann_off; + } + map->num_live++; } static bool report_dying_cb(struct gossmap *map, @@ -762,6 +788,9 @@ static bool reopen_store(struct gossmap *map, u64 ended_off) map->map_end = 1; map->generation++; + /* This isn't quite true, as there may be deleted ones, but not many. */ + map->num_dead = 0; + /* Now do reload. */ map_catchup(map, NULL, NULL, false, &changed); return changed; @@ -812,8 +841,10 @@ static bool map_catchup(struct gossmap *map, if (!(flags & GOSSIP_STORE_COMPLETED_BIT)) break; - if (flags & GOSSIP_STORE_DELETED_BIT) + if (flags & GOSSIP_STORE_DELETED_BIT) { + map->num_dead++; continue; + } /* Partial write, should not happen with completed records. */ if (map->map_end + reclen > map->map_size) @@ -863,6 +894,7 @@ static bool map_catchup(struct gossmap *map, break; if (redundant && must_be_clean) return false; + map->num_live++; } else if (type == WIRE_CHANNEL_UPDATE) num_bad_cupdates += update_channel(map, off); else if (type == WIRE_GOSSIP_STORE_DELETE_CHAN) @@ -1383,6 +1415,7 @@ struct gossmap *gossmap_load_(const tal_t *ctx, map->generation = 0; map->fname = tal_strdup(map, filename); map->fd = open(map->fname, O_RDONLY); + map->num_live = map->num_dead = 0; if (map->fd < 0) return tal_free(map); if (logcb) @@ -1992,3 +2025,9 @@ void gossmap_disable_mmap(struct gossmap *map) munmap(map->mmap, map->map_size); map->mmap = NULL; } + +void gossmap_stats(const struct gossmap *map, u64 *num_live, u64 *num_dead) +{ + *num_live = map->num_live; + *num_dead = map->num_dead; +} diff --git a/common/gossmap.h b/common/gossmap.h index 0c1a0425772d..c8bfbf19fb42 100644 --- a/common/gossmap.h +++ b/common/gossmap.h @@ -308,6 +308,9 @@ void gossmap_iter_fast_forward(const struct gossmap *map, /* Moves iterator to the end. */ void gossmap_iter_end(const struct gossmap *map, struct gossmap_iter *iter); +/* How dense is this? */ +void gossmap_stats(const struct gossmap *map, u64 *num_live, u64 *num_dead); + /* For debugging: returns length read, and total known length of file */ u64 gossmap_lengths(const struct gossmap *map, u64 *total); From b7c385793a67eb6f253465e468ab222e39ad1deb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:09:50 +1030 Subject: [PATCH 11/20] gossipd: use gossmap to load the dying entries. Signed-off-by: Rusty Russell --- gossipd/gossmap_manage.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index cc4c6264d461..6260673ab187 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -430,7 +430,7 @@ static void start_prune_timer(struct gossmap_manage *gm) static void reprocess_queued_msgs(struct gossmap_manage *gm); -static void gossmap_logcb(struct daemon *daemon, +static void gossmap_logcb(struct gossmap_manage *gm, enum log_level level, const char *fmt, ...) @@ -442,31 +442,46 @@ static void gossmap_logcb(struct daemon *daemon, va_end(ap); } +static void gossmap_add_dying_chan(struct short_channel_id scid, + u32 blockheight, + u64 offset, + struct gossmap_manage *gm) +{ + struct chan_dying cd; + + cd.scid = scid; + cd.deadline = blockheight; + cd.gossmap_offset = offset; + tal_arr_expand(&gm->dying_channels, cd); +} + static bool setup_gossmap(struct gossmap_manage *gm, - struct daemon *daemon, - struct chan_dying **dying) + struct daemon *daemon) { u64 expected_len; + struct chan_dying *dying = NULL; - *dying = NULL; + gm->dying_channels = tal_arr(gm, struct chan_dying, 0); /* This does simple sanitry checks, compacts, and creates if * necessary */ gm->gs = gossip_store_new(gm, daemon, &gm->gossip_store_populated, - dying); + &dying); if (!gm->gs) return false; + /* FIXME: Remove */ + tal_free(dying); expected_len = gossip_store_len_written(gm->gs); /* This actually loads it into memory, with strict checks. */ gm->raw_gossmap = gossmap_load_initial(gm, GOSSIP_STORE_FILENAME, expected_len, gossmap_logcb, - NULL, - daemon); + gossmap_add_dying_chan, + gm); if (!gm->raw_gossmap) { gm->gs = tal_free(gm->gs); return false; @@ -487,10 +502,10 @@ struct gossmap_manage *gossmap_manage_new(const tal_t *ctx, { struct gossmap_manage *gm = tal(ctx, struct gossmap_manage); - if (!setup_gossmap(gm, daemon, &gm->dying_channels)) { + if (!setup_gossmap(gm, daemon)) { tal_free(gm->dying_channels); gossip_store_corrupt(); - if (!setup_gossmap(gm, daemon, &gm->dying_channels)) + if (!setup_gossmap(gm, daemon)) status_failed(STATUS_FAIL_INTERNAL_ERROR, "Could not re-initialize %s", GOSSIP_STORE_FILENAME); } From d2a5530a22af7a5a002fede5c3100b570506936c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:10:24 +1030 Subject: [PATCH 12/20] gossipd: don't gather dying channels during compaction. Signed-off-by: Rusty Russell --- gossipd/gossip_store.c | 23 +++-------------------- gossipd/gossip_store.h | 4 +--- gossipd/gossmap_manage.c | 6 +----- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 2c358becec96..92d8d732e463 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -199,8 +199,7 @@ static u8 *new_uuid_record(const tal_t *ctx, int fd, u64 *off) */ static int gossip_store_compact(struct daemon *daemon, u64 *total_len, - bool *populated, - struct chan_dying **dying) + bool *populated) { size_t cannounces = 0, cupdates = 0, nannounces = 0, deleted = 0; int old_fd, new_fd; @@ -324,20 +323,6 @@ static int gossip_store_compact(struct daemon *daemon, case WIRE_CHANNEL_ANNOUNCEMENT: cannounces++; break; - case WIRE_GOSSIP_STORE_CHAN_DYING: { - struct chan_dying cd; - - if (!fromwire_gossip_store_chan_dying(msg, - &cd.scid, - &cd.deadline)) { - bad = "Bad gossip_store_chan_dying"; - goto badmsg; - } - /* By convention, these offsets are *after* header */ - cd.gossmap_offset = *total_len + sizeof(hdr); - tal_arr_expand(dying, cd); - break; - } case WIRE_CHANNEL_UPDATE: cupdates++; break; @@ -421,14 +406,12 @@ void gossip_store_corrupt(void) struct gossip_store *gossip_store_new(const tal_t *ctx, struct daemon *daemon, - bool *populated, - struct chan_dying **dying) + bool *populated) { struct gossip_store *gs = tal(ctx, struct gossip_store); gs->daemon = daemon; - *dying = tal_arr(ctx, struct chan_dying, 0); - gs->fd = gossip_store_compact(daemon, &gs->len, populated, dying); + gs->fd = gossip_store_compact(daemon, &gs->len, populated); if (gs->fd < 0) return tal_free(gs); tal_add_destructor(gs, gossip_store_destroy); diff --git a/gossipd/gossip_store.h b/gossipd/gossip_store.h index 853b772d772a..5d8391692ad1 100644 --- a/gossipd/gossip_store.h +++ b/gossipd/gossip_store.h @@ -27,14 +27,12 @@ struct chan_dying { * @ctx: the context to allocate from * @daemon: the daemon context * @populated: set to false if store is empty/obviously partial. - * @dying: an array of channels we found dying markers for. * * Returns NULL on error. */ struct gossip_store *gossip_store_new(const tal_t *ctx, struct daemon *daemon, - bool *populated, - struct chan_dying **dying); + bool *populated); /** * Move the old gossip store out the way. Log a broken message about it. diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 6260673ab187..5aa97c96a5e8 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -459,7 +459,6 @@ static bool setup_gossmap(struct gossmap_manage *gm, struct daemon *daemon) { u64 expected_len; - struct chan_dying *dying = NULL; gm->dying_channels = tal_arr(gm, struct chan_dying, 0); @@ -467,13 +466,10 @@ static bool setup_gossmap(struct gossmap_manage *gm, * necessary */ gm->gs = gossip_store_new(gm, daemon, - &gm->gossip_store_populated, - &dying); + &gm->gossip_store_populated); if (!gm->gs) return false; - /* FIXME: Remove */ - tal_free(dying); expected_len = gossip_store_len_written(gm->gs); /* This actually loads it into memory, with strict checks. */ From c8154c7c8cdfd0633c8a60e8c15e9bea8de41296 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:06 +1030 Subject: [PATCH 13/20] gossipd: don't compact on startup. We now only need to walk it if we're doing an upgrade. Signed-off-by: Rusty Russell Changelog-Changed: `gossipd` no longer compacts gossip_store on startup (improving start times significantly). --- gossipd/gossip_store.c | 182 +++++++++++++++++++-------------------- gossipd/gossmap_manage.c | 9 +- tests/test_gossip.py | 34 ++++---- 3 files changed, 116 insertions(+), 109 deletions(-) diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 92d8d732e463..3836653308be 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -185,23 +185,40 @@ static u8 *new_uuid_record(const tal_t *ctx, int fd, u64 *off) for (size_t i = 0; i < tal_bytelen(uuid); i++) uuid[i] = pseudorand(256); - append_msg(fd, towire_gossip_store_uuid(tmpctx, uuid), 0, off, NULL); + if (!append_msg(fd, towire_gossip_store_uuid(tmpctx, uuid), 0, off, NULL)) + return tal_free(uuid); /* append_msg does not change file offset, so do that now. */ lseek(fd, 0, SEEK_END); return uuid; } -/* Read gossip store entries, copy non-deleted ones. Check basic - * validity, but this code is written as simply and robustly as - * possible! - * - * Returns fd of new store, or -1 if it was grossly invalid. - */ -static int gossip_store_compact(struct daemon *daemon, +static int make_new_gossip_store(u64 *total_len) +{ + u8 version = GOSSIP_STORE_VER; + int new_fd = open(GOSSIP_STORE_TEMP_FILENAME, O_RDWR|O_TRUNC|O_CREAT, 0600); + + *total_len = sizeof(version); + + if (new_fd < 0 + || !write_all(new_fd, &version, sizeof(version)) + || !new_uuid_record(tmpctx, new_fd, total_len)) { + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Creating new gossip_store file: %s", + strerror(errno)); + } + if (rename(GOSSIP_STORE_TEMP_FILENAME, GOSSIP_STORE_FILENAME) != 0) { + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Renaming gossip_store failed: %s", + strerror(errno)); + } + return new_fd; +} + +/* If this returns -1, we cannot upgrade. */ +static int gossip_store_upgrade(struct daemon *daemon, u64 *total_len, bool *populated) { - size_t cannounces = 0, cupdates = 0, nannounces = 0, deleted = 0; int old_fd, new_fd; u64 old_len, cur_off; struct gossip_hdr hdr; @@ -209,11 +226,49 @@ static int gossip_store_compact(struct daemon *daemon, struct stat st; struct timemono start = time_mono(); const char *bad; - u8 *uuid = NULL; + u8 *uuid; - *populated = false; - old_len = 1; + old_fd = open(GOSSIP_STORE_FILENAME, O_RDWR); + if (old_fd == -1) { + if (errno == ENOENT) { + *populated = false; + return make_new_gossip_store(total_len); + } + + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Reading gossip_store file: %s", + strerror(errno)); + } + + if (!read_all(old_fd, &oldversion, sizeof(oldversion))) { + status_broken("Cannot read gossip_store version"); + goto upgrade_failed; + } + + if (fstat(old_fd, &st) != 0) { + status_broken("Could not stat gossip_store: %s", + strerror(errno)); + goto upgrade_failed; + } + + /* If we have any contents (beyond uuid), and the file is less + * than 1 hour old, say "seems good" */ + *populated = (st.st_mtime > time_now().ts.tv_sec - 3600 + && st.st_size > 1 + sizeof(hdr) + 2 + 32); + + /* No upgrade necessary? We're done. */ + if (oldversion == GOSSIP_STORE_VER) { + *total_len = st.st_size; + return old_fd; + } + + if (!can_upgrade(oldversion)) { + status_unusual("Cannot upgrade gossip_store version %u", + oldversion); + goto upgrade_failed; + } + /* OK, create new gossip store to convert into */ new_fd = open(GOSSIP_STORE_TEMP_FILENAME, O_RDWR|O_TRUNC|O_CREAT, 0600); if (new_fd < 0) { status_failed(STATUS_FAIL_INTERNAL_ERROR, @@ -226,36 +281,20 @@ static int gossip_store_compact(struct daemon *daemon, "Writing new gossip_store file: %s", strerror(errno)); } - *total_len = sizeof(version); - /* RDWR since we add closed marker at end! */ - old_fd = open(GOSSIP_STORE_FILENAME, O_RDWR); - if (old_fd == -1) { - if (errno == ENOENT) - goto rename_new; + *total_len = sizeof(version); + cur_off = old_len = sizeof(oldversion); + /* Create a fresh uuid, make sure we're after it. */ + uuid = new_uuid_record(tmpctx, new_fd, total_len); + if (!uuid) { status_failed(STATUS_FAIL_INTERNAL_ERROR, - "Reading gossip_store file: %s", - strerror(errno)); - }; - - if (fstat(old_fd, &st) != 0) { - status_broken("Could not stat gossip_store: %s", + "Writing new gossip_store file: %s", strerror(errno)); - goto rename_new; - } - - if (!read_all(old_fd, &oldversion, sizeof(oldversion)) - || (oldversion != version && !can_upgrade(oldversion))) { - status_broken("gossip_store_compact: bad version"); - goto rename_new; } - - cur_off = old_len = sizeof(oldversion); - - /* Make up uuid for old version */ - if (oldversion < 16) - uuid = new_uuid_record(tmpctx, new_fd, total_len); + assert(*total_len == lseek(new_fd, 0, SEEK_END)); + /* Move to the end (new_uuid_record uses pwrite, not write) */ + lseek(new_fd, *total_len, SEEK_SET); /* Read everything, write non-deleted ones to new_fd. If something goes wrong, * we end up with truncated store. */ @@ -270,14 +309,13 @@ static int gossip_store_compact(struct daemon *daemon, status_unusual("gossip_store_compact: store ends early at %"PRIu64, old_len); tal_free(msg); - goto rename_new; + goto upgrade_failed_close_new; } cur_off = old_len; old_len += sizeof(hdr) + msglen; if (be16_to_cpu(hdr.flags) & GOSSIP_STORE_DELETED_BIT) { - deleted++; tal_free(msg); continue; } @@ -300,10 +338,8 @@ static int gossip_store_compact(struct daemon *daemon, } /* It can tell us to delete record entirely. */ - if (msg == NULL) { - deleted++; + if (msg == NULL) continue; - } /* Recalc msglen and header */ msglen = tal_bytelen(msg); @@ -314,32 +350,19 @@ static int gossip_store_compact(struct daemon *daemon, /* Don't write out old tombstones */ if (fromwire_peektype(msg) == WIRE_GOSSIP_STORE_DELETE_CHAN) { - deleted++; tal_free(msg); continue; } - switch (fromwire_peektype(msg)) { - case WIRE_CHANNEL_ANNOUNCEMENT: - cannounces++; - break; - case WIRE_CHANNEL_UPDATE: - cupdates++; - break; - case WIRE_NODE_ANNOUNCEMENT: - nannounces++; - break; - case WIRE_GOSSIP_STORE_UUID: - uuid = tal_arr(tmpctx, u8, 32); - if (!fromwire_gossip_store_uuid(msg, uuid)) { - bad = "Corrupt uuid"; - goto badmsg; - } - break; + /* Ignore uuid: fresh file will have fresh uuid */ + if (fromwire_peektype(msg) == WIRE_GOSSIP_STORE_UUID) { + tal_free(msg); + continue; } if (!write_all(new_fd, &hdr, sizeof(hdr)) || !write_all(new_fd, msg, msglen)) { + /* We fail hard here, since we're probably out of space. */ status_failed(STATUS_FAIL_INTERNAL_ERROR, "gossip_store_compact: writing msg len %zu to new store: %s", msglen, strerror(errno)); @@ -350,23 +373,6 @@ static int gossip_store_compact(struct daemon *daemon, assert(*total_len == lseek(new_fd, 0, SEEK_END)); - /* If we have any contents, and the file is less than 1 hour - * old, say "seems good" */ - if (st.st_mtime > clock_time().ts.tv_sec - 3600 && *total_len > 1) { - *populated = true; - } - -rename_new: - /* If we didn't copy a uuid, do so now. */ - if (!uuid) { - if (lseek(new_fd, 0, SEEK_END) != 1) { - bad = tal_fmt(tmpctx, - "missing uuid in version %u", oldversion); - goto badmsg; - } - uuid = new_uuid_record(tmpctx, new_fd, total_len); - } - if (rename(GOSSIP_STORE_TEMP_FILENAME, GOSSIP_STORE_FILENAME) != 0) { status_failed(STATUS_FAIL_INTERNAL_ERROR, "gossip_store_compact: rename failed: %s", @@ -374,26 +380,20 @@ static int gossip_store_compact(struct daemon *daemon, } /* Create end marker now new file exists. */ - if (old_fd != -1) { - /* FIXME: real uuid! */ - append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len, uuid), - 0, &old_len, NULL); - close(old_fd); - } + append_msg(old_fd, towire_gossip_store_ended(tmpctx, *total_len, uuid), 0, &old_len, NULL); + close(old_fd); - status_debug("Store compact time: %"PRIu64" msec", + status_debug("Time to convert version %u store: %"PRIu64" msec", + oldversion, time_to_msec(timemono_between(time_mono(), start))); - status_debug("gossip_store: Read %zu/%zu/%zu/%zu cannounce/cupdate/nannounce/delete from store in %"PRIu64" bytes, now %"PRIu64" bytes (populated=%s)", - cannounces, cupdates, nannounces, deleted, - old_len, *total_len, - *populated ? "true": "false"); return new_fd; badmsg: - /* Caller will presumably try gossip_store_reset. */ status_broken("gossip_store: %s (offset %"PRIu64").", bad, cur_off); - close(old_fd); +upgrade_failed_close_new: close(new_fd); +upgrade_failed: + close(old_fd); return -1; } @@ -411,7 +411,7 @@ struct gossip_store *gossip_store_new(const tal_t *ctx, struct gossip_store *gs = tal(ctx, struct gossip_store); gs->daemon = daemon; - gs->fd = gossip_store_compact(daemon, &gs->len, populated); + gs->fd = gossip_store_upgrade(daemon, &gs->len, populated); if (gs->fd < 0) return tal_free(gs); tal_add_destructor(gs, gossip_store_destroy); diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 5aa97c96a5e8..c6cf486258aa 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -458,12 +458,11 @@ static void gossmap_add_dying_chan(struct short_channel_id scid, static bool setup_gossmap(struct gossmap_manage *gm, struct daemon *daemon) { - u64 expected_len; + u64 expected_len, num_live, num_dead; gm->dying_channels = tal_arr(gm, struct chan_dying, 0); - /* This does simple sanitry checks, compacts, and creates if - * necessary */ + /* This does creates or converts if necessary. */ gm->gs = gossip_store_new(gm, daemon, &gm->gossip_store_populated); @@ -483,6 +482,10 @@ static bool setup_gossmap(struct gossmap_manage *gm, return false; } gm->last_writes = tal_arr(gm, const u8 *, 0); + + gossmap_stats(gm->raw_gossmap, &num_live, &num_dead); + status_debug("gossip_store: %"PRIu64" live records, %"PRIu64" deleted", + num_live, num_dead); return true; } diff --git a/tests/test_gossip.py b/tests/test_gossip.py index e6a2bb075241..440a9a7c1b6d 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1207,8 +1207,9 @@ def test_gossip_store_load(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/0 cannounce/cupdate/nannounce/delete from store in 832 bytes, now 824 bytes')) - assert not l1.daemon.is_in_log('gossip_store.*truncating') + l1.daemon.logsearch_start = 0 + l1.daemon.wait_for_log("gossipd: Time to convert version 12 store") + l1.daemon.wait_for_log("gossipd: gossip_store: 3 live records, 0 deleted") def test_gossip_store_v10_upgrade(node_factory): @@ -1275,8 +1276,10 @@ def test_gossip_store_load_announce_before_update(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/1/1/1 cannounce/cupdate/nannounce/delete from store in 982 bytes, now 824 bytes')) - assert not l1.daemon.is_in_log('gossip_store.*truncating') + l1.daemon.logsearch_start = 0 + l1.daemon.wait_for_log("gossipd: Time to convert version 12 store: [0-9]* msec") + l1.daemon.wait_for_log("gossipd: gossip_store: 3 live records, 0 deleted") + assert not l1.daemon.is_in_log('gossip_store.*corrupt') def test_gossip_store_load_amount_truncated(node_factory): @@ -1294,7 +1297,7 @@ def test_gossip_store_load_amount_truncated(node_factory): # May preceed the Started msg waited for in 'start'. wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store only processed 47 bytes of 491 \(expected 491\)')) wait_for(lambda: l1.daemon.is_in_log(r'\*\*BROKEN\*\* gossipd: gossip_store: Moving to gossip_store.corrupt')) - wait_for(lambda: l1.daemon.is_in_log(r'gossip_store: Read 0/0/0/0 cannounce/cupdate/nannounce/delete from store in 1 bytes, now 47 bytes \(populated=false\)')) + wait_for(lambda: l1.daemon.is_in_log('gossip_store: 0 live records, 0 deleted')) assert os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) @@ -1609,7 +1612,7 @@ def setup_gossip_store_test(node_factory, bitcoind, opts=None): def test_gossip_store_corrupt(node_factory, bitcoind): - l2 = setup_gossip_store_test(node_factory, bitcoind, opts=[{}, {'broken_log': 'gossipd:.*bad version'}, {}]) + l2 = setup_gossip_store_test(node_factory, bitcoind, opts=[{}, {'broken_log': 'gossip_store: Moving to gossip_store.corrupt'}, {}]) l2.stop() # It should truncate this, not leave junk! @@ -1617,11 +1620,9 @@ def test_gossip_store_corrupt(node_factory, bitcoind): f.write(bytearray.fromhex("07deadbeef")) l2.start() - wait_for(lambda: l2.daemon.is_in_log('gossip_store: Read ')) - assert l2.daemon.is_in_log('gossip_store_compact: bad version') - - # Will simply overwrite, due to old version. - assert not os.path.exists(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) + # Complains, moves. + wait_for(lambda: l2.daemon.is_in_log('gossipd: Cannot upgrade gossip_store version 7')) + assert os.path.exists(os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) def test_gossip_store_load_complex(node_factory, bitcoind): @@ -1629,7 +1630,7 @@ def test_gossip_store_load_complex(node_factory, bitcoind): l2.restart() - wait_for(lambda: l2.daemon.is_in_log('gossip_store: Read ')) + wait_for(lambda: l2.daemon.is_in_log('gossip_store: 9 live records, 2 deleted')) def test_gossip_store_load_no_channel_update(node_factory): @@ -1658,11 +1659,14 @@ def test_gossip_store_load_no_channel_update(node_factory): l1.start() # May preceed the Started msg waited for in 'start'. - wait_for(lambda: l1.daemon.is_in_log('Read 1/0/1/0 cannounce/cupdate/nannounce/delete from store in 682 bytes, now 674 bytes')) + l1.daemon.logsearch_start = 0 + l1.daemon.wait_for_log("gossipd: Time to convert version 13 store: [0-9]* msec") + # One is the uuid we insert! + l1.daemon.wait_for_log("gossipd: gossip_store: 2 live records, 0 deleted") assert not os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) -def test_gossip_store_compact_on_load(node_factory, bitcoind): +def test_gossip_store_load_uncompacted(node_factory, bitcoind): l2 = setup_gossip_store_test(node_factory, bitcoind) gs_path = os.path.join(l2.daemon.lightning_dir, TEST_NETWORK, 'gossip_store') @@ -1673,7 +1677,7 @@ def test_gossip_store_compact_on_load(node_factory, bitcoind): l2.restart() # These appear before we're fully started, so will already in log: - assert l2.daemon.is_in_log('gossip_store: Read 2/4/3/2 cannounce/cupdate/nannounce/delete from store') + assert l2.daemon.is_in_log('gossip_store: 9 live records, 2 deleted') def test_gossip_announce_invalid_block(node_factory, bitcoind): From de487ec53d2606eee0d0b1c48c1f50551cf5c6d5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 14/20] devtools/gossmap-compress: create latest gossip_store version This saves gossipd from converting it: ``` lightningd-1 2026-02-02T00:50:49.505Z DEBUG gossipd: Time to convert version 14 store: 890 msec ``` Reducing node startup time from 1.4 seconds to 0.5 seconds. Signed-off-by: Rusty Russell --- devtools/gossmap-compress.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/devtools/gossmap-compress.c b/devtools/gossmap-compress.c index 146dad475dae..5c0be91b5e39 100644 --- a/devtools/gossmap-compress.c +++ b/devtools/gossmap-compress.c @@ -79,7 +79,7 @@ static unsigned int verbose = 0; #define GC_HEADER "GOSSMAP_COMPRESSv1" #define GC_HEADERLEN (sizeof(GC_HEADER)) -#define GOSSIP_STORE_VER ((0 << 5) | 14) +#define GOSSIP_STORE_VER ((0 << 5) | 16) /* Backwards, we want larger first */ static int cmp_node_num_chans(struct gossmap_node *const *a, @@ -295,7 +295,7 @@ static void write_msg_to_gstore(int outfd, const u8 *msg TAKES) { struct gossip_hdr hdr; - hdr.flags = 0; + hdr.flags = CPU_TO_BE16(GOSSIP_STORE_COMPLETED_BIT); hdr.len = cpu_to_be16(tal_bytelen(msg)); hdr.timestamp = 0; hdr.crc = cpu_to_be32(crc32c(0, msg, tal_bytelen(msg))); @@ -513,6 +513,17 @@ static const char *get_alias(const tal_t *ctx, return tal_strndup(ctx, (const char *)alias, 32); } +static void write_uuid(int outfd) +{ + const u8 uuid[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + }; + write_msg_to_gstore(outfd, take(towire_gossip_store_uuid(NULL, uuid))); +} + int main(int argc, char *argv[]) { int outfd; @@ -791,6 +802,7 @@ int main(int argc, char *argv[]) /* Now write out gossmap */ if (write(outfd, &version, 1) != 1) err(1, "Failed to write output"); + write_uuid(outfd); for (size_t i = 0; i < channel_count; i++) { write_announce(outfd, chans[i].node1, From d6056ef29eeac2b6190c7f958820aab3a12777a9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 15/20] gossipd: put the last_writes array inside struct gossip_store. This is the file responsible for all the writing, so it should be responsible for the rewriting if necessary (rather than gossmap_manage). Signed-off-by: Rusty Russell --- gossipd/gossip_store.c | 21 ++++++++++++++++----- gossipd/gossip_store.h | 12 +++++++----- gossipd/gossmap_manage.c | 30 +++++++++++------------------- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 3836653308be..93b8d36fd04c 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -35,6 +35,10 @@ struct gossip_store { /* Timestamp of store when we opened it (0 if we created it) */ u32 timestamp; + + /* Last writes since previous sync, in case it messes up and + * we need to force it. */ + const u8 **last_writes; }; static void gossip_store_destroy(struct gossip_store *gs) @@ -414,20 +418,22 @@ struct gossip_store *gossip_store_new(const tal_t *ctx, gs->fd = gossip_store_upgrade(daemon, &gs->len, populated); if (gs->fd < 0) return tal_free(gs); + gs->last_writes = tal_arr(gs, const u8 *, 0); tal_add_destructor(gs, gossip_store_destroy); return gs; } -void gossip_store_fsync(const struct gossip_store *gs) +static void gossip_store_fsync(const struct gossip_store *gs) { if (fsync(gs->fd) != 0) status_failed(STATUS_FAIL_INTERNAL_ERROR, "gossmap fsync failed: %s", strerror(errno)); } -void gossip_store_rewrite_end(struct gossip_store *gs, const u8 **msgs) +void gossip_store_rewrite_end(struct gossip_store *gs) { u64 offset = gs->len; + const u8 **msgs = gs->last_writes; for (size_t i = 0; i < tal_count(msgs); i++) { /* Don't overwrite version byte */ @@ -447,10 +453,15 @@ void gossip_store_rewrite_end(struct gossip_store *gs, const u8 **msgs) gossip_store_fsync(gs); } +void gossip_store_writes_confirmed(struct gossip_store *gs) +{ + tal_free(gs->last_writes); + gs->last_writes = tal_arr(gs, const u8 *, 0); +} + u64 gossip_store_add(struct gossip_store *gs, const u8 *gossip_msg, - u32 timestamp, - const u8 ***msgs) + u32 timestamp) { u64 off = gs->len, filelen; @@ -470,7 +481,7 @@ u64 gossip_store_add(struct gossip_store *gs, filelen, off); } - if (!append_msg(gs->fd, gossip_msg, timestamp, &gs->len, msgs)) { + if (!append_msg(gs->fd, gossip_msg, timestamp, &gs->len, &gs->last_writes)) { status_failed(STATUS_FAIL_INTERNAL_ERROR, "Failed writing to gossip store: %s", strerror(errno)); diff --git a/gossipd/gossip_store.h b/gossipd/gossip_store.h index 5d8391692ad1..81e76d7d0273 100644 --- a/gossipd/gossip_store.h +++ b/gossipd/gossip_store.h @@ -44,14 +44,12 @@ void gossip_store_corrupt(void); * @gs: gossip store * @gossip_msg: the gossip message to insert. * @timestamp: the timestamp for filtering of this messsage. - * @msgs: the option pointer to a u8 *array to append the written msgs to. * * Returns the offset (after the gossip_hdr). */ u64 gossip_store_add(struct gossip_store *gs, const u8 *gossip_msg, - u32 timestamp, - const u8 ***msgs); + u32 timestamp); /** * Delete the record at this offset (offset is that of @@ -107,12 +105,16 @@ void gossip_store_set_timestamp(struct gossip_store *gs, u64 offset, u32 timesta /** * We've seen (ZFS on Linux) writes not show up in the gossip store. * This lets us rewrite the last bytes. */ -void gossip_store_rewrite_end(struct gossip_store *gs, const u8 **msgs); +void gossip_store_rewrite_end(struct gossip_store *gs); + +/** + * Once we've checked the contents are good, the last_writes storage can + * be reset. */ +void gossip_store_writes_confirmed(struct gossip_store *gs); /** * For debugging. */ u64 gossip_store_len_written(const struct gossip_store *gs); -void gossip_store_fsync(const struct gossip_store *gs); #endif /* LIGHTNING_GOSSIPD_GOSSIP_STORE_H */ diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index c6cf486258aa..a9d7f2b8e199 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -65,10 +65,6 @@ struct gossmap_manage { /* gossip map itself (access via gossmap_manage_get_gossmap, so it's fresh!) */ struct gossmap *raw_gossmap; - /* Last writes to gossmap since previous sync, in case it - * messes up and we need to force it. */ - const u8 **last_writes; - /* The gossip_store, which writes to the gossip_store file */ struct gossip_store *gs; @@ -268,7 +264,7 @@ static void remove_channel(struct gossmap_manage *gm, /* Put in tombstone marker. */ gossip_store_add(gm->gs, towire_gossip_store_delete_chan(tmpctx, scid), - 0, &gm->last_writes); + 0); /* Delete from store */ gossip_store_del(gm->gs, chan->cann_off, WIRE_CHANNEL_ANNOUNCEMENT); @@ -309,7 +305,7 @@ static void remove_channel(struct gossmap_manage *gm, timestamp = gossip_store_get_timestamp(gm->gs, node->nann_off); gossip_store_del(gm->gs, node->nann_off, WIRE_NODE_ANNOUNCEMENT); - offset = gossip_store_add(gm->gs, nannounce, timestamp, &gm->last_writes); + offset = gossip_store_add(gm->gs, nannounce, timestamp); } else { /* Are all remaining channels dying but we weren't? * Can happen if we removed this channel immediately @@ -481,7 +477,6 @@ static bool setup_gossmap(struct gossmap_manage *gm, gm->gs = tal_free(gm->gs); return false; } - gm->last_writes = tal_arr(gm, const u8 *, 0); gossmap_stats(gm->raw_gossmap, &num_live, &num_dead); status_debug("gossip_store: %"PRIu64" live records, %"PRIu64" deleted", @@ -636,10 +631,9 @@ const char *gossmap_manage_channel_announcement(const tal_t *ctx, */ if (known_amount) { /* Set with timestamp 0 (we will update once we have a channel_update) */ - gossip_store_add(gm->gs, announce, 0, &gm->last_writes); + gossip_store_add(gm->gs, announce, 0); gossip_store_add(gm->gs, - towire_gossip_store_channel_amount(tmpctx, *known_amount), 0, - &gm->last_writes); + towire_gossip_store_channel_amount(tmpctx, *known_amount), 0); node_announcements_not_dying(gm, gossmap, pca); tal_free(pca); @@ -770,10 +764,9 @@ void gossmap_manage_handle_get_txout_reply(struct gossmap_manage *gm, const u8 * } /* Set with timestamp 0 (we will update once we have a channel_update) */ - gossip_store_add(gm->gs, pca->channel_announcement, 0, &gm->last_writes); + gossip_store_add(gm->gs, pca->channel_announcement, 0); gossip_store_add(gm->gs, - towire_gossip_store_channel_amount(tmpctx, sat), 0, - &gm->last_writes); + towire_gossip_store_channel_amount(tmpctx, sat), 0); /* If we looking specifically for this, we no longer are. */ remove_unknown_scid(gm->daemon->seeker, &scid, true); @@ -875,7 +868,7 @@ static const char *process_channel_update(const tal_t *ctx, } /* OK, apply the new one */ - offset = gossip_store_add(gm->gs, update, timestamp, &gm->last_writes); + offset = gossip_store_add(gm->gs, update, timestamp); /* If channel is dying, make sure update is also marked dying! */ if (gossmap_chan_is_dying(gossmap, chan)) { @@ -1054,7 +1047,7 @@ static void process_node_announcement(struct gossmap_manage *gm, } /* OK, apply the new one */ - offset = gossip_store_add(gm->gs, nannounce, timestamp, &gm->last_writes); + offset = gossip_store_add(gm->gs, nannounce, timestamp); /* If all channels are dying, make sure this is marked too. */ if (all_node_channels_dying(gossmap, node, NULL)) { gossip_store_set_flag(gm->gs, offset, @@ -1381,7 +1374,7 @@ void gossmap_manage_channel_spent(struct gossmap_manage *gm, /* Save to gossip_store in case we restart */ msg = towire_gossip_store_chan_dying(tmpctx, cd.scid, cd.deadline); - cd.gossmap_offset = gossip_store_add(gm->gs, msg, 0, &gm->last_writes); + cd.gossmap_offset = gossip_store_add(gm->gs, msg, 0); tal_arr_expand(&gm->dying_channels, cd); /* Mark it dying, so we don't gossip it */ @@ -1485,7 +1478,7 @@ struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm) gossmap_disable_mmap(gm->raw_gossmap); /* Try rewriting the last few records, syncing. */ - gossip_store_rewrite_end(gm->gs, gm->last_writes); + gossip_store_rewrite_end(gm->gs); gossmap_refresh(gm->raw_gossmap); map_used = gossmap_lengths(gm->raw_gossmap, &map_size); @@ -1497,8 +1490,7 @@ struct gossmap *gossmap_manage_get_gossmap(struct gossmap_manage *gm) } /* Free up last_writes, since we've seen it on disk */ - tal_free(gm->last_writes); - gm->last_writes = tal_arr(gm, const u8 *, 0); + gossip_store_writes_confirmed(gm->gs); return gm->raw_gossmap; } From 5983087a00729929b03d86d5dbc18fd0630a352b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 16/20] common: expose gossip_store "header and type" single-read struct. gossip_store.c uses this to avoid two reads, and we want to use it elsewhere too. Also fix old comment on gossip_store_readhdr(). Signed-off-by: Rusty Russell --- common/gossip_store.c | 14 +++----------- common/gossip_store.h | 15 ++++++++++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/common/gossip_store.c b/common/gossip_store.c index 298988a13bb6..94a1e6a8eb7c 100644 --- a/common/gossip_store.c +++ b/common/gossip_store.c @@ -3,25 +3,17 @@ #include #include -/* We cheat and read first two bytes of message too. */ -struct hdr_and_type { - struct gossip_hdr hdr; - be16 type; -}; -/* Beware padding! */ -#define HDR_AND_TYPE_SIZE (sizeof(struct gossip_hdr) + sizeof(u16)) - bool gossip_store_readhdr(int gossip_store_fd, size_t off, size_t *len, u32 *timestamp, u16 *flags, u16 *type) { - struct hdr_and_type buf; + struct gossip_hdr_and_type buf; int r; - r = pread(gossip_store_fd, &buf, HDR_AND_TYPE_SIZE, off); - if (r != HDR_AND_TYPE_SIZE) + r = pread(gossip_store_fd, &buf, GOSSIP_HDR_AND_TYPE_SIZE, off); + if (r != GOSSIP_HDR_AND_TYPE_SIZE) return false; if (!(buf.hdr.flags & CPU_TO_BE16(GOSSIP_STORE_COMPLETED_BIT))) return false; diff --git a/common/gossip_store.h b/common/gossip_store.h index 1545e86853fa..de3d86aabbe7 100644 --- a/common/gossip_store.h +++ b/common/gossip_store.h @@ -50,6 +50,14 @@ struct gossip_hdr { beint32_t timestamp; /* timestamp of msg. */ }; +/* Useful to read gossip_hdr and type of msg. */ +struct gossip_hdr_and_type { + struct gossip_hdr hdr; + be16 type; +}; +/* Beware padding! */ +#define GOSSIP_HDR_AND_TYPE_SIZE (sizeof(struct gossip_hdr) + sizeof(u16)) + /** * Direct store accessor: read gossip msg hdr from store. * @gossip_store_fd: the readable file descriptor @@ -59,11 +67,8 @@ struct gossip_hdr { * @flags (out): if non-NULL, set to the flags. * @type (out): if non-NULL, set to the msg type. * - * Returns false if there are no more gossip msgs. If you - * want to read the message, use gossip_store_next, if you - * want to skip, simply add sizeof(gossip_hdr) + *len to *off. - * Note: it's possible that entire record isn't there yet, - * so gossip_store_next can fail. + * Returns false if there are no more gossip msgs, or message + * is incomplete. To iterate, simply add sizeof(gossip_hdr) + *len to off. */ bool gossip_store_readhdr(int gossip_store_fd, size_t off, size_t *len, From 763f851b01c748e440cbf90aa711cd9112430d63 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 17/20] gossipd: lightningd/lightning_gossip_compactd A new subprocess run by gossipd to create a compacted gossip store. It's pretty simple: a linear compaction of the file. Once it's done the amount it was told to, then gossipd waits until it completes the last bit. Signed-off-by: Rusty Russell --- Makefile | 1 + gossipd/Makefile | 9 ++- gossipd/compactd.c | 154 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 gossipd/compactd.c diff --git a/Makefile b/Makefile index fd20d6526137..e59e9f0b5bc1 100644 --- a/Makefile +++ b/Makefile @@ -468,6 +468,7 @@ PKGLIBEXEC_PROGRAMS = \ lightningd/lightning_connectd \ lightningd/lightning_dualopend \ lightningd/lightning_gossipd \ + lightningd/lightning_gossip_compactd \ lightningd/lightning_hsmd \ lightningd/lightning_onchaind \ lightningd/lightning_openingd \ diff --git a/gossipd/Makefile b/gossipd/Makefile index e92cd1f2b81b..6ca4bbbef0ad 100644 --- a/gossipd/Makefile +++ b/gossipd/Makefile @@ -14,10 +14,13 @@ GOSSIPD_SRC := $(GOSSIPD_HEADERS_WSRC:.h=.c) GOSSIPD_OBJS := $(GOSSIPD_SRC:.c=.o) $(GOSSIPD_OBJS): $(GOSSIPD_HEADERS) +GOSSIPD_COMPACT_SRC := gossipd/compactd.c +GOSSIPD_COMPACT_OBJS := $(GOSSIPD_COMPACT_SRC:.c=.o) + # Make sure these depend on everything. -ALL_C_SOURCES += $(GOSSIPD_SRC) +ALL_C_SOURCES += $(GOSSIPD_SRC) $(GOSSIPD_COMPACT_SRC) ALL_C_HEADERS += $(GOSSIPD_HEADERS) -ALL_PROGRAMS += lightningd/lightning_gossipd +ALL_PROGRAMS += lightningd/lightning_gossipd lightningd/lightning_gossip_compactd # Here's what lightningd depends on LIGHTNINGD_CONTROL_HEADERS += gossipd/gossipd_wiregen.h @@ -25,6 +28,8 @@ LIGHTNINGD_CONTROL_OBJS += gossipd/gossipd_wiregen.o lightningd/lightning_gossipd: $(GOSSIPD_OBJS) $(HSMD_CLIENT_OBJS) connectd/connectd_gossipd_wiregen.o libcommon.a +lightningd/lightning_gossip_compactd: $(GOSSIPD_COMPACT_OBJS) libcommon.a + # The CSV was removed, but the generated files may live on! clean: clean-old-gossip-gen diff --git a/gossipd/compactd.c b/gossipd/compactd.c new file mode 100644 index 000000000000..c2a83be6cf9e --- /dev/null +++ b/gossipd/compactd.c @@ -0,0 +1,154 @@ +/*~ This is a cute little standalone program that copies the gossip store, minus + * any deleted records. gossipd fires us up to create a compacted gossip store. + * When we're done, we tell it (via stdout) and it tells us when it's ready to + * wait for the final part. + */ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void writex(int fd, const void *p, size_t len) +{ + if (!write_all(fd, p, len)) + err(1, "Could not write new gossip_store"); +} + +static size_t readx(int fd, void *p, size_t len) +{ + if (!read_all(fd, p, len)) + err(1, "Could not read old gossip_store"); + return len; +} + +static size_t skipx(int fd, size_t len) +{ + if (lseek(fd, len, SEEK_CUR) == (off_t)-1) + err(1, "Could not seek on old gossip_store"); + return len; +} + +static void writerec(int fd, const void *msg) +{ + struct gossip_hdr ghdr; + + ghdr.flags = cpu_to_be16(GOSSIP_STORE_COMPLETED_BIT); + ghdr.len = cpu_to_be16(tal_bytelen(msg)); + ghdr.timestamp = 0; + ghdr.crc = cpu_to_be32(crc32c(be32_to_cpu(ghdr.timestamp), + msg, tal_bytelen(msg))); + writex(fd, &ghdr, sizeof(ghdr)); + writex(fd, msg, tal_bytelen(msg)); +} + +static u64 copy_records(int oldfd, int newfd, u64 old_off, u64 limit, + bool keep_delete_chan) +{ + u8 buffer[65535]; + + while (old_off < limit) { + size_t reclen; + struct gossip_hdr_and_type hdr; + + old_off += readx(oldfd, &hdr, GOSSIP_HDR_AND_TYPE_SIZE); + + /* We read 2 bytes already */ + reclen = be16_to_cpu(hdr.hdr.len) - 2; + + /* Skip old uuid and deleted records, */ + if (be16_to_cpu(hdr.type) == WIRE_GOSSIP_STORE_UUID + || (be16_to_cpu(hdr.hdr.flags) & GOSSIP_STORE_DELETED_BIT)) { + old_off += skipx(oldfd, reclen); + continue; + } + + /* Are we supposed to skip deleted markers? */ + if (!keep_delete_chan + && be16_to_cpu(hdr.type) == WIRE_GOSSIP_STORE_DELETE_CHAN) { + old_off += skipx(oldfd, reclen); + continue; + } + + if (!((be16_to_cpu(hdr.hdr.flags) & GOSSIP_STORE_COMPLETED_BIT))) + errx(1, "Incomplete gossip_store record at %"PRIu64, + old_off - GOSSIP_HDR_AND_TYPE_SIZE); + + old_off += readx(oldfd, buffer, reclen); + + writex(newfd, &hdr, GOSSIP_HDR_AND_TYPE_SIZE); + writex(newfd, buffer, reclen); + } + + return old_off; +} + +int main(int argc, char *argv[]) +{ + int oldfd, newfd; + u8 gsversion, byte; + u8 uuid[32]; + u64 old_off, limit; + + common_setup(argv[0]); + /* Not really a subdaemon (we don't use status_xxx) but we can pretend */ + if (argc == 2 && streq(argv[1], "--version")) { + printf("%s\n", version()); + exit(0); + } + + if (argc != 5) + errx(1, "Usage: %s ", + argv[0]); + + oldfd = open(argv[1], O_RDONLY); + if (oldfd < 0) + err(1, "Could not open old gossip_store %s", argv[1]); + newfd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0600); + if (newfd < 0) + err(1, "Could not open new gossip_store %s", argv[2]); + limit = atol(argv[3]); + if (!hex_decode(argv[4], strlen(argv[4]), uuid, sizeof(uuid))) + errx(1, "Invalid uuid %s", argv[1]); + + /* Copy version byte */ + old_off = readx(oldfd, &gsversion, sizeof(gsversion)); + writex(newfd, &gsversion, sizeof(gsversion)); + + /* Create uuid hdr. */ + writerec(newfd, towire_gossip_store_uuid(tmpctx, uuid)); + + old_off = copy_records(oldfd, newfd, old_off, limit, false); + /* We should hit limit exactly */ + if (old_off != limit) + errx(1, "We reached offset %"PRIu64" past initial limit %"PRIu64, + old_off, limit); + + /* Now we tell gossipd we're done, and it pauses while we copy the last bit. + * Note that we need to keep any "delete_channel" records here, since that + * would have happened since we copied the first part, and we might have + * missed the deleted bit on those channels. */ + byte = 0; + writex(STDOUT_FILENO, &byte, sizeof(byte)); + readx(STDIN_FILENO, &byte, sizeof(byte)); + + limit = lseek(oldfd, 0, SEEK_END); + lseek(oldfd, old_off, SEEK_SET); + old_off = copy_records(oldfd, newfd, old_off, limit, true); + + /* We should hit EOF exactly */ + if (old_off != limit) + errx(1, "We reached offset %"PRIu64" before file size %"PRIu64, + old_off, limit); + + common_shutdown(); +} From 9d01bea30bb3a09822e487ceed9a91b2b642706d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 18/20] gossipd: code to invoke compactd and reopen store. This isn't called anywhere yet. Signed-off-by: Rusty Russell --- gossipd/gossip_store.c | 47 ++++++--- gossipd/gossip_store.h | 13 +++ gossipd/gossipd.c | 13 +-- gossipd/gossipd.h | 3 + gossipd/gossipd_wire.csv | 3 +- gossipd/gossmap_manage.c | 198 ++++++++++++++++++++++++++++++++++++ lightningd/gossip_control.c | 5 +- lightningd/lightningd.c | 3 +- 8 files changed, 261 insertions(+), 24 deletions(-) diff --git a/gossipd/gossip_store.c b/gossipd/gossip_store.c index 93b8d36fd04c..d7d9a446d491 100644 --- a/gossipd/gossip_store.c +++ b/gossipd/gossip_store.c @@ -218,6 +218,25 @@ static int make_new_gossip_store(u64 *total_len) return new_fd; } +static int gossip_store_open(u64 *total_len, bool *recent) +{ + struct stat st; + int fd = open(GOSSIP_STORE_FILENAME, O_RDWR); + if (fd == -1) + return -1; + + if (fstat(fd, &st) != 0) { + close_noerr(fd); + return -1; + } + + if (recent) + *recent = (st.st_mtime > clock_time().ts.tv_sec - 3600); + + *total_len = st.st_size; + return fd; +} + /* If this returns -1, we cannot upgrade. */ static int gossip_store_upgrade(struct daemon *daemon, u64 *total_len, @@ -227,12 +246,12 @@ static int gossip_store_upgrade(struct daemon *daemon, u64 old_len, cur_off; struct gossip_hdr hdr; u8 oldversion, version = GOSSIP_STORE_VER; - struct stat st; struct timemono start = time_mono(); const char *bad; + bool recent; u8 *uuid; - old_fd = open(GOSSIP_STORE_FILENAME, O_RDWR); + old_fd = gossip_store_open(total_len, &recent); if (old_fd == -1) { if (errno == ENOENT) { *populated = false; @@ -249,22 +268,13 @@ static int gossip_store_upgrade(struct daemon *daemon, goto upgrade_failed; } - if (fstat(old_fd, &st) != 0) { - status_broken("Could not stat gossip_store: %s", - strerror(errno)); - goto upgrade_failed; - } - /* If we have any contents (beyond uuid), and the file is less * than 1 hour old, say "seems good" */ - *populated = (st.st_mtime > time_now().ts.tv_sec - 3600 - && st.st_size > 1 + sizeof(hdr) + 2 + 32); + *populated = recent && *total_len > 1 + sizeof(hdr) + 2 + 32; /* No upgrade necessary? We're done. */ - if (oldversion == GOSSIP_STORE_VER) { - *total_len = st.st_size; + if (oldversion == GOSSIP_STORE_VER) return old_fd; - } if (!can_upgrade(oldversion)) { status_unusual("Cannot upgrade gossip_store version %u", @@ -423,7 +433,16 @@ struct gossip_store *gossip_store_new(const tal_t *ctx, return gs; } -static void gossip_store_fsync(const struct gossip_store *gs) +void gossip_store_reopen(struct gossip_store *gs) +{ + close(gs->fd); + gs->fd = gossip_store_open(&gs->len, NULL); + if (gs->fd < 0) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "gossmap reopen failed: %s", strerror(errno)); +} + +void gossip_store_fsync(const struct gossip_store *gs) { if (fsync(gs->fd) != 0) status_failed(STATUS_FAIL_INTERNAL_ERROR, diff --git a/gossipd/gossip_store.h b/gossipd/gossip_store.h index 81e76d7d0273..ee7130609894 100644 --- a/gossipd/gossip_store.h +++ b/gossipd/gossip_store.h @@ -34,11 +34,24 @@ struct gossip_store *gossip_store_new(const tal_t *ctx, struct daemon *daemon, bool *populated); +/** + * Reopen the gossip_store + * @gs: the gossip store. + * + * We've rewritten it, so reopen the file. + */ +void gossip_store_reopen(struct gossip_store *gs); + /** * Move the old gossip store out the way. Log a broken message about it. */ void gossip_store_corrupt(void); +/** + * Extra paranoia: make sure it's on disk. Don't call often!\ + */ +void gossip_store_fsync(const struct gossip_store *gs); + /** * Append a gossip message to the gossip_store * @gs: gossip store diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index f82833b68097..db0f771661e2 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -372,12 +372,13 @@ static void master_or_connectd_gone(struct daemon_conn *dc UNUSED) static void gossip_init(struct daemon *daemon, const u8 *msg) { if (!fromwire_gossipd_init(daemon, msg, - &chainparams, - &daemon->our_features, - &daemon->id, - &daemon->dev_fast_gossip, - &daemon->dev_fast_gossip_prune, - &daemon->autoconnect_seeker_peers)) { + &chainparams, + &daemon->our_features, + &daemon->id, + &daemon->autoconnect_seeker_peers, + &daemon->compactd_helper, + &daemon->dev_fast_gossip, + &daemon->dev_fast_gossip_prune)) { master_badmsg(WIRE_GOSSIPD_INIT, msg); } diff --git a/gossipd/gossipd.h b/gossipd/gossipd.h index d8aa5447af30..02515c7b6a97 100644 --- a/gossipd/gossipd.h +++ b/gossipd/gossipd.h @@ -63,6 +63,9 @@ struct daemon { /* Features lightningd told us to set. */ struct feature_set *our_features; + /* Program to run to compact the datastore */ + char *compactd_helper; + /* Speed up gossip. */ bool dev_fast_gossip; diff --git a/gossipd/gossipd_wire.csv b/gossipd/gossipd_wire.csv index 1721b3aeed4f..79de7512d0d0 100644 --- a/gossipd/gossipd_wire.csv +++ b/gossipd/gossipd_wire.csv @@ -9,9 +9,10 @@ msgtype,gossipd_init,3000 msgdata,gossipd_init,chainparams,chainparams, msgdata,gossipd_init,our_features,feature_set, msgdata,gossipd_init,id,node_id, +msgdata,gossipd_init,autoconnect_seeker_peers,u32, +msgdata,gossipd_init,compactd_helper,wirestring, msgdata,gossipd_init,dev_fast_gossip,bool, msgdata,gossipd_init,dev_fast_gossip_prune,bool, -msgdata,gossipd_init,autoconnect_seeker_peers,u32, # Gossipd tells us all our public channel_updates before init_reply. msgtype,gossipd_init_cupdate,3101 diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index a9d7f2b8e199..9f9571a15cfa 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -1,6 +1,9 @@ #include "config.h" #include #include +#include +#include +#include #include #include #include @@ -12,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -19,6 +24,12 @@ #include #include #include +#include +#include +#include +#include + +#define GOSSIP_STORE_COMPACT_FILENAME "gossip_store.compact" struct pending_cannounce { const u8 *scriptpubkey; @@ -56,6 +67,15 @@ struct cannounce_map { bool flood_reported; }; +struct compactd { + struct io_conn *in_conn; + u64 old_size; + u8 ignored; + int outfd; + pid_t pid; + u8 uuid[32]; +}; + struct gossmap_manage { struct daemon *daemon; @@ -92,6 +112,9 @@ struct gossmap_manage { /* Are we populated yet? */ bool gossip_store_populated; + + /* Non-NULL if a compactd is running. */ + struct compactd *compactd; }; /* Timer recursion */ @@ -506,6 +529,7 @@ struct gossmap_manage *gossmap_manage_new(const tal_t *ctx, assert(gm->gs); assert(gm->raw_gossmap); gm->daemon = daemon; + gm->compactd = NULL; map_init(&gm->pending_ann_map, "pending announcements"); gm->pending_cupdates = tal_arr(gm, struct pending_cupdate *, 0); @@ -1557,3 +1581,177 @@ bool gossmap_manage_populated(const struct gossmap_manage *gm) { return gm->gossip_store_populated; } + +static void compactd_done(struct io_conn *unused, struct gossmap_manage *gm) +{ + int status; + struct stat st; + + if (waitpid(gm->compactd->pid, &status, 0) < 0) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Waiting for %u: %s", + (unsigned int)gm->compactd->pid, + strerror(errno)); + + if (!WIFEXITED(status)) { + status_broken("compactd failed with signal %u", + WTERMSIG(status)); + goto failed; + } + if (WEXITSTATUS(status) != 0) { + status_broken("compactd exited with status %u", + WEXITSTATUS(status)); + goto failed; + } + + if (stat(GOSSIP_STORE_COMPACT_FILENAME, &st) != 0) { + status_broken("compactd did not create file? %s", + strerror(errno)); + goto failed; + } + + status_debug("compaction done: %"PRIu64" -> %"PRIu64" bytes", + gm->compactd->old_size, (u64)st.st_size); + + /* Switch gossmap to new one, as a sanity check (rather than + * writing end marker and letting it reopen) */ + tal_free(gm->raw_gossmap); + gm->raw_gossmap = gossmap_load_initial(gm, GOSSIP_STORE_COMPACT_FILENAME, + st.st_size, + gossmap_logcb, + NULL, + gm); + if (!gm->raw_gossmap) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "compacted gossip_store is invalid"); + + if (rename(GOSSIP_STORE_COMPACT_FILENAME, GOSSIP_STORE_FILENAME) != 0) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Error renaming gossip store: %s", + strerror(errno)); + + /* Now append record to old one, so everyone will switch */ + gossip_store_add(gm->gs, + towire_gossip_store_ended(tmpctx, st.st_size, gm->compactd->uuid), + 0); + gossip_store_reopen(gm->gs); + +failed: + gm->compactd = tal_free(gm->compactd); +} + +/* When it's caught up to where we were, we wait. */ +static struct io_plan *compactd_read_done(struct io_conn *conn, + struct gossmap_manage *gm) +{ + status_debug("compactd caught up, waiting for final bytes."); + + /* Make sure everything has hit storage in the current version. */ + gossip_store_fsync(gm->gs); + gossmap_manage_get_gossmap(gm); + + /* Tell it to do the remainder, then we wait for it to exit in destructor. */ + write_all(gm->compactd->outfd, "", 1); + return io_close(conn); +} + +static struct io_plan *init_compactd_conn_in(struct io_conn *conn, + struct gossmap_manage *gm) +{ + return io_read(conn, &gm->compactd->ignored, sizeof(gm->compactd->ignored), + compactd_read_done, gm); +} +/* Returns false if already running */ +static UNNEEDED bool gossmap_compact(struct gossmap_manage *gm) +{ + int childin[2], execfail[2], childout[2]; + int saved_errno; + + /* Only one at a time please! */ + if (gm->compactd) + return false; + + /* This checks contents: we want to make sure compactd sees an + * up-to-date version. */ + gossmap_manage_get_gossmap(gm); + + gm->compactd = tal(gm, struct compactd); + for (size_t i = 0; i < ARRAY_SIZE(gm->compactd->uuid); i++) + gm->compactd->uuid[i] = pseudorand(256); + + gm->compactd->old_size = gossip_store_len_written(gm->gs); + status_debug("Executing lightning_gossip_compactd %s %s %s %s", + GOSSIP_STORE_FILENAME, + GOSSIP_STORE_COMPACT_FILENAME, + tal_fmt(tmpctx, "%"PRIu64, gm->compactd->old_size), + tal_hexstr(tmpctx, gm->compactd->uuid, sizeof(gm->compactd->uuid))); + + if (pipe(childin) != 0 || pipe(childout) != 0 || pipe(execfail) != 0) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Could not create pipes for compactd: %s", + strerror(errno)); + + if (fcntl(execfail[1], F_SETFD, fcntl(execfail[1], F_GETFD) + | FD_CLOEXEC) < 0) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Could not set cloexec on compactd fd: %s", + strerror(errno)); + + gm->compactd->pid = fork(); + if (gm->compactd->pid < 0) + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Could not fork for compactd: %s", + strerror(errno)); + + if (gm->compactd->pid == 0) { + close(childin[0]); + close(childout[1]); + close(execfail[0]); + + /* In practice, low fds are all open, so we don't have + * to handle those horrible cases */ + assert(childin[1] > 2); + assert(childout[0] > 2); + if (dup2(childin[1], STDOUT_FILENO) == -1) + err(1, "Failed to duplicate fd to stdout"); + close(childin[1]); + if (dup2(childout[0], STDIN_FILENO) == -1) + err(1, "Failed to duplicate fd to stdin"); + close(childin[2]); + closefrom_limit(0); + closefrom(3); + /* Tell compactd helper what we read so far. */ + execlp(gm->daemon->compactd_helper, + gm->daemon->compactd_helper, + GOSSIP_STORE_FILENAME, + GOSSIP_STORE_COMPACT_FILENAME, + tal_fmt(tmpctx, "%"PRIu64, gm->compactd->old_size), + tal_hexstr(tmpctx, gm->compactd->uuid, sizeof(gm->compactd->uuid)), + NULL); + saved_errno = errno; + /* Gcc's warn-unused-result fail. */ + if (write(execfail[1], &saved_errno, sizeof(saved_errno))) { + ; + } + exit(127); + } + close(childin[1]); + close(childout[0]); + close(execfail[1]); + + /* Child will close this without writing on successful exec. */ + if (read(execfail[0], &saved_errno, sizeof(saved_errno)) == sizeof(saved_errno)) { + close(execfail[0]); + waitpid(gm->compactd->pid, NULL, 0); + status_failed(STATUS_FAIL_INTERNAL_ERROR, + "Exec of %s failed: %s", + gm->daemon->compactd_helper, strerror(saved_errno)); + } + close(execfail[0]); + + gm->compactd->outfd = childout[1]; + gm->compactd->in_conn = io_new_conn(gm->compactd, childin[0], + init_compactd_conn_in, gm); + io_set_finish(gm->compactd->in_conn, compactd_done, gm); + return true; +} diff --git a/lightningd/gossip_control.c b/lightningd/gossip_control.c index a43d967c7d1e..038eb1f7a96d 100644 --- a/lightningd/gossip_control.c +++ b/lightningd/gossip_control.c @@ -319,9 +319,10 @@ void gossip_init(struct lightningd *ld, int connectd_fd) chainparams, ld->our_features, &ld->our_nodeid, + ld->autoconnect_seeker_peers, + subdaemon_path(tmpctx, ld, "lightning_gossip_compactd"), ld->dev_fast_gossip, - ld->dev_fast_gossip_prune, - ld->autoconnect_seeker_peers); + ld->dev_fast_gossip_prune); subd_req(ld->gossip, ld->gossip, take(msg), -1, 0, gossipd_init_done, NULL); diff --git a/lightningd/lightningd.c b/lightningd/lightningd.c index 504a5adb22ef..219784e79b8c 100644 --- a/lightningd/lightningd.c +++ b/lightningd/lightningd.c @@ -381,9 +381,10 @@ static const char *subdaemons[] = { "lightning_closingd", "lightning_connectd", "lightning_gossipd", + "lightning_gossip_compactd", "lightning_hsmd", "lightning_onchaind", - "lightning_openingd" + "lightning_openingd", }; /* Return true if called with a recognized subdaemon e.g. "hsmd" */ From 3e669d0b1b40fd1994dc28b5421cc72fd2666d47 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 19/20] gossipd: compact when gossip store is 80% deleted records. Signed-off-by: Rusty Russell Changelog-Added: `gossipd` now uses a `lightning_gossip_compactd` helper to compact the gossip_store on demand, keeping it under about 210MB. --- gossipd/gossipd.c | 2 ++ gossipd/gossmap_manage.c | 29 +++++++++++++++++++++++++++-- gossipd/gossmap_manage.h | 6 ++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index db0f771661e2..5cb2541c6559 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -282,6 +282,8 @@ static void handle_recv_gossip(struct daemon *daemon, const u8 *outermsg) handled_msg: if (err) queue_peer_msg(daemon, &source, take(err)); + /* We need to keep gossmap to reasonable size */ + gossmap_manage_maybe_compact(daemon->gm); } /*~ connectd's input handler is very simple. */ diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 9f9571a15cfa..61ce44d872d2 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -1662,7 +1662,7 @@ static struct io_plan *init_compactd_conn_in(struct io_conn *conn, compactd_read_done, gm); } /* Returns false if already running */ -static UNNEEDED bool gossmap_compact(struct gossmap_manage *gm) +static bool gossmap_compact(struct gossmap_manage *gm) { int childin[2], execfail[2], childout[2]; int saved_errno; @@ -1717,7 +1717,7 @@ static UNNEEDED bool gossmap_compact(struct gossmap_manage *gm) close(childin[1]); if (dup2(childout[0], STDIN_FILENO) == -1) err(1, "Failed to duplicate fd to stdin"); - close(childin[2]); + close(childout[0]); closefrom_limit(0); closefrom(3); /* Tell compactd helper what we read so far. */ @@ -1755,3 +1755,28 @@ static UNNEEDED bool gossmap_compact(struct gossmap_manage *gm) io_set_finish(gm->compactd->in_conn, compactd_done, gm); return true; } + +void gossmap_manage_maybe_compact(struct gossmap_manage *gm) +{ + u64 num_live, num_dead; + struct gossmap *gossmap = gossmap_manage_get_gossmap(gm); + bool compact_started; + + gossmap_stats(gossmap, &num_live, &num_dead); + + /* Don't get out of bed for less that 10MB */ + if (gossip_store_len_written(gm->gs) < 10000000) + return; + + /* Compact when the density would be 5x better */ + if (num_dead < 4 * num_live) + return; + + compact_started = gossmap_compact(gm); + status_debug("%s gossmap compaction:" + " %"PRIu64" with" + " %"PRIu64" live records and %"PRIu64" dead records", + compact_started ? "Beginning" : "Already running", + gossip_store_len_written(gm->gs), + num_live, num_dead); +} diff --git a/gossipd/gossmap_manage.h b/gossipd/gossmap_manage.h index 74781d55947d..c135722710ea 100644 --- a/gossipd/gossmap_manage.h +++ b/gossipd/gossmap_manage.h @@ -112,6 +112,12 @@ void gossmap_manage_tell_lightningd_locals(struct daemon *daemon, */ bool gossmap_manage_populated(const struct gossmap_manage *gm); +/** + * gossmap_manage_maybe_compact: rewrite gossmap if it's getting giant. + * @gm: the gossmap_manage context + */ +void gossmap_manage_maybe_compact(struct gossmap_manage *gm); + /* For memleak to see inside of maps */ void gossmap_manage_memleak(struct htable *memtable, const struct gossmap_manage *gm); #endif /* LIGHTNING_GOSSIPD_GOSSMAP_MANAGE_H */ From 10176dc06347ed8d94f85a328e9259760a01bca3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Feb 2026 19:11:11 +1030 Subject: [PATCH 20/20] gossipd: dev-compact-gossip-store to manually invoke compaction. And tests! Signed-off-by: Rusty Russell --- gossipd/gossipd.c | 7 +++ gossipd/gossipd_wire.csv | 7 +++ gossipd/gossmap_manage.c | 47 +++++++++++++-- gossipd/gossmap_manage.h | 4 ++ lightningd/gossip_control.c | 45 ++++++++++++++ tests/plugins/compacter-slow.sh | 11 ++++ tests/test_gossip.py | 101 ++++++++++++++++++++++++++++++++ tools/bench-gossipd.sh | 8 ++- 8 files changed, 223 insertions(+), 7 deletions(-) create mode 100755 tests/plugins/compacter-slow.sh diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index 5cb2541c6559..6c0d07396d51 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -542,6 +542,12 @@ static struct io_plan *recv_req(struct io_conn *conn, goto done; } /* fall thru */ + case WIRE_GOSSIPD_DEV_COMPACT_STORE: + if (daemon->developer) { + gossmap_manage_handle_dev_compact_store(daemon->gm, msg); + goto done; + } + /* fall thru */ /* We send these, we don't receive them */ case WIRE_GOSSIPD_INIT_CUPDATE: @@ -549,6 +555,7 @@ static struct io_plan *recv_req(struct io_conn *conn, case WIRE_GOSSIPD_INIT_REPLY: case WIRE_GOSSIPD_GET_TXOUT: case WIRE_GOSSIPD_DEV_MEMLEAK_REPLY: + case WIRE_GOSSIPD_DEV_COMPACT_STORE_REPLY: case WIRE_GOSSIPD_ADDGOSSIP_REPLY: case WIRE_GOSSIPD_NEW_BLOCKHEIGHT_REPLY: case WIRE_GOSSIPD_REMOTE_CHANNEL_UPDATE: diff --git a/gossipd/gossipd_wire.csv b/gossipd/gossipd_wire.csv index 79de7512d0d0..8d407aa75dc4 100644 --- a/gossipd/gossipd_wire.csv +++ b/gossipd/gossipd_wire.csv @@ -50,6 +50,13 @@ msgtype,gossipd_dev_memleak,3033 msgtype,gossipd_dev_memleak_reply,3133 msgdata,gossipd_dev_memleak_reply,leak,bool, +# master -> gossipd: please rewrite the gossip_store +msgtype,gossipd_dev_compact_store,3034 + +# gossipd -> master: empty string means no problem. +msgtype,gossipd_dev_compact_store_reply,3134 +msgdata,gossipd_dev_compact_store_reply,result,wirestring, + # master -> gossipd: blockheight increased. msgtype,gossipd_new_blockheight,3026 msgdata,gossipd_new_blockheight,blockheight,u32, diff --git a/gossipd/gossmap_manage.c b/gossipd/gossmap_manage.c index 61ce44d872d2..3f5d2c351fe6 100644 --- a/gossipd/gossmap_manage.c +++ b/gossipd/gossmap_manage.c @@ -70,6 +70,7 @@ struct cannounce_map { struct compactd { struct io_conn *in_conn; u64 old_size; + bool dev_compact; u8 ignored; int outfd; pid_t pid; @@ -1582,6 +1583,24 @@ bool gossmap_manage_populated(const struct gossmap_manage *gm) return gm->gossip_store_populated; } +static void compactd_broken(const struct gossmap_manage *gm, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + status_vfmt(LOG_BROKEN, NULL, fmt, ap); + va_end(ap); + + if (gm->compactd->dev_compact) { + va_start(ap, fmt); + daemon_conn_send(gm->daemon->master, + take(towire_gossipd_dev_compact_store_reply(NULL, + tal_vfmt(tmpctx, fmt, ap)))); + va_end(ap); + } +} + static void compactd_done(struct io_conn *unused, struct gossmap_manage *gm) { int status; @@ -1594,18 +1613,18 @@ static void compactd_done(struct io_conn *unused, struct gossmap_manage *gm) strerror(errno)); if (!WIFEXITED(status)) { - status_broken("compactd failed with signal %u", - WTERMSIG(status)); + compactd_broken(gm, "compactd failed with signal %u", + WTERMSIG(status)); goto failed; } if (WEXITSTATUS(status) != 0) { - status_broken("compactd exited with status %u", + compactd_broken(gm, "compactd exited with status %u", WEXITSTATUS(status)); goto failed; } if (stat(GOSSIP_STORE_COMPACT_FILENAME, &st) != 0) { - status_broken("compactd did not create file? %s", + compactd_broken(gm, "compactd did not create file? %s", strerror(errno)); goto failed; } @@ -1635,6 +1654,9 @@ static void compactd_done(struct io_conn *unused, struct gossmap_manage *gm) towire_gossip_store_ended(tmpctx, st.st_size, gm->compactd->uuid), 0); gossip_store_reopen(gm->gs); + if (gm->compactd->dev_compact) + daemon_conn_send(gm->daemon->master, + take(towire_gossipd_dev_compact_store_reply(NULL, ""))); failed: gm->compactd = tal_free(gm->compactd); @@ -1662,7 +1684,7 @@ static struct io_plan *init_compactd_conn_in(struct io_conn *conn, compactd_read_done, gm); } /* Returns false if already running */ -static bool gossmap_compact(struct gossmap_manage *gm) +static bool gossmap_compact(struct gossmap_manage *gm, bool dev_compact) { int childin[2], execfail[2], childout[2]; int saved_errno; @@ -1749,6 +1771,7 @@ static bool gossmap_compact(struct gossmap_manage *gm) } close(execfail[0]); + gm->compactd->dev_compact = dev_compact; gm->compactd->outfd = childout[1]; gm->compactd->in_conn = io_new_conn(gm->compactd, childin[0], init_compactd_conn_in, gm); @@ -1772,7 +1795,7 @@ void gossmap_manage_maybe_compact(struct gossmap_manage *gm) if (num_dead < 4 * num_live) return; - compact_started = gossmap_compact(gm); + compact_started = gossmap_compact(gm, false); status_debug("%s gossmap compaction:" " %"PRIu64" with" " %"PRIu64" live records and %"PRIu64" dead records", @@ -1780,3 +1803,15 @@ void gossmap_manage_maybe_compact(struct gossmap_manage *gm) gossip_store_len_written(gm->gs), num_live, num_dead); } + +void gossmap_manage_handle_dev_compact_store(struct gossmap_manage *gm, const u8 *msg) +{ + if (!fromwire_gossipd_dev_compact_store(msg)) + master_badmsg(WIRE_GOSSIPD_DEV_COMPACT_STORE, msg); + + if (!gossmap_compact(gm, true)) + daemon_conn_send(gm->daemon->master, + take(towire_gossipd_dev_compact_store_reply(NULL, + "Already compacting"))); +} + diff --git a/gossipd/gossmap_manage.h b/gossipd/gossmap_manage.h index c135722710ea..464c61f4cf9d 100644 --- a/gossipd/gossmap_manage.h +++ b/gossipd/gossmap_manage.h @@ -118,6 +118,10 @@ bool gossmap_manage_populated(const struct gossmap_manage *gm); */ void gossmap_manage_maybe_compact(struct gossmap_manage *gm); +/* For testing */ +void gossmap_manage_handle_dev_compact_store(struct gossmap_manage *gm, + const u8 *msg); + /* For memleak to see inside of maps */ void gossmap_manage_memleak(struct htable *memtable, const struct gossmap_manage *gm); #endif /* LIGHTNING_GOSSIPD_GOSSMAP_MANAGE_H */ diff --git a/lightningd/gossip_control.c b/lightningd/gossip_control.c index 038eb1f7a96d..9c698cd324b5 100644 --- a/lightningd/gossip_control.c +++ b/lightningd/gossip_control.c @@ -196,11 +196,13 @@ static unsigned gossip_msg(struct subd *gossip, const u8 *msg, const int *fds) case WIRE_GOSSIPD_GET_TXOUT_REPLY: case WIRE_GOSSIPD_OUTPOINTS_SPENT: case WIRE_GOSSIPD_DEV_MEMLEAK: + case WIRE_GOSSIPD_DEV_COMPACT_STORE: case WIRE_GOSSIPD_NEW_BLOCKHEIGHT: case WIRE_GOSSIPD_ADDGOSSIP: /* This is a reply, so never gets through to here. */ case WIRE_GOSSIPD_INIT_REPLY: case WIRE_GOSSIPD_DEV_MEMLEAK_REPLY: + case WIRE_GOSSIPD_DEV_COMPACT_STORE_REPLY: case WIRE_GOSSIPD_ADDGOSSIP_REPLY: case WIRE_GOSSIPD_NEW_BLOCKHEIGHT_REPLY: break; @@ -488,3 +490,46 @@ static const struct json_command dev_set_max_scids_encode_size = { .dev_only = true, }; AUTODATA(json_command, &dev_set_max_scids_encode_size); + +static void dev_compact_gossip_store_reply(struct subd *gossip UNUSED, + const u8 *reply, + const int *fds UNUSED, + struct command *cmd) +{ + char *result; + + if (!fromwire_gossipd_dev_compact_store_reply(cmd, reply, &result)) { + was_pending(command_fail(cmd, LIGHTNINGD, + "Gossip gave bad dev_gossip_compact_store_reply")); + return; + } + + if (streq(result, "")) + was_pending(command_success(cmd, json_stream_success(cmd))); + else + was_pending(command_fail(cmd, LIGHTNINGD, + "gossip_compact_store failed: %s", result)); +} + +static struct command_result *json_dev_compact_gossip_store(struct command *cmd, + const char *buffer, + const jsmntok_t *obj UNNEEDED, + const jsmntok_t *params) +{ + u8 *msg; + + if (!param(cmd, buffer, params, NULL)) + return command_param_failed(); + + msg = towire_gossipd_dev_compact_store(NULL); + subd_req(cmd->ld->gossip, cmd->ld->gossip, + take(msg), -1, 0, dev_compact_gossip_store_reply, cmd); + return command_still_pending(cmd); +} + +static const struct json_command dev_compact_gossip_store = { + "dev-compact-gossip-store", + json_dev_compact_gossip_store, + .dev_only = true, +}; +AUTODATA(json_command, &dev_compact_gossip_store); diff --git a/tests/plugins/compacter-slow.sh b/tests/plugins/compacter-slow.sh new file mode 100755 index 000000000000..0812c62e79b2 --- /dev/null +++ b/tests/plugins/compacter-slow.sh @@ -0,0 +1,11 @@ +#! /bin/sh -e +# This pretends to be lightning_gossip_compactd, but waits until the file "compactd-continue" +# exists. This lets us test race conditions. + +if [ x"$1" != x"--version" ]; then + while [ ! -f "compactd-continue" ]; do + sleep 1 + done +fi + +exec "$(dirname "$0")"/../../lightningd/lightning_gossip_compactd "$@" diff --git a/tests/test_gossip.py b/tests/test_gossip.py index 440a9a7c1b6d..e69fabc4cb9e 100644 --- a/tests/test_gossip.py +++ b/tests/test_gossip.py @@ -1281,6 +1281,9 @@ def test_gossip_store_load_announce_before_update(node_factory): l1.daemon.wait_for_log("gossipd: gossip_store: 3 live records, 0 deleted") assert not l1.daemon.is_in_log('gossip_store.*corrupt') + # Extra sanity check if we can. + l1.rpc.call('dev-compact-gossip-store') + def test_gossip_store_load_amount_truncated(node_factory): """Make sure we can read canned gossip store with truncated amount""" @@ -1300,6 +1303,11 @@ def test_gossip_store_load_amount_truncated(node_factory): wait_for(lambda: l1.daemon.is_in_log('gossip_store: 0 live records, 0 deleted')) assert os.path.exists(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'gossip_store.corrupt')) + # Extra sanity check if we can. + l1.rpc.call('dev-compact-gossip-store') + l1.restart() + l1.rpc.call('dev-compact-gossip-store') + @pytest.mark.openchannel('v1') @pytest.mark.openchannel('v2') @@ -1628,10 +1636,103 @@ def test_gossip_store_corrupt(node_factory, bitcoind): def test_gossip_store_load_complex(node_factory, bitcoind): l2 = setup_gossip_store_test(node_factory, bitcoind) + l2.rpc.call('dev-compact-gossip-store') + l2.restart() + + wait_for(lambda: l2.daemon.is_in_log('gossip_store: 9 live records, 0 deleted')) + + +def test_gossip_store_compact(node_factory, bitcoind): + l2 = setup_gossip_store_test(node_factory, bitcoind) + + # Now compact store. + l2.rpc.call('dev-compact-gossip-store') + l2.daemon.wait_for_logs(['gossipd: compaction done: 2362 -> 2062 bytes', + 'connectd: Reopened gossip_store, reduced to offset 2062']) + + # Should still be connected. + time.sleep(1) + assert len(l2.rpc.listpeers()['peers']) == 2 + + # Should restart ok. l2.restart() + wait_for(lambda: l2.daemon.is_in_log('gossipd: gossip_store: 9 live records, 0 deleted')) + + +def test_gossip_store_compact_while_extending(node_factory, bitcoind, executor): + """We change gossip_store file (additions, deletion) during compaction""" + l1, l2, l3 = node_factory.line_graph(3, wait_for_announce=True, + opts=[{'subdaemon': 'gossip_compactd:../tests/plugins/compacter-slow.sh'}, {}, {}]) + l2.rpc.setchannel(l3.info['id'], 20, 1000) + l3.rpc.setchannel(l2.info['id'], 21, 1001) + + # Wait for it to hit l2's gossip store. + wait_for(lambda: sorted([c['fee_per_millionth'] for c in l1.rpc.listchannels()['channels']]) == [10, 10, 1000, 1001]) + + # We start compaction, but gossip store continues + fut = executor.submit(l1.rpc.call, 'dev-compact-gossip-store') + # Make sure it started! + l1.daemon.wait_for_log('Executing lightning_gossip_compactd') + + # We make another channel, remove old one + l4 = node_factory.get_node() + node_factory.join_nodes([l3, l4], wait_for_announce=True) + scid34 = only_one(l4.rpc.listpeerchannels()['channels'])['short_channel_id'] + wait_for(lambda: len(l1.rpc.listchannels(scid34)['channels']) == 2) + + scid23 = only_one(l2.rpc.listpeerchannels(l3.info['id'])['channels'])['short_channel_id'] + l2.rpc.close(l3.info['id']) + bitcoind.generate_block(13, wait_for_mempool=1) + wait_for(lambda: l1.rpc.listchannels(scid23) == {'channels': []}) + l1.rpc.setchannel(l2.info['id'], 41, 1004) + scid12 = only_one(l1.rpc.listpeerchannels()['channels'])['short_channel_id'] + wait_for(lambda: sorted([c['fee_per_millionth'] for c in l1.rpc.listchannels(scid12)['channels']]) == [10, 1004]) + + pre_channels = l1.rpc.listchannels() + pre_nodes = sorted(l1.rpc.listnodes()['nodes'], key=lambda n: n['nodeid']) + + # Compaction "continues". + with open(os.path.join(l1.daemon.lightning_dir, TEST_NETWORK, 'compactd-continue'), 'wb') as f: + f.write(b'') + + fut.result(TIMEOUT) + l1.daemon.wait_for_logs(['gossipd: compaction done', + 'connectd: Reopened gossip_store, reduced to offset 2245']) + + post_channels = l1.rpc.listchannels() + post_nodes = sorted(l1.rpc.listnodes()['nodes'], key=lambda n: n['nodeid']) + l1.daemon.wait_for_log('topology: Reopened gossip_store, reduced to offset 2245') + + assert post_channels == pre_channels + assert post_nodes == pre_nodes + + +def test_gossip_store_compact_miss_update(node_factory, bitcoind, executor): + """If we compact twice, you should notice the UUID difference.""" + l2 = setup_gossip_store_test(node_factory, bitcoind) + + pre_channels = l2.rpc.listchannels() + + # Now compact store twice. + l2.rpc.call('dev-compact-gossip-store') + l2.rpc.call('dev-compact-gossip-store') + + post_channels = l2.rpc.listchannels() + l2.daemon.wait_for_log('topology: Reopened gossip_store, but we missed some') + assert pre_channels == post_channels + + +def test_gossip_store_compact_restart(node_factory, bitcoind): + l2 = setup_gossip_store_test(node_factory, bitcoind) + + # Should restart ok. + l2.restart() wait_for(lambda: l2.daemon.is_in_log('gossip_store: 9 live records, 2 deleted')) + # Now compact store. + l2.rpc.call('dev-compact-gossip-store') + def test_gossip_store_load_no_channel_update(node_factory): """Make sure we can read truncated gossip store with a channel_announcement and no channel_update""" diff --git a/tools/bench-gossipd.sh b/tools/bench-gossipd.sh index 4bea1552ce22..b98295b58c27 100755 --- a/tools/bench-gossipd.sh +++ b/tools/bench-gossipd.sh @@ -5,7 +5,7 @@ set -e DIR="" TARGETS="" -DEFAULT_TARGETS=" store_load_msec vsz_kb listnodes_sec listchannels_sec routing_sec peer_write_all_sec peer_read_all_sec " +DEFAULT_TARGETS=" store_load_msec vsz_kb store_rewrite_sec listnodes_sec listchannels_sec routing_sec peer_write_all_sec peer_read_all_sec " MCP_DIR=../million-channels-project/data/1M/gossip/ CSV=false @@ -111,6 +111,12 @@ if [ -z "${TARGETS##* vsz_kb *}" ]; then ps -o vsz= -p "$(pidof lightning_gossipd)" | print_stat vsz_kb fi +# How long does rewriting the store take? +if [ -z "${TARGETS##* store_rewrite_sec *}" ]; then + # shellcheck disable=SC2086 + /usr/bin/time --append -f %e $LCLI1 dev-compact-gossip-store 2>&1 > /dev/null | print_stat store_rewrite_sec +fi + # Now, how long does listnodes take? if [ -z "${TARGETS##* listnodes_sec *}" ]; then # shellcheck disable=SC2086