Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix gossip code when channels are marked dying #7057

Merged
merged 12 commits into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/gossip_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ struct gossip_rcvd_filter;
* gossip_hdr -- On-disk format header.
*/
struct gossip_hdr {
beint16_t flags; /* Length of message after header. */
beint16_t len; /* GOSSIP_STORE_xxx_BIT flags. */
beint16_t flags; /* GOSSIP_STORE_xxx_BIT flags. */
beint16_t len; /* Length of message after header. */
beint32_t crc; /* crc of message of timestamp, after header. */
beint32_t timestamp; /* timestamp of msg. */
};
Expand Down
13 changes: 13 additions & 0 deletions common/gossmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,19 @@ bool gossmap_chan_is_localmod(const struct gossmap *map,
return c->cann_off >= map->map_size;
}

bool gossmap_chan_is_dying(const struct gossmap *map,
const struct gossmap_chan *c)
{
struct gossip_hdr ghdr;
size_t off;

/* FIXME: put this flag in plus_scid_off instead? */
off = c->cann_off - sizeof(ghdr);
map_copy(map, off, &ghdr, sizeof(ghdr));

return ghdr.flags & CPU_TO_BE16(GOSSIP_STORE_DYING_BIT);
}

bool gossmap_chan_get_capacity(const struct gossmap *map,
const struct gossmap_chan *c,
struct amount_sat *amount)
Expand Down
4 changes: 4 additions & 0 deletions common/gossmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ void gossmap_remove_localmods(struct gossmap *map,
bool gossmap_chan_is_localmod(const struct gossmap *map,
const struct gossmap_chan *c);

/* Is this channel dying? */
bool gossmap_chan_is_dying(const struct gossmap *map,
const struct gossmap_chan *c);

/* Each channel has a unique (low) index. */
u32 gossmap_node_idx(const struct gossmap *map, const struct gossmap_node *node);
u32 gossmap_chan_idx(const struct gossmap *map, const struct gossmap_chan *chan);
Expand Down
6 changes: 4 additions & 2 deletions connectd/gossip_store.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,10 @@ u8 *gossip_store_next(const tal_t *ctx,

/* Skip any timestamp filtered */
timestamp = be32_to_cpu(hdr.timestamp);
if (!timestamp_filter(timestamp_min, timestamp_max,
timestamp)) {
/* Note: channel_announcements without a channel_update yet
* will have 0 timestamp (we don't know). Better to send them. */
if (timestamp &&
!timestamp_filter(timestamp_min, timestamp_max, timestamp)) {
*off += r + msglen;
continue;
}
Expand Down
21 changes: 19 additions & 2 deletions contrib/pyln-testing/pyln/testing/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from concurrent import futures
from pyln.testing.db import SqliteDbProvider, PostgresDbProvider
from pyln.testing.utils import NodeFactory, BitcoinD, ElementsD, env, LightningNode, TEST_DEBUG
from pyln.testing.utils import NodeFactory, BitcoinD, ElementsD, env, LightningNode, TEST_DEBUG, TEST_NETWORK
from pyln.client import Millisatoshi
from typing import Dict

Expand All @@ -12,6 +12,7 @@
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
Expand Down Expand Up @@ -465,16 +466,24 @@ def node_factory(request, directory, test_name, bitcoind, executor, db_provider,
teardown_checks.add_error(e)

def map_node_error(nodes, f, msg):
ret = False
for n in nodes:
if n and f(n):
ret = True
teardown_checks.add_node_error(n, msg.format(n=n))
return ret

map_node_error(nf.nodes, printValgrindErrors, "reported valgrind errors")
map_node_error(nf.nodes, printCrashLog, "had crash.log files")
map_node_error(nf.nodes, lambda n: not n.allow_broken_log and n.daemon.is_in_log(r'\*\*BROKEN\*\*'), "had BROKEN messages")
map_node_error(nf.nodes, lambda n: not n.allow_warning and n.daemon.is_in_log(r' WARNING:'), "had warning messages")
map_node_error(nf.nodes, checkReconnect, "had unexpected reconnections")
map_node_error(nf.nodes, checkBadGossip, "had bad gossip messages")

# On any bad gossip complaints, print out every nodes' gossip_store
if map_node_error(nf.nodes, checkBadGossip, "had bad gossip messages"):
for n in nf.nodes:
dumpGossipStore(n)

map_node_error(nf.nodes, lambda n: n.daemon.is_in_log('Bad reestablish'), "had bad reestablish")
map_node_error(nf.nodes, lambda n: n.daemon.is_in_log('bad hsm request'), "had bad hsm requests")
map_node_error(nf.nodes, lambda n: n.daemon.is_in_log(r'Accessing a null column'), "Accessing a null column")
Expand Down Expand Up @@ -553,6 +562,14 @@ def checkReconnect(node):
return 0


def dumpGossipStore(node):
gs_path = os.path.join(node.daemon.lightning_dir, TEST_NETWORK, 'gossip_store')
gs = subprocess.run(['devtools/dump-gossipstore', '--print-deleted', gs_path],
stdout=subprocess.PIPE)
print("GOSSIP STORE CONTENTS for {}:\n".format(node.daemon.prefix))
print(gs.stdout.decode())


def checkBadGossip(node):
if node.allow_bad_gossip:
return 0
Expand Down
2 changes: 1 addition & 1 deletion devtools/dump-gossipstore.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

/* Current versions we support */
#define GSTORE_MAJOR 0
#define GSTORE_MINOR 12
#define GSTORE_MINOR 14

int main(int argc, char *argv[])
{
Expand Down
67 changes: 38 additions & 29 deletions gossipd/gossip_store.c
Original file line number Diff line number Diff line change
Expand Up @@ -484,10 +484,11 @@ static const u8 *gossip_store_get_with_hdr(const tal_t *ctx,
return msg;
}

static bool check_msg_type(struct gossip_store *gs, u64 offset, int flag, int type)
/* Populates hdr */
static bool check_msg_type(struct gossip_store *gs, u64 offset, int flag, int type,
struct gossip_hdr *hdr)
{
struct gossip_hdr hdr;
const u8 *msg = gossip_store_get_with_hdr(tmpctx, gs, offset, &hdr);
const u8 *msg = gossip_store_get_with_hdr(tmpctx, gs, offset, hdr);

if (fromwire_peektype(msg) == type)
return true;
Expand All @@ -503,38 +504,46 @@ static bool check_msg_type(struct gossip_store *gs, u64 offset, int flag, int ty
u64 gossip_store_set_flag(struct gossip_store *gs,
u64 offset, u16 flag, int type)
{
struct {
beint16_t beflags;
beint16_t belen;
} hdr;
u64 hdr_off = offset - sizeof(struct gossip_hdr);

/* Should never try to overwrite version */
assert(offset > sizeof(struct gossip_hdr));

/* FIXME: debugging a gs->len overrun issue reported in #6270 */
if (pread(gs->fd, &hdr, sizeof(hdr), hdr_off) != sizeof(hdr)) {
status_broken("gossip_store pread fail during flag %u @%"PRIu64" type: %i"
" gs->len: %"PRIu64, flag, hdr_off, type, gs->len);
return offset;
}
if (offset + be16_to_cpu(hdr.belen) > gs->len) {
status_broken("gossip_store overrun during flag-%u @%"PRIu64" type: %i"
" gs->len: %"PRIu64, flag, hdr_off, type, gs->len);
return offset;
}
struct gossip_hdr hdr;

if (!check_msg_type(gs, offset, flag, type))
if (!check_msg_type(gs, offset, flag, type, &hdr))
return offset;

assert((be16_to_cpu(hdr.beflags) & flag) == 0);
hdr.beflags |= cpu_to_be16(flag);
if (pwrite(gs->fd, &hdr.beflags, sizeof(hdr.beflags), hdr_off) != sizeof(hdr.beflags))
assert((be16_to_cpu(hdr.flags) & flag) == 0);
hdr.flags |= cpu_to_be16(flag);
if (pwrite(gs->fd, &hdr, sizeof(hdr), offset - sizeof(hdr)) != sizeof(hdr))
status_failed(STATUS_FAIL_INTERNAL_ERROR,
"Failed writing flags to delete @%"PRIu64": %s",
"Failed writing set flags @%"PRIu64": %s",
offset, strerror(errno));

return offset + be16_to_cpu(hdr.belen) + sizeof(struct gossip_hdr);
return offset + be16_to_cpu(hdr.len) + sizeof(struct gossip_hdr);
}

u16 gossip_store_get_flags(struct gossip_store *gs,
u64 offset, int type)
{
struct gossip_hdr hdr;

if (!check_msg_type(gs, offset, -1, type, &hdr))
return 0;

return be16_to_cpu(hdr.flags);
}

void gossip_store_clear_flag(struct gossip_store *gs,
u64 offset, u16 flag, int type)
{
struct gossip_hdr hdr;

if (!check_msg_type(gs, offset, flag, type, &hdr))
return;

assert(be16_to_cpu(hdr.flags) & flag);
hdr.flags &= ~cpu_to_be16(flag);
if (pwrite(gs->fd, &hdr, sizeof(hdr), offset - sizeof(hdr)) != sizeof(hdr))
status_failed(STATUS_FAIL_INTERNAL_ERROR,
"Failed writing clear flags @%"PRIu64": %s",
offset, strerror(errno));
}

void gossip_store_del(struct gossip_store *gs,
Expand Down
18 changes: 18 additions & 0 deletions gossipd/gossip_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,24 @@ void gossip_store_del(struct gossip_store *gs,
u64 gossip_store_set_flag(struct gossip_store *gs,
u64 offset, u16 flag, int type);

/**
* Clear a flag the record at this offset (offset is that of
* record!). OK if it's not currently set.
*
* In developer mode, checks that type is correct.
*/
void gossip_store_clear_flag(struct gossip_store *gs,
u64 offset, u16 flag, int type);

/**
* Get flags from the record at this offset (offset is that of
* record!).
*
* In developer mode, checks that type is correct.
*/
u16 gossip_store_get_flags(struct gossip_store *gs,
u64 offset, int type);

/**
* Direct store accessor: get timestamp header for a record.
*
Expand Down