Skip to content

Commit

Permalink
lnpeer.reestablish_chan: enforce order of replaying commitsig/revack
Browse files Browse the repository at this point in the history
When replaying messages during channel-reestablishment,
previously we first resent all update messages, along with potential commitment_signed messages,
and then we potentially resent a single revoke_and_ack.

This can result in incorrect behaviour when both a commitment_signed and a revoke_and_ack need to be resent.
When replaying messages, the relative order of the commitment_signed and revoke_and_ack messages needs to be preserved.
(the order of updates (htlc/fee) in relation to the revack messages does not matter)

implements lightning/bolts#810

The logic here is somewhat based on what c-lightning does:
https://github.com/ElementsProject/lightning/blob/01e5f1886e31816e652f417a1ff789a26aaeec3b/channeld/channeld.c#L3059
  • Loading branch information
SomberNight committed May 24, 2022
1 parent 7abc7c7 commit c827b6f
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 24 deletions.
8 changes: 8 additions & 0 deletions electrum/lnhtlc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self, log:'StoredDict', *, initial_feerate=None):
log[LOCAL] = deepcopy(initial)
log[REMOTE] = deepcopy(initial)
log[LOCAL]['unacked_updates'] = {}
log[LOCAL]['was_revoke_last'] = False

# maybe bootstrap fee_updates if initial_feerate was provided
if initial_feerate is not None:
Expand Down Expand Up @@ -155,6 +156,7 @@ def _new_feeupdate(self, fee_update: FeeUpdate, subject: HTLCOwner) -> None:
def send_ctx(self) -> None:
    """Update the log for a commitment that we are sending to the remote.

    Marks a revocation as pending for REMOTE and clears the LOCAL
    'was_revoke_last' flag, so that was_revoke_last() reports False until
    send_rev() sets it again.
    NOTE(review): presumably called when we send commitment_signed -- confirm against callers.
    """
    # Precondition: the remote's latest ctn equals its oldest unrevoked ctn.
    # Both values are included in the assertion message to ease debugging.
    assert self.ctn_latest(REMOTE) == self.ctn_oldest_unrevoked(REMOTE), (self.ctn_latest(REMOTE), self.ctn_oldest_unrevoked(REMOTE))
    self._set_revack_pending(REMOTE, True)
    # The most recent of (commitment, revocation) sent by us is now a commitment.
    self.log[LOCAL]['was_revoke_last'] = False

@with_lock
def recv_ctx(self) -> None:
Expand All @@ -165,6 +167,7 @@ def recv_ctx(self) -> None:
def send_rev(self) -> None:
self.log[LOCAL]['ctn'] += 1
self._set_revack_pending(LOCAL, False)
self.log[LOCAL]['was_revoke_last'] = True
# htlcs
for htlc_id in self._maybe_active_htlc_ids[REMOTE]:
ctns = self.log[REMOTE]['locked_in'][htlc_id]
Expand Down Expand Up @@ -287,6 +290,11 @@ def get_unacked_local_updates(self) -> Dict[int, Sequence[bytes]]:
return {ctn: [bfh(msg) for msg in messages]
for ctn, messages in self.log[LOCAL]['unacked_updates'].items()}

@with_lock
def was_revoke_last(self) -> bool:
    """Report whether we sent a revoke_and_ack after the last commitment_signed we sent.

    Reads the 'was_revoke_last' entry of the LOCAL log. A missing or falsy
    entry (e.g. a log that never had the key written) yields False.
    """
    flag = self.log[LOCAL].get('was_revoke_last')
    if not flag:
        return False
    return flag

##### Queries re HTLCs:

def get_htlc_by_id(self, htlc_proposer: HTLCOwner, htlc_id: int) -> UpdateAddHtlc:
Expand Down
54 changes: 35 additions & 19 deletions electrum/lnpeer.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def __init__(
self._htlc_switch_iterstart_event = asyncio.Event()
self._htlc_switch_iterdone_event = asyncio.Event()
self._received_revack_event = asyncio.Event()
self.received_commitsig_event = asyncio.Event()
self.downstream_htlc_resolved_event = asyncio.Event()

def send_message(self, message_name: str, **kwargs):
Expand Down Expand Up @@ -1221,32 +1222,45 @@ async def reestablish_channel(self, chan: Channel):
await fut
we_must_resend_revoke_and_ack, their_next_local_ctn = fut.result()

# Replay un-acked local updates (including commitment_signed) byte-for-byte.
# If we have sent them a commitment signature that they "lost" (due to disconnect),
# we need to make sure we replay the same local updates, as otherwise they could
# end up with two (or more) signed valid commitment transactions at the same ctn.
# Multiple valid ctxs at the same ctn is a major headache for pre-signing spending txns,
# e.g. for watchtowers, hence we must ensure these ctxs coincide.
# We replay the local updates even if they were not yet committed.
unacked = chan.hm.get_unacked_local_updates()
n_replayed_msgs = 0
for ctn, messages in unacked.items():
if ctn < their_next_local_ctn:
# They claim to have received these messages and the corresponding
# commitment_signed, hence we must not replay them.
continue
for raw_upd_msg in messages:
self.transport.send_bytes(raw_upd_msg)
n_replayed_msgs += 1
self.logger.info(f'channel_reestablish ({chan.get_id_for_log()}): replayed {n_replayed_msgs} unacked messages')
if we_must_resend_revoke_and_ack:
def replay_updates_and_commitsig():
    """Resend, byte-for-byte, our un-acked local updates (including commitment_signed).

    If the remote "lost" a commitment signature we sent (due to disconnect), the
    very same local updates have to be replayed; otherwise the remote could end
    up holding two (or more) valid signed commitment transactions at one ctn.
    Several valid ctxs at the same ctn are a major headache for pre-signing
    spending txns (e.g. for watchtowers), so the ctxs must coincide.
    Updates are replayed even if they were not yet committed.
    """
    resent = []
    for ctn, raw_msgs in chan.hm.get_unacked_local_updates().items():
        # Anything below their_next_local_ctn is something the remote claims to
        # have received together with its commitment_signed: do not replay it.
        if ctn >= their_next_local_ctn:
            for raw_msg in raw_msgs:
                self.transport.send_bytes(raw_msg)
                resent.append(raw_msg)
    chan_label = chan.get_id_for_log()
    msg_names = [decode_msg(raw_msg)[0] for raw_msg in resent]
    self.logger.info(f'channel_reestablish ({chan_label}): replayed {len(resent)} unacked messages. {msg_names}')

def resend_revoke_and_ack():
    """Send a revoke_and_ack for this channel, rebuilt from our own commitment numbers.

    The per-commitment secret is taken at (oldest unrevoked local ctn - 1) and
    the next per-commitment point at (oldest unrevoked local ctn + 1).
    """
    revoked_ctn = oldest_unrevoked_local_ctn - 1
    upcoming_ctn = oldest_unrevoked_local_ctn + 1
    # get_secret_and_point returns a (secret, point) pair; only one half of each is needed.
    revoked_secret, _ = chan.get_secret_and_point(LOCAL, revoked_ctn)
    _, upcoming_point = chan.get_secret_and_point(LOCAL, upcoming_ctn)
    self.send_message(
        "revoke_and_ack",
        channel_id=chan.channel_id,
        per_commitment_secret=revoked_secret,
        next_per_commitment_point=upcoming_point,
    )

was_revoke_last = chan.hm.was_revoke_last() # preserve relative order of last revack and commitsig
if we_must_resend_revoke_and_ack and not was_revoke_last:
self.logger.info(f'channel_reestablish ({chan.get_id_for_log()}): replaying a revoke_and_ack first.')
resend_revoke_and_ack()
replay_updates_and_commitsig()
if we_must_resend_revoke_and_ack and was_revoke_last:
self.logger.info(f'channel_reestablish ({chan.get_id_for_log()}): replaying a revoke_and_ack last.')
resend_revoke_and_ack()

chan.peer_state = PeerState.GOOD
if chan.is_funded() and their_next_local_ctn == next_local_ctn == 1:
self.send_funding_locked(chan)
Expand Down Expand Up @@ -1478,6 +1492,8 @@ def on_commitment_signed(self, chan: Channel, payload):
htlc_sigs = list(chunks(data, 64))
chan.receive_new_commitment(payload["signature"], htlc_sigs)
self.send_revoke_and_ack(chan)
self.received_commitsig_event.set()
self.received_commitsig_event.clear()

def on_update_fulfill_htlc(self, chan: Channel, payload):
preimage = payload["payment_preimage"]
Expand Down
144 changes: 139 additions & 5 deletions electrum/tests/test_lnpeer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from electrum.logging import console_stderr_handler, Logger
from electrum.lnworker import PaymentInfo, RECEIVED
from electrum.lnonion import OnionFailureCode
from electrum.lnutil import derive_payment_secret_from_payment_preimage
from electrum.lnutil import derive_payment_secret_from_payment_preimage, UpdateAddHtlc
from electrum.lnutil import LOCAL, REMOTE
from electrum.invoices import PR_PAID, PR_UNPAID
from electrum.interface import GracefulDisconnect
Expand Down Expand Up @@ -256,7 +256,7 @@ async def create_routes_from_invoice(self, amount_msat: int, decoded_invoice: Ln

class MockTransport:
def __init__(self, name):
self.queue = asyncio.Queue()
self.queue = asyncio.Queue() # incoming messages
self._name = name
self.peer_addr = None

Expand All @@ -265,7 +265,11 @@ def name(self):

async def read_messages(self):
    """Yield incoming messages taken from ``self.queue``, forever.

    An ``asyncio.Event`` found on the queue is a control item, not a message:
    the generator waits until that event is set, discards it, and carries on
    with the next queue item. Tests use this to artificially delay delivery
    (an event that is never set stalls the incoming pipe).

    Every queue item goes through this check; nothing is yielded unfiltered,
    so Event sentinels never reach the consumer.
    """
    while True:
        data = await self.queue.get()
        if isinstance(data, asyncio.Event):  # to artificially delay messages
            await data.wait()
            continue
        yield data

class NoFeaturesTransport(MockTransport):
"""
Expand Down Expand Up @@ -382,8 +386,14 @@ async def cleanup_lnworkers():

super().tearDown()

def prepare_peers(self, alice_channel: Channel, bob_channel: Channel):
k1, k2 = keypair(), keypair()
def prepare_peers(
self, alice_channel: Channel, bob_channel: Channel,
*, k1: Keypair = None, k2: Keypair = None,
):
if k1 is None:
k1 = keypair()
if k2 is None:
k2 = keypair()
alice_channel.node_id = k2.pubkey
bob_channel.node_id = k1.pubkey
t1, t2 = transport_pair(k1, k2, alice_channel.name, bob_channel.name)
Expand Down Expand Up @@ -557,6 +567,130 @@ async def f():
self.assertEqual(alice_channel_0.peer_state, PeerState.BAD)
self.assertEqual(bob_channel._state, ChannelState.FORCE_CLOSING)

@staticmethod
def _send_fake_htlc(peer: Peer, chan: Channel) -> UpdateAddHtlc:
    """Add a dummy HTLC to *chan* and announce it to the remote through *peer*.

    The HTLC uses fixed values (10000 msat, cltv_expiry 999, timestamp 1), a
    random 32-byte payment hash and a placeholder onion of 1366 ``b"0"`` bytes.
    Returns the HTLC object handed back by ``chan.add_htlc``.
    """
    registered = chan.add_htlc(
        UpdateAddHtlc(amount_msat=10000, payment_hash=os.urandom(32), cltv_expiry=999, timestamp=1))
    wire_fields = {
        "channel_id": chan.channel_id,
        "id": registered.htlc_id,
        "cltv_expiry": registered.cltv_expiry,
        "amount_msat": registered.amount_msat,
        "payment_hash": registered.payment_hash,
        "onion_routing_packet": 1366 * b"0",
    }
    peer.send_message("update_add_htlc", **wire_fields)
    return registered

def test_reestablish_replay_messages_rev_then_sig(self):
    """Reestablish must replay Alice's revoke_and_ack *before* her commitment_signed.

    See https://github.com/lightning/bolts/pull/810#issue-728299277
    Rev then Sig
        A            B
         <---add-----
         ----add---->
         <---sig-----
         ----rev----x
         ----sig----x
    A needs to retransmit:
         ----add-->   (note that 'rev' can be first too)
         ----rev-->
         ----sig-->
    """
    chan_AB, chan_BA = create_test_channels()
    # The same keypairs are passed to both prepare_peers() calls below, so the
    # "reconnected" peers keep their identities.
    k1, k2 = keypair(), keypair()
    # note: we don't start peer.htlc_switch() so that the fake htlcs are left alone.
    async def f():
        p1, p2, w1, w2, _q1, _q2 = self.prepare_peers(chan_AB, chan_BA, k1=k1, k2=k2)
        async with OldTaskGroup() as group:
            await group.spawn(p1._message_loop())
            await group.spawn(p2._message_loop())
            await asyncio.sleep(0.1)
            # One fake HTLC in each direction: Bob's first, then Alice's.
            self._send_fake_htlc(p2, chan_BA)
            self._send_fake_htlc(p1, chan_AB)
            # The Event is never set; MockTransport.read_messages waits on Event
            # items, so nothing queued for Bob after this point gets delivered.
            p2.transport.queue.put_nowait(asyncio.Event())  # break Bob's incoming pipe
            self.assertTrue(p2.maybe_send_commitment(chan_BA))
            # Block until Alice has handled Bob's commitment_signed.
            await p1.received_commitsig_event.wait()
            await group.cancel_remaining()
        # simulating disconnection. recreate transports.
        p1, p2, w1, w2, _q1, _q2 = self.prepare_peers(chan_AB, chan_BA, k1=k1, k2=k2)
        for chan in (chan_AB, chan_BA):
            chan.peer_state = PeerState.DISCONNECTED
        async with OldTaskGroup() as group:
            await group.spawn(p1._message_loop())
            await group.spawn(p2._message_loop())
            await asyncio.sleep(0.1)
            # Capture INFO logs emitted while both sides run reestablish_channel.
            with self.assertLogs('electrum', level='INFO') as logs:
                async with OldTaskGroup() as group2:
                    await group2.spawn(p1.reestablish_channel(chan_AB))
                    await group2.spawn(p2.reestablish_channel(chan_BA))
            # Alice must have taken the "revoke_and_ack first" branch ...
            self.assertTrue(any(("alice->bob" in msg and
                                 "replaying a revoke_and_ack first" in msg) for msg in logs.output))
            # ... and replayed exactly her update_add_htlc followed by commitment_signed.
            self.assertTrue(any(("alice->bob" in msg and
                                 "replayed 2 unacked messages. ['update_add_htlc', 'commitment_signed']" in msg) for msg in logs.output))
            self.assertEqual(chan_AB.peer_state, PeerState.GOOD)
            self.assertEqual(chan_BA.peer_state, PeerState.GOOD)
            # Raised on purpose to leave the coroutine; expected by assertRaises below.
            raise SuccessfulTest()
    with self.assertRaises(SuccessfulTest):
        run(f())

def test_reestablish_replay_messages_sig_then_rev(self):
    """Reestablish must replay Alice's commitment_signed *before* her revoke_and_ack.

    See https://github.com/lightning/bolts/pull/810#issue-728299277
    Sig then Rev
        A            B
         <---add-----
         ----add---->
         ----sig----x
         <---sig-----
         ----rev----x
    A needs to retransmit:
         ----add-->
         ----sig-->
         ----rev-->
    """
    chan_AB, chan_BA = create_test_channels()
    # The same keypairs are passed to both prepare_peers() calls below, so the
    # "reconnected" peers keep their identities.
    k1, k2 = keypair(), keypair()
    # note: we don't start peer.htlc_switch() so that the fake htlcs are left alone.
    async def f():
        p1, p2, w1, w2, _q1, _q2 = self.prepare_peers(chan_AB, chan_BA, k1=k1, k2=k2)
        async with OldTaskGroup() as group:
            await group.spawn(p1._message_loop())
            await group.spawn(p2._message_loop())
            await asyncio.sleep(0.1)
            # One fake HTLC in each direction: Bob's first, then Alice's.
            self._send_fake_htlc(p2, chan_BA)
            self._send_fake_htlc(p1, chan_AB)
            # The Event is never set; MockTransport.read_messages waits on Event
            # items, so nothing queued for Bob after this point gets delivered.
            p2.transport.queue.put_nowait(asyncio.Event())  # break Bob's incoming pipe
            # Alice signs first here (this is what distinguishes the scenario
            # from the rev-then-sig test), then Bob signs.
            self.assertTrue(p1.maybe_send_commitment(chan_AB))
            self.assertTrue(p2.maybe_send_commitment(chan_BA))
            # Block until Alice has handled Bob's commitment_signed.
            await p1.received_commitsig_event.wait()
            await group.cancel_remaining()
        # simulating disconnection. recreate transports.
        p1, p2, w1, w2, _q1, _q2 = self.prepare_peers(chan_AB, chan_BA, k1=k1, k2=k2)
        for chan in (chan_AB, chan_BA):
            chan.peer_state = PeerState.DISCONNECTED
        async with OldTaskGroup() as group:
            await group.spawn(p1._message_loop())
            await group.spawn(p2._message_loop())
            await asyncio.sleep(0.1)
            # Capture INFO logs emitted while both sides run reestablish_channel.
            with self.assertLogs('electrum', level='INFO') as logs:
                async with OldTaskGroup() as group2:
                    await group2.spawn(p1.reestablish_channel(chan_AB))
                    await group2.spawn(p2.reestablish_channel(chan_BA))
            # Alice must have taken the "revoke_and_ack last" branch ...
            self.assertTrue(any(("alice->bob" in msg and
                                 "replaying a revoke_and_ack last" in msg) for msg in logs.output))
            # ... and replayed exactly her update_add_htlc followed by commitment_signed.
            self.assertTrue(any(("alice->bob" in msg and
                                 "replayed 2 unacked messages. ['update_add_htlc', 'commitment_signed']" in msg) for msg in logs.output))
            self.assertEqual(chan_AB.peer_state, PeerState.GOOD)
            self.assertEqual(chan_BA.peer_state, PeerState.GOOD)
            # Raised on purpose to leave the coroutine; expected by assertRaises below.
            raise SuccessfulTest()
    with self.assertRaises(SuccessfulTest):
        run(f())

@needs_test_with_all_chacha20_implementations
def test_payment(self):
"""Alice pays Bob a single HTLC via direct channel."""
Expand Down

0 comments on commit c827b6f

Please sign in to comment.