From 4c64c5589678820e15607fdfbfd69bf4128e08fc Mon Sep 17 00:00:00 2001 From: Matee ullah Malik Date: Tue, 26 May 2026 11:51:15 +0000 Subject: [PATCH] fix(p2p/kademlia): guard handlePing against nil/malformed Sender MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A peer sending a Ping with nil or structurally-invalid Sender (e.g. nil *Node, or *Node with nil ID) caused gob.Encode of the response to walk a nil pointer and SIGSEGV inside encoding/gob.encUint8Array. Because the per-conn goroutine spawned at serve() L595 had no upstream recover() and handlePing — unlike handleFindNode — lacked its own defer s.handlePanic(), the entire supernode process died. Observed in production on lumera-devnet-1 val4 (goroutine 2067, repanicked). Fix is two layers, both required: 1. handlePing: nil-guard message/Sender/Sender.ID, sanitise the peer- supplied Node before reflecting it back on the wire (do not echo attacker-controlled HashedID), and wrap the function in a deferred recover() that converts any future panic in this path to an error return rather than process death. 2. serve loop: wrap the bare 'go s.handleConn(ctx, conn)' in a top- level recover() so any future request-handler that forgets its own defer s.handlePanic() cannot crash the process either. Adds handle_ping_test.go covering nil message, nil Sender, empty Sender ID, and a panic-recovery smoke test (constructs a *Network without a *DHT and asserts handlePing returns an error rather than panicking to the caller). Risk: low. Behaviour change only on inputs that previously crashed the process; any well-formed Ping is unaffected. Sanitisation drops the peer-supplied HashedID and recomputes it locally via SetHashedID, matching how dht.go does it everywhere else. Refs: ops RCA at /root/lumera/kademlia-panic-rca.md --- p2p/kademlia/handle_ping_test.go | 87 ++++++++++++++++++++++++++++++++ p2p/kademlia/network.go | 56 +++++++++++++++++--- 2 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 p2p/kademlia/handle_ping_test.go diff --git a/p2p/kademlia/handle_ping_test.go b/p2p/kademlia/handle_ping_test.go new file mode 100644 index 00000000..99b9a1a5 --- /dev/null +++ b/p2p/kademlia/handle_ping_test.go @@ -0,0 +1,87 @@ +package kademlia + +import ( + "context" + "testing" +) + +// TestHandlePing_NilMessage ensures the request path rejects a nil message +// without dereferencing it. Regression for the kademlia handlePing nil-sender +// panic that crashed lumera-devnet-1 val4's supernode process (RCA: a peer +// sending a Ping with nil Sender caused gob.Encode to walk a nil *Node and +// SIGSEGV, killing the entire process because no upstream recover() existed). +func TestHandlePing_NilMessage(t *testing.T) { + s := &Network{} + res, err := s.handlePing(context.Background(), nil) + if err == nil { + t.Fatalf("nil message must return error; got res=%v err=nil", res) + } + if res != nil { + t.Fatalf("nil message must return nil response; got %v", res) + } +} + +// TestHandlePing_NilSender ensures the request path rejects a Message with no +// Sender, before invoking dht.newMessage (which would deref the nil pointer). +func TestHandlePing_NilSender(t *testing.T) { + s := &Network{} + res, err := s.handlePing(context.Background(), &Message{MessageType: Ping}) + if err == nil { + t.Fatalf("nil sender must return error; got res=%v err=nil", res) + } + if res != nil { + t.Fatalf("nil sender must return nil response; got %v", res) + } +} + +// TestHandlePing_EmptySenderID ensures a Sender with empty ID is rejected. +// gob would happily encode an empty []byte but the encoded Receiver would be +// a self-reference with no identity — useless and a sanitisation gap. +func TestHandlePing_EmptySenderID(t *testing.T) { + s := &Network{} + cases := []struct { + name string + id []byte + }{ + {"nil-id", nil}, + {"empty-slice-id", []byte{}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + msg := &Message{MessageType: Ping, Sender: &Node{ID: tc.id, IP: "1.2.3.4", Port: 4445}} + res, err := s.handlePing(context.Background(), msg) + if err == nil { + t.Fatalf("empty sender ID must return error; got res=%v err=nil", res) + } + if res != nil { + t.Fatalf("empty sender ID must return nil response; got %v", res) + } + }) + } +} + +// TestHandlePing_PanicRecovered constructs a Message that — once it reaches +// dht.newMessage — would panic because the *Network has no DHT wired up. +// The deferred recover() in handlePing must turn that panic into an error +// return, NOT crash the test (which would crash the supernode in production). +// +// We satisfy the nil-guard with a valid Sender so the panic comes from the +// dht.newMessage call (nil *DHT receiver). This is the defence-in-depth +// layer of the fix: even if a future code path reintroduces a nil deref, +// the request handler must not kill the process. +func TestHandlePing_PanicRecovered(t *testing.T) { + s := &Network{} // s.dht == nil → newMessage(...) will panic on receiver deref + msg := &Message{ + MessageType: Ping, + Sender: &Node{ID: []byte("peer-id-32-bytes-or-whatever-fits"), IP: "1.2.3.4", Port: 4445}, + } + defer func() { + if r := recover(); r != nil { + t.Fatalf("handlePing leaked a panic to caller: %v", r) + } + }() + res, err := s.handlePing(context.Background(), msg) + if err == nil { + t.Fatalf("expected error from recovered panic; got res=%v err=nil", res) + } +} diff --git a/p2p/kademlia/network.go b/p2p/kademlia/network.go index 58aec7d3..4e159b97 100644 --- a/p2p/kademlia/network.go +++ b/p2p/kademlia/network.go @@ -353,11 +353,40 @@ func (s *Network) handleReplicateRequest(ctx context.Context, req *ReplicateData return nil } -func (s *Network) handlePing(ctx context.Context, message *Message) ([]byte, error) { - // new a response message - resMsg := s.dht.newMessage(Ping, message.Sender, nil) +func (s *Network) handlePing(ctx context.Context, message *Message) (res []byte, err error) { + // Recover any panic in the request path so a single malformed Ping cannot + // crash the supernode process. See SuperNode RCA (kademlia handlePing nil + // sender panic): without this guard, gob.Encode walks a peer-supplied nil + // *Node and SIGSEGVs inside encUint8Array, killing the goroutine and (since + // there is no upstream recover()) the entire process. + defer func() { + if r := recover(); r != nil { + logtrace.Error(ctx, "handlePing panic recovered", logtrace.Fields{ + logtrace.FieldModule: "p2p", + "panic": fmt.Sprintf("%v", r), + }) + res, err = nil, errors.New("handlePing: recovered panic") + } + }() - go s.dht.addNode(context.Background(), message.Sender) + if message == nil || message.Sender == nil || len(message.Sender.ID) == 0 { + return nil, errors.New("handlePing: invalid sender") + } + + // Sanitize: build our own *Node from the peer-supplied fields so we do not + // reflect attacker-controlled HashedID (or any other future field) back on + // the wire when we encode the response. + sender := &Node{ + ID: message.Sender.ID, + IP: message.Sender.IP, + Port: message.Sender.Port, + Version: message.Sender.Version, + } + sender.SetHashedID() + + resMsg := s.dht.newMessage(Ping, sender, nil) + + go s.dht.addNode(context.Background(), sender) return s.encodeMesage(resMsg) } @@ -591,8 +620,23 @@ func (s *Network) serve(ctx context.Context) { return } - // handle the connection requests - go s.handleConn(ctx, conn) + // handle the connection requests with a top-level recover so a single + // malformed peer cannot crash the supernode process. Per-handler + // recovers (e.g. handlePing's, handleFindNode's) catch most cases, but + // this outer guard is defense-in-depth for any future code path that + // forgets to install its own. + go func(c net.Conn) { + defer func() { + if r := recover(); r != nil { + logtrace.Error(ctx, "handleConn panic recovered", logtrace.Fields{ + logtrace.FieldModule: "p2p", + "panic": fmt.Sprintf("%v", r), + }) + _ = c.Close() + } + }() + s.handleConn(ctx, c) + }(conn) } }