Skip to content

Commit

Permalink
Introduced verification of big ledger peer snapshot file
Browse files Browse the repository at this point in the history
  • Loading branch information
crocodile-dentist committed Jul 15, 2024
1 parent ddbd7f1 commit 5d39777
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 11 deletions.
4 changes: 3 additions & 1 deletion ouroboros-network/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
peers
* Implemented separate configurable peer selection targets for Praos and
Genesis consensus modes. Genesis mode may use more big ledger peers when
a node is syncing up.
* Implemented verification of the big ledger peer snapshot when syncing reaches
the point at which the snapshot was taken. An error is raised when a mismatch
is detected.

## 0.16.1.1 -- 2024-06-28

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ import Ouroboros.Network.PeerSelection.State.KnownPeers qualified as KnownPeers
import Ouroboros.Network.PeerSelection.State.LocalRootPeers (HotValency (..),
LocalRootPeers (..), WarmValency (..))
import Ouroboros.Network.PeerSelection.State.LocalRootPeers qualified as LocalRootPeers
import Ouroboros.Network.Point
import Ouroboros.Network.Protocol.PeerSharing.Type (PeerSharingResult (..))

import Ouroboros.Network.Testing.Data.Script
Expand Down Expand Up @@ -917,6 +918,7 @@ traceNum TraceOutboundGovernorCriticalFailure {} = 53
traceNum TraceDebugState {} = 54
traceNum TraceChurnAction {} = 55
traceNum TraceChurnTimeout {} = 56
traceNum TraceVerifyPeerSnapshot {} = 57

allTraceNames :: Map Int String
allTraceNames =
Expand Down Expand Up @@ -978,6 +980,7 @@ allTraceNames =
, (54, "TraceDebugState")
, (55, "TraceChurnAction")
, (56, "TraceChurnTimeout")
, (57, "TraceVerifyPeerSnapshot")
]


Expand Down Expand Up @@ -3767,6 +3770,8 @@ _governorFindingPublicRoots targetNumberOfRootPeers readDomains readUseBootstrap
transformPeerSelectionAction requestPublicRootPeers }
policy
interfaces
(pure Nothing)
consensusInterface
where
tracer :: Show a => Tracer IO a
tracer = Tracer (BS.putStrLn . BS.pack . show)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import Ouroboros.Network.ExitPolicy
import Ouroboros.Network.PeerSelection.Governor hiding (PeerSelectionState (..))
import Ouroboros.Network.PeerSelection.Governor qualified as Governor
import Ouroboros.Network.PeerSelection.State.LocalRootPeers qualified as LocalRootPeers
import Ouroboros.Network.Point

import Ouroboros.Network.Testing.Data.Script (PickScript, Script (..),
ScriptDelay (..), TimedScript, arbitraryPickScript,
Expand Down Expand Up @@ -236,6 +237,10 @@ governorAction mockEnv@GovernorMockEnvironment {
countersVar <- StrictTVar.newTVarIO emptyPeerSelectionCounters
policy <- mockPeerSelectionPolicy mockEnv
let initialPeerTargets = fst . NonEmpty.head $ targets'
consensusInterface = LedgerPeersConsensusInterface {
lpGetLatestSlot = pure Origin,
lpGetLedgerStateJudgement = pure TooOld,
lpGetLedgerPeers = pure [] }

actions <-
case consensusMode of
Expand Down Expand Up @@ -294,6 +299,8 @@ governorAction mockEnv@GovernorMockEnvironment {
actions
policy
interfaces
(pure Nothing)
consensusInterface
atomically retry
atomically retry -- block to allow the governor to run

Expand Down Expand Up @@ -757,6 +764,7 @@ tracerTracePeerSelection = contramap f tracerTestTraceEvent
f a@(TraceDebugState !_ !_) = GovernorEvent a
f a@(TraceChurnAction !_ !_ !_) = GovernorEvent a
f a@(TraceChurnTimeout !_ !_ !_) = GovernorEvent a
f a@(TraceVerifyPeerSnapshot !_) = GovernorEvent a

tracerDebugPeerSelection :: Tracer (IOSim s) (DebugPeerSelection PeerAddr)
tracerDebugPeerSelection = GovernorDebug `contramap` tracerTestTraceEvent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,8 @@ prop_peer_selection_trace_coverage defaultBearerInfo diffScript =
show a
peerSelectionTraceMap a@TraceChurnTimeout {} =
show a

peerSelectionTraceMap (TraceVerifyPeerSnapshot result) =
"TraceVerifyPeerSnapshot " ++ show result
eventsSeenNames = map peerSelectionTraceMap events

-- TODO: Add checkCoverage here
Expand Down
2 changes: 2 additions & 0 deletions ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,8 @@ runM Interfaces
debugStateVar = dbgVar,
readUseLedgerPeers = daReadUseLedgerPeers
}
daReadLedgerPeerSnapshot
daLedgerPeersCtx


--
Expand Down
29 changes: 24 additions & 5 deletions ouroboros-network/src/Ouroboros/Network/PeerSelection/Governor.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ import Ouroboros.Network.PeerSelection.Governor.KnownPeers qualified as KnownPee
import Ouroboros.Network.PeerSelection.Governor.Monitor qualified as Monitor
import Ouroboros.Network.PeerSelection.Governor.RootPeers qualified as RootPeers
import Ouroboros.Network.PeerSelection.Governor.Types
import Ouroboros.Network.PeerSelection.LedgerPeers.Type (UseLedgerPeers (..))
import Ouroboros.Network.PeerSelection.LedgerPeers.Type (LedgerPeerSnapshot,
LedgerPeersConsensusInterface, UseLedgerPeers (..))
import Ouroboros.Network.PeerSelection.LocalRootPeers
(OutboundConnectionsState (..))
import Ouroboros.Network.PeerSelection.PeerSharing (PeerSharing (..))
Expand Down Expand Up @@ -479,8 +480,10 @@ peerSelectionGovernor :: ( Alternative (STM m)
-> PeerSelectionActions peeraddr peerconn m
-> PeerSelectionPolicy peeraddr m
-> PeerSelectionInterfaces peeraddr peerconn m
-> STM m (Maybe LedgerPeerSnapshot)
-> LedgerPeersConsensusInterface m
-> m Void
peerSelectionGovernor tracer debugTracer countersTracer fuzzRng consensusMode actions policy interfaces =
peerSelectionGovernor tracer debugTracer countersTracer fuzzRng consensusMode actions policy interfaces readBigLedgerSnapshot ledgerPeersCtx =
JobPool.withJobPool $ \jobPool ->
peerSelectionGovernorLoop
tracer
Expand All @@ -491,6 +494,8 @@ peerSelectionGovernor tracer debugTracer countersTracer fuzzRng consensusMode ac
interfaces
jobPool
(emptyPeerSelectionState fuzzRng consensusMode)
readBigLedgerSnapshot
ledgerPeersCtx

-- | Our pattern here is a loop with two sets of guarded actions:
--
Expand Down Expand Up @@ -525,6 +530,8 @@ peerSelectionGovernorLoop :: forall m peeraddr peerconn.
-> PeerSelectionInterfaces peeraddr peerconn m
-> JobPool () m (Completion m peeraddr peerconn)
-> PeerSelectionState peeraddr peerconn
-> STM m (Maybe LedgerPeerSnapshot)
-> LedgerPeersConsensusInterface m
-> m Void
peerSelectionGovernorLoop tracer
debugTracer
Expand All @@ -537,8 +544,16 @@ peerSelectionGovernorLoop tracer
debugStateVar
}
jobPool
pst = do
loop pst (Time 0) `catch` (\e -> traceWith tracer (TraceOutboundGovernorCriticalFailure e) >> throwIO e)
pst
readBigLedgerSnapshot
ledgerPeersCtx =
handle (\e -> traceWith tracer (TraceOutboundGovernorCriticalFailure e) >> throwIO e) $ do
-- we run the verification job pre-emptively at startup because we may be stuck
-- in the TooOld ledger state before we catch up. Once ledger state turns YoungEnough,
-- this job ends. If for any reason ledger state turns TooOld from YoungEnough, this job
-- is restarted by the monitorLedgerStateJudgement job.
JobPool.forkJob jobPool (Monitor.jobVerifyPeerSnapshot readBigLedgerSnapshot ledgerPeersCtx)
loop pst (Time 0)
where
loop :: PeerSelectionState peeraddr peerconn
-> Time
Expand Down Expand Up @@ -603,6 +618,10 @@ peerSelectionGovernorLoop tracer
-- Trace peer selection
traverse_ (traceWith tracer) decisionTrace

case decisionTrace of
[TraceVerifyPeerSnapshot False] -> throwIO BigLedgerPeerSnapshotError
_otherwise -> pure ()

mapM_ (JobPool.forkJob jobPool) decisionJobs
loop st'' dbgUpdateAt'

Expand Down Expand Up @@ -645,7 +664,7 @@ peerSelectionGovernorLoop tracer
-- Check the definition site for more details, but in short, when the
-- node changes to 'TooOld' state it will go through a purging phase which
-- the 'waitForTheSystemToQuiesce' monitoring action will wait for.
<> Monitor.monitorLedgerStateJudgement actions st
<> Monitor.monitorLedgerStateJudgement actions readBigLedgerSnapshot ledgerPeersCtx st
-- In Praos consensus mode,
-- When the node transitions to 'TooOld' state the node will wait until
-- it reaches a clean (quiesced) state free of non-trusted peers, before
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
module Ouroboros.Network.PeerSelection.Governor.Monitor
( targetPeers
, jobs
, jobVerifyPeerSnapshot
, connections
, localRoots
, monitorLedgerStateJudgement
Expand All @@ -21,11 +22,11 @@ module Ouroboros.Network.PeerSelection.Governor.Monitor

import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Maybe (fromMaybe, isJust)
import Data.Maybe (fromMaybe, isJust, maybeToList)
import Data.Set (Set)
import Data.Set qualified as Set

import Control.Concurrent.JobPool (JobPool)
import Control.Concurrent.JobPool (Job (..), JobPool)
import Control.Concurrent.JobPool qualified as JobPool
import Control.Exception (assert)
import Control.Monad.Class.MonadSTM
Expand All @@ -42,7 +43,9 @@ import Ouroboros.Network.PeerSelection.Governor.ActivePeers
import Ouroboros.Network.PeerSelection.Governor.Types hiding
(PeerSelectionCounters)
import Ouroboros.Network.PeerSelection.LedgerPeers.Type
(LedgerStateJudgement (..))
(LedgerPeerSnapshot (..), LedgerPeersConsensusInterface (..),
LedgerStateJudgement (..))
import Ouroboros.Network.PeerSelection.LedgerPeers.Utils
import Ouroboros.Network.PeerSelection.PeerTrustable (PeerTrustable (..))
import Ouroboros.Network.PeerSelection.PublicRootPeers qualified as PublicRootPeers
import Ouroboros.Network.PeerSelection.State.EstablishedPeers qualified as EstablishedPeers
Expand Down Expand Up @@ -598,9 +601,13 @@ monitorLedgerStateJudgement :: ( MonadSTM m
, Ord peeraddr
)
=> PeerSelectionActions peeraddr peerconn m
-> STM m (Maybe LedgerPeerSnapshot)
-> LedgerPeersConsensusInterface m
-> PeerSelectionState peeraddr peerconn
-> Guarded (STM m) (TimedDecision m peeraddr peerconn)
monitorLedgerStateJudgement PeerSelectionActions{ readLedgerStateJudgement }
readLedgerPeerSnapshot
ledgerPeersCtx
st@PeerSelectionState{ bootstrapPeersFlag,
publicRootPeers,
knownPeers,
Expand All @@ -617,7 +624,10 @@ monitorLedgerStateJudgement PeerSelectionActions{ readLedgerStateJudgement }
return $ \_now ->
Decision {
decisionTrace = [TraceLedgerStateJudgementChanged lsj],
decisionJobs = [],
decisionJobs = case lsj of
TooOld ->
[jobVerifyPeerSnapshot readLedgerPeerSnapshot ledgerPeersCtx]
_otherwise -> [],
decisionState = st {
ledgerStateJudgement = lsj } }

Expand Down Expand Up @@ -744,3 +754,37 @@ waitForSystemToQuiesce st@PeerSelectionState{
}
}
| otherwise = GuardedSkip Nothing

-- | Job which verifies the big ledger peer snapshot against the ledger.
--
-- The job runs a single STM transaction:
--
-- * if the ledger state judgement is 'YoungEnough' it completes immediately
--   without emitting any trace;
--
-- * if it is 'TooOld' it blocks (via 'retry' / 'check') until a snapshot is
--   available and the latest slot equals the slot recorded in the snapshot,
--   then compares the snapshot's pools with 'accumulateBigLedgerStake' applied
--   to the current ledger peers and reports the outcome as
--   'TraceVerifyPeerSnapshot'.
--
-- The resulting 'Completion' never modifies the peer selection state and never
-- schedules follow-up jobs.
--
jobVerifyPeerSnapshot :: MonadSTM m
                      => STM m (Maybe LedgerPeerSnapshot)
                      -> LedgerPeersConsensusInterface m
                      -> Job () m (Completion m peeraddr peerconn)
jobVerifyPeerSnapshot readBigLedgerSnapshot
                      LedgerPeersConsensusInterface {
                        lpGetLatestSlot,
                        lpGetLedgerStateJudgement,
                        lpGetLedgerPeers }
  = Job verifySnapshot
        -- NOTE(review): second 'Job' field is presumably the handler run when
        -- the action fails; here it yields an empty completion.
        (\_ -> finish Nothing Nothing)
        ()
        "jobVerifyPeerSnapshot"
  where
    -- Build a completion which leaves the state untouched and carries at most
    -- one follow-up job and at most one trace event.
    finish mbJob mbTrace =
      return . Completion $ \st _now ->
        Decision {
          decisionTrace = maybeToList mbTrace,
          decisionState = st,
          decisionJobs  = maybeToList mbJob }

    verifySnapshot = atomically $
      lpGetLedgerStateJudgement >>= \judgement ->
        case judgement of
          YoungEnough ->
            finish Nothing Nothing

          TooOld -> do
            snapshot <- readBigLedgerSnapshot
            -- no snapshot yet: wait until one becomes available
            (snapshotSlotNo, snapshotPools) <-
              case snapshot of
                Nothing                           -> retry
                Just (LedgerPeerSnapshot payload) -> pure payload

            -- block until the latest slot is exactly the snapshot's slot
            latestSlot <- lpGetLatestSlot
            check (snapshotSlotNo == latestSlot)

            ledgerPeers <- lpGetLedgerPeers
            let verified = snapshotPools == accumulateBigLedgerStake ledgerPeers
            finish Nothing (Just (TraceVerifyPeerSnapshot verified))
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ module Ouroboros.Network.PeerSelection.Governor.Types
, DebugPeerSelection (..)
-- * Error types
, BootstrapPeersCriticalTimeoutError (..)
, BigLedgerPeerSnapshotError (..)
) where

import Data.Map.Strict (Map)
Expand Down Expand Up @@ -1707,6 +1708,7 @@ data TracePeerSelection peeraddr =
| TraceOnlyBootstrapPeers
| TraceBootstrapPeersFlagChangedWhilstInSensitiveState
| TraceUseBootstrapPeersChanged UseBootstrapPeers
| TraceVerifyPeerSnapshot Bool

--
-- Critical Failures
Expand Down Expand Up @@ -1744,6 +1746,14 @@ instance Exception BootstrapPeersCriticalTimeoutError where
displayException BootstrapPeersCriticalTimeoutError =
"The peer selection did not converged to a clean state in 15 minutes. Something is wrong!"

-- | Error signalling that the big ledger peer snapshot is not consistent with
-- the ledger.
--
data BigLedgerPeerSnapshotError = BigLedgerPeerSnapshotError
  deriving (Eq, Show)

instance Exception BigLedgerPeerSnapshotError where
  -- The argument is ignored: the message does not depend on the value.
  displayException _ =
    unwords
      [ "Immutable tip has reached the slot matching the big ledger peer"
      , "snapshot file specified in the topology configuration, but the"
      , "data it contains is not consistent with the ledger"
      ]

data DebugPeerSelection peeraddr where
TraceGovernorState :: forall peeraddr peerconn.
Show peerconn
Expand Down

0 comments on commit 5d39777

Please sign in to comment.