Skip to content

Commit

Permalink
churn: implemented explicit synchronisation
Browse files Browse the repository at this point in the history
Churn now explicitly synchronises with the outbound governor using
`PeerSelectionCounters`.  Each churn action can timeout.

Co-authored-by: Armando Santos (@bolt12)
Co-authored-by: Marcin Szamotulski (@coot)
  • Loading branch information
coot committed May 7, 2024
1 parent f29288d commit bc1e4fb
Show file tree
Hide file tree
Showing 10 changed files with 479 additions and 168 deletions.
2 changes: 2 additions & 0 deletions ouroboros-network/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
* Reduce public root retry timer.
* Don't classify a config file with publicRoot/bootstrapPeers IP addresses only
as a DNS error.
* Improved Churn governor by synchronizing according to the counters instead
of relying on `threadDelay`.

## 0.14.0.0 -- 2024-04-04

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,8 @@ traceNum TraceBootstrapPeersFlagChangedWhilstInSensitiveState = 51
traceNum TraceUseBootstrapPeersChanged {} = 52
traceNum TraceOutboundGovernorCriticalFailure {} = 53
traceNum TraceDebugState {} = 54
traceNum TraceChurnAction {} = 55
traceNum TraceChurnTimeout {} = 56

allTraceNames :: Map Int String
allTraceNames =
Expand Down Expand Up @@ -778,7 +780,8 @@ allTraceNames =
, (52, "TraceUseBootstrapPeersChanged")
, (53, "TraceOutboundGovernorCriticalFailure")
, (54, "TraceDebugState")
, (55, "TracePickInboundPeers")
, (55, "TraceChurnAction")
, (56, "TraceChurnTimeout")
]


Expand Down Expand Up @@ -3344,10 +3347,12 @@ _governorFindingPublicRoots targetNumberOfRootPeers readDomains readUseBootstrap
(ioDNSActions LookupReqAAndAAAA) $ \requestPublicRootPeers -> do
publicStateVar <- makePublicPeerSelectionStateVar
debugVar <- newTVarIO $ emptyPeerSelectionState (mkStdGen 42) []
countersVar <- newTVarIO $ emptyPeerSelectionCounters []
peerSelectionGovernor
tracer tracer tracer
-- TODO: #3182 Rng seed should come from quickcheck.
(mkStdGen 42)
countersVar
publicStateVar
debugVar
actions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ governorAction mockEnv = do
usbVar <- playTimedScript (contramap TraceEnvSetUseBootstrapPeers tracerMockEnv)
(useBootstrapPeers mockEnv)
debugVar <- StrictTVar.newTVarIO (emptyPeerSelectionState (mkStdGen 42) [])
countersVar <- StrictTVar.newTVarIO (emptyPeerSelectionCounters [])
policy <- mockPeerSelectionPolicy mockEnv
actions <- mockPeerSelectionActions tracerMockEnv mockEnv (readTVar usbVar) (readTVar lsjVar) policy
exploreRaces -- explore races within the governor
Expand All @@ -222,6 +223,7 @@ governorAction mockEnv = do
tracerDebugPeerSelection
tracerTracePeerSelectionCounters
(mkStdGen 42)
countersVar
publicStateVar
debugVar
actions
Expand Down Expand Up @@ -659,6 +661,8 @@ tracerTracePeerSelection = contramap f tracerTestTraceEvent
f a@(TraceUseBootstrapPeersChanged !_) = GovernorEvent a
f a@(TraceOutboundGovernorCriticalFailure !_) = GovernorEvent a
f a@(TraceDebugState !_ !_) = GovernorEvent a
f a@(TraceChurnAction !_ !_) = GovernorEvent a
f a@(TraceChurnTimeout !_ !_) = GovernorEvent a

tracerDebugPeerSelection :: Tracer (IOSim s) (DebugPeerSelection PeerAddr)
tracerDebugPeerSelection = GovernorDebug `contramap` tracerTestTraceEvent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,10 @@ prop_peer_selection_trace_coverage defaultBearerInfo diffScript =
"TraceOutboundGovernorCriticalFailure"
peerSelectionTraceMap TraceDebugState {} =
"TraceDebugState"
-- The label must be the constructor's own name: reusing "TraceChurnTimeout"
-- here would fold churn actions into the timeout bucket of the coverage
-- report.
peerSelectionTraceMap TraceChurnAction {} = "TraceChurnAction"
peerSelectionTraceMap TraceChurnTimeout {} =
"TraceChurnTimeout"

eventsSeenNames = map peerSelectionTraceMap events

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ defaultNumBootstrapPeers = DefaultNumBootstrapPeers 30
defaultPeerSelectionTargets :: PeerSelectionTargets
defaultPeerSelectionTargets =
PeerSelectionTargets {
targetNumberOfRootPeers = 85,
targetNumberOfRootPeers = 60,
targetNumberOfKnownPeers = 85,
targetNumberOfEstablishedPeers = 40,
targetNumberOfActivePeers = 15,
Expand Down
7 changes: 6 additions & 1 deletion ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ import Ouroboros.Network.PeerSelection.Governor.Types
(ChurnMode (ChurnModeNormal), DebugPeerSelection (..),
PeerSelectionActions, PeerSelectionCounters (..),
PeerSelectionPolicy (..), PeerSelectionState,
TracePeerSelection (..), emptyPeerSelectionState)
TracePeerSelection (..), emptyPeerSelectionCounters,
emptyPeerSelectionState)
#ifdef POSIX
import Ouroboros.Network.PeerSelection.Governor.Types
(makeDebugPeerSelectionState)
Expand Down Expand Up @@ -809,6 +810,8 @@ runM Interfaces
min 2 (targetNumberOfActivePeers daPeerSelectionTargets)
}

countersVar <- newTVarIO (emptyPeerSelectionCounters [])

-- Design notes:
-- - We split the following code into two parts:
-- - Part (a): plumb data flow (in particular arguments and tracers)
Expand Down Expand Up @@ -998,6 +1001,7 @@ runM Interfaces
peerSelectionTracer
dtTracePeerSelectionCounters
fuzzRng
countersVar
daPublicPeerSelectionVar
dbgVar
peerSelectionActions
Expand All @@ -1017,6 +1021,7 @@ runM Interfaces
daBlockFetchMode
daPeerSelectionTargets
peerSelectionTargetsVar
(readTVar countersVar)
daReadUseBootstrapPeers

--
Expand Down
8 changes: 8 additions & 0 deletions ouroboros-network/src/Ouroboros/Network/Diffusion/Policies.hs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ minChainSyncTimeout = 135
maxChainSyncTimeout :: DiffTime
maxChainSyncTimeout = 269

-- | Churn times out after 60s of trying to establish a connection.
--
-- This doesn't mean the connection is terminated once the timeout expires;
-- churn just stops waiting for it and moves on.
--
churnEstablishConnectionTimeout :: DiffTime
churnEstablishConnectionTimeout = 60


-- | Number of events tracked by 'PeerMetrics'. This corresponds to one hour of
-- blocks on mainnet.
Expand Down

0 comments on commit bc1e4fb

Please sign in to comment.