Skip to content

Commit

Permalink
churn: added ChurnCounters tracer
Browse files Browse the repository at this point in the history
  • Loading branch information
coot committed Apr 16, 2024
1 parent bfe543d commit 1ecaf98
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 29 deletions.
Expand Up @@ -657,8 +657,8 @@ tracerTracePeerSelection = contramap f tracerTestTraceEvent
f a@(TraceUseBootstrapPeersChanged !_) = GovernorEvent a
f a@(TraceOutboundGovernorCriticalFailure !_) = GovernorEvent a
f a@(TraceDebugState !_ !_) = GovernorEvent a
f a@(TraceChurnAction !_) = GovernorEvent a
f a@(TraceChurnTimeout !_) = GovernorEvent a
f a@(TraceChurnAction !_ !_) = GovernorEvent a
f a@(TraceChurnTimeout !_ !_) = GovernorEvent a

-- | Debug tracer for peer selection: every event is wrapped with
-- 'GovernorDebug' and forwarded to 'tracerTestTraceEvent'.
tracerDebugPeerSelection :: Tracer (IOSim s) (DebugPeerSelection PeerAddr)
tracerDebugPeerSelection = contramap GovernorDebug tracerTestTraceEvent
Expand Down
22 changes: 11 additions & 11 deletions ouroboros-network/sim-tests-lib/Test/Ouroboros/Network/Testnet.hs
Expand Up @@ -3471,14 +3471,14 @@ prop_churn_notimeouts diffScript =
<$> events
where
-- Predicate over peer selection trace events: 'False' when the event is a
-- churn timeout of one of the listed /decrease/ actions, 'True' otherwise.
--
-- 'TraceChurnTimeout' carries two fields (the churn action and an 'Int'
-- counter difference); the second one is irrelevant here and is ignored.
noChurnTimeout :: TracePeerSelection NtNAddr -> Bool
noChurnTimeout (TraceChurnTimeout DecreasedActivePeers               _) = False
noChurnTimeout (TraceChurnTimeout DecreasedActiveBigLedgerPeers      _) = False
noChurnTimeout (TraceChurnTimeout DecreasedEstablishedPeers          _) = False
noChurnTimeout (TraceChurnTimeout DecreasedEstablishedBigLedgerPeers _) = False
noChurnTimeout (TraceChurnTimeout DecreasedKnownPeers                _) = False
noChurnTimeout (TraceChurnTimeout DecreasedKnownBigLedgerPeers       _) = False
-- any other churn timeout is accepted; spelled out explicitly to make it
-- clear that only the actions listed above are rejected
noChurnTimeout TraceChurnTimeout {}                                     = True
noChurnTimeout _                                                        = True


-- | Verify that the churn trace consists of a repeated list of actions:
Expand Down Expand Up @@ -3526,9 +3526,9 @@ prop_churn_steps bearerInfo diffScript =
in counterexample (intercalate "\n" (show <$> evsList))
. churnTracePredicate
. mapMaybe (\case
(_, TraceChurnAction a) -> Just a
(_, TraceChurnTimeout a) -> Just a
_ -> Nothing)
(_, TraceChurnAction a _) -> Just a
(_, TraceChurnTimeout a _) -> Just a
_ -> Nothing)
$ evsList
)
<$> events
Expand Down
Expand Up @@ -1257,6 +1257,7 @@ diffusionSimulation
. tracerWithTime
$ nodeTracer
, Diff.P2P.dtTracePeerSelectionCounters = nullTracer
, Diff.P2P.dtTraceChurnCounters = nullTracer
, Diff.P2P.dtPeerSelectionActionsTracer = contramap
DiffusionPeerSelectionActionsTrace
. tracerWithName ntnAddr
Expand Down
6 changes: 6 additions & 0 deletions ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs
Expand Up @@ -170,6 +170,9 @@ data TracersExtra ntnAddr ntnVersion ntnVersionData
, dtTracePeerSelectionCounters
:: Tracer m PeerSelectionCounters

, dtTraceChurnCounters
:: Tracer m Governor.ChurnCounters

, dtPeerSelectionActionsTracer
:: Tracer m (PeerSelectionActionsTrace ntnAddr ntnVersion)

Expand Down Expand Up @@ -223,6 +226,7 @@ nullTracers =
, dtTracePublicRootPeersTracer = nullTracer
, dtTraceLedgerPeersTracer = nullTracer
, dtTracePeerSelectionTracer = nullTracer
, dtTraceChurnCounters = nullTracer
, dtDebugPeerSelectionInitiatorTracer = nullTracer
, dtDebugPeerSelectionInitiatorResponderTracer = nullTracer
, dtTracePeerSelectionCounters = nullTracer
Expand Down Expand Up @@ -598,6 +602,7 @@ runM Interfaces
}
TracersExtra
{ dtTracePeerSelectionTracer
, dtTraceChurnCounters
, dtDebugPeerSelectionInitiatorTracer
, dtDebugPeerSelectionInitiatorResponderTracer
, dtTracePeerSelectionCounters
Expand Down Expand Up @@ -1010,6 +1015,7 @@ runM Interfaces
--
let peerChurnGovernor' = Governor.peerChurnGovernor
dtTracePeerSelectionTracer
dtTraceChurnCounters
daDeadlineChurnInterval
daBulkChurnInterval
(policyPeerShareOverallTimeout peerSelectionPolicy)
Expand Down
56 changes: 43 additions & 13 deletions ouroboros-network/src/Ouroboros/Network/PeerSelection/Churn.hs
Expand Up @@ -6,7 +6,10 @@

-- | This subsystem manages the discovery and selection of /upstream/ peers.
--
module Ouroboros.Network.PeerSelection.Churn (peerChurnGovernor) where
module Ouroboros.Network.PeerSelection.Churn
( peerChurnGovernor
, ChurnCounters (..)
) where

import Data.Void (Void)

Expand All @@ -31,6 +34,8 @@ import Ouroboros.Network.PeerSelection.PeerMetric
type ModifyPeerSelectionTargets = PeerSelectionTargets -> PeerSelectionTargets
type CheckPeerSelectionCounters = PeerSelectionCounters -> PeerSelectionTargets -> Bool

-- | Value emitted through the churn counters tracer at the end of a churn
-- step (both when the step completes and when it times out): the
-- 'ChurnAction' which was performed and the change of the counter tracked for
-- that action, i.e. its value at the end of the step minus its value when the
-- targets were modified (see @updateTargets@).
--
data ChurnCounters = ChurnCounter ChurnAction Int

-- | Churn governor.
--
-- At every churn interval decrease active peers for a short while (1s), so that
Expand All @@ -46,6 +51,7 @@ peerChurnGovernor :: forall m peeraddr.
, MonadCatch m
)
=> Tracer m (TracePeerSelection peeraddr)
-> Tracer m ChurnCounters
-> DiffTime
-- ^ the base for churn interval in the deadline mode.
-> DiffTime
Expand All @@ -63,7 +69,7 @@ peerChurnGovernor :: forall m peeraddr.
-> STM m PeerSelectionCounters
-> STM m UseBootstrapPeers
-> m Void
peerChurnGovernor tracer
peerChurnGovernor tracer churnTracer
deadlineChurnInterval bulkChurnInterval requestPeersTimeout
_metrics churnModeVar inRng getFetchMode base
peerSelectionVar readCounters
Expand Down Expand Up @@ -100,31 +106,43 @@ peerChurnGovernor tracer
-- Perform a single churn step: modify the peer selection targets, then block
-- until either the counters satisfy the new targets or the timeout fires, and
-- trace the outcome.
--
-- Both tracers receive the difference between the value of the tracked
-- counter at the end of the step and its value at the moment the targets were
-- modified.
--
updateTargets
  :: ChurnAction
  -- ^ churn action, used for tracing
  -> (PeerSelectionCounters -> Int)
  -- ^ counter getter
  -> DiffTime
  -- ^ timeout
  -> ModifyPeerSelectionTargets
  -- ^ function which modifies the targets
  -> CheckPeerSelectionCounters
  -- ^ check counters against the new targets
  -> m ()
updateTargets churnAction getCounter timeoutDelay modifyTargets checkCounters = do
  -- in a single transaction: read the current value of the counter, update
  -- the targets and return the new targets
  (c, targets) <- atomically $
    (,) <$> (getCounter <$> readCounters)
        <*> stateTVar peerSelectionVar ((\a -> (a, a)) . modifyTargets)

  -- create timeout and block on counters
  bracketOnError
    (registerDelayCancellable timeoutDelay)
    (\(_readTimeout, cancelTimeout) -> cancelTimeout)
    (\(readTimeout, cancelTimeout) -> do
        -- block until counters reached the targets, or the timeout fires;
        -- `Right` carries the counter when the targets were reached, `Left`
        -- when the timeout fired first
        a <- atomically $ do
          counters <- readCounters
          runFirstToFinish $
               FirstToFinish (check (checkCounters counters targets)
                                $> Right (getCounter counters))
            <> FirstToFinish (readTimeout >>= \case
                                TimeoutPending -> retry
                                _              -> pure (Left (getCounter counters)))
        case a of
          Right c' -> do
            -- the targets were reached before the timeout fired, so the timer
            -- is still pending and must be cancelled
            cancelTimeout
            let r = c' - c
            traceWith tracer      (TraceChurnAction churnAction r)
            traceWith churnTracer (ChurnCounter churnAction r)
          Left c' -> do
            -- the timeout already fired; there is nothing left to cancel
            let r = c' - c
            traceWith tracer      (TraceChurnTimeout churnAction r)
            traceWith churnTracer (ChurnCounter churnAction r)
    )

--
Expand Down Expand Up @@ -406,72 +424,84 @@ peerChurnGovernor tracer

-- Purge the worst active peers.
updateTargets DecreasedActivePeers
numberOfActivePeers
deactivateTimeout -- chainsync might timeout after 5mins
(decreaseActivePeers churnMode)
checkActivePeersDecreased

-- Pick new active peers.
updateTargets IncreasedActivePeers
numberOfActivePeers
shortTimeout
(increaseActivePeers churnMode)
checkActivePeersIncreased

-- Purge the worst active big ledger peers.
updateTargets DecreasedActiveBigLedgerPeers
numberOfActiveBigLedgerPeers
deactivateTimeout -- chainsync might timeout after 5mins
(decreaseActiveBigLedgerPeers churnMode)
(checkActiveBigLedgerPeersDecreased)

-- Pick new active big ledger peers.
updateTargets IncreasedActiveBigLedgerPeers
numberOfActiveBigLedgerPeers
shortTimeout
(increaseActiveBigLedgerPeers churnMode)
checkActiveBigLedgerPeersIncreased

-- Forget the worst performing established peers.
updateTargets DecreasedEstablishedPeers
numberOfEstablishedPeers
(1 + closeConnectionTimeout)
(decreaseEstablishedPeers churnMode ubp)
(checkEstablishedPeersDecreased)

-- Forget the worst performing established big ledger peers.
updateTargets DecreasedEstablishedBigLedgerPeers
numberOfEstablishedBigLedgerPeers
(1 + closeConnectionTimeout)
decreaseEstablishedBigLedgerPeers
checkEstablishedBigLedgerPeersDecreased

-- Forget the worst performing known peers (root peers, ledger peers)
updateTargets DecreasedKnownPeers
numberOfKnownPeers
shortTimeout
decreaseKnownPeers
checkKnownPeersDecreased

-- Pick new known peers
updateTargets IncreasedKnownPeers
numberOfKnownPeers
(2 * requestPeersTimeout + shortTimeout)
increaseKnownPeers
checkKnownPeersIncreased

-- Forget the worst performing known big ledger peers.
updateTargets DecreasedKnownBigLedgerPeers
numberOfKnownBigLedgerPeers
shortTimeout
decreaseKnownBigLedgerPeers
checkKnownBigLedgerPeersDecreased

-- Pick new known big ledger peers
updateTargets IncreasedKnownBigLedgerPeers
numberOfKnownBigLedgerPeers
(2 * requestPeersTimeout + shortTimeout)
increaseKnownBigLedgerPeers
checkKnownBigLedgerPeersIncreased

-- Pick new non-active peers
updateTargets IncreasedEstablishedPeers
numberOfEstablishedPeers
churnEstablishConnectionTimeout
(increaseEstablishedPeers churnMode ubp)
checkEstablishedPeersIncreased

-- Pick new non-active big ledger peers
updateTargets IncreasedEstablishedBigLedgerPeers
numberOfEstablishedBigLedgerPeers
churnEstablishConnectionTimeout
increaseEstablishedBigLedgerPeers
checkEstablishedBigLedgerPeersIncreased
Expand Down
Expand Up @@ -22,6 +22,7 @@ module Ouroboros.Network.PeerSelection.Governor
, peerSelectionGovernor
-- * Peer churn governor
, peerChurnGovernor
, ChurnCounters (..)
-- * Internals exported for testing
, assertPeerSelectionState
, sanePeerSelectionTargets
Expand Down Expand Up @@ -52,7 +53,8 @@ import Control.Monad.Class.MonadTimer.SI
import Control.Tracer (Tracer (..), traceWith)
import System.Random

import Ouroboros.Network.PeerSelection.Churn (peerChurnGovernor)
import Ouroboros.Network.PeerSelection.Churn (ChurnCounters (..),
peerChurnGovernor)
import Ouroboros.Network.PeerSelection.Governor.ActivePeers qualified as ActivePeers
import Ouroboros.Network.PeerSelection.Governor.BigLedgerPeers qualified as BigLedgerPeers
import Ouroboros.Network.PeerSelection.Governor.EstablishedPeers qualified as EstablishedPeers
Expand Down
Expand Up @@ -1344,8 +1344,8 @@ data TracePeerSelection peeraddr =

| TraceChurnWait DiffTime
| TraceChurnMode ChurnMode
| TraceChurnAction ChurnAction
| TraceChurnTimeout ChurnAction
| TraceChurnAction ChurnAction Int
| TraceChurnTimeout ChurnAction Int

| TraceLedgerStateJudgementChanged LedgerStateJudgement
| TraceOnlyBootstrapPeers
Expand Down

0 comments on commit 1ecaf98

Please sign in to comment.