diff --git a/ouroboros-consensus/src/Ouroboros/Consensus/BlockchainTime/WallClock.hs b/ouroboros-consensus/src/Ouroboros/Consensus/BlockchainTime/WallClock.hs index ffd9aa9bd5e..3eb95664af3 100644 --- a/ouroboros-consensus/src/Ouroboros/Consensus/BlockchainTime/WallClock.hs +++ b/ouroboros-consensus/src/Ouroboros/Consensus/BlockchainTime/WallClock.hs @@ -1,21 +1,26 @@ +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} module Ouroboros.Consensus.BlockchainTime.WallClock ( realBlockchainTime , TraceBlockchainTimeEvent(..) + , SystemClockMovedBackException(..) -- * Low-level API (exported primarily for testing) , getWallClockSlot , waitUntilNextSlot , nominalDelay ) where +import Control.Exception (Exception) import Control.Monad import Data.Time (NominalDiffTime, diffUTCTime) import Data.Void import Control.Tracer (Tracer, traceWith) +import Control.Monad.Class.MonadThrow + import Ouroboros.Network.Block (SlotNo) import Ouroboros.Consensus.BlockchainTime.API @@ -52,28 +57,36 @@ realBlockchainTime registry tracer start ls = do threadDelay (nominalDelay delay) -- Fork thread that continuously updates the current slot - first <- fst <$> getWallClockSlot start lsVar - slot <- newTVarM first - void $ forkLinkedThread registry $ loop lsVar slot + first <- fst <$> getWallClockSlot start lsVar + slotVar <- newTVarM first + void $ forkLinkedThread registry $ loop lsVar slotVar first -- The API is now a simple STM one return BlockchainTime { - getCurrentSlot = readTVar slot + getCurrentSlot = readTVar slotVar , onSlotChange_ = fmap cancelThread . - onEachChange registry id (Just first) (readTVar slot) + onEachChange registry id (Just first) (readTVar slotVar) } where -- In each iteration of the loop, we recompute how long to wait until -- the next slot. This minimizes clock skew. - loop :: StrictTVar m FocusedSlotLengths -> StrictTVar m SlotNo -> m Void - loop lsVar slot = forever $ do - next <- waitUntilNextSlot start lsVar - atomically $ writeTVar slot next + loop :: StrictTVar m FocusedSlotLengths + -> StrictTVar m SlotNo + -> SlotNo + -> m Void + loop lsVar slotVar = go + where + go :: SlotNo -> m Void + go current = do + next <- waitUntilNextSlot start lsVar current + atomically $ writeTVar slotVar next + go next {------------------------------------------------------------------------------- Stateful wrappers around Ouroboros.Consensus.BlockchainTime.SlotLengths -------------------------------------------------------------------------------} +-- | Get current slot and time spent in that slot getWallClockSlot :: IOLike m => SystemStart -> StrictTVar m FocusedSlotLengths @@ -82,16 +95,57 @@ getWallClockSlot start lsVar = do now <- getCurrentTime atomically $ updateTVar lsVar $ slotFromUTCTime start now +-- | Wait until the next slot +-- +-- Takes the current slot number to guard against system clock changes. Any +-- clock changes that would result in the slot number to /decrease/ will result +-- in a fatal 'SystemClockMovedBackException'. When this exception is thrown, +-- the node will shut down, and should be restarted with (full?) validation +-- enabled: it is conceivable that blocks got moved to the immutable DB that, +-- due to the clock change, should not be considered immutable anymore. waitUntilNextSlot :: IOLike m => SystemStart -> StrictTVar m FocusedSlotLengths + -> SlotNo -- ^ Current slot number -> m SlotNo -waitUntilNextSlot start lsVar = do - now <- getCurrentTime - (delay, nextSlot) <- atomically $ updateTVar lsVar $ +waitUntilNextSlot start lsVar oldCurrent = do + now <- getCurrentTime + (delay, _nextSlot) <- atomically $ updateTVar lsVar $ delayUntilNextSlot start now threadDelay (nominalDelay delay) - return nextSlot + + -- At this point we expect to be in 'nextSlot', but the actual now-current + -- slot might be different: + -- + -- o If the system is under heavy load, we might have missed some slots. If + -- this is the case, that's okay, and we just report the actual + -- now-current slot as the next slot. + -- o If the system clock is adjusted back a tiny bit (maybe due to an NTP + -- client running on the system), it's possible that we are still in the + -- /old/ current slot. If this happens, we just wait again; nothing bad + -- has happened, we just stay in one slot for longer. + -- o If the system clock is adjusted back more than that, we might be in + -- a slot number /before/ the old current slot. In that case, we throw + -- an exception (see discussion above). + + (newCurrent, _timeInNewCurrent) <- getWallClockSlot start lsVar + + if | newCurrent > oldCurrent -> + return newCurrent + | newCurrent == oldCurrent -> + waitUntilNextSlot start lsVar oldCurrent + | otherwise -> + throwM $ SystemClockMovedBack now oldCurrent newCurrent + +data SystemClockMovedBackException = + -- | The system clock got moved back so far that the slot number decreased + -- + -- We record the time at which we discovered the clock change, the slot + -- number before the clock change, and the slot number after the change. + SystemClockMovedBack UTCTime SlotNo SlotNo + deriving (Show) + +instance Exception SystemClockMovedBackException {------------------------------------------------------------------------------- Auxiliary: conversions diff --git a/ouroboros-consensus/test-consensus/Test/Consensus/BlockchainTime/WallClock.hs b/ouroboros-consensus/test-consensus/Test/Consensus/BlockchainTime/WallClock.hs index 0376fbd3801..c3fb64e46d8 100644 --- a/ouroboros-consensus/test-consensus/Test/Consensus/BlockchainTime/WallClock.hs +++ b/ouroboros-consensus/test-consensus/Test/Consensus/BlockchainTime/WallClock.hs @@ -46,7 +46,7 @@ instance Arbitrary TestDelayIO where -- | Just as a sanity check, also run the tests in IO -- --- We override the maxinum number of tests since there are slow. +-- We override the maximum number of tests since there are slow. -- -- NOTE: If the system is under very heavy load, this test /could/ fail: -- the slot number after the delay could be later than the one we expect. @@ -61,7 +61,7 @@ prop_delayNextSlot TestDelayIO{..} = tdioStart <- pickSystemStart lsVar <- mkLsVar atStart <- fst <$> getWallClockSlot tdioStart lsVar - nextSlot <- waitUntilNextSlot tdioStart lsVar + nextSlot <- waitUntilNextSlot tdioStart lsVar atStart afterDelay <- fst <$> getWallClockSlot tdioStart lsVar assertEqual "atStart + 1" (atStart + 1) afterDelay assertEqual "nextSlot" nextSlot afterDelay