From 9478f2a37934c3d5e7b2baee3929deb496fbe760 Mon Sep 17 00:00:00 2001 From: Armando Santos Date: Fri, 8 Oct 2021 13:02:07 +0100 Subject: [PATCH] Add test to check order of ConnectionManager trans - In unregisterInboundConnectionImpl: OutboundDupState Ticking -> OutboundDupState Expired - In withConnectionManager cleanup, where we put the state in TerminatedState is missing a trace. However, since this can be async the connState can come out of order and/or not making much sense. - In requestOutboundConnectionImpl bracketOnError: TerminatedState -> Unknown - In includeInboundConnectionImpl: on readPromise Left handleError case, where we put the state in either TerminatingState or TerminatedState - In unregisterInboundConnectionImpl: Only trace TerminatingState -> TerminatedState, after having written to the connVar - In a possible race between withConnectionManager finally block and forkConnectionHandler cleanup function, where the latter can run first and break an assumption made by withConnectionManager that it is the first to run so a connection shouldn't be in TerminatingState (which is not the case, since there is a race condition). So an out of order and possibly not making much sense transition is logged. - When accept call returns first than connect and the connVar gets overwritten. In requestOutboundConnectionImpl cleanup function we wrongly trace * -> TerminatedState transition of a removed connVar, we shouldn't care about that connVar anymore. - In promotedToWarmRemoteImpl, in OutboundIdleState case we are not updating the state correctly. --- .../Network/ConnectionManager/Core.hs | 360 ++++++++++++------ .../test/Test/Ouroboros/Network/Server2.hs | 56 +++ 2 files changed, 306 insertions(+), 110 deletions(-) diff --git a/ouroboros-network-framework/src/Ouroboros/Network/ConnectionManager/Core.hs b/ouroboros-network-framework/src/Ouroboros/Network/ConnectionManager/Core.hs index dfe527ff7e4..a5868331b39 100644 --- a/ouroboros-network-framework/src/Ouroboros/Network/ConnectionManager/Core.hs +++ b/ouroboros-network-framework/src/Ouroboros/Network/ConnectionManager/Core.hs @@ -34,13 +34,13 @@ import Control.Monad.Class.MonadSTM.Strict import qualified Control.Monad.Class.MonadSTM as LazySTM import Control.Tracer (Tracer, traceWith, contramap) import Data.Foldable (traverse_, foldMap') -import Data.Functor (($>)) +import Data.Functor (($>), void) import Data.Maybe (maybeToList) import Data.Proxy (Proxy (..)) import Data.Typeable (Typeable) import GHC.Stack (CallStack, HasCallStack, callStack) -import Data.Map (Map) +import Data.Map (Map, traverseWithKey) import qualified Data.Map as Map import qualified Data.Set as Set @@ -568,19 +568,37 @@ withConnectionManager ConnectionManagerArguments { k connectionManager `finally` do traceWith tracer TrShutdown + state <- atomically $ readTMVar stateVar - traverse_ - (\connVar -> do + void $ traverseWithKey + (\peerAddr connVar -> do -- cleanup handler for that thread will close socket associated -- with the thread. We put each connection in 'TerminatedState' to - -- guarantee, that non of the connection threads will enter + -- try that none of the connection threads will enter -- 'TerminatingState' (and thus delay shutdown for 'tcp_WAIT_TIME' - -- seconds) when receiving the 'AsyncCancelled' exception. - connState <- atomically $ do + -- seconds) when receiving the 'AsyncCancelled' exception. However this code + -- races with the one in 'forkConnectionHandler' and in the case that this + -- finally block loses the race the AsyncCancelled received should unblock + -- the threadDelay. + -- + -- TODO: Check if this block of code had only AsyncCancelled exceptions in + -- mind, and if given that when an AsyncCancelled is received threadDelay + -- will be unblocked; if it is really necessary the extra effort of putting + -- connections in TerminatedState. + (connState, tr, shouldTrace) <- atomically $ do connState <- readTVar connVar - writeTVar connVar (TerminatedState Nothing) - return connState - traverse_ cancel (getConnThread connState) ) + let connState' = TerminatedState Nothing + tr = TransitionTrace peerAddr (mkTransition connState connState') + absConnState = abstractState (Known connState) + shouldTrace = absConnState /= TerminatedSt + + writeTVar connVar connState' + return (connState, tr, shouldTrace) + + when shouldTrace $ + traceWith trTracer tr + traverse_ cancel (getConnThread connState) + ) state where traceCounters :: StrictTMVar m (ConnectionManagerState peerAddr handle handleError version m) -> m () @@ -648,34 +666,35 @@ withConnectionManager ConnectionManagerArguments { connState <- readTVar connVar let connState' = TerminatedState Nothing transition = mkTransition connState connState' + transitionTrace = TransitionTrace peerAddr transition case connState of ReservedOutboundState -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) UnnegotiatedState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) OutboundUniState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) OutboundDupState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) OutboundIdleState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) InboundIdleState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) InboundState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) DuplexState {} -> do writeTVar connVar connState' - return $ There (Just transition) + return $ There (Just transitionTrace) TerminatingState {} -> do return $ Here (connVar, transition) - TerminatedState {} -> + TerminatedState {} -> do return $ There Nothing case wConnVar of @@ -685,8 +704,7 @@ withConnectionManager ConnectionManagerArguments { , Left Unknown ) There mbTransition -> do - traverse_ (traceWith trTracer . TransitionTrace peerAddr) - mbTransition + traverse_ (traceWith trTracer) mbTransition close cmSnocket socket return ( Map.delete peerAddr state , Left (Known (TerminatedState Nothing)) @@ -725,8 +743,10 @@ withConnectionManager ConnectionManagerArguments { -- - `Terminate: TerminatingState → TerminatedState` transition. traceWith tracer (TrConnectionTimeWaitDone connId) - -- TerminatingState -> TerminatedState transition trs <- atomically $ do + connState <- readTVar connVar + let transition' = transition { fromState = Known connState } + shouldTrace = abstractState (Known connState) /= TerminatedSt writeTVar connVar (TerminatedState Nothing) -- We have to be careful when deleting it from -- 'ConnectionManagerState'. @@ -746,12 +766,19 @@ withConnectionManager ConnectionManagerArguments { then -- Key was present in the dictionary (stateVar) and -- removed so we trace the removal. - return [ transition - , Transition - { fromState = Known (TerminatedState Nothing) - , toState = Unknown - } - ] + return $ + if shouldTrace + then [ transition' + , Transition + { fromState = Known (TerminatedState Nothing) + , toState = Unknown + } + ] + else [ Transition + { fromState = Known (TerminatedState Nothing) + , toState = Unknown + } + ] -- Key was not present in the dictionary (stateVar), -- so we do not trace anything as it was already traced upon -- deletion. @@ -761,7 +788,7 @@ withConnectionManager ConnectionManagerArguments { -- Key was overwritten in the dictionary (stateVar), -- so we do not trace anything as it was already traced upon -- overwritting. - else return [ transition ] + else return [ ] traverse_ (traceWith trTracer . TransitionTrace peerAddr) trs traceCounters stateVar @@ -850,16 +877,69 @@ withConnectionManager ConnectionManagerArguments { res <- atomically $ readPromise reader case res of Left handleError -> do - atomically $ do - writeTVar connVar $ - case classifyHandleError handleError of - HandshakeFailure -> - TerminatingState connId connThread - (Just handleError) - HandshakeProtocolViolation -> - TerminatedState (Just handleError) - modifyTMVarPure_ stateVar (Map.delete peerAddr) + transitions <- atomically $ do + connState <- readTVar connVar + + let connState' = + case classifyHandleError handleError of + HandshakeFailure -> + TerminatingState connId connThread + (Just handleError) + HandshakeProtocolViolation -> + TerminatedState (Just handleError) + transition = mkTransition connState connState' + absConnState = abstractState (Known connState) + shouldTrace = absConnState /= TerminatedSt + + -- 'handleError' might be either a handshake negotiation + -- a protocol failure (an IO exception, a timeout or + -- codec failure). In the first case we should not reset + -- the connection as this is not a protocol error. + writeTVar connVar connState' + + updated <- + modifyTMVarPure + stateVar + ( \state -> + case Map.lookup peerAddr state of + Nothing -> (state, False) + Just v -> + if eqTVar (Proxy :: Proxy m) connVar v + then (Map.delete peerAddr state , True) + else (state , False) + ) + + if updated + then + -- Key was present in the dictionary (stateVar) and + -- removed so we trace the removal. + return $ + if shouldTrace + then [ transition + , Transition + { fromState = Known (TerminatedState Nothing) + , toState = Unknown + } + ] + else [ Transition + { fromState = Known (TerminatedState Nothing) + , toState = Unknown + } + ] + -- Key was not present in the dictionary (stateVar), + -- so we do not trace anything as it was already traced upon + -- deletion. + -- + -- OR + -- + -- Key was overwritten in the dictionary (stateVar), + -- so we do not trace anything as it was already traced upon + -- overwritting. + else return [ ] + + traverse_ (traceWith trTracer . TransitionTrace peerAddr) transitions traceCounters stateVar + return (Disconnected connId (Just handleError)) Right (handle, version) -> do @@ -980,9 +1060,10 @@ withConnectionManager ConnectionManagerArguments { -- → OutboundState Duplex -- @ OutboundDupState connId connThread handle Ticking -> do - writeTVar connVar (OutboundDupState connId connThread handle Expired) + let connState' = OutboundDupState connId connThread handle Expired + writeTVar connVar connState' return ( Nothing - , Nothing + , Just (mkTransition connState connState') , OperationSuccess KeepTr ) OutboundDupState _connId _connThread _handle Expired -> assert False $ @@ -1235,51 +1316,77 @@ withConnectionManager ConnectionManagerArguments { (socket, connId) <- unmask $ bracketOnError (openToConnect cmSnocket peerAddr) - -- we use 'uninterruptibleMask_' since 'modifyTMVarPure_' - -- can block. (\socket -> uninterruptibleMask_ $ do close cmSnocket socket - tr <- atomically $ do - connState <- readTVar connVar - let connState' = TerminatedState Nothing - writeTVar connVar connState' - modifyTMVarPure_ stateVar $ - (Map.update (\connVar' -> + res <- atomically $ do + state <- takeTMVar stateVar + + case Map.lookup peerAddr state of + -- Lookup failed, which means connection was already removed. + -- So we just update the connVar and trace accordingly. + Nothing -> do + connState <- readTVar connVar + let connState' = TerminatedState Nothing + writeTVar connVar connState' + putTMVar stateVar state + return $ + Right ( mkTransition connState connState' + , Transition (Known connState') Unknown) + + -- Current connVar. + Just connVar' -> + -- If accept call returned first than connect then the + -- connVar will be replaced. If it was replaced then we do + -- not need to do anything. Otherwise, we need to remove + -- the connVar from the state and trace accordingly. if eqTVar (Proxy :: Proxy m) connVar' connVar - then Nothing - else Just connVar') - peerAddr) - return (mkTransition connState connState') + then do + connState <- readTVar connVar + let state' = Map.delete peerAddr state + connState' = TerminatedState Nothing + writeTVar connVar connState' + putTMVar stateVar state' + return $ + Right ( mkTransition connState connState' + , Transition (Known connState') Unknown) + else do + putTMVar stateVar state + return $ + Left () + + case res of + Left _ -> pure () + Right (tr, tr') -> do + traceWith trTracer (TransitionTrace peerAddr tr) + traceWith trTracer (TransitionTrace peerAddr tr') traceCounters stateVar - traceWith trTracer (TransitionTrace peerAddr tr) - ) - $ \socket -> do - traceWith tracer (TrConnectionNotFound provenance peerAddr) - addr <- - case cmAddressType peerAddr of - Nothing -> pure Nothing - Just IPv4Address -> - traverse_ (bind cmSnocket socket) - cmIPv4Address - $> cmIPv4Address - Just IPv6Address -> - traverse_ (bind cmSnocket socket) - cmIPv6Address - $> cmIPv6Address - - traceWith tracer (TrConnect addr peerAddr) - connect cmSnocket socket peerAddr - `catch` \e -> do - traceWith tracer (TrConnectError addr peerAddr e) - -- the handler attached by `bracketOnError` will - -- reset the state - throwIO e - localAddress <- getLocalAddr cmSnocket socket - let connId = ConnectionId { localAddress - , remoteAddress = peerAddr - } - return (socket, connId) + $ \socket -> do + traceWith tracer (TrConnectionNotFound provenance peerAddr) + addr <- + case cmAddressType peerAddr of + Nothing -> pure Nothing + Just IPv4Address -> + traverse_ (bind cmSnocket socket) + cmIPv4Address + $> cmIPv4Address + Just IPv6Address -> + traverse_ (bind cmSnocket socket) + cmIPv6Address + $> cmIPv6Address + + traceWith tracer (TrConnect addr peerAddr) + connect cmSnocket socket peerAddr + `catch` \e -> do + traceWith tracer (TrConnectError addr peerAddr e) + -- the handler attached by `bracketOnError` will + -- reset the state + throwIO e + localAddress <- getLocalAddr cmSnocket socket + let connId = ConnectionId { localAddress + , remoteAddress = peerAddr + } + return (socket, connId) -- -- fork connection handler; it will unmask exceptions @@ -1309,28 +1416,71 @@ withConnectionManager ConnectionManagerArguments { res <- atomically (readPromise reader) case res of Left handleError -> do - modifyTMVar stateVar $ \state -> do + transitions <- atomically $ do + connState <- readTVar connVar + + let connState' = + case classifyHandleError handleError of + HandshakeFailure -> + TerminatingState connId connThread + (Just handleError) + HandshakeProtocolViolation -> + TerminatedState (Just handleError) + transition = mkTransition connState connState' + absConnState = abstractState (Known connState) + shouldTrace = absConnState /= TerminatedSt + -- 'handleError' might be either a handshake negotiation -- a protocol failure (an IO exception, a timeout or -- codec failure). In the first case we should not reset -- the connection as this is not a protocol error. - atomically $ writeTVar connVar $ - case classifyHandleError handleError of - HandshakeFailure -> - TerminatingState connId connThread - (Just handleError) - HandshakeProtocolViolation -> - TerminatedState (Just handleError) - - return ( Map.update - (\connVar' -> - if eqTVar (Proxy :: Proxy m) connVar' connVar - then Nothing - else Just connVar') - peerAddr - state - , Disconnected connId (Just handleError) - ) + writeTVar connVar connState' + + updated <- + modifyTMVarPure + stateVar + ( \state -> + case Map.lookup peerAddr state of + Nothing -> (state, False) + Just v -> + if eqTVar (Proxy :: Proxy m) connVar v + then (Map.delete peerAddr state , True) + else (state , False) + ) + + if updated + then + -- Key was present in the dictionary (stateVar) and + -- removed so we trace the removal. + return $ + if shouldTrace + then [ transition + , Transition + { fromState = Known (TerminatedState Nothing) + , toState = Unknown + } + ] + else [ Transition + { fromState = Known (TerminatedState Nothing) + , toState = Unknown + } + ] + -- Key was not present in the dictionary (stateVar), + -- so we do not trace anything as it was already traced upon + -- deletion. + -- + -- OR + -- + -- Key was overwritten in the dictionary (stateVar), + -- so we do not trace anything as it was already traced upon + -- overwritting. + else return [ ] + + + traverse_ (traceWith trTracer . TransitionTrace peerAddr) transitions + traceCounters stateVar + + return (Disconnected connId (Just handleError)) Right (handle, version) -> do let dataFlow = connectionDataFlow version @@ -1729,7 +1879,7 @@ withConnectionManager ConnectionManagerArguments { :: StrictTMVar m (ConnectionManagerState peerAddr handle handleError version m) -> peerAddr -> m (OperationResult AbstractState) - promotedToWarmRemoteImpl stateVar peerAddr = do + promotedToWarmRemoteImpl stateVar peerAddr = mask_ $ do result <- atomically $ do state <- readTMVar stateVar let mbConnVar = Map.lookup peerAddr state @@ -1805,7 +1955,7 @@ withConnectionManager ConnectionManagerArguments { return (OperationSuccess tr, Nothing) OutboundIdleState connId connThread handle dataFlow@Duplex -> do -- @ - -- Awake^{Duplex}_{Remote} : OutboundIdleState Duplex + -- Awake^{Duplex}_{Remote} : OutboundIdleState^\tau Duplex -- → InboundState Duplex -- @ let connState' = InboundState connId connThread handle dataFlow @@ -1996,16 +2146,6 @@ modifyTMVarPure v k = do putTMVar v a' return b - --- | Like 'modifyMVar_' but pure. --- -modifyTMVarPure_ :: MonadSTM m - => StrictTMVar m a - -> (a -> a) - -> STM m () -modifyTMVarPure_ v k = takeTMVar v >>= putTMVar v . k - - -- -- Exceptions -- diff --git a/ouroboros-network-framework/test/Test/Ouroboros/Network/Server2.hs b/ouroboros-network-framework/test/Test/Ouroboros/Network/Server2.hs index 87f7b866349..4ff2472a682 100644 --- a/ouroboros-network-framework/test/Test/Ouroboros/Network/Server2.hs +++ b/ouroboros-network-framework/test/Test/Ouroboros/Network/Server2.hs @@ -125,6 +125,8 @@ tests = prop_multinode_cm_Sim , testProperty "multinode_ig_Sim" prop_multinode_ig_Sim + , testProperty "multinode_cm_order_Sim" + prop_multinode_cm_order_Sim , testProperty "unit_connection_terminated_when_negotiating" unit_connection_terminated_when_negotiating , testGroup "generators" @@ -1929,6 +1931,35 @@ data Three a b c deriving Show +-- Assuming all transitions in the transition list are valid, we only need to +-- look at the 'toState' of the current transition and the 'fromState' of the +-- next transition. +verifyAbstractTransitionOrder :: [AbstractTransition] + -> AllProperty +verifyAbstractTransitionOrder [] = mempty +verifyAbstractTransitionOrder (h:t) = go t h + where + go :: [AbstractTransition] -> AbstractTransition -> AllProperty + -- All transitions must end in the 'UnknownConnectionSt', and since we assume all + -- transitions are valid we do not have to check the 'fromState' + go [] (Transition _ UnknownConnectionSt) = mempty + go [] tr@(Transition _ _) = + AllProperty + $ counterexample + ("\nUnexpected last transition: " ++ show tr) + (property False) + -- All transitions have to be in correct order, which means that the current + -- state we are looking at (current toState) needs to be equal to the next + -- 'fromState', in order for the transition chain to be correct. + go (next@(Transition nextFromState _) : ts) + curr@(Transition _ currToState) = + (AllProperty + $ counterexample + ("\nUnexpected transition order!\nWent from: " + ++ show curr ++ "\nto: " ++ show next) + (property (currToState == nextFromState))) + <> go ts next + -- | Property wrapping `multinodeExperiment`. -- @@ -1993,6 +2024,31 @@ prop_multinode_cm_Sim serverAcc (ArbDataFlow dataFlow) absBi script@(MultiNodeSc (Script (toBearerInfo absBi :| [noAttenuation])) maxAcceptedConnectionsLimit l +prop_multinode_cm_order_Sim :: Int -> ArbDataFlow -> AbsBearerInfo -> MultiNodeScript Int TestAddr -> Property +prop_multinode_cm_order_Sim serverAcc (ArbDataFlow dataFlow) absBi script@(MultiNodeScript l) = + let trace = runSimTrace sim + + evsATT :: Octopus (Value ()) (AbstractTransitionTrace SimAddr) + evsATT = octopusWithNameTraceEvents trace + + in tabulate "ConnectionEvents" (map showCEvs l) + . counterexample (ppScript script) + . counterexample (ppOctopus show show evsATT) + -- . counterexample (ppTrace_ trace) + . getAllProperty + . bifoldMap + ( \ case + MainReturn {} -> mempty + _ -> AllProperty (property False) + ) + verifyAbstractTransitionOrder + . splitConns + $ evsATT + where + sim :: IOSim s () + sim = multiNodeSim serverAcc dataFlow + (Script (toBearerInfo absBi :| [noAttenuation])) + maxAcceptedConnectionsLimit l -- | Property wrapping `multinodeExperiment`. --