From d6921bd52257491ce6006d06aa0b519960b339f9 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 9 Dec 2024 12:54:14 +0100 Subject: [PATCH 1/5] Boilerplate for the implementation of n-way unions We add a new `unions` function to the `Internal` module, which should eventually contain the full implementation of unions. For now, the function contains boilerplate for: * Checking that all input tables have the same configuration, and * Checking that all input tables are in the same session, and * Duplicating references to the table contents for each of the tables * Creating a new table with unioned table contents The unioning of table contents is not yet implemented and will follow in some later set of PRs. --- .../Database/LSMTree/Extras/NoThunks.hs | 7 +- src/Database/LSMTree.hs | 16 +- src/Database/LSMTree/Internal.hs | 180 ++++++++++++++++-- src/Database/LSMTree/Internal/Paths.hs | 1 + src/Database/LSMTree/Monoidal.hs | 16 +- src/Database/LSMTree/Normal.hs | 4 +- 6 files changed, 185 insertions(+), 39 deletions(-) diff --git a/src-extras/Database/LSMTree/Extras/NoThunks.hs b/src-extras/Database/LSMTree/Extras/NoThunks.hs index 228102e31..b0ff43c18 100644 --- a/src-extras/Database/LSMTree/Extras/NoThunks.hs +++ b/src-extras/Database/LSMTree/Extras/NoThunks.hs @@ -125,15 +125,16 @@ deriving anyclass instance (NoThunksIOLike m, Typeable m, Typeable h, Typeable ( deriving stock instance Generic (Internal.Table m h) -- | Does not check 'NoThunks' for the 'Internal.Session' that this -- 'Internal.Table' belongs to. -deriving anyclass instance (NoThunksIOLike m, Typeable m, Typeable h, Typeable (PrimState m)) - => NoThunks (Internal.Table m h) +deriving via AllowThunksIn '["tableSession"] (Table m h) + instance (NoThunksIOLike m, Typeable m, Typeable h, Typeable (PrimState m)) + => NoThunks (Internal.Table m h) deriving stock instance Generic (TableState m h) deriving anyclass instance (NoThunksIOLike m, Typeable m, Typeable h, Typeable (PrimState m)) => NoThunks (TableState m h) deriving stock instance Generic (TableEnv m h) -deriving via AllowThunksIn ["tableSession", "tableSessionEnv"] (TableEnv m h) +deriving via AllowThunksIn '["tableSessionEnv"] (TableEnv m h) instance (NoThunksIOLike m, Typeable m, Typeable h, Typeable (PrimState m)) => NoThunks (TableEnv m h) diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 75bfa7f4c..6b0229379 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -518,28 +518,24 @@ duplicate (Internal.Table' t) = Internal.Table' <$!> Internal.duplicate t -------------------------------------------------------------------------------} {-# SPECIALISE union :: - ResolveValue v - => Table IO k v b + Table IO k v b -> Table IO k v b -> IO (Table IO k v b) #-} union :: forall m k v b. - ( IOLike m - , ResolveValue v - ) + IOLike m => Table m k v b -> Table m k v b -> m (Table m k v b) -union = error "union: not yet implemented" $ union @m @k @v @b +union = error "union: not yet implemented" $ Internal.union @m {-# SPECIALISE unions :: - ResolveValue v - => V.Vector (Table IO k v b) + V.Vector (Table IO k v b) -> IO (Table IO k v b) #-} unions :: forall m k v b. - (IOLike m, ResolveValue v) + IOLike m => V.Vector (Table m k v b) -> m (Table m k v b) -unions = error "unions: not yet implemented" $ unions @m @k @v +unions = error "unions: not yet implemented" $ Internal.unions @m {------------------------------------------------------------------------------- Monoidal value resolution diff --git a/src/Database/LSMTree/Internal.hs b/src/Database/LSMTree/Internal.hs index e76d9e1c7..a2a194195 100644 --- a/src/Database/LSMTree/Internal.hs +++ b/src/Database/LSMTree/Internal.hs @@ -66,6 +66,9 @@ module Database.LSMTree.Internal ( , listSnapshots -- * Mutiple writable tables , duplicate + -- * Table union + , union + , unions ) where import Codec.CBOR.Read @@ -218,6 +221,16 @@ data LSMTreeError = -- The 'Int' index indicates which 'BlobRef' was invalid. Many may be -- invalid but only the first is reported. | ErrBlobRefInvalid Int + -- | 'unions' was called on zero tables. Use 'new' instead. + | ErrUnionsZeroTables + -- | 'unions' was called on tables that are not in the same session. + | ErrUnionsSessionMismatch + Int -- ^ Vector index of table @t1@ involved in the mismatch + Int -- ^ Vector index of table @t2@ involved in the mismatch + -- | 'unions' was called on tables that do not have the same configuration. + | ErrUnionsTableConfigMismatch + Int -- ^ Vector index of table @t1@ involved in the mismatch + Int -- ^ Vector index of table @t2@ involved in the mismatch deriving stock (Show, Eq) deriving anyclass (Exception) @@ -243,6 +256,9 @@ data LSMTreeTrace = | TraceCursor Word64 -- ^ Cursor identifier CursorTrace + -- Unions + | TraceUnions + Int -- ^ Number of unioned tables deriving stock Show data TableTrace = @@ -284,6 +300,9 @@ data Session m h = Session { -- to the session's fields (see 'withOpenSession'). We use more -- fine-grained synchronisation for various mutable parts of an open -- session. + -- + -- INVARIANT: once the session state is changed from 'SessionOpen' to + -- 'SessionClosed', it is never changed back to 'SessionOpen' again. sessionState :: !(RWVar m (SessionState m h)) , sessionTracer :: !(Tracer m LSMTreeTrace) } @@ -298,6 +317,9 @@ data SessionState m h = data SessionEnv m h = SessionEnv { -- | The path to the directory in which this sesion is live. This is a path -- relative to root of the 'HasFS' instance. + -- + -- INVARIANT: the session root is never changed during the lifetime of a + -- session. sessionRoot :: !SessionRoot , sessionHasFS :: !(HasFS m h) , sessionHasBlockIO :: !(HasBlockIO m h) @@ -310,8 +332,9 @@ data SessionEnv m h = SessionEnv { -- -- Tables are assigned unique identifiers using 'sessionUniqCounter' to -- ensure that modifications to the set of known tables are independent. - -- Each identifier is added only once in 'new', 'openSnapshot' or - -- 'duplicate', and is deleted only once in 'close' or 'closeSession'. + -- Each identifier is added only once in 'new', 'openSnapshot', 'duplicate', + -- 'union', or 'unions', and is deleted only once in 'close' or + -- 'closeSession'. -- -- * A new table may only insert its own identifier when it has acquired the -- 'sessionState' read-lock. This is to prevent races with 'closeSession'. @@ -526,11 +549,18 @@ data Table m h = Table { , tableState :: !(RWVar m (TableState m h)) , tableArenaManager :: !(ArenaManager (PrimState m)) , tableTracer :: !(Tracer m TableTrace) + -- === Session-inherited + + -- | The session that this table belongs to. + -- + -- INVARIANT: a table only ever belongs to one session, and can't be + -- transferred to a different session. + , tableSession :: !(Session m h) } instance NFData (Table m h) where - rnf (Table a b c d) = - rnf a `seq` rnf b `seq` rnf c `seq` rwhnf d + rnf (Table a b c d e) = + rnf a `seq` rnf b `seq` rnf c `seq` rwhnf d `seq` rwhnf e -- | A table may assume that its corresponding session is still open as -- long as the table is open. A session's global resources, and therefore @@ -543,14 +573,9 @@ data TableState m h = data TableEnv m h = TableEnv { -- === Session-inherited - -- | The session that this table belongs to. - -- - -- NOTE: Consider using the 'tableSessionEnv' field and helper functions - -- like 'tableHasFS' instead of acquiring the session lock. - tableSession :: !(Session m h) -- | Use this instead of 'tableSession' for easy access. An open table may -- assume that its session is open. - , tableSessionEnv :: !(SessionEnv m h) + tableSessionEnv :: !(SessionEnv m h) -- === Table-specific @@ -688,12 +713,11 @@ newWith reg sesh seshEnv conf !am !tc = do -- /updated/ set of tracked tables. contentVar <- RW.new $ tc tableVar <- RW.new $ TableOpen $ TableEnv { - tableSession = sesh - , tableSessionEnv = seshEnv + tableSessionEnv = seshEnv , tableId = uniqueToWord64 tableId , tableContent = contentVar } - let !t = Table conf tableVar am tr + let !t = Table conf tableVar am tr sesh -- Track the current table freeTemp reg $ modifyMVar_ (sessionOpenTables seshEnv) $ pure . Map.insert (uniqueToWord64 tableId) t @@ -936,7 +960,7 @@ newCursor :: -> Table m h -> m (Cursor m h) newCursor !offsetKey t = withOpenTable t $ \thEnv -> do - let cursorSession = tableSession thEnv + let cursorSession = tableSession t let cursorSessionEnv = tableSessionEnv thEnv cursorId <- uniqueToWord64 <$> incrUniqCounter (sessionUniqCounter cursorSessionEnv) @@ -1281,3 +1305,131 @@ duplicate t@Table{..} = do tableConfig tableArenaManager content + + +{------------------------------------------------------------------------------- + Table union +-------------------------------------------------------------------------------} + +{-# SPECIALISE union :: Table IO h -> Table IO h -> IO (Table IO h) #-} +-- | See 'Database.LSMTree.Normal.union'. +union :: + (MonadMask m, MonadMVar m, MonadST m, MonadSTM m) + => Table m h + -> Table m h + -> m (Table m h) +union t1 t2 = unions $ V.fromList [t1, t2] + +{-# SPECIALISE unions :: V.Vector (Table IO h) -> IO (Table IO h) #-} +-- | See 'Database.LSMTree.Normal.unions'. +unions :: + (MonadMask m, MonadMVar m, MonadST m, MonadSTM m) + => V.Vector (Table m h) + -> m (Table m h) +unions ts + | n == 0 = throwIO ErrUnionsZeroTables + | otherwise = do + conf <- + case vmatch (V.map tableConfig ts) of + Left (i, j) -> throwIO $ ErrUnionsTableConfigMismatch i j + Right conf -> pure conf + + sesh <- + vmatchSessions ts >>= \case + Left (i, j) -> throwIO $ ErrUnionsSessionMismatch i j + Right sesh -> pure sesh + + traceWith (sessionTracer sesh) $ TraceUnions n + + -- We acquire a read-lock on the session open-state to prevent races, see + -- 'sessionOpenTables'. + modifyWithTempRegistry + (atomically $ RW.unsafeAcquireReadAccess (sessionState sesh)) + (\_ -> atomically $ RW.unsafeReleaseReadAccess (sessionState sesh)) $ \reg -> \case + SessionClosed -> throwIO ErrSessionClosed + seshState@(SessionOpen seshEnv) -> do + contents <- + V.forM ts $ \t -> do + withOpenTable t $ \tEnv -> + -- The table contents escape the read access, but we just added references + -- to each run so it is safe. + RW.withReadAccess (tableContent tEnv) (duplicateTableContent reg) + + content <- + error "unions: combine contents into merging tree" $ -- TODO + contents + + t <- + newWith + reg + sesh + seshEnv + conf + (error "unions: ArenaManager") -- TODO + content + + pure (seshState, t) + where + n = V.length ts + +-- | Like 'vmatchBy', but the match function is @(==)@. +vmatch :: Eq a => V.Vector a -> Either (Int, Int) a +vmatch = vmatchBy (==) + +-- | Check that all values in the vector match. If so, return the matched value. +-- If there is a mismatch, return the vector indices of the mismatching values. +-- +-- Assumes the input vector is non-empty. +vmatchBy :: (a -> a -> Bool) -> V.Vector a -> Either (Int, Int) a +vmatchBy eq xs0 = + case V.uncons xs0 of + Nothing -> + error "vmatch: empty vector " + Just (x, xs) -> + case V.iforM_ xs $ vmatchOne x of + Left i -> Left (0, i) + Right () -> Right x + where + vmatchOne x i y = + if (x `eq` y) + then Left i + else Right () + +-- | Check that all tables in the session match. If so, return the matched +-- session. If there is a mismatch, return the vector indices of the mismatching +-- tables. +-- +-- Assumes the input vector is non-empty. +-- +-- TODO: compare LockFileHandle instead of SessionRoot (?). We can write an Eq +-- instance for LockFileHandle based on pointer equality, just like base does +-- for Handle. +vmatchSessions :: + (MonadSTM m, MonadThrow m) + => V.Vector (Table m h) + -> m (Either (Int, Int) (Session m h)) +vmatchSessions ts0 + | Just (t, ts) <- V.uncons ts0 + = withSessionRoot t $ \root -> do + eith <- go root 1 ts + pure $ case eith of + Left i -> Left (0, i) + Right () -> Right (tableSession t) + | otherwise + = error "vmatchSessions: empty vector" + where + -- Check that the session roots for all tables are the same. There can only + -- be one *open/active* session per directory because of cooperative file + -- locks, so each unique *open* session has a unique session root. We check + -- that all the table's sessions are open at the same time while comparing + -- the session roots. + go root !i ts + | Just (t', ts') <- V.uncons ts + = withSessionRoot t' $ \root' -> + if root == root' + then go root (i+1) ts' + else pure (Left i) + | otherwise + = pure (Right ()) + + withSessionRoot t k = withOpenSession (tableSession t) $ k . sessionRoot diff --git a/src/Database/LSMTree/Internal/Paths.hs b/src/Database/LSMTree/Internal/Paths.hs index 61e10afb3..f75ba0aab 100644 --- a/src/Database/LSMTree/Internal/Paths.hs +++ b/src/Database/LSMTree/Internal/Paths.hs @@ -59,6 +59,7 @@ import System.FS.API newtype SessionRoot = SessionRoot { getSessionRoot :: FsPath } + deriving stock Eq lockFile :: SessionRoot -> FsPath lockFile (SessionRoot dir) = dir mkFsPath ["lock"] diff --git a/src/Database/LSMTree/Monoidal.hs b/src/Database/LSMTree/Monoidal.hs index e846b5794..deef7e665 100644 --- a/src/Database/LSMTree/Monoidal.hs +++ b/src/Database/LSMTree/Monoidal.hs @@ -649,8 +649,7 @@ duplicate (Internal.MonoidalTable t) = Internal.MonoidalTable <$> Internal.dupli -------------------------------------------------------------------------------} {-# SPECIALISE union :: - ResolveValue v - => Table IO k v + Table IO k v -> Table IO k v -> IO (Table IO k v) #-} -- | Union two full tables, creating a new table. @@ -665,17 +664,14 @@ duplicate (Internal.MonoidalTable t) = Internal.MonoidalTable <$> Internal.dupli -- NOTE: unioning tables creates a new table, but does not close the tables that -- were used as inputs. union :: forall m k v. - ( IOLike m - , ResolveValue v - ) + IOLike m => Table m k v -> Table m k v -> m (Table m k v) -union = error "union: not yet implemented" $ union @m @k @v +union = error "union: not yet implemented" $ Internal.union @m {-# SPECIALISE unions :: - ResolveValue v - => V.Vector (Table IO k v) + V.Vector (Table IO k v) -> IO (Table IO k v) #-} -- | Like 'union', but for @n@ tables. -- @@ -686,10 +682,10 @@ union = error "union: not yet implemented" $ union @m @k @v -- -- * Unioning 0 tables is an exception. unions :: forall m k v. - (IOLike m, ResolveValue v) + IOLike m => V.Vector (Table m k v) -> m (Table m k v) -unions = error "unions: not yet implemented" $ unions @m @k @v +unions = error "unions: not yet implemented" $ Internal.unions @m {------------------------------------------------------------------------------- Monoidal value resolution diff --git a/src/Database/LSMTree/Normal.hs b/src/Database/LSMTree/Normal.hs index 4ab0d6fc0..b5badc1f4 100644 --- a/src/Database/LSMTree/Normal.hs +++ b/src/Database/LSMTree/Normal.hs @@ -787,7 +787,7 @@ union :: forall m k v b. => Table m k v b -> Table m k v b -> m (Table m k v b) -union = error "union: not yet implemented" $ union @m @k @v +union = error "union: not yet implemented" $ Internal.union @m {-# SPECIALISE unions :: V.Vector (Table IO k v b) -> IO (Table IO k v b) #-} -- | Like 'union', but for @n@ tables. @@ -802,4 +802,4 @@ unions :: forall m k v b. IOLike m => V.Vector (Table m k v b) -> m (Table m k v b) -unions = error "union: not yet implemented" $ union @m @k @v +unions = error "unions: not yet implemented" $ Internal.unions @m From cdc56008128519134cb18569ae7c47777e6b3936 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Wed, 11 Dec 2024 10:50:29 +0100 Subject: [PATCH 2/5] Use the internal `union`s implementation in the public API We have to check table types before calling the internal `union`s code from the public API. --- src/Database/LSMTree.hs | 24 +++++++++++++++++++++--- src/Database/LSMTree/Internal.hs | 4 ++++ src/Database/LSMTree/Monoidal.hs | 24 ++++++++++++++++++++++-- src/Database/LSMTree/Normal.hs | 24 +++++++++++++++++++++--- 4 files changed, 68 insertions(+), 8 deletions(-) diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 6b0229379..20f52748e 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE MagicHash #-} + -- | This module is experimental. It is mainly used for testing purposes. -- -- See the 'Normal' and 'Monoidal' modules for documentation. @@ -105,7 +107,7 @@ import Control.Monad.Class.MonadThrow import Data.Bifunctor (Bifunctor (..)) import Data.Coerce (coerce) import Data.Kind (Type) -import Data.Typeable (Proxy (..), eqT, type (:~:) (Refl)) +import Data.Typeable (Proxy (..), Typeable, eqT, type (:~:) (Refl)) import qualified Data.Vector as V import Database.LSMTree.Common (BlobRef (BlobRef), IOLike, Range (..), SerialiseKey, SerialiseValue, Session, SnapshotName, @@ -122,6 +124,7 @@ import qualified Database.LSMTree.Internal.Vector as V import Database.LSMTree.Monoidal (ResolveValue (..), resolveDeserialised, resolveValueAssociativity, resolveValueValidOutput) +import GHC.Exts (Proxy#, proxy#) {------------------------------------------------------------------------------- Tables @@ -526,7 +529,7 @@ union :: forall m k v b. => Table m k v b -> Table m k v b -> m (Table m k v b) -union = error "union: not yet implemented" $ Internal.union @m +union t1 t2 = unions $ V.fromList [t1, t2] {-# SPECIALISE unions :: V.Vector (Table IO k v b) @@ -535,7 +538,22 @@ unions :: forall m k v b. IOLike m => V.Vector (Table m k v b) -> m (Table m k v b) -unions = error "unions: not yet implemented" $ Internal.unions @m +unions ts0 + | Just (Internal.Table' (t' :: Internal.Table m h), ts) + <- V.uncons ts0 + = do ts' <- V.imapM (checkTableType (proxy# @h)) ts + Internal.Table' <$> Internal.unions (t' `V.cons` ts') + | otherwise = throwIO Internal.ErrUnionsZeroTables + where + checkTableType :: + forall h. Typeable h + => Proxy# h + -> Int + -> Table m k v b + -> m (Internal.Table m h) + checkTableType _ i (Internal.Table' (t :: Internal.Table m h')) + | Just Refl <- eqT @h @h' = pure t + | otherwise = throwIO (Internal.ErrUnionsTableTypeMismatch 0 i) {------------------------------------------------------------------------------- Monoidal value resolution diff --git a/src/Database/LSMTree/Internal.hs b/src/Database/LSMTree/Internal.hs index a2a194195..b41b2fc09 100644 --- a/src/Database/LSMTree/Internal.hs +++ b/src/Database/LSMTree/Internal.hs @@ -223,6 +223,10 @@ data LSMTreeError = | ErrBlobRefInvalid Int -- | 'unions' was called on zero tables. Use 'new' instead. | ErrUnionsZeroTables + -- | 'unions' was called on tables that are not of the same type. + | ErrUnionsTableTypeMismatch + Int -- ^ Vector index of table @t1@ involved in the mismatch + Int -- ^ Vector index of table @t2@ involved in the mismatch -- | 'unions' was called on tables that are not in the same session. | ErrUnionsSessionMismatch Int -- ^ Vector index of table @t1@ involved in the mismatch diff --git a/src/Database/LSMTree/Monoidal.hs b/src/Database/LSMTree/Monoidal.hs index deef7e665..c258faa93 100644 --- a/src/Database/LSMTree/Monoidal.hs +++ b/src/Database/LSMTree/Monoidal.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE MagicHash #-} + -- | On disk key-value tables, implemented as Log Structured Merge (LSM) trees. -- -- This module is the API for \"monoidal\" tables, as opposed to \"normal\" @@ -120,11 +122,13 @@ module Database.LSMTree.Monoidal ( import Control.DeepSeq import Control.Exception (assert) import Control.Monad ((<$!>)) +import Control.Monad.Class.MonadThrow import Data.Bifunctor (Bifunctor (..)) import Data.Coerce (coerce) import Data.Kind (Type) import Data.Monoid (Sum (..)) import Data.Proxy (Proxy (Proxy)) +import Data.Typeable (Typeable, eqT, type (:~:) (Refl)) import qualified Data.Vector as V import Database.LSMTree.Common (IOLike, Range (..), SerialiseKey, SerialiseValue (..), Session, SnapshotName, closeSession, @@ -136,6 +140,7 @@ import Database.LSMTree.Internal.RawBytes (RawBytes) import qualified Database.LSMTree.Internal.Serialise as Internal import qualified Database.LSMTree.Internal.Snapshot as Internal import qualified Database.LSMTree.Internal.Vector as V +import GHC.Exts (Proxy#, proxy#) -- $resource-management -- See "Database.LSMTree.Normal#g:resource" @@ -668,7 +673,7 @@ union :: forall m k v. => Table m k v -> Table m k v -> m (Table m k v) -union = error "union: not yet implemented" $ Internal.union @m +union t1 t2 = unions $ V.fromList [t1, t2] {-# SPECIALISE unions :: V.Vector (Table IO k v) @@ -685,7 +690,22 @@ unions :: forall m k v. IOLike m => V.Vector (Table m k v) -> m (Table m k v) -unions = error "unions: not yet implemented" $ Internal.unions @m +unions ts0 + | Just (Internal.MonoidalTable (t' :: Internal.Table m h), ts) + <- V.uncons ts0 + = do ts' <- V.imapM (checkTableType (proxy# @h)) ts + Internal.MonoidalTable <$> Internal.unions (t' `V.cons` ts') + | otherwise = throwIO Internal.ErrUnionsZeroTables + where + checkTableType :: + forall h. Typeable h + => Proxy# h + -> Int + -> Table m k v + -> m (Internal.Table m h) + checkTableType _ i (Internal.MonoidalTable (t :: Internal.Table m h')) + | Just Refl <- eqT @h @h' = pure t + | otherwise = throwIO (Internal.ErrUnionsTableTypeMismatch 0 i) {------------------------------------------------------------------------------- Monoidal value resolution diff --git a/src/Database/LSMTree/Normal.hs b/src/Database/LSMTree/Normal.hs index b5badc1f4..37e67c566 100644 --- a/src/Database/LSMTree/Normal.hs +++ b/src/Database/LSMTree/Normal.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE MagicHash #-} + -- | On disk key-value tables, implemented as Log Structured Merge (LSM) trees. -- -- This module is the API for \"normal\" tables, as opposed to \"monoidal\" @@ -116,7 +118,7 @@ import Control.Monad import Control.Monad.Class.MonadThrow import Data.Bifunctor (Bifunctor (..)) import Data.Kind (Type) -import Data.Typeable (eqT, type (:~:) (Refl)) +import Data.Typeable (Typeable, eqT, type (:~:) (Refl)) import qualified Data.Vector as V import Database.LSMTree.Common (BlobRef (BlobRef), IOLike, Range (..), SerialiseKey, SerialiseValue, Session, SnapshotName, @@ -129,6 +131,7 @@ import qualified Database.LSMTree.Internal.Entry as Entry import qualified Database.LSMTree.Internal.Serialise as Internal import qualified Database.LSMTree.Internal.Snapshot as Internal import qualified Database.LSMTree.Internal.Vector as V +import GHC.Exts (Proxy#, proxy#) -- $resource-management -- Sessions, tables and cursors use resources and as such need to be @@ -787,7 +790,7 @@ union :: forall m k v b. => Table m k v b -> Table m k v b -> m (Table m k v b) -union = error "union: not yet implemented" $ Internal.union @m +union t1 t2 = unions $ V.fromList [t1, t2] {-# SPECIALISE unions :: V.Vector (Table IO k v b) -> IO (Table IO k v b) #-} -- | Like 'union', but for @n@ tables. @@ -802,4 +805,19 @@ unions :: forall m k v b. IOLike m => V.Vector (Table m k v b) -> m (Table m k v b) -unions = error "unions: not yet implemented" $ Internal.unions @m +unions ts0 + | Just (Internal.NormalTable (t' :: Internal.Table m h), ts) + <- V.uncons ts0 + = do ts' <- V.imapM (checkTableType (proxy# @h)) ts + Internal.NormalTable <$> Internal.unions (t' `V.cons` ts') + | otherwise = throwIO Internal.ErrUnionsZeroTables + where + checkTableType :: + forall h. Typeable h + => Proxy# h + -> Int + -> Table m k v b + -> m (Internal.Table m h) + checkTableType _ i (Internal.NormalTable (t :: Internal.Table m h')) + | Just Refl <- eqT @h @h' = pure t + | otherwise = throwIO (Internal.ErrUnionsTableTypeMismatch 0 i) From b21691b861e14002d30f9fe5bbcb05cd4fb911b1 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Wed, 11 Dec 2024 10:52:18 +0100 Subject: [PATCH 3/5] Fill in the proper expected exception in unions unit tests --- test/Test/Database/LSMTree/UnitTests.hs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/test/Test/Database/LSMTree/UnitTests.hs b/test/Test/Database/LSMTree/UnitTests.hs index ee3ff6aea..f4c121361 100644 --- a/test/Test/Database/LSMTree/UnitTests.hs +++ b/test/Test/Database/LSMTree/UnitTests.hs @@ -36,9 +36,7 @@ tests = -- Properties - , testProperty "prop_unions_0" $ - -- TODO: enable once unions are implemented - QC.expectFailure prop_unions_0 + , testProperty "prop_unions_0" $ prop_unions_0 , testProperty "prop_unions_1" $ -- TODO: enable once unions are implemented QC.expectFailure prop_unions_1 @@ -161,12 +159,8 @@ unit_snapshots = prop_unions_0 :: Property prop_unions_0 = QC.once $ QC.ioProperty $ - assertException err $ + assertException ErrUnionsZeroTables $ void $ unions @_ @Key1 @Value1 @Blob1 V.empty - where - -- TODO: fill in once unions has an implementation - err :: LSMTreeError - err = error "unit_unions_0: unions has no implementation yet" -- | Unions of 1 table are equivalent to duplicate prop_unions_1 :: Property From 0a6ae76e45920d21999057946dfb2e512ed59073 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Thu, 12 Dec 2024 17:00:31 +0100 Subject: [PATCH 4/5] Use NonEmpty lists in unions --- src/Database/LSMTree.hs | 22 +-- src/Database/LSMTree/Internal.hs | 158 ++++++++++----------- src/Database/LSMTree/Monoidal.hs | 28 ++-- src/Database/LSMTree/Normal.hs | 29 ++-- test/Database/LSMTree/Class.hs | 5 +- test/Database/LSMTree/Model/IO.hs | 6 +- test/Database/LSMTree/Model/Session.hs | 19 ++- test/Database/LSMTree/Model/Table.hs | 5 +- test/Test/Database/LSMTree/StateMachine.hs | 16 ++- test/Test/Database/LSMTree/UnitTests.hs | 12 +- 10 files changed, 140 insertions(+), 160 deletions(-) diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 20f52748e..54c7bbe17 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -107,6 +107,7 @@ import Control.Monad.Class.MonadThrow import Data.Bifunctor (Bifunctor (..)) import Data.Coerce (coerce) import Data.Kind (Type) +import Data.List.NonEmpty (NonEmpty (..)) import Data.Typeable (Proxy (..), Typeable, eqT, type (:~:) (Refl)) import qualified Data.Vector as V import Database.LSMTree.Common (BlobRef (BlobRef), IOLike, Range (..), @@ -529,21 +530,20 @@ union :: forall m k v b. => Table m k v b -> Table m k v b -> m (Table m k v b) -union t1 t2 = unions $ V.fromList [t1, t2] +union t1 t2 = unions $ t1 :| [t2] {-# SPECIALISE unions :: - V.Vector (Table IO k v b) + NonEmpty (Table IO k v b) -> IO (Table IO k v b) #-} unions :: forall m k v b. IOLike m - => V.Vector (Table m k v b) + => NonEmpty (Table m k v b) -> m (Table m k v b) -unions ts0 - | Just (Internal.Table' (t' :: Internal.Table m h), ts) - <- V.uncons ts0 - = do ts' <- V.imapM (checkTableType (proxy# @h)) ts - Internal.Table' <$> Internal.unions (t' `V.cons` ts') - | otherwise = throwIO Internal.ErrUnionsZeroTables +unions (t :| ts) = + case t of + Internal.Table' (t' :: Internal.Table m h) -> do + ts' <- zipWithM (checkTableType (proxy# @h)) [1..] ts + Internal.Table' <$> Internal.unions (t' :| ts') where checkTableType :: forall h. Typeable h @@ -551,8 +551,8 @@ unions ts0 -> Int -> Table m k v b -> m (Internal.Table m h) - checkTableType _ i (Internal.Table' (t :: Internal.Table m h')) - | Just Refl <- eqT @h @h' = pure t + checkTableType _ i (Internal.Table' (t' :: Internal.Table m h')) + | Just Refl <- eqT @h @h' = pure t' | otherwise = throwIO (Internal.ErrUnionsTableTypeMismatch 0 i) {------------------------------------------------------------------------------- diff --git a/src/Database/LSMTree/Internal.hs b/src/Database/LSMTree/Internal.hs index b41b2fc09..90386a715 100644 --- a/src/Database/LSMTree/Internal.hs +++ b/src/Database/LSMTree/Internal.hs @@ -77,7 +77,7 @@ import Control.Concurrent.Class.MonadSTM (MonadSTM (..)) import Control.Concurrent.Class.MonadSTM.RWVar (RWVar) import qualified Control.Concurrent.Class.MonadSTM.RWVar as RW import Control.DeepSeq -import Control.Monad (unless) +import Control.Monad (forM, unless, zipWithM_) import Control.Monad.Class.MonadST (MonadST (..)) import Control.Monad.Class.MonadThrow import Control.Monad.Primitive @@ -88,6 +88,8 @@ import Data.Arena (ArenaManager, newArenaManager) import Data.Foldable import Data.Functor.Compose (Compose (..)) import Data.Kind +import Data.List.NonEmpty (NonEmpty (..)) +import qualified Data.List.NonEmpty as NE import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes) @@ -221,8 +223,6 @@ data LSMTreeError = -- The 'Int' index indicates which 'BlobRef' was invalid. Many may be -- invalid but only the first is reported. | ErrBlobRefInvalid Int - -- | 'unions' was called on zero tables. Use 'new' instead. - | ErrUnionsZeroTables -- | 'unions' was called on tables that are not of the same type. | ErrUnionsTableTypeMismatch Int -- ^ Vector index of table @t1@ involved in the mismatch @@ -1322,118 +1322,110 @@ union :: => Table m h -> Table m h -> m (Table m h) -union t1 t2 = unions $ V.fromList [t1, t2] +union t1 t2 = unions $ t1 :| [t2] -{-# SPECIALISE unions :: V.Vector (Table IO h) -> IO (Table IO h) #-} +{-# SPECIALISE unions :: NonEmpty (Table IO h) -> IO (Table IO h) #-} -- | See 'Database.LSMTree.Normal.unions'. unions :: (MonadMask m, MonadMVar m, MonadST m, MonadSTM m) - => V.Vector (Table m h) + => NonEmpty (Table m h) -> m (Table m h) -unions ts - | n == 0 = throwIO ErrUnionsZeroTables - | otherwise = do - conf <- - case vmatch (V.map tableConfig ts) of - Left (i, j) -> throwIO $ ErrUnionsTableConfigMismatch i j - Right conf -> pure conf +unions ts = do + sesh <- + matchSessions ts >>= \case + Left (i, j) -> throwIO $ ErrUnionsSessionMismatch i j + Right sesh -> pure sesh + + traceWith (sessionTracer sesh) $ TraceUnions n + + -- TODO: Do we really need the configs to match exactly? It's okay as a + -- requirement for now, but we might want to revisit it. Some settings don't + -- really need to match for things to work, but of course we'd still need to + -- answer the question of which config to use for the new table, possibly + -- requiring to supply it manually? Or, we could generalise the exact match + -- to have a config compatibility test and config merge? + conf <- + case match (fmap tableConfig ts) of + Left (i, j) -> throwIO $ ErrUnionsTableConfigMismatch i j + Right conf -> pure conf - sesh <- - vmatchSessions ts >>= \case - Left (i, j) -> throwIO $ ErrUnionsSessionMismatch i j - Right sesh -> pure sesh - - traceWith (sessionTracer sesh) $ TraceUnions n - - -- We acquire a read-lock on the session open-state to prevent races, see - -- 'sessionOpenTables'. - modifyWithTempRegistry - (atomically $ RW.unsafeAcquireReadAccess (sessionState sesh)) - (\_ -> atomically $ RW.unsafeReleaseReadAccess (sessionState sesh)) $ \reg -> \case - SessionClosed -> throwIO ErrSessionClosed - seshState@(SessionOpen seshEnv) -> do - contents <- - V.forM ts $ \t -> do - withOpenTable t $ \tEnv -> - -- The table contents escape the read access, but we just added references - -- to each run so it is safe. - RW.withReadAccess (tableContent tEnv) (duplicateTableContent reg) - - content <- - error "unions: combine contents into merging tree" $ -- TODO - contents - - t <- - newWith - reg - sesh - seshEnv - conf - (error "unions: ArenaManager") -- TODO - content - - pure (seshState, t) + -- We acquire a read-lock on the session open-state to prevent races, see + -- 'sessionOpenTables'. + modifyWithTempRegistry + (atomically $ RW.unsafeAcquireReadAccess (sessionState sesh)) + (\_ -> atomically $ RW.unsafeReleaseReadAccess (sessionState sesh)) $ \reg -> \case + SessionClosed -> throwIO ErrSessionClosed + seshState@(SessionOpen seshEnv) -> do + contents <- + forM ts $ \t -> do + withOpenTable t $ \tEnv -> + -- The table contents escape the read access, but we just added references + -- to each run so it is safe. + RW.withReadAccess (tableContent tEnv) (duplicateTableContent reg) + + content <- + error "unions: combine contents into merging tree" $ -- TODO + contents + + t <- + newWith + reg + sesh + seshEnv + conf + (error "unions: ArenaManager") -- TODO + content + + pure (seshState, t) where - n = V.length ts - --- | Like 'vmatchBy', but the match function is @(==)@. -vmatch :: Eq a => V.Vector a -> Either (Int, Int) a -vmatch = vmatchBy (==) - --- | Check that all values in the vector match. If so, return the matched value. --- If there is a mismatch, return the vector indices of the mismatching values. --- --- Assumes the input vector is non-empty. -vmatchBy :: (a -> a -> Bool) -> V.Vector a -> Either (Int, Int) a -vmatchBy eq xs0 = - case V.uncons xs0 of - Nothing -> - error "vmatch: empty vector " - Just (x, xs) -> - case V.iforM_ xs $ vmatchOne x of - Left i -> Left (0, i) - Right () -> Right x + n = NE.length ts + +-- | Like 'matchBy', but the match function is @(==)@. +match :: Eq a => NonEmpty a -> Either (Int, Int) a +match = matchBy (==) + +-- | Check that all values in the list match. If so, return the matched value. +-- If there is a mismatch, return the list indices of the mismatching values. +matchBy :: forall a. (a -> a -> Bool) -> NonEmpty a -> Either (Int, Int) a +matchBy eq (x0 :| xs) = + case zipWithM_ (matchOne x0) [1..] xs of + Left i -> Left (0, i) + Right () -> Right x0 where - vmatchOne x i y = + matchOne :: a -> Int -> a -> Either Int () + matchOne x i y = if (x `eq` y) then Left i else Right () -- | Check that all tables in the session match. If so, return the matched --- session. If there is a mismatch, return the vector indices of the mismatching +-- session. If there is a mismatch, return the list indices of the mismatching -- tables. -- --- Assumes the input vector is non-empty. --- -- TODO: compare LockFileHandle instead of SessionRoot (?). We can write an Eq -- instance for LockFileHandle based on pointer equality, just like base does -- for Handle. -vmatchSessions :: +matchSessions :: (MonadSTM m, MonadThrow m) - => V.Vector (Table m h) + => NonEmpty (Table m h) -> m (Either (Int, Int) (Session m h)) -vmatchSessions ts0 - | Just (t, ts) <- V.uncons ts0 - = withSessionRoot t $ \root -> do +matchSessions = \(t :| ts) -> + withSessionRoot t $ \root -> do eith <- go root 1 ts pure $ case eith of Left i -> Left (0, i) Right () -> Right (tableSession t) - | otherwise - = error "vmatchSessions: empty vector" where -- Check that the session roots for all tables are the same. There can only -- be one *open/active* session per directory because of cooperative file -- locks, so each unique *open* session has a unique session root. We check -- that all the table's sessions are open at the same time while comparing -- the session roots. - go root !i ts - | Just (t', ts') <- V.uncons ts - = withSessionRoot t' $ \root' -> + go _ _ [] = pure (Right ()) + go root !i (t':ts') = + withSessionRoot t' $ \root' -> if root == root' then go root (i+1) ts' else pure (Left i) - | otherwise - = pure (Right ()) withSessionRoot t k = withOpenSession (tableSession t) $ k . sessionRoot diff --git a/src/Database/LSMTree/Monoidal.hs b/src/Database/LSMTree/Monoidal.hs index c258faa93..00b461406 100644 --- a/src/Database/LSMTree/Monoidal.hs +++ b/src/Database/LSMTree/Monoidal.hs @@ -121,11 +121,12 @@ module Database.LSMTree.Monoidal ( import Control.DeepSeq import Control.Exception (assert) -import Control.Monad ((<$!>)) +import Control.Monad (zipWithM, (<$!>)) import Control.Monad.Class.MonadThrow import Data.Bifunctor (Bifunctor (..)) import Data.Coerce (coerce) import Data.Kind (Type) +import Data.List.NonEmpty (NonEmpty (..)) import Data.Monoid (Sum (..)) import Data.Proxy (Proxy (Proxy)) import Data.Typeable (Typeable, eqT, type (:~:) (Refl)) @@ -673,29 +674,24 @@ union :: forall m k v. => Table m k v -> Table m k v -> m (Table m k v) -union t1 t2 = unions $ V.fromList [t1, t2] +union t1 t2 = unions $ t1 :| [t2] {-# SPECIALISE unions :: - V.Vector (Table IO k v) + NonEmpty (Table IO k v) -> IO (Table IO k v) #-} -- | Like 'union', but for @n@ tables. -- -- A good mental model of this operation is @'Data.Map.Lazy.unionsWith' (<>)@ on -- @'Data.Map.Lazy.Map' k v@. --- --- Exceptions: --- --- * Unioning 0 tables is an exception. unions :: forall m k v. IOLike m - => V.Vector (Table m k v) + => NonEmpty (Table m k v) -> m (Table m k v) -unions ts0 - | Just (Internal.MonoidalTable (t' :: Internal.Table m h), ts) - <- V.uncons ts0 - = do ts' <- V.imapM (checkTableType (proxy# @h)) ts - Internal.MonoidalTable <$> Internal.unions (t' `V.cons` ts') - | otherwise = throwIO Internal.ErrUnionsZeroTables +unions (t :| ts) = + case t of + Internal.MonoidalTable (t' :: Internal.Table m h) -> do + ts' <- zipWithM (checkTableType (proxy# @h)) [1..] ts + Internal.MonoidalTable <$> Internal.unions (t' :| ts') where checkTableType :: forall h. Typeable h @@ -703,8 +699,8 @@ unions ts0 -> Int -> Table m k v -> m (Internal.Table m h) - checkTableType _ i (Internal.MonoidalTable (t :: Internal.Table m h')) - | Just Refl <- eqT @h @h' = pure t + checkTableType _ i (Internal.MonoidalTable (t' :: Internal.Table m h')) + | Just Refl <- eqT @h @h' = pure t' | otherwise = throwIO (Internal.ErrUnionsTableTypeMismatch 0 i) {------------------------------------------------------------------------------- diff --git a/src/Database/LSMTree/Normal.hs b/src/Database/LSMTree/Normal.hs index 37e67c566..b214b4161 100644 --- a/src/Database/LSMTree/Normal.hs +++ b/src/Database/LSMTree/Normal.hs @@ -118,6 +118,7 @@ import Control.Monad import Control.Monad.Class.MonadThrow import Data.Bifunctor (Bifunctor (..)) import Data.Kind (Type) +import Data.List.NonEmpty (NonEmpty (..)) import Data.Typeable (Typeable, eqT, type (:~:) (Refl)) import qualified Data.Vector as V import Database.LSMTree.Common (BlobRef (BlobRef), IOLike, Range (..), @@ -790,27 +791,25 @@ union :: forall m k v b. => Table m k v b -> Table m k v b -> m (Table m k v b) -union t1 t2 = unions $ V.fromList [t1, t2] +union t1 t2 = unions $ t1 :| [t2] -{-# SPECIALISE unions :: V.Vector (Table IO k v b) -> IO (Table IO k v b) #-} +{-# SPECIALISE unions :: NonEmpty (Table IO k v b) -> IO (Table IO k v b) #-} -- | Like 'union', but for @n@ tables. -- -- A good mental model of this operation is @'Data.Map.Lazy.unions'@ on -- @'Data.Map.Lazy.Map' k v@. --- --- Exceptions: --- --- * Unioning 0 tables is an exception. +{-# SPECIALISE unions :: + NonEmpty (Table IO k v b) + -> IO (Table IO k v b) #-} unions :: forall m k v b. IOLike m - => V.Vector (Table m k v b) + => NonEmpty (Table m k v b) -> m (Table m k v b) -unions ts0 - | Just (Internal.NormalTable (t' :: Internal.Table m h), ts) - <- V.uncons ts0 - = do ts' <- V.imapM (checkTableType (proxy# @h)) ts - Internal.NormalTable <$> Internal.unions (t' `V.cons` ts') - | otherwise = throwIO Internal.ErrUnionsZeroTables +unions (t :| ts) = + case t of + Internal.NormalTable (t' :: Internal.Table m h) -> do + ts' <- zipWithM (checkTableType (proxy# @h)) [1..] ts + Internal.NormalTable <$> Internal.unions (t' :| ts') where checkTableType :: forall h. Typeable h @@ -818,6 +817,6 @@ unions ts0 -> Int -> Table m k v b -> m (Internal.Table m h) - checkTableType _ i (Internal.NormalTable (t :: Internal.Table m h')) - | Just Refl <- eqT @h @h' = pure t + checkTableType _ i (Internal.NormalTable (t' :: Internal.Table m h')) + | Just Refl <- eqT @h @h' = pure t' | otherwise = throwIO (Internal.ErrUnionsTableTypeMismatch 0 i) diff --git a/test/Database/LSMTree/Class.hs b/test/Database/LSMTree/Class.hs index e0b44ee63..4944aaf33 100644 --- a/test/Database/LSMTree/Class.hs +++ b/test/Database/LSMTree/Class.hs @@ -16,6 +16,7 @@ module Database.LSMTree.Class ( import Control.Monad.Class.MonadThrow (MonadThrow (..)) import Data.Kind (Constraint, Type) +import Data.List.NonEmpty (NonEmpty) import Data.Typeable (Proxy (..)) import qualified Data.Vector as V import Database.LSMTree as Types (LookupResult (..), QueryResult (..), @@ -167,7 +168,7 @@ class (IsSession (Session h)) => IsTable h where ( IOLike m , C k v b ) - => V.Vector (h m k v b) + => NonEmpty (h m k v b) -> m (h m k v b) withTableNew :: forall h m k v b a. @@ -204,7 +205,7 @@ withTableUnion table1 table2 = bracket (table1 `union` table2) close withTableUnions :: forall h m k v b a. (IOLike m, IsTable h, C k v b) - => V.Vector (h m k v b) + => NonEmpty (h m k v b) -> (h m k v b -> m a) -> m a withTableUnions tables = bracket (unions tables) close diff --git a/test/Database/LSMTree/Model/IO.hs b/test/Database/LSMTree/Model/IO.hs index a207f26a0..8ab6d75db 100644 --- a/test/Database/LSMTree/Model/IO.hs +++ b/test/Database/LSMTree/Model/IO.hs @@ -17,7 +17,7 @@ module Database.LSMTree.Model.IO ( import Control.Concurrent.Class.MonadSTM.Strict import Control.Exception (Exception) import Control.Monad.Class.MonadThrow (MonadThrow (..)) -import qualified Data.Vector as V +import qualified Data.List.NonEmpty as NE import qualified Database.LSMTree.Class as Class import Database.LSMTree.Model.Session (TableConfig (..)) import qualified Database.LSMTree.Model.Session as Model @@ -93,6 +93,6 @@ instance Class.IsTable Table where Table s1 <$> runInOpenSession s1 (Model.union Model.getResolve t1 t2) unions ts = - Table s <$> runInOpenSession s (Model.unions Model.getResolve (V.map thTable ts)) + Table s <$> runInOpenSession s (Model.unions Model.getResolve (fmap thTable ts)) where - Table s _ = V.head ts + Table s _ = NE.head ts diff --git a/test/Database/LSMTree/Model/Session.hs b/test/Database/LSMTree/Model/Session.hs index ed9df828c..50f4c6368 100644 --- a/test/Database/LSMTree/Model/Session.hs +++ b/test/Database/LSMTree/Model/Session.hs @@ -76,7 +76,7 @@ module Database.LSMTree.Model.Session ( , unions ) where -import Control.Monad (when) +import Control.Monad (forM, when) import Control.Monad.Except (ExceptT (..), MonadError (..), runExceptT) import Control.Monad.Identity (Identity (runIdentity)) @@ -84,6 +84,7 @@ import Control.Monad.State.Strict (MonadState (..), StateT (..), gets, modify) import Data.Data import Data.Dynamic +import Data.List.NonEmpty (NonEmpty (..)) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe (fromJust) @@ -638,14 +639,10 @@ unions :: , C k v b ) => ResolveSerialisedValue v - -> V.Vector (Table k v b) + -> NonEmpty (Table k v b) -> m (Table k v b) -unions r tables - | n == 0 = throwError ErrUnionsZeroTables - | otherwise = do - tables' <- V.forM tables $ \table -> do - (_, table') <- guardTableIsOpen table - pure table' - newTableWith TableConfig $ Model.unions r tables' - where - n = V.length tables +unions r tables = do + tables' <- forM tables $ \table -> do + (_, table') <- guardTableIsOpen table + pure table' + newTableWith TableConfig $ Model.unions r tables' diff --git a/test/Database/LSMTree/Model/Table.hs b/test/Database/LSMTree/Model/Table.hs index 74d9d11ca..f04ec43e0 100644 --- a/test/Database/LSMTree/Model/Table.hs +++ b/test/Database/LSMTree/Model/Table.hs @@ -47,6 +47,7 @@ import qualified Crypto.Hash.SHA256 as SHA256 import Data.Bifunctor import qualified Data.ByteString as BS import Data.Kind (Type) +import Data.List.NonEmpty (NonEmpty (..)) import Data.Map (Map) import qualified Data.Map.Range as Map.R import qualified Data.Map.Strict as Map @@ -317,7 +318,7 @@ union r (Table xs) (Table ys) = -- | Like 'union', but for @n@ tables. unions :: ResolveSerialisedValue v - -> V.Vector (Table k v b) + -> NonEmpty (Table k v b) -> Table k v b unions r tables = - Table (Map.unionsWith (resolveValueAndBlob r) (V.map values tables)) + Table (Map.unionsWith (resolveValueAndBlob r) (fmap values tables)) diff --git a/test/Test/Database/LSMTree/StateMachine.hs b/test/Test/Database/LSMTree/StateMachine.hs index eff10bce0..cac7fd0b0 100644 --- a/test/Test/Database/LSMTree/StateMachine.hs +++ b/test/Test/Database/LSMTree/StateMachine.hs @@ -77,6 +77,8 @@ import Data.Bifunctor (Bifunctor (..)) import Data.Constraint (Dict (..)) import Data.Either (partitionEithers) import Data.Kind (Type) +import Data.List.NonEmpty (NonEmpty (..)) +import qualified Data.List.NonEmpty as NE import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes, fromJust, fromMaybe) @@ -491,7 +493,7 @@ instance ( Show (Class.TableConfig h) -> Var h (WrapTable h IO k v b) -> Act h (WrapTable h IO k v b) Unions :: C k v b - => V.Vector (Var h (WrapTable h IO k v b)) + => NonEmpty (Var h (WrapTable h IO k v b)) -> Act h (WrapTable h IO k v b) initialState = Lockstep.Defaults.initialState initModelState @@ -687,7 +689,7 @@ instance ( Eq (Class.TableConfig h) ListSnapshots -> [] Duplicate tableVar -> [SomeGVar tableVar] Union table1Var table2Var -> [SomeGVar table1Var, SomeGVar table2Var] - Unions tableVars -> [SomeGVar tableVar | tableVar <- V.toList tableVars] + Unions tableVars -> [SomeGVar tableVar | tableVar <- NE.toList tableVars] arbitraryWithVars :: ModelVarContext (ModelState h) @@ -978,7 +980,7 @@ runModel lookUp = \case . Model.runModelM (Model.union Model.getResolve (getTable $ lookUp table1Var) (getTable $ lookUp table2Var)) Unions tableVars -> wrap MTable - . Model.runModelM (Model.unions Model.getResolve (V.map (getTable . lookUp) tableVars)) + . Model.runModelM (Model.unions Model.getResolve (fmap (getTable . lookUp) tableVars)) where getTable :: ModelValue (ModelState h) (WrapTable h IO k v b) @@ -1058,7 +1060,7 @@ runIO action lookUp = ReaderT $ \(session, handler) -> do Union table1Var table2Var -> catchErr handler $ WrapTable <$> Class.union (unwrapTable $ lookUp' table1Var) (unwrapTable $ lookUp' table2Var) Unions tableVars -> catchErr handler $ - WrapTable <$> Class.unions (V.map (unwrapTable . lookUp') tableVars) + WrapTable <$> Class.unions (fmap (unwrapTable . lookUp') tableVars) lookUp' :: Var h x -> Realized IO x lookUp' = lookUpGVar (Proxy @(RealMonad h IO)) lookUp @@ -1114,7 +1116,7 @@ runIOSim action lookUp = ReaderT $ \(session, handler) -> Union table1Var table2Var -> catchErr handler $ WrapTable <$> Class.union (unwrapTable $ lookUp' table1Var) (unwrapTable $ lookUp' table2Var) Unions tableVars -> catchErr handler $ - WrapTable <$> Class.unions (V.map (unwrapTable . lookUp') tableVars) + WrapTable <$> Class.unions (fmap (unwrapTable . lookUp') tableVars) lookUp' :: Var h x -> Realized (IOSim s) x lookUp' = lookUpGVar (Proxy @(RealMonad h (IOSim s))) lookUp @@ -1324,12 +1326,12 @@ arbitraryActionWithVars _ label ctx (ModelState st _stats) = -- Unit tests for 0-way and 1-way unions are included in the UnitTests -- module. n-way unions for n>3 lead to larger unions, which are less likely -- to be finished before the end of an action sequence. - genUnionsTableVars :: Gen (V.Vector (Var h (WrapTable h IO k v b))) + genUnionsTableVars :: Gen (NonEmpty (Var h (WrapTable h IO k v b))) genUnionsTableVars = do tableVar1 <- genTableVar tableVar2 <- genTableVar mtableVar3 <- QC.liftArbitrary genTableVar - pure $ V.fromList $ catMaybes [ + pure $ NE.fromList $ catMaybes [ Just tableVar1, Just tableVar2, mtableVar3 ] diff --git a/test/Test/Database/LSMTree/UnitTests.hs b/test/Test/Database/LSMTree/UnitTests.hs index f4c121361..604c892a9 100644 --- a/test/Test/Database/LSMTree/UnitTests.hs +++ b/test/Test/Database/LSMTree/UnitTests.hs @@ -5,10 +5,10 @@ module Test.Database.LSMTree.UnitTests (tests) where -import Control.Monad (void) import Control.Tracer (nullTracer) import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as BS +import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.Map as Map import qualified Data.Vector as V import Data.Word @@ -36,7 +36,6 @@ tests = -- Properties - , testProperty "prop_unions_0" $ prop_unions_0 , testProperty "prop_unions_1" $ -- TODO: enable once unions are implemented QC.expectFailure prop_unions_1 @@ -155,13 +154,6 @@ unit_snapshots = snap1 = "table1" snap2 = "table2" --- | Unions of 0 tables fail with an exception -prop_unions_0 :: Property -prop_unions_0 = - QC.once $ QC.ioProperty $ - assertException ErrUnionsZeroTables $ - void $ unions @_ @Key1 @Value1 @Blob1 V.empty - -- | Unions of 1 table are equivalent to duplicate prop_unions_1 :: Property prop_unions_1 = @@ -171,7 +163,7 @@ prop_unions_1 = withTable @_ @Key1 @Value1 @Blob1 sess defaultTableConfig $ \table -> do inserts table [(Key1 17, Value1 42, Nothing)] - bracket (unions $ V.singleton table) close $ \table' -> + bracket (unions $ table :| []) close $ \table' -> bracket (duplicate table) close $ \table'' -> do inserts table [(Key1 17, Value1 43, Nothing)] inserts table [(Key1 17, Value1 44, Nothing)] From cb6827e935604b0e04c0b6c487ed32e8b263f328 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 16 Dec 2024 17:25:26 +0100 Subject: [PATCH 5/5] Trace table identifiers in union trace messages --- src/Database/LSMTree/Internal.hs | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Database/LSMTree/Internal.hs b/src/Database/LSMTree/Internal.hs index 90386a715..bcdfa0ca7 100644 --- a/src/Database/LSMTree/Internal.hs +++ b/src/Database/LSMTree/Internal.hs @@ -262,7 +262,7 @@ data LSMTreeTrace = CursorTrace -- Unions | TraceUnions - Int -- ^ Number of unioned tables + (NonEmpty Word64) -- ^ Table identifiers deriving stock Show data TableTrace = @@ -553,6 +553,12 @@ data Table m h = Table { , tableState :: !(RWVar m (TableState m h)) , tableArenaManager :: !(ArenaManager (PrimState m)) , tableTracer :: !(Tracer m TableTrace) + -- | Session-unique identifier for this table. + -- + -- INVARIANT: a table's identifier is never changed during the lifetime of + -- the table. + , tableId :: !Word64 + -- === Session-inherited -- | The session that this table belongs to. @@ -563,8 +569,8 @@ data Table m h = Table { } instance NFData (Table m h) where - rnf (Table a b c d e) = - rnf a `seq` rnf b `seq` rnf c `seq` rwhnf d `seq` rwhnf e + rnf (Table a b c d e f) = + rnf a `seq` rnf b `seq` rnf c `seq` rwhnf d `seq` rnf e`seq` rwhnf f -- | A table may assume that its corresponding session is still open as -- long as the table is open. A session's global resources, and therefore @@ -583,8 +589,6 @@ data TableEnv m h = TableEnv { -- === Table-specific - -- | Session-unique identifier for this table. - , tableId :: !Word64 -- | All of the state being in a single 'StrictMVar' is a relatively simple -- solution, but there could be more concurrency. For example, while inserts -- are in progress, lookups could still look at the old state without @@ -615,12 +619,12 @@ tableSessionUniqCounter :: TableEnv m h -> UniqCounter m tableSessionUniqCounter = sessionUniqCounter . tableSessionEnv {-# INLINE tableSessionUntrackTable #-} -{-# SPECIALISE tableSessionUntrackTable :: TableEnv IO h -> IO () #-} +{-# SPECIALISE tableSessionUntrackTable :: Word64 -> TableEnv IO h -> IO () #-} -- | Open tables are tracked in the corresponding session, so when a table is -- closed it should become untracked (forgotten). -tableSessionUntrackTable :: MonadMVar m => TableEnv m h -> m () -tableSessionUntrackTable thEnv = - modifyMVar_ (sessionOpenTables (tableSessionEnv thEnv)) $ pure . Map.delete (tableId thEnv) +tableSessionUntrackTable :: MonadMVar m => Word64 -> TableEnv m h -> m () +tableSessionUntrackTable tableId thEnv = + modifyMVar_ (sessionOpenTables (tableSessionEnv thEnv)) $ pure . Map.delete tableId -- | 'withOpenTable' ensures that the table stays open for the duration of the -- provided continuation. @@ -718,10 +722,10 @@ newWith reg sesh seshEnv conf !am !tc = do contentVar <- RW.new $ tc tableVar <- RW.new $ TableOpen $ TableEnv { tableSessionEnv = seshEnv - , tableId = uniqueToWord64 tableId , tableContent = contentVar } - let !t = Table conf tableVar am tr sesh + let !tid = uniqueToWord64 tableId + !t = Table conf tableVar am tr tid sesh -- Track the current table freeTemp reg $ modifyMVar_ (sessionOpenTables seshEnv) $ pure . Map.insert (uniqueToWord64 tableId) t @@ -743,7 +747,7 @@ close t = do -- Since we have a write lock on the table state, we know that we are the -- only thread currently closing the table. We can safely make the session -- forget about this table. - freeTemp reg (tableSessionUntrackTable thEnv) + freeTemp reg (tableSessionUntrackTable (tableId t) thEnv) RW.withWriteAccess_ (tableContent thEnv) $ \tc -> do releaseTableContent reg tc pure tc @@ -969,7 +973,7 @@ newCursor !offsetKey t = withOpenTable t $ \thEnv -> do cursorId <- uniqueToWord64 <$> incrUniqCounter (sessionUniqCounter cursorSessionEnv) let cursorTracer = TraceCursor cursorId `contramap` sessionTracer cursorSession - traceWith cursorTracer $ TraceCreateCursor (tableId thEnv) + traceWith cursorTracer $ TraceCreateCursor (tableId t) -- We acquire a read-lock on the session open-state to prevent races, see -- 'sessionOpenTables'. @@ -1336,7 +1340,7 @@ unions ts = do Left (i, j) -> throwIO $ ErrUnionsSessionMismatch i j Right sesh -> pure sesh - traceWith (sessionTracer sesh) $ TraceUnions n + traceWith (sessionTracer sesh) $ TraceUnions (NE.map tableId ts) -- TODO: Do we really need the configs to match exactly? It's okay as a -- requirement for now, but we might want to revisit it. Some settings don't @@ -1377,8 +1381,6 @@ unions ts = do content pure (seshState, t) - where - n = NE.length ts -- | Like 'matchBy', but the match function is @(==)@. match :: Eq a => NonEmpty a -> Either (Int, Int) a