diff --git a/bench/micro/Bench/Database/LSMTree/Internal/IndexCompact.hs b/bench/micro/Bench/Database/LSMTree/Internal/IndexCompact.hs index aa266c48e..25a788488 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/IndexCompact.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/IndexCompact.hs @@ -22,6 +22,7 @@ import qualified Data.Vector.Unboxed.Mutable as VUM import Data.Word import Database.LSMTree.Extras import Database.LSMTree.Extras.Generators +import Database.LSMTree.Extras.Index import Database.LSMTree.Extras.Random import Database.LSMTree.Extras.UTxO import Database.LSMTree.Internal.IndexCompact diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 74d1e84cc..44dffea61 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -300,6 +300,7 @@ library extras exposed-modules: Database.LSMTree.Extras Database.LSMTree.Extras.Generators + Database.LSMTree.Extras.Index Database.LSMTree.Extras.NoThunks Database.LSMTree.Extras.Orphans Database.LSMTree.Extras.Random diff --git a/src-extras/Database/LSMTree/Extras/Generators.hs b/src-extras/Database/LSMTree/Extras/Generators.hs index ba6351b01..2a7009181 100644 --- a/src-extras/Database/LSMTree/Extras/Generators.hs +++ b/src-extras/Database/LSMTree/Extras/Generators.hs @@ -48,10 +48,10 @@ import qualified Data.Vector.Primitive as VP import Data.Word import Database.LSMTree.Common (Range (..)) import Database.LSMTree.Extras +import Database.LSMTree.Extras.Index (Append (..)) import Database.LSMTree.Extras.Orphans () import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) -import Database.LSMTree.Internal.IndexCompactAcc (Append (..)) import qualified Database.LSMTree.Internal.Merge as Merge import Database.LSMTree.Internal.Page (PageNo (..)) import Database.LSMTree.Internal.RawBytes as RB diff --git a/src-extras/Database/LSMTree/Extras/Index.hs b/src-extras/Database/LSMTree/Extras/Index.hs new file mode 100644 index 000000000..9045d2b15 --- /dev/null +++ b/src-extras/Database/LSMTree/Extras/Index.hs @@ -0,0 +1,72 @@ +module Database.LSMTree.Extras.Index +( + Append (AppendSinglePage, AppendMultiPage), + append, + append' +) +where + +import Control.DeepSeq (NFData (rnf)) +import Control.Monad.ST.Strict (ST) +import Data.Foldable (toList) +import Data.Word (Word32) +import Database.LSMTree.Internal.Chunk (Chunk) +import Database.LSMTree.Internal.IndexCompactAcc (IndexCompactAcc) +import qualified Database.LSMTree.Internal.IndexCompactAcc as IndexCompact + (appendMulti, appendSingle) +import Database.LSMTree.Internal.IndexOrdinaryAcc (IndexOrdinaryAcc) +import qualified Database.LSMTree.Internal.IndexOrdinaryAcc as IndexOrdinary + (appendMulti, appendSingle) +import Database.LSMTree.Internal.Serialise (SerialisedKey) + +-- | Instruction for appending pages, to be used in conjunction with indexes. +data Append + = {-| + Append a single page that fully comprises one or more key–value pairs. + -} + AppendSinglePage + SerialisedKey -- ^ Minimum key + SerialisedKey -- ^ Maximum key + | {-| + Append multiple pages that together comprise a single key–value pair. + -} + AppendMultiPage + SerialisedKey -- ^ Sole key + Word32 -- ^ Number of overflow pages + +instance NFData Append where + + rnf (AppendSinglePage minKey maxKey) + = rnf minKey `seq` rnf maxKey + rnf (AppendMultiPage key overflowPageCount) + = rnf key `seq` rnf overflowPageCount + +{-| + Add information about appended pages to an index under incremental + construction. + + Internally, 'append' uses 'IndexCompact.appendSingle' and + 'IndexCompact.appendMulti', and the usage restrictions of those functions + apply also here. +-} +append :: Append -> IndexCompactAcc s -> ST s [Chunk] +append instruction indexAcc = case instruction of + AppendSinglePage minKey maxKey + -> toList <$> IndexCompact.appendSingle (minKey, maxKey) indexAcc + AppendMultiPage key overflowPageCount + -> IndexCompact.appendMulti (key, overflowPageCount) indexAcc + +{-| + A variant of 'append' for ordinary indexes, which is only used temporarily + until there is a type class of index types. + + Internally, 'append'' uses 'IndexOrdinary.appendSingle' and + 'IndexOrdinary.appendMulti', and the usage restrictions of those functions + apply also here. +-} +append' :: Append -> IndexOrdinaryAcc s -> ST s [Chunk] +append' instruction indexAcc = case instruction of + AppendSinglePage minKey maxKey + -> toList <$> IndexOrdinary.appendSingle (minKey, maxKey) indexAcc + AppendMultiPage key overflowPageCount + -> IndexOrdinary.appendMulti (key, overflowPageCount) indexAcc diff --git a/src/Database/LSMTree/Internal/IndexCompactAcc.hs b/src/Database/LSMTree/Internal/IndexCompactAcc.hs index 3ba0a1ae7..22960015a 100644 --- a/src/Database/LSMTree/Internal/IndexCompactAcc.hs +++ b/src/Database/LSMTree/Internal/IndexCompactAcc.hs @@ -14,8 +14,6 @@ module Database.LSMTree.Internal.IndexCompactAcc ( -- $construction-invariants IndexCompactAcc (..) , new - , Append (..) - , append , appendSingle , appendMulti , unsafeEnd @@ -30,11 +28,9 @@ module Database.LSMTree.Internal.IndexCompactAcc ( import Control.Exception (assert) #endif -import Control.DeepSeq (NFData (..)) import Control.Monad (when) import Control.Monad.ST.Strict import Data.Bit hiding (flipBit) -import Data.Foldable (toList) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as NE import Data.Map.Range (Bound (..)) @@ -126,26 +122,6 @@ newPinnedMVec64 lenWords = do setByteArray mba 0 lenWords (0 :: Word64) return (VUM.MV_Word64 (VPM.MVector 0 lenWords mba)) --- | Min\/max key-info for pages -data Append = - -- | One or more keys are in this page, and their values fit within a single - -- page. - AppendSinglePage SerialisedKey SerialisedKey - -- | There is only one key in this page, and it's value does not fit within - -- a single page. - | AppendMultiPage SerialisedKey Word32 -- ^ Number of overflow pages - -instance NFData Append where - rnf (AppendSinglePage kmin kmax) = rnf kmin `seq` rnf kmax - rnf (AppendMultiPage k nOverflow) = rnf k `seq` rnf nOverflow - --- | Append a new page entry to a mutable compact index. --- --- INVARIANTS: see [construction invariants](#construction-invariants). -append :: Append -> IndexCompactAcc s -> ST s [Chunk] -append (AppendSinglePage kmin kmax) ica = toList <$> appendSingle (kmin, kmax) ica -append (AppendMultiPage k n) ica = appendMulti (k, n) ica - -- | Append a single page to a mutable compact index. -- -- INVARIANTS: see [construction invariants](#construction-invariants). diff --git a/src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs b/src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs index ee7f41c7a..28c447dce 100644 --- a/src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs +++ b/src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs @@ -8,7 +8,8 @@ module Database.LSMTree.Internal.IndexOrdinaryAcc ( IndexOrdinaryAcc, new, - append, + appendSingle, + appendMulti, unsafeEnd ) where @@ -17,12 +18,11 @@ import Prelude hiding (take) import Control.Exception (assert) import Control.Monad.ST.Strict (ST) +import Data.Maybe (maybeToList) import qualified Data.Vector.Primitive as Primitive (Vector, length) -import Data.Word (Word16, Word8) +import Data.Word (Word16, Word32, Word8) import Database.LSMTree.Internal.Chunk (Baler, Chunk, createBaler, feedBaler, unsafeEndBaler) -import Database.LSMTree.Internal.IndexCompactAcc - (Append (AppendMultiPage, AppendSinglePage)) import Database.LSMTree.Internal.IndexOrdinary (IndexOrdinary (IndexOrdinary)) import Database.LSMTree.Internal.Serialise @@ -51,41 +51,58 @@ new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$> Growing.new initialKeyBufferSize <*> createBaler minChunkSize +-- Yields the serialisation of an element of a key list. +keyListElem :: SerialisedKey -> [Primitive.Vector Word8] +keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where + + keySize :: Int + !keySize = Primitive.length keyBytes + + keySizeAsWord16 :: Word16 + !keySizeAsWord16 = assert (keySize <= fromIntegral (maxBound :: Word16)) $ + fromIntegral keySize + + keySizeBytes :: Primitive.Vector Word8 + !keySizeBytes = byteVectorFromPrim keySizeAsWord16 + {-| - Appends keys to the key list of an index and outputs newly available chunks - of the serialised key list. + Adds information about a single page that fully comprises one or more + key–value pairs to an index and outputs newly available chunks of the + serialised key list. - __Warning:__ Appending keys whose length cannot be represented by a 16-bit - word may result in a corrupted serialised key list. + __Warning:__ Using keys whose length cannot be represented by a 16-bit word + may result in a corrupted serialised key list. -} -append :: Append -> IndexOrdinaryAcc s -> ST s (Maybe Chunk) -append instruction (IndexOrdinaryAcc lastKeys baler) - = case instruction of - AppendSinglePage _ key -> do - Growing.append lastKeys 1 key - feedBaler (keyListElem key) baler - AppendMultiPage key overflowPageCount -> do - let - - pageCount :: Int - !pageCount = succ (fromIntegral overflowPageCount) - - Growing.append lastKeys pageCount key - feedBaler (concat (replicate pageCount (keyListElem key))) baler - where +appendSingle :: (SerialisedKey, SerialisedKey) + -> IndexOrdinaryAcc s + -> ST s (Maybe Chunk) +appendSingle (_, key) (IndexOrdinaryAcc lastKeys baler) + = do + Growing.append lastKeys 1 key + feedBaler (keyListElem key) baler - keyListElem :: SerialisedKey -> [Primitive.Vector Word8] - keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where +{-| + Adds information about multiple pages that together comprise a single + key–value pair to an index and outputs newly available chunks of the + serialised key list. - keySize :: Int - !keySize = Primitive.length keyBytes + __Warning:__ Using keys whose length cannot be represented by a 16-bit word + may result in a corrupted serialised key list. +-} +appendMulti :: (SerialisedKey, Word32) + -> IndexOrdinaryAcc s + -> ST s [Chunk] +appendMulti (key, overflowPageCount) (IndexOrdinaryAcc lastKeys baler) + = do + Growing.append lastKeys pageCount key + maybeToList <$> feedBaler keyListElems baler + where - keySizeAsWord16 :: Word16 - !keySizeAsWord16 = assert (keySize <= fromIntegral (maxBound :: Word16)) $ - fromIntegral keySize + pageCount :: Int + !pageCount = succ (fromIntegral overflowPageCount) - keySizeBytes :: Primitive.Vector Word8 - !keySizeBytes = byteVectorFromPrim keySizeAsWord16 + keyListElems :: [Primitive.Vector Word8] + keyListElems = concat (replicate pageCount (keyListElem key)) {-| Returns the constructed index, along with a final chunk in case the diff --git a/test/Test/Database/LSMTree/Internal/IndexCompact.hs b/test/Test/Database/LSMTree/Internal/IndexCompact.hs index 2fff83ff9..5eb2ffba8 100644 --- a/test/Test/Database/LSMTree/Internal/IndexCompact.hs +++ b/test/Test/Database/LSMTree/Internal/IndexCompact.hs @@ -28,6 +28,7 @@ import qualified Data.Vector.Unboxed.Base as VU import Data.Word import Database.LSMTree.Extras import Database.LSMTree.Extras.Generators as Gen +import Database.LSMTree.Extras.Index as Cons (Append (..), append) import Database.LSMTree.Internal.BitMath import Database.LSMTree.Internal.Chunk as Chunk (toByteString) import Database.LSMTree.Internal.Entry (NumEntries (..)) diff --git a/test/Test/Database/LSMTree/Internal/IndexOrdinary.hs b/test/Test/Database/LSMTree/Internal/IndexOrdinary.hs index 0ce43e1a1..9a5173d52 100644 --- a/test/Test/Database/LSMTree/Internal/IndexOrdinary.hs +++ b/test/Test/Database/LSMTree/Internal/IndexOrdinary.hs @@ -16,7 +16,7 @@ import qualified Data.ByteString.Short as ShortByteString (length, pack) import Data.Either (isLeft) import Data.List (genericReplicate) import qualified Data.List as List (tail) -import Data.Maybe (catMaybes) +import Data.Maybe (maybeToList) import Data.Primitive.ByteArray (ByteArray (ByteArray), ByteArray#) import Data.Vector (Vector, all, fromList, head, last, length, notElem, splitAt, tail, takeWhile, toList, (!)) @@ -25,15 +25,14 @@ import qualified Data.Vector.Primitive as Primitive (Vector (Vector), concat, import Data.Word (Word16, Word32, Word64, Word8) import Database.LSMTree.Extras.Generators (LogicalPageSummaries, toAppends) +import Database.LSMTree.Extras.Index + (Append (AppendMultiPage, AppendSinglePage), append') import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteVector) import Database.LSMTree.Internal.Entry (NumEntries (NumEntries)) -import Database.LSMTree.Internal.IndexCompactAcc - (Append (AppendMultiPage, AppendSinglePage)) import Database.LSMTree.Internal.IndexOrdinary (IndexOrdinary (IndexOrdinary), fromSBS, search, toLastKeys) -import Database.LSMTree.Internal.IndexOrdinaryAcc (append, new, - unsafeEnd) +import Database.LSMTree.Internal.IndexOrdinaryAcc (new, unsafeEnd) import Database.LSMTree.Internal.Page (PageNo (PageNo), PageSpan (PageSpan)) import Database.LSMTree.Internal.Serialise @@ -259,14 +258,14 @@ lastKeysBlockFromAppends appends = lastKeysBlock where incrementalConstruction :: [Append] -> (IndexOrdinary, Primitive.Vector Word8) incrementalConstruction appends = runST $ do acc <- new initialKeyBufferSize minChunkSize - commonChunks <- mapM (flip append acc) appends + commonChunks <- concat <$> mapM (flip append' acc) appends (remnant, unserialised) <- unsafeEnd acc let serialised :: Primitive.Vector Word8 - serialised = Primitive.concat $ - map Chunk.toByteVector $ - catMaybes (commonChunks ++ [remnant]) + serialised = Primitive.concat $ + map Chunk.toByteVector $ + commonChunks ++ maybeToList remnant return (unserialised, serialised) where