Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import qualified Data.Vector.Unboxed.Mutable as VUM
import Data.Word
import Database.LSMTree.Extras
import Database.LSMTree.Extras.Generators
import Database.LSMTree.Extras.Index
import Database.LSMTree.Extras.Random
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.IndexCompact
Expand Down
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ library extras
exposed-modules:
Database.LSMTree.Extras
Database.LSMTree.Extras.Generators
Database.LSMTree.Extras.Index
Database.LSMTree.Extras.NoThunks
Database.LSMTree.Extras.Orphans
Database.LSMTree.Extras.Random
Expand Down
2 changes: 1 addition & 1 deletion src-extras/Database/LSMTree/Extras/Generators.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ import qualified Data.Vector.Primitive as VP
import Data.Word
import Database.LSMTree.Common (Range (..))
import Database.LSMTree.Extras
import Database.LSMTree.Extras.Index (Append (..))
import Database.LSMTree.Extras.Orphans ()
import Database.LSMTree.Internal.BlobRef (BlobSpan (..))
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
import Database.LSMTree.Internal.IndexCompactAcc (Append (..))
import qualified Database.LSMTree.Internal.Merge as Merge
import Database.LSMTree.Internal.Page (PageNo (..))
import Database.LSMTree.Internal.RawBytes as RB
Expand Down
72 changes: 72 additions & 0 deletions src-extras/Database/LSMTree/Extras/Index.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
module Database.LSMTree.Extras.Index
(
Append (AppendSinglePage, AppendMultiPage),
append,
append'
)
where

import Control.DeepSeq (NFData (rnf))
import Control.Monad.ST.Strict (ST)
import Data.Foldable (toList)
import Data.Word (Word32)
import Database.LSMTree.Internal.Chunk (Chunk)
import Database.LSMTree.Internal.IndexCompactAcc (IndexCompactAcc)
import qualified Database.LSMTree.Internal.IndexCompactAcc as IndexCompact
(appendMulti, appendSingle)
import Database.LSMTree.Internal.IndexOrdinaryAcc (IndexOrdinaryAcc)
import qualified Database.LSMTree.Internal.IndexOrdinaryAcc as IndexOrdinary
(appendMulti, appendSingle)
import Database.LSMTree.Internal.Serialise (SerialisedKey)

-- | Instruction for appending pages, to be used in conjunction with indexes.
data Append
= {-|
Append a single page that fully comprises one or more key–value pairs.
-}
AppendSinglePage
SerialisedKey -- ^ Minimum key
SerialisedKey -- ^ Maximum key
| {-|
Append multiple pages that together comprise a single key–value pair.
-}
AppendMultiPage
SerialisedKey -- ^ Sole key
Word32 -- ^ Number of overflow pages

instance NFData Append where

rnf (AppendSinglePage minKey maxKey)
= rnf minKey `seq` rnf maxKey
rnf (AppendMultiPage key overflowPageCount)
= rnf key `seq` rnf overflowPageCount

{-|
Add information about appended pages to an index under incremental
construction.

Internally, 'append' uses 'IndexCompact.appendSingle' and
'IndexCompact.appendMulti', and the usage restrictions of those functions
apply also here.
-}
append :: Append -> IndexCompactAcc s -> ST s [Chunk]
append instruction indexAcc = case instruction of
AppendSinglePage minKey maxKey
-> toList <$> IndexCompact.appendSingle (minKey, maxKey) indexAcc
AppendMultiPage key overflowPageCount
-> IndexCompact.appendMulti (key, overflowPageCount) indexAcc

{-|
A variant of 'append' for ordinary indexes, which is only used temporarily
until there is a type class of index types.

Internally, 'append'' uses 'IndexOrdinary.appendSingle' and
'IndexOrdinary.appendMulti', and the usage restrictions of those functions
apply also here.
-}
append' :: Append -> IndexOrdinaryAcc s -> ST s [Chunk]
append' instruction indexAcc = case instruction of
AppendSinglePage minKey maxKey
-> toList <$> IndexOrdinary.appendSingle (minKey, maxKey) indexAcc
AppendMultiPage key overflowPageCount
-> IndexOrdinary.appendMulti (key, overflowPageCount) indexAcc
24 changes: 0 additions & 24 deletions src/Database/LSMTree/Internal/IndexCompactAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ module Database.LSMTree.Internal.IndexCompactAcc (
-- $construction-invariants
IndexCompactAcc (..)
, new
, Append (..)
, append
, appendSingle
, appendMulti
, unsafeEnd
Expand All @@ -30,11 +28,9 @@ module Database.LSMTree.Internal.IndexCompactAcc (
import Control.Exception (assert)
#endif

import Control.DeepSeq (NFData (..))
import Control.Monad (when)
import Control.Monad.ST.Strict
import Data.Bit hiding (flipBit)
import Data.Foldable (toList)
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NE
import Data.Map.Range (Bound (..))
Expand Down Expand Up @@ -126,26 +122,6 @@ newPinnedMVec64 lenWords = do
setByteArray mba 0 lenWords (0 :: Word64)
return (VUM.MV_Word64 (VPM.MVector 0 lenWords mba))

-- | Min\/max key-info for pages
data Append =
-- | One or more keys are in this page, and their values fit within a single
-- page.
AppendSinglePage SerialisedKey SerialisedKey
-- | There is only one key in this page, and it's value does not fit within
-- a single page.
| AppendMultiPage SerialisedKey Word32 -- ^ Number of overflow pages

instance NFData Append where
rnf (AppendSinglePage kmin kmax) = rnf kmin `seq` rnf kmax
rnf (AppendMultiPage k nOverflow) = rnf k `seq` rnf nOverflow

-- | Append a new page entry to a mutable compact index.
--
-- INVARIANTS: see [construction invariants](#construction-invariants).
append :: Append -> IndexCompactAcc s -> ST s [Chunk]
append (AppendSinglePage kmin kmax) ica = toList <$> appendSingle (kmin, kmax) ica
append (AppendMultiPage k n) ica = appendMulti (k, n) ica

Comment on lines -129 to -148
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a small suggestion for PR review ergonomics: when code is moved to a different module and subsequently changed as well, it's hard to see from the diff what the interesting code changes are. My usual approach is to move the code in one commit, and do the interesting changes in a different commit.

Right now we're only talking about a small bit of code, so it's fine and doesn't have to change in this PR, but it might be good to keep in mind for future PRs

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My usual approach is to move the code in one commit, and do the interesting changes in a different commit.

Sounds very reasonable to me. I’ll try to remember this approach and use it in future pull requests of mine. Sadly, Git essentially doesn’t track file movement. Well, its manual page calls it right at the beginning “the stupid content tracker”. 😉

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One scenario where Git is smart enough is when a file is first moved and then its contents are changed. The diff will show that the file is renamed, instead of showing that one file was deleted and one new file was created. And the diff of the renamed file just shows the changed lines, not the unchanged lines. But that's about as smart as it gets in my experience

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and it’s only based on heuristics: as soon as the old and the new content differ too much, Git will treat the old and the new file as unrelated, which according to my experience can happen even if they are morally the same file and you as a human can easily see the similarities of the old and the new content.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, now I wonder whether this mishap might have been because of me not actually moving the file but only moving its contents. Does Git perhaps track inode numbers? 🤔

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found the answer: Git does not track inode numbers. It detects renames based on content only. See the documentation of the -M option in the git-diff(1) man page.

-- | Append a single page to a mutable compact index.
--
-- INVARIANTS: see [construction invariants](#construction-invariants).
Expand Down
81 changes: 49 additions & 32 deletions src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ module Database.LSMTree.Internal.IndexOrdinaryAcc
(
IndexOrdinaryAcc,
new,
append,
appendSingle,
appendMulti,
unsafeEnd
)
where
Expand All @@ -17,12 +18,11 @@ import Prelude hiding (take)

import Control.Exception (assert)
import Control.Monad.ST.Strict (ST)
import Data.Maybe (maybeToList)
import qualified Data.Vector.Primitive as Primitive (Vector, length)
import Data.Word (Word16, Word8)
import Data.Word (Word16, Word32, Word8)
import Database.LSMTree.Internal.Chunk (Baler, Chunk, createBaler,
feedBaler, unsafeEndBaler)
import Database.LSMTree.Internal.IndexCompactAcc
(Append (AppendMultiPage, AppendSinglePage))
import Database.LSMTree.Internal.IndexOrdinary
(IndexOrdinary (IndexOrdinary))
import Database.LSMTree.Internal.Serialise
Expand Down Expand Up @@ -51,41 +51,58 @@ new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$>
Growing.new initialKeyBufferSize <*>
createBaler minChunkSize

-- Yields the serialisation of an element of a key list.
keyListElem :: SerialisedKey -> [Primitive.Vector Word8]
keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where

keySize :: Int
!keySize = Primitive.length keyBytes

keySizeAsWord16 :: Word16
!keySizeAsWord16 = assert (keySize <= fromIntegral (maxBound :: Word16)) $
fromIntegral keySize

keySizeBytes :: Primitive.Vector Word8
!keySizeBytes = byteVectorFromPrim keySizeAsWord16

{-|
Appends keys to the key list of an index and outputs newly available chunks
of the serialised key list.
Adds information about a single page that fully comprises one or more
key–value pairs to an index and outputs newly available chunks of the
serialised key list.

__Warning:__ Appending keys whose length cannot be represented by a 16-bit
word may result in a corrupted serialised key list.
__Warning:__ Using keys whose length cannot be represented by a 16-bit word
may result in a corrupted serialised key list.
-}
append :: Append -> IndexOrdinaryAcc s -> ST s (Maybe Chunk)
append instruction (IndexOrdinaryAcc lastKeys baler)
= case instruction of
AppendSinglePage _ key -> do
Growing.append lastKeys 1 key
feedBaler (keyListElem key) baler
AppendMultiPage key overflowPageCount -> do
let

pageCount :: Int
!pageCount = succ (fromIntegral overflowPageCount)

Growing.append lastKeys pageCount key
feedBaler (concat (replicate pageCount (keyListElem key))) baler
where
appendSingle :: (SerialisedKey, SerialisedKey)
-> IndexOrdinaryAcc s
-> ST s (Maybe Chunk)
appendSingle (_, key) (IndexOrdinaryAcc lastKeys baler)
= do
Growing.append lastKeys 1 key
feedBaler (keyListElem key) baler

keyListElem :: SerialisedKey -> [Primitive.Vector Word8]
keyListElem (SerialisedKey' keyBytes) = [keySizeBytes, keyBytes] where
{-|
Adds information about multiple pages that together comprise a single
key–value pair to an index and outputs newly available chunks of the
serialised key list.

keySize :: Int
!keySize = Primitive.length keyBytes
__Warning:__ Using keys whose length cannot be represented by a 16-bit word
may result in a corrupted serialised key list.
-}
appendMulti :: (SerialisedKey, Word32)
-> IndexOrdinaryAcc s
-> ST s [Chunk]
appendMulti (key, overflowPageCount) (IndexOrdinaryAcc lastKeys baler)
= do
Growing.append lastKeys pageCount key
maybeToList <$> feedBaler keyListElems baler
where

keySizeAsWord16 :: Word16
!keySizeAsWord16 = assert (keySize <= fromIntegral (maxBound :: Word16)) $
fromIntegral keySize
pageCount :: Int
!pageCount = succ (fromIntegral overflowPageCount)

keySizeBytes :: Primitive.Vector Word8
!keySizeBytes = byteVectorFromPrim keySizeAsWord16
keyListElems :: [Primitive.Vector Word8]
keyListElems = concat (replicate pageCount (keyListElem key))

{-|
Returns the constructed index, along with a final chunk in case the
Expand Down
1 change: 1 addition & 0 deletions test/Test/Database/LSMTree/Internal/IndexCompact.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import qualified Data.Vector.Unboxed.Base as VU
import Data.Word
import Database.LSMTree.Extras
import Database.LSMTree.Extras.Generators as Gen
import Database.LSMTree.Extras.Index as Cons (Append (..), append)
import Database.LSMTree.Internal.BitMath
import Database.LSMTree.Internal.Chunk as Chunk (toByteString)
import Database.LSMTree.Internal.Entry (NumEntries (..))
Expand Down
17 changes: 8 additions & 9 deletions test/Test/Database/LSMTree/Internal/IndexOrdinary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import qualified Data.ByteString.Short as ShortByteString (length, pack)
import Data.Either (isLeft)
import Data.List (genericReplicate)
import qualified Data.List as List (tail)
import Data.Maybe (catMaybes)
import Data.Maybe (maybeToList)
import Data.Primitive.ByteArray (ByteArray (ByteArray), ByteArray#)
import Data.Vector (Vector, all, fromList, head, last, length,
notElem, splitAt, tail, takeWhile, toList, (!))
Expand All @@ -25,15 +25,14 @@ import qualified Data.Vector.Primitive as Primitive (Vector (Vector), concat,
import Data.Word (Word16, Word32, Word64, Word8)
import Database.LSMTree.Extras.Generators (LogicalPageSummaries,
toAppends)
import Database.LSMTree.Extras.Index
(Append (AppendMultiPage, AppendSinglePage), append')
import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteVector)
import Database.LSMTree.Internal.Entry (NumEntries (NumEntries))
import Database.LSMTree.Internal.IndexCompactAcc
(Append (AppendMultiPage, AppendSinglePage))
import Database.LSMTree.Internal.IndexOrdinary
(IndexOrdinary (IndexOrdinary), fromSBS, search,
toLastKeys)
import Database.LSMTree.Internal.IndexOrdinaryAcc (append, new,
unsafeEnd)
import Database.LSMTree.Internal.IndexOrdinaryAcc (new, unsafeEnd)
import Database.LSMTree.Internal.Page (PageNo (PageNo),
PageSpan (PageSpan))
import Database.LSMTree.Internal.Serialise
Expand Down Expand Up @@ -259,14 +258,14 @@ lastKeysBlockFromAppends appends = lastKeysBlock where
incrementalConstruction :: [Append] -> (IndexOrdinary, Primitive.Vector Word8)
incrementalConstruction appends = runST $ do
acc <- new initialKeyBufferSize minChunkSize
commonChunks <- mapM (flip append acc) appends
commonChunks <- concat <$> mapM (flip append' acc) appends
(remnant, unserialised) <- unsafeEnd acc
let

serialised :: Primitive.Vector Word8
serialised = Primitive.concat $
map Chunk.toByteVector $
catMaybes (commonChunks ++ [remnant])
serialised = Primitive.concat $
map Chunk.toByteVector $
commonChunks ++ maybeToList remnant

return (unserialised, serialised)
where
Expand Down