Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ library
Database.LSMTree.Internal.UniqCounter
Database.LSMTree.Internal.Unsliced
Database.LSMTree.Internal.Vector
Database.LSMTree.Internal.Vector.Growing
Database.LSMTree.Internal.WriteBuffer
Database.LSMTree.Internal.WriteBufferBlobs
Database.LSMTree.Monoidal
Expand Down
51 changes: 16 additions & 35 deletions src/Database/LSMTree/Internal/IndexOrdinaryAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ import Prelude hiding (take)

import Control.Exception (assert)
import Control.Monad.ST.Strict (ST)
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
writePrimVar)
import Data.Vector (force, take, unsafeFreeze)
import Data.Vector.Mutable (MVector)
import qualified Data.Vector.Mutable as Mutable (unsafeNew, write)
import qualified Data.Vector.Primitive as Primitive (Vector, length)
import Data.Word (Word16, Word8)
import Database.LSMTree.Internal.Chunk (Baler, Chunk, createBaler,
Expand All @@ -33,33 +28,28 @@ import Database.LSMTree.Internal.IndexOrdinary
import Database.LSMTree.Internal.Serialise
(SerialisedKey (SerialisedKey'))
import Database.LSMTree.Internal.Vector (byteVectorFromPrim)
import Database.LSMTree.Internal.Vector.Growing (GrowingVector)
import qualified Database.LSMTree.Internal.Vector.Growing as Growing (append,
freeze, new)

{-|
A general-purpose fence pointer index under incremental construction.

A value @IndexOrdinaryAcc buffer keyCountRef baler@ denotes a partially
constructed index with the following properties:

* The keys that the index assigns to pages are stored as a prefix of the
mutable vector @buffer@.
* The reference @keyCountRef@ points to the number of those keys.
* The @baler@ object is used by the index for incremental output of the
serialised key list.
A value @IndexOrdinaryAcc lastKeys baler@ denotes a partially constructed
index that assigns keys to pages according to @lastKeys@ and uses @baler@
for incremental output of the serialised key list.
-}
data IndexOrdinaryAcc s = IndexOrdinaryAcc
!(MVector s SerialisedKey)
!(PrimVar s Int)
!(GrowingVector s SerialisedKey)
!(Baler s)

-- | Creates a new, initially empty, index.
new :: Int -- ^ Maximum number of keys
new :: Int -- ^ Initial size of the key buffer
-> Int -- ^ Minimum chunk size in bytes
-> ST s (IndexOrdinaryAcc s) -- ^ Construction of the index
new maxKeyCount minChunkSize = assert (maxKeyCount >= 0) $
IndexOrdinaryAcc <$>
Mutable.unsafeNew maxKeyCount <*>
newPrimVar 0 <*>
createBaler minChunkSize
new initialKeyBufferSize minChunkSize = IndexOrdinaryAcc <$>
Growing.new initialKeyBufferSize <*>
createBaler minChunkSize

{-|
Appends keys to the key list of an index and outputs newly available chunks
Expand All @@ -69,26 +59,18 @@ new maxKeyCount minChunkSize = assert (maxKeyCount >= 0) $
word may result in a corrupted serialised key list.
-}
append :: Append -> IndexOrdinaryAcc s -> ST s (Maybe Chunk)
append instruction (IndexOrdinaryAcc buffer keyCountRef baler)
append instruction (IndexOrdinaryAcc lastKeys baler)
= case instruction of
AppendSinglePage _ key -> do
keyCount <- readPrimVar keyCountRef
Mutable.write buffer keyCount key
writePrimVar keyCountRef (succ keyCount)
Growing.append lastKeys 1 key
feedBaler (keyListElem key) baler
AppendMultiPage key overflowPageCount -> do
keyCount <- readPrimVar keyCountRef
let

pageCount :: Int
!pageCount = succ (fromIntegral overflowPageCount)

keyCount' :: Int
!keyCount' = keyCount + pageCount

mapM_ (flip (Mutable.write buffer) key)
[keyCount .. pred keyCount']
writePrimVar keyCountRef keyCount'
Growing.append lastKeys pageCount key
feedBaler (concat (replicate pageCount (keyListElem key))) baler
where

Expand All @@ -112,8 +94,7 @@ append instruction (IndexOrdinaryAcc buffer keyCountRef baler)
@index@ is not used afterwards.
-}
unsafeEnd :: IndexOrdinaryAcc s -> ST s (Maybe Chunk, IndexOrdinary)
unsafeEnd (IndexOrdinaryAcc buffer keyCountRef baler) = do
keyCount <- readPrimVar keyCountRef
keys <- force <$> take keyCount <$> unsafeFreeze buffer
unsafeEnd (IndexOrdinaryAcc lastKeys baler) = do
keys <- Growing.freeze lastKeys
remnant <- unsafeEndBaler baler
return (remnant, IndexOrdinary keys)
120 changes: 120 additions & 0 deletions src/Database/LSMTree/Internal/Vector/Growing.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}

{- HLINT ignore "Avoid restricted alias" -}

-- | Vectors with support for appending elements.
module Database.LSMTree.Internal.Vector.Growing
(
GrowingVector,
new,
append,
freeze
)
where

import Prelude hiding (init, last, length)

import Control.Monad (when)
import Control.Monad.ST.Strict (ST)
import Data.Primitive.PrimVar (PrimVar, newPrimVar, readPrimVar,
writePrimVar)
import Data.STRef.Strict (STRef, newSTRef, readSTRef, writeSTRef)
import Data.Vector (Vector)
import qualified Data.Vector as Mutable (freeze)
import Data.Vector.Mutable (MVector)
import qualified Data.Vector.Mutable as Mutable (grow, length, new, set, slice,
take)

{-|
A vector with support for appending elements.

Internally, the elements of a growing vector are stored in a buffer. This
buffer is automatically enlarged whenever this is needed for storing
additional elements. On each such enlargement, the size of the buffer is
multiplied by a power of 2, whose exponent is chosen just big enough to make
the final buffer size at least as high as the new vector length.

Note that, while buffer sizes and vector lengths are represented as 'Int'
values internally, the above-described buffer enlargement scheme has the
consequence that the largest possible buffer size and thus the largest
possible vector length may not be the maximum 'Int' value. That said, they
are always greater than half the maximum 'Int' value, which should be enough
for all practical purposes.
-}
data GrowingVector s a = GrowingVector
!(STRef s (MVector s a)) -- Reference to the buffer
!(PrimVar s Int) -- Reference to the length

-- | Creates a new, initially empty, vector.
new :: Int -- ^ Initial buffer size
-> ST s (GrowingVector s a) -- ^ Construction of the vector
new illegalInitialBufferSize | illegalInitialBufferSize <= 0
= error "Initial buffer size not positive"
new initialBufferSize
= do
buffer <- Mutable.new initialBufferSize
bufferRef <- newSTRef $! buffer
lengthRef <- newPrimVar 0
return (GrowingVector bufferRef lengthRef)

{-|
Appends a value a certain number of times to a vector. If a negative number
is provided as the count, the vector is not changed.
-}
append :: forall s a . GrowingVector s a -> Int -> a -> ST s ()
append _ pseudoCount _ | pseudoCount <= 0
= return ()
append (GrowingVector bufferRef lengthRef) count val
= do
length <- readPrimVar lengthRef
makeRoom
buffer' <- readSTRef bufferRef
Mutable.set (Mutable.slice length count buffer') val
where

makeRoom :: ST s ()
makeRoom = do
length <- readPrimVar lengthRef
when (count > maxBound - length) (error "New length too large")
buffer <- readSTRef bufferRef
let

bufferSize :: Int
!bufferSize = Mutable.length buffer

length' :: Int
!length' = length + count

when (bufferSize < length') $ do
let

higherBufferSizes :: [Int]
higherBufferSizes = tail (init ++ [last]) where

init :: [Int]
last :: Int
(init, last : _) = span (<= maxBound `div` 2) $
iterate (* 2) bufferSize
{-NOTE:
In order to prevent overflow, we have to start with the
current buffer size here, although we know that it is
not sufficient.
-}

sufficientBufferSizes :: [Int]
sufficientBufferSizes = dropWhile (< length') higherBufferSizes

case sufficientBufferSizes of
[]
-> error "No sufficient buffer size available"
bufferSize' : _
-> Mutable.grow buffer (bufferSize' - bufferSize) >>=
(writeSTRef bufferRef $!)
writePrimVar lengthRef length'

-- | Turns a growing vector into an ordinary vector.
freeze :: GrowingVector s a -> ST s (Vector a)
freeze (GrowingVector bufferRef lengthRef) = do
buffer <- readSTRef bufferRef
length <- readPrimVar lengthRef
Mutable.freeze (Mutable.take length buffer)
17 changes: 7 additions & 10 deletions test/Test/Database/LSMTree/Internal/IndexOrdinary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,6 @@ serialisedIndex entryCount lastKeys

-- ** Construction via appending

-- Yields the number of keys that an append operation adds to an index.
appendedKeysCount :: Append -> Int
appendedKeysCount (AppendSinglePage _ _)
= 1
appendedKeysCount (AppendMultiPage _ overflowPageCount)
= succ (fromIntegral overflowPageCount)

-- Yields the keys that an append operation adds to an index.
appendedKeys :: Append -> [SerialisedKey]
appendedKeys (AppendSinglePage _ lastKey)
Expand Down Expand Up @@ -265,7 +258,7 @@ lastKeysBlockFromAppends appends = lastKeysBlock where
-}
incrementalConstruction :: [Append] -> (IndexOrdinary, Primitive.Vector Word8)
incrementalConstruction appends = runST $ do
acc <- new keyCount minChunkSize
acc <- new initialKeyBufferSize minChunkSize
commonChunks <- mapM (flip append acc) appends
(remnant, unserialised) <- unsafeEnd acc
let
Expand All @@ -277,8 +270,12 @@ incrementalConstruction appends = runST $ do
return (unserialised, serialised)
where

keyCount :: Int
keyCount = sum (map appendedKeysCount appends)
{-
We do not need to vary the initial key buffer size, since we are not
testing growing vectors here.
-}
initialKeyBufferSize :: Int
initialKeyBufferSize = 0x100

{-
We do not need to vary the minimum chunk size, since we are not testing
Expand Down