Skip to content

Commit

Permalink
Tighten the representation of operations in the model
Browse files Browse the repository at this point in the history
Previously, the model/prototype allowed an optional blob ref with every
operation. Our real implementation however only allows a blob to be
associated with an insert. We now think the latter makes more sense, so
we now tighten the model to do the same:

data Operation = Insert  Value (Maybe BlobRef)
               | Mupsert Value
               | Delete

This also makes some of the tests simpler, since they match up more
directly.
  • Loading branch information
dcoutts committed Apr 29, 2024
1 parent 4045e3e commit f6a0359
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 175 deletions.
3 changes: 1 addition & 2 deletions bench/micro/Bench/Database/LSMTree/Internal/RawPage.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ benchmarks = rawpage `deepseq` bgroup "Bench.Database.LSMTree.Internal.RawPage"
missing = SerialisedKey $ RB.pack [1, 2, 3]

keys :: [Key]
keys = case page of
PageLogical xs -> map (\(k,_,_) -> k) xs
keys = case page of PageLogical xs -> map fst xs

existingHead :: SerialisedKey
existingHead = SerialisedKey $ RB.fromByteString $ unKey $ head keys
Expand Down
97 changes: 52 additions & 45 deletions prototypes/FormatPage.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import Data.Bits
import Data.Coerce (coerce)
import Data.Function (on)
import Data.List (foldl', nubBy, sortBy, unfoldr)
import Data.Maybe (fromJust, isJust)
import Data.Maybe (fromJust)
import Data.Word

import qualified Data.Binary.Get as Bin
Expand All @@ -52,7 +52,7 @@ import Test.Tasty.QuickCheck (testProperty)

-- | Logically, a page is a sequence of key,operation pairs (with optional
-- blobrefs), sorted by key.
newtype PageLogical = PageLogical [(Key, Operation, Maybe BlobRef)]
newtype PageLogical = PageLogical [(Key, Operation)]
deriving (Eq, Show)

newtype Key = Key ByteString deriving (Eq, Ord, Show)
Expand All @@ -61,7 +61,7 @@ newtype Value = Value ByteString deriving (Eq, Show)
unKey :: Key -> ByteString
unKey = coerce

data Operation = Insert Value
data Operation = Insert Value (Maybe BlobRef)
| Mupsert Value
| Delete
deriving (Eq, Show)
Expand Down Expand Up @@ -145,34 +145,38 @@ data PageSize = PageSize {
pageSizeEmpty :: PageSize
pageSizeEmpty = PageSize 0 0 10

pageSizeAddElem :: (Key, Operation, Maybe BlobRef)
pageSizeAddElem :: (Key, Operation)
-> PageSize -> Maybe PageSize
pageSizeAddElem (Key key, op, mblobref) (PageSize n b sz)
pageSizeAddElem (Key key, op) (PageSize n b sz)
| sz' <= 4096 || n' == 1 = Just (PageSize n' b' sz')
| otherwise = Nothing
where
n' = n+1
b' | isJust mblobref = b+1
b' | opHasBlobRef op = b+1
| otherwise = b
sz' = sz
+ (if n `mod` 64 == 0 then 8 else 0) -- blobrefs bitmap
+ (if n `mod` 32 == 0 then 8 else 0) -- operations bitmap
+ (if isJust mblobref then 12 else 0) -- blobref entry
+ (if opHasBlobRef op then 12 else 0) -- blobref entry
+ 2 -- key offsets
+ (case n of { 0 -> 4; 1 -> 0; _ -> 2}) -- value offsets
+ BS.length key
+ (case op of
Insert (Value v) -> BS.length v
Mupsert (Value v) -> BS.length v
Delete -> 0)
Insert (Value v) _ -> BS.length v
Mupsert (Value v) -> BS.length v
Delete -> 0)

opHasBlobRef :: Operation -> Bool
opHasBlobRef (Insert _ (Just _blobref)) = True
opHasBlobRef _ = False

calcPageSize :: PageLogical -> Maybe PageSize
calcPageSize (PageLogical kops) =
go pageSizeEmpty kops
where
go !pgsz [] = Just pgsz
go !pgsz ((key, op, mblobref):kops') =
case pageSizeAddElem (key, op, mblobref) pgsz of
go !pgsz ((key, op):kops') =
case pageSizeAddElem (key, op) pgsz of
Nothing -> Nothing
Just pgsz' -> go pgsz' kops'

Expand All @@ -182,24 +186,24 @@ encodePage (PageLogical kops) =
where
pageNumKeys, pageNumBlobs :: Word16
pageNumKeys = fromIntegral (length kops)
pageNumBlobs = fromIntegral (length [ b | (_,_, Just b) <- kops ])
pageNumBlobs = fromIntegral (length (filter (opHasBlobRef . snd) kops))

pageSizesOffsets@PageSizesOffsets {offKeys, offValues}
= calcPageSizeOffsets
pageNumKeys pageNumBlobs
(fromIntegral (BS.length pageKeys))
(fromIntegral (BS.length pageValues))

pageBlobRefBitmap = [ isJust mblobref | (_,_, mblobref) <- kops ]
pageOperations = [ toOperationEnum op | (_,op,_) <- kops ]
pageBlobRefs = [ blobref | (_,_, Just blobref) <- kops ]
pageBlobRefBitmap = [ opHasBlobRef op | (_,op) <- kops ]
pageOperations = [ toOperationEnum op | (_,op) <- kops ]
pageBlobRefs = [ blobref | (_,Insert _ (Just blobref)) <- kops ]

keys = [ k | (k,_,_) <- kops ]
values = [ v | (_,op,_) <- kops
keys = [ k | (k,_) <- kops ]
values = [ v | (_,op) <- kops
, let v = case op of
Insert v' -> v'
Mupsert v' -> v'
Delete -> Value (BS.empty)
Insert v' _ -> v'
Mupsert v' -> v'
Delete -> Value (BS.empty)
]

pageKeyOffsets = init $ scanl (\o k -> o + keyLen16 k)
Expand Down Expand Up @@ -313,12 +317,12 @@ decodePage :: PageIntermediate -> PageLogical
decodePage PageIntermediate{pageSizesOffsets = PageSizesOffsets{..}, ..} =
PageLogical
[ let op = case opEnum of
OpInsert -> Insert (Value value)
OpInsert -> Insert (Value value) mblobref
OpMupsert -> Mupsert (Value value)
OpDelete -> Delete
mblobref | hasBlobref = Just (pageBlobRefs !! idxBlobref)
| otherwise = Nothing
in (Key key, op, mblobref)
in (Key key, op)
| opEnum <- pageOperations
| hasBlobref <- pageBlobRefBitmap
| idxBlobref <- scanl (\o b -> if b then o+1 else o) 0 pageBlobRefBitmap
Expand Down Expand Up @@ -386,13 +390,13 @@ prop_shrink_invariant page = case mapM_ invariant (shrink page) of
invariant :: PageLogical -> Either (Key, Key) ()
invariant (PageLogical xs0) = go xs0
where
go :: [(Key, b, c)] -> Either (Key, Key) ()
go [] = Right ()
go ((k,_,_):xs) = go1 k xs
go :: [(Key, op)] -> Either (Key, Key) ()
go [] = Right ()
go ((k,_):xs) = go1 k xs

go1 :: Key -> [(Key, b, c)] -> Either (Key, Key) ()
go1 :: Key -> [(Key, op)] -> Either (Key, Key) ()
go1 _ [] = Right ()
go1 k1 ((k2,_,_):xs) =
go1 k1 ((k2,_):xs) =
if k1 < k2
then go1 k2 xs
else Left (k1, k2)
Expand Down Expand Up @@ -421,22 +425,22 @@ prop_size_distribution p@(PageLogical es) =
tabulate "page size in bytes"
[ showPageSizeBytes pageSizeBytes ] $
tabulate "key size in bytes"
[ showKeyValueSizeBytes (BS.length k) | (Key k, _, _) <- es ] $
[ showKeyValueSizeBytes (BS.length k) | (Key k, _) <- es ] $
tabulate "value size in bytes"
[ showKeyValueSizeBytes (BS.length v)
| (_, op, _) <- es
| (_, op) <- es
, Value v <- case op of
Insert v -> [v]
Mupsert v -> [v]
Delete -> []
Insert v _ -> [v]
Mupsert v -> [v]
Delete -> []
] $
cover 0.5 (pageSizeBytes > 4096) "page over 4k" $

property $ (if pageSizeElems > 1
then pageSizeBytes <= 4096
else True)
&& (pageSizeElems == length [ e | e <- es ])
&& (pageSizeBlobs == length [ b | (_,_,Just b) <- es ])
&& (pageSizeBlobs == length [ b | (_,Insert _ (Just b)) <- es ])
where
showNumElems n
| n == 0 = "0"
Expand Down Expand Up @@ -494,7 +498,7 @@ maxKeySize = 4096 - overhead -- 4052
where
overhead =
(pageSizeBytes . fromJust . calcPageSize . PageLogical)
[(Key BS.empty, Delete, Just (BlobRef 0 0))]
[(Key BS.empty, Insert (Value BS.empty) (Just (BlobRef 0 0)))]

instance Arbitrary Key where
arbitrary = do
Expand All @@ -516,13 +520,15 @@ instance Arbitrary Operation where
arbitrary = genOperation arbitrary

shrink :: Operation -> [Operation]
shrink Delete = []
shrink (Insert v) = Delete : [ Insert v' | v' <- shrink v ]
shrink (Mupsert v) = Insert v : [ Mupsert v' | v' <- shrink v ]
shrink Delete = []
shrink (Insert v mb) = Delete
: [ Insert v' mb' | (v', mb') <- shrink (v, mb) ]
shrink (Mupsert v) = Insert v Nothing
: [ Mupsert v' | v' <- shrink v ]

genOperation :: Gen Value -> Gen Operation
genOperation gv = oneof
[ Insert <$> gv
[ Insert <$> gv <*> arbitrary
, Mupsert <$> gv
, pure Delete
]
Expand Down Expand Up @@ -556,15 +562,16 @@ instance Arbitrary PageLogical where
genFullPageLogical :: Gen Key -> Gen Value -> Gen PageLogical
genFullPageLogical gk gv = go [] pageSizeEmpty
where
go :: [(Key, Operation, Maybe BlobRef)] -> PageSize -> Gen PageLogical
go :: [(Key, Operation)] -> PageSize -> Gen PageLogical
go es sz = do
e <- (,,) <$> gk <*> genOperation gv <*> arbitrary
e <- (,) <$> gk <*> genOperation gv
case pageSizeAddElem e sz of
Nothing -> return (mkPageLogical es)
Just sz' -> go (e:es) sz'

-- | Create 'PageLogical' enforcing the invariant.
mkPageLogical :: [(Key, Operation, Maybe BlobRef)] -> PageLogical
mkPageLogical xs = PageLogical (nubBy ((==) `on` fstOf3) (sortBy (compare `on` fstOf3) xs))
where
fstOf3 (k,_,_) = k
mkPageLogical :: [(Key, Operation)] -> PageLogical
mkPageLogical =
PageLogical
. nubBy ((==) `on` fst)
. sortBy (compare `on` fst)
8 changes: 4 additions & 4 deletions src/Database/LSMTree/Internal/PageAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,28 @@ import Database.LSMTree.Internal.Serialise
-- A smallest page is with empty key:
--
-- >>> import FormatPage
-- >>> let Just page0 = pageSizeAddElem (Key "", Delete, Nothing) pageSizeEmpty
-- >>> let Just page0 = pageSizeAddElem (Key "", Delete) pageSizeEmpty
-- >>> page0
-- PageSize {pageSizeElems = 1, pageSizeBlobs = 0, pageSizeBytes = 32}
--
-- Then we can add pages with a single byte key, e.g.
--
-- >>> pageSizeAddElem (Key "a", Delete, Nothing) page0
-- >>> pageSizeAddElem (Key "a", Delete) page0
-- Just (PageSize {pageSizeElems = 2, pageSizeBlobs = 0, pageSizeBytes = 35})
--
-- i.e. roughly 3-4 bytes (when we get to 32/64 elements we add more bytes for bitmaps).
-- (key and value offset is together 4 bytes: so it's at least 4, the encoding of single element page takes more space).
--
-- If we write as small program, adding single byte keys to a page size:
--
-- >>> let calc s ps = case pageSizeAddElem (Key "x", Delete, Nothing) ps of { Nothing -> s; Just ps' -> calc (s + 1) ps' }
-- >>> let calc s ps = case pageSizeAddElem (Key "x", Delete) ps of { Nothing -> s; Just ps' -> calc (s + 1) ps' }
-- >>> calc 1 page0
-- 759
--
-- I.e. we can have a 4096 byte page with at most 759 keys, actually less,
-- as there are only 256 single byte keys.
--
-- >>> let calc2 s ps = case pageSizeAddElem (Key $ if s < 257 then "x" else "xx", Delete, Nothing) ps of { Nothing -> s; Just ps' -> calc2 (s + 1) ps' }
-- >>> let calc2 s ps = case pageSizeAddElem (Key $ if s < 257 then "x" else "xx", Delete) ps of { Nothing -> s; Just ps' -> calc2 (s + 1) ps' }
-- >>> calc2 1 page0
-- 680
--
Expand Down
75 changes: 34 additions & 41 deletions test/Test/Database/LSMTree/Internal/PageAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,34 +26,33 @@ tests = testGroup "Database.LSMTree.Internal.PageAcc"
[ testProperty "prototype" prototype

, testProperty "example-00" $ prototype []
, testProperty "example-01" $ prototype [(Proto.Key "foobar", Proto.Delete, Nothing)]
, testProperty "example-02" $ prototype [(Proto.Key "foobar", Proto.Insert (Proto.Value "value"), Just (Proto.BlobRef 111 333))]
, testProperty "example-03" $ prototype [(Proto.Key "\NUL",Proto.Delete,Nothing),(Proto.Key "\SOH",Proto.Delete,Nothing)]
, testProperty "example-01" $ prototype [(Proto.Key "foobar", Proto.Delete)]
, testProperty "example-02" $ prototype [(Proto.Key "foobar", Proto.Insert (Proto.Value "value") (Just (Proto.BlobRef 111 333)))]
, testProperty "example-03" $ prototype [(Proto.Key "\NUL",Proto.Delete),(Proto.Key "\SOH",Proto.Delete)]

-- entries around maximal size
, testProperty "example-04a" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4063 120))),Nothing)]
, testProperty "example-04b" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4064 120))),Nothing)]
, testProperty "example-04c" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4065 120))),Nothing)]
, testProperty "example-04a" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4063 120))) Nothing)]
, testProperty "example-04b" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4064 120))) Nothing)]
, testProperty "example-04c" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4065 120))) Nothing)]

, testProperty "example-05a" $ prototype [(Proto.Key "",Proto.Delete,Nothing),(Proto.Key "k",Proto.Insert (Proto.Value (BS.pack (replicate 4060 120))),Nothing)]
, testProperty "example-05b" $ prototype [(Proto.Key "",Proto.Delete,Nothing),(Proto.Key "k",Proto.Insert (Proto.Value (BS.pack (replicate 4061 120))),Nothing)]
, testProperty "example-05c" $ prototype [(Proto.Key "",Proto.Delete,Nothing),(Proto.Key "k",Proto.Insert (Proto.Value (BS.pack (replicate 4062 120))),Nothing)]
, testProperty "example-05a" $ prototype [(Proto.Key "",Proto.Delete),(Proto.Key "k",Proto.Insert (Proto.Value (BS.pack (replicate 4060 120))) Nothing)]
, testProperty "example-05b" $ prototype [(Proto.Key "",Proto.Delete),(Proto.Key "k",Proto.Insert (Proto.Value (BS.pack (replicate 4061 120))) Nothing)]
, testProperty "example-05c" $ prototype [(Proto.Key "",Proto.Delete),(Proto.Key "k",Proto.Insert (Proto.Value (BS.pack (replicate 4062 120))) Nothing)]

, testProperty "example-06a" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4051 120))),Just (Proto.BlobRef 111 333))]
, testProperty "example-06b" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4052 120))),Just (Proto.BlobRef 111 333))]
, testProperty "example-06c" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4053 120))),Just (Proto.BlobRef 111 333))]
, testProperty "example-06a" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4051 120))) (Just (Proto.BlobRef 111 333)))]
, testProperty "example-06b" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4052 120))) (Just (Proto.BlobRef 111 333)))]
, testProperty "example-06c" $ prototype [(Proto.Key "",Proto.Insert (Proto.Value (BS.pack (replicate 4053 120))) (Just (Proto.BlobRef 111 333)))]
]

-- | Strict 'pageSizeAddElem', doesn't allow for page to overflow
pageSizeAddElem' :: (Proto.Key, Proto.Operation, Maybe Proto.BlobRef) -> Proto.PageSize -> Maybe Proto.PageSize
pageSizeAddElem' :: (Proto.Key, Proto.Operation)
-> Proto.PageSize -> Maybe Proto.PageSize
pageSizeAddElem' e sz = do
sz' <- Proto.pageSizeAddElem e sz
guard (Proto.pageSizeBytes sz' <= 4096)
return sz'

prototype
:: [(Proto.Key, Proto.Operation, Maybe Proto.BlobRef)]
-> Property
prototype :: [(Proto.Key, Proto.Operation)] -> Property
prototype inputs' =
case invariant inputs' of
es -> runST $ do
Expand All @@ -62,36 +61,30 @@ prototype inputs' =
where
-- inputs should be ordered and unique to produce valid page.
invariant xs =
nubBy ((==) `on` fstOf3) $
sortBy (compare `on` fstOf3) $
map cleanOp xs

-- only insert op has blob references
-- TODO: make values small.
-- If the value will overflow page, we'll need a special page anyway.
cleanOp :: (k, Proto.Operation, Maybe bref) -> (k, Proto.Operation, Maybe bref)
cleanOp (k, Proto.Delete, _) = (k, Proto.Delete, Nothing)
cleanOp (k, Proto.Mupsert v, _) = (k, Proto.Mupsert v, Nothing)
cleanOp (k, Proto.Insert v, br) = (k, Proto.Insert v, br)

fstOf3 (k,_,_) = k

go :: PageAcc s -> Proto.PageSize -> [(Proto.Key, Proto.Operation, Maybe Proto.BlobRef)] -> [(Proto.Key, Proto.Operation, Maybe Proto.BlobRef)] -> ST s Property
go acc _ps acc2 [] = finish acc acc2
go acc ps acc2 (e@(k,op,bref):es) = case pageSizeAddElem' e ps of
nubBy ((==) `on` fst) $
sortBy (compare `on` fst) $
xs

go :: PageAcc s
-> Proto.PageSize
-> [(Proto.Key, Proto.Operation)]
-> [(Proto.Key, Proto.Operation)]
-> ST s Property
go acc _ps acc2 [] = finish acc acc2
go acc ps acc2 (e@(k,op):es) = case pageSizeAddElem' e ps of
Nothing -> do
added <- pageAccAddElem acc (convKey k) (convOp op bref)
added <- pageAccAddElem acc (convKey k) (convOp op)
if added
then return $ counterexample "PageAcc addition succeeded, prototype's doesn't." False
else finish acc acc2

Just ps' -> do
added <- pageAccAddElem acc (convKey k) (convOp op bref)
added <- pageAccAddElem acc (convKey k) (convOp op)
if added
then go acc ps' (e:acc2) es
else return $ counterexample "PageAcc addition failed, prototype's doesn't." False

finish :: PageAcc s -> [(Proto.Key, Proto.Operation, Maybe Proto.BlobRef)] -> ST s Property
finish :: PageAcc s -> [(Proto.Key, Proto.Operation)] -> ST s Property
finish acc acc2 = do
let (lhs, _) = toRawPage $ Proto.PageLogical $ reverse acc2
rawpage <- serialisePageAcc acc
Expand All @@ -107,8 +100,8 @@ prototype inputs' =
convBlobSpan :: Proto.BlobRef -> BlobSpan
convBlobSpan (Proto.BlobRef x y) = BlobSpan x y

convOp :: Proto.Operation -> Maybe Proto.BlobRef -> Entry SerialisedValue BlobSpan
convOp Proto.Delete _ = Delete
convOp (Proto.Mupsert v) _ = Mupdate (convValue v)
convOp (Proto.Insert v) Nothing = Insert (convValue v)
convOp (Proto.Insert v) (Just bspan) = InsertWithBlob (convValue v) (convBlobSpan bspan)
convOp :: Proto.Operation -> Entry SerialisedValue BlobSpan
convOp Proto.Delete = Delete
convOp (Proto.Mupsert v) = Mupdate (convValue v)
convOp (Proto.Insert v Nothing) = Insert (convValue v)
convOp (Proto.Insert v (Just bspan)) = InsertWithBlob (convValue v) (convBlobSpan bspan)

0 comments on commit f6a0359

Please sign in to comment.