Skip to content

Commit

Permalink
Identity up to slicing for Serialise classes
Browse files Browse the repository at this point in the history
This stronger property ensures that we deserialise correctly from sliced raw
bytes.
  • Loading branch information
jorisdral committed Apr 29, 2024
1 parent 902a6f6 commit a860513
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 2 deletions.
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ library
Database.LSMTree.Internal.Normal
Database.LSMTree.Internal.PageAcc
Database.LSMTree.Internal.PageAcc1
Database.LSMTree.Internal.Primitive
Database.LSMTree.Internal.Range
Database.LSMTree.Internal.RawBytes
Database.LSMTree.Internal.RawOverflowPage
Expand Down
27 changes: 27 additions & 0 deletions src/Database/LSMTree/Internal/Primitive.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}

module Database.LSMTree.Internal.Primitive (
indexWord8ArrayAsWord16
, indexWord8ArrayAsWord32
, indexWord8ArrayAsWord64
) where

import Data.Primitive.ByteArray (ByteArray (..))
import GHC.Exts
import GHC.Word

{-# INLINE indexWord8ArrayAsWord16 #-}
indexWord8ArrayAsWord16 :: ByteArray -> Int -> Word16
indexWord8ArrayAsWord16 (ByteArray !ba#) (I# !off#) =
W16# (indexWord8ArrayAsWord16# ba# off#)

{-# INLINE indexWord8ArrayAsWord32 #-}
indexWord8ArrayAsWord32 :: ByteArray -> Int -> Word32
indexWord8ArrayAsWord32 (ByteArray !ba#) (I# !off#) =
W32# (indexWord8ArrayAsWord32# ba# off#)

{-# INLINE indexWord8ArrayAsWord64 #-}
indexWord8ArrayAsWord64 :: ByteArray -> Int -> Word64
indexWord8ArrayAsWord64 (ByteArray !ba#) (I# !off#) =
W64# (indexWord8ArrayAsWord64# ba# off#)
34 changes: 32 additions & 2 deletions src/Database/LSMTree/Internal/Serialise/Class.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
module Database.LSMTree.Internal.Serialise.Class (
SerialiseKey (..)
, serialiseKeyIdentity
, serialiseKeyIdentityUpToSlicing
, serialiseKeyPreservesOrdering
, serialiseKeyMinimalSize
, SerialiseValue (..)
, serialiseValueIdentity
, serialiseValueIdentityUpToSlicing
, serialiseValueConcatDistributes
, RawBytes (..)
, packSlice
) where

import qualified Data.ByteString as BS
Expand All @@ -24,6 +27,7 @@ import qualified Data.Vector.Primitive as PV
import Data.Void (Void, absurd)
import Data.Word
import Database.LSMTree.Internal.ByteString (byteArrayToSBS)
import Database.LSMTree.Internal.Primitive (indexWord8ArrayAsWord64)
import Database.LSMTree.Internal.RawBytes (RawBytes (..))
import qualified Database.LSMTree.Internal.RawBytes as RB
import Database.LSMTree.Internal.Vector
Expand All @@ -35,6 +39,7 @@ import GHC.Word (Word64 (..))
-- Instances should satisfy the following:
--
-- [Identity] @'deserialiseKey' ('serialiseKey' x) == x@
-- [Identity up to slicing] @'deserialiseKey' ('packSlice' prefix ('serialiseKey' x) suffix) == x@
-- [Ordering-preserving] @x \`'compare'\` y == 'serialiseKey' x \`'compare'\` 'serialiseKey' y@
--
-- Raw bytes are lexicographically ordered, so in particular this means that
Expand All @@ -61,6 +66,13 @@ class SerialiseKey k where
serialiseKeyIdentity :: (Eq k, SerialiseKey k) => k -> Bool
serialiseKeyIdentity x = deserialiseKey (serialiseKey x) == x

-- | Test the __Identity up to slicing__ law for the 'SerialiseKey' class
serialiseKeyIdentityUpToSlicing ::
(Eq k, SerialiseKey k)
=> RawBytes -> k -> RawBytes -> Bool
serialiseKeyIdentityUpToSlicing prefix x suffix =
deserialiseKey (packSlice prefix (serialiseKey x) suffix) == x

-- | Test the __Ordering-preserving__ law for the 'SerialiseKey' class
serialiseKeyPreservesOrdering :: (Ord k, SerialiseKey k) => k -> k -> Bool
serialiseKeyPreservesOrdering x y = x `compare` y == serialiseKey x `compare` serialiseKey y
Expand All @@ -74,6 +86,7 @@ serialiseKeyMinimalSize x = RB.size (serialiseKey x) >= 6
-- Instances should satisfy the following:
--
-- [Identity] @'deserialiseValue' ('serialiseValue' x) == x@
-- [Identity up to slicing] @'deserialiseValue' ('packSlice' prefix ('serialiseValue' x) suffix) == x@
-- [Concat distributes] @'deserialiseValueN' xs == 'deserialiseValue' ('mconcat' xs)@
class SerialiseValue v where
serialiseValue :: v -> RawBytes
Expand All @@ -87,10 +100,27 @@ class SerialiseValue v where
serialiseValueIdentity :: (Eq v, SerialiseValue v) => v -> Bool
serialiseValueIdentity x = deserialiseValue (serialiseValue x) == x

-- | Test the __Identity up to slicing__ law for the 'SerialiseValue' class
serialiseValueIdentityUpToSlicing ::
(Eq v, SerialiseValue v)
=> RawBytes -> v -> RawBytes -> Bool
serialiseValueIdentityUpToSlicing prefix x suffix =
deserialiseValue (packSlice prefix (serialiseValue x) suffix) == x

-- | Test the __Concat distributes__ law for the 'SerialiseValue' class
serialiseValueConcatDistributes :: forall v. (Eq v, SerialiseValue v) => Proxy v -> [RawBytes] -> Bool
serialiseValueConcatDistributes _ xs = deserialiseValueN @v xs == deserialiseValue (mconcat xs)

{-------------------------------------------------------------------------------
RawBytes
-------------------------------------------------------------------------------}

-- | @'packSlice' prefix x suffix@ makes @x@ into a slice with @prefix@ bytes on
-- the left and @suffix@ bytes on the right.
packSlice :: RawBytes -> RawBytes -> RawBytes -> RawBytes
packSlice prefix x suffix =
RB.take (RB.size x) (RB.drop (RB.size prefix) (prefix <> x <> suffix))

{-------------------------------------------------------------------------------
Word64
-------------------------------------------------------------------------------}
Expand All @@ -113,8 +143,8 @@ instance SerialiseValue Word64 where
P.writeByteArray ba 0 x
return ba

deserialiseValue (RawBytes (PV.Vector (I# off#) len (P.ByteArray ba#)))
| len >= 8 = W64# (indexWord8ArrayAsWord64# ba# off# )
deserialiseValue (RawBytes (PV.Vector off len ba))
| len >= 8 = indexWord8ArrayAsWord64 ba off
| otherwise = error "deserialiseValue: not enough bytes for Word64"

deserialiseValueN = deserialiseValue . mconcat
Expand Down
30 changes: 30 additions & 0 deletions test/Test/Database/LSMTree/Internal/Serialise/Class.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ keyProperties :: forall a. (Ord a, Show a, Arbitrary a, SerialiseKey a) => [Test
keyProperties =
[ testProperty "prop_roundtripSerialiseKey" $
prop_roundtripSerialiseKey @a
, testProperty "prop_roundtripSerialiseKeyUpToSlicing" $
prop_roundtripSerialiseKeyUpToSlicing @a
, testProperty "prop_orderPreservationSerialiseKey" $
prop_orderPreservationSerialiseKey @a
]
Expand All @@ -40,6 +42,8 @@ valueProperties :: forall a. (Ord a, Show a, Arbitrary a, SerialiseValue a) => [
valueProperties =
[ testProperty "prop_roundtripSerialiseValue" $
prop_roundtripSerialiseValue @a
, testProperty "prop_roundtripSerialiseValueUpToSlicing" $
prop_roundtripSerialiseValueUpToSlicing @a
, testProperty "prop_concatDistributesSerialiseValue" $
prop_concatDistributesSerialiseValue @a
]
Expand All @@ -50,6 +54,19 @@ prop_roundtripSerialiseKey k =
counterexample ("deserialised: " <> show @k (deserialiseKey (serialiseKey k))) $
serialiseKeyIdentity k

prop_roundtripSerialiseKeyUpToSlicing ::
forall k. (Eq k, Show k, SerialiseKey k)
=> RawBytes -> k -> RawBytes -> Property
prop_roundtripSerialiseKeyUpToSlicing prefix x suffix =
counterexample ("serialised: " <> show @RawBytes k) $
counterexample ("serialised and sliced: " <> show @RawBytes k') $
counterexample ("deserialised: " <> show @k x') $
serialiseKeyIdentityUpToSlicing prefix x suffix
where
k = serialiseKey x
k' = packSlice prefix k suffix
x' = deserialiseKey k'

prop_orderPreservationSerialiseKey :: forall k. (Ord k, SerialiseKey k) => k -> k -> Property
prop_orderPreservationSerialiseKey x y =
counterexample ("serialised: " <> show (serialiseKey x, serialiseKey y)) $
Expand All @@ -63,6 +80,19 @@ prop_roundtripSerialiseValue v =
counterexample ("deserialised: " <> show @v (deserialiseValue (serialiseValue v))) $
serialiseValueIdentity v

prop_roundtripSerialiseValueUpToSlicing ::
forall v. (Eq v, Show v, SerialiseValue v)
=> RawBytes -> v -> RawBytes -> Property
prop_roundtripSerialiseValueUpToSlicing prefix x suffix =
counterexample ("serialised: " <> show v) $
counterexample ("serialised and sliced: " <> show @RawBytes v') $
counterexample ("deserialised: " <> show @v x') $
serialiseValueIdentityUpToSlicing prefix x suffix
where
v = serialiseValue x
v' = packSlice prefix v suffix
x' = deserialiseValue v'

prop_concatDistributesSerialiseValue :: forall v. (Ord v, Show v, SerialiseValue v) => v -> Property
prop_concatDistributesSerialiseValue v =
forAllShrink (genChunks bytes) shrinkChunks $ (. map (RB.pack)) $ \chs ->
Expand Down

0 comments on commit a860513

Please sign in to comment.