From f940fb0bcae3aaa9ef2647d084254d735b87ad66 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Mon, 5 Aug 2024 12:46:49 +0200 Subject: [PATCH] Add configuration option for the fence pointer index The configuration option currently has no effect: the Compact Index is always used. --- .../micro/Bench/Database/LSMTree/Monoidal.hs | 1 + src/Database/LSMTree/Internal.hs | 53 ++++++++++++++++--- .../LSMTree/Internal/Serialise/Class.hs | 16 ++---- src/Database/LSMTree/Monoidal.hs | 1 + src/Database/LSMTree/Normal.hs | 1 + test/Test/Database/LSMTree/Class/Monoidal.hs | 1 + test/Test/Database/LSMTree/Class/Normal.hs | 1 + test/Test/Database/LSMTree/Internal.hs | 2 + .../Database/LSMTree/Normal/StateMachine.hs | 22 +++----- 9 files changed, 62 insertions(+), 36 deletions(-) diff --git a/bench/micro/Bench/Database/LSMTree/Monoidal.hs b/bench/micro/Bench/Database/LSMTree/Monoidal.hs index 6180a80b4..4c87c32ca 100644 --- a/bench/micro/Bench/Database/LSMTree/Monoidal.hs +++ b/bench/micro/Bench/Database/LSMTree/Monoidal.hs @@ -57,6 +57,7 @@ benchConfig = Normal.TableConfig { , confSizeRatio = Normal.Four , confWriteBufferAlloc = Normal.AllocNumEntries (Normal.NumEntries 20000) , confBloomFilterAlloc = Normal.AllocFixed 10 + , confFencePointerIndex = Normal.CompactIndex , confDiskCachePolicy = Normal.DiskCacheAll } diff --git a/src/Database/LSMTree/Internal.hs b/src/Database/LSMTree/Internal.hs index 22aaabc59..68dff203f 100644 --- a/src/Database/LSMTree/Internal.hs +++ b/src/Database/LSMTree/Internal.hs @@ -39,7 +39,7 @@ module Database.LSMTree.Internal ( , listSnapshots -- * Mutiple writable table handles , duplicate - -- * configuration + -- * Configuration , TableConfig (..) , defaultTableConfig , MergePolicy (..) @@ -48,6 +48,7 @@ module Database.LSMTree.Internal ( , NumEntries (..) , BloomFilterAlloc (..) , defaultBloomFilterAlloc + , FencePointerIndex (..) , DiskCachePolicy (..) -- * Exported for cabal-docspec , MergePolicyForLevel (..) 
@@ -1272,22 +1273,24 @@ duplicate th = withOpenTable th $ \thEnv -> do -- -- * Size ratio: 4 data TableConfig = TableConfig { - confMergePolicy :: !MergePolicy + confMergePolicy :: !MergePolicy -- Size ratio between the capacities of adjacent levels. - , confSizeRatio :: !SizeRatio + , confSizeRatio :: !SizeRatio -- | Total number of bytes that the write buffer can use. -- -- The maximum is 4GiB, which should be more than enough for realistic -- applications. - , confWriteBufferAlloc :: !WriteBufferAlloc - , confBloomFilterAlloc :: !BloomFilterAlloc + , confWriteBufferAlloc :: !WriteBufferAlloc + , confBloomFilterAlloc :: !BloomFilterAlloc + , confFencePointerIndex :: !FencePointerIndex -- | The policy for caching key\/value data from disk in memory. - , confDiskCachePolicy :: !DiskCachePolicy + , confDiskCachePolicy :: !DiskCachePolicy } - deriving stock Show + deriving stock (Show, Eq) instance NFData TableConfig where - rnf (TableConfig a b c d e) = rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` rnf e + rnf (TableConfig a b c d e f) = + rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` rnf e `seq` rnf f -- | TODO: this should be removed once we have proper snapshotting with proper -- persistence of the config to disk. @@ -1305,6 +1308,7 @@ defaultTableConfig = , confSizeRatio = Four , confWriteBufferAlloc = AllocNumEntries (NumEntries 20_000) , confBloomFilterAlloc = defaultBloomFilterAlloc + , confFencePointerIndex = CompactIndex , confDiskCachePolicy = DiskCacheAll } @@ -1413,6 +1417,39 @@ bloomFilterAllocForLevel :: BloomFilterAlloc -> LevelNo -> RunBloomFilterAlloc bloomFilterAllocForLevel (AllocFixed n) _ = RunAllocFixed n bloomFilterAllocForLevel (AllocRequestFPR fpr) _ = RunAllocRequestFPR fpr +-- | Configure the type of fence pointer index. +-- +-- TODO: this configuration option currently has no effect: 'CompactIndex' is +-- always used. +data FencePointerIndex = + -- | Use a compact fence pointer index. 
+ -- + -- The compact index type is designed to work with keys that are large + -- cryptographic hashes, e.g. 32 bytes. + -- + -- When using 'CompactIndex', additional constraints apply to the + -- 'Database.LSMTree.Internal.Serialise.Class.serialiseKey' function. The + -- __Minimal size__ law should be satisfied: + -- + -- [Minimal size] @'Database.LSMTree.Internal.RawBytes.size' + -- ('Database.LSMTree.Internal.Serialise.Class.serialiseKey' x) >= 8@ + -- + -- Use 'Database.LSMTree.Internal.Serialise.Class.serialiseKeyMinimalSize' + -- to test this law. + CompactIndex + -- | Use an ordinary fence pointer index, without any constraints on + -- serialised keys. + | OrdinaryIndex + deriving stock (Show, Eq) + +instance NFData FencePointerIndex where + rnf CompactIndex = () + rnf OrdinaryIndex = () + +-- | TODO: this should be removed once we have proper snapshotting with proper +-- persistence of the config to disk. +deriving stock instance Read FencePointerIndex + -- | The policy for caching data from disk in memory (using the OS page cache). -- -- Caching data in memory can improve performance if the access pattern has diff --git a/src/Database/LSMTree/Internal/Serialise/Class.hs b/src/Database/LSMTree/Internal/Serialise/Class.hs index a2dea9482..b63bbd843 100644 --- a/src/Database/LSMTree/Internal/Serialise/Class.hs +++ b/src/Database/LSMTree/Internal/Serialise/Class.hs @@ -43,18 +43,10 @@ import Numeric (showInt) -- [Ordering-preserving] @x \`'compare'\` y == 'serialiseKey' x \`'compare'\` 'serialiseKey' y@ -- -- Raw bytes are lexicographically ordered, so in particular this means that --- values should be serialised into big-endian formats. --- This constraint mainly exists for range queries, where the range is specified --- in terms of unserialised values, but the internal implementation works on the --- serialised representation. 
--- --- === IndexCompact constraints --- --- When using the 'IndexCompact', additional constraints apply to the --- serialisation function, so in that case instances should also satisfy the --- following: --- --- [Minimal size] @'sizeofRawBytes' >= 8@ +-- values should be serialised into big-endian formats. This constraint mainly +-- exists for range queries, where the range is specified in terms of +-- unserialised values, but the internal implementation works on the serialised +-- representation. class SerialiseKey k where serialiseKey :: k -> RawBytes -- TODO: 'deserialiseKey' is only strictly necessary for range queries. diff --git a/src/Database/LSMTree/Monoidal.hs b/src/Database/LSMTree/Monoidal.hs index ec0139ea4..e086185f5 100644 --- a/src/Database/LSMTree/Monoidal.hs +++ b/src/Database/LSMTree/Monoidal.hs @@ -43,6 +43,7 @@ module Database.LSMTree.Monoidal ( , Internal.NumEntries (..) , Internal.BloomFilterAlloc (..) , Internal.defaultBloomFilterAlloc + , Internal.FencePointerIndex (..) , Internal.DiskCachePolicy (..) , withTable , new diff --git a/src/Database/LSMTree/Normal.hs b/src/Database/LSMTree/Normal.hs index 2db3dd0af..4a29459fa 100644 --- a/src/Database/LSMTree/Normal.hs +++ b/src/Database/LSMTree/Normal.hs @@ -42,6 +42,7 @@ module Database.LSMTree.Normal ( , Internal.NumEntries (..) , Internal.BloomFilterAlloc (..) , Internal.defaultBloomFilterAlloc + , Internal.FencePointerIndex (..) , Internal.DiskCachePolicy (..) 
, withTable , new diff --git a/test/Test/Database/LSMTree/Class/Monoidal.hs b/test/Test/Database/LSMTree/Class/Monoidal.hs index 1bcd5d5b1..2a715a96b 100644 --- a/test/Test/Database/LSMTree/Class/Monoidal.hs +++ b/test/Test/Database/LSMTree/Class/Monoidal.hs @@ -46,6 +46,7 @@ tests = testGroup "Test.Database.LSMTree.Class.Monoidal" , R.confSizeRatio = R.Four , R.confWriteBufferAlloc = R.AllocNumEntries (R.NumEntries 3) , R.confBloomFilterAlloc = R.AllocFixed 10 + , R.confFencePointerIndex = R.CompactIndex , R.confDiskCachePolicy = R.DiskCacheNone } , testWithSessionArgs = \action -> diff --git a/test/Test/Database/LSMTree/Class/Normal.hs b/test/Test/Database/LSMTree/Class/Normal.hs index d69bd06ff..eefa686fb 100644 --- a/test/Test/Database/LSMTree/Class/Normal.hs +++ b/test/Test/Database/LSMTree/Class/Normal.hs @@ -52,6 +52,7 @@ tests = testGroup "Test.Database.LSMTree.Class.Normal" , R.confSizeRatio = R.Four , R.confWriteBufferAlloc = R.AllocNumEntries (R.NumEntries 3) , R.confBloomFilterAlloc = R.AllocFixed 10 + , R.confFencePointerIndex = R.CompactIndex , R.confDiskCachePolicy = R.DiskCacheNone } , testWithSessionArgs = \action -> diff --git a/test/Test/Database/LSMTree/Internal.hs b/test/Test/Database/LSMTree/Internal.hs index 564490ad4..e604bad67 100644 --- a/test/Test/Database/LSMTree/Internal.hs +++ b/test/Test/Database/LSMTree/Internal.hs @@ -127,6 +127,7 @@ prop_interimRestoreSessionUniqueRunNames (Positive (Small n)) (NonNegative m) = -- flushes and merges. , confWriteBufferAlloc = AllocNumEntries (NumEntries n) , confBloomFilterAlloc = AllocFixed 10 + , confFencePointerIndex = CompactIndex , confDiskCachePolicy = DiskCacheNone } @@ -172,6 +173,7 @@ prop_interimOpenTable dat = ioProperty $ -- flushes and merges. 
, confWriteBufferAlloc = AllocNumEntries (NumEntries 3) , confBloomFilterAlloc = AllocFixed 10 + , confFencePointerIndex = CompactIndex , confDiskCachePolicy = DiskCacheNone } diff --git a/test/Test/Database/LSMTree/Normal/StateMachine.hs b/test/Test/Database/LSMTree/Normal/StateMachine.hs index dcd17c118..6d5eb50bc 100644 --- a/test/Test/Database/LSMTree/Normal/StateMachine.hs +++ b/test/Test/Database/LSMTree/Normal/StateMachine.hs @@ -227,24 +227,14 @@ instance Arbitrary M.TableConfig where instance Arbitrary R.TableConfig where arbitrary :: Gen R.TableConfig arbitrary = pure $ R.TableConfig { - R.confMergePolicy = R.MergePolicyLazyLevelling - , R.confSizeRatio = R.Four - , R.confWriteBufferAlloc = R.AllocNumEntries (R.NumEntries 30) - , R.confBloomFilterAlloc = R.AllocFixed 10 - , R.confDiskCachePolicy = R.DiskCacheNone + R.confMergePolicy = R.MergePolicyLazyLevelling + , R.confSizeRatio = R.Four + , R.confWriteBufferAlloc = R.AllocNumEntries (R.NumEntries 30) + , R.confBloomFilterAlloc = R.AllocFixed 10 + , R.confFencePointerIndex = R.CompactIndex + , R.confDiskCachePolicy = R.DiskCacheNone } -instance Eq R.TableConfig where - R.TableConfig pol1 size1 wbAlloc1 bfAlloc1 cache1 - == R.TableConfig pol2 size2 wbAlloc2 bfAlloc2 cache2 - = and [ - pol1 == pol2 - , size1 == size2 - , wbAlloc1 == wbAlloc2 - , bfAlloc1 == bfAlloc2 - , cache1 == cache2 - ] - {------------------------------------------------------------------------------- Key and value types -------------------------------------------------------------------------------}