Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions bench/micro/Bench/Database/LSMTree/Internal/Index.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{-# LANGUAGE CPP #-}

module Bench.Database.LSMTree.Internal.Index (benchmarks) where

import Control.Category ((>>>))
import Control.DeepSeq (rnf)
import Control.Monad.ST.Strict (runST)
import Criterion.Main (Benchmark, Benchmarkable, bench, bgroup, env,
whnf)
#if __GLASGOW_HASKELL__ < 910
import Data.List (foldl')
#endif
import Database.LSMTree.Extras.Generators (getKeyForIndexCompact,
mkPages, toAppends)
-- also for @Arbitrary@ instantiation of @SerialisedKey@
import Database.LSMTree.Extras.Index (Append, append)
import Database.LSMTree.Internal.Index (Index,
IndexType (Compact, Ordinary), newWithDefaults, search,
unsafeEnd)
import Database.LSMTree.Internal.Serialise
(SerialisedKey (SerialisedKey))
import Test.QuickCheck (choose, vector)
import Test.QuickCheck.Gen (Gen (MkGen))
import Test.QuickCheck.Random (mkQCGen)

-- * Benchmarks

benchmarks :: Benchmark
benchmarks = bgroup "Bench.Database.LSMTree.Internal.Index" $
map (uncurry benchmarksForSingleType) $
[("Compact", Compact), ("Ordinary", Ordinary)]
where

benchmarksForSingleType :: String -> IndexType -> Benchmark
benchmarksForSingleType indexTypeName indexType
= bgroup (indexTypeName ++ " index") $
[
-- Search
env (return $ searchIndex indexType 10000) $ \ index ->
env (return $ searchKeys 1000) $ \ keys ->
bench "Search" $
searchBenchmarkable index keys,

-- Incremental construction
env (return $ incrementalConstructionAppends 10000) $ \ appends ->
bench "Incremental construction" $
incrementalConstructionBenchmarkable indexType appends
]

-- * Utilities

-- | Deterministically constructs a value using a QuickCheck generator.
generated :: Gen a -> a
generated (MkGen exec) = exec (mkQCGen 411) 30

{-|
Constructs serialised keys that conform to the key size constraint of
compact indexes.
-}
keysForIndexCompact :: Int -- ^ Number of keys
-> [SerialisedKey] -- ^ Constructed keys
keysForIndexCompact = vector >>>
generated >>>
map (getKeyForIndexCompact >>> SerialisedKey)

{-|
Constructs append operations whose serialised keys conform to the key size
constraint of compact indexes.
-}
appendsForIndexCompact :: Int -- ^ Number of keys used in the construction
-> [Append] -- ^ Constructed append operations
appendsForIndexCompact = keysForIndexCompact >>>
mkPages 0.03 (choose (0, 16)) 0.01 >>>
generated >>>
toAppends
Comment on lines +52 to +75
Copy link
Collaborator

@jorisdral jorisdral Mar 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally normally avoid using QuickCheck generators because they are often in flux, and because they are primarily tailored towards testing, not benchmarking. It can be more future proof to write these functions with System.Random and related functions. When we change QuickCheck generators, we might accidentally change a benchmark as well

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m also not completely satisfied with using QuickCheck generators for benchmarks. That said, by using System.Random directly, we lose the ability to use existing utilities, like we use mkPages above. I’d like to leave the above code as it is for now. It might be worthwhile, though, to generally revisit the uses of QuickCheck generators in our benchmarks.

Copy link
Collaborator

@jorisdral jorisdral Mar 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That said, by using System.Random directly, we lose the ability to use existing utilities, like we use mkPages above.

That's not completely true. You should run the mkPages Gen with a seed, yes, but you can generate the keys with System.Random. See the Index.Compact benchmarks

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So you’re advocating a mixed-approach with some data being generated by directly using System.Random and other by using QuickCheck?

In the compact-index benchmark module, I can only see one use of mkPages, the one in constructionEnv. However, there the generator constructed by mkPages is run via generate, which means that it uses a seed produced by the global random number generator, not a seed specified in the source file. That said, I could still use my generated function for generators to work with fixed seeds.

Should I change the general index benchmarks to use that mixed approach where just the page data is generated using QuickCheck?

{-
The arguments used for 'mkPages' are the same as the ones used for
'genPages' in the instantiation of 'Arbitrary' for 'LogicalPageSummaries' at
the time of writing.
-}

{-|
Constructs an index by applying append operations to an initially empty
index.
-}
indexFromAppends :: IndexType -> [Append] -> Index
indexFromAppends indexType appends = runST $ do
indexAcc <- newWithDefaults indexType
mapM_ (flip append indexAcc) appends
snd <$> unsafeEnd indexAcc

-- * Benchmark ingredients

-- ** Search

-- | Constructs an index to be searched.
searchIndex :: IndexType -- ^ Type of index to construct
-> Int -- ^ Number of keys used in the construction
-> Index -- ^ Constructed index
searchIndex indexType keyCount
= indexFromAppends indexType (appendsForIndexCompact keyCount)

-- | Constructs a list of keys to search for.
searchKeys :: Int -- ^ Number of searches
-> [SerialisedKey] -- ^ Constructed search keys
searchKeys = keysForIndexCompact

-- | The action to be performed by a search benchmark.
searchBenchmarkable :: Index -> [SerialisedKey] -> Benchmarkable
searchBenchmarkable index = whnf $ foldl' (\ _ key -> rnf (search key index)) ()

-- ** Incremental construction

-- | Constructs append operations to be used in index construction.
incrementalConstructionAppends
:: Int -- ^ Number of keys used in the construction
Comment on lines +115 to +116
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
incrementalConstructionAppends
:: Int -- ^ Number of keys used in the construction
incrementalConstructionAppends ::
Int -- ^ Number of keys used in the construction

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jeltsch did you forget to resolve this?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that this wouldn’t be so important, given that it was labeled a “suggested” change and the pull request was already approved as it were.

Generally, I think that having this sort of hanging indentation by not putting :: above the arrows makes for an inferior layout. I know that your editor and GitHub seem to have problems with a layout like the present one when it comes to syntax highlighting, but I think we shouldn’t let current bugs of tools influence our layout.

Well, I can make the change. Should I?

-> [Append] -- ^ Constructed append operations
incrementalConstructionAppends = appendsForIndexCompact

-- | The action to be performed by an incremental-construction benchmark.
incrementalConstructionBenchmarkable :: IndexType -> [Append] -> Benchmarkable
incrementalConstructionBenchmarkable indexType appends
= whnf (indexFromAppends indexType) appends
2 changes: 2 additions & 0 deletions bench/micro/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
module Main (main) where

import qualified Bench.Database.LSMTree.Internal.BloomFilter
import qualified Bench.Database.LSMTree.Internal.Index
import qualified Bench.Database.LSMTree.Internal.Index.Compact
import qualified Bench.Database.LSMTree.Internal.Lookup
import qualified Bench.Database.LSMTree.Internal.Merge
Expand All @@ -21,6 +22,7 @@ main = do
#endif
defaultMain [
Bench.Database.LSMTree.Internal.BloomFilter.benchmarks
, Bench.Database.LSMTree.Internal.Index.benchmarks
, Bench.Database.LSMTree.Internal.Index.Compact.benchmarks
, Bench.Database.LSMTree.Internal.Lookup.benchmarks
, Bench.Database.LSMTree.Internal.Merge.benchmarks
Expand Down
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ benchmark lsm-tree-micro-bench
main-is: Main.hs
other-modules:
Bench.Database.LSMTree.Internal.BloomFilter
Bench.Database.LSMTree.Internal.Index
Bench.Database.LSMTree.Internal.Index.Compact
Bench.Database.LSMTree.Internal.Lookup
Bench.Database.LSMTree.Internal.Merge
Expand Down
46 changes: 26 additions & 20 deletions src/Database/LSMTree/Internal/Index/Ordinary.hs
Original file line number Diff line number Diff line change
Expand Up @@ -89,26 +89,32 @@ search key (IndexOrdinary lastKeys) = assert (pageCount > 0) result where
!pageCount = length lastKeys

result :: PageSpan
!result | protoStart < pageCount
= let

end :: Int
!end = maybe (pred pageCount) (+ protoStart) $
findIndex (/= lastKeys ! protoStart) $
drop (succ protoStart) lastKeys

in PageSpan (PageNo $ protoStart)
(PageNo $ end)
| otherwise
= let

start :: Int
!start = maybe 0 succ $
findIndexR (/= last lastKeys) $
lastKeys

in PageSpan (PageNo $ start)
(PageNo $ pred pageCount)
result | protoStart < pageCount
= let

resultKey :: SerialisedKey
!resultKey = lastKeys ! protoStart

end :: Int
!end = maybe (pred pageCount) (+ protoStart) $
findIndex (/= resultKey) $
drop (succ protoStart) lastKeys

in PageSpan (PageNo $ protoStart)
(PageNo $ end)
| otherwise
= let

resultKey :: SerialisedKey
!resultKey = last lastKeys

start :: Int
!start = maybe 0 succ $
findIndexR (/= resultKey) $
lastKeys

in PageSpan (PageNo $ start)
(PageNo $ pred pageCount)

{-|
For a specification of this operation, see the documentation of [its
Expand Down
1 change: 1 addition & 0 deletions src/Database/LSMTree/Internal/Vector.hs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ forMStrict xs f = V.forM xs (f >=> (pure $!))
-}
binarySearchL :: Ord a => V.Vector a -> a -> Int
binarySearchL vec val = runST $ V.unsafeThaw vec >>= flip VA.binarySearchL val
{-# INLINE binarySearchL #-}

{-# INLINE unsafeInsertWithMStrict #-}
-- | Insert (in a broad sense) an entry in a mutable vector at a given index,
Expand Down