Skip to content

Commit

Permalink
create micro benchmark for write buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
mheinzel committed Apr 25, 2024
1 parent b385254 commit dbba9fd
Show file tree
Hide file tree
Showing 7 changed files with 393 additions and 18 deletions.
344 changes: 344 additions & 0 deletions bench/micro/Bench/Database/LSMTree/Internal/WriteBuffer.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,344 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wno-orphans #-}

module Bench.Database.LSMTree.Internal.WriteBuffer (benchmarks) where

import Control.DeepSeq (NFData (..))
import Criterion.Main (Benchmark, bench, bgroup)
import qualified Criterion.Main as Cr
import Data.Bifunctor (first)
import qualified Data.ByteString as BS
import qualified Data.List as List
import qualified Data.Primitive as P
import Data.WideWord.Word256
import Data.Word (Word64)
import Database.LSMTree.Extras.Orphans ()
import qualified Database.LSMTree.Internal.RawBytes as RB
import Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import Database.LSMTree.Internal.Serialise
import qualified Database.LSMTree.Internal.Serialise.Class as Class
import Database.LSMTree.Internal.Vector (mkPrimVector)
import Database.LSMTree.Internal.WriteBuffer (WriteBuffer)
import qualified Database.LSMTree.Internal.WriteBuffer as WB
import qualified Database.LSMTree.Normal as Normal
import GHC.Generics (Generic)
import Prelude hiding (getContents)
import System.Directory (removeDirectoryRecursive)
import qualified System.FS.API as FS
import qualified System.FS.IO as FS
import qualified System.FS.IO.Internal.Handle as FS (HandleOS (..))
import System.IO.Temp
import qualified System.Random as R
import System.Random (StdGen, Uniform, mkStdGen, uniform, uniformR)

benchmarks :: Benchmark
benchmarks = bgroup "Bench.Database.LSMTree.Internal.WriteBuffer" [
-- compare different buffer sizes to see superlinear cost of map insertion
benchWriteBuffer configWord64
{ name = "word64-2k"
, ninserts = 2_000
}
, benchWriteBuffer configWord64
{ name = "word64-10k"
, ninserts = 10_000
}
, benchWriteBuffer configWord64
{ name = "word64-50k"
, ninserts = 50_000
}
-- multi-page inserts (or almost so)
, benchWriteBuffer configWord64
{ name = "insert-page-2k"
, ninserts = 2_000
, randomValue = randomValueSized (4056, 4056) -- 1 page
}
, benchWriteBuffer configWord64
{ name = "insert-page-plus-byte-2k"
, ninserts = 2_000
, randomValue = randomValueSized (4057, 4057) -- 1 page + 1 byte
}
, benchWriteBuffer configWord64
{ name = "insert-huge-2k"
, ninserts = 2_000
, randomValue = randomValueSized (10_000, 20_000) -- 3-5 pages
}
-- UTxO workload
, benchWriteBuffer configUTxO
{ name = "utxo-2k"
, ninserts = 1_000
, ndeletes = 1_000
}
, benchWriteBuffer configUTxO
{ name = "utxo-10k"
, ninserts = 5_000
, ndeletes = 5_000
}
, benchWriteBuffer configUTxO
{ name = "utxo-50k"
, ninserts = 25_000
, ndeletes = 25_000
}
-- UTxO workload with blobs
, benchWriteBuffer configUTxO
{ name = "utxo-2k-blob"
, ninserts = 500
, nblobinserts = 500
, ndeletes = 1_000
}
, benchWriteBuffer configUTxO
{ name = "utxo-10k-blob"
, ninserts = 2_500
, nblobinserts = 2_500
, ndeletes = 5_000
}
, benchWriteBuffer configUTxO
{ name = "utxo-50k-blob"
, ninserts = 12_500
, nblobinserts = 12_500
, ndeletes = 25_000
}
]

benchWriteBuffer :: Config -> Benchmark
benchWriteBuffer conf@Config{name} =
withEnv $ \ ~(_dir, hasFS, kops) ->
bgroup name [
bench "insert" $
Cr.whnf (\kops' -> insert kops') kops
, Cr.env (pure $ insert kops) $ \wb ->
bench "flush" $
Cr.perRunEnvWithCleanup getPaths (const (cleanupPaths hasFS)) $ \p -> do
!run <- flush hasFS p wb
Run.removeReference hasFS run
, bench "insert+flush" $
-- To make sure the WriteBuffer really gets recomputed on every run,
-- we'd like to do: `whnfAppIO (kops' -> ...) kops`.
-- However, we also need per-run cleanup to avoid running out of
-- disk space. We use `perRunEnvWithCleanup`, which has two issues:
-- 1. Just as `whnfAppIO` etc., it takes an IO action and returns
-- `Benchmarkable`, which does not compose. As a workaround, we
-- thread `kops` through the environment, too.
-- 2. It forces the result to normal form, which would traverse the
-- whole run, so we force to WHNF ourselves and just return `()`.
Cr.perRunEnvWithCleanup
((,) kops <$> getPaths)
(const (cleanupPaths hasFS)) $ \(kops', p) -> do
!run <- flush hasFS p (insert kops')
-- Make sure to immediately close runs so we don't run out of
-- file handles. Ideally this would not be measured, but at
-- least it's pretty cheap.
Run.removeReference hasFS run
]
where
withEnv =
Cr.envWithCleanup
(writeBufferEnv conf)
writeBufferEnvCleanup

-- We'll remove the files on every run, so we can re-use the same run number.
getPaths :: IO Run.RunFsPaths
getPaths = pure (Run.RunFsPaths 0)

-- Simply remove the whole active directory.
cleanupPaths :: FS.HasFS IO FS.HandleIO -> IO ()
cleanupPaths hasFS = FS.removeDirectoryRecursive hasFS Run.activeRunsDir

insert :: [SerialisedKOp] -> WriteBuffer SerialisedKey SerialisedValue SerialisedBlob
insert = List.foldl' (\wb (k, e) -> WB.addEntryNormal k e wb) WB.empty

flush :: FS.HasFS IO FS.HandleIO -> Run.RunFsPaths -> WriteBuffer k v b -> IO (Run (FS.Handle (FS.HandleIO)))
flush = Run.fromWriteBuffer

type SerialisedKOp = (SerialisedKey, SerialisedEntry)
type SerialisedEntry = Normal.Update SerialisedValue SerialisedBlob

{-------------------------------------------------------------------------------
Orphans
-------------------------------------------------------------------------------}

-- TODO: move to shared Util/Extra/Orphan module?
deriving stock instance Generic (FS.HandleOS h)
deriving anyclass instance NFData (FS.HandleOS h)
deriving anyclass instance NFData FS.FsPath
deriving anyclass instance NFData h => NFData (FS.Handle h)
instance NFData (FS.HasFS m h) where
rnf x = x `seq` ()

-- TODO: move to src?
deriving stock instance Generic (Run.Run h)
deriving anyclass instance NFData h => NFData (Run.Run h)
deriving newtype instance NFData Run.RunFsPaths

{-------------------------------------------------------------------------------
Environments
-------------------------------------------------------------------------------}

-- | Config options describing a benchmarking scenario
data Config = Config {
-- | Name for the benchmark scenario described by this config.
name :: !String
-- | Number of key\/operation pairs in the run
, ninserts :: !Int
, nblobinserts :: !Int
, ndeletes :: !Int
, randomKey :: !(Rnd SerialisedKey)
, randomValue :: !(Rnd SerialisedValue)
, randomBlob :: !(Rnd SerialisedBlob)
}

type Rnd a = StdGen -> (a, StdGen)

defaultConfig :: Config
defaultConfig = Config {
name = "default"
, ninserts = 0
, nblobinserts = 0
, ndeletes = 0
, randomKey = (,) (error "randomKey not implemented")
, randomValue = (,) (error "randomValue not implemented")
, randomBlob = (,) (error "randomBlob not implemented")
}

configWord64 :: Config
configWord64 = defaultConfig {
randomKey = first serialiseKey . uniform @_ @Word64
, randomValue = first serialiseValue . uniform @_ @Word64
}

configUTxO :: Config
configUTxO = defaultConfig {
randomKey = first serialiseKey . uniform @_ @UTxOKey
, randomValue = first serialiseValue . uniform @_ @UTxOValue
, randomBlob = first serialiseBlob . randomUTxOBlob
}

writeBufferEnv ::
Config
-> IO ( FilePath -- ^ Temporary directory
, FS.HasFS IO FS.HandleIO
, [SerialisedKOp]
)
writeBufferEnv config = do
sysTmpDir <- getCanonicalTemporaryDirectory
benchTmpDir <- createTempDirectory sysTmpDir "writeBufferEnv"
let kops = lookupsEnv config (mkStdGen 17)
let hasFS = FS.ioHasFS (FS.MountPoint benchTmpDir)
pure (benchTmpDir, hasFS, kops)

writeBufferEnvCleanup ::
( FilePath -- ^ Temporary directory
, FS.HasFS IO FS.HandleIO
, [SerialisedKOp]
)
-> IO ()
writeBufferEnvCleanup (tmpDir, _, _) = do
removeDirectoryRecursive tmpDir

-- | Generate keys and entries to insert into the write buffer.
-- They are already serialised to exclude the cost from the benchmark.
lookupsEnv ::
Config
-> StdGen -- ^ RNG
-> [SerialisedKOp]
lookupsEnv Config {..} = take nentries . List.unfoldr (Just . randomKOp)
where
nentries = ninserts + nblobinserts + ndeletes

randomKOp :: Rnd SerialisedKOp
randomKOp g = let (!k, !g') = randomKey g
(!e, !g'') = randomEntry g'
in ((k, e), g'')

randomEntry :: Rnd SerialisedEntry
randomEntry = frequency
[ ( ninserts
, \g -> let (!v, !g') = randomValue g
in (Normal.Insert v Nothing, g')
)
, ( nblobinserts
, \g -> let (!v, !g') = randomValue g
(!b, !g'') = randomBlob g'
in (Normal.Insert v (Just b), g'')
)
, ( ndeletes
, \g -> (Normal.Delete, g)
)
]

frequency :: [(Int, Rnd a)] -> Rnd a
frequency xs0 g
| any ((< 0) . fst) xs0 = error "frequency: frequencies must be non-negative"
| tot == 0 = error "frequency: at least one frequency should be non-zero"
| otherwise = pick i xs0
where
(i, g') = uniformR (1, tot) g

tot = sum (map fst xs0)

pick n ((k,x):xs)
| n <= k = x g'
| otherwise = pick (n-k) xs
pick _ _ = error "frequency: pick used with empty list"

randomValueSized :: (Int, Int) -> Rnd SerialisedValue
randomValueSized range g =
let (!l, !g') = uniformR range g
(bs, !g'') = R.genByteString l g'
in (serialiseValue bs, g'')

{-------------------------------------------------------------------------------
UTxO keys, values and blobs
-------------------------------------------------------------------------------}

-- | A model of a UTxO key (256-bit hash)
newtype UTxOKey = UTxOKey Word256
deriving stock (Show, Generic)
deriving newtype ( Eq, Ord, NFData
, SerialiseKey
, Num, Enum, Real, Integral
)
deriving anyclass Uniform

-- | A model of a UTxO value (512-bit)
data UTxOValue = UTxOValue {
utxoValueHigh :: !Word256
, utxoValueLow :: !Word256
}
deriving stock (Show, Eq, Ord, Generic)
deriving anyclass (Uniform, NFData)

instance SerialiseValue UTxOValue where
serialiseValue (UTxOValue hi lo) = Class.serialiseValue lo <> Class.serialiseValue hi
deserialiseValue = error "deserialiseValue: unused"
deserialiseValueN = error "deserialiseValueN: unused"

instance SerialiseValue Word256 where
serialiseValue (Word256{word256hi, word256m1, word256m0, word256lo}) =
RB.RawBytes $ mkPrimVector 0 32 $ P.runByteArray $ do
ba <- P.newByteArray 32
P.writeByteArray ba 0 word256lo
P.writeByteArray ba 1 word256m0
P.writeByteArray ba 2 word256m1
P.writeByteArray ba 3 word256hi
return ba
deserialiseValue = error "deserialiseValue: unused"
deserialiseValueN = error "deserialiseValueN: unused"

-- | A blob of arbitrary size
newtype UTxOBlob = UTxOBlob BS.ByteString
deriving stock (Show, Eq, Ord, Generic)
deriving anyclass NFData

instance SerialiseValue UTxOBlob where
serialiseValue (UTxOBlob bs) = Class.serialiseValue bs
deserialiseValue = error "deserialiseValue: unused"
deserialiseValueN = error "deserialiseValueN: unused"

randomUTxOBlob :: Rnd UTxOBlob
randomUTxOBlob !g =
let (!len, !g') = uniformR (0, 0x2000) g
(!bs, !g'') = R.genByteString len g'
in (UTxOBlob bs, g'')
6 changes: 4 additions & 2 deletions bench/micro/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ import qualified Bench.Database.LSMTree.Internal.BloomFilter
import qualified Bench.Database.LSMTree.Internal.IndexCompact
import qualified Bench.Database.LSMTree.Internal.Lookup
import qualified Bench.Database.LSMTree.Internal.RawPage
import qualified Bench.Database.LSMTree.Internal.WriteBuffer
import Criterion.Main (defaultMain)

main :: IO ()
main = defaultMain [
Bench.Database.LSMTree.Internal.Lookup.benchmarks
, Bench.Database.LSMTree.Internal.BloomFilter.benchmarks
Bench.Database.LSMTree.Internal.BloomFilter.benchmarks
, Bench.Database.LSMTree.Internal.IndexCompact.benchmarks
, Bench.Database.LSMTree.Internal.Lookup.benchmarks
, Bench.Database.LSMTree.Internal.RawPage.benchmarks
, Bench.Database.LSMTree.Internal.WriteBuffer.benchmarks
]
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ benchmark lsm-tree-micro-bench
Bench.Database.LSMTree.Internal.IndexCompact
Bench.Database.LSMTree.Internal.Lookup
Bench.Database.LSMTree.Internal.RawPage
Bench.Database.LSMTree.Internal.WriteBuffer
Database.LSMTree.Extras
Database.LSMTree.Extras.Generators
Database.LSMTree.Extras.Orphans
Expand Down
14 changes: 1 addition & 13 deletions src-extras/Database/LSMTree/Extras/Generators.hs
Original file line number Diff line number Diff line change
Expand Up @@ -575,17 +575,8 @@ shrinkSlice (RawBytes pvec) =
, m <- QC.shrink (PV.length pvec - n)
]

instance SerialiseKey RawBytes where
serialiseKey = id
deserialiseKey = id

instance SerialiseValue RawBytes where
serialiseValue = id
deserialiseValue = id
deserialiseValueN = mconcat

-- TODO: makes collisions very unlikely
deriving newtype instance Arbitrary SerialisedKey
deriving newtype instance SerialiseKey SerialisedKey

instance Arbitrary SerialisedValue where
-- good mix of sizes, including larger than two pages, also some slices
Expand All @@ -600,10 +591,7 @@ instance Arbitrary SerialisedValue where
| RB.size rb > 64 = coerce (shrink (LargeRawBytes rb))
| otherwise = coerce (shrink rb)

deriving newtype instance SerialiseValue SerialisedValue

deriving newtype instance Arbitrary SerialisedBlob
deriving newtype instance SerialiseValue SerialisedBlob

newtype LargeRawBytes = LargeRawBytes RawBytes
deriving Show
Expand Down

0 comments on commit dbba9fd

Please sign in to comment.