-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
create micro benchmark for write buffer
- Loading branch information
Showing
7 changed files
with
357 additions
and
18 deletions.
There are no files selected for viewing
308 changes: 308 additions & 0 deletions
308
bench/micro/Bench/Database/LSMTree/Internal/WriteBuffer.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,308 @@ | ||
{-# LANGUAGE DeriveAnyClass #-} | ||
{-# LANGUAGE DerivingStrategies #-} | ||
{-# LANGUAGE RecordWildCards #-} | ||
{-# OPTIONS_GHC -Wno-orphans #-} | ||
|
||
module Bench.Database.LSMTree.Internal.WriteBuffer (benchmarks) where | ||
|
||
import Control.DeepSeq (NFData (..)) | ||
import Criterion.Main (Benchmark, bench, bgroup) | ||
import qualified Criterion.Main as Cr | ||
import Data.Bifunctor (first) | ||
import qualified Data.ByteString as BS | ||
import qualified Data.List as List | ||
import qualified Data.Primitive as P | ||
import Data.WideWord.Word256 | ||
import Data.Word (Word64) | ||
import Database.LSMTree.Extras.Orphans () | ||
import qualified Database.LSMTree.Internal.RawBytes as RB | ||
import Database.LSMTree.Internal.Run (Run) | ||
import qualified Database.LSMTree.Internal.Run as Run | ||
import Database.LSMTree.Internal.Serialise | ||
import qualified Database.LSMTree.Internal.Serialise.Class as Class | ||
import Database.LSMTree.Internal.Vector (mkPrimVector) | ||
import Database.LSMTree.Internal.WriteBuffer (WriteBuffer) | ||
import qualified Database.LSMTree.Internal.WriteBuffer as WB | ||
import qualified Database.LSMTree.Normal as Normal | ||
import GHC.Generics (Generic) | ||
import Prelude hiding (getContents) | ||
import System.Directory (removeDirectoryRecursive) | ||
import qualified System.FS.API as FS | ||
import qualified System.FS.IO as FS | ||
import qualified System.FS.IO.Internal.Handle as FS (HandleOS (..)) | ||
import System.IO.Temp | ||
import qualified System.Random as R | ||
import System.Random (StdGen, Uniform, mkStdGen, uniform, uniformR) | ||
|
||
benchmarks :: Benchmark | ||
benchmarks = bgroup "Bench.Database.LSMTree.Internal.WriteBuffer" [ | ||
benchWriteBuffer configWord64 | ||
{ name = "word64-insert-10k" | ||
, ninserts = 10_000 | ||
} | ||
, benchWriteBuffer configWord64 | ||
{ name = "word64-delete-10k" | ||
, ndeletes = 10_000 | ||
} | ||
, benchWriteBuffer configWord64 | ||
{ name = "word64-blob-10k" | ||
, nblobinserts = 10_000 | ||
} | ||
-- multi-page inserts (or almost so) | ||
, benchWriteBuffer configWord64 | ||
{ name = "insert-page-2k" -- 1 page | ||
, ninserts = 2_000 | ||
, randomValue = first serialiseValue . randomByteStringR (4056, 4056) | ||
} | ||
, benchWriteBuffer configWord64 | ||
{ name = "insert-page-plus-byte-2k" -- 1 page + 1 byte | ||
, ninserts = 2_000 | ||
, randomValue = first serialiseValue . randomByteStringR (4057, 4057) | ||
} | ||
, benchWriteBuffer configWord64 | ||
{ name = "insert-huge-2k" -- 3-5 pages | ||
, ninserts = 2_000 | ||
, randomValue = first serialiseValue . randomByteStringR (10_000, 20_000) | ||
} | ||
-- UTxO workload | ||
-- compare different buffer sizes to see superlinear cost of map insertion | ||
, benchWriteBuffer configUTxO | ||
{ name = "utxo-2k" | ||
, ninserts = 1_000 | ||
, ndeletes = 1_000 | ||
} | ||
, benchWriteBuffer configUTxO | ||
{ name = "utxo-10k" | ||
, ninserts = 5_000 | ||
, ndeletes = 5_000 | ||
} | ||
, benchWriteBuffer configUTxO | ||
{ name = "utxo-50k" | ||
, ninserts = 25_000 | ||
, ndeletes = 25_000 | ||
} | ||
] | ||
|
||
benchWriteBuffer :: Config -> Benchmark | ||
benchWriteBuffer conf@Config{name} = | ||
withEnv $ \ ~(_dir, hasFS, kops) -> | ||
bgroup name [ | ||
bench "insert" $ | ||
Cr.whnf (\kops' -> insert kops') kops | ||
, Cr.env (pure $ insert kops) $ \wb -> | ||
bench "flush" $ | ||
Cr.perRunEnvWithCleanup getPaths (const (cleanupPaths hasFS)) $ \p -> do | ||
!run <- flush hasFS p wb | ||
Run.removeReference hasFS run | ||
, bench "insert+flush" $ | ||
-- To make sure the WriteBuffer really gets recomputed on every run, | ||
-- we'd like to do: `whnfAppIO (kops' -> ...) kops`. | ||
-- However, we also need per-run cleanup to avoid running out of | ||
-- disk space. We use `perRunEnvWithCleanup`, which has two issues: | ||
-- 1. Just as `whnfAppIO` etc., it takes an IO action and returns | ||
-- `Benchmarkable`, which does not compose. As a workaround, we | ||
-- thread `kops` through the environment, too. | ||
-- 2. It forces the result to normal form, which would traverse the | ||
-- whole run, so we force to WHNF ourselves and just return `()`. | ||
Cr.perRunEnvWithCleanup | ||
((,) kops <$> getPaths) | ||
(const (cleanupPaths hasFS)) $ \(kops', p) -> do | ||
!run <- flush hasFS p (insert kops') | ||
-- Make sure to immediately close runs so we don't run out of | ||
-- file handles. Ideally this would not be measured, but at | ||
-- least it's pretty cheap. | ||
Run.removeReference hasFS run | ||
] | ||
where | ||
withEnv = | ||
Cr.envWithCleanup | ||
(writeBufferEnv conf) | ||
writeBufferEnvCleanup | ||
|
||
-- We'll remove the files on every run, so we can re-use the same run number. | ||
getPaths :: IO Run.RunFsPaths | ||
getPaths = pure (Run.RunFsPaths 0) | ||
|
||
-- Simply remove the whole active directory. | ||
cleanupPaths :: FS.HasFS IO FS.HandleIO -> IO () | ||
cleanupPaths hasFS = FS.removeDirectoryRecursive hasFS Run.activeRunsDir | ||
|
||
insert :: [SerialisedKOp] -> WriteBuffer SerialisedKey SerialisedValue SerialisedBlob | ||
insert = List.foldl' (\wb (k, e) -> WB.addEntryNormal k e wb) WB.empty | ||
|
||
flush :: FS.HasFS IO FS.HandleIO -> Run.RunFsPaths -> WriteBuffer k v b -> IO (Run (FS.Handle (FS.HandleIO))) | ||
flush = Run.fromWriteBuffer | ||
|
||
type SerialisedKOp = (SerialisedKey, SerialisedEntry) | ||
type SerialisedEntry = Normal.Update SerialisedValue SerialisedBlob | ||
|
||
{------------------------------------------------------------------------------- | ||
Orphans | ||
-------------------------------------------------------------------------------} | ||
|
||
-- TODO: move to shared Util/Extra/Orphan module? | ||
deriving stock instance Generic (FS.HandleOS h) | ||
deriving anyclass instance NFData (FS.HandleOS h) | ||
deriving anyclass instance NFData FS.FsPath | ||
deriving anyclass instance NFData h => NFData (FS.Handle h) | ||
instance NFData (FS.HasFS m h) where | ||
rnf x = x `seq` () | ||
|
||
-- TODO: move to src? | ||
deriving stock instance Generic (Run.Run h) | ||
deriving anyclass instance NFData h => NFData (Run.Run h) | ||
deriving newtype instance NFData Run.RunFsPaths | ||
|
||
{------------------------------------------------------------------------------- | ||
Environments | ||
-------------------------------------------------------------------------------} | ||
|
||
-- | Config options describing a benchmarking scenario | ||
data Config = Config { | ||
-- | Name for the benchmark scenario described by this config. | ||
name :: !String | ||
-- | Number of key\/operation pairs in the run | ||
, ninserts :: !Int | ||
, nblobinserts :: !Int | ||
, ndeletes :: !Int | ||
, randomKey :: !(Rnd SerialisedKey) | ||
, randomValue :: !(Rnd SerialisedValue) | ||
, randomBlob :: !(Rnd SerialisedBlob) | ||
} | ||
|
||
type Rnd a = StdGen -> (a, StdGen) | ||
|
||
defaultConfig :: Config | ||
defaultConfig = Config { | ||
name = "default" | ||
, ninserts = 0 | ||
, nblobinserts = 0 | ||
, ndeletes = 0 | ||
, randomKey = (,) (error "randomKey not implemented") | ||
, randomValue = (,) (error "randomValue not implemented") | ||
, randomBlob = (,) (error "randomBlob not implemented") | ||
} | ||
|
||
configWord64 :: Config | ||
configWord64 = defaultConfig { | ||
randomKey = first serialiseKey . uniform @_ @Word64 | ||
, randomValue = first serialiseValue . uniform @_ @Word64 | ||
, randomBlob = first serialiseBlob . randomByteStringR (0, 0x2000) -- up to 8 kB | ||
} | ||
|
||
configUTxO :: Config | ||
configUTxO = defaultConfig { | ||
randomKey = first serialiseKey . uniform @_ @UTxOKey | ||
, randomValue = first serialiseValue . uniform @_ @UTxOValue | ||
} | ||
|
||
writeBufferEnv :: | ||
Config | ||
-> IO ( FilePath -- ^ Temporary directory | ||
, FS.HasFS IO FS.HandleIO | ||
, [SerialisedKOp] | ||
) | ||
writeBufferEnv config = do | ||
sysTmpDir <- getCanonicalTemporaryDirectory | ||
benchTmpDir <- createTempDirectory sysTmpDir "writeBufferEnv" | ||
let kops = lookupsEnv config (mkStdGen 17) | ||
let hasFS = FS.ioHasFS (FS.MountPoint benchTmpDir) | ||
pure (benchTmpDir, hasFS, kops) | ||
|
||
writeBufferEnvCleanup :: | ||
( FilePath -- ^ Temporary directory | ||
, FS.HasFS IO FS.HandleIO | ||
, [SerialisedKOp] | ||
) | ||
-> IO () | ||
writeBufferEnvCleanup (tmpDir, _, _) = do | ||
removeDirectoryRecursive tmpDir | ||
|
||
-- | Generate keys and entries to insert into the write buffer. | ||
-- They are already serialised to exclude the cost from the benchmark. | ||
lookupsEnv :: | ||
Config | ||
-> StdGen -- ^ RNG | ||
-> [SerialisedKOp] | ||
lookupsEnv Config {..} = take nentries . List.unfoldr (Just . randomKOp) | ||
where | ||
nentries = ninserts + nblobinserts + ndeletes | ||
|
||
randomKOp :: Rnd SerialisedKOp | ||
randomKOp g = let (!k, !g') = randomKey g | ||
(!e, !g'') = randomEntry g' | ||
in ((k, e), g'') | ||
|
||
randomEntry :: Rnd SerialisedEntry | ||
randomEntry = frequency | ||
[ ( ninserts | ||
, \g -> let (!v, !g') = randomValue g | ||
in (Normal.Insert v Nothing, g') | ||
) | ||
, ( nblobinserts | ||
, \g -> let (!v, !g') = randomValue g | ||
(!b, !g'') = randomBlob g' | ||
in (Normal.Insert v (Just b), g'') | ||
) | ||
, ( ndeletes | ||
, \g -> (Normal.Delete, g) | ||
) | ||
] | ||
|
||
frequency :: [(Int, Rnd a)] -> Rnd a | ||
frequency xs0 g | ||
| any ((< 0) . fst) xs0 = error "frequency: frequencies must be non-negative" | ||
| tot == 0 = error "frequency: at least one frequency should be non-zero" | ||
| otherwise = pick i xs0 | ||
where | ||
(i, g') = uniformR (1, tot) g | ||
|
||
tot = sum (map fst xs0) | ||
|
||
pick n ((k,x):xs) | ||
| n <= k = x g' | ||
| otherwise = pick (n-k) xs | ||
pick _ _ = error "frequency: pick used with empty list" | ||
|
||
randomByteStringR :: (Int, Int) -> Rnd BS.ByteString | ||
randomByteStringR range g = | ||
let (!l, !g') = uniformR range g | ||
in R.genByteString l g' | ||
|
||
{------------------------------------------------------------------------------- | ||
UTxO keys, values and blobs | ||
-------------------------------------------------------------------------------} | ||
|
||
-- | A model of a UTxO key (256-bit hash) | ||
newtype UTxOKey = UTxOKey Word256 | ||
deriving stock (Show, Generic) | ||
deriving newtype ( Eq, Ord, NFData | ||
, SerialiseKey | ||
, Num, Enum, Real, Integral | ||
) | ||
deriving anyclass Uniform | ||
|
||
-- | A model of a UTxO value (512-bit) | ||
data UTxOValue = UTxOValue { | ||
utxoValueHigh :: !Word256 | ||
, utxoValueLow :: !Word256 | ||
} | ||
deriving stock (Show, Eq, Ord, Generic) | ||
deriving anyclass (Uniform, NFData) | ||
|
||
instance SerialiseValue UTxOValue where | ||
serialiseValue (UTxOValue hi lo) = Class.serialiseValue lo <> Class.serialiseValue hi | ||
deserialiseValue = error "deserialiseValue: unused" | ||
deserialiseValueN = error "deserialiseValueN: unused" | ||
|
||
instance SerialiseValue Word256 where | ||
serialiseValue (Word256{word256hi, word256m1, word256m0, word256lo}) = | ||
RB.RawBytes $ mkPrimVector 0 32 $ P.runByteArray $ do | ||
ba <- P.newByteArray 32 | ||
P.writeByteArray ba 0 word256lo | ||
P.writeByteArray ba 1 word256m0 | ||
P.writeByteArray ba 2 word256m1 | ||
P.writeByteArray ba 3 word256hi | ||
return ba | ||
deserialiseValue = error "deserialiseValue: unused" | ||
deserialiseValueN = error "deserialiseValueN: unused" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.