From 425625a19eeb5d7d1671ba5de687aa39e5629f53 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Tue, 16 Apr 2024 14:14:56 +0300 Subject: [PATCH] WIP initial-lsm-bench --- bench/macro/initial-lsm-bench.hs | 140 +++++++++++++++++++++++++++++++ lsm-tree.cabal | 20 +++++ 2 files changed, 160 insertions(+) create mode 100644 bench/macro/initial-lsm-bench.hs diff --git a/bench/macro/initial-lsm-bench.hs b/bench/macro/initial-lsm-bench.hs new file mode 100644 index 00000000..a5e68714 --- /dev/null +++ b/bench/macro/initial-lsm-bench.hs @@ -0,0 +1,140 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE NoFieldSelectors #-} +{-# LANGUAGE OverloadedRecordDot #-} +module Main (main) where + +import Control.Applicative ((<**>)) +import Control.Monad.ST (ST, runST) +import Data.Foldable (foldl') +import qualified Data.Primitive as P +import Data.Strict.Tuple (Pair ((:!:))) +import qualified Database.LSMTree.Internal.Normal as N +import qualified Database.LSMTree.Internal.Run as Run +import qualified Database.LSMTree.Internal.WriteBuffer as WB +import Database.LSMTree.Orphans () +import qualified Options.Applicative as O +import qualified System.Clock as Clock +import qualified System.FS.API as FS +import qualified System.FS.IO as FsIO +import System.IO.Temp (withTempDirectory) +import System.Mem (performMajorGC) +import qualified System.Random.SplitMix as SM +import Text.Printf (printf) + +------------------------------------------------------------------------------- +-- Setup +------------------------------------------------------------------------------- + +-- Using primitive makes serialisation overhead as low as possible +type K = P.ByteArray +type V = P.ByteArray +type B = P.ByteArray + +------------------------------------------------------------------------------- +-- Main +------------------------------------------------------------------------------- + +main :: IO () +main = do + opts <- O.execParser optsP' + print opts + withSessionDir $ \sessionRoot -> do + print sessionRoot + performMajorGC + ((), seconds) <- timed (benchRun opts sessionRoot) + printf "time %f s\n" seconds + + where + optsP' = O.info (optsP <**> O.helper) O.fullDesc + +------------------------------------------------------------------------------- +-- Bench +------------------------------------------------------------------------------- + +-- The benchmark should use 34 byte keys, and 60 byte values. +-- This corresponds roughly to the UTxO. +generateKeyValue :: SM.SMGen -> (SM.SMGen -> K -> V -> r) -> r +generateKeyValue g kont = do + -- key + let (!k, !gk) = runST $ do + -- we allocate larger bytearray. + mba <- P.newByteArray 40 + g0 <- generateWord64 mba 0 g + g1 <- generateWord64 mba 1 g0 + g2 <- generateWord64 mba 2 g1 + g3 <- generateWord64 mba 3 g2 + g4 <- generateWord64 mba 4 g3 + P.shrinkMutableByteArray mba 34 + ba <- P.unsafeFreezeByteArray mba + return (ba, g4) + + -- value + let (!v, !gv) = runST $ do + mba <- P.newByteArray 64 + g0 <- generateWord64 mba 0 gk + g1 <- generateWord64 mba 1 g0 + g2 <- generateWord64 mba 2 g1 + g3 <- generateWord64 mba 3 g2 + g4 <- generateWord64 mba 4 g3 + g5 <- generateWord64 mba 5 g4 + g6 <- generateWord64 mba 6 g5 + g7 <- generateWord64 mba 7 g6 + P.shrinkMutableByteArray mba 60 + ba <- P.unsafeFreezeByteArray mba + return (ba, g7) + + kont gv k v + where + generateWord64 :: P.MutableByteArray s -> Int -> SM.SMGen -> ST s SM.SMGen + generateWord64 mba i g' = do + let (!w64, !g'') = SM.nextWord64 g' + P.writeByteArray mba i w64 + return g'' + +{-# INLINE generateKeyValue #-} + +benchRun :: Opts -> FilePath -> IO () +benchRun opts sessionRoot = do + let fs = FsIO.ioHasFS (FS.MountPoint sessionRoot) + let initialGen = SM.mkSMGen 42 + -- flush write buffer + let wb :: WB.WriteBuffer K V B + !wb :!: _ = foldl' + (\(wb' :!: g) _ -> generateKeyValue g $ \g' k v -> WB.addEntryNormal k (N.Insert v Nothing) wb' :!: g') + (WB.empty :!: initialGen) + [ 1 .. opts.size ] + + _run <- Run.fromWriteBuffer fs (Run.RunFsPaths 42) wb + return () + +------------------------------------------------------------------------------- +-- Session +------------------------------------------------------------------------------- + +withSessionDir :: (FilePath -> IO a) -> IO a +withSessionDir = withTempDirectory "" "session" + +------------------------------------------------------------------------------- +-- Opts +------------------------------------------------------------------------------- + +data Opts = Opts + { size :: Int + -- TODO: do we need a flag to not remove temporary directory? + } + deriving Show + +optsP :: O.Parser Opts +optsP = pure Opts + <*> O.option O.auto (O.long "size" <> O.value 500000 <> O.help "Size of initial run") + +------------------------------------------------------------------------------- +-- clock +------------------------------------------------------------------------------- + +timed :: IO a -> IO (a, Double) +timed action = do + t1 <- Clock.getTime Clock.Monotonic + x <- action + t2 <- Clock.getTime Clock.Monotonic + return (x, fromIntegral (Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)) * 1e-9) diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 0c32634a..f9bded5b 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -342,6 +342,26 @@ benchmark lsm-tree-macro-bench ghc-options: -rtsopts -with-rtsopts=-T -threaded +benchmark initial-lsm-bench + import: warnings, wno-x-partial + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: bench/macro test + main-is: initial-lsm-bench.hs + build-depends: + , base + , clock + , fs-api + , lsm-tree + , lsm-tree:lsm-tree-utils + , optparse-applicative + , primitive + , splitmix + , strict + , temporary + + ghc-options: -rtsopts -with-rtsopts=-T -threaded + library kmerge import: warnings, wno-x-partial default-language: Haskell2010