diff --git a/bench/macro/initial-lsm-bench.hs b/bench/macro/initial-lsm-bench.hs new file mode 100644 index 00000000..ed58badd --- /dev/null +++ b/bench/macro/initial-lsm-bench.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE NoFieldSelectors #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +module Main (main) where + +import Control.Applicative ((<**>)) +import qualified Crypto.Hash.SHA256 as SHA256 +import qualified Data.Binary as B +import qualified Data.ByteString as BS +import Data.Foldable (foldl') +import qualified Data.Primitive as P +import Data.Word (Word64) +import qualified Database.LSMTree.Internal.Normal as N +import qualified Database.LSMTree.Internal.Run as Run +import qualified Database.LSMTree.Internal.WriteBuffer as WB +import Database.LSMTree.Orphans () +import qualified Options.Applicative as O +import qualified System.Clock as Clock +import qualified System.FS.API as FS +import qualified System.FS.IO as FsIO +import System.IO.Temp (withTempDirectory) +import System.Mem (performMajorGC) +import Text.Printf (printf) + +------------------------------------------------------------------------------- +-- Setup +------------------------------------------------------------------------------- + +-- Using primitive makes serialisation overhead as low as possible +type K = BS.ByteString +type V = BS.ByteString +type B = P.ByteArray + +------------------------------------------------------------------------------- +-- Main +------------------------------------------------------------------------------- + +main :: IO () +main = do + opts <- O.execParser optsP' + print opts + withSessionDir $ \sessionRoot -> do + print sessionRoot + performMajorGC + ((), seconds) <- timed (benchRun opts sessionRoot) + printf "time %f s\n" seconds + + where + optsP' = O.info (optsP <**> O.helper) O.fullDesc + +------------------------------------------------------------------------------- +-- Bench +------------------------------------------------------------------------------- + +theValue :: V +theValue = BS.replicate 60 120 -- 'x' +{-# NOINLINE theValue #-} + +-- We generate keys by hashing a word64 and adding two "random" bytes. +-- This way we can ensure that keys are distinct. +-- +-- I think this approach of generating keys should match UTxO quite well. +-- This is purely CPU bound operation, and we should be able to push IO +-- when doing these in between. +makeKey :: Word64 -> K +makeKey w64 = SHA256.hashlazy (B.encode w64) <> "==" + +benchRun :: Opts -> FilePath -> IO () +benchRun opts sessionRoot = do + let fs = FsIO.ioHasFS (FS.MountPoint sessionRoot) + -- flush write buffer + let wb :: WB.WriteBuffer K V B + !wb = foldl' + (\wb' i -> WB.addEntryNormal (makeKey i) (N.Insert theValue Nothing) wb') + WB.empty + [ 1 .. fromIntegral (opts.size) :: Word64 ] + + _run <- Run.fromWriteBuffer fs (Run.RunFsPaths 42) wb + return () + +------------------------------------------------------------------------------- +-- Session +------------------------------------------------------------------------------- + +withSessionDir :: (FilePath -> IO a) -> IO a +withSessionDir = withTempDirectory "" "session" + +------------------------------------------------------------------------------- +-- Opts +------------------------------------------------------------------------------- + +data Opts = Opts + { size :: Int + -- TODO: do we need a flag to not remove temporary directory? + } + deriving Show + +optsP :: O.Parser Opts +optsP = pure Opts + <*> O.option O.auto (O.long "size" <> O.value 500000 <> O.help "Size of initial run") + +------------------------------------------------------------------------------- +-- clock +------------------------------------------------------------------------------- + +timed :: IO a -> IO (a, Double) +timed action = do + t1 <- Clock.getTime Clock.Monotonic + x <- action + t2 <- Clock.getTime Clock.Monotonic + return (x, fromIntegral (Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)) * 1e-9) diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 0c32634a..8c379639 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -342,6 +342,27 @@ benchmark lsm-tree-macro-bench ghc-options: -rtsopts -with-rtsopts=-T -threaded +benchmark initial-lsm-bench + import: warnings, wno-x-partial + default-language: Haskell2010 + type: exitcode-stdio-1.0 + hs-source-dirs: bench/macro test + main-is: initial-lsm-bench.hs + build-depends: + , base + , binary + , bytestring + , clock + , cryptohash-sha256 + , fs-api + , lsm-tree + , lsm-tree:lsm-tree-utils + , optparse-applicative + , primitive + , temporary + + ghc-options: -rtsopts -with-rtsopts=-T -threaded + library kmerge import: warnings, wno-x-partial default-language: Haskell2010