Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisdral committed Apr 25, 2024
1 parent 431901b commit a429b2d
Show file tree
Hide file tree
Showing 4 changed files with 327 additions and 2 deletions.
6 changes: 4 additions & 2 deletions bench/macro/Bench/Database/LSMTree/Internal/BloomFilter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ module Bench.Database.LSMTree.Internal.BloomFilter (
benchmarks
) where

import Control.DeepSeq
import Control.Exception (evaluate)
import Control.Monad
import Control.Monad.ST
Expand Down Expand Up @@ -76,7 +77,7 @@ benchmarks = do

putStrLn "Generating bloom filters..."
let rng0 = mkStdGen 42
vbs <- elemManyEnv filterSizes rng0
!vbs <- elemManyEnv filterSizes rng0
putStrLn " finished."
putStrLn ""

Expand Down Expand Up @@ -233,7 +234,8 @@ elemManyEnv filterSizes rng0 =
(cycle [ mb'
| (mb, (_, sizeFactor, _, _)) <- zip mbs filterSizes
, mb' <- replicate sizeFactor mb ]))
Vector.fromList <$> mapM Bloom.unsafeFreeze mbs
vbs <- Vector.fromList <$> mapM Bloom.unsafeFreeze mbs
pure $!! vbs

-- | This gives us a baseline cost of just calculating the series of keys.
benchBaseline :: Vector (Bloom SerialisedKey) -> StdGen -> Int -> ()
Expand Down
317 changes: 317 additions & 0 deletions bench/macro/Bench/Database/LSMTree/Internal/Lookup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE TypeApplications #-}

module Bench.Database.LSMTree.Internal.Lookup (
benchmarks
) where

import Control.DeepSeq
import Control.Exception (evaluate)
import Control.Monad
import Data.Bits ((.&.))
import Data.BloomFilter (Bloom)
import qualified Data.BloomFilter as Bloom
import qualified Data.BloomFilter.Hash as Bloom
import qualified Data.BloomFilter.Mutable as MBloom
import Data.List (foldl')
import Data.Proxy
import Data.Time
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.Vector as Vector
import qualified Data.Vector.Unboxed as VU
import Data.WideWord.Word256 (Word256)
import Data.Word (Word64)
import Database.LSMTree.Extras.Orphans ()
import Database.LSMTree.Extras.UTxO
import Database.LSMTree.Internal.Entry (Entry (Insert),
NumEntries (..))
import Database.LSMTree.Internal.IndexCompact (IndexCompact)
import Database.LSMTree.Internal.Lookup
import Database.LSMTree.Internal.Monkey
import Database.LSMTree.Internal.Run (RunFsPaths (..))
import qualified Database.LSMTree.Internal.RunBuilder as MRun
import Database.LSMTree.Internal.Serialise (SerialisedKey,
serialiseKey, serialiseValue)
import GHC.Stats
import Numeric
import qualified System.FS.API as FS
import System.FS.API (MountPoint (..))
import qualified System.FS.IO as FS
import System.IO.Temp (withSystemTempDirectory)
import System.Mem (performMajorGC)
import System.Random
import Text.Printf (printf)


-- Benchmark parameters you can tweak. The defaults are rather small, so the
-- runtime is short and do not show the effects of data sizes no longer fitting
-- into the CPU cache.

-- | The number of entries in the filter in the smallest LSM runs is
-- @2^benchmarkSizeBase@
benchmarkSizeBase :: SizeBase
benchmarkSizeBase = 16

-- | The number of lookups to do. This has to be smaller than the total size of
-- all the filters (otherwise we will not get true positive probes, which is
-- part of the point of this benchmark).
benchmarkNumLookups :: Int
benchmarkNumLookups = 25_000_000

benchmarkBatchSize :: BatchSize
benchmarkBatchSize = BatchSize 64

benchmarkExtBatchSize :: Int
benchmarkExtBatchSize = 256

-- A value of 0.02 results in 5 hashes and just over 8 bits per key.
benchmarkRequestedFPR :: RequestedFPR
benchmarkRequestedFPR = 0.02

benchmarks :: IO ()
benchmarks = do
#ifdef NO_IGNORE_ASSERTS
putStrLn "BENCHMARKING A BUILD WITH -fno-ignore-asserts"
#endif

enabled <- getRTSStatsEnabled
when (not enabled) $ fail "Need RTS +T statistics enabled"
let filterSizes = lsmStyleBloomFilters benchmarkSizeBase
benchmarkRequestedFPR
putStrLn "Bloom filter stats:"
putStrLn "(numEntries, sizeFactor, numBits, numHashFuncs)"
mapM_ print filterSizes
putStrLn $ "total number of entries:\t " ++ show (totalNumEntries filterSizes)
putStrLn $ "total filter size in bytes:\t " ++ show (totalNumBytes filterSizes)
putStrLn $ "total number of key lookups:\t " ++ show benchmarkNumLookups

unless (totalNumEntriesSanityCheck benchmarkSizeBase filterSizes) $
fail "totalNumEntriesSanityCheck failed"
unless (totalNumEntries filterSizes >= benchmarkNumLookups) $
fail "number of key lookups is more than number of entries"

putStrLn "Generating bloom filters..."
let keyRng0 = mkStdGen 42
valRng0 = mkStdGen 17
(blooms, indexes, handles, numEntriess) <- prepLookupsEnv filterSizes keyRng0 valRng0

putStrLn " finished."
putStrLn ""

baseline <-
benchmark "baseline"
"(This is the cost of just computing the keys.)"
(benchBaseline blooms keyRng0) benchmarkNumLookups
(0, 0)
#ifdef NO_IGNORE_ASSERTS
80 -- https://gitlab.haskell.org/ghc/ghc/-/issues/24625
#else
48
#endif

hashcost <-
benchmark "makeCheapHashes"
"(This includes the cost of hashing the keys, less the cost of computing the keys)"
(benchMakeCheapHashes blooms keyRng0) benchmarkNumLookups
baseline
0

_ <-
benchmark "elemCheapHashes"
"(this is the simple one-by-one lookup, less the cost of computing and hashing the keys)"
(benchElemCheapHashes blooms keyRng0) benchmarkNumLookups
(fst hashcost + fst baseline, snd hashcost + snd baseline)
0

return ()

type Alloc = Int

benchmark :: String
-> String
-> (Int -> ())
-> Int
-> (NominalDiffTime, Alloc)
-> Int
-> IO (NominalDiffTime, Alloc)
benchmark name description action n (subtractTime, subtractAlloc) expectedAlloc = do
putStrLn $ "Benchmarking " ++ name ++ " ... "
putStrLn description
performMajorGC
allocBefore <- allocated_bytes <$> getRTSStats
timeBefore <- getCurrentTime
evaluate (action n)
timeAfter <- getCurrentTime
performMajorGC
allocAfter <- allocated_bytes <$> getRTSStats
putStrLn "Finished."
let allocTotal :: Alloc
timeTotal = timeAfter `diffUTCTime` timeBefore
allocTotal = fromIntegral allocAfter - fromIntegral allocBefore
timeNet = timeTotal - subtractTime
allocNet = allocTotal - subtractAlloc

timePerKey, allocPerKey :: Double
timePerKey = realToFrac timeNet / fromIntegral n
allocPerKey = fromIntegral allocNet / fromIntegral n
let printStat :: String -> Double -> String -> IO ()
printStat label v unit =
putStrLn $ label ++ showGFloat (Just 2) v (' ':unit)
printStat "Time total: " (realToFrac timeTotal) "seconds"
printStat "Alloc total: " (fromIntegral allocTotal) "bytes"
printStat "Time net: " (realToFrac timeNet) "seconds"
printStat "Alloc net: " (fromIntegral allocNet) "bytes"
printStat "Time net per key: " timePerKey "seconds"
printStat "Alloc net per key: " allocPerKey "bytes"

unless (truncate allocPerKey == expectedAlloc) $ do
printf "WARNING: expecting %d, got %d bytes allocated per key\n"
expectedAlloc
(truncate allocPerKey :: Int)

putStrLn ""
return (timeNet, allocNet)

-- | (numEntries, sizeFactor, numBits, numHashFuncs)
type BloomFilterSizeInfo = (Int, Int, Word64, Int)
type SizeBase = Int
type RequestedFPR = Double

-- | Calculate the sizes of a realistic LSM style set of Bloom filters, one
-- for each LSM run. This uses base 4, with 4 disk levels, using tiering
-- for internal levels and leveling for the final (biggest) level.
--
-- Due to the incremental merging, each level actually has (in the worst case)
-- 2x the number of runs, hence 8 per level for tiering levels.
--
lsmStyleBloomFilters :: SizeBase -> RequestedFPR -> [BloomFilterSizeInfo]
lsmStyleBloomFilters l1 requestedFPR =
[ (numEntries, sizeFactor, fromIntegral numBits, numHashFuncs)
| (numEntries, sizeFactor)
<- replicate 8 (2^(l1+0), 1) -- 8 runs at level 1 (tiering)
++ replicate 8 (2^(l1+2), 4) -- 8 runs at level 2 (tiering)
++ replicate 8 (2^(l1+4),16) -- 8 runs at level 3 (tiering)
++ [(2^(l1+8),256)] -- 1 run at level 4 (leveling)
, let numBits = monkeyBits numEntries requestedFPR
numHashFuncs = monkeyHashFuncs numBits numEntries
]

totalNumEntries, totalNumBytes :: [BloomFilterSizeInfo] -> Int
totalNumEntries filterSizes =
sum [ numEntries | (numEntries, _, _, _) <- filterSizes ]

totalNumBytes filterSizes =
fromIntegral $ sum [ numBits | (_,_,numBits,_) <- filterSizes ] `div` 8

totalNumEntriesSanityCheck :: SizeBase -> [BloomFilterSizeInfo] -> Bool
totalNumEntriesSanityCheck l1 filterSizes =
totalNumEntries filterSizes
==
sum [ 2^l1 * sizeFactor | (_, sizeFactor, _, _) <- filterSizes ]


-- | Input environment for benchmarking 'Bloom.elemMany'.
--
-- The idea here is to have a collection of bloom filters corresponding to
-- the sizes used in a largeish LSM. In particular, the sizes are in increasing
-- powers of 4, and the largest ones should be bigger than the CPU L3 cache.
-- Furthermore, the keys in the filters are non-overlapping, and lookups will
-- be true positives in only one filter. Thus most lookups will be true
-- negatives.
--
-- The goal is to benchmark the benefits of optimisations for the LSM situation:
--
-- * where the same key is being looked up in many filters,
-- * with a hit in only one filter expected, and
-- * where the total size of the filters is too large to fully fit in cache
-- (though the smaller ones may fit in the caches).
--
prepLookupsEnv :: [BloomFilterSizeInfo]
-> StdGen
-> StdGen
-> IO ( V.Vector (Bloom SerialisedKey)
, V.Vector IndexCompact
, V.Vector (FS.Handle ())
, V.Vector NumEntries
)
prepLookupsEnv filterSizes keyRng0 valRng0 =
withSystemTempDirectory "macro-bench-prepLookups" $ \tmpDir -> do
let hasFS = FS.ioHasFS (MountPoint tmpDir)
-- create the filters
mrs <- sequence
[ MRun.new hasFS (RunFsPaths rfp) (NumEntries numEntries) (numEntries * 42)
| (rfp, (numEntries, _, numBits, numHashFuncs)) <- zip [0..] filterSizes ]
-- add elements
foldM_
(\(keyRng, valRng) (i, mr) -> do
-- progress
when (i .&. 0xFFFF == 0) (putStr ".")
-- insert n elements into filter b
let k :: UTxOKey
(!k, !keyRng') = uniform keyRng
v :: UTxOValue
(!v, !valRng') = uniform valRng
MRun.addKeyOp hasFS mr (serialiseKey k) (Insert (serialiseValue v))
return (keyRng', valRng')
)
(keyRng0, valRng0)
(zip [0 .. totalNumEntries filterSizes - 1]
(cycle [ mb'
| (mb, (_, sizeFactor, _, _)) <- zip mrs filterSizes
, mb' <- replicate sizeFactor mb ]))
ress <- V.fromList <$> mapM (MRun.unsafeFinalise hasFS) mrs
let (_, blooms, indexes, numEntriess) = V.unzip4 ress
handles = V.replicate (V.length blooms) (FS.Handle () (FS.mkFsPath []))

pure $!! (blooms, indexes, handles, numEntriess)

-- | This gives us a baseline cost of just calculating the series of keys.
benchBaseline :: Vector (Bloom SerialisedKey) -> StdGen -> Int -> ()
benchBaseline !_ !_ 0 = ()
benchBaseline !bs !rng !n =
let k :: Word256
(!k, !rng') = uniform rng
!k' = serialiseKey k
in k' `seq` benchBaseline bs rng' (n-1)

-- | This gives us a combined cost of calculating the series of keys and their
-- hashes.
benchMakeCheapHashes :: Vector (Bloom SerialisedKey) -> StdGen -> Int -> ()
benchMakeCheapHashes !_ !_ 0 = ()
benchMakeCheapHashes !bs !rng !n =
let k :: Word256
(!k, !rng') = uniform rng
!kh = Bloom.makeHashes (serialiseKey k) :: Bloom.CheapHashes SerialisedKey
in kh `seq` benchMakeCheapHashes bs rng' (n-1)

-- | This gives us a combined cost of calculating the series of keys, their
-- hashes, and then using 'Bloom.elemCheapHashes' with each filter.
benchElemCheapHashes :: Vector (Bloom SerialisedKey) -> StdGen -> Int -> ()
benchElemCheapHashes !_ !_ 0 = ()
benchElemCheapHashes !bs !rng !n =
let k :: Word256
(!k, !rng') = uniform rng
!kh = Bloom.makeHashes (serialiseKey k)
in foldl' (\_ b -> Bloom.elemHashes kh b `seq` ()) () bs
`seq` benchElemCheapHashes bs rng' (n-1)

-- -- | This gives us a combined cost of calculating the series of keys, their
-- -- hashes, and then using 'Bloom.elemCheapHashes' with each filter.
-- benchPrepLookups :: Vector (Bloom SerialisedKey) -> StdGen -> Int -> ()
-- benchPrepLookups !_ !_ 0 = ()
-- benchPrepLookups !bs !rng !n =
-- let (!rng1, !rng2) = split rng
-- vks :: V.Vector UTxOKey
-- !vks = vUniformWithReplacement' (Proxy @UTxOKey) rng1 benchmarkExtBatchSize
-- !kh = Bloom.makeHashes (serialiseKey k)
-- in foldl' (\_ b -> Bloom.elemHashes kh b `seq` ()) () bs
-- `seq` benchElemCheapHashes bs rng' (n-1)



-- vUniformWithReplacement' :: Uniform a => Proxy a -> StdGen -> Int -> V.Vector a
-- vUniformWithReplacement' _ rng0 n0 = V.unfoldrExactN n0 uniform rng0
2 changes: 2 additions & 0 deletions bench/macro/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
module Main (main) where

import qualified Bench.Database.LSMTree.Internal.BloomFilter
import qualified Bench.Database.LSMTree.Internal.Lookup

import System.IO

main :: IO ()
main = do
hSetBuffering stdout NoBuffering
Bench.Database.LSMTree.Internal.BloomFilter.benchmarks
Bench.Database.LSMTree.Internal.Lookup.benchmarks
4 changes: 4 additions & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -323,17 +323,21 @@ benchmark lsm-tree-macro-bench
main-is: Main.hs
other-modules:
Bench.Database.LSMTree.Internal.BloomFilter
Bench.Database.LSMTree.Internal.Lookup
Database.LSMTree.Extras.Orphans
Database.LSMTree.Extras.UTxO

build-depends:
, base
, bytestring
, deepseq
, fs-api
, lsm-tree
, lsm-tree:bloomfilter
, lsm-tree:fs-api-blockio
, primitive
, random
, temporary
, time
, vector
, wide-word
Expand Down

0 comments on commit a429b2d

Please sign in to comment.