Skip to content

Commit

Permalink
Merge pull request #196 from IntersectMBO/lcg
Browse files Browse the repository at this point in the history
MCG for lsm-tree-bench-wp8.hs
  • Loading branch information
mheinzel committed May 7, 2024
2 parents 8abfb67 + 7af5b32 commit 5f81ac7
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 7 deletions.
16 changes: 10 additions & 6 deletions bench/macro/lsm-tree-bench-wp8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ import Data.Traversable (mapAccumL)
import Data.Tuple (swap)
import Data.Void (Void)
import Data.Word (Word64)
import qualified MCG
import qualified Options.Applicative as O
import qualified System.Clock as Clock
import qualified System.FS.API as FS
import qualified System.FS.IO as FsIO
import qualified System.Random.SplitMix as SM
import Text.Printf (printf)

-- We should be able to write this benchmark
Expand Down Expand Up @@ -233,7 +233,9 @@ doDryRun' gopts opts = do
printf "Expected number of duplicates (extreme upper bound): %5f out of %f\n" q n

-- TODO: open session to measure that as well.
let initGen = SM.mkSMGen opts.seed
let initGen = MCG.make
(fromIntegral $ gopts.initialSize + opts.batchSize * opts.batchCount)
opts.seed

keysRef <- newIORef $
if opts.check
Expand Down Expand Up @@ -283,16 +285,16 @@ We could also make it exact, but then we'll need to carry some state around
generateBatch
:: Int -- ^ initial size of the collection
-> Int -- ^ batch size
-> SM.SMGen -- ^ generator
-> MCG.MCG -- ^ generator
-> Int -- ^ batch number
-> (SM.SMGen, [Word64], [Word64])
-> (MCG.MCG, [Word64], [Word64])
generateBatch initialSize batchSize g b = (nextG, lookups, inserts)
where
maxK :: Word64
maxK = fromIntegral $ initialSize + batchSize * b

lookups :: [Word64]
(!nextG, lookups) = mapAccumL (\g' _ -> swap (SM.bitmaskWithRejection64 maxK g')) g [1 .. batchSize]
(!nextG, lookups) = mapAccumL (\g' _ -> swap (MCG.reject maxK g')) g [1 .. batchSize]

inserts :: [Word64]
inserts = [ maxK .. maxK + fromIntegral batchSize - 1 ]
Expand Down Expand Up @@ -335,7 +337,9 @@ doRun' gopts opts = do
name <- maybe (fail "invalid snapshot name") return $
LSM.mkSnapshotName "bench"

let initGen = SM.mkSMGen opts.seed
let initGen = MCG.make
(fromIntegral $ gopts.initialSize + opts.batchSize * opts.batchCount)
opts.seed

withSession someFs (FS.mkFsPath []) $ \session -> do
-- open snapshot
Expand Down
10 changes: 9 additions & 1 deletion lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,14 @@ benchmark lsm-tree-macro-bench

ghc-options: -rtsopts -with-rtsopts=-T -threaded

library mcg
import: language, warnings, wno-x-partial
hs-source-dirs: src-mcg
exposed-modules: MCG
build-depends:
, base
, primes

benchmark lsm-tree-bench-wp8
import: language, warnings, wno-x-partial
type: exitcode-stdio-1.0
Expand All @@ -360,8 +368,8 @@ benchmark lsm-tree-bench-wp8
, deepseq
, fs-api
, lsm-tree
, lsm-tree:mcg
, optparse-applicative
, splitmix

ghc-options: -rtsopts -with-rtsopts=-T -threaded

Expand Down
107 changes: 107 additions & 0 deletions src-mcg/MCG.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
{-# LANGUAGE NoFieldSelectors #-}
{-# LANGUAGE OverloadedRecordDot #-}
module MCG (
MCG,
make,
period,
next,
reject,
) where

import Data.Bits (countLeadingZeros, unsafeShiftR)
import Data.List (nub)
import Data.Numbers.Primes (isPrime, primeFactors)
import Data.Word (Word64)

-- $setup
-- >>> import Data.List (unfoldr, nub)

-- | https://en.wikipedia.org/wiki/Lehmer_random_number_generator
data MCG = MCG { m :: !Word64, a :: !Word64, x :: !Word64 }
deriving Show

-- invariants: m is a prime
-- a is a primitive element of Z_m
-- x is in [1..m-1]

-- | Create a MCG
--
-- >>> make 20 04
-- MCG {m = 23, a = 11, x = 5}
--
-- >>> make 101_000_000 20240429
-- MCG {m = 101000023, a = 197265, x = 20240430}
--
make
:: Word64 -- ^ a lower bound for the period
-> Word64 -- ^ initial seed.
-> MCG
make (max 4 -> period_) seed = MCG m a (mod (seed + 1) m)
where
-- start prime search from an odd number larger than asked period.
m = findM (if odd period_ then period_ + 2 else period_ + 1)
m' = m - 1
qs = nub $ primeFactors m'

a = findA (guessA m)

findM p = if isPrime p then p else findM (p + 2)

-- we find `a` using "brute-force" approach.
-- luckily, many elements a prime factors, so we don't need to try too hard.
-- and we only need to check prime factors of m - 1.
findA x
| all (\q -> mod (x ^ div m' q) m /= 1) qs
= x

| otherwise
= findA (x + 1)

-- | Period of the MCG.
--
-- Period is usually a bit larger than asked for, we look for the next prime:
--
-- >>> let g = make 9 04
-- >>> period g
-- 10
--
-- >>> take 22 (unfoldr (Just . next) g)
-- [4,7,3,1,0,5,2,6,8,9,4,7,3,1,0,5,2,6,8,9,4,7]
--
period :: MCG -> Word64
period (MCG m _ _) = m - 1

-- | Generate next number.
next :: MCG -> (Word64, MCG)
next (MCG m a x) = (x - 1, MCG m a (mod (x * a) m))

-- | Generate next numbers until one less than given bound is generated.
--
-- Replacing 'next' with @'reject' n@ effectively cuts the period to @n@:
--
-- >>> let g = make 9 04
-- >>> period g
-- 10
--
-- >>> take 22 (unfoldr (Just . reject 9) g)
-- [4,7,3,1,0,5,2,6,8,4,7,3,1,0,5,2,6,8,4,7,3,1]
--
-- if @n@ is close enough to actual period of 'MCG', the rejection ratio
-- is very small.
--
reject :: Word64 -> MCG -> (Word64, MCG)
reject ub g = case next g of
(x, g') -> if x < ub then (x, g') else reject ub g'

-------------------------------------------------------------------------------
-- guessing some initial a
-------------------------------------------------------------------------------

-- | calculate x -> log2 (x + 1) i.e. approximate how large the number is in bits.
word64Log2m1 :: Word64 -> Int
word64Log2m1 x = 64 - countLeadingZeros x

-- | we guess a such that a*a is larger than m:
-- we shift a number a little.
guessA :: Word64 -> Word64
guessA x = unsafeShiftR x (div (word64Log2m1 x) 3)

0 comments on commit 5f81ac7

Please sign in to comment.