Skip to content

Commit

Permalink
birthday
Browse files Browse the repository at this point in the history
  • Loading branch information
phadej committed Apr 25, 2024
1 parent 4020a64 commit 623742b
Showing 1 changed file with 39 additions and 21 deletions.
60 changes: 39 additions & 21 deletions bench/macro/lsm-tree-bench-wp8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ module Main (main) where

import Control.Applicative ((<**>))
import Control.Exception (bracket)
import Control.Monad (forM_, void, when)
import Control.Monad (forM_, unless, void, when)
import qualified Crypto.Hash.SHA256 as SHA256
import qualified Data.Binary as B
import qualified Data.ByteString as BS
Expand Down Expand Up @@ -87,8 +87,8 @@ theValue = BS.replicate 60 120 -- 'x'
-------------------------------------------------------------------------------

data GlobalOpts = GlobalOpts
{ rootDir :: FilePath -- ^ session directory.
, initialSize :: Int
{ rootDir :: !FilePath -- ^ session directory.
, initialSize :: !Int
}
deriving Show

Expand All @@ -98,7 +98,10 @@ data SetupOpts = SetupOpts
deriving Show

data RunOpts = RunOpts
{ batches :: Int
{ batchCount :: !Int
, batchSize :: !Int
, check :: !Bool
, seed :: !Word64
}
deriving Show

Expand Down Expand Up @@ -139,6 +142,9 @@ setupOptsP = pure SetupOpts
runOptsP :: O.Parser RunOpts
runOptsP = pure RunOpts
<*> pure 100
<*> pure 16 -- TODO: 256
<*> pure True
<*> pure 50

-------------------------------------------------------------------------------
-- clock
Expand Down Expand Up @@ -197,36 +203,48 @@ doSetup gopts _opts = do
-- dry-run
-------------------------------------------------------------------------------

batchSize :: Int
batchSize = 16

check :: Bool
check = True

doDryRun :: GlobalOpts -> RunOpts -> IO ()
doDryRun gopts opts = do
when opts.check $ do
-- we generate n random numbers in range of [ 1 .. d ]
-- what is the chance they are all distinct
let n = fromIntegral (opts.batchCount * opts.batchSize) :: Double
let d = fromIntegral gopts.initialSize :: Double
-- this is birthday problem.
let p = 1 - exp (negate $ (n * (n - 1)) / (2 * d))

-- number of people with a shared birthday
-- https://en.wikipedia.org/wiki/Birthday_problem#Number_of_people_with_a_shared_birthday
let q = n * (1 - ((d - 1) / d) ** (n - 1))

printf "Probability of a duplicate: %5f\n" p
printf "Number of duplicate removals: %5f\n" q

-- TODO: open session to measure that as well.
let initGen = SM.mkSMGen 42 -- TODO: configurable seed?
let initGen = SM.mkSMGen opts.seed

keysRef <- newIORef $ if check then IS.fromList [ 0 .. gopts.initialSize - 1 ] else IS.empty
keysRef <- newIORef $
if opts.check
then IS.fromList [ 0 .. gopts.initialSize - 1 ]
else IS.empty

void $ forFoldM_ initGen [ 0 .. opts.batches - 1 ] $ \b g -> do
void $ forFoldM_ initGen [ 0 .. opts.batchCount - 1 ] $ \b g -> do
-- how many keys there are
let maxK :: Word64
maxK = fromIntegral $ gopts.initialSize + batchSize * b
maxK = fromIntegral $ gopts.initialSize + opts.batchSize * b

let lookups :: [Word64]
(!nextG, lookups) = mapAccumL (\g' _ -> swap (SM.bitmaskWithRejection64 maxK g')) g [1 .. batchSize]
(!nextG, lookups) = mapAccumL (\g' _ -> swap (SM.bitmaskWithRejection64 maxK g')) g [1 .. opts.batchSize]

inserts :: [Word64]
inserts = [ maxK .. maxK + fromIntegral batchSize - 1 ]
inserts = [ maxK .. maxK + fromIntegral opts.batchSize - 1 ]

print lookups
print inserts
when check $ do
when opts.check $ do
keys <- readIORef keysRef
let new = IS.fromList $ map fromIntegral lookups
printf "missing in batch %d %s\n" b (show (IS.difference new keys))
let new = IS.fromList $ map fromIntegral lookups
let diff = IS.difference new keys
unless (IS.null diff) $ printf "missing in batch %d %s\n" b (show diff)

writeIORef keysRef $! IS.union
(IS.difference keys new)
(IS.fromList $ map fromIntegral inserts)
Expand Down

0 comments on commit 623742b

Please sign in to comment.