Skip to content

Commit

Permalink
Merge pull request #294 from IntersectMBO/jdral/optimise-index
Browse files Browse the repository at this point in the history
Optimise `unsafeWriteRange`
  • Loading branch information
jorisdral authored Jul 15, 2024
2 parents afc3639 + e7ffa86 commit 7427aff
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 17 deletions.
23 changes: 16 additions & 7 deletions bench/micro/Bench/Database/LSMTree/Internal/IndexCompact.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ module Bench.Database.LSMTree.Internal.IndexCompact (
) where

import Control.DeepSeq (deepseq)
import Control.Monad.ST (runST)
import Control.Monad.ST.Strict
import Criterion.Main
import Data.Foldable (Foldable (..))
import Data.Map.Range
import qualified Data.Vector.Unboxed.Mutable as VUM
import Data.Word
import Database.LSMTree.Extras.Generators
import Database.LSMTree.Extras.Random
import Database.LSMTree.Extras.UTxO
Expand All @@ -27,16 +30,22 @@ import Test.QuickCheck (generate)
benchmarks :: Benchmark
benchmarks = bgroup "Bench.Database.LSMTree.Internal.IndexCompact" [
bgroup "searches" [
env (searchEnv 0 2_500_000 1_000_000) $ \ ~(ic, ks) ->
env (searchEnv 0 10000 1000) $ \ ~(ic, ks) ->
bench "searches with 0-bit rfprec" $ whnf (searches ic) ks
, env (searchEnv 16 2_500_000 1_000_000) $ \ ~(ic, ks) ->
, env (searchEnv 16 10000 1000) $ \ ~(ic, ks) ->
bench "searches with 16-bit rfprec" $ whnf (searches ic) ks
]
, bgroup "construction" [
env (constructionEnv 0 2_500_000) $ \ pages ->
env (constructionEnv 0 1000) $ \ pages ->
bench "construction with 0-bit rfprec and chunk size 100" $ whnf (constructIndexCompact 100) pages
, env (constructionEnv 16 2_500_000) $ \ pages ->
, env (constructionEnv 16 1000) $ \ pages ->
bench "construction with 16-bit rfprec and chunk size 100" $ whnf (constructIndexCompact 100) pages
, env (VUM.replicate 3000 (7 :: Word32)) $ \ mv ->
bench "unsafeWriteRange-1k" $
whnfAppIO (\x -> stToIO (unsafeWriteRange mv (BoundInclusive 1000) (BoundInclusive 2000) x)) 17
, env (VUM.replicate 30000 (7 :: Word32)) $ \ mv ->
bench "unsafeWriteRange-10k" $
whnfAppIO (\x -> stToIO (unsafeWriteRange mv (BoundInclusive 10000) (BoundInclusive 20000) x)) 17
]
]

Expand All @@ -48,7 +57,7 @@ searchEnv ::
-> IO (IndexCompact, [SerialisedKey])
searchEnv rfprec npages nsearches = do
ic <- constructIndexCompact 100 <$> constructionEnv rfprec npages
stdgen <- newStdGen
let stdgen = mkStdGen 17
let ks = serialiseKey <$> uniformWithReplacement @UTxOKey stdgen nsearches
pure (ic, ks)

Expand All @@ -65,7 +74,7 @@ constructionEnv ::
-> Int -- ^ Number of pages
-> IO (RFPrecision, [Append])
constructionEnv rfprec n = do
stdgen <- newStdGen
let stdgen = mkStdGen 17
let ks = uniformWithoutReplacement @UTxOKey stdgen (2 * n)
ps <- generate (mkPages 0 (error "unused in constructionEnv") rfprec ks)
pure (rfprec, toAppends ps)
Expand Down
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ test-suite lsm-tree-test
Test.Database.LSMTree.Internal.RunReaders
Test.Database.LSMTree.Internal.Serialise
Test.Database.LSMTree.Internal.Serialise.Class
Test.Database.LSMTree.Internal.Vector
Test.Database.LSMTree.Model.Monoidal
Test.Database.LSMTree.Model.Normal
Test.Database.LSMTree.Normal.StateMachine
Expand Down
27 changes: 17 additions & 10 deletions src/Database/LSMTree/Internal/IndexCompactAcc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@ module Database.LSMTree.Internal.IndexCompactAcc (
, appendSingle
, appendMulti
, unsafeEnd
-- * Internal: exported for testing and benchmarking
, unsafeWriteRange
, vectorLowerBound
, mvectorUpperBound
) where

import Control.DeepSeq (NFData (..))
import Control.Exception (assert)
import Control.Monad (forM_, when)
import Control.Monad (when)
import Control.Monad.ST.Strict
import Data.Bit hiding (flipBit)
import Data.Foldable (toList)
Expand Down Expand Up @@ -197,7 +201,7 @@ appendSingle (minKey, maxKey) ica@IndexCompactAcc{..} = do
let lb = smaybe NoBound (\i -> Bound (fromIntegral i) Exclusive) lastMinRfbits
ub = Bound (fromIntegral minRfbits) Inclusive
x = fromIntegral pageNo
writeRange icaRangeFinder lb ub x
unsafeWriteRange icaRangeFinder lb ub x
writeSTRef icaLastMinRfbits $! SJust minRfbits

-- | Set value in primary vector
Expand Down Expand Up @@ -246,11 +250,11 @@ appendMulti (k, n0) ica@IndexCompactAcc{..} =
let ix = pageNo `mod` icaMaxChunkSize -- will be 0 in recursive calls
remInChunk = min n (icaMaxChunkSize - ix)
readSTRef icaPrimary >>= \cs ->
writeRange (NE.head cs) (BoundInclusive ix) (BoundExclusive $ ix + remInChunk) minPrimbits
unsafeWriteRange (NE.head cs) (BoundInclusive ix) (BoundExclusive $ ix + remInChunk) minPrimbits
readSTRef icaClashes >>= \cs ->
writeRange (NE.head cs) (BoundInclusive ix) (BoundExclusive $ ix + remInChunk) (Bit True)
unsafeWriteRange (NE.head cs) (BoundInclusive ix) (BoundExclusive $ ix + remInChunk) (Bit True)
readSTRef icaLargerThanPage >>= \cs ->
writeRange (NE.head cs) (BoundInclusive ix) (BoundExclusive $ ix + remInChunk) (Bit True)
unsafeWriteRange (NE.head cs) (BoundInclusive ix) (BoundExclusive $ ix + remInChunk) (Bit True)
writeSTRef icaCurrentPageNumber $! pageNo + remInChunk
res <- yield ica
maybe id (:) res <$> overflows (n - remInChunk)
Expand Down Expand Up @@ -324,7 +328,7 @@ fillRangeFinderToEnd IndexCompactAcc{..} = do
let lb = smaybe NoBound (BoundExclusive . fromIntegral) lastMinRfbits
ub = NoBound
x = fromIntegral pageNo
writeRange icaRangeFinder lb ub x
unsafeWriteRange icaRangeFinder lb ub x
writeSTRef icaLastMinRfbits $! SJust $ 2 ^ icaRangeFinderPrecision


Expand All @@ -344,18 +348,21 @@ smaybe snothing sjust = \case
Vector extras
-------------------------------------------------------------------------------}

writeRange :: VU.Unbox a => VU.MVector s a -> Bound Int -> Bound Int -> a -> ST s ()
writeRange v lb ub x = forM_ [lb' .. ub'] $ \j -> VUM.write v j x
unsafeWriteRange :: VU.Unbox a => VU.MVector s a -> Bound Int -> Bound Int -> a -> ST s ()
unsafeWriteRange !v !lb !ub !x = VUM.set (VUM.unsafeSlice lb' len v) x
where
lb' = vectorLowerBound lb
ub' = mvectorUpperBound v ub
!lb' = vectorLowerBound lb
!ub' = mvectorUpperBound v ub
!len = ub' - lb' + 1

-- | Map a 'Bound' to the equivalent inclusive lower bound.
vectorLowerBound :: Bound Int -> Int
vectorLowerBound = \case
NoBound -> 0
BoundExclusive i -> i + 1
BoundInclusive i -> i

-- | Map a 'Bound' to the equivalent inclusive upper bound.
mvectorUpperBound :: VGM.MVector v a => v s a -> Bound Int -> Int
mvectorUpperBound v = \case
NoBound -> VGM.length v - 1
Expand Down
2 changes: 2 additions & 0 deletions test/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import qualified Test.Database.LSMTree.Internal.RunBuilder
import qualified Test.Database.LSMTree.Internal.RunReaders
import qualified Test.Database.LSMTree.Internal.Serialise
import qualified Test.Database.LSMTree.Internal.Serialise.Class
import qualified Test.Database.LSMTree.Internal.Vector
import qualified Test.Database.LSMTree.Model.Monoidal
import qualified Test.Database.LSMTree.Model.Normal
import qualified Test.Database.LSMTree.Normal.StateMachine
Expand Down Expand Up @@ -53,6 +54,7 @@ main = defaultMain $ testGroup "lsm-tree"
, Test.Database.LSMTree.Internal.IndexCompact.tests
, Test.Database.LSMTree.Internal.Serialise.tests
, Test.Database.LSMTree.Internal.Serialise.Class.tests
, Test.Database.LSMTree.Internal.Vector.tests
, Test.Database.LSMTree.Model.Normal.tests
, Test.Database.LSMTree.Model.Monoidal.tests
, Test.Database.LSMTree.Normal.StateMachine.tests
Expand Down
121 changes: 121 additions & 0 deletions test/Test/Database/LSMTree/Internal/Vector.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
{-# OPTIONS_GHC -Wno-orphans #-}

module Test.Database.LSMTree.Internal.Vector (tests) where

import Control.Monad (forM_)
import Control.Monad.ST
import Data.Map.Range
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import Data.Word
import Database.LSMTree.Extras
import Database.LSMTree.Internal.IndexCompactAcc as Cons
import Prelude hiding (max, min, pi)
import Test.QuickCheck
import Test.QuickCheck.Instances ()
import Test.QuickCheck.Monadic (PropertyM, monadicST, run)
import Test.Tasty (TestTree, localOption, testGroup)
import Test.Tasty.QuickCheck (QuickCheckTests (QuickCheckTests),
testProperty)
import Test.Util.Orphans ()
import Text.Printf (printf)

tests :: TestTree
tests = testGroup "Test.Database.LSMTree.Internal.Vector" [
localOption (QuickCheckTests 400) $
testProperty "propWriteRange" $ \v lb ub (x :: Word8) -> monadicST $ do
mv <- run $ VU.thaw v
propWriteRange mv lb ub x
, localOption (QuickCheckTests 400) $
testProperty "propUnsafeWriteRange" $ \v lb ub (x :: Word8) -> monadicST $ do
mv <- run $ VU.thaw v
propUnsafeWriteRange mv lb ub x
]

instance Arbitrary (Bound Int) where
arbitrary = oneof [
pure NoBound
, BoundInclusive <$> arbitrary
, BoundExclusive <$> arbitrary
]
shrink = \case
NoBound -> []
BoundInclusive x -> NoBound : (BoundInclusive <$> shrink x)
BoundExclusive x -> NoBound : (BoundInclusive <$> shrink x)
++ (BoundExclusive <$> shrink x)

intToInclusiveLowerBound :: Bound Int -> Int
intToInclusiveLowerBound = \case
NoBound -> 0
BoundInclusive i -> i
BoundExclusive i -> i + 1

intToInclusiveUpperBound :: VUM.Unbox a => VU.Vector a -> Bound Int -> Int
intToInclusiveUpperBound xs = \case
NoBound -> VU.length xs - 1
BoundInclusive i -> i
BoundExclusive i -> i - 1

-- | Safe version of 'unsafeWriteRange', used to test the unsafe version
-- against.
writeRange :: VU.Unbox a => VU.MVector s a -> Bound Int -> Bound Int -> a -> ST s Bool
writeRange !v !lb !ub !x
| 0 <= lb' && lb' < VUM.length v
, 0 <= ub' && ub' < VUM.length v
, lb' <= ub'
= forM_ [lb' .. ub'] (\j -> VUM.write v j x) >> pure True
| otherwise = pure False
where
!lb' = vectorLowerBound lb
!ub' = mvectorUpperBound v ub

propWriteRange :: forall s a. (VUM.Unbox a, Eq a, Show a)
=> VU.MVector s a
-> Bound Int
-> Bound Int
-> a
-> PropertyM (ST s) Property
propWriteRange mv1 lb ub x = run $ do
v1 <- VU.unsafeFreeze mv1
v2 <- VU.freeze mv1
b <- writeRange mv1 lb ub x

let xs1 = zip [0 :: Int ..] $ VU.toList v1
xs2 = zip [0..] $ VU.toList v2
lb' = intToInclusiveLowerBound lb
ub' = intToInclusiveUpperBound v1 ub

pure $ tabulate "range size" [showPowersOf10 (ub' - lb' + 1)] $
tabulate "vector size" [showPowersOf10 (VU.length v1)] $
if not b then
label "no suitable range" $ xs1 === xs2
else
counterexample (printf "lb=%d" lb') $
counterexample (printf "ub=%d" ub') $
conjoin [
counterexample "mismatch in prefix" $
take (lb' - 1) xs1 === take (lb' - 1) xs2
, counterexample "mismatch in suffix" $
drop (ub' + 1) xs1 === drop (ub' + 1) xs2
, counterexample "mimsatch in infix" $
fmap snd (drop lb' (take (ub' + 1) xs1)) ===
replicate (ub' - lb' + 1) x
]

propUnsafeWriteRange ::
forall s a. (VUM.Unbox a, Eq a, Show a)
=> VU.MVector s a
-> Bound Int
-> Bound Int
-> a
-> PropertyM (ST s) Property
propUnsafeWriteRange mv1 lb ub x = run $ do
v1 <- VU.unsafeFreeze mv1
v2 <- VU.freeze mv1
mv2 <- VU.unsafeThaw v2
b <- writeRange mv1 lb ub x
if not b then
pure $ label "no suitable range" True
else do
unsafeWriteRange mv2 lb ub x
pure $ v1 === v2

0 comments on commit 7427aff

Please sign in to comment.