Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ test-suite lsm-tree-test
Test.Database.LSMTree.Generators
Test.Database.LSMTree.Internal
Test.Database.LSMTree.Internal.BloomFilter
Test.Database.LSMTree.Internal.Chunk
Test.Database.LSMTree.Internal.CRC32C
Test.Database.LSMTree.Internal.Entry
Test.Database.LSMTree.Internal.IndexCompact
Expand Down
2 changes: 2 additions & 0 deletions test/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import qualified Test.Database.LSMTree.Class.Normal
import qualified Test.Database.LSMTree.Generators
import qualified Test.Database.LSMTree.Internal
import qualified Test.Database.LSMTree.Internal.BloomFilter
import qualified Test.Database.LSMTree.Internal.Chunk
import qualified Test.Database.LSMTree.Internal.CRC32C
import qualified Test.Database.LSMTree.Internal.Entry
import qualified Test.Database.LSMTree.Internal.IndexCompact
Expand Down Expand Up @@ -41,6 +42,7 @@ main = defaultMain $ testGroup "lsm-tree"
, Test.Database.LSMTree.Generators.tests
, Test.Database.LSMTree.Internal.tests
, Test.Database.LSMTree.Internal.BloomFilter.tests
, Test.Database.LSMTree.Internal.Chunk.tests
, Test.Database.LSMTree.Internal.CRC32C.tests
, Test.Database.LSMTree.Internal.Entry.tests
, Test.Database.LSMTree.Internal.Lookup.tests
Expand Down
173 changes: 173 additions & 0 deletions test/Test/Database/LSMTree/Internal/Chunk.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
{-# OPTIONS_GHC -Wno-orphans #-}

module Test.Database.LSMTree.Internal.Chunk (tests) where

import Prelude hiding (concat, drop, length)

import Control.Arrow ((>>>))
import Control.Monad.ST.Strict (runST)
import qualified Data.List as List (concat, drop, length)
import Data.Maybe (catMaybes, fromJust, isJust, isNothing)
import Data.Vector.Primitive (Vector, concat, fromList, length,
toList)
import Data.Word (Word8)
import Database.LSMTree.Internal.Chunk (Chunk, createBaler, feedBaler,
fromChunk, unsafeEndBaler)
import Test.QuickCheck (Arbitrary (arbitrary, shrink),
NonEmptyList (NonEmpty), Positive (Positive, getPositive),
Property, Small (Small, getSmall), Testable, scale,
shrinkMap, tabulate, (===), (==>))
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)

-- * Tests

tests :: TestTree
tests = testGroup "Test.Database.LSMTree.Internal.Chunk" $
[
testProperty "Content is preserved"
prop_contentIsPreserved,
testProperty "No remnant after output"
prop_noRemnantAfterOutput,
testProperty "Common chunks are large"
prop_commonChunksAreLarge,
testProperty "Remnant chunk is non-empty"
prop_remnantChunkIsNonEmpty,
testProperty "Remnant chunk is small"
prop_remnantChunkIsSmall
]

-- * Properties to test

{-
Feeds a freshly created baler a sequence of data portions and ends it
afterwards, yielding all output.
-}
balingOutput :: Int -- Minimum chunk size
-> [[Vector Word8]] -- Data portions to be fed
-> ([Maybe Chunk], Maybe Chunk) -- Feeding output and remnant
balingOutput minChunkSize food = runST $ do
baler <- createBaler minChunkSize
commonChunks <- mapM (flip feedBaler baler) food
remnant <- unsafeEndBaler baler
return (commonChunks, remnant)

{-
Supplies the output of a complete baler run for constructing a property.

The resulting property additionally provides statistics about the lengths of
buildup phases, where a buildup phase is a sequence of feedings that does
not result in chunks and is followed by an ultimate chunk production, which
happens either due to another feeding or due to the baler run ending and
producing a remnant chunk.
-}
withBalingOutput
:: Testable prop
=> Int -- Minimum chunk size
-> [[Vector Word8]] -- Data portions to be fed
-> ([Maybe Chunk] -> Maybe Chunk -> prop) -- Property from baler output
-> Property -- Resulting property
withBalingOutput minChunkSize food consumer
= tabulate "Lengths of buildup phases"
(map show (buildupPhasesLengths commonChunks))
(consumer commonChunks remnant)
where

commonChunks :: [Maybe Chunk]
remnant :: Maybe Chunk
(commonChunks, remnant) = balingOutput minChunkSize food

buildupPhasesLengths :: [Maybe Chunk] -> [Int]
buildupPhasesLengths [] = []
buildupPhasesLengths chunks = List.length buildupOutput :
buildupPhasesLengths (List.drop 1 followUp)
where

buildupOutput, followUp :: [Maybe Chunk]
(buildupOutput, followUp) = span isNothing chunks

prop_contentIsPreserved :: MinChunkSize -> [[Vector Word8]] -> Property
prop_contentIsPreserved (MinChunkSize minChunkSize) food
= withBalingOutput minChunkSize food $ \ commonChunks remnant ->
let

input :: Vector Word8
input = concat (List.concat food)

output :: Vector Word8
output = concat (fromChunk <$> catMaybes (commonChunks ++ [remnant]))

in input === output

prop_noRemnantAfterOutput :: MinChunkSize
-> NonEmptyList [Vector Word8]
-> Property
prop_noRemnantAfterOutput (MinChunkSize minChunkSize) (NonEmpty food)
= withBalingOutput minChunkSize food $ \ commonChunks remnant ->
isJust (last commonChunks) ==> isNothing remnant

prop_commonChunksAreLarge :: MinChunkSize -> [[Vector Word8]] -> Property
prop_commonChunksAreLarge (MinChunkSize minChunkSize) food
= withBalingOutput minChunkSize food $ \ commonChunks _ ->
all (fromChunk >>> length >>> (>= minChunkSize)) (catMaybes commonChunks)

remnantChunkSizeIs :: (Int -> Bool) -> Int -> [[Vector Word8]] -> Property
remnantChunkSizeIs constraint minChunkSize food
= withBalingOutput minChunkSize food $ \ _ remnant ->
isJust remnant ==> constraint (length (fromChunk (fromJust remnant)))

prop_remnantChunkIsNonEmpty :: MinChunkSize -> [[Vector Word8]] -> Property
prop_remnantChunkIsNonEmpty (MinChunkSize minChunkSize)
= remnantChunkSizeIs (> 0) minChunkSize

prop_remnantChunkIsSmall :: MinChunkSize -> [[Vector Word8]] -> Property
prop_remnantChunkIsSmall (MinChunkSize minChunkSize)
= remnantChunkSizeIs (< minChunkSize) minChunkSize

-- * Test case generation and shrinking

instance Arbitrary (Vector Word8) where

arbitrary = fromList <$> arbitrary

shrink = shrinkMap fromList toList

{-
The type of minimum chunk sizes.

This type is isomorphic to 'Int' but has a different way of generating test
cases. Only small, positive integers are generated, and they are generated
using \(2 \cdot s^{2}\) as the size parameter, where \(s\) refers to the
original size parameter.

The reasons for the modification of the size parameter in the
above-mentioned way are somewhat subtle.

First, we want the ratio between the average minimum chunk size and the
average size of a data portion that we feed to a baler to be independent of
the size parameter. Each data portion is a list of primitive vectors of
bytes, and arbitrarily generated lists and byte vectors have lengths that
are small, positive integers. Such integers are \(s/2\) on average. As a
result, the average size of data fed to a baler is \(s^{2}/4\). By
generating minimum chunk sizes with \(a \cdot s^{2}\) as the size parameter
for some constant \(a\), the average minimum chunk size is \(a/2 \cdot
s^{2}\) and therefore $2a$ times the average size of a data portion fed,
independently of \(s\).

Second, we want prompt chunk generation as well as chunk generation after
only two or more feedings to occur reasonably often. To achieve this to some
degree, we can tune the parameter \(a\). It appears that \(a\) being \(2\)
leads to reasonable results.
-}
newtype MinChunkSize = MinChunkSize Int deriving stock Show

fromMinChunkSize :: MinChunkSize -> Int
fromMinChunkSize (MinChunkSize minChunkSize) = minChunkSize

instance Arbitrary MinChunkSize where

arbitrary = scale (\ size -> 2 * size ^ (2 :: Int)) $
MinChunkSize <$> getSmall <$> getPositive <$> arbitrary

shrink = shrinkMap (MinChunkSize . getSmall . getPositive)
(Positive . Small . fromMinChunkSize)