diff --git a/lsm-tree.cabal b/lsm-tree.cabal index d413fa5d3..5ffd8e84d 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -295,6 +295,7 @@ test-suite lsm-tree-test Test.Database.LSMTree.Generators Test.Database.LSMTree.Internal Test.Database.LSMTree.Internal.BloomFilter + Test.Database.LSMTree.Internal.Chunk Test.Database.LSMTree.Internal.CRC32C Test.Database.LSMTree.Internal.Entry Test.Database.LSMTree.Internal.IndexCompact diff --git a/test/Main.hs b/test/Main.hs index 6ffb54a84..bd50f1217 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -9,6 +9,7 @@ import qualified Test.Database.LSMTree.Class.Normal import qualified Test.Database.LSMTree.Generators import qualified Test.Database.LSMTree.Internal import qualified Test.Database.LSMTree.Internal.BloomFilter +import qualified Test.Database.LSMTree.Internal.Chunk import qualified Test.Database.LSMTree.Internal.CRC32C import qualified Test.Database.LSMTree.Internal.Entry import qualified Test.Database.LSMTree.Internal.IndexCompact @@ -41,6 +42,7 @@ main = defaultMain $ testGroup "lsm-tree" , Test.Database.LSMTree.Generators.tests , Test.Database.LSMTree.Internal.tests , Test.Database.LSMTree.Internal.BloomFilter.tests + , Test.Database.LSMTree.Internal.Chunk.tests , Test.Database.LSMTree.Internal.CRC32C.tests , Test.Database.LSMTree.Internal.Entry.tests , Test.Database.LSMTree.Internal.Lookup.tests diff --git a/test/Test/Database/LSMTree/Internal/Chunk.hs b/test/Test/Database/LSMTree/Internal/Chunk.hs new file mode 100644 index 000000000..0d23dbc5e --- /dev/null +++ b/test/Test/Database/LSMTree/Internal/Chunk.hs @@ -0,0 +1,173 @@ +{-# OPTIONS_GHC -Wno-orphans #-} + +module Test.Database.LSMTree.Internal.Chunk (tests) where + +import Prelude hiding (concat, drop, length) + +import Control.Arrow ((>>>)) +import Control.Monad.ST.Strict (runST) +import qualified Data.List as List (concat, drop, length) +import Data.Maybe (catMaybes, fromJust, isJust, isNothing) +import Data.Vector.Primitive (Vector, concat, fromList, length, + toList) +import Data.Word (Word8) +import Database.LSMTree.Internal.Chunk (Chunk, createBaler, feedBaler, + fromChunk, unsafeEndBaler) +import Test.QuickCheck (Arbitrary (arbitrary, shrink), + NonEmptyList (NonEmpty), Positive (Positive, getPositive), + Property, Small (Small, getSmall), Testable, scale, + shrinkMap, tabulate, (===), (==>)) +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.QuickCheck (testProperty) + +-- * Tests + +tests :: TestTree +tests = testGroup "Test.Database.LSMTree.Internal.Chunk" $ + [ + testProperty "Content is preserved" + prop_contentIsPreserved, + testProperty "No remnant after output" + prop_noRemnantAfterOutput, + testProperty "Common chunks are large" + prop_commonChunksAreLarge, + testProperty "Remnant chunk is non-empty" + prop_remnantChunkIsNonEmpty, + testProperty "Remnant chunk is small" + prop_remnantChunkIsSmall + ] + +-- * Properties to test + +{- + Feeds a freshly created baler a sequence of data portions and ends it + afterwards, yielding all output. +-} +balingOutput :: Int -- Minimum chunk size + -> [[Vector Word8]] -- Data portions to be fed + -> ([Maybe Chunk], Maybe Chunk) -- Feeding output and remnant +balingOutput minChunkSize food = runST $ do + baler <- createBaler minChunkSize + commonChunks <- mapM (flip feedBaler baler) food + remnant <- unsafeEndBaler baler + return (commonChunks, remnant) + +{- + Supplies the output of a complete baler run for constructing a property. + + The resulting property additionally provides statistics about the lengths of + buildup phases, where a buildup phase is a sequence of feedings that does + not result in chunks and is followed by an ultimate chunk production, which + happens either due to another feeding or due to the baler run ending and + producing a remnant chunk. +-} +withBalingOutput + :: Testable prop + => Int -- Minimum chunk size + -> [[Vector Word8]] -- Data portions to be fed + -> ([Maybe Chunk] -> Maybe Chunk -> prop) -- Property from baler output + -> Property -- Resulting property +withBalingOutput minChunkSize food consumer + = tabulate "Lengths of buildup phases" + (map show (buildupPhasesLengths commonChunks)) + (consumer commonChunks remnant) + where + + commonChunks :: [Maybe Chunk] + remnant :: Maybe Chunk + (commonChunks, remnant) = balingOutput minChunkSize food + + buildupPhasesLengths :: [Maybe Chunk] -> [Int] + buildupPhasesLengths [] = [] + buildupPhasesLengths chunks = List.length buildupOutput : + buildupPhasesLengths (List.drop 1 followUp) + where + + buildupOutput, followUp :: [Maybe Chunk] + (buildupOutput, followUp) = span isNothing chunks + +prop_contentIsPreserved :: MinChunkSize -> [[Vector Word8]] -> Property +prop_contentIsPreserved (MinChunkSize minChunkSize) food + = withBalingOutput minChunkSize food $ \ commonChunks remnant -> + let + + input :: Vector Word8 + input = concat (List.concat food) + + output :: Vector Word8 + output = concat (fromChunk <$> catMaybes (commonChunks ++ [remnant])) + + in input === output + +prop_noRemnantAfterOutput :: MinChunkSize + -> NonEmptyList [Vector Word8] + -> Property +prop_noRemnantAfterOutput (MinChunkSize minChunkSize) (NonEmpty food) + = withBalingOutput minChunkSize food $ \ commonChunks remnant -> + isJust (last commonChunks) ==> isNothing remnant + +prop_commonChunksAreLarge :: MinChunkSize -> [[Vector Word8]] -> Property +prop_commonChunksAreLarge (MinChunkSize minChunkSize) food + = withBalingOutput minChunkSize food $ \ commonChunks _ -> + all (fromChunk >>> length >>> (>= minChunkSize)) (catMaybes commonChunks) + +remnantChunkSizeIs :: (Int -> Bool) -> Int -> [[Vector Word8]] -> Property +remnantChunkSizeIs constraint minChunkSize food + = withBalingOutput minChunkSize food $ \ _ remnant -> + isJust remnant ==> constraint (length (fromChunk (fromJust remnant))) + +prop_remnantChunkIsNonEmpty :: MinChunkSize -> [[Vector Word8]] -> Property +prop_remnantChunkIsNonEmpty (MinChunkSize minChunkSize) + = remnantChunkSizeIs (> 0) minChunkSize + +prop_remnantChunkIsSmall :: MinChunkSize -> [[Vector Word8]] -> Property +prop_remnantChunkIsSmall (MinChunkSize minChunkSize) + = remnantChunkSizeIs (< minChunkSize) minChunkSize + +-- * Test case generation and shrinking + +instance Arbitrary (Vector Word8) where + + arbitrary = fromList <$> arbitrary + + shrink = shrinkMap fromList toList + +{- + The type of minimum chunk sizes. + + This type is isomorphic to 'Int' but has a different way of generating test + cases. Only small, positive integers are generated, and they are generated + using \(2 \cdot s^{2}\) as the size parameter, where \(s\) refers to the + original size parameter. + + The reasons for the modification of the size parameter in the + above-mentioned way are somewhat subtle. + + First, we want the ratio between the average minimum chunk size and the + average size of a data portion that we feed to a baler to be independent of + the size parameter. Each data portion is a list of primitive vectors of + bytes, and arbitrarily generated lists and byte vectors have lengths that + are small, positive integers. Such integers are \(s/2\) on average. As a + result, the average size of data fed to a baler is \(s^{2}/4\). By + generating minimum chunk sizes with \(a \cdot s^{2}\) as the size parameter + for some constant \(a\), the average minimum chunk size is \(a/2 \cdot + s^{2}\) and therefore $2a$ times the average size of a data portion fed, + independently of \(s\). + + Second, we want prompt chunk generation as well as chunk generation after + only two or more feedings to occur reasonably often. To achieve this to some + degree, we can tune the parameter \(a\). It appears that \(a\) being \(2\) + leads to reasonable results. +-} +newtype MinChunkSize = MinChunkSize Int deriving stock Show + +fromMinChunkSize :: MinChunkSize -> Int +fromMinChunkSize (MinChunkSize minChunkSize) = minChunkSize + +instance Arbitrary MinChunkSize where + + arbitrary = scale (\ size -> 2 * size ^ (2 :: Int)) $ + MinChunkSize <$> getSmall <$> getPositive <$> arbitrary + + shrink = shrinkMap (MinChunkSize . getSmall . getPositive) + (Positive . Small . fromMinChunkSize)