From 0855e883cc76b5bd698d9cdc8e3368af60d0126c Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Thu, 19 Jun 2025 15:52:16 +0200 Subject: [PATCH 1/6] Test `prettySnapshotVersion` and `currentSnapshotVersion` with `cabal-docspec` --- src/Database/LSMTree/Internal/Snapshot/Codec.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index 61c5d3c6a..9baa3fe17 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -60,11 +60,15 @@ import Text.Printf data SnapshotVersion = V0 deriving stock (Show, Eq) +-- | Pretty-print a snapshot version +-- -- >>> prettySnapshotVersion currentSnapshotVersion -- "v0" prettySnapshotVersion :: SnapshotVersion -> String prettySnapshotVersion V0 = "v0" +-- | The current snapshot version +-- -- >>> currentSnapshotVersion -- V0 currentSnapshotVersion :: SnapshotVersion From c825cc92962e079a2912847be9f95954fc3eed2f Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Thu, 19 Jun 2025 16:32:58 +0200 Subject: [PATCH 2/6] Prefix snapshot codec golden files using the current snapshot version As a result, we keep around golden files for old snapshot versions as newer ones are created. The upside of this is that we could test the backwards compatiblity of the snapshot codec: run versioned decoders on the golden files for older snapshot versions and check that there are no errors. I've not implemented such a test yet, but this would be one step in that direction. --- .../LSMTree/Internal/Snapshot/Codec.hs | 19 ++++- .../LSMTree/Internal/Snapshot/Codec/Golden.hs | 70 +++++++++--------- ... => V0.BloomFilterAlloc.A.snapshot.golden} | Bin ... => V0.BloomFilterAlloc.B.snapshot.golden} | 0 ...n => V0.DiskCachePolicy.A.snapshot.golden} | Bin ...n => V0.DiskCachePolicy.B.snapshot.golden} | Bin ...n => V0.DiskCachePolicy.C.snapshot.golden} | 0 ...0.FencePointerIndexType.A.snapshot.golden} | Bin ...0.FencePointerIndexType.B.snapshot.golden} | 0 ....golden => V0.IndexType.A.snapshot.golden} | 0 ....golden => V0.IndexType.B.snapshot.golden} | Bin ...en => V0.LevelMergeType.A.snapshot.golden} | Bin ...en => V0.LevelMergeType.B.snapshot.golden} | 0 ...lden => V0.MergeCredits.A.snapshot.golden} | 0 ....golden => V0.MergeDebt.A.snapshot.golden} | 0 ...olden => V0.MergePolicy.A.snapshot.golden} | Bin ... V0.MergePolicyForLevel.A.snapshot.golden} | Bin ... V0.MergePolicyForLevel.B.snapshot.golden} | 0 ...den => V0.MergeSchedule.A.snapshot.golden} | Bin ...den => V0.MergeSchedule.B.snapshot.golden} | 0 ...en => V0.NominalCredits.A.snapshot.golden} | 0 ...olden => V0.NominalDebt.A.snapshot.golden} | 0 ... V0.RunBloomFilterAlloc.A.snapshot.golden} | Bin ... V0.RunBloomFilterAlloc.B.snapshot.golden} | 0 ...en => V0.RunDataCaching.A.snapshot.golden} | Bin ...en => V0.RunDataCaching.B.snapshot.golden} | 0 ....golden => V0.RunNumber.A.snapshot.golden} | Bin ....golden => V0.RunParams.A.snapshot.golden} | Bin ....golden => V0.SizeRatio.A.snapshot.golden} | 0 ...IncomingRun_SnapshotRun.A.snapshot.golden} | Bin ...IncomingRun_SnapshotRun.B.snapshot.golden} | Bin ...0.SnapLevel_SnapshotRun.A.snapshot.golden} | Bin ....SnapLevels_SnapshotRun.A.snapshot.golden} | Bin ...elMergeType_SnapshotRun.A.snapshot.golden} | Bin ...elMergeType_SnapshotRun.B.snapshot.golden} | Bin ...eeMergeType_SnapshotRun.A.snapshot.golden} | Bin ...eeMergeType_SnapshotRun.B.snapshot.golden} | Bin ...ngTreeState_SnapshotRun.A.snapshot.golden} | Bin ...ngTreeState_SnapshotRun.B.snapshot.golden} | Bin ...ngTreeState_SnapshotRun.C.snapshot.golden} | Bin ...MergingTree_SnapshotRun.A.snapshot.golden} | Bin ...endingMerge_SnapshotRun.A.snapshot.golden} | Bin ...endingMerge_SnapshotRun.B.snapshot.golden} | Bin ...ExistingRun_SnapshotRun.A.snapshot.golden} | Bin ...ExistingRun_SnapshotRun.B.snapshot.golden} | Bin ...den => V0.SnapshotLabel.A.snapshot.golden} | 0 ...den => V0.SnapshotLabel.B.snapshot.golden} | 0 ... => V0.SnapshotMetaData.A.snapshot.golden} | Bin ...olden => V0.SnapshotRun.A.snapshot.golden} | Bin ...olden => V0.TableConfig.A.snapshot.golden} | Bin ...den => V0.TreeMergeType.A.snapshot.golden} | 0 ...den => V0.TreeMergeType.B.snapshot.golden} | 0 ...> V0.Vector_SnapshotRun.A.snapshot.golden} | Bin ...> V0.Vector_SnapshotRun.B.snapshot.golden} | 0 ...> V0.Vector_SnapshotRun.C.snapshot.golden} | Bin ... => V0.WriteBufferAlloc.A.snapshot.golden} | Bin 56 files changed, 51 insertions(+), 38 deletions(-) rename test/golden-file-data/snapshot-codec/{BloomFilterAlloc.A.snapshot.golden => V0.BloomFilterAlloc.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{BloomFilterAlloc.B.snapshot.golden => V0.BloomFilterAlloc.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{DiskCachePolicy.A.snapshot.golden => V0.DiskCachePolicy.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{DiskCachePolicy.B.snapshot.golden => V0.DiskCachePolicy.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{DiskCachePolicy.C.snapshot.golden => V0.DiskCachePolicy.C.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{FencePointerIndexType.A.snapshot.golden => V0.FencePointerIndexType.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{FencePointerIndexType.B.snapshot.golden => V0.FencePointerIndexType.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{IndexType.A.snapshot.golden => V0.IndexType.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{IndexType.B.snapshot.golden => V0.IndexType.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{LevelMergeType.A.snapshot.golden => V0.LevelMergeType.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{LevelMergeType.B.snapshot.golden => V0.LevelMergeType.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergeCredits.A.snapshot.golden => V0.MergeCredits.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergeDebt.A.snapshot.golden => V0.MergeDebt.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergePolicy.A.snapshot.golden => V0.MergePolicy.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergePolicyForLevel.A.snapshot.golden => V0.MergePolicyForLevel.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergePolicyForLevel.B.snapshot.golden => V0.MergePolicyForLevel.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergeSchedule.A.snapshot.golden => V0.MergeSchedule.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{MergeSchedule.B.snapshot.golden => V0.MergeSchedule.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{NominalCredits.A.snapshot.golden => V0.NominalCredits.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{NominalDebt.A.snapshot.golden => V0.NominalDebt.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{RunBloomFilterAlloc.A.snapshot.golden => V0.RunBloomFilterAlloc.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{RunBloomFilterAlloc.B.snapshot.golden => V0.RunBloomFilterAlloc.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{RunDataCaching.A.snapshot.golden => V0.RunDataCaching.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{RunDataCaching.B.snapshot.golden => V0.RunDataCaching.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{RunNumber.A.snapshot.golden => V0.RunNumber.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{RunParams.A.snapshot.golden => V0.RunParams.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SizeRatio.A.snapshot.golden => V0.SizeRatio.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapIncomingRun_SnapshotRun.A.snapshot.golden => V0.SnapIncomingRun_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapIncomingRun_SnapshotRun.B.snapshot.golden => V0.SnapIncomingRun_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapLevel_SnapshotRun.A.snapshot.golden => V0.SnapLevel_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapLevels_SnapshotRun.A.snapshot.golden => V0.SnapLevels_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden => V0.SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden => V0.SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden => V0.SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden => V0.SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingTreeState_SnapshotRun.A.snapshot.golden => V0.SnapMergingTreeState_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingTreeState_SnapshotRun.B.snapshot.golden => V0.SnapMergingTreeState_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingTreeState_SnapshotRun.C.snapshot.golden => V0.SnapMergingTreeState_SnapshotRun.C.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapMergingTree_SnapshotRun.A.snapshot.golden => V0.SnapMergingTree_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapPendingMerge_SnapshotRun.A.snapshot.golden => V0.SnapPendingMerge_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapPendingMerge_SnapshotRun.B.snapshot.golden => V0.SnapPendingMerge_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapPreExistingRun_SnapshotRun.A.snapshot.golden => V0.SnapPreExistingRun_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapPreExistingRun_SnapshotRun.B.snapshot.golden => V0.SnapPreExistingRun_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapshotLabel.A.snapshot.golden => V0.SnapshotLabel.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapshotLabel.B.snapshot.golden => V0.SnapshotLabel.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapshotMetaData.A.snapshot.golden => V0.SnapshotMetaData.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{SnapshotRun.A.snapshot.golden => V0.SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{TableConfig.A.snapshot.golden => V0.TableConfig.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{TreeMergeType.A.snapshot.golden => V0.TreeMergeType.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{TreeMergeType.B.snapshot.golden => V0.TreeMergeType.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{Vector_SnapshotRun.A.snapshot.golden => V0.Vector_SnapshotRun.A.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{Vector_SnapshotRun.B.snapshot.golden => V0.Vector_SnapshotRun.B.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{Vector_SnapshotRun.C.snapshot.golden => V0.Vector_SnapshotRun.C.snapshot.golden} (100%) rename test/golden-file-data/snapshot-codec/{WriteBufferAlloc.A.snapshot.golden => V0.WriteBufferAlloc.A.snapshot.golden} (100%) diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index 9baa3fe17..a65fd70d1 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -7,6 +7,7 @@ module Database.LSMTree.Internal.Snapshot.Codec ( SnapshotVersion (..) , prettySnapshotVersion , currentSnapshotVersion + , allCompatibleSnapshotVersions -- * Writing and reading files , writeFileSnapshotMetaData , readFileSnapshotMetaData @@ -74,10 +75,22 @@ prettySnapshotVersion V0 = "v0" currentSnapshotVersion :: SnapshotVersion currentSnapshotVersion = V0 +-- | All snapshot versions that the current snapshpt version is compatible with. +-- +-- >>> allCompatibleSnapshotVersions +-- [V0] +-- +-- >>> last allCompatibleSnapshotVersions == currentSnapshotVersion +-- True +allCompatibleSnapshotVersions :: [SnapshotVersion] +allCompatibleSnapshotVersions = [V0] + isCompatible :: SnapshotVersion -> Either String () -isCompatible otherVersion = do - case ( currentSnapshotVersion, otherVersion ) of - (V0, V0) -> Right () +isCompatible otherVersion + -- for the moment, all versions are backwards compatible: + | otherVersion `elem` allCompatibleSnapshotVersions + = Right () + | otherwise = Left "forward compatibility not supported" {------------------------------------------------------------------------------- Writing and reading files diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs b/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs index e327816af..8092d8c72 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs @@ -26,10 +26,11 @@ import Database.LSMTree.Internal.RunBuilder (IndexType (..), import Database.LSMTree.Internal.RunNumber (RunNumber (..)) import Database.LSMTree.Internal.Snapshot import Database.LSMTree.Internal.Snapshot.Codec +import qualified System.Directory as Dir +import System.FilePath import qualified System.FS.API as FS -import System.FS.API.Types (FsPath, MountPoint (..), fsToFilePath, - mkFsPath, (<.>)) -import System.FS.IO (HandleIO, ioHasFS) +import System.FS.API.Types (MountPoint (..)) +import System.FS.IO (ioHasFS) import Test.QuickCheck (Property, counterexample, ioProperty, once, (.&&.)) import qualified Test.Tasty as Tasty @@ -70,47 +71,36 @@ snapshotCodecGoldenTest :: => Proxy a -> [TestTree] snapshotCodecGoldenTest proxy = [ - go (nameGolden proxy annotation) datum + go annotation datum | (annotation, datum) <- enumGoldenAnnotated' proxy ] where - go name datum = - let -- Various paths - -- - -- There are three paths for both the checksum and the snapshot files: - -- 1. The filepath of type @FsPath@ to which data is written. - -- 2. The filepath of type @FilePath@ from which data is read. - -- 3. The filepath of type @FilePath@ against which the data is compared. - -- - -- These file types' bindings have the following infix annotations, respectively: - -- 1. (Fs) for FsPath - -- 2. (Hs) for "Haskell" path - -- 3. (Au) for "Golden file" path - snapshotFsPath = mkFsPath [name] <.> "snapshot" - snapshotHsPath = fsToFilePath goldenDataMountPoint snapshotFsPath - snapshotAuPath = snapshotHsPath <> ".golden" + go ann datum = + let v = currentSnapshotVersion + outputFilePath = goldenDataFilePath filePathOutput proxy ann v + goldenFilePath = goldenDataFilePath filePathGolden proxy ann v -- IO actions - runnerIO :: FS.HasFS IO HandleIO - runnerIO = ioHasFS goldenDataMountPoint - removeIfExists :: FsPath -> IO () + removeIfExists :: FilePath -> IO () removeIfExists fp = - FS.doesFileExist runnerIO fp >>= (`when` (FS.removeFile runnerIO fp)) + Dir.doesFileExist fp >>= (`when` (Dir.removeFile fp)) outputAction :: IO () outputAction = do -- Ensure that if the output file already exists, we remove it and -- re-write out the serialized data. This ensures that there are no -- false-positives, false-negatives, or irrelevant I/O exceptions. - removeIfExists snapshotFsPath - BSL.writeFile snapshotHsPath . toLazyByteString $ encode datum + removeIfExists outputFilePath + BSL.writeFile outputFilePath . toLazyByteString $ encode datum - in Au.goldenVsFile name snapshotAuPath snapshotHsPath outputAction + in Au.goldenVsFile (nameGolden proxy ann v) goldenFilePath outputFilePath outputAction -- | Check that are no missing or unexpected files in the output directory prop_noUnexpectedOrMissingGoldenFiles :: Property prop_noUnexpectedOrMissingGoldenFiles = once $ ioProperty $ do - let expectedFiles = Set.fromList $ concat $ forallSnapshotTypes filePathsGolden - + let expectedFiles = Set.fromList $ concat $ forallSnapshotTypes $ \p -> concat [ + filePathsGolden p v + | v <- supportedVersions p + ] let hfs = ioHasFS goldenDataMountPoint actualDirectoryEntries <- FS.listDirectory hfs (FS.mkFsPath []) @@ -231,6 +221,9 @@ class EnumGolden a where singGolden :: a singGolden = snd $ head enumGoldenAnnotated + supportedVersions :: Proxy a -> [SnapshotVersion] + supportedVersions _ = allCompatibleSnapshotVersions + type Annotation = String enumGoldenAnnotated' :: EnumGolden a => Proxy a -> [(Annotation, a)] @@ -240,21 +233,28 @@ enumGoldenAnnotated' _ = enumGoldenAnnotated Enumeration class: names and file paths -------------------------------------------------------------------------------} -nameGolden :: Typeable a => Proxy a -> Annotation -> String -nameGolden p ann = map spaceToUnderscore (show $ typeRep p) ++ "." ++ ann +nameGolden :: Typeable a => Proxy a -> Annotation -> SnapshotVersion -> String +nameGolden p ann v = show v ++ "." ++ map spaceToUnderscore (show $ typeRep p) ++ "." ++ ann spaceToUnderscore :: Char -> Char spaceToUnderscore ' ' = '_' spaceToUnderscore c = c -filePathsGolden :: (EnumGolden a, Typeable a) => Proxy a -> [String] -filePathsGolden p = [ - filePathGolden p annotation +filePathsGolden :: + (EnumGolden a, Typeable a) + => Proxy a + -> SnapshotVersion + -> [String] +filePathsGolden p v = [ + filePathGolden p annotation v | (annotation, _) <- enumGoldenAnnotated' p ] -filePathGolden :: Typeable a => Proxy a -> String -> String -filePathGolden p ann = nameGolden p ann ++ ".snapshot.golden" +filePathOutput :: Typeable a => Proxy a -> String -> SnapshotVersion -> String +filePathOutput p ann v = nameGolden p ann v ++ ".snapshot" + +filePathGolden :: Typeable a => Proxy a -> String -> SnapshotVersion -> String +filePathGolden p ann v = nameGolden p ann v ++ ".snapshot.golden" {------------------------------------------------------------------------------- Enumeration class: instances diff --git a/test/golden-file-data/snapshot-codec/BloomFilterAlloc.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.BloomFilterAlloc.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/BloomFilterAlloc.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.BloomFilterAlloc.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/BloomFilterAlloc.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.BloomFilterAlloc.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/BloomFilterAlloc.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.BloomFilterAlloc.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/DiskCachePolicy.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.DiskCachePolicy.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/DiskCachePolicy.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.DiskCachePolicy.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/DiskCachePolicy.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.DiskCachePolicy.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/DiskCachePolicy.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.DiskCachePolicy.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/DiskCachePolicy.C.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.DiskCachePolicy.C.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/DiskCachePolicy.C.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.DiskCachePolicy.C.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/FencePointerIndexType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.FencePointerIndexType.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/FencePointerIndexType.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.FencePointerIndexType.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/FencePointerIndexType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.FencePointerIndexType.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/FencePointerIndexType.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.FencePointerIndexType.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/IndexType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.IndexType.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/IndexType.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.IndexType.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/IndexType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.IndexType.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/IndexType.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.IndexType.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/LevelMergeType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.LevelMergeType.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/LevelMergeType.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.LevelMergeType.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/LevelMergeType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.LevelMergeType.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/LevelMergeType.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.LevelMergeType.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergeCredits.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergeCredits.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergeCredits.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergeCredits.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergeDebt.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergeDebt.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergeDebt.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergeDebt.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergePolicy.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergePolicy.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergePolicy.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergePolicy.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergePolicyForLevel.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergePolicyForLevel.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergePolicyForLevel.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergePolicyForLevel.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergePolicyForLevel.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergePolicyForLevel.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergePolicyForLevel.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergePolicyForLevel.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergeSchedule.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergeSchedule.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergeSchedule.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergeSchedule.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/MergeSchedule.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.MergeSchedule.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/MergeSchedule.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.MergeSchedule.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/NominalCredits.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.NominalCredits.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/NominalCredits.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.NominalCredits.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/NominalDebt.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.NominalDebt.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/NominalDebt.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.NominalDebt.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/RunBloomFilterAlloc.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.RunBloomFilterAlloc.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/RunBloomFilterAlloc.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.RunBloomFilterAlloc.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/RunBloomFilterAlloc.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.RunBloomFilterAlloc.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/RunBloomFilterAlloc.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.RunBloomFilterAlloc.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/RunDataCaching.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.RunDataCaching.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/RunDataCaching.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.RunDataCaching.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/RunDataCaching.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.RunDataCaching.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/RunDataCaching.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.RunDataCaching.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/RunNumber.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.RunNumber.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/RunNumber.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.RunNumber.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/RunParams.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.RunParams.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/RunParams.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.RunParams.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SizeRatio.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SizeRatio.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SizeRatio.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SizeRatio.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapIncomingRun_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapIncomingRun_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapIncomingRun_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapIncomingRun_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapIncomingRun_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapIncomingRun_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapIncomingRun_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapIncomingRun_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapLevel_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapLevel_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapLevel_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapLevel_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapLevels_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapLevels_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapLevels_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapLevels_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingTreeState_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingTreeState_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingTreeState_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingTreeState_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingTreeState_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingTreeState_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingTreeState_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingTreeState_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingTreeState_SnapshotRun.C.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingTreeState_SnapshotRun.C.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingTreeState_SnapshotRun.C.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingTreeState_SnapshotRun.C.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapMergingTree_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapMergingTree_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapMergingTree_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapMergingTree_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapPendingMerge_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapPendingMerge_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapPendingMerge_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapPendingMerge_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapPendingMerge_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapPendingMerge_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapPendingMerge_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapPendingMerge_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapPreExistingRun_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapPreExistingRun_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapPreExistingRun_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapPreExistingRun_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapPreExistingRun_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapPreExistingRun_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapPreExistingRun_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapPreExistingRun_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapshotLabel.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapshotLabel.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapshotLabel.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapshotLabel.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapshotLabel.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapshotLabel.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapshotLabel.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapshotLabel.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapshotMetaData.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapshotMetaData.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapshotMetaData.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapshotMetaData.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/TableConfig.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.TableConfig.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/TableConfig.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.TableConfig.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/TreeMergeType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.TreeMergeType.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/TreeMergeType.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.TreeMergeType.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/TreeMergeType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.TreeMergeType.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/TreeMergeType.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.TreeMergeType.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/Vector_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.Vector_SnapshotRun.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/Vector_SnapshotRun.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.Vector_SnapshotRun.A.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/Vector_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.Vector_SnapshotRun.B.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/Vector_SnapshotRun.B.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.Vector_SnapshotRun.B.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/Vector_SnapshotRun.C.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.Vector_SnapshotRun.C.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/Vector_SnapshotRun.C.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.Vector_SnapshotRun.C.snapshot.golden diff --git a/test/golden-file-data/snapshot-codec/WriteBufferAlloc.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V0.WriteBufferAlloc.A.snapshot.golden similarity index 100% rename from test/golden-file-data/snapshot-codec/WriteBufferAlloc.A.snapshot.golden rename to test/golden-file-data/snapshot-codec/V0.WriteBufferAlloc.A.snapshot.golden From 2143a57a2ad73380fb9d0bbe0ca6af27ac7b0e9c Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 19 Jun 2025 11:30:17 +0100 Subject: [PATCH 3/6] Introduce a V1 snapshot serialisation format version But don't yet actually change the serialisation format. This is partly just to demonstrate to ourselves how to do it, so there's a pattern to follow in future. Doing this highlights that we cannot generally match on the version, and should only do so in places where the format is actually different between versions. Otherwise we would have to duplicate too much code. --- .../LSMTree/Internal/Snapshot/Codec.hs | 81 +++++++++--------- .../LSMTree/Internal/Snapshot/Codec.hs | 3 +- .../V1.BloomFilterAlloc.A.snapshot.golden | Bin 0 -> 11 bytes .../V1.BloomFilterAlloc.B.snapshot.golden | 1 + .../V1.DiskCachePolicy.A.snapshot.golden | Bin 0 -> 2 bytes .../V1.DiskCachePolicy.B.snapshot.golden | Bin 0 -> 5 bytes .../V1.DiskCachePolicy.C.snapshot.golden | 1 + ...V1.FencePointerIndexType.A.snapshot.golden | Bin 0 -> 1 bytes ...V1.FencePointerIndexType.B.snapshot.golden | 1 + .../V1.IndexType.A.snapshot.golden | 1 + .../V1.IndexType.B.snapshot.golden | Bin 0 -> 1 bytes .../V1.LevelMergeType.A.snapshot.golden | Bin 0 -> 1 bytes .../V1.LevelMergeType.B.snapshot.golden | 1 + .../V1.MergeCredits.A.snapshot.golden | 1 + .../V1.MergeDebt.A.snapshot.golden | 1 + .../V1.MergePolicy.A.snapshot.golden | Bin 0 -> 1 bytes .../V1.MergePolicyForLevel.A.snapshot.golden | Bin 0 -> 1 bytes .../V1.MergePolicyForLevel.B.snapshot.golden | 1 + .../V1.MergeSchedule.A.snapshot.golden | Bin 0 -> 1 bytes .../V1.MergeSchedule.B.snapshot.golden | 1 + .../V1.NominalCredits.A.snapshot.golden | 1 + .../V1.NominalDebt.A.snapshot.golden | 1 + .../V1.RunBloomFilterAlloc.A.snapshot.golden | Bin 0 -> 11 bytes .../V1.RunBloomFilterAlloc.B.snapshot.golden | 1 + .../V1.RunDataCaching.A.snapshot.golden | Bin 0 -> 1 bytes .../V1.RunDataCaching.B.snapshot.golden | 1 + .../V1.RunNumber.A.snapshot.golden | Bin 0 -> 3 bytes .../V1.RunParams.A.snapshot.golden | Bin 0 -> 15 bytes .../V1.SizeRatio.A.snapshot.golden | 1 + ...pIncomingRun_SnapshotRun.A.snapshot.golden | Bin 0 -> 18 bytes ...pIncomingRun_SnapshotRun.B.snapshot.golden | Bin 0 -> 9 bytes ...V1.SnapLevel_SnapshotRun.A.snapshot.golden | Bin 0 -> 34 bytes ...1.SnapLevels_SnapshotRun.A.snapshot.golden | Bin 0 -> 69 bytes ...velMergeType_SnapshotRun.A.snapshot.golden | Bin 0 -> 11 bytes ...velMergeType_SnapshotRun.B.snapshot.golden | Bin 0 -> 35 bytes ...reeMergeType_SnapshotRun.A.snapshot.golden | Bin 0 -> 11 bytes ...reeMergeType_SnapshotRun.B.snapshot.golden | Bin 0 -> 35 bytes ...ingTreeState_SnapshotRun.A.snapshot.golden | Bin 0 -> 9 bytes ...ingTreeState_SnapshotRun.B.snapshot.golden | Bin 0 -> 32 bytes ...ingTreeState_SnapshotRun.C.snapshot.golden | Bin 0 -> 13 bytes ...pMergingTree_SnapshotRun.A.snapshot.golden | Bin 0 -> 9 bytes ...PendingMerge_SnapshotRun.A.snapshot.golden | Bin 0 -> 30 bytes ...PendingMerge_SnapshotRun.B.snapshot.golden | Bin 0 -> 21 bytes ...eExistingRun_SnapshotRun.A.snapshot.golden | Bin 0 -> 9 bytes ...eExistingRun_SnapshotRun.B.snapshot.golden | Bin 0 -> 13 bytes .../V1.SnapshotLabel.A.snapshot.golden | 1 + .../V1.SnapshotLabel.B.snapshot.golden | 1 + .../V1.SnapshotMetaData.A.snapshot.golden | Bin 0 -> 122 bytes .../V1.SnapshotRun.A.snapshot.golden | Bin 0 -> 7 bytes .../V1.TableConfig.A.snapshot.golden | Bin 0 -> 22 bytes .../V1.TreeMergeType.A.snapshot.golden | 1 + .../V1.TreeMergeType.B.snapshot.golden | 1 + .../V1.Vector_SnapshotRun.A.snapshot.golden | Bin 0 -> 15 bytes .../V1.Vector_SnapshotRun.B.snapshot.golden | 1 + .../V1.Vector_SnapshotRun.C.snapshot.golden | Bin 0 -> 8 bytes .../V1.WriteBufferAlloc.A.snapshot.golden | Bin 0 -> 4 bytes 56 files changed, 63 insertions(+), 40 deletions(-) create mode 100644 test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.C.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.IndexType.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.IndexType.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.LevelMergeType.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.LevelMergeType.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergeCredits.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergeDebt.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergePolicy.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergeSchedule.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergeSchedule.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.NominalCredits.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.NominalDebt.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.RunDataCaching.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.RunDataCaching.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.RunNumber.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.RunParams.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SizeRatio.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapIncomingRun_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapIncomingRun_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapLevel_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapLevels_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.C.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapMergingTree_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapPendingMerge_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapPendingMerge_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapPreExistingRun_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapPreExistingRun_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapshotLabel.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapshotLabel.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapshotMetaData.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.TableConfig.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.TreeMergeType.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.TreeMergeType.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.Vector_SnapshotRun.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.Vector_SnapshotRun.B.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.Vector_SnapshotRun.C.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.WriteBufferAlloc.A.snapshot.golden diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index a65fd70d1..5c2ba18c4 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -58,32 +58,33 @@ import Text.Printf -- for more. Forwards compatibility is not provided at all: snapshots with a -- later version than the current version for the library release will always -- fail. -data SnapshotVersion = V0 - deriving stock (Show, Eq) +data SnapshotVersion = V0 | V1 + deriving stock (Show, Eq, Ord) -- | Pretty-print a snapshot version -- -- >>> prettySnapshotVersion currentSnapshotVersion --- "v0" +-- "v1" prettySnapshotVersion :: SnapshotVersion -> String prettySnapshotVersion V0 = "v0" +prettySnapshotVersion V1 = "v1" -- | The current snapshot version -- -- >>> currentSnapshotVersion --- V0 +-- V1 currentSnapshotVersion :: SnapshotVersion -currentSnapshotVersion = V0 +currentSnapshotVersion = V1 -- | All snapshot versions that the current snapshpt version is compatible with. -- -- >>> allCompatibleSnapshotVersions --- [V0] +-- [V0,V1] -- -- >>> last allCompatibleSnapshotVersions == currentSnapshotVersion -- True allCompatibleSnapshotVersions :: [SnapshotVersion] -allCompatibleSnapshotVersions = [V0] +allCompatibleSnapshotVersions = [V0, V1] isCompatible :: SnapshotVersion -> Either String () isCompatible otherVersion @@ -214,6 +215,7 @@ instance Encode SnapshotVersion where encodeListLen 1 <> case ver of V0 -> encodeWord 0 + V1 -> encodeWord 1 instance Decode SnapshotVersion where decode = do @@ -221,6 +223,7 @@ instance Decode SnapshotVersion where ver <- decodeWord case ver of 0 -> pure V0 + 1 -> pure V1 _ -> fail ("Unknown snapshot format version number: " <> show ver) {------------------------------------------------------------------------------- @@ -239,7 +242,7 @@ instance Encode SnapshotMetaData where <> encodeMaybe mergingTree instance DecodeVersioned SnapshotMetaData where - decodeVersioned ver@V0 = do + decodeVersioned ver = do _ <- decodeListLenOf 5 SnapshotMetaData <$> decodeVersioned ver @@ -254,7 +257,7 @@ instance Encode SnapshotLabel where encode (SnapshotLabel s) = encodeString s instance DecodeVersioned SnapshotLabel where - decodeVersioned V0 = SnapshotLabel <$> decodeString + decodeVersioned _v = SnapshotLabel <$> decodeString instance Encode SnapshotRun where encode SnapshotRun { snapRunNumber, snapRunCaching, snapRunIndex } = @@ -265,7 +268,7 @@ instance Encode SnapshotRun where <> encode snapRunIndex instance DecodeVersioned SnapshotRun where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -303,7 +306,7 @@ instance Encode TableConfig where <> encode diskCachePolicy instance DecodeVersioned TableConfig where - decodeVersioned v@V0 = do + decodeVersioned v = do _ <- decodeListLenOf 7 confMergePolicy <- decodeVersioned v confMergeSchedule <- decodeVersioned v @@ -320,7 +323,7 @@ instance Encode MergePolicy where encode LazyLevelling = encodeWord 0 instance DecodeVersioned MergePolicy where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure LazyLevelling @@ -332,7 +335,7 @@ instance Encode SizeRatio where encode Four = encodeInt 4 instance DecodeVersioned SizeRatio where - decodeVersioned V0 = do + decodeVersioned _v = do x <- decodeWord64 case x of 4 -> pure Four @@ -347,7 +350,7 @@ instance Encode WriteBufferAlloc where <> encodeInt numEntries instance DecodeVersioned WriteBufferAlloc where - decodeVersioned V0 = do + decodeVersioned _v = do _ <- decodeListLenOf 2 tag <- decodeWord case tag of @@ -365,7 +368,7 @@ instance Encode RunParams where <> encode runParamIndex instance DecodeVersioned RunParams where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -380,7 +383,7 @@ instance Encode RunDataCaching where encode NoCacheRunData = encodeWord 1 instance DecodeVersioned RunDataCaching where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure CacheRunData @@ -392,7 +395,7 @@ instance Encode IndexType where encode Compact = encodeWord 1 instance DecodeVersioned IndexType where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure Ordinary @@ -410,7 +413,7 @@ instance Encode RunBloomFilterAlloc where <> encodeDouble fpr instance DecodeVersioned RunBloomFilterAlloc where - decodeVersioned V0 = do + decodeVersioned _v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -431,7 +434,7 @@ instance Encode BloomFilterAlloc where <> encodeDouble x instance DecodeVersioned BloomFilterAlloc where - decodeVersioned V0 = do + decodeVersioned _v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -446,7 +449,7 @@ instance Encode FencePointerIndexType where encode OrdinaryIndex = encodeWord 1 instance DecodeVersioned FencePointerIndexType where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure CompactIndex @@ -468,7 +471,7 @@ instance Encode DiskCachePolicy where <> encodeWord 2 instance DecodeVersioned DiskCachePolicy where - decodeVersioned V0 = do + decodeVersioned _v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -484,7 +487,7 @@ instance Encode MergeSchedule where encode Incremental = encodeWord 1 instance DecodeVersioned MergeSchedule where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure OneShot @@ -501,7 +504,7 @@ instance Encode r => Encode (SnapLevels r) where encode (SnapLevels levels) = encode levels instance DecodeVersioned r => DecodeVersioned (SnapLevels r) where - decodeVersioned v@V0 = SnapLevels <$> decodeVersioned v + decodeVersioned v = SnapLevels <$> decodeVersioned v -- SnapLevel @@ -513,7 +516,7 @@ instance Encode r => Encode (SnapLevel r) where instance DecodeVersioned r => DecodeVersioned (SnapLevel r) where - decodeVersioned v@V0 = do + decodeVersioned v = do _ <- decodeListLenOf 2 SnapLevel <$> decodeVersioned v <*> decodeVersioned v @@ -531,7 +534,7 @@ instance Encode RunNumber where encode (RunNumber x) = encodeInt x instance DecodeVersioned RunNumber where - decodeVersioned V0 = RunNumber <$> decodeInt + decodeVersioned _v = RunNumber <$> decodeInt -- SnapIncomingRun @@ -549,7 +552,7 @@ instance Encode r => Encode (SnapIncomingRun r) where <> encode x instance DecodeVersioned r => DecodeVersioned (SnapIncomingRun r) where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -566,7 +569,7 @@ instance Encode MergePolicyForLevel where encode LevelLevelling = encodeWord 1 instance DecodeVersioned MergePolicyForLevel where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure LevelTiering @@ -590,7 +593,7 @@ instance (Encode t, Encode r) => Encode (SnapMergingRun t r) where <> encode mt instance (DecodeVersioned t, DecodeVersioned r) => DecodeVersioned (SnapMergingRun t r) where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -606,25 +609,25 @@ instance Encode NominalDebt where encode (NominalDebt x) = encodeInt x instance DecodeVersioned NominalDebt where - decodeVersioned V0 = NominalDebt <$> decodeInt + decodeVersioned _v = NominalDebt <$> decodeInt instance Encode NominalCredits where encode (NominalCredits x) = encodeInt x instance DecodeVersioned NominalCredits where - decodeVersioned V0 = NominalCredits <$> decodeInt + decodeVersioned _v = NominalCredits <$> decodeInt instance Encode MergeDebt where encode (MergeDebt (MergeCredits x)) = encodeInt x instance DecodeVersioned MergeDebt where - decodeVersioned V0 = (MergeDebt . MergeCredits) <$> decodeInt + decodeVersioned _v = (MergeDebt . MergeCredits) <$> decodeInt instance Encode MergeCredits where encode (MergeCredits x) = encodeInt x instance DecodeVersioned MergeCredits where - decodeVersioned V0 = MergeCredits <$> decodeInt + decodeVersioned _v = MergeCredits <$> decodeInt -- MergeType @@ -633,7 +636,7 @@ instance Encode MR.LevelMergeType where encode MR.MergeLastLevel = encodeWord 1 instance DecodeVersioned MR.LevelMergeType where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 0 -> pure MR.MergeMidLevel @@ -655,7 +658,7 @@ instance Encode MR.TreeMergeType where encode MR.MergeUnion = encodeWord 2 instance DecodeVersioned MR.TreeMergeType where - decodeVersioned V0 = do + decodeVersioned _v = do tag <- decodeWord case tag of 1 -> pure MR.MergeLevel @@ -672,7 +675,7 @@ instance Encode r => Encode (SnapMergingTree r) where encode (SnapMergingTree tState) = encode tState instance DecodeVersioned r => DecodeVersioned (SnapMergingTree r) where - decodeVersioned ver@V0 = SnapMergingTree <$> decodeVersioned ver + decodeVersioned ver = SnapMergingTree <$> decodeVersioned ver -- SnapMergingTreeState @@ -691,7 +694,7 @@ instance Encode r => Encode (SnapMergingTreeState r) where <> encode smrs instance DecodeVersioned r => DecodeVersioned (SnapMergingTreeState r) where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -714,7 +717,7 @@ instance Encode r => Encode (SnapPendingMerge r) where <> encodeList mts instance DecodeVersioned r => DecodeVersioned (SnapPendingMerge r) where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -735,7 +738,7 @@ instance Encode r => Encode (SnapPreExistingRun r) where <> encode smrs instance DecodeVersioned r => DecodeVersioned (SnapPreExistingRun r) where - decodeVersioned v@V0 = do + decodeVersioned v = do n <- decodeListLen tag <- decodeWord case (n, tag) of @@ -753,7 +756,7 @@ encodeMaybe = \case Just en -> encode en decodeMaybe :: DecodeVersioned a => SnapshotVersion -> Decoder s (Maybe a) -decodeMaybe v@V0 = do +decodeMaybe v = do tok <- peekTokenType case tok of TypeNull -> Nothing <$ decodeNull diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs b/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs index 39794d02d..7967f3e90 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -191,8 +191,9 @@ testAll test = [ -------------------------------------------------------------------------------} instance Arbitrary SnapshotVersion where - arbitrary = elements [V0] + arbitrary = elements [V0, V1] shrink V0 = [] + shrink V1 = [V0] deriving newtype instance Arbitrary a => Arbitrary (Versioned a) diff --git a/test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..a343dc7668ef92ac0c0d0166a8b3cd8b2e3d632c GIT binary patch literal 11 OcmZo-`0X%(0So{XWCBhA literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.B.snapshot.golden new file mode 100644 index 000000000..51182ace4 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.BloomFilterAlloc.B.snapshot.golden @@ -0,0 +1 @@ +‚û@ !ûTD- \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..8b040ead36e9d32df7a5d8dfc594b6130bf3fc4a GIT binary patch literal 2 JcmZo<000350D=Gj literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..9478b3348f240302173a8b3e6499d16ef858385c GIT binary patch literal 5 McmZo-lw@H500QO!p#T5? literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.C.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.C.snapshot.golden new file mode 100644 index 000000000..02b41ff4e --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.DiskCachePolicy.C.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.B.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.FencePointerIndexType.B.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.IndexType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.IndexType.A.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.IndexType.A.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.IndexType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.IndexType.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.LevelMergeType.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.LevelMergeType.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.LevelMergeType.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.LevelMergeType.B.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.LevelMergeType.B.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.MergeCredits.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergeCredits.A.snapshot.golden new file mode 100644 index 000000000..a850a922c --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.MergeCredits.A.snapshot.golden @@ -0,0 +1 @@ +X \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.MergeDebt.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergeDebt.A.snapshot.golden new file mode 100644 index 000000000..a850a922c --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.MergeDebt.A.snapshot.golden @@ -0,0 +1 @@ +X \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.MergePolicy.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergePolicy.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.B.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.MergePolicyForLevel.B.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.MergeSchedule.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergeSchedule.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.MergeSchedule.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergeSchedule.B.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.MergeSchedule.B.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.NominalCredits.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.NominalCredits.A.snapshot.golden new file mode 100644 index 000000000..d9ba7315a --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.NominalCredits.A.snapshot.golden @@ -0,0 +1 @@ +* \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.NominalDebt.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.NominalDebt.A.snapshot.golden new file mode 100644 index 000000000..a850a922c --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.NominalDebt.A.snapshot.golden @@ -0,0 +1 @@ +X \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..a343dc7668ef92ac0c0d0166a8b3cd8b2e3d632c GIT binary patch literal 11 OcmZo-`0X%(0So{XWCBhA literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.B.snapshot.golden new file mode 100644 index 000000000..51182ace4 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.RunBloomFilterAlloc.B.snapshot.golden @@ -0,0 +1 @@ +‚û@ !ûTD- \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.RunDataCaching.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.RunDataCaching.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.RunDataCaching.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.RunDataCaching.B.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.RunDataCaching.B.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.RunNumber.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.RunNumber.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..912f823fa811bbce28445b08cc596f84816f33f2 GIT binary patch literal 3 Kcmb1SVE_OCRsbFV literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.RunParams.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.RunParams.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..f7584567ef76fdd9a0dd9ec1f55a3cd1025f2eaa GIT binary patch literal 15 ScmZo+U}$3a?J$7>3>X0(astx; literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SizeRatio.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SizeRatio.A.snapshot.golden new file mode 100644 index 000000000..45a8ca02b --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.SizeRatio.A.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.SnapIncomingRun_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapIncomingRun_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..faaa76a549a2f5201e6053b6997e7ef6b7d2f489 GIT binary patch literal 18 XcmZo=V33HA&}s&fEew(@3=E6_AU6Wd literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapIncomingRun_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapIncomingRun_SnapshotRun.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..2349a199b436d69a71abde697f6f833ac620b6f0 GIT binary patch literal 9 QcmZo-Y+;aOVPIec00_+iCIA2c literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapLevel_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapLevel_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..cefd88665e03b42e923058e0a6a661f6fc3567e3 GIT binary patch literal 34 dcmZo_WnhqqkkD!dk}V99EDQ{cO;8$20RV5`1r-1Q literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapLevels_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapLevels_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..5caceeea0c920b7cdebfe297ec06a4ab2327701a GIT binary patch literal 69 gcmZo_YGq)Mh>*}~29hldk}M1ij7?A)#wDl%0Iz@ws{jB1 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_LevelMergeType_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..c5e3a478e1ab246b3e835ef420a034b8a09f5316 GIT binary patch literal 11 ScmZo>kcenukYr(CU<3dSZUL46 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_LevelMergeType_SnapshotRun.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..45fce58d678202cfa5f370def0c688d6ddd3ed04 GIT binary patch literal 35 icmZo=Y++z%V)*SafdLE{B_f(y7$jL37=RLBiU9zDNd=Dp literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_TreeMergeType_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..c5e3a478e1ab246b3e835ef420a034b8a09f5316 GIT binary patch literal 11 ScmZo>kcenukYr(CU<3dSZUL46 literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingRun_TreeMergeType_SnapshotRun.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..d2f7f733d95e9cf31e6332907542c8562c5d500b GIT binary patch literal 35 icmZo=Y++z%V)*SafdLE{B_f(y7$jL37=RLBiV*;TN(GPr literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..5d414b1ccde2c97198a85afb324197d17c7f27c8 GIT binary patch literal 9 QcmZo-Xkn0KVPIec00_kaB>(^b literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..dfbec2ab17fee413876e0bea9c7f2e83126996b7 GIT binary patch literal 32 XcmZo-Y-VU`VrXHIWMN=n#AE;fXAuPT literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.C.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingTreeState_SnapshotRun.C.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..1518556e937be2b10b5e0a2c51b0b1b34cb31851 GIT binary patch literal 13 UcmZo-YG#m#Xkn0KVPIec02Cqu8UO$Q literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapMergingTree_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapMergingTree_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..5d414b1ccde2c97198a85afb324197d17c7f27c8 GIT binary patch literal 9 QcmZo-Xkn0KVPIec00_kaB>(^b literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapPendingMerge_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapPendingMerge_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..6b7ddb15e0f7a4cefd95cf3ea661fcbb4429cd1a GIT binary patch literal 30 VcmZo>Xli0;VUT2DU|_^#003671abfX literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapPendingMerge_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapPendingMerge_SnapshotRun.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..906de5599898076b2c81cfa8ab5e11ba684bfa33 GIT binary patch literal 21 UcmZo-Y-(a?VUT2DU|@tZ05CNJP5=M^ literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapPreExistingRun_SnapshotRun.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapPreExistingRun_SnapshotRun.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..5d414b1ccde2c97198a85afb324197d17c7f27c8 GIT binary patch literal 9 QcmZo-Xkn0KVPIec00_kaB>(^b literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapPreExistingRun_SnapshotRun.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapPreExistingRun_SnapshotRun.B.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..e228e9c1677c1af53a703377c5bad7f2d59701f7 GIT binary patch literal 13 UcmZo-Y-W&%Xkn0KVPIec02CGi82|tP literal 0 HcmV?d00001 diff --git a/test/golden-file-data/snapshot-codec/V1.SnapshotLabel.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapshotLabel.A.snapshot.golden new file mode 100644 index 000000000..97651bcee --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.SnapshotLabel.A.snapshot.golden @@ -0,0 +1 @@ +qUserProvidedLabel \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.SnapshotLabel.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapshotLabel.B.snapshot.golden new file mode 100644 index 000000000..64845fb76 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.SnapshotLabel.B.snapshot.golden @@ -0,0 +1 @@ +` \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.SnapshotMetaData.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapshotMetaData.A.snapshot.golden new file mode 100644 index 0000000000000000000000000000000000000000..3ab235bc5947295f50626b1bcd09fb7c3f6e228a GIT binary patch literal 122 zcmZoI3@uJA3Mk4i%S=g4@kvZd&1q*~U}<8Ih-hN??J$7>0vZ`4Ss0p{S{Z=i5?akb TvIQiP0+ayr literal 0 HcmV?d00001 From 6d4610d088c4e0f1f51079515ae76220d7315372 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 17 Jun 2025 12:11:25 +0100 Subject: [PATCH 4/6] Make the MergeBatchSize an adjustable parameter in TableConfig Previously it was hard coded to be the same as the write buffer size. Document what it means as a new tunable parameter. Setting this low (1) is important for getting good parallel work balance on the pipelined WP8 benchmark. It is a crucial change that makes the pipelined version actually improve performance. Previously it would only get about a 5 to 10% improvement. --- README.md | 33 +++++++++ lsm-tree.cabal | 31 ++++++++ .../Database/LSMTree/Extras/NoThunks.hs | 3 + src/Database/LSMTree.hs | 11 +-- src/Database/LSMTree/Internal/Config.hs | 68 +++++++++++++++++- .../LSMTree/Internal/Config/Override.hs | 12 +--- src/Database/LSMTree/Internal/IncomingRun.hs | 7 -- .../LSMTree/Internal/Snapshot/Codec.hs | 31 +++++++- src/Database/LSMTree/Simple.hs | 7 +- .../LSMTree/Internal/Snapshot/Codec.hs | 14 ++-- .../LSMTree/Internal/Snapshot/Codec/Golden.hs | 12 +++- test/Test/Database/LSMTree/StateMachine.hs | 6 +- test/Test/Database/LSMTree/StateMachine/DL.hs | 7 +- .../V1.MergeBatchSize.A.snapshot.golden | 1 + .../V1.MergeBatchSize.B.snapshot.golden | 1 + .../V1.SnapshotMetaData.A.snapshot.golden | Bin 122 -> 123 bytes .../V1.TableConfig.A.snapshot.golden | Bin 22 -> 23 bytes 17 files changed, 202 insertions(+), 42 deletions(-) create mode 100644 test/golden-file-data/snapshot-codec/V1.MergeBatchSize.A.snapshot.golden create mode 100644 test/golden-file-data/snapshot-codec/V1.MergeBatchSize.B.snapshot.golden diff --git a/README.md b/README.md index 795aea628..dff6c849f 100644 --- a/README.md +++ b/README.md @@ -356,6 +356,12 @@ The *disk cache policy* determines if lookup operations use the OS page cache. Caching may improve the performance of lookups and updates if database access follows certain patterns. +`confMergeBatchSize` +The merge batch size balances the maximum latency of individual update +operations, versus the latency of a sequence of update operations. +Bigger batches improves overall performance but some updates will take a +lot longer than others. The default is to use a large batch size. + ##### Fine-tuning: Merge Policy, Size Ratio, and Write Buffer Size The configuration parameters `confMergePolicy`, `confSizeRatio`, and @@ -647,6 +653,33 @@ locality if it is likely to access entries that have nearby keys. does not have good spatial or temporal locality. For instance, if the access pattern is uniformly random. +##### Fine-tuning: Merge Batch Size + +The *merge batch size* is a micro-tuning parameter, and in most cases +you do need to think about it and can leave it at its default. + +When using the `Incremental` merge schedule, merging is done in batches. +This is a trade-off: larger batches tends to mean better overall +performance but the downside is that while most updates (inserts, +deletes, upserts) are fast, some are slower (when a batch of merging +work has to be done). + +If you care most about the maximum latency of updates, then use a small +batch size. If you don't care about latency of individual operations, +just the latency of the overall sequence of operations then use a large +batch size. The default is to use a large batch size, the same size as +the write buffer itself. The minimum batch size is 1. The maximum batch +size is the size of the write buffer `confWriteBufferAlloc`. + +Note that the actual batch size is the minimum of this configuration +parameter and the size of the batch of operations performed (e.g. +`inserts`). So if you consistently use large batches, you can use a +batch size of 1 and the merge batch size will always be determined by +the operation batch size. + +A further reason why it may be preferable to use minimal batch sizes is +to get good parallel work balance, when using parallelism. + ### References The implementation of LSM-trees in this package draws inspiration from: diff --git a/lsm-tree.cabal b/lsm-tree.cabal index f05d07216..bbcd15ce9 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -183,6 +183,12 @@ description: The /disk cache policy/ determines if lookup operations use the OS page cache. Caching may improve the performance of lookups and updates if database access follows certain patterns. + [@confMergeBatchSize@] + The merge batch size balances the maximum latency of individual update + operations, versus the latency of a sequence of update operations. Bigger + batches improves overall performance but some updates will take a lot + longer than others. The default is to use a large batch size. + ==== Fine-tuning: Merge Policy, Size Ratio, and Write Buffer Size #fine_tuning_data_layout# The configuration parameters @confMergePolicy@, @confSizeRatio@, and @confWriteBufferAlloc@ affect how the table organises its data. @@ -429,6 +435,31 @@ description: * Use the @DiskCacheNone@ policy if the database's access pattern has does not have good spatial or temporal locality. For instance, if the access pattern is uniformly random. + ==== Fine-tuning: Merge Batch Size #fine_tuning_merge_batch_size# + + The /merge batch size/ is a micro-tuning parameter, and in most cases you do + need to think about it and can leave it at its default. + + When using the 'Incremental' merge schedule, merging is done in batches. This + is a trade-off: larger batches tends to mean better overall performance but the + downside is that while most updates (inserts, deletes, upserts) are fast, some + are slower (when a batch of merging work has to be done). + + If you care most about the maximum latency of updates, then use a small batch + size. If you don't care about latency of individual operations, just the + latency of the overall sequence of operations then use a large batch size. The + default is to use a large batch size, the same size as the write buffer itself. + The minimum batch size is 1. The maximum batch size is the size of the write + buffer 'confWriteBufferAlloc'. + + Note that the actual batch size is the minimum of this configuration + parameter and the size of the batch of operations performed (e.g. 'inserts'). + So if you consistently use large batches, you can use a batch size of 1 and + the merge batch size will always be determined by the operation batch size. + + A further reason why it may be preferable to use minimal batch sizes is to get + good parallel work balance, when using parallelism. + == References The implementation of LSM-trees in this package draws inspiration from: diff --git a/src-extras/Database/LSMTree/Extras/NoThunks.hs b/src-extras/Database/LSMTree/Extras/NoThunks.hs index f9d2f7b87..8e9d36857 100644 --- a/src-extras/Database/LSMTree/Extras/NoThunks.hs +++ b/src-extras/Database/LSMTree/Extras/NoThunks.hs @@ -659,6 +659,9 @@ deriving anyclass instance NoThunks DiskCachePolicy deriving stock instance Generic MergeSchedule deriving anyclass instance NoThunks MergeSchedule +deriving stock instance Generic MergeBatchSize +deriving anyclass instance NoThunks MergeBatchSize + {------------------------------------------------------------------------------- RWVar -------------------------------------------------------------------------------} diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 04cd1c412..8fdcd4ca6 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -109,7 +109,8 @@ module Database.LSMTree ( confBloomFilterAlloc, confFencePointerIndex, confDiskCachePolicy, - confMergeSchedule + confMergeSchedule, + confMergeBatchSize ), defaultTableConfig, MergePolicy (LazyLevelling), @@ -119,6 +120,7 @@ module Database.LSMTree ( BloomFilterAlloc (AllocFixed, AllocRequestFPR), FencePointerIndexType (OrdinaryIndex, CompactIndex), DiskCachePolicy (..), + MergeBatchSize (..), -- ** Table Configuration Overrides #table_configuration_overrides# OverrideDiskCachePolicy (..), @@ -214,9 +216,10 @@ import qualified Database.LSMTree.Internal.BlobRef as Internal import Database.LSMTree.Internal.Config (BloomFilterAlloc (AllocFixed, AllocRequestFPR), DiskCachePolicy (..), FencePointerIndexType (..), - LevelNo (..), MergePolicy (..), MergeSchedule (..), - SizeRatio (..), TableConfig (..), WriteBufferAlloc (..), - defaultTableConfig, serialiseKeyMinimalSize) + LevelNo (..), MergeBatchSize (..), MergePolicy (..), + MergeSchedule (..), SizeRatio (..), TableConfig (..), + WriteBufferAlloc (..), defaultTableConfig, + serialiseKeyMinimalSize) import Database.LSMTree.Internal.Config.Override (OverrideDiskCachePolicy (..)) import Database.LSMTree.Internal.Entry (NumEntries (..)) diff --git a/src/Database/LSMTree/Internal/Config.hs b/src/Database/LSMTree/Internal/Config.hs index f0aa6b83f..15d405fdd 100644 --- a/src/Database/LSMTree/Internal/Config.hs +++ b/src/Database/LSMTree/Internal/Config.hs @@ -26,12 +26,16 @@ module Database.LSMTree.Internal.Config ( , diskCachePolicyForLevel -- * Merge schedule , MergeSchedule (..) + -- * Merge batch size + , MergeBatchSize (..) + , creditThresholdForLevel ) where import Control.DeepSeq (NFData (..)) import Database.LSMTree.Internal.Index (IndexType) import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact, Ordinary)) +import qualified Database.LSMTree.Internal.MergingRun as MR import qualified Database.LSMTree.Internal.RawBytes as RB import Database.LSMTree.Internal.Run (RunDataCaching (..)) import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..)) @@ -90,6 +94,12 @@ For a detailed discussion of fine-tuning the table configuration, see [Fine-tuni [@confDiskCachePolicy :: t'DiskCachePolicy'@] The /disk cache policy/ supports caching lookup operations using the OS page cache. Caching may improve the performance of lookups and updates if database access follows certain patterns. + +[@confMergeBatchSize :: t'MergeBatchSize'@] + The merge batch size balances the maximum latency of individual update + operations, versus the latency of a sequence of update operations. Bigger + batches improves overall performance but some updates will take a lot + longer than others. The default is to use a large batch size. -} data TableConfig = TableConfig { confMergePolicy :: !MergePolicy @@ -99,12 +109,14 @@ data TableConfig = TableConfig { , confBloomFilterAlloc :: !BloomFilterAlloc , confFencePointerIndex :: !FencePointerIndexType , confDiskCachePolicy :: !DiskCachePolicy + , confMergeBatchSize :: !MergeBatchSize } deriving stock (Show, Eq) instance NFData TableConfig where - rnf (TableConfig a b c d e f g) = - rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` rnf e `seq` rnf f `seq` rnf g + rnf (TableConfig a b c d e f g h) = + rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` + rnf e `seq` rnf f `seq` rnf g `seq` rnf h -- | The 'defaultTableConfig' defines reasonable defaults for all 'TableConfig' parameters. -- @@ -122,6 +134,8 @@ instance NFData TableConfig where -- OrdinaryIndex -- >>> confDiskCachePolicy defaultTableConfig -- DiskCacheAll +-- >>> confMergeBatchSize defaultTableConfig +-- MergeBatchSize 20000 -- defaultTableConfig :: TableConfig defaultTableConfig = @@ -133,6 +147,7 @@ defaultTableConfig = , confBloomFilterAlloc = AllocRequestFPR 1.0e-3 , confFencePointerIndex = OrdinaryIndex , confDiskCachePolicy = DiskCacheAll + , confMergeBatchSize = MergeBatchSize 20_000 -- same as write buffer } data RunLevelNo = RegularLevel LevelNo | UnionLevel @@ -238,6 +253,8 @@ data MergeSchedule = The 'Incremental' merge schedule spreads out the merging work over time. This is less efficient than the 'OneShot' merge schedule, but has a consistent workload. Using the 'Incremental' merge schedule, the worst-case disk I\/O complexity of the update operations is /logarithmic/ in the size of the table. + This 'Incremental' merge schedule still uses batching to improve performance. + The batch size can be controlled using the 'MergeBatchSize'. -} | Incremental deriving stock (Eq, Show) @@ -385,3 +402,50 @@ diskCachePolicyForLevel policy levelNo = RegularLevel l | l <= LevelNo n -> CacheRunData | otherwise -> NoCacheRunData UnionLevel -> NoCacheRunData + +{------------------------------------------------------------------------------- + Merge batch size +-------------------------------------------------------------------------------} + +{- | +The /merge batch size/ is a micro-tuning parameter, and in most cases you do +need to think about it and can leave it at its default. + +When using the 'Incremental' merge schedule, merging is done in batches. This +is a trade-off: larger batches tends to mean better overall performance but the +downside is that while most updates (inserts, deletes, upserts) are fast, some +are slower (when a batch of merging work has to be done). + +If you care most about the maximum latency of updates, then use a small batch +size. If you don't care about latency of individual operations, just the +latency of the overall sequence of operations then use a large batch size. The +default is to use a large batch size, the same size as the write buffer itself. +The minimum batch size is 1. The maximum batch size is the size of the write +buffer 'confWriteBufferAlloc'. + +Note that the actual batch size is the minimum of this configuration +parameter and the size of the batch of operations performed (e.g. 'inserts'). +So if you consistently use large batches, you can use a batch size of 1 and +the merge batch size will always be determined by the operation batch size. + +A further reason why it may be preferable to use minimal batch sizes is to get +good parallel work balance, when using parallelism. +-} +newtype MergeBatchSize = MergeBatchSize Int + deriving stock (Show, Eq, Ord) + deriving newtype (NFData) + +-- TODO: the thresholds for doing merge work should be different for each level, +-- and ideally all-pairs co-prime. +creditThresholdForLevel :: TableConfig -> LevelNo -> MR.CreditThreshold +creditThresholdForLevel TableConfig { + confMergeBatchSize = MergeBatchSize mergeBatchSz, + confWriteBufferAlloc = AllocNumEntries writeBufferSz + } + (LevelNo _i) = + MR.CreditThreshold + . MR.UnspentCredits + . MR.MergeCredits + . max 1 + . min writeBufferSz + $ mergeBatchSz diff --git a/src/Database/LSMTree/Internal/Config/Override.hs b/src/Database/LSMTree/Internal/Config/Override.hs index a2e7d5877..ad441f974 100644 --- a/src/Database/LSMTree/Internal/Config/Override.hs +++ b/src/Database/LSMTree/Internal/Config/Override.hs @@ -91,16 +91,8 @@ instance Override DiskCachePolicy SnapshotMetaData where in fmap (override rdc) smt instance Override DiskCachePolicy TableConfig where - override confDiskCachePolicy' TableConfig {..} - = TableConfig - { confMergePolicy, - confMergeSchedule, - confSizeRatio, - confWriteBufferAlloc, - confBloomFilterAlloc, - confFencePointerIndex, - confDiskCachePolicy = confDiskCachePolicy' - } + override confDiskCachePolicy' tc = + tc { confDiskCachePolicy = confDiskCachePolicy' } instance Override DiskCachePolicy (SnapLevels SnapshotRun) where override dcp (SnapLevels (vec :: V.Vector (SnapLevel SnapshotRun))) = diff --git a/src/Database/LSMTree/Internal/IncomingRun.hs b/src/Database/LSMTree/Internal/IncomingRun.hs index 70f9222a6..665a126f2 100644 --- a/src/Database/LSMTree/Internal/IncomingRun.hs +++ b/src/Database/LSMTree/Internal/IncomingRun.hs @@ -218,13 +218,6 @@ supplyCreditsIncomingRun conf ln (Merging _ nominalDebt nominalCreditsVar mr) -- use atomic operations for its counters). We could potentially simplify -- MergingRun by dispensing with batching for the MergeCredits counters. --- TODO: the thresholds for doing merge work should be different for each level, --- maybe co-prime? -creditThresholdForLevel :: TableConfig -> LevelNo -> MR.CreditThreshold -creditThresholdForLevel conf (LevelNo _i) = - let AllocNumEntries x = confWriteBufferAlloc conf - in MR.CreditThreshold (MR.UnspentCredits (MergeCredits x)) - -- | Deposit nominal credits in the local credits var, ensuring the total -- credits does not exceed the total debt. -- diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index 5c2ba18c4..89aa28d66 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -294,9 +294,10 @@ instance Encode TableConfig where , confBloomFilterAlloc = bloomFilterAlloc , confFencePointerIndex = fencePointerIndex , confDiskCachePolicy = diskCachePolicy + , confMergeBatchSize = mergeBatchSize } ) = - encodeListLen 7 + encodeListLen 8 <> encode mergePolicy <> encode mergeSchedule <> encode sizeRatio @@ -304,10 +305,11 @@ instance Encode TableConfig where <> encode bloomFilterAlloc <> encode fencePointerIndex <> encode diskCachePolicy + <> encode mergeBatchSize instance DecodeVersioned TableConfig where - decodeVersioned v = do - _ <- decodeListLenOf 7 + decodeVersioned v@V0 = do + decodeListLenOf 7 confMergePolicy <- decodeVersioned v confMergeSchedule <- decodeVersioned v confSizeRatio <- decodeVersioned v @@ -315,6 +317,21 @@ instance DecodeVersioned TableConfig where confBloomFilterAlloc <- decodeVersioned v confFencePointerIndex <- decodeVersioned v confDiskCachePolicy <- decodeVersioned v + let confMergeBatchSize = case confWriteBufferAlloc of + AllocNumEntries n -> MergeBatchSize n + pure TableConfig {..} + + -- We introduced the confMergeBatchSize in V1 + decodeVersioned v@V1 = do + decodeListLenOf 8 + confMergePolicy <- decodeVersioned v + confMergeSchedule <- decodeVersioned v + confSizeRatio <- decodeVersioned v + confWriteBufferAlloc <- decodeVersioned v + confBloomFilterAlloc <- decodeVersioned v + confFencePointerIndex <- decodeVersioned v + confDiskCachePolicy <- decodeVersioned v + confMergeBatchSize <- decodeVersioned v pure TableConfig {..} -- MergePolicy @@ -494,6 +511,14 @@ instance DecodeVersioned MergeSchedule where 1 -> pure Incremental _ -> fail ("[MergeSchedule] Unexpected tag: " <> show tag) +-- MergeBatchSize + +instance Encode MergeBatchSize where + encode (MergeBatchSize n) = encodeInt n + +instance DecodeVersioned MergeBatchSize where + decodeVersioned _v = MergeBatchSize <$> decodeInt + {------------------------------------------------------------------------------- Encoding and decoding: SnapLevels -------------------------------------------------------------------------------} diff --git a/src/Database/LSMTree/Simple.hs b/src/Database/LSMTree/Simple.hs index 67c4545a1..7a22ab554 100644 --- a/src/Database/LSMTree/Simple.hs +++ b/src/Database/LSMTree/Simple.hs @@ -111,6 +111,7 @@ module Database.LSMTree.Simple ( FencePointerIndexType (OrdinaryIndex, CompactIndex), DiskCachePolicy (..), MergeSchedule (..), + MergeBatchSize (..), -- ** Table Configuration Overrides #table_configuration_overrides# OverrideDiskCachePolicy (..), @@ -165,9 +166,9 @@ import Data.Vector (Vector) import Data.Void (Void) import Database.LSMTree (BloomFilterAlloc, CursorClosedError (..), DiskCachePolicy, FencePointerIndexType, - InvalidSnapshotNameError (..), MergePolicy, MergeSchedule, - OverrideDiskCachePolicy (..), Range (..), RawBytes, - ResolveAsFirst (..), SerialiseKey (..), + InvalidSnapshotNameError (..), MergeBatchSize, MergePolicy, + MergeSchedule, OverrideDiskCachePolicy (..), Range (..), + RawBytes, ResolveAsFirst (..), SerialiseKey (..), SerialiseKeyOrderPreserving, SerialiseValue (..), SessionClosedError (..), SizeRatio, SnapshotCorruptedError (..), diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs b/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs index 7967f3e90..a5a1731e2 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -231,11 +231,11 @@ instance Arbitrary SnapshotRun where instance Arbitrary TableConfig where arbitrary = - TableConfig <$> arbitrary <*> arbitrary <*> arbitrary - <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary - shrink (TableConfig a b c d e f g) = - [ TableConfig a' b' c' d' e' f' g' - | (a', b', c', d', e', f', g') <- shrink (a, b, c, d, e, f, g) ] + TableConfig <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary + <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary + shrink (TableConfig a b c d e f g h) = + [ TableConfig a' b' c' d' e' f' g' h' + | (a', b', c', d', e', f', g', h') <- shrink (a, b, c, d, e, f, g, h) ] instance Arbitrary MergePolicy where arbitrary = pure LazyLevelling @@ -274,6 +274,10 @@ instance Arbitrary MergeSchedule where arbitrary = elements [OneShot, Incremental] shrink _ = [] +instance Arbitrary MergeBatchSize where + arbitrary = MergeBatchSize <$> arbitrary + shrink (MergeBatchSize n) = map MergeBatchSize (shrink n) + {------------------------------------------------------------------------------- Arbitrary: SnapLevels -------------------------------------------------------------------------------} diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs b/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs index 8092d8c72..6c0c1cca3 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs @@ -15,8 +15,8 @@ import Data.Typeable import qualified Data.Vector as V import Database.LSMTree.Internal.Config (BloomFilterAlloc (..), DiskCachePolicy (..), FencePointerIndexType (..), - MergePolicy (..), MergeSchedule (..), SizeRatio (..), - TableConfig (..), WriteBufferAlloc (..)) + MergeBatchSize (..), MergePolicy (..), MergeSchedule (..), + SizeRatio (..), TableConfig (..), WriteBufferAlloc (..)) import Database.LSMTree.Internal.MergeSchedule (MergePolicyForLevel (..), NominalCredits (..), NominalDebt (..)) @@ -143,6 +143,7 @@ forallSnapshotTypes f = [ , f (Proxy @FencePointerIndexType) , f (Proxy @DiskCachePolicy) , f (Proxy @MergeSchedule) + , f (Proxy @MergeBatchSize) -- SnapLevels , f (Proxy @(SnapLevels SnapshotRun)) , f (Proxy @(SnapLevel SnapshotRun)) @@ -276,7 +277,8 @@ instance EnumGolden SnapshotLabel where SnapshotLabel{} -> () instance EnumGolden TableConfig where - singGolden = TableConfig singGolden singGolden singGolden singGolden singGolden singGolden singGolden + singGolden = TableConfig singGolden singGolden singGolden singGolden + singGolden singGolden singGolden singGolden where _coveredAllCases = \case TableConfig{} -> () @@ -329,6 +331,10 @@ instance EnumGolden MergeSchedule where OneShot{} -> () Incremental{} -> () +instance EnumGolden MergeBatchSize where + enumGolden = map MergeBatchSize [ 1, 1000 ] + supportedVersions _ = [V1] + instance EnumGolden (SnapLevels SnapshotRun) where singGolden = SnapLevels singGolden where diff --git a/test/Test/Database/LSMTree/StateMachine.hs b/test/Test/Database/LSMTree/StateMachine.hs index 109d98434..ee93f0436 100644 --- a/test/Test/Database/LSMTree/StateMachine.hs +++ b/test/Test/Database/LSMTree/StateMachine.hs @@ -94,8 +94,7 @@ import qualified Database.LSMTree.Class as Class import Database.LSMTree.Extras (showPowersOf) import Database.LSMTree.Extras.Generators (KeyForIndexCompact) import Database.LSMTree.Extras.NoThunks (propNoThunks) -import qualified Database.LSMTree.Internal.Config as R - (TableConfig (TableConfig)) +import qualified Database.LSMTree.Internal.Config as R (TableConfig (..)) import Database.LSMTree.Internal.Serialise (SerialisedBlob, SerialisedValue) import qualified Database.LSMTree.Internal.Types as R.Types @@ -226,6 +225,8 @@ instance Arbitrary R.TableConfig where ] confWriteBufferAlloc <- QC.arbitrary confFencePointerIndex <- QC.arbitrary + confMergeBatchSize <- QC.sized $ \sz -> + R.MergeBatchSize <$> QC.chooseInt (1, sz) pure $ R.TableConfig { R.confMergePolicy = R.LazyLevelling , R.confSizeRatio = R.Four @@ -234,6 +235,7 @@ instance Arbitrary R.TableConfig where , confFencePointerIndex , R.confDiskCachePolicy = R.DiskCacheNone , confMergeSchedule + , confMergeBatchSize } shrink R.TableConfig{..} = diff --git a/test/Test/Database/LSMTree/StateMachine/DL.hs b/test/Test/Database/LSMTree/StateMachine/DL.hs index 82750a560..0e452bff6 100644 --- a/test/Test/Database/LSMTree/StateMachine/DL.hs +++ b/test/Test/Database/LSMTree/StateMachine/DL.hs @@ -13,8 +13,7 @@ import Control.Tracer import qualified Data.Map.Strict as Map import qualified Data.Vector as V import Database.LSMTree as R -import qualified Database.LSMTree.Internal.Config as R - (TableConfig (TableConfig)) +import qualified Database.LSMTree.Internal.Config as R (TableConfig (..)) import qualified Database.LSMTree.Model.Session as Model (fromSomeTable, tables) import qualified Database.LSMTree.Model.Table as Model (values) import Prelude @@ -75,7 +74,9 @@ dl_example = do , confBloomFilterAlloc = AllocFixed 10 , confFencePointerIndex = OrdinaryIndex , confDiskCachePolicy = DiskCacheNone - , confMergeSchedule = OneShot }) + , confMergeSchedule = OneShot + , confMergeBatchSize = MergeBatchSize 4 + }) let kvs :: Map.Map Key Value kvs = Map.fromList $ QC.unGen (QC.vectorOf 37 $ (,) <$> QC.arbitrary <*> QC.arbitrary) diff --git a/test/golden-file-data/snapshot-codec/V1.MergeBatchSize.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergeBatchSize.A.snapshot.golden new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.MergeBatchSize.A.snapshot.golden @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.MergeBatchSize.B.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.MergeBatchSize.B.snapshot.golden new file mode 100644 index 000000000..fda74fd9c --- /dev/null +++ b/test/golden-file-data/snapshot-codec/V1.MergeBatchSize.B.snapshot.golden @@ -0,0 +1 @@ +è \ No newline at end of file diff --git a/test/golden-file-data/snapshot-codec/V1.SnapshotMetaData.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.SnapshotMetaData.A.snapshot.golden index 3ab235bc5947295f50626b1bcd09fb7c3f6e228a..e6cba0b493dc5d87f618a658dd2b4bcca27d5c41 100644 GIT binary patch delta 55 zcmb=b)@v;cElw>8D9SI(Oi4}gNlZ%3>0n@BX=0FwXkz&7Fo6LA8W|WRSs0oo`YTTi G&;kJd!w=8^ delta 54 zcmb=f(rqmaElw>8D9SI(Oi4}gNlZ%3X=h+yX=0FwXkz&7Fo6LA8W|*67@8&qC{GO3 F0s!$!56=Jq diff --git a/test/golden-file-data/snapshot-codec/V1.TableConfig.A.snapshot.golden b/test/golden-file-data/snapshot-codec/V1.TableConfig.A.snapshot.golden index 0020ff3e5602a4af511c255f6de59370e0bb141c..77830a93309a3bb9f5074b6308def73728a30611 100644 GIT binary patch delta 9 QcmWd=XY82BD9XqP00-FtU;qFB delta 7 OcmWd^V{D(uC<*`t7Xe)W From cc50eb7ab53ed260534d1cf01444b6ff8e53d8b5 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 17 Jun 2025 12:39:33 +0100 Subject: [PATCH 5/6] Generalise OverrideDiskCachePolicy to TableConfigOverride And add MergeBatchSize to TableConfigOverride. --- bench/macro/lsm-tree-bench-wp8.hs | 13 ++- src/Database/LSMTree.hs | 15 ++-- .../LSMTree/Internal/Config/Override.hs | 81 ++++++++++++++----- src/Database/LSMTree/Internal/Unsafe.hs | 12 +-- src/Database/LSMTree/Simple.hs | 21 ++--- .../Database/LSMTree/Internal/Snapshot/FS.hs | 4 +- 6 files changed, 97 insertions(+), 49 deletions(-) diff --git a/bench/macro/lsm-tree-bench-wp8.hs b/bench/macro/lsm-tree-bench-wp8.hs index 5870c933d..90eb413eb 100644 --- a/bench/macro/lsm-tree-bench-wp8.hs +++ b/bench/macro/lsm-tree-bench-wp8.hs @@ -185,8 +185,11 @@ mkTableConfigRun GlobalOpts{diskCachePolicy} conf = conf { LSM.confDiskCachePolicy = diskCachePolicy } -mkOverrideDiskCachePolicy :: GlobalOpts -> LSM.OverrideDiskCachePolicy -mkOverrideDiskCachePolicy GlobalOpts{diskCachePolicy} = LSM.OverrideDiskCachePolicy diskCachePolicy +mkOverrideDiskCachePolicy :: GlobalOpts -> LSM.TableConfigOverride +mkOverrideDiskCachePolicy GlobalOpts{diskCachePolicy} = + LSM.noTableConfigOverride { + LSM.overrideDiskCachePolicy = Just diskCachePolicy + } mkTracer :: GlobalOpts -> Tracer IO LSM.LSMTreeTrace mkTracer gopts @@ -582,8 +585,10 @@ doRun gopts opts = do -- reference version starts with empty (as it's not practical or -- necessary for testing to load the whole snapshot). tbl <- if check opts - then LSM.newTableWith @IO @K @V @B (mkTableConfigRun gopts benchTableConfig) session - else LSM.openTableFromSnapshotWith @IO @K @V @B (mkOverrideDiskCachePolicy gopts) session name label + then let conf = mkTableConfigRun gopts benchTableConfig + in LSM.newTableWith @IO @K @V @B conf session + else let conf = mkOverrideDiskCachePolicy gopts + in LSM.openTableFromSnapshotWith @IO @K @V @B conf session name label -- In checking mode, compare each output against a pure reference. checkvar <- newIORef $ pureReference diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 8fdcd4ca6..e53fac48f 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -123,7 +123,8 @@ module Database.LSMTree ( MergeBatchSize (..), -- ** Table Configuration Overrides #table_configuration_overrides# - OverrideDiskCachePolicy (..), + TableConfigOverride (..), + noTableConfigOverride, -- * Ranges #ranges# Range (..), @@ -221,7 +222,7 @@ import Database.LSMTree.Internal.Config WriteBufferAlloc (..), defaultTableConfig, serialiseKeyMinimalSize) import Database.LSMTree.Internal.Config.Override - (OverrideDiskCachePolicy (..)) + (TableConfigOverride (..), noTableConfigOverride) import Database.LSMTree.Internal.Entry (NumEntries (..)) import qualified Database.LSMTree.Internal.Entry as Entry import Database.LSMTree.Internal.Merge (LevelMergeType (..)) @@ -2403,7 +2404,7 @@ Variant of 'withTableFromSnapshot' that accepts [table configuration overrides]( withTableFromSnapshotWith :: forall k v b a. (ResolveValue v) => - OverrideDiskCachePolicy -> + TableConfigOverride -> Session IO -> SnapshotName -> SnapshotLabel -> @@ -2414,7 +2415,7 @@ withTableFromSnapshotWith :: forall m k v b a. (IOLike m) => (ResolveValue v) => - OverrideDiskCachePolicy -> + TableConfigOverride -> Session m -> SnapshotName -> SnapshotLabel -> @@ -2478,7 +2479,7 @@ openTableFromSnapshot :: SnapshotLabel -> m (Table m k v b) openTableFromSnapshot session snapName snapLabel = - openTableFromSnapshotWith NoOverrideDiskCachePolicy session snapName snapLabel + openTableFromSnapshotWith noTableConfigOverride session snapName snapLabel {- | Variant of 'openTableFromSnapshot' that accepts [table configuration overrides](#g:table_configuration_overrides). @@ -2487,7 +2488,7 @@ Variant of 'openTableFromSnapshot' that accepts [table configuration overrides]( openTableFromSnapshotWith :: forall k v b. (ResolveValue v) => - OverrideDiskCachePolicy -> + TableConfigOverride -> Session IO -> SnapshotName -> SnapshotLabel -> @@ -2497,7 +2498,7 @@ openTableFromSnapshotWith :: forall m k v b. (IOLike m) => (ResolveValue v) => - OverrideDiskCachePolicy -> + TableConfigOverride -> Session m -> SnapshotName -> SnapshotLabel -> diff --git a/src/Database/LSMTree/Internal/Config/Override.hs b/src/Database/LSMTree/Internal/Config/Override.hs index ad441f974..9000d73b6 100644 --- a/src/Database/LSMTree/Internal/Config/Override.hs +++ b/src/Database/LSMTree/Internal/Config/Override.hs @@ -5,9 +5,10 @@ module Database.LSMTree.Internal.Config.Override ( -- $override-policy - -- * Override disk cache policy - OverrideDiskCachePolicy (..) - , overrideDiskCachePolicy + -- * Override table config + TableConfigOverride (..) + , noTableConfigOverride + , overrideTableConfig ) where import qualified Data.Vector as V @@ -42,35 +43,75 @@ import Database.LSMTree.Internal.Snapshot -- Another complicating factor is that we have thought about the possibility of -- restoring sharing of ongoing merges between live tables and newly opened -- snapshots. At that point, we run into the same challenges again... But for --- now, changing only the disk cache policy offline should work fine. +-- now, changing only the disk cache policy and merge batch size offline should +-- work fine. {------------------------------------------------------------------------------- - Override disk cache policy + Helper class +-------------------------------------------------------------------------------} + +-- | This class is only here so that we can recursively call 'override' on all +-- fields of a datatype, instead of having to invent a new name for each type +-- that the function is called on such as @overrideTableConfig@, +-- @overrideSnapshotRun@, etc. +class Override o a where + override :: o -> a -> a + +instance Override a c => Override (Maybe a) c where + override = maybe id override + +{------------------------------------------------------------------------------- + Override table config -------------------------------------------------------------------------------} {- | -The 'OverrideDiskCachePolicy' can be used to override the 'DiskCachePolicy' +The 'TableConfigOverride' can be used to override the 'TableConfig' when opening a table from a snapshot. -} -data OverrideDiskCachePolicy = - NoOverrideDiskCachePolicy - | OverrideDiskCachePolicy DiskCachePolicy +data TableConfigOverride = TableConfigOverride { + overrideDiskCachePolicy :: Maybe DiskCachePolicy, + overrideMergeBatchSize :: Maybe MergeBatchSize + } deriving stock (Show, Eq) --- | Override the disk cache policy that is stored in snapshot metadata. +-- | No override of the 'TableConfig'. You can use this as a default value and +-- record update to override some parameters, while being future-proof to new +-- parameters, e.g. +-- +-- > noTableConfigOverride { overrideDiskCachePolicy = DiskCacheNone } +-- +noTableConfigOverride :: TableConfigOverride +noTableConfigOverride = TableConfigOverride Nothing Nothing + +-- | Override the a subset of the table configuration parameters that are +-- stored in snapshot metadata. -- -- Tables opened from the new 'SnapshotMetaData' will use the new value for the --- disk cache policy. -overrideDiskCachePolicy :: OverrideDiskCachePolicy -> SnapshotMetaData -> SnapshotMetaData -overrideDiskCachePolicy (OverrideDiskCachePolicy dcp) = override dcp -overrideDiskCachePolicy NoOverrideDiskCachePolicy = id +-- table configuration. +overrideTableConfig :: TableConfigOverride + -> SnapshotMetaData -> SnapshotMetaData +overrideTableConfig = override --- | This class is only here so that we can recursively call 'override' on all --- fields of a datatype, instead of having to invent a new name for each type --- that the function is called on such as 'overrideTableConfig', --- 'overrideSnapshotRun', etc. -class Override o a where - override :: o -> a -> a +instance Override TableConfigOverride SnapshotMetaData where + override TableConfigOverride {..} = + override overrideMergeBatchSize + . override overrideDiskCachePolicy + +{------------------------------------------------------------------------------- + Override merge batch size +-------------------------------------------------------------------------------} + +instance Override MergeBatchSize SnapshotMetaData where + override mbs smd = + smd { snapMetaConfig = override mbs (snapMetaConfig smd) } + +instance Override MergeBatchSize TableConfig where + override confMergeBatchSize' tc = + tc { confMergeBatchSize = confMergeBatchSize' } + +{------------------------------------------------------------------------------- + Override disk cache policy +-------------------------------------------------------------------------------} -- NOTE: the instances below explicitly pattern match on the types of -- constructor fields. This makes the code more verbose, but it also makes the diff --git a/src/Database/LSMTree/Internal/Unsafe.hs b/src/Database/LSMTree/Internal/Unsafe.hs index 0ab05717d..523d540ed 100644 --- a/src/Database/LSMTree/Internal/Unsafe.hs +++ b/src/Database/LSMTree/Internal/Unsafe.hs @@ -109,8 +109,8 @@ import Database.LSMTree.Internal.Arena (ArenaManager, newArenaManager) import Database.LSMTree.Internal.BlobRef (WeakBlobRef (..)) import qualified Database.LSMTree.Internal.BlobRef as BlobRef import Database.LSMTree.Internal.Config -import Database.LSMTree.Internal.Config.Override - (OverrideDiskCachePolicy, overrideDiskCachePolicy) +import Database.LSMTree.Internal.Config.Override (TableConfigOverride, + overrideTableConfig) import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..), FileFormat (..)) import qualified Database.LSMTree.Internal.Cursor as Cursor @@ -159,7 +159,7 @@ data LSMTreeTrace = | TraceCloseSession -- Table | TraceNewTable - | TraceOpenTableFromSnapshot SnapshotName OverrideDiskCachePolicy + | TraceOpenTableFromSnapshot SnapshotName TableConfigOverride | TraceTable TableId TableTrace | TraceDeleteSnapshot SnapshotName | TraceListSnapshots @@ -1287,7 +1287,7 @@ data SnapshotNotCompatibleError deriving anyclass (Exception) {-# SPECIALISE openTableFromSnapshot :: - OverrideDiskCachePolicy + TableConfigOverride -> Session IO h -> SnapshotName -> SnapshotLabel @@ -1296,7 +1296,7 @@ data SnapshotNotCompatibleError -- | See 'Database.LSMTree.openTableFromSnapshot'. openTableFromSnapshot :: (MonadMask m, MonadMVar m, MonadST m, MonadSTM m) - => OverrideDiskCachePolicy + => TableConfigOverride -> Session m h -> SnapshotName -> SnapshotLabel -- ^ Expected label @@ -1322,7 +1322,7 @@ openTableFromSnapshot policyOveride sesh snap label resolve = snapMetaData <- readFileSnapshotMetaData hfs contentPath checksumPath let SnapshotMetaData label' conf snapWriteBuffer snapLevels mTreeOpt - = overrideDiskCachePolicy policyOveride snapMetaData + = overrideTableConfig policyOveride snapMetaData unless (label == label') $ throwIO (ErrSnapshotWrongLabel snap label label') diff --git a/src/Database/LSMTree/Simple.hs b/src/Database/LSMTree/Simple.hs index 7a22ab554..e7aaf15a3 100644 --- a/src/Database/LSMTree/Simple.hs +++ b/src/Database/LSMTree/Simple.hs @@ -114,7 +114,8 @@ module Database.LSMTree.Simple ( MergeBatchSize (..), -- ** Table Configuration Overrides #table_configuration_overrides# - OverrideDiskCachePolicy (..), + TableConfigOverride (..), + noTableConfigOverride, -- * Ranges #ranges# Range (..), @@ -167,17 +168,17 @@ import Data.Void (Void) import Database.LSMTree (BloomFilterAlloc, CursorClosedError (..), DiskCachePolicy, FencePointerIndexType, InvalidSnapshotNameError (..), MergeBatchSize, MergePolicy, - MergeSchedule, OverrideDiskCachePolicy (..), Range (..), - RawBytes, ResolveAsFirst (..), SerialiseKey (..), - SerialiseKeyOrderPreserving, SerialiseValue (..), - SessionClosedError (..), SizeRatio, + MergeSchedule, Range (..), RawBytes, ResolveAsFirst (..), + SerialiseKey (..), SerialiseKeyOrderPreserving, + SerialiseValue (..), SessionClosedError (..), SizeRatio, SnapshotCorruptedError (..), SnapshotDoesNotExistError (..), SnapshotExistsError (..), SnapshotLabel (..), SnapshotName, SnapshotNotCompatibleError (..), TableClosedError (..), - TableConfig (..), TableCorruptedError (..), - TableTooLargeError (..), UnionCredits (..), UnionDebt (..), - WriteBufferAlloc, isValidSnapshotName, packSlice, + TableConfig (..), TableConfigOverride (..), + TableCorruptedError (..), TableTooLargeError (..), + UnionCredits (..), UnionDebt (..), WriteBufferAlloc, + isValidSnapshotName, noTableConfigOverride, packSlice, serialiseKeyIdentity, serialiseKeyIdentityUpToSlicing, serialiseKeyMinimalSize, serialiseKeyPreservesOrdering, serialiseValueIdentity, serialiseValueIdentityUpToSlicing, @@ -1425,7 +1426,7 @@ Variant of 'withTableFromSnapshot' that accepts [table configuration overrides]( -} withTableFromSnapshotWith :: forall k v a. - OverrideDiskCachePolicy -> + TableConfigOverride -> Session -> SnapshotName -> SnapshotLabel -> @@ -1468,7 +1469,7 @@ Variant of 'openTableFromSnapshot' that accepts [table configuration overrides]( -} openTableFromSnapshotWith :: forall k v. - OverrideDiskCachePolicy -> + TableConfigOverride -> Session -> SnapshotName -> SnapshotLabel -> diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs b/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs index 7d0a3bee6..29fd05648 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs @@ -13,7 +13,7 @@ import Database.LSMTree.Extras (showPowersOf10) import Database.LSMTree.Extras.Generators () import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Config.Override - (OverrideDiskCachePolicy (..)) + (noTableConfigOverride) import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.Paths import Database.LSMTree.Internal.Serialise @@ -221,6 +221,6 @@ prop_flipSnapshotBit (Positive (Small bufferSize)) es pickFileBit = saveSnapshot snapName snapLabel t openSnap s = - openTableFromSnapshot NoOverrideDiskCachePolicy s snapName snapLabel resolve + openTableFromSnapshot noTableConfigOverride s snapName snapLabel resolve getConstructorName e = takeWhile (/= ' ') (show e) From 09f06aedbf74ef439f44f27c498e7b2440d7b7c3 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 17 Jun 2025 12:41:12 +0100 Subject: [PATCH 6/6] WP8 benchmark: use small merge batch size in pipelined mode This now gets real parallel speedups on the WP8 benchmark in pipelined mode. On my laptop, we get: * non-pipelined mode: 86.5k * before: pipelined mode (2 cores): 92.2k * after: pipelined mode (2 cores): 120.0k In part this is because pipelined mode on 1 core is a regression: 70.1k because it has to do strictly more work, and it avoids doing any batching which normally improves performance. --- bench/macro/lsm-tree-bench-wp8.hs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/bench/macro/lsm-tree-bench-wp8.hs b/bench/macro/lsm-tree-bench-wp8.hs index 90eb413eb..fd071c980 100644 --- a/bench/macro/lsm-tree-bench-wp8.hs +++ b/bench/macro/lsm-tree-bench-wp8.hs @@ -180,16 +180,23 @@ mkTableConfigSetup GlobalOpts{diskCachePolicy} SetupOpts{bloomFilterAlloc} conf , LSM.confBloomFilterAlloc = bloomFilterAlloc } -mkTableConfigRun :: GlobalOpts -> LSM.TableConfig -> LSM.TableConfig -mkTableConfigRun GlobalOpts{diskCachePolicy} conf = conf { - LSM.confDiskCachePolicy = diskCachePolicy +mkTableConfigRun :: GlobalOpts -> RunOpts -> LSM.TableConfig -> LSM.TableConfig +mkTableConfigRun GlobalOpts{diskCachePolicy} RunOpts {pipelined} conf = + conf { + LSM.confDiskCachePolicy = diskCachePolicy, + LSM.confMergeBatchSize = if pipelined + then LSM.MergeBatchSize 1 + else LSM.confMergeBatchSize conf } -mkOverrideDiskCachePolicy :: GlobalOpts -> LSM.TableConfigOverride -mkOverrideDiskCachePolicy GlobalOpts{diskCachePolicy} = - LSM.noTableConfigOverride { - LSM.overrideDiskCachePolicy = Just diskCachePolicy - } +mkTableConfigOverride :: GlobalOpts -> RunOpts -> LSM.TableConfigOverride +mkTableConfigOverride GlobalOpts{diskCachePolicy} RunOpts {pipelined} = + LSM.noTableConfigOverride { + LSM.overrideDiskCachePolicy = Just diskCachePolicy, + LSM.overrideMergeBatchSize = if pipelined + then Just (LSM.MergeBatchSize 1) + else Nothing + } mkTracer :: GlobalOpts -> Tracer IO LSM.LSMTreeTrace mkTracer gopts @@ -585,9 +592,9 @@ doRun gopts opts = do -- reference version starts with empty (as it's not practical or -- necessary for testing to load the whole snapshot). tbl <- if check opts - then let conf = mkTableConfigRun gopts benchTableConfig + then let conf = mkTableConfigRun gopts opts benchTableConfig in LSM.newTableWith @IO @K @V @B conf session - else let conf = mkOverrideDiskCachePolicy gopts + else let conf = mkTableConfigOverride gopts opts in LSM.openTableFromSnapshotWith @IO @K @V @B conf session name label -- In checking mode, compare each output against a pure reference.