diff --git a/Makefile b/Makefile
new file mode 100755
index 000000000..c8064e82e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+SOURCE=lsm-tree.cabal $(shell find src -type f -name '*.hs')
+
+.PHONY: watch
+watch:
+ fswatch -o $(SOURCE) | xargs -n1 -I{} make build
+
+.PHONY: build
+build: $(SOURCE)
+ time cabal haddock lsm-tree:lib:lsm-tree --builddir="dist-newstyle/haddock"
+
+.PHONY: serve
+serve:
+ python -m http.server -d "dist-newstyle/haddock/build/"*"/ghc-"*"/lsm-tree-"*"/doc/html/lsm-tree/"
diff --git a/README.md b/README.md
index 17166064f..fc249eba9 100644
--- a/README.md
+++ b/README.md
@@ -151,7 +151,7 @@ schedule are determined by the `TableConfig` parameters
| Session |
-Create/Open |
+Open |
N/A |
N/A |
O(1) |
@@ -166,7 +166,7 @@ schedule are determined by the `TableConfig` parameters
| Table |
-Create |
+New |
N/A |
N/A |
O(1) |
@@ -253,7 +253,7 @@ schedule are determined by the `TableConfig` parameters
| Cursor |
-Create |
+New |
LazyLevelling |
N/A |
$O(T \: \log_T \frac{n}{B})$ |
@@ -267,7 +267,7 @@ schedule are determined by the `TableConfig` parameters
|
-Read next entry |
+Next |
N/A |
N/A |
$O(\frac{1}{P})$ |
diff --git a/lsm-tree.cabal b/lsm-tree.cabal
index 9bba65a5b..892abff25 100644
--- a/lsm-tree.cabal
+++ b/lsm-tree.cabal
@@ -87,11 +87,11 @@ description:
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
| Resource | Operation | Merge policy | Merge schedule | Worst-case disk I\/O complexity |
+==========+========================+=================+=================+================================================+
- | Session | Create\/Open | N\/A | N\/A | \(O(1)\) |
+ | Session | Open | N\/A | N\/A | \(O(1)\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
| | Close | @LazyLevelling@ | N\/A | \(O(o \: T \: \log_T \frac{n}{B})\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
- | Table | Create | N\/A | N\/A | \(O(1)\) |
+ | Table | New | N\/A | N\/A | \(O(1)\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
| | Close | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
@@ -115,11 +115,11 @@ description:
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
| | List | N\/A | N\/A | \(O(s)\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
- | Cursor | Create | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
+ | Cursor | New | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
| | Close | @LazyLevelling@ | N\/A | \(O(T \: \log_T \frac{n}{B})\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
- | | Read next entry | N\/A | N\/A | \(O(\frac{1}{P})\) |
+ | | Next | N\/A | N\/A | \(O(\frac{1}{P})\) |
+----------+------------------------+-----------------+-----------------+------------------------------------------------+
(*The variable \(b\) refers to the number of entries retrieved by the range lookup.)
diff --git a/scripts/lint-diskio-complexities.hs b/scripts/lint-diskio-complexities.hs
new file mode 100755
index 000000000..a6c84012e
--- /dev/null
+++ b/scripts/lint-diskio-complexities.hs
@@ -0,0 +1,181 @@
+#!/usr/bin/env cabal
+{- cabal:
+build-depends:
+ , base >=4.16 && <5
+ , bytestring ^>=0.11
+ , cassava ^>=0.5
+ , containers ^>=0.6 || ^>=0.7 || ^>=0.8
+ , process ^>=1.6
+ , text ^>=2.1
+ , vector ^>=0.12 || ^>=0.13
+-}
+{-# LANGUAGE InstanceSigs #-}
+{-# LANGUAGE OverloadedRecordDot #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TypeApplications #-}
+
+import Control.Applicative (Alternative (..))
+import Control.Monad (unless)
+import qualified Data.ByteString.Lazy as BSL
+import Data.Csv
+import Data.List (zip4)
+import Data.Map (Map)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe, isNothing)
+import Data.Set (Set)
+import qualified Data.Set as S
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
+import qualified Data.Text.IO as TIO
+import Data.Traversable (for)
+import Data.Vector (Vector)
+import qualified Data.Vector as V
+import System.Exit (ExitCode (..), exitWith)
+import System.Process (readProcess)
+
+main :: IO ()
+main = do
+ -- Get the disk I/O complexities from the package description
+ mapForPackageDescription <-
+ buildDiskIOComplexityMap . decodeDiskIOComplexities
+ <$> readProcess "./scripts/lint-diskio-complexities/dump-from-package-description.hs" [] ""
+
+ -- Get the disk I/O complexities from Database.LSMTree
+ mapForFullApi <-
+ buildDiskIOComplexityMap . decodeDiskIOComplexities
+ <$> readProcess "./scripts/lint-diskio-complexities/dump-from-source.sh" ["./src/Database/LSMTree.hs"] ""
+
+ -- Get the disk I/O complexities from Database.LSMTree.Simple
+ mapForSimpleApi <-
+ buildDiskIOComplexityMap . decodeDiskIOComplexities
+ <$> readProcess "./scripts/lint-diskio-complexities/dump-from-source.sh" ["./src/Database/LSMTree/Simple.hs"] ""
+
+ -- Comparing Database.LSMTree.Simple to Database.LSMTree
+ putStrLn "Comparing Database.LSMTree.Simple to Database.LSMTree:"
+ comparisonSimpleToFull <-
+ fmap concat . for (concat . M.elems $ mapForSimpleApi) $ \simpleEntry@DiskIOComplexity{..} -> do
+ case M.lookup function mapForFullApi of
+ Nothing ->
+ pure [("Database.LSMTree.Simple", simpleEntry)]
+ Just fullEntries
+ | simpleEntry `elem` fullEntries -> pure []
+ | otherwise -> pure (("Database.LSMTree.Simple", simpleEntry) : (("Database.LSMTree",) <$> fullEntries))
+ TIO.putStrLn (prettyDiskIOComplexityTable comparisonSimpleToFull)
+
+ -- Comparing lsm-tree.cabal to Database.LSMTree
+ putStrLn "Comparing lsm-tree.cabal to Database.LSMTree:"
+ comparisonPackageDescriptionToFull <-
+ fmap concat . for (concat . M.elems $ mapForPackageDescription) $ \simpleEntry@DiskIOComplexity{..} -> do
+ case M.lookup function mapForFullApi of
+ Nothing ->
+ pure [("lsm-tree.cabal", simpleEntry)]
+ Just fullEntries
+ | any (looseEq simpleEntry) fullEntries -> pure []
+ | otherwise -> pure (("lsm-tree.cabal", simpleEntry) : (("Database.LSMTree",) <$> fullEntries))
+ TIO.putStrLn (prettyDiskIOComplexityTable comparisonPackageDescriptionToFull)
+
+ -- Set the exit code based on whether any differences were found
+ unless (null comparisonSimpleToFull && null comparisonPackageDescriptionToFull) $
+ exitWith (ExitFailure 1)
+
+--------------------------------------------------------------------------------
+-- Helper functions
+--------------------------------------------------------------------------------
+
+type Function = Text
+type MergePolicy = Text
+type MergeSchedule = Text
+type WorstCaseDiskIOComplexity = Text
+type Condition = Text
+
+data DiskIOComplexity = DiskIOComplexity
+ { function :: Function
+ , mergePolicy :: Maybe MergePolicy
+ , mergeSchedule :: Maybe MergeSchedule
+ , worstCaseDiskIOComplexity :: WorstCaseDiskIOComplexity
+ , condition :: Maybe Condition
+ }
+ deriving (Eq, Show)
+
+-- | Loose equality which is used when comparing the disk I/O complexities
+-- listed in the package description to those in the modules. Those in the
+-- package description do not list complex side conditions, such as all
+-- tables having been closed beforehand, or all tables having the same merge
+-- policy. Therefore, this equality disregards mismatches when the first
+-- entry does not list a condition.
+looseEq :: DiskIOComplexity -> DiskIOComplexity -> Bool
+entry1 `looseEq` entry2 =
+ and
+ [ entry1.function == entry2.function
+ , entry1.mergePolicy == entry2.mergePolicy
+ , entry1.mergeSchedule == entry2.mergeSchedule
+ , entry1.worstCaseDiskIOComplexity == entry2.worstCaseDiskIOComplexity
+ , isNothing entry1.condition || entry1.condition == entry2.condition
+ ]
+
+-- | Typeset a tagged list of 'DiskIOComplexity' records as an aligned table.
+prettyDiskIOComplexityTable :: [(Text, DiskIOComplexity)] -> Text
+prettyDiskIOComplexityTable [] = "No differences found.\n"
+prettyDiskIOComplexityTable entries =
+ T.unlines
+ [ T.unwords
+ [ prettyCellForColumn tag tags
+ , prettyCellForColumn function functions
+ , prettyCellForColumn fullCondition fullConditions
+ , prettyCellForColumn worstCaseDiskIOComplexity worstCaseDiskIOComplexities
+ ]
+ | (tag, function, fullCondition, worstCaseDiskIOComplexity) <-
+ zip4 tags functions fullConditions worstCaseDiskIOComplexities
+ ]
+ where
+ tags = fst <$> entries
+ functions = ((.function) . snd) <$> entries
+ fullConditions = (prettyFullCondition . snd) <$> entries
+ worstCaseDiskIOComplexities = ((.worstCaseDiskIOComplexity) . snd) <$> entries
+
+ prettyCellForColumn :: Text -> [Text] -> Text
+ prettyCellForColumn cell column = cell <> T.replicate (maximum (T.length <$> column) - T.length cell) " "
+
+ prettyFullCondition :: DiskIOComplexity -> Text
+ prettyFullCondition DiskIOComplexity{..} =
+ fromMaybe "*" mergePolicy `slashWith` mergeSchedule `slashWith` condition
+ where
+ slashWith :: Text -> Maybe Text -> Text
+ slashWith x my = maybe x (\y -> x <> "/" <> y) my
+
+-- | Structure vector of 'DiskIOComplexity' records into lookup table by function name.
+buildDiskIOComplexityMap :: Vector DiskIOComplexity -> Map Function [DiskIOComplexity]
+buildDiskIOComplexityMap = M.unionsWith (<>) . fmap toSingletonMap . V.toList
+ where
+ toSingletonMap :: DiskIOComplexity -> Map Function [DiskIOComplexity]
+ toSingletonMap simpleEntry = M.singleton simpleEntry.function [simpleEntry]
+
+-- | Parse CSV file into vector of 'DiskIOComplexity' records.
+decodeDiskIOComplexities :: String -> Vector DiskIOComplexity
+decodeDiskIOComplexities =
+ either error snd . decodeByName . BSL.fromStrict . TE.encodeUtf8 . T.pack
+
+normaliseWorstCaseDiskIOComplexity :: WorstCaseDiskIOComplexity -> WorstCaseDiskIOComplexity
+normaliseWorstCaseDiskIOComplexity =
+ T.replace "+ " "+" . T.replace " +" "+" . T.replace " \\" "\\" . T.replace " " " " . T.replace "\\:" ""
+
+-- | Parse CSV row into 'DiskIOComplexity' record.
+instance FromNamedRecord DiskIOComplexity where
+ parseNamedRecord :: NamedRecord -> Parser DiskIOComplexity
+ parseNamedRecord m = do
+ function <- m .: "Function"
+ mergePolicy <- orNotApplicable (m .: "Merge policy")
+ mergeSchedule <- orNotApplicable (m .: "Merge schedule")
+ worstCaseDiskIOComplexity <- normaliseWorstCaseDiskIOComplexity <$> (m .: "Worst-case disk I/O complexity")
+ condition <- orNotApplicable (m .: "Condition")
+ pure DiskIOComplexity{..}
+ where
+ orNotApplicable :: Parser Text -> Parser (Maybe Text)
+ orNotApplicable pText = (emptyTextToNothing <$> pText) <|> pure Nothing
+ where
+ emptyTextToNothing :: Text -> Maybe Text
+ emptyTextToNothing txt =
+ if T.null txt then Nothing else Just txt
diff --git a/scripts/lint-diskio-complexities/dump-from-package-description.hs b/scripts/lint-diskio-complexities/dump-from-package-description.hs
new file mode 100755
index 000000000..5f7770f15
--- /dev/null
+++ b/scripts/lint-diskio-complexities/dump-from-package-description.hs
@@ -0,0 +1,141 @@
+#!/usr/bin/env cabal
+{- cabal:
+build-depends:
+ , base >=4.16
+ , bytestring >=0.11
+ , Cabal-syntax ^>=3.10 || ^>=3.12
+ , cassava ^>=0.5
+ , pandoc ^>=3.6.4
+ , pandoc-types ^>=1.23.1
+ , text >=2.1
+-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
+
+module Main (main) where
+
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as BSL (putStr)
+import Data.Csv (Header, NamedRecord, ToField (..),
+ ToNamedRecord (..), encodeByName, header, namedRecord,
+ (.=))
+import Data.IORef (atomicModifyIORef, newIORef)
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
+import Data.Traversable (for)
+import Debug.Trace (traceShow)
+import Distribution.PackageDescription.Parsec
+ (parseGenericPackageDescriptionMaybe)
+import qualified Distribution.Types.GenericPackageDescription as GenericPackageDescription
+import qualified Distribution.Types.PackageDescription as PackageDescription
+import Distribution.Utils.ShortText (fromShortText)
+import System.IO (hPutStrLn, stderr)
+import Text.Pandoc (runIOorExplode)
+import Text.Pandoc.Definition (Block (..), Inline (..), Row (..),
+ TableBody (..))
+import Text.Pandoc.Extensions (getDefaultExtensions)
+import Text.Pandoc.Options (ReaderOptions (..), WriterOptions (..),
+ def)
+import Text.Pandoc.Readers (readHaddock)
+import Text.Pandoc.Walk (Walkable (query))
+
+tableEntryToFunction :: Text -> Text -> Text
+tableEntryToFunction resource operation
+ | (resource == "Table" || resource == "Cursor") && operation `notElem` ["New", "Close"] = toCamel (T.splitOn " " operation)
+ | resource == "Snapshot" && operation == "Open" = "openTableFromSnapshot"
+ | resource == "Snapshot" && operation == "List" = "listSnapshots" -- plural
+ | otherwise = toCamel [operation, resource]
+
+main :: IO ()
+main = do
+ let lsmTreeCabalFile = "lsm-tree.cabal"
+ lsmTreeCabalContent <- BS.readFile lsmTreeCabalFile
+ case parseGenericPackageDescriptionMaybe lsmTreeCabalContent of
+ Nothing -> hPutStrLn stderr $ "error: Could not parse '" <> lsmTreeCabalFile <> "'"
+ Just genericPackageDescription -> do
+ let packageDescription = GenericPackageDescription.packageDescription genericPackageDescription
+ let description = T.pack . fromShortText $ PackageDescription.description packageDescription
+ doc <- runIOorExplode $ readHaddock def description
+ -- Get the disk I/O complexity table
+ let diskIOComplexityTable = query dumpDiskIOComplexityTable doc
+ resourceRef <- newIORef ""
+ operationsRef <- newIORef ""
+ entries <-
+ fmap concat . for diskIOComplexityTable $ \row -> do
+ let fullRow = replicate (5 - length row) "" <> row
+ let [newResource, newOperations, newMergePolicy, newMergeSchedule, rawWorstCaseDiskIOComplexity] = fullRow
+ resource <- atomicModifyIORef resourceRef (merge newResource)
+ operations <- atomicModifyIORef operationsRef (merge newOperations)
+ let mergePolicy = if newMergePolicy == "N/A" then Nothing else Just newMergePolicy
+ let mergeSchedule = if newMergeSchedule == "N/A" then Nothing else Just newMergeSchedule
+ let worstCaseDiskIOComplexity = T.dropWhileEnd (`elem`[' ','*']) rawWorstCaseDiskIOComplexity
+ for (T.splitOn "/" operations) $ \operation -> do
+ let function = tableEntryToFunction resource operation
+ pure $ DiskIOComplexity {..}
+ let csvData = encodeByName diskIOComplexityHeader entries
+ BSL.putStr csvData
+
+--------------------------------------------------------------------------------
+-- Helper functions
+--------------------------------------------------------------------------------
+
+type Function = Text
+type MergePolicy = Text
+type MergeSchedule = Text
+type WorstCaseDiskIOComplexity = Text
+
+data DiskIOComplexity = DiskIOComplexity
+ { function :: Function
+ , mergePolicy :: Maybe MergePolicy
+ , mergeSchedule :: Maybe MergeSchedule
+ , worstCaseDiskIOComplexity :: WorstCaseDiskIOComplexity
+ }
+ deriving (Eq, Show)
+
+diskIOComplexityHeader :: Header
+diskIOComplexityHeader =
+ header
+ ["Function", "Merge policy", "Merge schedule", "Worst-case disk I/O complexity"]
+
+instance ToNamedRecord DiskIOComplexity where
+ toNamedRecord DiskIOComplexity {..} =
+ namedRecord
+ [ "Function" .= toField function
+ , "Merge policy" .= toField mergePolicy
+ , "Merge schedule" .= toField mergeSchedule
+ , "Worst-case disk I/O complexity" .= toField worstCaseDiskIOComplexity
+ ]
+
+contents :: Walkable Block a => a -> [Text]
+contents = query forBlock
+ where
+ forBlock :: Block -> [Text]
+ forBlock block = [T.unwords (query forInline block)]
+ forInline :: Inline -> [Text]
+ forInline (Code _ text) = [text]
+ forInline (Math _ text) = [text]
+ forInline (Str text) = [text]
+ forInline _ = []
+
+diskIOComplexityTableHeader :: [Text]
+diskIOComplexityTableHeader =
+ ["Resource", "Operation", "Merge policy", "Merge schedule", "Worst-case disk I/O complexity"]
+
+dumpDiskIOComplexityTable :: Block -> [[Text]]
+dumpDiskIOComplexityTable table@(Table _attr _caption _cols tableHead tableBodies _tableFoot)
+ | contents tableHead == diskIOComplexityTableHeader =
+ [ [T.unwords (contents cell) | cell <- cells]
+ | TableBody _ _ _ rows <- tableBodies
+ , Row _ cells <- rows
+ ]
+dumpDiskIOComplexityTable _ = []
+
+merge :: Text -> Text -> (Text, Text)
+merge new old = let res = if T.null new then old else new in (res, res)
+
+toCamel :: [Text] -> Text
+toCamel [] = mempty
+toCamel (x : xs) = T.toLower x <> T.concat (T.toTitle <$> xs)
diff --git a/scripts/time-complexities/from-hs.sh b/scripts/lint-diskio-complexities/dump-from-source.sh
similarity index 94%
rename from scripts/time-complexities/from-hs.sh
rename to scripts/lint-diskio-complexities/dump-from-source.sh
index 7649364cf..ff3196f64 100755
--- a/scripts/time-complexities/from-hs.sh
+++ b/scripts/lint-diskio-complexities/dump-from-source.sh
@@ -44,16 +44,16 @@ newline_rs='\
printf '%s\n' 'Function,Merge policy,Merge schedule,Worst-case disk I/O complexity,Condition'
sed -En -e '
- # Collect complexity information from a Haddock annotation
+ # Collect all disk I/O complexity information from a Haddock annotation
/^\{- *\|/,/-}/ {
# Store an unconditional disk I/O complexity
s/'"$unconditional_re"'/,,\1,/
t store
# Store a disk I/O complexity for the “nothing left open” case
- s/'"$nothing_left_open_re"'/,,\1,Nothing left open/
+ s/'"$nothing_left_open_re"'/,,\1,NothingLeftOpen/
t store
# Store a disk I/O complexity with unknown condition
- s/^.*'"$o_expr_re"'.*$/,,\1,Unknown condition/
+ s/^.*'"$o_expr_re"'.*$/,,\1,UnknownCondition/
t store
# Note down the occurrence of a “same merge policy” restriction
/'"$same_merge_policy_re"'/ {
@@ -81,7 +81,7 @@ sed -En -e '
:store
H
x
- s/\n=\n(.*)/'"$newline_rs"'\1Same merge policy'"$newline_rs"'=/
+ s/\n=\n(.*)/'"$newline_rs"'\1SameMergePolicy'"$newline_rs"'=/
x
# Continue with the next line
d