Merge pull request #4269 from input-output-hk/jdral/4268-improved-ant…

…i-diff-splits Split function for anti-diff finger tree that is based on heuristics
IntersectMBO · Jan 17, 2023 · 11cb369 · 11cb369
2 parents 55fedc5 + 4cc56db
commit 11cb369
Show file tree

Hide file tree

Showing 4 changed files with 266 additions and 28 deletions.
diff --git a/anti-diff/anti-diff.cabal b/anti-diff/anti-diff.cabal
@@ -108,4 +108,36 @@ test-suite test
                        -Wredundant-constraints
                        -Wmissing-export-lists
                        -Wno-unticked-promoted-constructors
-                       -fno-ignore-asserts
+                       -fno-ignore-asserts
+
+benchmark bench
+  type:                exitcode-stdio-1.0
+  default-language:    Haskell2010
+  hs-source-dirs:      bench
+  main-is:             Main.hs
+
+  other-modules:       Bench.Data.FingerTree.RootMeasured.Strict
+
+  build-depends:       base              >=4.9 && <4.15
+                     , cardano-strict-containers
+                     , deepseq
+                     , fingertree
+                     , groups
+                     , QuickCheck
+                     , tasty
+                     , tasty-bench
+                     , tasty-quickcheck
+
+                     , anti-diff
+                     , semigroupoid
+
+  ghc-options:         -Wall
+                       -Wcompat
+                       -Wincomplete-uni-patterns
+                       -Wincomplete-record-updates
+                       -Wpartial-fields
+                       -Widentities
+                       -Wredundant-constraints
+                       -Wmissing-export-lists
+                       -Wno-unticked-promoted-constructors
+                       -fno-ignore-asserts
diff --git a/anti-diff/bench/Bench/Data/FingerTree/RootMeasured/Strict.hs b/anti-diff/bench/Bench/Data/FingerTree/RootMeasured/Strict.hs
@@ -0,0 +1,176 @@
+{-# LANGUAGE DeriveAnyClass             #-}
+{-# LANGUAGE DerivingVia                #-}
+{-# LANGUAGE FlexibleInstances          #-}
+{-# LANGUAGE GeneralisedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses      #-}
+{-# LANGUAGE NumericUnderscores         #-}
+{-# LANGUAGE StandaloneDeriving         #-}
+
+{-# OPTIONS_GHC -Wno-orphans #-}
+
+module Bench.Data.FingerTree.RootMeasured.Strict (benchmarks) where
+
+import           Control.Arrow
+import           Control.DeepSeq (NFData (..))
+import           Data.Bifunctor
+import           Data.Foldable
+import           Data.Group
+import           Data.Monoid
+import           Text.Printf
+
+import           Test.QuickCheck
+import           Test.Tasty (TestTree, testGroup)
+import           Test.Tasty.Bench
+import           Test.Tasty.QuickCheck
+
+import qualified Data.FingerTree as FT
+import           Data.FingerTree.RootMeasured.Strict
+import qualified Data.FingerTree.Strict as SFT
+
+{-------------------------------------------------------------------------------
+  Main benchmark tree
+-------------------------------------------------------------------------------}
+
+benchmarks :: Benchmark
+benchmarks = bgroup "Strict" [
+      benchSplits n 0
+    , benchSplits n (n `quot` 2)
+    , benchSplits n n
+    , tests
+    ]
+  where
+    n = 10_000_000
+
+-- | Benchmark helper
+benchSplits :: Int -> Int -> Benchmark
+benchSplits n m = env (pure $ fromList [1..n]) $ \sft ->
+  bgroup (printf "Split finger tree of size %d at position %d" n m) [
+      bgroup "nf full fingertree" [
+          bench "splitlAt" $
+            nf (splitlAt m) sft
+        , bench "splitrAt" $
+            nf (splitrAt m) sft
+        , bench "splitSizedAt" $
+            nf (splitSizedAt m) sft
+        ]
+    , bgroup "nf root measures" [
+          bench "splitlAt" $
+            nf (getRootMeasures . splitlAt m) sft
+        , bench "splitrAt" $
+            nf (getRootMeasures . splitrAt m) sft
+        , bench "splitSizedAt" $
+            nf (getRootMeasures . splitSizedAt m) sft
+        ]
+    , bgroup "nf internal measures" [
+          bench "splitlAt" $
+            nf (getInternalMeasures . splitlAt m) sft
+        , bench "splitrAt" $
+            nf (getInternalMeasures . splitrAt m) sft
+        , bench "splitSizedAt" $
+            nf (getInternalMeasures . splitSizedAt m) sft
+        ]
+    , bgroup "nf all measures" [
+          bench "splitlAt" $
+            nf ((getRootMeasures &&& getInternalMeasures) . splitlAt m) sft
+        , bench "splitrAt" $
+            nf ((getRootMeasures &&& getInternalMeasures) . splitrAt m) sft
+        , bench "splitSizedAt" $
+            nf ((getRootMeasures &&& getInternalMeasures) . splitSizedAt m) sft
+        ]
+    , testGroup "Sanity checks" [
+          testProperty "once prop_matchSplitAt_LeftRight" $
+            once $ prop_matchSplitAt_LeftRight m sft
+        , testProperty "once prop_matchSplitAt_RightSized" $
+            once $ prop_matchSplitAt_RightSized m sft
+        , testProperty "once prop_matchSplitAt_SizedLeft" $
+            once $ prop_matchSplitAt_SizedLeft m sft
+        ]
+    ]
+
+{-------------------------------------------------------------------------------
+  Types
+-------------------------------------------------------------------------------}
+
+type T = StrictFingerTree (Sum Int) Length Int
+
+newtype Length = Length {unLength :: Int}
+  deriving stock (Show, Eq, Ord)
+  deriving newtype NFData
+
+deriving via Sum Int instance Semigroup Length
+deriving via Sum Int instance Monoid Length
+deriving via Sum Int instance Group Length
+
+instance Measured Length Int where
+  measure _ = Length 1
+
+instance RootMeasured (Sum Int) Int where
+  measureRoot = Sum
+
+instance Sized Length where
+  size = unLength
+
+{-------------------------------------------------------------------------------
+  Functions to benchmark
+-------------------------------------------------------------------------------}
+
+splitlAt :: Int -> T -> (T, T)
+splitlAt n = splitl (Length n<)
+
+splitrAt :: Int -> T -> (T, T)
+splitrAt n = splitr (Length n<)
+
+splitSizedAt :: Int -> T -> (T, T)
+splitSizedAt n = splitSized (Length n<)
+
+{-------------------------------------------------------------------------------
+  Function results to evaluate
+-------------------------------------------------------------------------------}
+
+getRootMeasures :: (T, T) -> (Sum Int, Sum Int)
+getRootMeasures = bimap measureRoot measureRoot
+
+getInternalMeasures :: (T, T) -> (Length, Length)
+getInternalMeasures = bimap measure measure
+
+{-------------------------------------------------------------------------------
+  Orphan instances: @'NFData'@
+-------------------------------------------------------------------------------}
+
+deriving anyclass instance (NFData vr, NFData vi, NFData a, Measured vi a)
+                        => NFData (StrictFingerTree vr vi a)
+
+instance (NFData vi, NFData a, Measured vi a)
+      => NFData (SFT.StrictFingerTree vi a) where
+  rnf ft = rnf (FT.measure ft) `seq` rnf (toList ft)
+
+{-------------------------------------------------------------------------------
+  Property tests
+-------------------------------------------------------------------------------}
+
+tests :: TestTree
+tests = testGroup "Tests" [
+    testProperty "prop_matchSplitAt_LeftRight"
+      prop_matchSplitAt_LeftRight
+  , testProperty "prop_matchSplitAt_RightSized"
+      prop_matchSplitAt_RightSized
+  , testProperty "prop_matchSplitAt_SizedLeft"
+      prop_matchSplitAt_SizedLeft
+  ]
+
+-- | Results of @'splitlAt'@ should match results of @'splitrAt'@.
+prop_matchSplitAt_LeftRight :: Int -> T -> Property
+prop_matchSplitAt_LeftRight n sft = splitlAt n sft === splitrAt n sft
+
+-- | Results of @'splitrAt'@ should match results of @'splitSizedAt'@.
+prop_matchSplitAt_RightSized :: Int -> T -> Property
+prop_matchSplitAt_RightSized n sft = splitrAt n sft === splitSizedAt n sft
+
+-- | Results of @'splitSizedAt'@ should match results of @'splitlAt'@.
+prop_matchSplitAt_SizedLeft :: Int -> T -> Property
+prop_matchSplitAt_SizedLeft n sft = splitSizedAt n sft === splitlAt n sft
+
+instance (Arbitrary a, SuperMeasured vr vi a)
+      => Arbitrary (StrictFingerTree vr vi a) where
+  arbitrary = fromList <$> arbitrary
+  shrink    = fmap fromList . shrink . toList
diff --git a/anti-diff/bench/Main.hs b/anti-diff/bench/Main.hs
@@ -0,0 +1,18 @@
+module Main (main) where
+
+import           Test.Tasty.Bench
+
+import           Bench.Data.FingerTree.RootMeasured.Strict (benchmarks)
+
+main :: IO ()
+main = defaultMain [
+    bgroup "Bench" [
+        bgroup "Data" [
+            bgroup "FingerTree" [
+                bgroup "RootMeasured" [
+                    benchmarks
+                  ]
+              ]
+          ]
+      ]
+  ]
diff --git a/anti-diff/src/Data/FingerTree/RootMeasured/Strict.hs b/anti-diff/src/Data/FingerTree/RootMeasured/Strict.hs
@@ -1,7 +1,6 @@
 {-# LANGUAGE BangPatterns           #-}
 {-# LANGUAGE ConstraintKinds        #-}
 {-# LANGUAGE DeriveGeneric          #-}
-{-# LANGUAGE DerivingStrategies     #-}
 {-# LANGUAGE FlexibleContexts       #-}
 {-# LANGUAGE FlexibleInstances      #-}
 {-# LANGUAGE FunctionalDependencies #-}
@@ -19,8 +18,10 @@ module Data.FingerTree.RootMeasured.Strict (
   , fromList
   , (|>)
     -- * Splitting
+  , Sized (..)
   , SplitRootMeasure (..)
   , split
+  , splitSized
   , splitl
   , splitr
     -- * Maps
@@ -140,14 +141,6 @@ fromList !xs = SFT (foldMap measureRoot xs) (FT.fromList xs)
 -- the left part of the split result. @r@ denotes the right part of the split
 -- result. @f@ denotes a @'SplitRootMeasure'@ function. @length@ denotes a
 -- function that computes the length of a finger tree.
---
--- TODO(jdral): Not only the length of the left and right parts of the split
--- determine the complexity of the split function, but also the time complexity
--- of monoidal sums for the type of root measures @vr@. Under the assumption
--- that the time complexity of monoidal sums does not vary too much (or is
--- constant), this heuristic is probably sufficient. In the future, we might
--- want to experiment with different heuristics (that are not too costly to
--- compute), which let us split in even more efficient ways.
 split ::
      SuperMeasured vr vi a
   => (vi -> Bool)
@@ -176,39 +169,58 @@ newtype SplitRootMeasure vr vi a = SplitRootMeasure {
       -> (vr, vr)
   }
 
--- | /O(log(min(i,n-i))) + O(f(l, r)) = O(log(min(i,n-i))) + O(length(l))/.
--- Specialisation of @'split'@ that computes root measures by sutraction of the
--- left part's root measure.
---
--- Note: The @l@ suffix of the function name indicates that its time complexity
--- depends on the length of the left part of the split.
+-- | /O(log(min(i,n-i))) + O(i)/. Specialisation of @'split'@ that is fast if
+-- @i@ is small.
 splitl ::
      SuperMeasured vr vi a
   => (vi -> Bool)
   -> StrictFingerTree vr vi a
   -> ( StrictFingerTree vr vi a
      , StrictFingerTree vr vi a
      )
-splitl p = split p $ SplitRootMeasure $ \vr (left, _right) ->
-  let vrLeft = foldMap measureRoot left
-  in  (vrLeft, invert vrLeft <> vr)
+splitl p = split p $ SplitRootMeasure $ \vr (l, _r) ->
+  let vrl = foldMap measureRoot l
+  in  (vrl, invert vrl <> vr)
 
--- | /O(log(min(i,n-i))) + O(f(l, r)) = O(log(min(i,n-i))) + O(length(r))/.
--- Specialisation of @'split'@ that computes root measures by sutraction of the
--- right part's root measure.
---
--- Note: The @r@ suffix of the function name indicates that its time complexity
--- depends on the length of the right part of the split.
+-- | /O(log(min(i,n-i))) + O(n-i)/. Specialisation of @'split'@ that is fast if
+-- if @i@ is large.
 splitr ::
      SuperMeasured vr vi a
   => (vi -> Bool)
   -> StrictFingerTree vr vi a
   -> ( StrictFingerTree vr vi a
      , StrictFingerTree vr vi a
      )
-splitr p = split p $ SplitRootMeasure $ \vr (_left, right) ->
-  let vrRight = foldMap measureRoot right
-  in  (vr <> invert vrRight, vrRight)
+splitr p = split p $ SplitRootMeasure $ \vr (_l, r) ->
+  let vrr = foldMap measureRoot r
+  in  (vr <> invert vrr, vrr)
+
+class Sized a where
+  size :: a -> Int
+
+-- | /O(log(min(i,n-i))) + O(min(i,n-i))/. Specialisation of @'split'@ that
+-- automatically determines whether @i@ or @n-i@ is smallest.
+--
+-- Note: a way to view @'splitSized'@ is as being equivalent to a function that
+-- delegates to @'splitl'@ or @'splitr'@ based on whether @i@ or @n-i@ are
+-- smallest respectively.
+splitSized ::
+     (SuperMeasured vr vi a, Sized vi)
+  => (vi -> Bool)
+  -> StrictFingerTree vr vi a
+  -> ( StrictFingerTree vr vi a
+     , StrictFingerTree vr vi a
+     )
+splitSized p = split p $ SplitRootMeasure $ \vr (l, r) ->
+  let
+    (sizel, sizer) = (size (FT.measure l), size (FT.measure r))
+  in
+    if sizel <= sizer then
+      let vrl = foldMap measureRoot l
+      in  (vrl, invert vrl <> vr)
+    else
+      let vrr = foldMap measureRoot r
+      in  (vr ~~ vrr, vrr)
 
 {-------------------------------------------------------------------------------
   Maps