Fix the arbitrary evaluation nonsense in 'nofib'

IntersectMBO · Mar 27, 2024 · 532722e · 532722e
1 parent 1843823
commit 532722e
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 44 deletions.
diff --git a/plutus-benchmark/common/PlutusBenchmark/Common.hs b/plutus-benchmark/common/PlutusBenchmark/Common.hs
@@ -16,6 +16,8 @@ module PlutusBenchmark.Common
     , unsafeRunTermCek
     , runTermCek
     , cekResultMatchesHaskellValue
+    , mkEvalCtx
+    , evaluateCekLikeInProd
     , benchTermAgdaCek
     , benchProgramAgdaCek
     , TestSize (..)
@@ -29,14 +31,19 @@ where
 import Paths_plutus_benchmark as Export
 import PlutusBenchmark.ProtocolParameters as PP
 
+import PlutusLedgerApi.Common qualified as LedgerApi
+
+import PlutusTx qualified as Tx
+
 import PlutusCore qualified as PLC
 import PlutusCore.Default
 import PlutusCore.Evaluation.Machine.ExBudget (ExBudget (..))
 import PlutusCore.Evaluation.Machine.ExBudgetingDefaults qualified as PLC
 import PlutusCore.Evaluation.Machine.ExMemory (ExCPU (..), ExMemory (..))
-import PlutusTx qualified as Tx
+
 import UntypedPlutusCore qualified as UPLC
 import UntypedPlutusCore.Evaluation.Machine.Cek as Cek
+import UntypedPlutusCore.Evaluation.Machine.Cek qualified as UPLC
 
 import MAlonzo.Code.Evaluator.Term (runUAgda)
 
@@ -156,6 +163,34 @@ cekResultMatchesHaskellValue
 cekResultMatchesHaskellValue term matches value =
     (unsafeRunTermCek term) `matches` (unsafeRunTermCek $ haskellValueToTerm value)
 
+-- | Create the evaluation context for the benchmarks. This doesn't exactly match how it's done
+-- on-chain, but that's okay because the evaluation context is cached by the ledger, so we're
+-- deliberately not including it in the benchmarks.
+mkEvalCtx :: LedgerApi.EvaluationContext
+mkEvalCtx =
+    case PLC.defaultCostModelParams of
+        Just p ->
+            let errOrCtx =
+                    -- The validation benchmarks were all created from PlutusV1 scripts
+                    LedgerApi.mkDynEvaluationContext (const DefaultFunSemanticsVariant1) p
+            in case errOrCtx of
+                Right ec -> ec
+                Left err -> error $ show err
+        Nothing -> error "Couldn't get cost model params"
+
+-- | Evaluate a term as it would be evaluated using the on-chain evaluator.
+evaluateCekLikeInProd
+    :: LedgerApi.EvaluationContext
+    -> UPLC.Term PLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
+    -> Either
+            (UPLC.CekEvaluationException UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun)
+            (UPLC.Term UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun ())
+evaluateCekLikeInProd evalCtx term = do
+    let (getRes, _, _) =
+            let pv = LedgerApi.ledgerLanguageIntroducedIn LedgerApi.PlutusV1
+            -- The validation benchmarks were all created from PlutusV1 scripts
+            in LedgerApi.evaluateTerm UPLC.restrictingEnormous pv LedgerApi.Quiet evalCtx term
+    getRes
 
 ---------------- Run a term or program using the plutus-metatheory CEK evaluator ----------------
 

diff --git a/plutus-benchmark/nofib/bench/BenchCek.hs b/plutus-benchmark/nofib/bench/BenchCek.hs
@@ -1,8 +1,20 @@
+{-# LANGUAGE BangPatterns #-}
+
 {- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
 module Main where
 
-import PlutusBenchmark.Common (benchTermCek)
-import Shared (benchWith)
+import Shared (benchWith, evaluateCekLikeInProd, mkEvalCtx)
+
+import Control.DeepSeq (force)
+import Control.Exception (evaluate)
+import Criterion (whnf)
 
 main :: IO ()
-main = benchWith benchTermCek
+main = do
+  evalCtx <- evaluate $ force mkEvalCtx
+  let mkCekBM term =
+          -- `force` to try to ensure that deserialiation is not included in benchmarking time.
+          let !benchTerm = force term
+              eval = either (error . show) (\_ -> ()) . evaluateCekLikeInProd evalCtx
+          in whnf eval benchTerm
+  benchWith mkCekBM
diff --git a/plutus-benchmark/nofib/bench/Shared.hs b/plutus-benchmark/nofib/bench/Shared.hs
@@ -1,8 +1,12 @@
 {- | Shared code for benchmarking Plutus and Haskell versions of the Plutus nofib examples -}
-module Shared (benchWith, mkBenchMarks)
-where
-
-import PlutusBenchmark.Common (Term, getConfig)
+module Shared (
+    benchWith
+    , mkBenchMarks
+    , mkEvalCtx
+    , evaluateCekLikeInProd
+    ) where
+
+import PlutusBenchmark.Common (Term, evaluateCekLikeInProd, getConfig, mkEvalCtx)
 
 import PlutusBenchmark.NoFib.Clausify qualified as Clausify
 import PlutusBenchmark.NoFib.Knights qualified as Knights

diff --git a/plutus-benchmark/plutus-benchmark.cabal b/plutus-benchmark/plutus-benchmark.cabal
@@ -79,6 +79,7 @@ library plutus-benchmark-common
     , filepath
     , flat               ^>=0.6
     , plutus-core        ^>=1.24
+    , plutus-ledger-api  ^>=1.24
     , plutus-metatheory
     , plutus-tx          ^>=1.24
     , tasty
@@ -141,8 +142,10 @@ benchmark nofib
   build-depends:
     , base                     >=4.9     && <5
     , criterion                >=1.5.9.0
+    , deepseq
     , nofib-internal
     , plutus-benchmark-common
+    , plutus-core              ^>=1.23
 
 benchmark nofib-hs
   import:         lang, ghc-version-support

diff --git a/plutus-benchmark/validation/bench/Common.hs b/plutus-benchmark/validation/bench/Common.hs
@@ -9,19 +9,12 @@ module Common (
     , Term
     ) where
 
-import PlutusBenchmark.Common (getConfig, getDataDir)
+import PlutusBenchmark.Common (evaluateCekLikeInProd, getConfig, getDataDir, mkEvalCtx)
 import PlutusBenchmark.NaturalSort
 
-import PlutusCore qualified as PLC
 import PlutusCore.Builtin qualified as PLC
 import PlutusCore.Data qualified as PLC
-import PlutusCore.Default qualified as PLC (BuiltinSemanticsVariant (DefaultFunSemanticsVariant1))
-import PlutusCore.Evaluation.Machine.ExBudgetingDefaults qualified as PLC
-import PlutusLedgerApi.Common (PlutusLedgerLanguage (PlutusV1), evaluateTerm,
-                               ledgerLanguageIntroducedIn, mkDynEvaluationContext)
-import PlutusLedgerApi.V1 (EvaluationContext, VerboseMode (..))
 import UntypedPlutusCore qualified as UPLC
-import UntypedPlutusCore.Evaluation.Machine.Cek qualified as UPLC
 
 import Criterion.Main
 import Criterion.Main.Options (Mode, parseWith)
@@ -132,34 +125,6 @@ benchWith act = do
         env (BS.readFile $ dir </> file) $ \(~scriptBS) ->
             bench (dropExtension file) $ act file scriptBS
 
--- | Create the evaluation context for the benchmarks. This doesn't exactly match how it's done
--- on-chain, but that's okay because the evaluation context is cached by the ledger, so we're
--- deliberately not including it in the benchmarks.
-mkEvalCtx :: EvaluationContext
-mkEvalCtx =
-    case PLC.defaultCostModelParams of
-        Just p ->
-            let errOrCtx =
-                    -- The validation benchmarks were all created from PlutusV1 scripts
-                    mkDynEvaluationContext (const PLC.DefaultFunSemanticsVariant1) p
-            in case errOrCtx of
-                Right ec -> ec
-                Left err -> error $ show err
-        Nothing -> error "Couldn't get cost model params"
-
--- | Evaluate a term as it would be evaluated using the on-chain evaluator.
-evaluateCekLikeInProd
-    :: EvaluationContext
-    -> UPLC.Term PLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
-    -> Either
-            (UPLC.CekEvaluationException UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun)
-            (UPLC.Term UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun ())
-evaluateCekLikeInProd evalCtx term = do
-    let (getRes, _, _) =
-            -- The validation benchmarks were all created from PlutusV1 scripts
-            evaluateTerm UPLC.restrictingEnormous (ledgerLanguageIntroducedIn PlutusV1) Quiet evalCtx term
-    getRes
-
 type Term = UPLC.Term UPLC.DeBruijn UPLC.DefaultUni UPLC.DefaultFun ()
 
 -- | If the term is an application of something to some arguments, peel off