Skip to content

Commit

Permalink
Fix the arbitrary evaluation nonsense in 'nofib'
Browse files Browse the repository at this point in the history
  • Loading branch information
effectfully committed Mar 27, 2024
1 parent 1843823 commit 532722e
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 44 deletions.
37 changes: 36 additions & 1 deletion plutus-benchmark/common/PlutusBenchmark/Common.hs
Expand Up @@ -16,6 +16,8 @@ module PlutusBenchmark.Common
, unsafeRunTermCek
, runTermCek
, cekResultMatchesHaskellValue
, mkEvalCtx
, evaluateCekLikeInProd
, benchTermAgdaCek
, benchProgramAgdaCek
, TestSize (..)
Expand All @@ -29,14 +31,19 @@ where
import Paths_plutus_benchmark as Export
import PlutusBenchmark.ProtocolParameters as PP

import PlutusLedgerApi.Common qualified as LedgerApi

import PlutusTx qualified as Tx

import PlutusCore qualified as PLC
import PlutusCore.Default
import PlutusCore.Evaluation.Machine.ExBudget (ExBudget (..))
import PlutusCore.Evaluation.Machine.ExBudgetingDefaults qualified as PLC
import PlutusCore.Evaluation.Machine.ExMemory (ExCPU (..), ExMemory (..))
import PlutusTx qualified as Tx

import UntypedPlutusCore qualified as UPLC
import UntypedPlutusCore.Evaluation.Machine.Cek as Cek
import UntypedPlutusCore.Evaluation.Machine.Cek qualified as UPLC

import MAlonzo.Code.Evaluator.Term (runUAgda)

Expand Down Expand Up @@ -156,6 +163,34 @@ cekResultMatchesHaskellValue
cekResultMatchesHaskellValue term matches value =
(unsafeRunTermCek term) `matches` (unsafeRunTermCek $ haskellValueToTerm value)

-- | Create the evaluation context for the benchmarks. This doesn't exactly match how it's done
-- on-chain, but that's okay because the evaluation context is cached by the ledger, so we're
-- deliberately not including it in the benchmarks.
mkEvalCtx :: LedgerApi.EvaluationContext
mkEvalCtx =
case PLC.defaultCostModelParams of
Just p ->
let errOrCtx =
-- The validation benchmarks were all created from PlutusV1 scripts
LedgerApi.mkDynEvaluationContext (const DefaultFunSemanticsVariant1) p
in case errOrCtx of
Right ec -> ec
Left err -> error $ show err
Nothing -> error "Couldn't get cost model params"

-- | Evaluate a term as it would be evaluated using the on-chain evaluator.
evaluateCekLikeInProd
:: LedgerApi.EvaluationContext
-> UPLC.Term PLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
-> Either
(UPLC.CekEvaluationException UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun)
(UPLC.Term UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun ())
evaluateCekLikeInProd evalCtx term = do
let (getRes, _, _) =
let pv = LedgerApi.ledgerLanguageIntroducedIn LedgerApi.PlutusV1
-- The validation benchmarks were all created from PlutusV1 scripts
in LedgerApi.evaluateTerm UPLC.restrictingEnormous pv LedgerApi.Quiet evalCtx term
getRes

---------------- Run a term or program using the plutus-metatheory CEK evaluator ----------------

Expand Down
18 changes: 15 additions & 3 deletions plutus-benchmark/nofib/bench/BenchCek.hs
@@ -1,8 +1,20 @@
{-# LANGUAGE BangPatterns #-}

{- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
module Main where

import PlutusBenchmark.Common (benchTermCek)
import Shared (benchWith)
import Shared (benchWith, evaluateCekLikeInProd, mkEvalCtx)

import Control.DeepSeq (force)
import Control.Exception (evaluate)
import Criterion (whnf)

main :: IO ()
main = benchWith benchTermCek
main = do
evalCtx <- evaluate $ force mkEvalCtx
let mkCekBM term =
-- `force` to try to ensure that deserialiation is not included in benchmarking time.
let !benchTerm = force term
eval = either (error . show) (\_ -> ()) . evaluateCekLikeInProd evalCtx
in whnf eval benchTerm
benchWith mkCekBM
12 changes: 8 additions & 4 deletions plutus-benchmark/nofib/bench/Shared.hs
@@ -1,8 +1,12 @@
{- | Shared code for benchmarking Plutus and Haskell versions of the Plutus nofib examples -}
module Shared (benchWith, mkBenchMarks)
where

import PlutusBenchmark.Common (Term, getConfig)
module Shared (
benchWith
, mkBenchMarks
, mkEvalCtx
, evaluateCekLikeInProd
) where

import PlutusBenchmark.Common (Term, evaluateCekLikeInProd, getConfig, mkEvalCtx)

import PlutusBenchmark.NoFib.Clausify qualified as Clausify
import PlutusBenchmark.NoFib.Knights qualified as Knights
Expand Down
3 changes: 3 additions & 0 deletions plutus-benchmark/plutus-benchmark.cabal
Expand Up @@ -79,6 +79,7 @@ library plutus-benchmark-common
, filepath
, flat ^>=0.6
, plutus-core ^>=1.24
, plutus-ledger-api ^>=1.24
, plutus-metatheory
, plutus-tx ^>=1.24
, tasty
Expand Down Expand Up @@ -141,8 +142,10 @@ benchmark nofib
build-depends:
, base >=4.9 && <5
, criterion >=1.5.9.0
, deepseq
, nofib-internal
, plutus-benchmark-common
, plutus-core ^>=1.23

benchmark nofib-hs
import: lang, ghc-version-support
Expand Down
37 changes: 1 addition & 36 deletions plutus-benchmark/validation/bench/Common.hs
Expand Up @@ -9,19 +9,12 @@ module Common (
, Term
) where

import PlutusBenchmark.Common (getConfig, getDataDir)
import PlutusBenchmark.Common (evaluateCekLikeInProd, getConfig, getDataDir, mkEvalCtx)
import PlutusBenchmark.NaturalSort

import PlutusCore qualified as PLC
import PlutusCore.Builtin qualified as PLC
import PlutusCore.Data qualified as PLC
import PlutusCore.Default qualified as PLC (BuiltinSemanticsVariant (DefaultFunSemanticsVariant1))
import PlutusCore.Evaluation.Machine.ExBudgetingDefaults qualified as PLC
import PlutusLedgerApi.Common (PlutusLedgerLanguage (PlutusV1), evaluateTerm,
ledgerLanguageIntroducedIn, mkDynEvaluationContext)
import PlutusLedgerApi.V1 (EvaluationContext, VerboseMode (..))
import UntypedPlutusCore qualified as UPLC
import UntypedPlutusCore.Evaluation.Machine.Cek qualified as UPLC

import Criterion.Main
import Criterion.Main.Options (Mode, parseWith)
Expand Down Expand Up @@ -132,34 +125,6 @@ benchWith act = do
env (BS.readFile $ dir </> file) $ \(~scriptBS) ->
bench (dropExtension file) $ act file scriptBS

-- | Create the evaluation context for the benchmarks. This doesn't exactly match how it's done
-- on-chain, but that's okay because the evaluation context is cached by the ledger, so we're
-- deliberately not including it in the benchmarks.
mkEvalCtx :: EvaluationContext
mkEvalCtx =
case PLC.defaultCostModelParams of
Just p ->
let errOrCtx =
-- The validation benchmarks were all created from PlutusV1 scripts
mkDynEvaluationContext (const PLC.DefaultFunSemanticsVariant1) p
in case errOrCtx of
Right ec -> ec
Left err -> error $ show err
Nothing -> error "Couldn't get cost model params"

-- | Evaluate a term as it would be evaluated using the on-chain evaluator.
evaluateCekLikeInProd
:: EvaluationContext
-> UPLC.Term PLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
-> Either
(UPLC.CekEvaluationException UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun)
(UPLC.Term UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun ())
evaluateCekLikeInProd evalCtx term = do
let (getRes, _, _) =
-- The validation benchmarks were all created from PlutusV1 scripts
evaluateTerm UPLC.restrictingEnormous (ledgerLanguageIntroducedIn PlutusV1) Quiet evalCtx term
getRes

type Term = UPLC.Term UPLC.DeBruijn UPLC.DefaultUni UPLC.DefaultFun ()

-- | If the term is an application of something to some arguments, peel off
Expand Down

0 comments on commit 532722e

Please sign in to comment.