From 41f70cb3251e47cb91749081c92dc2e6db87b85c Mon Sep 17 00:00:00 2001 From: Nikolaos Bezirgiannis <329939+bezirg@users.noreply.github.com> Date: Thu, 25 May 2023 15:02:36 +0200 Subject: [PATCH] PLT-6006: Purge the use of int-cast lib (#5344) Co-authored-by: Nikolaos Bezirgiannis --- cabal.project | 2 - plutus-core/plutus-core.cabal | 1 - .../plutus-core/src/GHC/Natural/Extras.hs | 22 ++++++-- .../src/PlutusCore/Default/Builtins.hs | 2 +- .../src/PlutusCore/Default/Universe.hs | 56 ++++++++++--------- .../plutus-core/src/PlutusCore/Flat.hs | 36 +++++++----- 6 files changed, 70 insertions(+), 49 deletions(-) diff --git a/cabal.project b/cabal.project index 51f2b3c95f5..ee3139d78e3 100644 --- a/cabal.project +++ b/cabal.project @@ -50,8 +50,6 @@ extra-packages: ieee, filemanip if impl(ghc >= 9.6) allow-newer: - -- unmaintained? would be good to do something here - , int-cast:base -- https://github.com/mokus0/th-extras/pull/20 , th-extras:template-haskell , th-extras:th-abstraction diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal index bf164522820..b4fd619085d 100644 --- a/plutus-core/plutus-core.cabal +++ b/plutus-core/plutus-core.cabal @@ -278,7 +278,6 @@ library , hashable >=1.4 , hedgehog >=1.0 , index-envs - , int-cast , lens , megaparsec , mmorph diff --git a/plutus-core/plutus-core/src/GHC/Natural/Extras.hs b/plutus-core/plutus-core/src/GHC/Natural/Extras.hs index 5ba3c2c73fc..4c2ee26187c 100644 --- a/plutus-core/plutus-core/src/GHC/Natural/Extras.hs +++ b/plutus-core/plutus-core/src/GHC/Natural/Extras.hs @@ -3,12 +3,26 @@ module GHC.Natural.Extras (naturalToWord64Maybe, Natural (..)) where -import Data.IntCast (intCastEq) import Data.Word (Word64) import GHC.Natural --- Note this will only work on 64bit platforms, but that's all we build on --- so that's okay. +{- | We only support 64-bit architectures, see Note [Index (Word64) (de)serialized through Natural]. +This implementation is safe w.r.t. cross-compilation, because WORD_SIZE_IN_BITS will point +to the compiler's target word size when compiling this module. +-} {-# INLINABLE naturalToWord64Maybe #-} naturalToWord64Maybe :: Natural -> Maybe Word64 -naturalToWord64Maybe n = intCastEq <$> naturalToWordMaybe n +naturalToWord64Maybe n = +#if WORD_SIZE_IN_BITS == 64 + fromIntegral <$> naturalToWordMaybe n +#else + +{- HLint does not know about WORD_SIZE_IN_BITS, so it calls cpphs to preprocess away these lines; +cpphs fails on encountering the #error cpp directive (as it should). +HLint treats this cpphs failure as an hlint failure. Skip HLint then. +-} +#ifndef __HLINT__ +#error unsupported WORD_SIZE_IN_BITS. We only support 64-bit architectures. +#endif + +#endif diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs index b1641d43707..5fe087ac3a6 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs @@ -1092,7 +1092,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _ver IndexByteString = makeBuiltinMeaning (\xs n -> if n >= 0 && n < BS.length xs then EvaluationSuccess $ toInteger $ BS.index xs n else EvaluationFailure) - -- TODO: fix the mess above with `indexMaybe` from `bytestring >= 0.11.0.0`. + -- TODO: fix the mess above with `indexMaybe` from `ghc>=9.2,bytestring >= 0.11.0.0`. (runCostingFunTwoArguments . paramIndexByteString) toBuiltinMeaning _ver EqualsByteString = makeBuiltinMeaning diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs index e7a770b62a8..2ae95e8620c 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs @@ -1,4 +1,3 @@ --- editorconfig-checker-disable-file -- | The universe used by default and its instances. {-# OPTIONS -fno-warn-missing-pattern-synonym-signatures #-} @@ -11,6 +10,7 @@ {-# OPTIONS -Wno-redundant-constraints #-} {-# LANGUAGE BlockArguments #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} @@ -26,6 +26,7 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} +#include "MachDeps.h" -- effectfully: to the best of my experimentation, -O2 here improves performance, however by -- inspecting GHC Core I was only able to see a difference in how the 'KnownTypeIn' instance for @@ -50,7 +51,6 @@ import Control.Applicative import Data.Bits (toIntegralSized) import Data.ByteString qualified as BS import Data.Int -import Data.IntCast (intCastEq) import Data.Proxy import Data.Text qualified as Text import Data.Word @@ -89,7 +89,7 @@ to juggle values of polymorphic built-in types instantiated with non-built-in ty (it's not even possible to represent such a value in the AST, even though it's possible to represent such a 'Type'). -Finally, it is not necessarily the case that we need to allow embedding PLC terms into meta-constants. +Finally, it is not necessary to allow embedding PLC terms into meta-constants. We already allow built-in functions with polymorphic types. There might be a way to utilize this feature and have meta-constructors as built-in functions. -} @@ -119,7 +119,7 @@ instance GEq DefaultUni where -- recursive definition and we want two instead. The reason why we want two is because this -- allows GHC to inline the initial step that appears non-recursive to GHC, because recursion -- is hidden in the other function that is marked as @NOINLINE@ and is chosen by GHC as a - -- loop-breaker, see https://wiki.haskell.org/Inlining_and_Specialisation#What_is_a_loop-breaker.3F + -- loop-breaker, see https://wiki.haskell.org/Inlining_and_Specialisation#What_is_a_loop-breaker -- (we're not really sure if this is a reliable solution, but if it stops working, we won't miss -- very much and we've failed to settle on any other approach). -- @@ -296,31 +296,28 @@ instance TestTypesFromTheUniverseAreAllKnown DefaultUni Technically our universe only contains 'Integer', but many of the builtin functions that we would like to use work over 'Int' and 'Word8'. -This is inconvenient and also error-prone: dealing with a function that takes an 'Int' or 'Word8' means carefully -downcasting the 'Integer', running the function, potentially upcasting at the end. And it's easy to get -wrong by e.g. blindly using 'fromInteger'. +This is inconvenient and also error-prone: dealing with a function that takes an 'Int' or 'Word8' +means carefully downcasting the 'Integer', running the function, potentially upcasting at the end. +And it's easy to get wrong by e.g. blindly using 'fromInteger'. -Moreover, there is a latent risk here: if we *were* to build on a 32-bit platform, then programs which -use arguments between @maxBound :: Int32@ and @maxBound :: Int64@ would behave differently! +Moreover, there is a latent risk here: if we *were* to build on a 32-bit architecture, then programs +which use arguments between @maxBound :: Int32@ and @maxBound :: Int64@ would behave differently! So, what to do? We adopt the following strategy: -- We allow lifting/unlifting 'Int64' via 'Integer', including a safe downcast in 'readKnown'. - We allow lifting/unlifting 'Word8' via 'Integer', including a safe downcast in 'readKnown'. -- We allow lifting/unlifting 'Int' via 'Int64', converting between them using 'intCastEq'. - -This has the effect of allowing the use of 'Int64' always, and 'Int' iff it is provably equal to -'Int64'. So we can use 'Int' conveniently, but only if it has predictable behaviour. - -(An alternative would be to just add 'Int', but add 'IntCastEq Int Int64' as an instance constraint. -That would also work, this way just seemed a little more explicit, and avoids adding constraints, -which can sometimes interfere with optimization and inling.) +- We allow lifting/unlifting 'Int64' via 'Integer', including a safe downcast in 'readKnown'. +- We allow lifting/unlifting 'Int' via 'Int64', constraining the conversion between them to +64-bit architectures where this conversion is safe. Doing this effectively bans builds on 32-bit systems, but that's fine, since we don't care about supporting 32-bit systems anyway, and this way any attempts to build on them will fail fast. -Note: we couldn't fail the bounds check with 'AsUnliftingError', because an out-of-bounds error is not an -internal one -- it's a normal evaluation failure, but unlifting errors have this connotation of -being "internal". +Note: We have another 64-bit limitation, this time not during script execution but during +script deserialization, for more see Note [Index (Word64) (de)serialized through Natural]. + +Note: we couldn't fail the bounds check with 'AsUnliftingError', because an out-of-bounds error +is not an internal one -- it's a normal evaluation failure, but unlifting errors +have this connotation of being "internal". -} instance KnownTypeAst DefaultUni Int64 where @@ -345,19 +342,26 @@ instance HasConstantIn DefaultUni term => ReadKnownIn DefaultUni term Int64 wher else throwing_ _EvaluationFailure {-# INLINE readKnown #-} +#if WORD_SIZE_IN_BITS == 64 +-- See Note [Integral types as Integer]. + instance KnownTypeAst DefaultUni Int where toTypeAst _ = toTypeAst $ Proxy @Integer --- See Note [Integral types as Integer]. instance HasConstantIn DefaultUni term => MakeKnownIn DefaultUni term Int where - -- This could safely just be toInteger, but this way is more explicit and it'll - -- turn into the same thing anyway. - makeKnown = makeKnown . intCastEq @Int @Int64 + -- Convert Int-to-Integer via Int64. We could go directly `toInteger`, but this way + -- is more explicit and it'll turn into the same thing anyway. + -- Although this conversion is safe regardless of the CPU arch (unlike the opposite conversion), + -- we constrain it to 64-bit for the sake of uniformity. + makeKnown = makeKnown . fromIntegral @Int @Int64 {-# INLINE makeKnown #-} instance HasConstantIn DefaultUni term => ReadKnownIn DefaultUni term Int where - readKnown term = intCastEq @Int64 @Int <$> readKnown term + -- Convert Integer-to-Int via Int64. This instance is safe only for 64-bit architecture + -- where Int===Int64 (i.e. no truncation happening). + readKnown term = fromIntegral @Int64 @Int <$> readKnown term {-# INLINE readKnown #-} +#endif instance KnownTypeAst DefaultUni Word8 where toTypeAst _ = toTypeAst $ Proxy @Integer diff --git a/plutus-core/plutus-core/src/PlutusCore/Flat.hs b/plutus-core/plutus-core/src/PlutusCore/Flat.hs index 6caf49432ae..bc98d973010 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Flat.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Flat.hs @@ -91,23 +91,29 @@ By default, Flat does not use any space to serialise `()`. {- Note [Index (Word64) (de)serialized through Natural] With the recent change of CEK to use DeBruijn instead of Name, -we decided to change Index to be a Word instead of Natural, for performance reasons. +we decided to change Index to be a Word64 instead of Natural, for performance reasons. However, to be absolutely sure that the script format *does not change* -for plutus language version 1, we are converting from/to Word64 and (de)-serialize *only through -Natural*, to keep the old v1 flat format the same. - -Natural and Word64 are flat-compatible up-to `maxBound :: Word64`. -However, the current blockchain might have already stored a plutus v1 script -containing a hugely-indexed variable `>maxBound::Word64` -- such a script must be failing -because such a huge index must be a free variable (given the current script-size constraints). - -When decoding such an already-stored (failing) script -the Natural deserializer makes the script fail at the scopechecking step (previously -undebruijnification step). Hypotheically using the Word64 deserializer, the script would *hopefully* -fail as well, although earlier at the deserialization step. Initial tests and looking at flat -internals make this likely, but until proven, we postpone the transition to Word64 deserializer for -version 2 language. +for plutus, we are converting from/to Word64 and (de)-serialize *only through +Natural*, to keep the flat format the same. + +Also, for sake of speed & simplicity we restrict the current deserialization to work +only on 64-bit architectures (serialization still works on 32-bit architecture because it +is not in the critical path). + +Note: We have another 64-bit limitation, this time not during script deserialization but during +script execution, for more see Note [Integral types as Integer]. + +Going a step further is to switch to *direct* Word64 (de)-serializization: +afterall, Natural and Word64 are flat-compatible up-to `maxBound :: Word64`. +A problem may arise in currently stored scripts on the chain containing +a variable with a huge debruijn index: `>maxBound::Word64`. Such a script +will definitely fail at phase-2 validation (more specifically, at scope-checking) +because of the current script-size constraints deeming the hugely-indexed variable a free-variable. +Changing to direct Word64 (de)-serialization will turn those existing stored +scripts to a phase-1 failure instead of current phase-2 failure. + +We postpone this transition to direct Word64 (de)-serializer for a future plutus version. -}