PLT-6006: Purge the use of int-cast lib (#5344)

Co-authored-by: Nikolaos Bezirgiannis <bezirg@users.noreply.github.com>
IntersectMBO · May 25, 2023 · 41f70cb · 41f70cb · github-actions · May 25, 2023
1 parent 6e3bdbb
commit 41f70cb
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 49 deletions.
diff --git a/cabal.project b/cabal.project
@@ -50,8 +50,6 @@ extra-packages: ieee, filemanip
 
 if impl(ghc >= 9.6)
   allow-newer:
-    -- unmaintained? would be good to do something here
-    , int-cast:base
     -- https://github.com/mokus0/th-extras/pull/20
     , th-extras:template-haskell
     , th-extras:th-abstraction

diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal
@@ -278,7 +278,6 @@ library
     , hashable                    >=1.4
     , hedgehog                    >=1.0
     , index-envs
-    , int-cast
     , lens
     , megaparsec
     , mmorph

diff --git a/plutus-core/plutus-core/src/GHC/Natural/Extras.hs b/plutus-core/plutus-core/src/GHC/Natural/Extras.hs
@@ -3,12 +3,26 @@
 
 module GHC.Natural.Extras (naturalToWord64Maybe, Natural (..)) where
 
-import Data.IntCast (intCastEq)
 import Data.Word (Word64)
 import GHC.Natural
 
--- Note this will only work on 64bit platforms, but that's all we build on
--- so that's okay.
+{- | We only support 64-bit architectures, see Note [Index (Word64) (de)serialized through Natural].
+This implementation is safe w.r.t. cross-compilation, because WORD_SIZE_IN_BITS will point
+to the compiler's target word size when compiling this module.
+-}
 {-# INLINABLE naturalToWord64Maybe #-}
 naturalToWord64Maybe :: Natural -> Maybe Word64
-naturalToWord64Maybe n = intCastEq <$> naturalToWordMaybe n
+naturalToWord64Maybe n =
+#if WORD_SIZE_IN_BITS == 64
+    fromIntegral <$> naturalToWordMaybe n
+#else
+
+{- HLint does not know about WORD_SIZE_IN_BITS, so it calls cpphs to preprocess away these lines;
+cpphs fails on encountering the #error cpp directive (as it should).
+HLint treats this cpphs failure as an hlint failure. Skip HLint then.
+-}
+#ifndef __HLINT__
+#error unsupported WORD_SIZE_IN_BITS. We only support 64-bit architectures.
+#endif
+
+#endif
diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs
@@ -1092,7 +1092,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where
     toBuiltinMeaning _ver IndexByteString =
         makeBuiltinMeaning
             (\xs n -> if n >= 0 && n < BS.length xs then EvaluationSuccess $ toInteger $ BS.index xs n else EvaluationFailure)
-            -- TODO: fix the mess above with `indexMaybe` from `bytestring >= 0.11.0.0`.
+            -- TODO: fix the mess above with `indexMaybe` from `ghc>=9.2,bytestring >= 0.11.0.0`.
             (runCostingFunTwoArguments . paramIndexByteString)
     toBuiltinMeaning _ver EqualsByteString =
         makeBuiltinMeaning

diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs
@@ -1,4 +1,3 @@
--- editorconfig-checker-disable-file
 -- | The universe used by default and its instances.
 
 {-# OPTIONS -fno-warn-missing-pattern-synonym-signatures #-}
@@ -11,6 +10,7 @@
 {-# OPTIONS -Wno-redundant-constraints #-}
 
 {-# LANGUAGE BlockArguments        #-}
+{-# LANGUAGE CPP                   #-}
 {-# LANGUAGE ConstraintKinds       #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE GADTs                 #-}
@@ -26,6 +26,7 @@
 {-# LANGUAGE TypeFamilies          #-}
 {-# LANGUAGE TypeOperators         #-}
 {-# LANGUAGE UndecidableInstances  #-}
+#include "MachDeps.h"
 
 -- effectfully: to the best of my experimentation, -O2 here improves performance, however by
 -- inspecting GHC Core I was only able to see a difference in how the 'KnownTypeIn' instance for
@@ -50,7 +51,6 @@ import Control.Applicative
 import Data.Bits (toIntegralSized)
 import Data.ByteString qualified as BS
 import Data.Int
-import Data.IntCast (intCastEq)
 import Data.Proxy
 import Data.Text qualified as Text
 import Data.Word
@@ -89,7 +89,7 @@ to juggle values of polymorphic built-in types instantiated with non-built-in ty
 (it's not even possible to represent such a value in the AST, even though it's possible to represent
 such a 'Type').
 
-Finally, it is not necessarily the case that we need to allow embedding PLC terms into meta-constants.
+Finally, it is not necessary to allow embedding PLC terms into meta-constants.
 We already allow built-in functions with polymorphic types. There might be a way to utilize this
 feature and have meta-constructors as built-in functions.
 -}
@@ -119,7 +119,7 @@ instance GEq DefaultUni where
     -- recursive definition and we want two instead. The reason why we want two is because this
     -- allows GHC to inline the initial step that appears non-recursive to GHC, because recursion
     -- is hidden in the other function that is marked as @NOINLINE@ and is chosen by GHC as a
-    -- loop-breaker, see https://wiki.haskell.org/Inlining_and_Specialisation#What_is_a_loop-breaker.3F
+    -- loop-breaker, see https://wiki.haskell.org/Inlining_and_Specialisation#What_is_a_loop-breaker
     -- (we're not really sure if this is a reliable solution, but if it stops working, we won't miss
     -- very much and we've failed to settle on any other approach).
     --
@@ -296,31 +296,28 @@ instance TestTypesFromTheUniverseAreAllKnown DefaultUni
 Technically our universe only contains 'Integer', but many of the builtin functions that we would
 like to use work over 'Int' and 'Word8'.
 
-This is inconvenient and also error-prone: dealing with a function that takes an 'Int' or 'Word8' means carefully
-downcasting the 'Integer', running the function, potentially upcasting at the end. And it's easy to get
-wrong by e.g. blindly using 'fromInteger'.
+This is inconvenient and also error-prone: dealing with a function that takes an 'Int' or 'Word8'
+means carefully downcasting the 'Integer', running the function, potentially upcasting at the end.
+And it's easy to get wrong by e.g. blindly using 'fromInteger'.
 
-Moreover, there is a latent risk here: if we *were* to build on a 32-bit platform, then programs which
-use arguments between @maxBound :: Int32@ and @maxBound :: Int64@ would behave differently!
+Moreover, there is a latent risk here: if we *were* to build on a 32-bit architecture, then programs
+which use arguments between @maxBound :: Int32@ and @maxBound :: Int64@ would behave differently!
 
 So, what to do? We adopt the following strategy:
-- We allow lifting/unlifting 'Int64' via 'Integer', including a safe downcast in 'readKnown'.
 - We allow lifting/unlifting 'Word8' via 'Integer', including a safe downcast in 'readKnown'.
-- We allow lifting/unlifting 'Int' via 'Int64', converting between them using 'intCastEq'.
-
-This has the effect of allowing the use of 'Int64' always, and 'Int' iff it is provably equal to
-'Int64'. So we can use 'Int' conveniently, but only if it has predictable behaviour.
-
-(An alternative would be to just add 'Int', but add 'IntCastEq Int Int64' as an instance constraint.
-That would also work, this way just seemed a little more explicit, and avoids adding constraints,
-which can sometimes interfere with optimization and inling.)
+- We allow lifting/unlifting 'Int64' via 'Integer', including a safe downcast in 'readKnown'.
+- We allow lifting/unlifting 'Int' via 'Int64', constraining the conversion between them to
+64-bit architectures where this conversion is safe.
 
 Doing this effectively bans builds on 32-bit systems, but that's fine, since we don't care about
 supporting 32-bit systems anyway, and this way any attempts to build on them will fail fast.
 
-Note: we couldn't fail the bounds check with 'AsUnliftingError', because an out-of-bounds error is not an
-internal one -- it's a normal evaluation failure, but unlifting errors have this connotation of
-being "internal".
+Note: We have another 64-bit limitation, this time not during script execution but during
+script deserialization, for more see Note [Index (Word64) (de)serialized through Natural].
+
+Note: we couldn't fail the bounds check with 'AsUnliftingError', because an out-of-bounds error
+is not an internal one -- it's a normal evaluation failure, but unlifting errors
+have this connotation of being "internal".
 -}
 
 instance KnownTypeAst DefaultUni Int64 where
@@ -345,19 +342,26 @@ instance HasConstantIn DefaultUni term => ReadKnownIn DefaultUni term Int64 wher
                 else throwing_ _EvaluationFailure
     {-# INLINE readKnown #-}
 
+#if WORD_SIZE_IN_BITS == 64
+-- See Note [Integral types as Integer].
+
 instance KnownTypeAst DefaultUni Int where
     toTypeAst _ = toTypeAst $ Proxy @Integer
 
--- See Note [Integral types as Integer].
 instance HasConstantIn DefaultUni term => MakeKnownIn DefaultUni term Int where
-    -- This could safely just be toInteger, but this way is more explicit and it'll
-    -- turn into the same thing anyway.
-    makeKnown = makeKnown . intCastEq @Int @Int64
+    -- Convert Int-to-Integer via Int64.  We could go directly `toInteger`, but this way
+    -- is more explicit and it'll turn into the same thing anyway.
+    -- Although this conversion is safe regardless of the CPU arch (unlike the opposite conversion),
+    -- we constrain it to 64-bit for the sake of uniformity.
+    makeKnown = makeKnown . fromIntegral @Int @Int64
     {-# INLINE makeKnown #-}
 
 instance HasConstantIn DefaultUni term => ReadKnownIn DefaultUni term Int where
-    readKnown term = intCastEq @Int64 @Int <$> readKnown term
+    -- Convert Integer-to-Int via Int64. This instance is safe only for 64-bit architecture
+    -- where Int===Int64 (i.e. no truncation happening).
+    readKnown term = fromIntegral @Int64 @Int <$> readKnown term
     {-# INLINE readKnown #-}
+#endif
 
 instance KnownTypeAst DefaultUni Word8 where
     toTypeAst _ = toTypeAst $ Proxy @Integer

diff --git a/plutus-core/plutus-core/src/PlutusCore/Flat.hs b/plutus-core/plutus-core/src/PlutusCore/Flat.hs
@@ -91,23 +91,29 @@ By default, Flat does not use any space to serialise `()`.
 {- Note [Index (Word64) (de)serialized through Natural]
 
 With the recent change of CEK to use DeBruijn instead of Name,
-we decided to change Index to be a Word instead of Natural, for performance reasons.
+we decided to change Index to be a Word64 instead of Natural, for performance reasons.
 
 However, to be absolutely sure that the script format *does not change*
-for plutus language version 1, we are converting from/to Word64 and (de)-serialize *only through
-Natural*, to keep the old v1 flat format the same.
-
-Natural and Word64 are flat-compatible up-to `maxBound :: Word64`.
-However, the current blockchain might have already stored a plutus v1 script
-containing a hugely-indexed variable `>maxBound::Word64` -- such a script must be failing
-because such a huge index must be a free variable (given the current script-size constraints).
-
-When decoding such an already-stored (failing) script
-the Natural deserializer makes the script fail at the scopechecking step (previously
-undebruijnification step). Hypotheically using the Word64 deserializer, the script would *hopefully*
-fail as well, although earlier at the deserialization step. Initial tests and looking at flat
-internals make this likely, but until proven, we postpone the transition to Word64 deserializer for
-version 2 language.
+for plutus, we are converting from/to Word64 and (de)-serialize *only through
+Natural*, to keep the flat format the same.
+
+Also, for sake of speed & simplicity we restrict the current deserialization to work
+only on 64-bit architectures (serialization still works on 32-bit architecture because it
+is not in the critical path).
+
+Note: We have another 64-bit limitation, this time not during script deserialization but during
+script execution, for more see Note [Integral types as Integer].
+
+Going a step further is to switch to *direct* Word64 (de)-serializization:
+afterall, Natural and Word64 are flat-compatible up-to `maxBound :: Word64`.
+A problem may arise in currently stored scripts on the chain containing
+a variable with a huge debruijn index: `>maxBound::Word64`. Such a script
+will definitely fail at phase-2 validation (more specifically, at scope-checking)
+because of the current script-size constraints deeming the hugely-indexed variable a free-variable.
+Changing to direct Word64 (de)-serialization will turn those existing stored
+scripts to a phase-1 failure instead of current phase-2 failure.
+
+We postpone this transition to direct Word64 (de)-serializer for a future plutus version.
 -}
Benchmark suite	Current: `41f70cb`	Previous: `6e3bdbb`	Ratio
`validation-decode-future-settle-early-2`	`342.3` μs	`321.7` μs	`1.06`
`validation-decode-future-settle-early-3`	`345.3` μs	`322.2` μs	`1.07`
`validation-decode-future-settle-early-4`	`853.3` μs	`756.3` μs	`1.13`
`validation-decode-game-sm-success_1-1`	`608.6` μs	`546.4` μs	`1.11`
`validation-decode-game-sm-success_1-2`	`170.4` μs	`159.8` μs	`1.07`
`validation-decode-game-sm-success_1-3`	`617.1` μs	`548.1` μs	`1.13`
`validation-decode-game-sm-success_1-4`	`171.1` μs	`158.8` μs	`1.08`
`validation-decode-game-sm-success_2-1`	`614.8` μs	`546.4` μs	`1.13`
`validation-decode-game-sm-success_2-2`	`172.4` μs	`159.2` μs	`1.08`
`validation-decode-game-sm-success_2-3`	`608` μs	`546.7` μs	`1.11`
`validation-decode-game-sm-success_2-4`	`172.2` μs	`159.2` μs	`1.08`
`validation-decode-game-sm-success_2-5`	`609.3` μs	`545` μs	`1.12`
`validation-decode-game-sm-success_2-6`	`171` μs	`159` μs	`1.08`
`validation-decode-multisig-sm-1`	`695` μs	`616.6` μs	`1.13`
`validation-decode-multisig-sm-2`	`711.2` μs	`615.8` μs	`1.15`
`validation-decode-multisig-sm-3`	`706.4` μs	`619.5` μs	`1.14`
`validation-decode-multisig-sm-4`	`694.3` μs	`618` μs	`1.12`
`validation-decode-multisig-sm-5`	`684.7` μs	`618.2` μs	`1.11`
`validation-decode-multisig-sm-6`	`688.7` μs	`620.9` μs	`1.11`
`validation-decode-multisig-sm-7`	`680.3` μs	`622.1` μs	`1.09`
`validation-decode-multisig-sm-8`	`690.8` μs	`630.2` μs	`1.10`
`validation-decode-multisig-sm-9`	`687.7` μs	`629.8` μs	`1.09`
`validation-decode-multisig-sm-10`	`688.5` μs	`632.3` μs	`1.09`
`validation-decode-ping-pong-1`	`562` μs	`524.3` μs	`1.07`
`validation-decode-ping-pong-2`	`559.5` μs	`521.4` μs	`1.07`
`validation-decode-ping-pong_2-1`	`568.7` μs	`521.4` μs	`1.09`
`validation-decode-prism-1`	`164.6` μs	`155.8` μs	`1.06`
`validation-decode-prism-2`	`603.8` μs	`542.2` μs	`1.11`
`validation-decode-prism-3`	`247.6` μs	`232.9` μs	`1.06`
`validation-decode-pubkey-1`	`167.9` μs	`158.5` μs	`1.06`
`validation-decode-stablecoin_1-1`	`1424` μs	`1104` μs	`1.29`
`validation-decode-stablecoin_1-2`	`171` μs	`160.8` μs	`1.06`
`validation-decode-stablecoin_1-3`	`1357` μs	`1103` μs	`1.23`
`validation-decode-stablecoin_1-4`	`170.4` μs	`161.4` μs	`1.06`
`validation-decode-stablecoin_1-5`	`1369` μs	`1110` μs	`1.23`
`validation-decode-stablecoin_1-6`	`170.6` μs	`161.3` μs	`1.06`
`validation-decode-stablecoin_2-1`	`1417` μs	`1111` μs	`1.28`
`validation-decode-stablecoin_2-2`	`171.6` μs	`162.3` μs	`1.06`
`validation-decode-stablecoin_2-3`	`1316` μs	`1114` μs	`1.18`
`validation-decode-stablecoin_2-4`	`172` μs	`160.1` μs	`1.07`
`validation-decode-token-account-1`	`248.4` μs	`230.3` μs	`1.08`
`validation-decode-token-account-2`	`219.9` μs	`209.4` μs	`1.05`
`validation-decode-uniswap-3`	`922.8` μs	`810.2` μs	`1.14`
`validation-decode-uniswap-4`	`181.5` μs	`172.8` μs	`1.05`
`validation-decode-uniswap-5`	`938.8` μs	`804.3` μs	`1.17`
`validation-decode-uniswap-6`	`187.7` μs	`172.4` μs	`1.09`
`validation-decode-vesting-1`	`357.7` μs	`325` μs	`1.10`
`nofib-clausify/formula1`	`9515` μs	`8904` μs	`1.07`
`nofib-clausify/formula2`	`12520` μs	`11680` μs	`1.07`
`nofib-clausify/formula3`	`34850` μs	`31930` μs	`1.09`
`nofib-clausify/formula4`	`56070` μs	`52300` μs	`1.07`
`nofib-clausify/formula5`	`206000` μs	`194800` μs	`1.06`
`nofib-knights/4x4`	`49060` μs	`45740` μs	`1.07`
`nofib-knights/6x6`	`131600` μs	`122500` μs	`1.07`
`nofib-knights/8x8`	`220600` μs	`201600` μs	`1.09`
`nofib-primetest/05digits`	`27260` μs	`25500` μs	`1.07`
`nofib-primetest/08digits`	`50610` μs	`46680` μs	`1.08`
`nofib-primetest/10digits`	`70570` μs	`65480.00000000001` μs	`1.08`
`nofib-primetest/20digits`	`148100` μs	`137200` μs	`1.08`
`nofib-primetest/30digits`	`220500` μs	`203500` μs	`1.08`
`nofib-primetest/40digits`	`307000` μs	`280900` μs	`1.09`
`nofib-primetest/50digits`	`295700` μs	`266900` μs	`1.11`
`nofib-queens4x4/bt`	`9693` μs	`7982` μs	`1.21`
`nofib-queens4x4/bm`	`13470` μs	`11110` μs	`1.21`
`nofib-queens4x4/bjbt1`	`11690` μs	`9799` μs	`1.19`
`nofib-queens4x4/bjbt2`	`12110` μs	`10070` μs	`1.20`
`nofib-queens4x4/fc`	`26170` μs	`21710` μs	`1.21`
`nofib-queens5x5/bt`	`120500` μs	`104600` μs	`1.15`
`nofib-queens5x5/bm`	`151900` μs	`126200` μs	`1.20`
`nofib-queens5x5/bjbt1`	`148300` μs	`123700` μs	`1.20`
`nofib-queens5x5/bjbt2`	`148700` μs	`126900` μs	`1.17`
`nofib-queens5x5/fc`	`307700` μs	`275500` μs	`1.12`