From 41f70cb3251e47cb91749081c92dc2e6db87b85c Mon Sep 17 00:00:00 2001
From: Nikolaos Bezirgiannis <329939+bezirg@users.noreply.github.com>
Date: Thu, 25 May 2023 15:02:36 +0200
Subject: [PATCH] PLT-6006: Purge the use of int-cast lib (#5344)

Co-authored-by: Nikolaos Bezirgiannis <bezirg@users.noreply.github.com>
---
 cabal.project                                 |  2 -
 plutus-core/plutus-core.cabal                 |  1 -
 .../plutus-core/src/GHC/Natural/Extras.hs     | 22 ++++++--
 .../src/PlutusCore/Default/Builtins.hs        |  2 +-
 .../src/PlutusCore/Default/Universe.hs        | 56 ++++++++++---------
 .../plutus-core/src/PlutusCore/Flat.hs        | 36 +++++++-----
 6 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/cabal.project b/cabal.project
index 51f2b3c95f5..ee3139d78e3 100644
--- a/cabal.project
+++ b/cabal.project
@@ -50,8 +50,6 @@ extra-packages: ieee, filemanip
 
 if impl(ghc >= 9.6)
   allow-newer:
-    -- unmaintained? would be good to do something here
-    , int-cast:base
     -- https://github.com/mokus0/th-extras/pull/20
     , th-extras:template-haskell
     , th-extras:th-abstraction
diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal
index bf164522820..b4fd619085d 100644
--- a/plutus-core/plutus-core.cabal
+++ b/plutus-core/plutus-core.cabal
@@ -278,7 +278,6 @@ library
     , hashable                    >=1.4
     , hedgehog                    >=1.0
     , index-envs
-    , int-cast
     , lens
     , megaparsec
     , mmorph
diff --git a/plutus-core/plutus-core/src/GHC/Natural/Extras.hs b/plutus-core/plutus-core/src/GHC/Natural/Extras.hs
index 5ba3c2c73fc..4c2ee26187c 100644
--- a/plutus-core/plutus-core/src/GHC/Natural/Extras.hs
+++ b/plutus-core/plutus-core/src/GHC/Natural/Extras.hs
@@ -3,12 +3,26 @@
 
 module GHC.Natural.Extras (naturalToWord64Maybe, Natural (..)) where
 
-import Data.IntCast (intCastEq)
 import Data.Word (Word64)
 import GHC.Natural
 
--- Note this will only work on 64bit platforms, but that's all we build on
--- so that's okay.
+{- | We only support 64-bit architectures, see Note [Index (Word64) (de)serialized through Natural].
+This implementation is safe w.r.t. cross-compilation, because WORD_SIZE_IN_BITS will point
+to the compiler's target word size when compiling this module.
+-}
 {-# INLINABLE naturalToWord64Maybe #-}
 naturalToWord64Maybe :: Natural -> Maybe Word64
-naturalToWord64Maybe n = intCastEq <$> naturalToWordMaybe n
+naturalToWord64Maybe n =
+#if WORD_SIZE_IN_BITS == 64
+    fromIntegral <$> naturalToWordMaybe n
+#else
+
+{- HLint does not know about WORD_SIZE_IN_BITS, so it calls cpphs to preprocess away these lines;
+cpphs fails on encountering the #error cpp directive (as it should).
+HLint treats this cpphs failure as an hlint failure. Skip HLint then.
+-}
+#ifndef __HLINT__
+#error unsupported WORD_SIZE_IN_BITS. We only support 64-bit architectures.
+#endif
+
+#endif
diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs
index b1641d43707..5fe087ac3a6 100644
--- a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs
+++ b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs
@@ -1092,7 +1092,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where
     toBuiltinMeaning _ver IndexByteString =
         makeBuiltinMeaning
             (\xs n -> if n >= 0 && n < BS.length xs then EvaluationSuccess $ toInteger $ BS.index xs n else EvaluationFailure)
-            -- TODO: fix the mess above with `indexMaybe` from `bytestring >= 0.11.0.0`.
+            -- TODO: fix the mess above with `indexMaybe` from `ghc>=9.2,bytestring >= 0.11.0.0`.
             (runCostingFunTwoArguments . paramIndexByteString)
     toBuiltinMeaning _ver EqualsByteString =
         makeBuiltinMeaning
diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs
index e7a770b62a8..2ae95e8620c 100644
--- a/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs
+++ b/plutus-core/plutus-core/src/PlutusCore/Default/Universe.hs
@@ -1,4 +1,3 @@
--- editorconfig-checker-disable-file
 -- | The universe used by default and its instances.
 
 {-# OPTIONS -fno-warn-missing-pattern-synonym-signatures #-}
@@ -11,6 +10,7 @@
 {-# OPTIONS -Wno-redundant-constraints #-}
 
 {-# LANGUAGE BlockArguments        #-}
+{-# LANGUAGE CPP                   #-}
 {-# LANGUAGE ConstraintKinds       #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE GADTs                 #-}
@@ -26,6 +26,7 @@
 {-# LANGUAGE TypeFamilies          #-}
 {-# LANGUAGE TypeOperators         #-}
 {-# LANGUAGE UndecidableInstances  #-}
+#include "MachDeps.h"
 
 -- effectfully: to the best of my experimentation, -O2 here improves performance, however by
 -- inspecting GHC Core I was only able to see a difference in how the 'KnownTypeIn' instance for
@@ -50,7 +51,6 @@ import Control.Applicative
 import Data.Bits (toIntegralSized)
 import Data.ByteString qualified as BS
 import Data.Int
-import Data.IntCast (intCastEq)
 import Data.Proxy
 import Data.Text qualified as Text
 import Data.Word
@@ -89,7 +89,7 @@ to juggle values of polymorphic built-in types instantiated with non-built-in ty
 (it's not even possible to represent such a value in the AST, even though it's possible to represent
 such a 'Type').
 
-Finally, it is not necessarily the case that we need to allow embedding PLC terms into meta-constants.
+Finally, it is not necessary to allow embedding PLC terms into meta-constants.
 We already allow built-in functions with polymorphic types. There might be a way to utilize this
 feature and have meta-constructors as built-in functions.
 -}
@@ -119,7 +119,7 @@ instance GEq DefaultUni where
     -- recursive definition and we want two instead. The reason why we want two is because this
     -- allows GHC to inline the initial step that appears non-recursive to GHC, because recursion
     -- is hidden in the other function that is marked as @NOINLINE@ and is chosen by GHC as a
-    -- loop-breaker, see https://wiki.haskell.org/Inlining_and_Specialisation#What_is_a_loop-breaker.3F
+    -- loop-breaker, see https://wiki.haskell.org/Inlining_and_Specialisation#What_is_a_loop-breaker
     -- (we're not really sure if this is a reliable solution, but if it stops working, we won't miss
     -- very much and we've failed to settle on any other approach).
     --
@@ -296,31 +296,28 @@ instance TestTypesFromTheUniverseAreAllKnown DefaultUni
 Technically our universe only contains 'Integer', but many of the builtin functions that we would
 like to use work over 'Int' and 'Word8'.
 
-This is inconvenient and also error-prone: dealing with a function that takes an 'Int' or 'Word8' means carefully
-downcasting the 'Integer', running the function, potentially upcasting at the end. And it's easy to get
-wrong by e.g. blindly using 'fromInteger'.
+This is inconvenient and also error-prone: dealing with a function that takes an 'Int' or 'Word8'
+means carefully downcasting the 'Integer', running the function, potentially upcasting at the end.
+And it's easy to get wrong by e.g. blindly using 'fromInteger'.
 
-Moreover, there is a latent risk here: if we *were* to build on a 32-bit platform, then programs which
-use arguments between @maxBound :: Int32@ and @maxBound :: Int64@ would behave differently!
+Moreover, there is a latent risk here: if we *were* to build on a 32-bit architecture, then programs
+which use arguments between @maxBound :: Int32@ and @maxBound :: Int64@ would behave differently!
 
 So, what to do? We adopt the following strategy:
-- We allow lifting/unlifting 'Int64' via 'Integer', including a safe downcast in 'readKnown'.
 - We allow lifting/unlifting 'Word8' via 'Integer', including a safe downcast in 'readKnown'.
-- We allow lifting/unlifting 'Int' via 'Int64', converting between them using 'intCastEq'.
-
-This has the effect of allowing the use of 'Int64' always, and 'Int' iff it is provably equal to
-'Int64'. So we can use 'Int' conveniently, but only if it has predictable behaviour.
-
-(An alternative would be to just add 'Int', but add 'IntCastEq Int Int64' as an instance constraint.
-That would also work, this way just seemed a little more explicit, and avoids adding constraints,
-which can sometimes interfere with optimization and inling.)
+- We allow lifting/unlifting 'Int64' via 'Integer', including a safe downcast in 'readKnown'.
+- We allow lifting/unlifting 'Int' via 'Int64', constraining the conversion between them to
+64-bit architectures where this conversion is safe.
 
 Doing this effectively bans builds on 32-bit systems, but that's fine, since we don't care about
 supporting 32-bit systems anyway, and this way any attempts to build on them will fail fast.
 
-Note: we couldn't fail the bounds check with 'AsUnliftingError', because an out-of-bounds error is not an
-internal one -- it's a normal evaluation failure, but unlifting errors have this connotation of
-being "internal".
+Note: We have another 64-bit limitation, this time not during script execution but during
+script deserialization, for more see Note [Index (Word64) (de)serialized through Natural].
+
+Note: we couldn't fail the bounds check with 'AsUnliftingError', because an out-of-bounds error
+is not an internal one -- it's a normal evaluation failure, but unlifting errors
+have this connotation of being "internal".
 -}
 
 instance KnownTypeAst DefaultUni Int64 where
@@ -345,19 +342,26 @@ instance HasConstantIn DefaultUni term => ReadKnownIn DefaultUni term Int64 wher
                 else throwing_ _EvaluationFailure
     {-# INLINE readKnown #-}
 
+#if WORD_SIZE_IN_BITS == 64
+-- See Note [Integral types as Integer].
+
 instance KnownTypeAst DefaultUni Int where
     toTypeAst _ = toTypeAst $ Proxy @Integer
 
--- See Note [Integral types as Integer].
 instance HasConstantIn DefaultUni term => MakeKnownIn DefaultUni term Int where
-    -- This could safely just be toInteger, but this way is more explicit and it'll
-    -- turn into the same thing anyway.
-    makeKnown = makeKnown . intCastEq @Int @Int64
+    -- Convert Int-to-Integer via Int64.  We could go directly `toInteger`, but this way
+    -- is more explicit and it'll turn into the same thing anyway.
+    -- Although this conversion is safe regardless of the CPU arch (unlike the opposite conversion),
+    -- we constrain it to 64-bit for the sake of uniformity.
+    makeKnown = makeKnown . fromIntegral @Int @Int64
     {-# INLINE makeKnown #-}
 
 instance HasConstantIn DefaultUni term => ReadKnownIn DefaultUni term Int where
-    readKnown term = intCastEq @Int64 @Int <$> readKnown term
+    -- Convert Integer-to-Int via Int64. This instance is safe only for 64-bit architecture
+    -- where Int===Int64 (i.e. no truncation happening).
+    readKnown term = fromIntegral @Int64 @Int <$> readKnown term
     {-# INLINE readKnown #-}
+#endif
 
 instance KnownTypeAst DefaultUni Word8 where
     toTypeAst _ = toTypeAst $ Proxy @Integer
diff --git a/plutus-core/plutus-core/src/PlutusCore/Flat.hs b/plutus-core/plutus-core/src/PlutusCore/Flat.hs
index 6caf49432ae..bc98d973010 100644
--- a/plutus-core/plutus-core/src/PlutusCore/Flat.hs
+++ b/plutus-core/plutus-core/src/PlutusCore/Flat.hs
@@ -91,23 +91,29 @@ By default, Flat does not use any space to serialise `()`.
 {- Note [Index (Word64) (de)serialized through Natural]
 
 With the recent change of CEK to use DeBruijn instead of Name,
-we decided to change Index to be a Word instead of Natural, for performance reasons.
+we decided to change Index to be a Word64 instead of Natural, for performance reasons.
 
 However, to be absolutely sure that the script format *does not change*
-for plutus language version 1, we are converting from/to Word64 and (de)-serialize *only through
-Natural*, to keep the old v1 flat format the same.
-
-Natural and Word64 are flat-compatible up-to `maxBound :: Word64`.
-However, the current blockchain might have already stored a plutus v1 script
-containing a hugely-indexed variable `>maxBound::Word64` -- such a script must be failing
-because such a huge index must be a free variable (given the current script-size constraints).
-
-When decoding such an already-stored (failing) script
-the Natural deserializer makes the script fail at the scopechecking step (previously
-undebruijnification step). Hypotheically using the Word64 deserializer, the script would *hopefully*
-fail as well, although earlier at the deserialization step. Initial tests and looking at flat
-internals make this likely, but until proven, we postpone the transition to Word64 deserializer for
-version 2 language.
+for plutus, we are converting from/to Word64 and (de)-serialize *only through
+Natural*, to keep the flat format the same.
+
+Also, for sake of speed & simplicity we restrict the current deserialization to work
+only on 64-bit architectures (serialization still works on 32-bit architecture because it
+is not in the critical path).
+
+Note: We have another 64-bit limitation, this time not during script deserialization but during
+script execution, for more see Note [Integral types as Integer].
+
+Going a step further is to switch to *direct* Word64 (de)-serializization:
+afterall, Natural and Word64 are flat-compatible up-to `maxBound :: Word64`.
+A problem may arise in currently stored scripts on the chain containing
+a variable with a huge debruijn index: `>maxBound::Word64`. Such a script
+will definitely fail at phase-2 validation (more specifically, at scope-checking)
+because of the current script-size constraints deeming the hugely-indexed variable a free-variable.
+Changing to direct Word64 (de)-serialization will turn those existing stored
+scripts to a phase-1 failure instead of current phase-2 failure.
+
+We postpone this transition to direct Word64 (de)-serializer for a future plutus version.
 -}