From bd78ed18fc611992ba1dcdc81eb12d5cc3e88009 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Mon, 14 Jun 2021 06:04:44 -0600
Subject: [PATCH] utf8.h: Add symbol for easing EBCDIC handling

The new symbol, ONE_IF_EBCDIC_ZERO_IF_NOT, is then used in regcomp.c to avoid an #ifdef EBCDIC.
---
 regcomp.c   | 11 ++++++-----
 utf8.h      |  5 +++++
 utfebcdic.h |  1 +
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index e7e83b737b21..b89f701cc273 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -19815,11 +19815,12 @@ S_optimize_regclass(pTHX_
          * invariant bytes, because they have the same bit patterns under UTF-8
          * as not. */
         PERL_UINT_FAST8_T inverted = 0;
-#ifdef EBCDIC
-        const PERL_UINT_FAST8_T max_permissible = 0xFF;
-#else
-        const PERL_UINT_FAST8_T max_permissible = 0x7F;
-#endif
+
+        /* Highest possible UTF-8 invariant is 7F on ASCII platforms; FF on
+         * EBCDIC */
+        const PERL_UINT_FAST8_T max_permissible
+                                    = nBIT_UMAX(7 + ONE_IF_EBCDIC_ZERO_IF_NOT);
+
         /* If doesn't fit the criteria for ANYOFM, invert and try again.  If
          * that works we will instead later generate an NANYOFM, and invert
          * back when through */
diff --git a/utf8.h b/utf8.h
index ce7f57fcceb7..fccc95d33135 100644
--- a/utf8.h
+++ b/utf8.h
@@ -265,6 +265,11 @@ are in the character. */
 #define isUTF8_POSSIBLY_PROBLEMATIC(c) (__ASSERT_(FITS_IN_8_BITS(c))        \
                                         (U8) c >= 0xED)
 
+/* It turns out that, in a number of cases, handling ASCII vs EBCDIC is a
+ * matter of being off-by-one.  So this is a convenience macro, used to avoid
+ * some #ifdefs. */
+#  define ONE_IF_EBCDIC_ZERO_IF_NOT  0
+
 #define UNICODE_IS_PERL_EXTENDED(uv)    UNLIKELY((UV) (uv) > 0x7FFFFFFF)
 
 #endif /* EBCDIC vs ASCII */
diff --git a/utfebcdic.h b/utfebcdic.h
index f6a54ab5d6d9..d016641cbf82 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -228,6 +228,7 @@ explicitly forbidden, and the shortest possible encoding should always be used
 
 #define UNICODE_IS_PERL_EXTENDED(uv)    UNLIKELY((UV) (uv) > 0x3FFFFFFF)
 
+#define ONE_IF_EBCDIC_ZERO_IF_NOT  1
 
 /*
  * ex: set ts=8 sts=4 sw=4 et: