Merge f0f7745 into 61d18b6

Perl · May 4, 2021 · a5a6f14 · a5a6f14
2 parents 61d18b6 + f0f7745
commit a5a6f14
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 21 deletions.
diff --git a/handy.h b/handy.h
@@ -1384,6 +1384,18 @@ or casts
 #   define WIDEST_UTYPE U32
 #endif
 
+/* Where there could be some confusion, use this as a static assert in macros
+ * to make sure that a parameter isn't a pointer.  But some compilers can't
+ * handle this.  The only one known so far that can't is gcc 3.3.6; the check
+ * below isn't thorough for such an old compiler, so may have to be revised if
+ * experience so dictates. */
+#if  ! defined(__GNUC__)                                                    \
+ || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3))
+#  define ASSERT_NOT_PTR(x) ((x) | 0)
+#else
+#  define ASSERT_NOT_PTR(x) (x)
+#endif
+
 /* FITS_IN_8_BITS(c) returns true if c doesn't have  a bit set other than in
  * the lower 8.  It is designed to be hopefully bomb-proof, making sure that no
  * bits of information are lost even on a 64-bit machine, but to get the
@@ -1396,10 +1408,8 @@ or casts
  * of operands.  Well, they are, but that is kind of the point.
  */
 #ifndef __COVERITY__
-  /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a
-   * pointer) */
-#define FITS_IN_8_BITS(c) (   (sizeof(c) == 1)                      \
-                           || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))
+#define FITS_IN_8_BITS(c) (      (sizeof(c) == 1)                           \
+                          || ! (((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) & ~0xFF))
 #else
 #define FITS_IN_8_BITS(c) (1)
 #endif
@@ -1419,7 +1429,8 @@ or casts
  * asserts itself, once.  The reason that this is necessary is that the
  * duplicate asserts were exceeding the internal limits of some compilers */
 #define withinCOUNT_KNOWN_VALID_(c, l, n)                                   \
-    (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
+    ((((WIDEST_UTYPE) (c)) - ASSERT_NOT_PTR(l))                             \
+                                   <= ((WIDEST_UTYPE) ASSERT_NOT_PTR(n)))
 
 /* Returns true if c is in the range l..u, where 'l' is non-negative
  * Written this way so that after optimization, only one conditional test is
@@ -1453,17 +1464,14 @@ or casts
      * unsigned type.  khw supposes that it could be written as
      *      && ((c) == '\0' || (c) > 0)
      * to avoid the message, but the cast will likely avoid extra branches even
-     * with stupid compilers.
-     *
-     * The '| 0' part ensures a compiler error if c is not integer (like e.g.,
-     * a pointer) */
-#   define isASCII(c)    ((WIDEST_UTYPE)((c) | 0) < 128)
+     * with stupid compilers. */
+#   define isASCII(c)    (((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) < 128)
 #endif
 
 /* Take the eight possible bit patterns of the lower 3 bits and you get the
  * lower 3 bits of the 8 octal digits, in both ASCII and EBCDIC, so those bits
  * can be ignored.  If the rest match '0', we have an octal */
-#define isOCTAL_A(c)  (((WIDEST_UTYPE)((c) | 0) & ~7) == '0')
+#define isOCTAL_A(c)  ((((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) & ~7) == '0')
 
 #ifdef H_PERL       /* If have access to perl.h, lookup in its table */
 

diff --git a/utf8.h b/utf8.h
@@ -178,20 +178,24 @@ adding no time nor space requirements to the implementation.
 =cut
 */
 
-#define NATIVE_TO_LATIN1(ch)     (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
-#define LATIN1_TO_NATIVE(ch)     (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
+#define NATIVE_TO_LATIN1(ch)                                                \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
+#define LATIN1_TO_NATIVE(ch)                                                \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
 
 /* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC.  We thus
  * consider it to be identical to UTF-8 on ASCII platforms.  Strictly speaking
  * UTF-8 and UTF-EBCDIC are two different things, but we often conflate them
  * because they are 8-bit encodings that serve the same purpose in Perl, and
  * rarely do we need to distinguish them.  The term "NATIVE_UTF8" applies to
  * whichever one is applicable on the current platform */
-#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
-#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
+#define NATIVE_UTF8_TO_I8(ch)                                               \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
+#define I8_TO_NATIVE_UTF8(ch)                                               \
+                (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
 
-#define UNI_TO_NATIVE(ch)        ((UV) ((ch) | 0))
-#define NATIVE_TO_UNI(ch)        ((UV) ((ch) | 0))
+#define UNI_TO_NATIVE(ch)        ((UV) ASSERT_NOT_PTR(ch))
+#define NATIVE_TO_UNI(ch)        ((UV) ASSERT_NOT_PTR(ch))
 
 /*
 
@@ -615,10 +619,8 @@ The reason it works on both UTF-8 encoded strings and non-UTF-8 encoded, is
 that it returns TRUE in each for the exact same set of bit patterns.  It is
 valid on a subset of what UVCHR_IS_INVARIANT is valid on, so can just use that;
 and the compiler should optimize out anything extraneous given the
-implementation of the latter.  The |0 makes sure this isn't mistakenly called
-with a ptr argument.
-*/
-#define UTF8_IS_INVARIANT(c)	UVCHR_IS_INVARIANT((c) | 0)
+implementation of the latter. */
+#define UTF8_IS_INVARIANT(c)	UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c))
 
 /* Like the above, but its name implies a non-UTF8 input, which as the comments
  * above show, doesn't matter as to its implementation */