diff --git a/handy.h b/handy.h index 63441e2a2243..9cfcc5a17ea8 100644 --- a/handy.h +++ b/handy.h @@ -1384,6 +1384,18 @@ or casts # define WIDEST_UTYPE U32 #endif +/* Where there could be some confusion, use this as a static assert in macros + * to make sure that a parameter isn't a pointer: '| 0' won't compile on one. + * But some compilers can't handle this. The only one known so far is gcc + * 3.3.6; the check below is coarse (it disables the assert for every gcc + * through 3.3.x), so it may have to be revised if experience so dictates. */ +#if ! defined(__GNUC__) \ + || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)) +# define ASSERT_NOT_PTR(x) ((x) | 0) +#else +# define ASSERT_NOT_PTR(x) (x) +#endif + /* FITS_IN_8_BITS(c) returns true if c doesn't have a bit set other than in * the lower 8. It is designed to be hopefully bomb-proof, making sure that no * bits of information are lost even on a 64-bit machine, but to get the @@ -1396,10 +1408,8 @@ or casts * of operands. Well, they are, but that is kind of the point. */ #ifndef __COVERITY__ - /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a - * pointer) */ -#define FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \ - || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF)) +#define FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \ + || ! (((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) & ~0xFF)) #else #define FITS_IN_8_BITS(c) (1) #endif @@ -1419,7 +1429,8 @@ or casts * asserts itself, once. The reason that this is necessary is that the * duplicate asserts were exceeding the internal limits of some compilers */ #define withinCOUNT_KNOWN_VALID_(c, l, n) \ - (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0)))) + ((((WIDEST_UTYPE) (c)) - ASSERT_NOT_PTR(l)) \ + <= ((WIDEST_UTYPE) ASSERT_NOT_PTR(n))) /* Returns true if c is in the range l..u, where 'l' is non-negative * Written this way so that after optimization, only one conditional test is @@ -1453,17 +1464,14 @@ or casts * unsigned type. 
khw supposes that it could be written as * && ((c) == '\0' || (c) > 0) * to avoid the message, but the cast will likely avoid extra branches even - * with stupid compilers. - * - * The '| 0' part ensures a compiler error if c is not integer (like e.g., - * a pointer) */ -# define isASCII(c) ((WIDEST_UTYPE)((c) | 0) < 128) + * with stupid compilers. */ +# define isASCII(c) (((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) < 128) #endif /* Take the eight possible bit patterns of the lower 3 bits and you get the * lower 3 bits of the 8 octal digits, in both ASCII and EBCDIC, so those bits * can be ignored. If the rest match '0', we have an octal */ -#define isOCTAL_A(c) (((WIDEST_UTYPE)((c) | 0) & ~7) == '0') +#define isOCTAL_A(c) ((((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) & ~7) == '0') #ifdef H_PERL /* If have access to perl.h, lookup in its table */ diff --git a/utf8.h b/utf8.h index 3bec01989f94..5e9e53d77f47 100644 --- a/utf8.h +++ b/utf8.h @@ -178,8 +178,10 @@ adding no time nor space requirements to the implementation. =cut */ -#define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0))) -#define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0))) +#define NATIVE_TO_LATIN1(ch) \ + (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch))) +#define LATIN1_TO_NATIVE(ch) \ + (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch))) /* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC. We thus * consider it to be identical to UTF-8 on ASCII platforms. Strictly speaking @@ -187,11 +189,13 @@ adding no time nor space requirements to the implementation. * because they are 8-bit encodings that serve the same purpose in Perl, and * rarely do we need to distinguish them. 
The term "NATIVE_UTF8" applies to * whichever one is applicable on the current platform */ -#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0))) -#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0))) +#define NATIVE_UTF8_TO_I8(ch) \ + (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch))) +#define I8_TO_NATIVE_UTF8(ch) \ + (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch))) -#define UNI_TO_NATIVE(ch) ((UV) ((ch) | 0)) -#define NATIVE_TO_UNI(ch) ((UV) ((ch) | 0)) +#define UNI_TO_NATIVE(ch) ((UV) ASSERT_NOT_PTR(ch)) +#define NATIVE_TO_UNI(ch) ((UV) ASSERT_NOT_PTR(ch)) /* @@ -615,10 +619,8 @@ The reason it works on both UTF-8 encoded strings and non-UTF-8 encoded, is that it returns TRUE in each for the exact same set of bit patterns. It is valid on a subset of what UVCHR_IS_INVARIANT is valid on, so can just use that; and the compiler should optimize out anything extraneous given the -implementation of the latter. The |0 makes sure this isn't mistakenly called -with a ptr argument. -*/ -#define UTF8_IS_INVARIANT(c) UVCHR_IS_INVARIANT((c) | 0) +implementation of the latter. */ +#define UTF8_IS_INVARIANT(c) UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c)) /* Like the above, but its name implies a non-UTF8 input, which as the comments * above show, doesn't matter as to its implementation */