Skip to content

Commit

Permalink
Merge f0f7745 into 61d18b6
Browse files Browse the repository at this point in the history
  • Loading branch information
khwilliamson committed May 4, 2021
2 parents 61d18b6 + f0f7745 commit a5a6f14
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 21 deletions.
30 changes: 19 additions & 11 deletions handy.h
Expand Up @@ -1384,6 +1384,18 @@ or casts
# define WIDEST_UTYPE U32
#endif

/* ASSERT_NOT_PTR(x) yields the value of x, but is written so that, on
* compilers that cope with it, passing a pointer is a compile-time error:
* OR-ing with 0 leaves an integer's value alone, yet is not a valid operation
* on a pointer. Use it inside macros wherever a pointer might be passed by
* mistake. gcc 3.3.6 is the only compiler known so far that can't handle
* this; the version test below (gcc 3.3 and earlier get the unchecked form)
* isn't thorough for such an old compiler, so it may have to be revised if
* experience so dictates. */
#if defined(__GNUC__) \
&& (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ <= 3))
# define ASSERT_NOT_PTR(x) (x)
#else
# define ASSERT_NOT_PTR(x) ((x) | 0)
#endif

/* FITS_IN_8_BITS(c) returns true if c doesn't have a bit set other than in
* the lower 8. It is designed to be hopefully bomb-proof, making sure that no
* bits of information are lost even on a 64-bit machine, but to get the
Expand All @@ -1396,10 +1408,8 @@ or casts
* of operands. Well, they are, but that is kind of the point.
*/
#ifndef __COVERITY__
/* The '| 0' part ensures a compiler error if c is not an integer (e.g., a
* pointer) */
#define FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \
|| !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))
/* Same test with the pointer guard spelled ASSERT_NOT_PTR: a one-byte operand
* trivially fits; otherwise c is cast to WIDEST_UTYPE and must have no bits
* set outside the low 8. */
#define FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \
|| ! (((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) & ~0xFF))
#else
/* When __COVERITY__ is defined, the check is replaced by a constant true. */
#define FITS_IN_8_BITS(c) (1)
#endif
Expand All @@ -1419,7 +1429,8 @@ or casts
* asserts itself, once. The reason that this is necessary is that the
* duplicate asserts were exceeding the internal limits of some compilers */
/* Tests whether c - l, viewed as an unsigned WIDEST_UTYPE quantity, is no
* greater than n. The first body below casts the difference after
* subtracting; the second casts c before subtracting l, and passes l and n
* through ASSERT_NOT_PTR instead of an inline '| 0'. */
#define withinCOUNT_KNOWN_VALID_(c, l, n) \
(((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
((((WIDEST_UTYPE) (c)) - ASSERT_NOT_PTR(l)) \
<= ((WIDEST_UTYPE) ASSERT_NOT_PTR(n)))

/* Returns true if c is in the range l..u, where 'l' is non-negative
* Written this way so that after optimization, only one conditional test is
Expand Down Expand Up @@ -1453,17 +1464,14 @@ or casts
* unsigned type. khw supposes that it could be written as
* && ((c) == '\0' || (c) > 0)
* to avoid the message, but the cast will likely avoid extra branches even
* with stupid compilers.
*
* The '| 0' part ensures a compiler error if c is not integer (like e.g.,
* a pointer) */
# define isASCII(c) ((WIDEST_UTYPE)((c) | 0) < 128)
* with stupid compilers. */
# define isASCII(c) (((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) < 128)
#endif

/* Take the eight possible bit patterns of the lower 3 bits and you get the
* lower 3 bits of the 8 octal digits, in both ASCII and EBCDIC, so those bits
* can be ignored. If the remaining bits match those of '0', we have an octal
* digit. */
#define isOCTAL_A(c) (((WIDEST_UTYPE)((c) | 0) & ~7) == '0')
#define isOCTAL_A(c) ((((WIDEST_UTYPE) ASSERT_NOT_PTR(c)) & ~7) == '0')

#ifdef H_PERL /* If have access to perl.h, lookup in its table */

Expand Down
22 changes: 12 additions & 10 deletions utf8.h
Expand Up @@ -178,20 +178,24 @@ adding no time nor space requirements to the implementation.
=cut
*/

/* As defined here, each macro asserts (via __ASSERT_) that ch fits in 8 bits
* and then yields ch cast to U8. The '| 0' / ASSERT_NOT_PTR wrapper is there
* so that a pointer argument fails to compile. */
#define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
#define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
#define NATIVE_TO_LATIN1(ch) \
(__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
#define LATIN1_TO_NATIVE(ch) \
(__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))

/* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC. We thus
* consider it to be identical to UTF-8 on ASCII platforms. Strictly speaking
* UTF-8 and UTF-EBCDIC are two different things, but we often conflate them
* because they are 8-bit encodings that serve the same purpose in Perl, and
* rarely do we need to distinguish them. The term "NATIVE_UTF8" applies to
* whichever one is applicable on the current platform.
*
* As defined here, both macros simply assert that ch fits in 8 bits and yield
* it cast to U8. */
#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ((ch) | 0)))
#define NATIVE_UTF8_TO_I8(ch) \
(__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))
#define I8_TO_NATIVE_UTF8(ch) \
(__ASSERT_(FITS_IN_8_BITS(ch)) ((U8) ASSERT_NOT_PTR(ch)))

/* As defined here, both directions are a plain cast of ch to UV; the '| 0' /
* ASSERT_NOT_PTR wrapper only serves to reject a pointer argument at compile
* time. */
#define UNI_TO_NATIVE(ch) ((UV) ((ch) | 0))
#define NATIVE_TO_UNI(ch) ((UV) ((ch) | 0))
#define UNI_TO_NATIVE(ch) ((UV) ASSERT_NOT_PTR(ch))
#define NATIVE_TO_UNI(ch) ((UV) ASSERT_NOT_PTR(ch))

/*
Expand Down Expand Up @@ -615,10 +619,8 @@ The reason it works on both UTF-8 encoded strings and non-UTF-8 encoded, is
that it returns TRUE in each for the exact same set of bit patterns. It is
valid on a subset of what UVCHR_IS_INVARIANT is valid on, so can just use that;
and the compiler should optimize out anything extraneous given the
implementation of the latter. The |0 makes sure this isn't mistakenly called
with a ptr argument.
*/
#define UTF8_IS_INVARIANT(c) UVCHR_IS_INVARIANT((c) | 0)
implementation of the latter. */
#define UTF8_IS_INVARIANT(c) UVCHR_IS_INVARIANT(ASSERT_NOT_PTR(c))

/* Like the above, but its name implies a non-UTF8 input, which as the comments
* above show, doesn't matter as to its implementation */
Expand Down

0 comments on commit a5a6f14

Please sign in to comment.