Skip to content

Commit

Permalink
utf8.h: Add new #define for extended length UTF-8
Browse files Browse the repository at this point in the history
The previous commit adds a convenient place to create a symbol to
indicate that the UTF-8 on this platform includes Perl's nearly-double
length extension.  The platforms this isn't needed on are 32-bit ASCII
ones.  This symbol allows removing a couple of places where EBCDIC needs
to be considered.
  • Loading branch information
khwilliamson committed Jun 14, 2021
1 parent e8ac68e commit 8eb5523
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion inline.h
Expand Up @@ -1214,7 +1214,7 @@ Perl_isUTF8_CHAR(const U8 * const s0, const U8 * const e)
return s - s0 + 1;
}

-#if defined(UV_IS_QUAD) || defined(EBCDIC)
+#ifdef HAS_EXTRA_LONG_UTF8

if (e - s0 >= UTF8_MAXBYTES && NATIVE_UTF8_TO_I8(*s0) == 0xFF) {
return is_utf8_char_helper(s0, e, 0);
Expand Down
2 changes: 1 addition & 1 deletion utf8.c
Expand Up @@ -858,7 +858,7 @@ S_does_utf8_overflow(const U8 * const s,
# endif
#endif

-#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
+#ifndef HAS_EXTRA_LONG_UTF8

/* On 32 bit ASCII machines, many overlongs that start with FF don't
* overflow */
Expand Down
1 change: 1 addition & 0 deletions utf8.h
Expand Up @@ -360,6 +360,7 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
#if UVSIZE * CHARBITS > (6 * UTF_CONTINUATION_BYTE_INFO_BITS)
# define OFFUNISKIP_helper_(uv) \
UNLIKELY(uv > nBIT_UMAX(6 * UTF_CONTINUATION_BYTE_INFO_BITS))
+# define HAS_EXTRA_LONG_UTF8
#else
# define OFFUNISKIP_helper_(uv) 0
#endif
Expand Down

0 comments on commit 8eb5523

Please sign in to comment.