Skip to content

Commit

Permalink
utf8.h: Add #define
Browse files Browse the repository at this point in the history
UTF_MIN_CONTINUATION_BYTE is clearer for use in some contexts
  • Loading branch information
khwilliamson committed Aug 7, 2021
1 parent ba8e516 commit 4a2c769
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion utf8.c
Expand Up @@ -1777,7 +1777,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
* code point is all we need. */
for (i = curlen; i < expectlen; i++) {
min_uv = UTF8_ACCUMULATE(min_uv,
I8_TO_NATIVE_UTF8(UTF_CONTINUATION_MARK));
I8_TO_NATIVE_UTF8(UTF_MIN_CONTINUATION_BYTE));
}

adjusted_s0 = temp_char_buf;
Expand Down
6 changes: 5 additions & 1 deletion utf8.h
Expand Up @@ -296,6 +296,9 @@ are in the character. */
* One could solve for two linear equations and come up with it.) */
#define UTF_CONTINUATION_MARK (UTF_IS_CONTINUATION_MASK & 0xB0)

/* Alias for UTF_CONTINUATION_MARK: the value is identical, but this name is
 * clearer in some contexts */
#define UTF_MIN_CONTINUATION_BYTE UTF_CONTINUATION_MARK

/* Is the byte 'c' part of a multi-byte UTF-8 encoded sequence, and not the
* first byte thereof? */
#define UTF8_IS_CONTINUATION(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
Expand All @@ -305,7 +308,8 @@ are in the character. */
/* Is the representation of the Unicode code point 'c' the same regardless of
* being encoded in UTF-8 or not? This is a fundamental property of
* UTF-8,EBCDIC */
#define OFFUNI_IS_INVARIANT(c) (((WIDEST_UTYPE)(c)) < UTF_CONTINUATION_MARK)
#define OFFUNI_IS_INVARIANT(c) \
(((WIDEST_UTYPE)(c)) < UTF_MIN_CONTINUATION_BYTE)

/*
=for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp
Expand Down

0 comments on commit 4a2c769

Please sign in to comment.