From 738a6777de6fb2ec21b9faf85ab301a2eec73063 Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Mon, 21 Jun 2021 15:39:26 -0600
Subject: [PATCH] utf8.h: Add #define

UTF_MIN_CONTINUATION_BYTE is clearer for use in some contexts
---
 utf8.c | 2 +-
 utf8.h | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/utf8.c b/utf8.c
index c7f3c58ade27..4a20fc3a36bf 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1777,7 +1777,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
 * code point is all we need. */
 for (i = curlen; i < expectlen; i++) {
 min_uv = UTF8_ACCUMULATE(min_uv,
- I8_TO_NATIVE_UTF8(UTF_CONTINUATION_MARK));
+ I8_TO_NATIVE_UTF8(UTF_MIN_CONTINUATION_BYTE));
 }
 
 adjusted_s0 = temp_char_buf;
diff --git a/utf8.h b/utf8.h
index b3fcfcc4ad3d..9ea6921ca2ad 100644
--- a/utf8.h
+++ b/utf8.h
@@ -296,6 +296,9 @@ are in the character. */
 * One could solve for two linear equations and come up with it.) */
 #define UTF_CONTINUATION_MARK (UTF_IS_CONTINUATION_MASK & 0xB0)
 
+/* This value is clearer in some contexts */
+#define UTF_MIN_CONTINUATION_BYTE UTF_CONTINUATION_MARK
+
 /* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
 * first byte thereof? */
 #define UTF8_IS_CONTINUATION(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
@@ -305,7 +308,8 @@ are in the character. */
 /* Is the representation of the Unicode code point 'cp' the same regardless of
 * being encoded in UTF-8 or not? This is a fundamental property of
 * UTF-8,EBCDIC */
-#define OFFUNI_IS_INVARIANT(c) (((WIDEST_UTYPE)(c)) < UTF_CONTINUATION_MARK)
+#define OFFUNI_IS_INVARIANT(c) \
+ (((WIDEST_UTYPE)(c)) < UTF_MIN_CONTINUATION_BYTE)
 
 /*
 =for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp