From 738a6777de6fb2ec21b9faf85ab301a2eec73063 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Mon, 21 Jun 2021 15:39:26 -0600
Subject: [PATCH] utf8.h: Add #define

UTF_MIN_CONTINUATION_BYTE is a synonym for UTF_CONTINUATION_MARK whose
name is clearer in some contexts.  Use it in utf8.c and in
OFFUNI_IS_INVARIANT.
---
 utf8.c | 2 +-
 utf8.h | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/utf8.c b/utf8.c
index c7f3c58ade27..4a20fc3a36bf 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1777,7 +1777,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
              * code point is all we need. */
             for (i = curlen; i < expectlen; i++) {
                 min_uv = UTF8_ACCUMULATE(min_uv,
-                                     I8_TO_NATIVE_UTF8(UTF_CONTINUATION_MARK));
+                                I8_TO_NATIVE_UTF8(UTF_MIN_CONTINUATION_BYTE));
             }
 
             adjusted_s0 = temp_char_buf;
diff --git a/utf8.h b/utf8.h
index b3fcfcc4ad3d..9ea6921ca2ad 100644
--- a/utf8.h
+++ b/utf8.h
@@ -296,6 +296,9 @@ are in the character. */
  * One could solve for two linear equations and come up with it.) */
 #define UTF_CONTINUATION_MARK       (UTF_IS_CONTINUATION_MASK & 0xB0)
 
+/* Synonym for UTF_CONTINUATION_MARK; this name is clearer in some contexts */
+#define UTF_MIN_CONTINUATION_BYTE  UTF_CONTINUATION_MARK
+
 /* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
  * first byte thereof? */
 #define UTF8_IS_CONTINUATION(c)     (__ASSERT_(FITS_IN_8_BITS(c))           \
@@ -305,7 +308,8 @@ are in the character. */
 /* Is the representation of the Unicode code point 'cp' the same regardless of
  * being encoded in UTF-8 or not? This is a fundamental property of
  * UTF-8,EBCDIC */
-#define OFFUNI_IS_INVARIANT(c) (((WIDEST_UTYPE)(c)) < UTF_CONTINUATION_MARK)
+#define OFFUNI_IS_INVARIANT(c)                                              \
+                        (((WIDEST_UTYPE)(c)) < UTF_MIN_CONTINUATION_BYTE)
 
 /*
 =for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp