From 02bd4e7e4abcb7ba7b5e617cada5f286ebad6ba6 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 14 Jun 2021 12:16:52 -0600 Subject: [PATCH] utf8.h: Split off compile-time value A future commit will have compilers saying that the result of OFFUNISKIP() isn't knowable at compile time. So, avoid using that macro here, creating a substitute. --- utf8.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/utf8.h b/utf8.h index e924138bb7ec..23c46c315764 100644 --- a/utf8.h +++ b/utf8.h @@ -416,6 +416,15 @@ encoded as UTF-8. C is a native (ASCII or EBCDIC) code point if less than * continuation byte */ #define MAX_PORTABLE_UTF8_TWO_BYTE (32 * nBIT_UMAX(5)) +/* How many bytes are needed to represent 0x10FFFF in UTF-8? This works by + * observation. It is because that number takes 21 bits to represent and the + * number of bytes required is inversely proportional to + * (UTF_CONTINUATION_BYTE_INFO_BITS - 1). And that is because + * UTF_CONTINUATION_BYTE_INFO_BITS is the number of information bits contained + * in each continuation byte, and each new byte removes a bit of information + * from the start byte */ +#define OFFUNISKIP_0x10FFFF_ (21 / (UTF_CONTINUATION_BYTE_INFO_BITS - 1)) + /* =for apidoc AmnU|STRLEN|UTF8_MAXBYTES_CASE @@ -435,7 +444,7 @@ uppercase/lowercase/titlecase/fold into. =cut */ #define UTF8_MAXBYTES_CASE \ - MAX(UTF8_MAXBYTES, UTF8_MAX_FOLD_CHAR_EXPAND * OFFUNISKIP(0x10FFFF)) + MAX(UTF8_MAXBYTES, UTF8_MAX_FOLD_CHAR_EXPAND * OFFUNISKIP_0x10FFFF_) /* Rest of these are attributes of Unicode and perl's internals rather than the * encoding, or happen to be the same in both ASCII and EBCDIC (at least at