From bdc84a9ea762036890e5e5f8c8481e6f96fb9a32 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 10 Jun 2021 22:12:11 -0600 Subject: [PATCH] utf8_length: Fix undefined C behavior In C, comparing two pointers is only well-defined if both point into the same object, or to the position one past its last element. The previous code performed an addition that could land outside that range, so the result was undefined. --- utf8.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/utf8.c b/utf8.c index 72d3ac2b7af4..960bdfb6cecc 100644 --- a/utf8.c +++ b/utf8.c @@ -2372,23 +2372,31 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e) * the bitops (especially ~) can create illegal UTF-8. * In other words: in Perl UTF-8 is not just for Unicode. */ - if (UNLIKELY(e < s)) - goto warn_and_return; while (s < e) { - s += UTF8SKIP(s); + Ptrdiff_t expected_byte_count = UTF8SKIP(s); + + if (UNLIKELY(e - s < expected_byte_count)) { + goto warn_and_return; + } + len++; + s += expected_byte_count; + expected_byte_count = UTF8SKIP(s); } - if (UNLIKELY(e != s)) { - len--; - warn_and_return: - if (PL_op) - Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), - "%s in %s", unees, OP_DESC(PL_op)); - else - Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees); + if (LIKELY(e == s)) { + return len; } + /* Here, s > e on entry */ + + warn_and_return: + if (PL_op) + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), + "%s in %s", unees, OP_DESC(PL_op)); + else + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees); + return len; }