From cd7e6c884f038d4463b1c4768533b484e5c5c919 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 18 Apr 2012 22:14:15 -0600 Subject: [PATCH] is_utf8_char_slow(): Avoid accepting overlongs There are possible overlong sequences that this function blindly accepts. Instead of developing the code to figure this out, turn this function into a wrapper for utf8n_to_uvuni() which already has this check. --- utf8.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/utf8.c b/utf8.c index 04e8b97737f0..c01ea4b5b9be 100644 --- a/utf8.c +++ b/utf8.c @@ -277,43 +277,15 @@ five bytes or more. STATIC STRLEN S_is_utf8_char_slow(const U8 *s, const STRLEN len) { - U8 u = *s; - STRLEN slen; - UV uv, ouv; - - PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW; - - if (UTF8_IS_INVARIANT(u)) - return len == 1; + dTHX; /* The function called below requires thread context */ - if (!UTF8_IS_START(u)) - return 0; - - if (len < 2 || !UTF8_IS_CONTINUATION(s[1])) - return 0; + STRLEN actual_len; - slen = len - 1; - s++; -#ifdef EBCDIC - u = NATIVE_TO_UTF(u); -#endif - u &= UTF_START_MASK(len); - uv = u; - ouv = uv; - while (slen--) { - if (!UTF8_IS_CONTINUATION(*s)) - return 0; - uv = UTF8_ACCUMULATE(uv, *s); - if (uv < ouv) - return 0; - ouv = uv; - s++; - } + PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW; - if ((STRLEN)UNISKIP(uv) < len) - return 0; + utf8n_to_uvuni(s, len, &actual_len, UTF8_CHECK_ONLY); - return len; + return (actual_len == (STRLEN) -1) ? 0 : actual_len; } /*