From cd7e6c884f038d4463b1c4768533b484e5c5c919 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Wed, 18 Apr 2012 22:14:15 -0600
Subject: [PATCH] is_utf8_char_slow(): Avoid accepting overlongs

There are possible overlong sequences that this function blindly
accepts.  Instead of developing new code here to detect them, turn this
function into a wrapper for utf8n_to_uvuni(), which already has this
check.
---
 utf8.c | 38 +++++---------------------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

diff --git a/utf8.c b/utf8.c
index 04e8b97737f0..c01ea4b5b9be 100644
--- a/utf8.c
+++ b/utf8.c
@@ -277,43 +277,15 @@ five bytes or more.
 STATIC STRLEN
 S_is_utf8_char_slow(const U8 *s, const STRLEN len)
 {
-    U8 u = *s;
-    STRLEN slen;
-    UV uv, ouv;
-
-    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
-
-    if (UTF8_IS_INVARIANT(u))
-	return len == 1;
+    dTHX;   /* The function called below requires thread context */
 
-    if (!UTF8_IS_START(u))
-	return 0;
-
-    if (len < 2 || !UTF8_IS_CONTINUATION(s[1]))
-	return 0;
+    STRLEN actual_len;
 
-    slen = len - 1;
-    s++;
-#ifdef EBCDIC
-    u = NATIVE_TO_UTF(u);
-#endif
-    u &= UTF_START_MASK(len);
-    uv  = u;
-    ouv = uv;
-    while (slen--) {
-	if (!UTF8_IS_CONTINUATION(*s))
-	    return 0;
-	uv = UTF8_ACCUMULATE(uv, *s);
-	if (uv < ouv)
-	    return 0;
-	ouv = uv;
-	s++;
-    }
+    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;
 
-    if ((STRLEN)UNISKIP(uv) < len)
-	return 0;
+    utf8n_to_uvuni(s, len, &actual_len, UTF8_CHECK_ONLY);
 
-    return len;
+    return (actual_len == (STRLEN) -1) ? 0 : actual_len;
 }
 
 /*