Skip to content

Commit

Permalink
is_utf8_char_slow(): Avoid accepting overlongs
Browse files Browse the repository at this point in the history
There are possible overlong sequences that this function blindly
accepts.  Instead of developing the code to figure this out, turn this
function into a wrapper for utf8n_to_uvuni() which already has this
check.
  • Loading branch information
Karl Williamson committed Apr 26, 2012
1 parent 524080c commit cd7e6c8
Showing 1 changed file with 5 additions and 33 deletions.
38 changes: 5 additions & 33 deletions utf8.c
Expand Up @@ -277,43 +277,15 @@ five bytes or more.
/*
 * S_is_utf8_char_slow
 *
 * Check whether the 'len' bytes starting at 's' begin with a well-formed
 * UTF-8 encoded character.
 *
 * Rather than decoding the sequence by hand (the previous implementation
 * accepted some overlong encodings), this is a thin wrapper around
 * utf8n_to_uvuni(), which already performs the overlong check.
 *
 *   s    - pointer to the first byte of the candidate character
 *   len  - number of bytes available at 's'
 *
 * Returns the length in bytes reported by utf8n_to_uvuni() for the
 * character, or 0 if the sequence is malformed.
 */
STATIC STRLEN
S_is_utf8_char_slow(const U8 *s, const STRLEN len)
{
    dTHX;   /* The function called below requires thread context */

    STRLEN actual_len;

    PERL_ARGS_ASSERT_IS_UTF8_CHAR_SLOW;

    /* Only validity matters here, so the decoded code point (the return
     * value) is deliberately discarded.  UTF8_CHECK_ONLY requests a quiet
     * check: on a malformed sequence the length out-parameter is set to
     * (STRLEN) -1 instead of a real length. */
    utf8n_to_uvuni(s, len, &actual_len, UTF8_CHECK_ONLY);

    /* Map the (STRLEN) -1 failure sentinel to 0 so callers can treat the
     * result as "false, or the character's byte length". */
    return (actual_len == (STRLEN) -1) ? 0 : actual_len;
}

/*
Expand Down

0 comments on commit cd7e6c8

Please sign in to comment.