diff --git a/embed.fnc b/embed.fnc index 329d145449e7..735f90d22dae 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1203,9 +1203,9 @@ ATidp |bool |is_utf8_fixed_width_buf_loclen_flags \ |NN const U8 * const s|STRLEN len \ |NULLOK const U8 **ep|NULLOK STRLEN *el|const U32 flags AmTdP |bool |is_utf8_valid_partial_char \ - |NN const U8 * const s|NN const U8 * const e + |NN const U8 * const s0|NN const U8 * const e ATidRp |bool |is_utf8_valid_partial_char_flags \ - |NN const U8 * const s|NN const U8 * const e|const U32 flags + |NN const U8 * const s0|NN const U8 * const e|const U32 flags CpR |bool |_is_uni_FOO|const U8 classnum|const UV c CpR |bool |_is_utf8_FOO|const U8 classnum|NN const U8 *p \ |NN const U8 * const e diff --git a/inline.h b/inline.h index 7b13be02b681..6c5b1bda467f 100644 --- a/inline.h +++ b/inline.h @@ -2217,18 +2217,47 @@ determined from just the first one or two bytes. */ PERL_STATIC_INLINE bool -Perl_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags) +Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags) { PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS; - assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE |UTF8_DISALLOW_PERL_EXTENDED))); - if (s >= e || s + UTF8SKIP(s) <= e) { - return FALSE; + PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab, + DFA_RETURN_FAILURE_, + DFA_TEASE_APART_FF_, + NOOP); + + /* The NOOP above causes the DFA to drop down here iff the input was a + * partial character. flags=0 => can return TRUE immediately; otherwise we + * need to check (not inline) if the partial character is the beginning of + * a disallowed one */ + if (flags == 0) { + return TRUE; + } + + return cBOOL(is_utf8_char_helper(s0, e, flags)); + +#ifdef HAS_EXTRA_LONG_UTF8 + + tease_apart_FF: + + /* Getting here means the input is either malformed, or, in the case of + * PL_extended_utf8_dfa_tab, was for the largest possible start byte. The + * latter case has to be extended UTF-8, so can fail immediately if that is + * forbidden */ + + if ( *s0 != I8_TO_NATIVE_UTF8(0xFF) + || (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_PERL_EXTENDED))) + { + return 0; } - return cBOOL(is_utf8_char_helper(s, e, flags)); + return is_utf8_FF_helper_(s0, e, + TRUE /* Require to be a partial character */ + ); +#endif + } /* diff --git a/proto.h b/proto.h index b1c02121ee62..3e7742a32eab 100644 --- a/proto.h +++ b/proto.h @@ -1780,16 +1780,16 @@ PERL_STATIC_INLINE bool Perl_is_utf8_string_loclen_flags(const U8 *s, STRLEN len #define PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS \ assert(s) #endif -/* PERL_CALLCONV bool is_utf8_valid_partial_char(const U8 * const s, const U8 * const e) +/* PERL_CALLCONV bool is_utf8_valid_partial_char(const U8 * const s0, const U8 * const e) __attribute__warn_unused_result__ __attribute__pure__; */ #define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR #ifndef PERL_NO_INLINE_FUNCTIONS -PERL_STATIC_INLINE bool Perl_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags) +PERL_STATIC_INLINE bool Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \ - assert(s); assert(e) + assert(s0); assert(e) #endif PERL_CALLCONV bool Perl_isinfnan(NV nv)