diff --git a/t/lib/croak/toke_l1 b/t/lib/croak/toke_l1 index 6d656beb7ce8..27f0d67daee5 100644 --- a/t/lib/croak/toke_l1 +++ b/t/lib/croak/toke_l1 @@ -18,5 +18,5 @@ Malformed UTF-8 character (fatal) at - line 2. use utf8;y'0Á'' EXPECT Malformed UTF-8 character: \xc1\x27 (unexpected non-continuation byte 0x27, immediately after start byte 0xc1; need 2 bytes, got 1) at - line 1. -Malformed UTF-8 character: \xc1\x27 (any UTF-8 sequence that starts with "\xc1" is overlong which can and should be represented with a different, shorter sequence) at - line 1. +Malformed UTF-8 character: \xc1 (any UTF-8 sequence that starts with "\xc1" is overlong which can and should be represented with a different, shorter sequence) at - line 1. Malformed UTF-8 character (fatal) at - line 1. diff --git a/utf8.c b/utf8.c index 90ec77026412..722195b4e74c 100644 --- a/utf8.c +++ b/utf8.c @@ -1978,6 +1978,11 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * end, based on how many bytes the start byte tells * us should be in it, but no further than s0 + * avail_len + * overlong_detect_length if no overlong malformation is present, this is + * 0; otherwise it is the number of bytes required to + * make that determination. It is used below to limit + * the number of bytes displayed in a warning so as to + * make the warning accurate and not misleading. */ bool success = true; @@ -2296,8 +2301,11 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, " \"%s\" is overlong which can and should be" " represented with a different, shorter sequence)", malformed_text, - byte_dump_string_(s0, send - s0, 0), - byte_dump_string_(s0, curlen, 0)); + byte_dump_string_(s0, curlen, 0), + byte_dump_string_(s0, + MIN(avail_len, + overlong_detect_length), + 0)); } else { U8 tmpbuf[UTF8_MAXBYTES+1];