diff --git a/embed.fnc b/embed.fnc index 551d46880562..648545ac1691 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3095,7 +3095,7 @@ S |int |sv_2iuv_non_preserve |NN SV *const sv SR |STRLEN |expect_number |NN const char **const pattern ST |STRLEN |sv_pos_u2b_forwards|NN const U8 *const start \ |NN const U8 *const send|NN STRLEN *const uoffset \ - |NN bool *const at_end + |NN bool *const at_end|NN bool *canonical_position ST |STRLEN |sv_pos_u2b_midway|NN const U8 *const start \ |NN const U8 *send|STRLEN uoffset|const STRLEN uend S |STRLEN |sv_pos_u2b_cached|NN SV *const sv|NN MAGIC **const mgp \ diff --git a/proto.h b/proto.h index faca6d1366e1..56f7d42e7e0c 100644 --- a/proto.h +++ b/proto.h @@ -6382,9 +6382,9 @@ STATIC STRLEN S_sv_pos_b2u_midway(pTHX_ const U8 *const s, const U8 *const targe STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0); #define PERL_ARGS_ASSERT_SV_POS_U2B_CACHED \ assert(sv); assert(mgp); assert(start); assert(send) -STATIC STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN *const uoffset, bool *const at_end); +STATIC STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN *const uoffset, bool *const at_end, bool *canonical_position); #define PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS \ - assert(start); assert(send); assert(uoffset); assert(at_end) + assert(start); assert(send); assert(uoffset); assert(at_end); assert(canonical_position) STATIC STRLEN S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, STRLEN uoffset, const STRLEN uend); #define PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY \ assert(start); assert(send) diff --git a/sv.c b/sv.c index ceef9966da34..eec33eb5cab1 100644 --- a/sv.c +++ b/sv.c @@ -7225,7 +7225,8 @@ Perl_sv_len_utf8_nomg(pTHX_ SV * const sv) offset. */ static STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, - STRLEN *const uoffset_p, bool *const at_end) + STRLEN *const uoffset_p, bool *const at_end, + bool* canonical_position) { const U8 *s = start; STRLEN uoffset = *uoffset_p; @@ -7245,6 +7246,9 @@ S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, it's actually a bounds error */ s = send; } + /* If the unicode position is beyond the end, we return the end but + shouldn't cache that position */ + *canonical_position = (uoffset == 0); *uoffset_p -= uoffset; return s - start; } @@ -7298,6 +7302,7 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */ bool found = FALSE; bool at_end = FALSE; + bool canonical_position = FALSE; PERL_ARGS_ASSERT_SV_POS_U2B_CACHED; @@ -7338,7 +7343,8 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start uoffset -= uoffset0; boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0, - send, &uoffset, &at_end); + send, &uoffset, &at_end, + &canonical_position); uoffset += uoffset0; } } @@ -7380,7 +7386,8 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start STRLEN real_boffset; uoffset -= uoffset0; real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0, - send, &uoffset, &at_end); + send, &uoffset, &at_end, + &canonical_position); uoffset += uoffset0; if (found && PL_utf8cache < 0) @@ -7389,7 +7396,7 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start boffset = real_boffset; } - if (PL_utf8cache && !SvGMAGICAL(sv) && SvPOK(sv)) { + if (PL_utf8cache && canonical_position && !SvGMAGICAL(sv) && SvPOK(sv)) { if (at_end) utf8_mg_len_cache_update(sv, mgp, uoffset); else diff --git a/t/op/index.t b/t/op/index.t index 2f0834e8d1ec..a218848851e9 100644 --- a/t/op/index.t +++ b/t/op/index.t @@ -8,7 +8,7 @@ BEGIN { } use strict; -plan( tests => 414 ); +plan( tests => 415 ); run_tests() unless caller; @@ -358,4 +358,12 @@ $x; EOS } + { + my $s = "abc"; + my $len = length($s); + utf8::upgrade($s); + length($s); + is(index($s, "", $len+1), 3, 'Overlong index doesn\'t confuse utf8 cache'); + } + } # end of sub run_tests