From e44cffb922cb8cac201ba5c1e415722875620da6 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden
Date: Fri, 7 May 2021 01:25:49 +0100
Subject: [PATCH] gh18770: stop scanning for substrs after *COMMIT

*ACCEPT already avoids this (because it is "ENDLIKE"), but gets a related
fix to stop scanning for start class.
---
 regcomp.c  | 26 ++++++++++++++++++++------
 t/re/opt.t |  5 +++++
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index f5e5f581dc7e..193ab2a8863d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6336,20 +6336,34 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
                 *(data->last_closep) = ARG(scan);
         }
         else if (OP(scan) == EVAL) {
-                if (data)
-                    data->flags |= SF_HAS_EVAL;
+            if (data)
+                data->flags |= SF_HAS_EVAL;
         }
         else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
             if (flags & SCF_DO_SUBSTR) {
                 scan_commit(pRExC_state, data, minlenp, is_inf);
                 flags &= ~SCF_DO_SUBSTR;
             }
-            if (data && OP(scan)==ACCEPT) {
-                data->flags |= SCF_SEEN_ACCEPT;
-                if (stopmin > min)
-                    stopmin = min;
+            if (OP(scan) == ACCEPT) {
+                /* m{(*ACCEPT)x} does not have to start with 'x' */
+                flags &= ~SCF_DO_STCLASS;
+                if (data) {
+                    data->flags |= SCF_SEEN_ACCEPT;
+                    if (stopmin > min)
+                        stopmin = min;
+                }
             }
         }
+        else if (OP(scan) == COMMIT) {
+            /* gh18770: m{abc(*COMMIT)xyz} must fail on "abc abcxyz", so we
+             * must not end up with "abcxyz" as a fixed substring else we'll
+             * skip straight to attempting to match at offset 4.
+             */
+            if (flags & SCF_DO_SUBSTR) {
+                scan_commit(pRExC_state, data, minlenp, is_inf);
+                flags &= ~SCF_DO_SUBSTR;
+            }
+        }
         else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
         {
             if (flags & SCF_DO_SUBSTR) {
diff --git a/t/re/opt.t b/t/re/opt.t
index 802fdcaaad14..3b110de99adc 100644
--- a/t/re/opt.t
+++ b/t/re/opt.t
@@ -268,3 +268,8 @@ acdef|bcdeg 5 1+cde - Tanchored,stclass=~[ab]
 a(b){2,3}c 4 -abb 1+bbc
 a(b|bb)c 3 -ab 1-bc Tfloating,Tfloating min offset
 a(b|bb){2}c 4 -abb 1-bbc Tanchored,Tfloating,Tfloating min offset
+
+abc(*COMMIT)xyz 6 0+abc - -
+abc(*ACCEPT)xyz 3 0+abc - -
+# Must not have stclass=[x]
+(*ACCEPT)xyz 0 - - -