Skip to content

Commit

Permalink
Also set RXf_RTRIM if the pattern is /\s*$/u
Browse files Browse the repository at this point in the history
It takes a bit more care to handle 0-or-more alternations, but this covers
more regex variations used in the wild.
  • Loading branch information
nwc10 authored and khwilliamson committed Jun 6, 2021
1 parent f47501b commit 9da0bd4
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
8 changes: 5 additions & 3 deletions regcomp.c
Expand Up @@ -8466,7 +8466,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* It's safe to read through *next only if OP(first) is a regop of
* the right type (not EXACT, for example).
*/
U8 nop = (fop == NOTHING || fop == MBOL || fop == SBOL || fop == PLUS)
U8 nop = (fop == NOTHING || fop == MBOL || fop == SBOL || fop == PLUS || fop == STAR)
? OP(next) : 0;

if (PL_regkind[fop] == NOTHING && nop == END)
Expand All @@ -8488,14 +8488,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&& *(STRING(first)) == ' '
&& OP(regnext(first)) == END )
RExC_rx->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
else if (fop == PLUS
else if ((fop == PLUS || fop == STAR)
&& nop == POSIXU && FLAGS(next) == _CC_SPACE) {
regnode *second = regnext(first);
regnode *third = (OP(second) == EOS || OP(second) == SEOL)
? regnext(second) : NULL;
if (third && OP(third) == END) {
/* /[[:space:]]+\z/u
* /[[:space:]]+$/u */
* /[[:space:]]+$/u
* /[[:space:]]*$/u
* etc */
RExC_rx->extflags |= RXf_RTRIM | RXf_CHECK_ALL;
}
}
Expand Down
11 changes: 9 additions & 2 deletions regexec.c
Expand Up @@ -923,10 +923,17 @@ Perl_re_intuit_start(pTHX_

if(prog->extflags & RXf_RTRIM) {
const char *s = strend;
if (strpos == strend && prog->minlen == 0) {
/* \s* and we are asked to match an empty string */
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"Intuit: %sSuccessfully guessed:%s match at offset %ld\n",
PL_colors[4], PL_colors[5], (long)(s - strbeg)) );
return s;
}
if (strpos >= strend) {
/* This should be unreachable:
* String shorter than min possible regex match (0 < 1)
* but in the future we might want to also handle *, ? and {0,...}
* but in the future we might want to also handle ? and {0,...}
*/
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
" rtrim intuit on empty string ...\n"));
Expand Down Expand Up @@ -960,7 +967,7 @@ Perl_re_intuit_start(pTHX_
}
}
}
if (s < strend) {
if (s < strend || s == strend && prog->minlen == 0) {
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"Intuit: %sSuccessfully guessed:%s match at offset %ld\n",
PL_colors[4], PL_colors[5], (long)(s - strbeg)) );
Expand Down

0 comments on commit 9da0bd4

Please sign in to comment.