Skip to content

Commit

Permalink
Merge b062b74 into 44e4059
Browse files Browse the repository at this point in the history
  • Loading branch information
khwilliamson committed Jun 9, 2021
2 parents 44e4059 + b062b74 commit bc20dbb
Show file tree
Hide file tree
Showing 10 changed files with 699 additions and 50 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -6098,6 +6098,7 @@ t/re/regexp_trielist.t See if regular expressions work with trie optimisation
t/re/regexp_unicode_prop.t See if unicode properties work in regular expressions as expected
t/re/regexp_unicode_prop_thr.t See if unicode properties work in regular expressions as expected under threads
t/re/rt122747.t Test rt122747 assert failure (requires DEBUGGING)
t/re/rtrim.t Tests for potential rtrim optimisations
t/re/rxcode.t See if /(?{ code })/ works
t/re/script_run.t See if script runs works
t/re/speed.t See if optimisations are keeping things fast
Expand Down
3 changes: 3 additions & 0 deletions dump.c
Expand Up @@ -918,6 +918,8 @@ S_pm_description(pTHX_ const PMOP *pm)
sv_catpvs(desc, ",WHITE");
if (RX_EXTFLAGS(regex) & RXf_NULL)
sv_catpvs(desc, ",NULL");
if (RX_EXTFLAGS(regex) & RXf_RTRIM)
sv_catpvs(desc, ",RTRIM");
}

append_flags(desc, pmflags, pmflags_flags_names);
Expand Down Expand Up @@ -1708,6 +1710,7 @@ const struct flag_to_name regexp_extflags_names[] = {
{RXf_SKIPWHITE, "SKIPWHITE,"},
{RXf_WHITE, "WHITE,"},
{RXf_NULL, "NULL,"},
{RXf_RTRIM, "RTRIM,"},
};

/* NOTE: this structure is mostly duplicative of one generated by
Expand Down
15 changes: 11 additions & 4 deletions ext/Devel-Peek/t/Peek.t
Expand Up @@ -355,9 +355,10 @@ do_test('reference to named subroutine without prototype',

if ($] >= 5.011) {
# note the conditionals on ENGINE and INTFLAGS were introduced in 5.19.9
do_test('reference to regexp',
qr(tic),
'SV = $RV\\($ADDR\\) at $ADDR
# We are taking great care to curate this test as if the module is dual life
# (or we actively want to cherry-pick entire chunks of it back to maint)
# Is this a good idea?
my $raw = 'SV = $RV\\($ADDR\\) at $ADDR
REFCNT = 1
FLAGS = \\(ROK\\)
RV = $ADDR
Expand Down Expand Up @@ -423,7 +424,13 @@ do_test('reference to regexp',
OFFS = $ADDR
QR_ANONCV = 0x0(?:
SAVED_COPY = 0x0)?'
));
);

$raw =~ s/ EXTFLAGS = 0x680000 / EXTFLAGS = 0x340000 /g
if $] >= 5.035;
do_test('reference to regexp',
qr(tic),
$raw);
} else {
do_test('reference to regexp',
qr(tic),
Expand Down
89 changes: 88 additions & 1 deletion regcharclass.h
Expand Up @@ -155,6 +155,35 @@
( 0x202F == cp || ( 0x202F < cp && \
( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )

/*
XPERLSPACE: \p{XPerlSpace}
\p{XPerlSpace}

is_XPERLSPACE_utf8_safe_backwards(s,e) looks at the bytes that END
immediately before s (s[-1] first, then s[-2] and s[-3]) and tests whether
they form one of the byte sequences listed below.  It evaluates to the
length in bytes of the sequence found (1, 2 or 3), or to 0 if there is none.

e is the lowest address that may be read: s[-3] is examined only when
(s) - (e) > 2, s[-2] only when (s) - (e) > 1, and s[-1] only when
(s) - (e) > 0.  When (s) - (e) <= 0 nothing is read and the result is 0.

Sequences accepted by this copy (bytes in memory order):
    1 byte :  '\t'..'\r', ' '
    2 bytes:  C2 85, C2 A0
    3 bytes:  E1 9A 80, E2 80 80..8A, E2 80 A8..A9, E2 80 AF,
              E2 81 9F, E3 80 80
NOTE(review): these look like the standard UTF-8 encodings of U+0085,
U+00A0, U+1680, U+2000..U+200A, U+2028, U+2029, U+202F, U+205F and U+3000,
i.e. this is presumably the ASCII-platform variant; the enclosing #if is
outside this view -- confirm.

Caveats visible in the expansion: s is evaluated many times and is not
parenthesized inside the (const U8*) casts, so pass a simple,
side-effect-free pointer expression.  In the final ((s) - (e) > 0) branch
the value is the result of the boolean test itself (1 or 0).

Per the marker below, the macro text is generated; a durable change
presumably belongs in the generator rather than here.
*/
/*** GENERATED CODE ***/
#define is_XPERLSPACE_utf8_safe_backwards(s,e) \
( ((s) - (e) > 2) ? \
( ( inRANGE_helper_(U8, *((const U8*)s - 1), '\t', '\r') || ' ' == *((const U8*)s - 1) ) ? 1\
: ( 0x80 == *((const U8*)s - 1) ) ? \
( ( 0x80 == *((const U8*)s - 2) ) ? \
( ( inRANGE_helper_(U8, *((const U8*)s - 3), 0xE2, 0xE3) ) ? 3 : 0 )\
: ( ( 0x9A == *((const U8*)s - 2) ) && ( 0xE1 == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( inRANGE_helper_(U8, *((const U8*)s - 1), 0x81, 0x84) || inRANGE_helper_(U8, *((const U8*)s - 1), 0x86, 0x8A) || inRANGE_helper_(U8, *((const U8*)s - 1), 0xA8, 0xA9) || 0xAF == *((const U8*)s - 1) ) ?\
( ( ( 0x80 == *((const U8*)s - 2) ) && ( 0xE2 == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( 0x85 == *((const U8*)s - 1) ) ? \
( ( 0x80 == *((const U8*)s - 2) ) ? \
( ( 0xE2 == *((const U8*)s - 3) ) ? 3 : 0 ) \
: ( 0xC2 == *((const U8*)s - 2) ) ? 2 : 0 ) \
: ( 0x9F == *((const U8*)s - 1) ) ? \
( ( ( 0x81 == *((const U8*)s - 2) ) && ( 0xE2 == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( ( 0xA0 == *((const U8*)s - 1) ) && ( 0xC2 == *((const U8*)s - 2) ) ) ? 2 : 0 )\
: ((s) - (e) > 1) ? \
( ( inRANGE_helper_(U8, *((const U8*)s - 1), '\t', '\r') || ' ' == *((const U8*)s - 1) ) ? 1\
: ( ( 0x85 == *((const U8*)s - 1) || 0xA0 == *((const U8*)s - 1) ) && ( 0xC2 == *((const U8*)s - 2) ) ) ? 2 : 0 )\
: ((s) - (e) > 0) ? \
( inRANGE_helper_(U8, *((const U8*)s - 1), '\t', '\r') || ' ' == *((const U8*)s - 1) )\
: 0 )

/*
NONCHAR: Non character code points
Expand Down Expand Up @@ -1338,6 +1367,35 @@
( 0x202F == cp || ( 0x202F < cp && \
( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )

/*
XPERLSPACE: \p{XPerlSpace}
\p{XPerlSpace}

is_XPERLSPACE_utf8_safe_backwards(s,e) looks at the bytes that END
immediately before s (s[-1] first, then s[-2] and s[-3]) and tests whether
they form one of the byte sequences this copy accepts.  It evaluates to the
length in bytes of the sequence found (1, 2 or 3), or to 0 if there is none.

e is the lowest address that may be read: s[-3] is examined only when
(s) - (e) > 2, s[-2] only when (s) - (e) > 1, and s[-1] only when
(s) - (e) > 0.  When (s) - (e) <= 0 nothing is read and the result is 0.

Sequences accepted by this copy (bytes in memory order):
    1 byte :  '\t', '\v'..'\r', '\n', 0x25, ' '
    2 bytes:  80 41
    3 bytes:  CA 41 41, CE 41 41, BC 63 41, CA 41 42..48, CA 41 51,
              CA 41..42 49..4A, CA 42 56, CA 43 73
The test ( byte & 0xFB ) == 0xCA accepts both 0xCA and 0xCE, because the
mask clears the single bit in which those two values differ.
NOTE(review): the byte values differ from the other copies of this macro,
so this is presumably a variant for a different native character set
(an EBCDIC code page?); the enclosing #if is outside this view -- confirm.

Caveats visible in the expansion: s is evaluated many times and is not
parenthesized inside the (const U8*) casts, so pass a simple,
side-effect-free pointer expression.  In the final ((s) - (e) > 0) branch
the value is the result of the boolean test itself (1 or 0).

Per the marker below, the macro text is generated; a durable change
presumably belongs in the generator rather than here.
*/
/*** GENERATED CODE ***/
#define is_XPERLSPACE_utf8_safe_backwards(s,e) \
( ((s) - (e) > 2) ? \
( ( '\t' == *((const U8*)s - 1) || inRANGE_helper_(U8, *((const U8*)s - 1), '\v', '\r') || '\n' == *((const U8*)s - 1) || 0x25 == *((const U8*)s - 1) || ' ' == *((const U8*)s - 1) ) ? 1\
: ( 0x41 == *((const U8*)s - 1) ) ? \
( ( 0x41 == *((const U8*)s - 2) ) ? \
( ( ( *((const U8*)s - 3) & 0xFB ) == 0xCA ) ? 3 : 0 ) \
: ( 0x63 == *((const U8*)s - 2) ) ? \
( ( 0xBC == *((const U8*)s - 3) ) ? 3 : 0 ) \
: ( 0x80 == *((const U8*)s - 2) ) ? 2 : 0 ) \
: ( inRANGE_helper_(U8, *((const U8*)s - 1), 0x42, 0x48) || 0x51 == *((const U8*)s - 1) ) ?\
( ( ( 0x41 == *((const U8*)s - 2) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( inRANGE_helper_(U8, *((const U8*)s - 1), 0x49, 0x4A) ) ? \
( ( ( inRANGE_helper_(U8, *((const U8*)s - 2), 0x41, 0x42) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( 0x56 == *((const U8*)s - 1) ) ? \
( ( ( 0x42 == *((const U8*)s - 2) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( ( ( 0x73 == *((const U8*)s - 1) ) && ( 0x43 == *((const U8*)s - 2) ) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ((s) - (e) > 1) ? \
( ( '\t' == *((const U8*)s - 1) || inRANGE_helper_(U8, *((const U8*)s - 1), '\v', '\r') || '\n' == *((const U8*)s - 1) || 0x25 == *((const U8*)s - 1) || ' ' == *((const U8*)s - 1) ) ? 1\
: ( ( 0x41 == *((const U8*)s - 1) ) && ( 0x80 == *((const U8*)s - 2) ) ) ? 2 : 0 )\
: ((s) - (e) > 0) ? \
( '\t' == *((const U8*)s - 1) || inRANGE_helper_(U8, *((const U8*)s - 1), '\v', '\r') || '\n' == *((const U8*)s - 1) || 0x25 == *((const U8*)s - 1) || ' ' == *((const U8*)s - 1) )\
: 0 )

/*
NONCHAR: Non character code points
Expand Down Expand Up @@ -2516,6 +2574,35 @@
( 0x202F == cp || ( 0x202F < cp && \
( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )

/*
XPERLSPACE: \p{XPerlSpace}
\p{XPerlSpace}

is_XPERLSPACE_utf8_safe_backwards(s,e) looks at the bytes that END
immediately before s (s[-1] first, then s[-2] and s[-3]) and tests whether
they form one of the byte sequences this copy accepts.  It evaluates to the
length in bytes of the sequence found (1, 2 or 3), or to 0 if there is none.

e is the lowest address that may be read: s[-3] is examined only when
(s) - (e) > 2, s[-2] only when (s) - (e) > 1, and s[-1] only when
(s) - (e) > 0.  When (s) - (e) <= 0 nothing is read and the result is 0.

Sequences accepted by this copy (bytes in memory order):
    1 byte :  '\t', '\v'..'\r', 0x15, '\n', ' '
    2 bytes:  78 41
    3 bytes:  CA 41 41, CE 41 41, BD 62 41, CA 41 42..48, CA 41 51,
              CA 41..42 49..4A, CA 42 56, CA 43 72
The test ( byte & 0xFB ) == 0xCA accepts both 0xCA and 0xCE, because the
mask clears the single bit in which those two values differ.
NOTE(review): the byte values differ from the other copies of this macro,
so this is presumably a variant for a different native character set
(an EBCDIC code page?); the enclosing #if is outside this view -- confirm.

Caveats visible in the expansion: s is evaluated many times and is not
parenthesized inside the (const U8*) casts, so pass a simple,
side-effect-free pointer expression.  In the final ((s) - (e) > 0) branch
the value is the result of the boolean test itself (1 or 0).

Per the marker below, the macro text is generated; a durable change
presumably belongs in the generator rather than here.
*/
/*** GENERATED CODE ***/
#define is_XPERLSPACE_utf8_safe_backwards(s,e) \
( ((s) - (e) > 2) ? \
( ( '\t' == *((const U8*)s - 1) || inRANGE_helper_(U8, *((const U8*)s - 1), '\v', '\r') || 0x15 == *((const U8*)s - 1) || '\n' == *((const U8*)s - 1) || ' ' == *((const U8*)s - 1) ) ? 1\
: ( 0x41 == *((const U8*)s - 1) ) ? \
( ( 0x41 == *((const U8*)s - 2) ) ? \
( ( ( *((const U8*)s - 3) & 0xFB ) == 0xCA ) ? 3 : 0 ) \
: ( 0x62 == *((const U8*)s - 2) ) ? \
( ( 0xBD == *((const U8*)s - 3) ) ? 3 : 0 ) \
: ( 0x78 == *((const U8*)s - 2) ) ? 2 : 0 ) \
: ( inRANGE_helper_(U8, *((const U8*)s - 1), 0x42, 0x48) || 0x51 == *((const U8*)s - 1) ) ?\
( ( ( 0x41 == *((const U8*)s - 2) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( inRANGE_helper_(U8, *((const U8*)s - 1), 0x49, 0x4A) ) ? \
( ( ( inRANGE_helper_(U8, *((const U8*)s - 2), 0x41, 0x42) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( 0x56 == *((const U8*)s - 1) ) ? \
( ( ( 0x42 == *((const U8*)s - 2) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ( ( ( 0x72 == *((const U8*)s - 1) ) && ( 0x43 == *((const U8*)s - 2) ) ) && ( 0xCA == *((const U8*)s - 3) ) ) ? 3 : 0 )\
: ((s) - (e) > 1) ? \
( ( '\t' == *((const U8*)s - 1) || inRANGE_helper_(U8, *((const U8*)s - 1), '\v', '\r') || 0x15 == *((const U8*)s - 1) || '\n' == *((const U8*)s - 1) || ' ' == *((const U8*)s - 1) ) ? 1\
: ( ( 0x41 == *((const U8*)s - 1) ) && ( 0x78 == *((const U8*)s - 2) ) ) ? 2 : 0 )\
: ((s) - (e) > 0) ? \
( '\t' == *((const U8*)s - 1) || inRANGE_helper_(U8, *((const U8*)s - 1), '\v', '\r') || 0x15 == *((const U8*)s - 1) || '\n' == *((const U8*)s - 1) || ' ' == *((const U8*)s - 1) )\
: 0 )

/*
NONCHAR: Non character code points
Expand Down Expand Up @@ -3617,6 +3704,6 @@
* 696e706fddd3ce8cd48c7ea91caf4c9edf5c296432d320aa7b78631f69aa9eac lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 24120d5e0c9685c442c93bc1dbea9b85ef973bf8e9474baf0e55b160c288226b regen/charset_translations.pl
* 3635c6e564558e965018947bdab45f37d9a4fa82eb05b2694eae1a04bf7e65a3 regen/regcharclass.pl
* 29d7ced5065b4b2476607aefb87083c37a7dc5f9705430a7c0811d4232efca13 regen/regcharclass.pl
* b2f896452d2b30da3e04800f478c60c1fd0b03d6b668689b020f1e3cf1f1cdd9 regen/regcharclass_multi_char_folds.pl
* ex: set ro: */
17 changes: 16 additions & 1 deletion regcomp.c
Expand Up @@ -8466,7 +8466,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* It's safe to read through *next only if OP(first) is a regop of
* the right type (not EXACT, for example).
*/
U8 nop = (fop == NOTHING || fop == MBOL || fop == SBOL || fop == PLUS)
U8 nop = (fop == NOTHING || fop == MBOL || fop == SBOL || fop == PLUS || fop == STAR)
? OP(next) : 0;

if (PL_regkind[fop] == NOTHING && nop == END)
Expand All @@ -8488,6 +8488,21 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&& *(STRING(first)) == ' '
&& OP(regnext(first)) == END )
RExC_rx->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
else if ((fop == PLUS || fop == STAR)
&& (nop == POSIXU || nop == POSIXD)
&& FLAGS(next) == _CC_SPACE) {
regnode *second = regnext(first);
regnode *third = (OP(second) == EOS || OP(second) == SEOL)
? regnext(second) : NULL;
if (third && OP(third) == END) {
/* /[[:space:]]+\z/u
* /[[:space:]]+$/u
* /[[:space:]]*$/u
* /\s*$/
* etc */
RExC_rx->extflags |= RXf_RTRIM | RXf_CHECK_ALL;
}
}

}

Expand Down

0 comments on commit bc20dbb

Please sign in to comment.