Permalink
Browse files

Revert "remove ANYOF_MATCHES_POSIXL"

This reverts commit
  • Loading branch information...
khwilliamson committed Sep 21, 2018
1 parent 476f4f5 commit 20a145e28627562b6c4dabb66aa45ca97c2c951f
Showing with 72 additions and 63 deletions.
  1. +36 −35 regcomp.c
  2. +33 −16 regcomp.h
  3. +3 −12 regexec.c
View
@@ -1448,11 +1448,9 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
* test cases for locale, many parts of it may not work properly, it is
* safest to avoid locale unless necessary. */
if (RExC_contains_locale) {
OP(ssc) = ANYOFPOSIXL;
ANYOF_POSIXL_SETALL(ssc);
}
else {
OP(ssc) = ANYOF;
ANYOF_POSIXL_ZERO(ssc);
}
}
@@ -1483,7 +1481,7 @@ S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
return FALSE;
}
if (OP(ssc) == ANYOFPOSIXL && ! ANYOF_POSIXL_SSC_TEST_ALL_SET(ssc)) {
if (RExC_contains_locale && ! ANYOF_POSIXL_SSC_TEST_ALL_SET(ssc)) {
return FALSE;
}
@@ -1618,7 +1616,8 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
#define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
/* 'AND' a given class with another one. Can create false positives. 'ssc'
* should not be inverted. */
* should not be inverted. 'and_with->flags & ANYOF_MATCHES_POSIXL' should be
* 0 if 'and_with' is a regnode_charclass instead of a regnode_ssc. */
STATIC void
S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
@@ -1721,7 +1720,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
/* If either P1 or P2 is empty, the intersection will be also; can skip
* the loop */
if (OP(and_with) != ANYOFPOSIXL) {
if (! (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL)) {
ANYOF_POSIXL_ZERO(ssc);
}
else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
@@ -1781,16 +1780,16 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
else {
ssc->invlist = anded_cp_list;
ANYOF_POSIXL_ZERO(ssc);
if (OP(and_with) == ANYOFPOSIXL) {
if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
}
}
}
else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
|| OP(and_with) == ANYOFPOSIXL)
|| (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL))
{
/* One or the other of P1, P2 is non-empty. */
if (OP(and_with) == ANYOFPOSIXL) {
if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
}
ssc_union(ssc, anded_cp_list, FALSE);
@@ -1862,7 +1861,7 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
{
/* We ignore P2, leaving P1 going forward */
} /* else Not inverted */
else if (OP(or_with) == ANYOFPOSIXL) {
else if (ANYOF_FLAGS(or_with) & ANYOF_MATCHES_POSIXL) {
ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
unsigned int i;
@@ -2040,6 +2039,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
ssc->invlist = NULL;
if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
OP(ssc) = ANYOFPOSIXL;
}
else if (RExC_contains_locale) {
@@ -17215,6 +17215,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* by locale, and hence are dealt with separately */
if (! need_class) {
need_class = 1;
anyof_flags |= ANYOF_MATCHES_POSIXL;
/* We can't change this into some other type of node
* (unless this is the only element, in which case there
@@ -17225,14 +17226,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* Coverity thinks it is possible for this to be negative; both
* jhi and khw think it's not, but be safer */
assert( ! posixl
assert(! (anyof_flags & ANYOF_MATCHES_POSIXL)
|| (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
/* See if it already matches the complement of this POSIX
* class */
if (POSIXL_TEST(posixl, namedclass + ((namedclass % 2)
? -1
: 1)))
if ( (anyof_flags & ANYOF_MATCHES_POSIXL)
&& POSIXL_TEST(posixl, namedclass + ((namedclass % 2)
? -1
: 1)))
{
posixl_matches_all = TRUE;
break; /* No need to continue. Since it matches both
@@ -18362,7 +18364,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if ( cp_list
&& invert
&& OP(REGNODE_p(ret)) != ANYOFD
&& ! (ANYOF_FLAGS(REGNODE_p(ret)) & ANYOF_LOCALE_FLAGS)
&& ! (ANYOF_FLAGS(REGNODE_p(ret)) & (ANYOF_LOCALE_FLAGS))
&& ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
{
_invlist_invert(cp_list);
@@ -21207,23 +21209,6 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
}
if (OP(node) == ANYOFPOSIXL) {
int i;
/* what matches isn't determinable except during execution, so
* don't know enough here to invert */
inverting_allowed = FALSE;
/* What the posix classes match also varies at runtime, so these
* will be output symbolically. */
posixes = newSVpvs("");
for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
if (ANYOF_POSIXL_TEST(node,i)) {
sv_catpv(posixes, anyofs[i]);
}
}
}
if (flags) {
if (OP(node) == ANYOFD) {
@@ -21242,11 +21227,27 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
}
}
else if (OP(node) == ANYOFL && (flags & ANYOFL_FOLD)) {
else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
/* what matches isn't determinable except during execution, so
* don't know enough here to invert */
inverting_allowed = FALSE;
/* If either of these flags is set, what matches isn't
* determinable except during execution, so don't know enough here
* to invert */
if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
inverting_allowed = FALSE;
}
/* What the posix classes match also varies at runtime, so these
* will be output symbolically. */
if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
int i;
posixes = newSVpvs("");
for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
if (ANYOF_POSIXL_TEST(node,i)) {
sv_catpv(posixes, anyofs[i]);
}
}
}
}
}
View
@@ -218,7 +218,7 @@ struct regnode_charclass {
/* has runtime (locale) \d, \w, ..., [:posix:] classes */
struct regnode_charclass_posixl {
U8 flags;
U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
U8 type;
U16 next_off;
U32 arg1;
@@ -236,7 +236,7 @@ struct regnode_charclass_posixl {
* have a pointer field because there is no alignment issue, and because it is
* set to NULL after construction, before any cloning of the pattern */
struct regnode_ssc {
U8 flags;
U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
U8 type;
U16 next_off;
U32 arg1;
@@ -252,8 +252,10 @@ struct regnode_ssc {
* actually using it: by setting it to 1. This allows us to test and
* distinguish between an SSC and other ANYOF node types, as 'next_off' cannot
* otherwise be 1, because it is the offset to the next regnode expressed in
* units of regnodes. Since an ANYOF node contains extra fields, it would
* require a 512 bit word for the offset to be just 1 */
* units of regnodes. Since an ANYOF node contains extra fields, it adds up
* to 12 regnode units on 32-bit systems (hence the minimum this can be, if
* not 0, is 11 there). Even if things get tightly packed on a 64-bit system,
* it still would be more than 1. */
#define set_ANYOF_SYNTHETIC(n) STMT_START{ OP(n) = ANYOF; \
NEXT_OFF(n) = 1; \
} STMT_END
@@ -452,7 +454,7 @@ struct regnode_ssc {
* unclear if this should have a flag or not. But, this flag can be
* shared with another, so it doesn't occupy extra space.
*
* At the moment, there are two spare bits, but this could be increased by
* At the moment, there is one spare bit, but this could be increased by
* various tricks:
*
* If just one more bit is needed, as of this writing it seems to khw that the
@@ -466,13 +468,18 @@ struct regnode_ssc {
* handler function, as the macro REGINCLASS in regexec.c does now for other
* cases.
*
* Another possibility is to make all ANYOFL nodes be ANYOF_POSIXL nodes, which
* have an extra 32 bits beyond what a regular ANYOFL one does, with 30 of
* those bits used for the POSIX class flags, so the ANYOFL_FOLD and
* ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags could be moved
* to the spare bits. That would mean the 30 bits become a bit field and extra
* instructions would have to be generated to tease it apart from the other two
* bits.
* Another possibility is based on the fact that ANYOF_MATCHES_POSIXL is
* redundant with the node type ANYOFPOSIXL. That flag could be removed, but
* at the expense of extra code in regexec.c. The flag has been retained
* because it allows us to see, in one test, whether we need to call
* reginclass or can just use the bitmap.
*
* If this is done, an extension would be to make all ANYOFL nodes contain the
* extra 32 bits that ANYOFPOSIXL ones do. The posix flags only occupy 30
* bits, so the ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD and
* ANYOFL_FOLD flags could be moved to that extra space, but it would mean extra
* instructions, as there are currently places in the code that assume those
* two bits are zero.
*
* All told, 5 bits could be available for other uses if all of the above were
* done.
@@ -493,7 +500,7 @@ struct regnode_ssc {
* is used for runtime \d, \w, [:posix:], ..., which are used only in locale
* and the optimizer's synthetic start class. Non-locale \d, etc are resolved
* at compile-time. Only set under /l; can be in SSC */
/* Spare: Be sure to change ANYOF_FLAGS_ALL if this gets used 0x10 */
#define ANYOF_MATCHES_POSIXL 0x02
/* The fold is calculated and stored in the bitmap where possible at compile
* time. However under locale, the actual folding varies depending on
@@ -556,7 +563,7 @@ struct regnode_ssc {
#define ANYOF_FLAGS_ALL (0xff & ~0x10)
#define ANYOF_LOCALE_FLAGS (ANYOFL_FOLD)
#define ANYOF_LOCALE_FLAGS (ANYOFL_FOLD | ANYOF_MATCHES_POSIXL)
/* These are the flags that apply to both regular ANYOF nodes and synthetic
* start class nodes during construction of the SSC. During finalization of
@@ -661,9 +668,12 @@ struct regnode_ssc {
/* Shifts a bit to get, eg. 0x4000_0000, then subtracts 1 to get 0x3FFF_FFFF */
#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1; } STMT_END
#define ANYOF_CLASS_SETALL(ret) ANYOF_POSIXL_SETALL(ret)
/* This regnode isn't used unless at least one bit is set */
#define ANYOF_POSIXL_TEST_ANY_SET(p) (OP(p) == ANYOFPOSIXL)
/* True when node 'p' has the ANYOF_MATCHES_POSIXL flag set AND at least one
 * bit of its 'classflags' field is non-zero.  'p' is accessed through a cast
 * to regnode_charclass_posixl*; the flag is tested first, so 'classflags' is
 * only read when the flag is set. */
#define ANYOF_POSIXL_TEST_ANY_SET(p) \
((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \
&& (((regnode_charclass_posixl*)(p))->classflags))
#define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p)
/* Since an SSC always has this field, we don't have to test for that; nor do
* we want to because the bit isn't set for SSC during its construction */
@@ -672,7 +682,14 @@ struct regnode_ssc {
#define ANYOF_POSIXL_SSC_TEST_ALL_SET(p) /* Are all bits set? */ \
(((regnode_ssc*) (p))->classflags \
== ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
/* True when node 'p' has the ANYOF_MATCHES_POSIXL flag set AND its
 * 'classflags' field equals the all-classes mask
 * ((1U << (ANYOF_POSIXL_MAX - 1)) - 1), i.e. the same value that
 * ANYOF_POSIXL_SETALL stores.  Unlike ANYOF_POSIXL_SSC_TEST_ALL_SET, this
 * form checks the flag before looking at 'classflags'. */
#define ANYOF_POSIXL_TEST_ALL_SET(p) \
((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \
&& ((regnode_charclass_posixl*) (p))->classflags \
== ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
#define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END
#define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest))
#define ANYOF_POSIXL_AND(source, dest) STMT_START { (dest)->classflags &= (source)->classflags ; } STMT_END
View
@@ -2248,7 +2248,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
}
else if (ANYOF_FLAGS(c) || OP(c) == ANYOFPOSIXL) {
else if (ANYOF_FLAGS(c)) {
REXEC_FBC_CLASS_SCAN(0, reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
}
else {
@@ -6716,16 +6716,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
{
Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
}
if (NEXTCHR_IS_EOS)
sayNO;
if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
utf8_target))
sayNO;
locinput += UTF8SKIP(locinput);
break;
/* FALLTHROUGH */
case ANYOFD: /* /[abc]/d */
case ANYOF: /* /[abc]/ */
if (NEXTCHR_IS_EOS)
@@ -9381,7 +9372,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
hardcount++;
}
}
else if (ANYOF_FLAGS(p) || OP(p) == ANYOFPOSIXL) {
else if (ANYOF_FLAGS(p)) {
while (scan < loceol
&& reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
scan++;

0 comments on commit 20a145e

Please sign in to comment.