Permalink
Browse files

regcomp.c: Generate new regnode for /[[:posix:]]/l

This follows on to the previous commit and generates code to use the new
regnode.  This allows some simplifications, with more to come in the
next commit.  The determination of the regnode is moved later in the
function that does this; the code that backed this out if we guessed
wrong is excised.
  • Loading branch information...
khwilliamson committed Sep 21, 2018
1 parent 949757b commit bc4194dcbc59382c3f814225e109ce49e81cca80
Showing with 38 additions and 48 deletions.
  1. +36 −46 regcomp.c
  2. +2 −2 t/re/anyof.t
View
@@ -2044,9 +2044,9 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
OP(ssc) = ANYOFPOSIXL;
}
if (RExC_contains_locale) {
else if (RExC_contains_locale) {
OP(ssc) = ANYOFL;
}
@@ -5565,6 +5565,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
case ANYOFD:
case ANYOFL:
case ANYOFPOSIXL:
case ANYOF:
if (flags & SCF_DO_STCLASS_AND)
ssc_and(pRExC_state, data->start_class,
@@ -16486,6 +16487,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
AV* posix_warnings = NULL;
const bool do_posix_warnings = return_posix_warnings
|| (PASS2 && ckWARN(WARN_REGEXP));
U8 op = END; /* The returned node-type, initialized to an impossible
one. */
U8 anyof_flags = 0; /* flag bits if the node is an ANYOF-type */
U32 posixl = 0; /* bit field of posix classes matched under /l */
GET_RE_DEBUG_FLAGS_DECL;
@@ -16502,21 +16507,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
allow_multi_folds = FALSE;
#endif
/* Assume we are going to generate an ANYOF node. */
ret = reganode(pRExC_state,
(LOC)
? ANYOFL
: ANYOF,
0);
if (SIZE_ONLY) {
RExC_size += ANYOF_SKIP;
listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
}
else {
ANYOF_FLAGS(ret) = 0;
RExC_emit += ANYOF_SKIP;
listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
initial_listsv_len = SvCUR(listsv);
SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated. */
@@ -16968,7 +16962,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* We don't know yet what this matches, so have to flag
* it */
ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
anyof_flags |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
}
else {
@@ -17173,14 +17167,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* by locale, and hence are dealt with separately */
if (! need_class) {
need_class = 1;
if (SIZE_ONLY) {
RExC_size += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
}
else {
RExC_emit += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
}
ANYOF_FLAGS(ret) |= ANYOF_MATCHES_POSIXL;
ANYOF_POSIXL_ZERO(ret);
anyof_flags |= ANYOF_MATCHES_POSIXL;
/* We can't change this into some other type of node
* (unless this is the only element, in which case there
@@ -17191,15 +17178,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* Coverity thinks it is possible for this to be negative; both
* jhi and khw think it's not, but be safer */
assert(! (ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
assert(! (anyof_flags & ANYOF_MATCHES_POSIXL)
|| (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
/* See if it already matches the complement of this POSIX
* class */
if ((ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
&& ANYOF_POSIXL_TEST(ret, namedclass + ((namedclass % 2)
? -1
: 1)))
if ( (anyof_flags & ANYOF_MATCHES_POSIXL)
&& POSIXL_TEST(posixl, namedclass + ((namedclass % 2)
? -1
: 1)))
{
posixl_matches_all = TRUE;
break; /* No need to continue. Since it matches both
@@ -17208,7 +17195,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
/* Add this class to those that should be checked at runtime */
ANYOF_POSIXL_SET(ret, namedclass);
POSIXL_SET(posixl, namedclass);
/* The above-Latin1 characters are not subject to locale rules.
* Just add them, in the second pass, to the
@@ -17728,7 +17715,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
RExC_end = RExC_parse + len;
RExC_in_multi_char_class = 1;
RExC_emit = (regnode *)orig_emit;
ret = reg(pRExC_state, 1, &reg_flags, depth+1);
@@ -17904,23 +17890,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* an optimization */
if (op != END) {
/* Throw away this ANYOF regnode, and emit the calculated one,
/* Emit the calculated regnode,
* which should correspond to the beginning, not current, state of
* the parse */
const char * cur_parse = RExC_parse;
RExC_parse = (char *)orig_parse;
if ( SIZE_ONLY) {
if (! LOC) {
/* To get locale nodes to not use the full ANYOF size would
* require moving the code above that writes the portions
* of it that aren't in other nodes to after this point.
* e.g. ANYOF_POSIXL_SET */
RExC_size = orig_size;
}
}
else {
RExC_emit = (regnode *)orig_emit;
if (PL_regkind[op] == POSIXD) {
if (op == POSIXL) {
RExC_contains_locale = 1;
@@ -17929,7 +17903,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
op += NPOSIXD - POSIXD;
}
}
}
ret = reg_node(pRExC_state, op);
@@ -17959,9 +17932,26 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
}
if (SIZE_ONLY)
/* Assume we are going to generate an ANYOF-type node. */
op = (posixl)
? ANYOFPOSIXL
: (LOC)
? ANYOFL
: ANYOF;
ret = reganode(pRExC_state, op, 0);
if (SIZE_ONLY) {
RExC_size += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP + 1;
return ret;
}
/****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
RExC_emit += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP;
ANYOF_FLAGS(ret) = anyof_flags;
if (posixl) {
ANYOF_POSIXL_SET_TO_BITMAP(ret, posixl);
}
/* If folding, we calculate all characters that could fold to or from the
* ones already on the list */
@@ -19917,7 +19907,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
const bool inverted = flags & ANYOF_INVERT;
if (OP(o) == ANYOFL) {
if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) {
if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
sv_catpvs(sv, "{utf8-locale-reqd}");
}
@@ -21186,7 +21176,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
}
}
else if (OP(node) == ANYOFL) {
else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
/* If either of these flags are set, what matches isn't
* determinable except during execution, so don't know enough here
View
@@ -50,8 +50,8 @@ my @tests = (
'ebcdic_ok_below_this_marker',
'(?l:[\x{212A}])' => 'ANYOFL[212A]',
'(?l:[\s\x{212A}])' => 'ANYOFL[\s][1680 2000-200A 2028-2029 202F 205F 212A 3000]',
'(?l:[^\S\x{202F}])' => 'ANYOFL[^\\S][1680 2000-200A 2028-2029 205F 3000]',
'(?l:[\s\x{212A}])' => 'ANYOFPOSIXL[\s][1680 2000-200A 2028-2029 202F 205F 212A 3000]',
'(?l:[^\S\x{202F}])' => 'ANYOFPOSIXL[^\\S][1680 2000-200A 2028-2029 205F 3000]',
'(?i:[^:])' => 'ANYOF[^:][0100-INFINITY]',
'[\p{Any}]' => 'ANYOF[\x00-\xFF][0100-10FFFF]',
'[\p{IsMyRuntimeProperty}]' => 'ANYOF[+utf8::IsMyRuntimeProperty]',

0 comments on commit bc4194d

Please sign in to comment.