Skip to content

Commit

Permalink
add, and use, some RXp_FOO() variants of RX_FOO()
Browse files Browse the repository at this point in the history
For various  RX_FOO() macros, add a RXp_FOO() variant, in such a way that
the original macro is now defined in terms of

    #define RX_FOO(rx_sv)  (RXp_FOO(ReANY(rx_sv)))

(This is a pre-existing convention; this commit just makes a larger subset
of the RX_() macros have an RXp_() variant).

Then use those macros in various pp_hot.c and regexec.c functions like
pp_match() and regexec_flags(), which already, or added via this commit,
have this line near the start:

    regexp *prog = ReANY(rx);

This avoids having to do multiple ReANY()'s, which is important as this
macro now includes a conditional in its expression (to cope with
PVLV-as-REGEX)..
  • Loading branch information
iabyn committed Jul 27, 2017
1 parent ea0673e commit 196a02a
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 84 deletions.
105 changes: 57 additions & 48 deletions pp_hot.c
Expand Up @@ -1898,7 +1898,8 @@ PP(pp_qr)
dSP;
PMOP * const pm = cPMOP;
REGEXP * rx = PM_GETRE(pm);
SV * const pkg = rx ? CALLREG_PACKAGE(rx) : NULL;
regexp *prog = ReANY(rx);
SV * const pkg = RXp_ENGINE(prog)->qr_package(aTHX_ (rx));
SV * const rv = sv_newmortal();
CV **cvp;
CV *cv;
Expand All @@ -1925,7 +1926,7 @@ PP(pp_qr)
(void)sv_bless(rv, stash);
}

if (UNLIKELY(RX_ISTAINTED(rx))) {
if (UNLIKELY(RXp_ISTAINTED(prog))) {
SvTAINTED_on(rv);
SvTAINTED_on(SvRV(rv));
}
Expand All @@ -1945,6 +1946,7 @@ PP(pp_match)
U8 r_flags = 0;
const char *truebase; /* Start of string */
REGEXP *rx = PM_GETRE(pm);
regexp *prog = ReANY(rx);
bool rxtainted;
const U8 gimme = GIMME_V;
STRLEN len;
Expand All @@ -1966,13 +1968,13 @@ PP(pp_match)
PUTBACK; /* EVAL blocks need stack_sp. */
/* Skip get-magic if this is a qr// clone, because regcomp has
already done it. */
truebase = ReANY(rx)->mother_re
truebase = prog->mother_re
? SvPV_nomg_const(TARG, len)
: SvPV_const(TARG, len);
if (!truebase)
DIE(aTHX_ "panic: pp_match");
strend = truebase + len;
rxtainted = (RX_ISTAINTED(rx) ||
rxtainted = (RXp_ISTAINTED(prog) ||
(TAINT_get && (pm->op_pmflags & PMf_RETAINT)));
TAINT_NOT;

Expand All @@ -1992,7 +1994,7 @@ PP(pp_match)
}

/* handle the empty pattern */
if (!RX_PRELEN(rx) && PL_curpm && !ReANY(rx)->mother_re) {
if (!RX_PRELEN(rx) && PL_curpm && !prog->mother_re) {
if (PL_curpm == PL_reg_curpm) {
if (PL_curpm_under) {
if (PL_curpm_under == PL_reg_curpm) {
Expand All @@ -2005,12 +2007,13 @@ PP(pp_match)
pm = PL_curpm;
}
rx = PM_GETRE(pm);
prog = ReANY(rx);
}

if (RX_MINLEN(rx) >= 0 && (STRLEN)RX_MINLEN(rx) > len) {
if (RXp_MINLEN(prog) >= 0 && (STRLEN)RXp_MINLEN(prog) > len) {
DEBUG_r(PerlIO_printf(Perl_debug_log, "String shorter than min possible regex match (%"
UVuf " < %" IVdf ")\n",
(UV)len, (IV)RX_MINLEN(rx)));
(UV)len, (IV)RXp_MINLEN(prog)));
goto nope;
}

Expand All @@ -2026,9 +2029,9 @@ PP(pp_match)
}

#ifdef PERL_SAWAMPERSAND
if ( RX_NPARENS(rx)
if ( RXp_NPARENS(prog)
|| PL_sawampersand
|| (RX_EXTFLAGS(rx) & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY))
|| (RXp_EXTFLAGS(prog) & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY))
|| (dynpm->op_pmflags & PMf_KEEPCOPY)
)
#endif
Expand Down Expand Up @@ -2066,56 +2069,59 @@ PP(pp_match)
#endif

if (rxtainted)
RX_MATCH_TAINTED_on(rx);
TAINT_IF(RX_MATCH_TAINTED(rx));
RXp_MATCH_TAINTED_on(prog);
TAINT_IF(RXp_MATCH_TAINTED(prog));

/* update pos */

if (global && (gimme != G_ARRAY || (dynpm->op_pmflags & PMf_CONTINUE))) {
if (!mg)
mg = sv_magicext_mglob(TARG);
MgBYTEPOS_set(mg, TARG, truebase, RX_OFFS(rx)[0].end);
if (RX_ZERO_LEN(rx))
MgBYTEPOS_set(mg, TARG, truebase, RXp_OFFS(prog)[0].end);
if (RXp_ZERO_LEN(prog))
mg->mg_flags |= MGf_MINMATCH;
else
mg->mg_flags &= ~MGf_MINMATCH;
}

if ((!RX_NPARENS(rx) && !global) || gimme != G_ARRAY) {
if ((!RXp_NPARENS(prog) && !global) || gimme != G_ARRAY) {
LEAVE_SCOPE(oldsave);
RETPUSHYES;
}

/* push captures on stack */

{
const I32 nparens = RX_NPARENS(rx);
const I32 nparens = RXp_NPARENS(prog);
I32 i = (global && !nparens) ? 1 : 0;

SPAGAIN; /* EVAL blocks could move the stack. */
EXTEND(SP, nparens + i);
EXTEND_MORTAL(nparens + i);
for (i = !i; i <= nparens; i++) {
PUSHs(sv_newmortal());
if (LIKELY((RX_OFFS(rx)[i].start != -1)
&& RX_OFFS(rx)[i].end != -1 ))
if (LIKELY((RXp_OFFS(prog)[i].start != -1)
&& RXp_OFFS(prog)[i].end != -1 ))
{
const I32 len = RX_OFFS(rx)[i].end - RX_OFFS(rx)[i].start;
const char * const s = RX_OFFS(rx)[i].start + truebase;
if (UNLIKELY(RX_OFFS(rx)[i].end < 0 || RX_OFFS(rx)[i].start < 0
|| len < 0 || len > strend - s))
const I32 len = RXp_OFFS(prog)[i].end - RXp_OFFS(prog)[i].start;
const char * const s = RXp_OFFS(prog)[i].start + truebase;
if (UNLIKELY( RXp_OFFS(prog)[i].end < 0
|| RXp_OFFS(prog)[i].start < 0
|| len < 0
|| len > strend - s)
)
DIE(aTHX_ "panic: pp_match start/end pointers, i=%ld, "
"start=%ld, end=%ld, s=%p, strend=%p, len=%" UVuf,
(long) i, (long) RX_OFFS(rx)[i].start,
(long)RX_OFFS(rx)[i].end, s, strend, (UV) len);
(long) i, (long) RXp_OFFS(prog)[i].start,
(long)RXp_OFFS(prog)[i].end, s, strend, (UV) len);
sv_setpvn(*SP, s, len);
if (DO_UTF8(TARG) && is_utf8_string((U8*)s, len))
SvUTF8_on(*SP);
}
}
if (global) {
curpos = (UV)RX_OFFS(rx)[0].end;
had_zerolen = RX_ZERO_LEN(rx);
curpos = (UV)RXp_OFFS(prog)[0].end;
had_zerolen = RXp_ZERO_LEN(prog);
PUTBACK; /* EVAL blocks may use stack */
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
goto play_it_again;
Expand Down Expand Up @@ -3128,6 +3134,7 @@ PP(pp_subst)
char *orig;
U8 r_flags;
REGEXP *rx = PM_GETRE(pm);
regexp *prog = ReANY(rx);
STRLEN len;
int force_on_match = 0;
const I32 oldsave = PL_savestack_ix;
Expand Down Expand Up @@ -3185,7 +3192,7 @@ PP(pp_subst)
if (TAINTING_get) {
rxtainted = (
(SvTAINTED(TARG) ? SUBST_TAINT_STR : 0)
| (RX_ISTAINTED(rx) ? SUBST_TAINT_PAT : 0)
| (RXp_ISTAINTED(prog) ? SUBST_TAINT_PAT : 0)
| ((pm->op_pmflags & PMf_RETAINT) ? SUBST_TAINT_RETAINT : 0)
| ((once && !(rpm->op_pmflags & PMf_NONDESTRUCT))
? SUBST_TAINT_BOOLRET : 0));
Expand All @@ -3203,7 +3210,7 @@ PP(pp_subst)
second time with non-zero. */

/* handle the empty pattern */
if (!RX_PRELEN(rx) && PL_curpm && !ReANY(rx)->mother_re) {
if (!RX_PRELEN(rx) && PL_curpm && !prog->mother_re) {
if (PL_curpm == PL_reg_curpm) {
if (PL_curpm_under) {
if (PL_curpm_under == PL_reg_curpm) {
Expand All @@ -3216,12 +3223,13 @@ PP(pp_subst)
pm = PL_curpm;
}
rx = PM_GETRE(pm);
prog = ReANY(rx);
}

#ifdef PERL_SAWAMPERSAND
r_flags = ( RX_NPARENS(rx)
r_flags = ( RXp_NPARENS(prog)
|| PL_sawampersand
|| (RX_EXTFLAGS(rx) & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY))
|| (RXp_EXTFLAGS(prog) & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY))
|| (rpm->op_pmflags & PMf_KEEPCOPY)
)
? REXEC_COPY_STR
Expand Down Expand Up @@ -3267,12 +3275,12 @@ PP(pp_subst)
#ifdef PERL_ANY_COW
&& !was_cow
#endif
&& (I32)clen <= RX_MINLENRET(rx)
&& (I32)clen <= RXp_MINLENRET(prog)
&& ( once
|| !(r_flags & REXEC_COPY_STR)
|| (!SvGMAGICAL(dstr) && !(RX_EXTFLAGS(rx) & RXf_EVAL_SEEN))
|| (!SvGMAGICAL(dstr) && !(RXp_EXTFLAGS(prog) & RXf_EVAL_SEEN))
)
&& !(RX_EXTFLAGS(rx) & RXf_NO_INPLACE_SUBST)
&& !(RXp_EXTFLAGS(prog) & RXf_NO_INPLACE_SUBST)
&& (!doutf8 || SvUTF8(TARG))
&& !(rpm->op_pmflags & PMf_NONDESTRUCT))
{
Expand All @@ -3295,10 +3303,10 @@ PP(pp_subst)

if (once) {
char *d, *m;
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
if (RXp_MATCH_TAINTED(prog)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
m = orig + RX_OFFS(rx)[0].start;
d = orig + RX_OFFS(rx)[0].end;
m = orig + RXp_OFFS(prog)[0].start;
d = orig + RXp_OFFS(prog)[0].end;
s = orig;
if (m - s > strend - d) { /* faster to shorten from end */
I32 i;
Expand Down Expand Up @@ -3328,14 +3336,15 @@ PP(pp_subst)
}
else {
char *d, *m;
d = s = RX_OFFS(rx)[0].start + orig;
d = s = RXp_OFFS(prog)[0].start + orig;
do {
I32 i;
if (UNLIKELY(iters++ > maxiters))
DIE(aTHX_ "Substitution loop");
if (UNLIKELY(RX_MATCH_TAINTED(rx))) /* run time pattern taint, eg locale */
/* run time pattern taint, eg locale */
if (UNLIKELY(RXp_MATCH_TAINTED(prog)))
rxtainted |= SUBST_TAINT_PAT;
m = RX_OFFS(rx)[0].start + orig;
m = RXp_OFFS(prog)[0].start + orig;
if ((i = m - s)) {
if (s != d)
Move(s, d, i, char);
Expand All @@ -3345,7 +3354,7 @@ PP(pp_subst)
Copy(c, d, clen, char);
d += clen;
}
s = RX_OFFS(rx)[0].end + orig;
s = RXp_OFFS(prog)[0].end + orig;
} while (CALLREGEXEC(rx, s, strend, orig,
s == m, /* don't match same null twice */
TARG, NULL,
Expand Down Expand Up @@ -3381,10 +3390,10 @@ PP(pp_subst)
#ifdef PERL_ANY_COW
have_a_cow:
#endif
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
if (RXp_MATCH_TAINTED(prog)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
repl = dstr;
s = RX_OFFS(rx)[0].start + orig;
s = RXp_OFFS(prog)[0].start + orig;
dstr = newSVpvn_flags(orig, s-orig,
SVs_TEMP | (DO_UTF8(TARG) ? SVf_UTF8 : 0));
if (!c) {
Expand All @@ -3403,20 +3412,20 @@ PP(pp_subst)
do {
if (UNLIKELY(iters++ > maxiters))
DIE(aTHX_ "Substitution loop");
if (UNLIKELY(RX_MATCH_TAINTED(rx)))
if (UNLIKELY(RXp_MATCH_TAINTED(prog)))
rxtainted |= SUBST_TAINT_PAT;
if (RX_MATCH_COPIED(rx) && RX_SUBBEG(rx) != orig) {
if (RXp_MATCH_COPIED(prog) && RXp_SUBBEG(prog) != orig) {
char *old_s = s;
char *old_orig = orig;
assert(RX_SUBOFFSET(rx) == 0);
assert(RXp_SUBOFFSET(prog) == 0);

orig = RX_SUBBEG(rx);
orig = RXp_SUBBEG(prog);
s = orig + (old_s - old_orig);
strend = s + (strend - old_s);
}
m = RX_OFFS(rx)[0].start + orig;
m = RXp_OFFS(prog)[0].start + orig;
sv_catpvn_nomg_maybeutf8(dstr, s, m - s, DO_UTF8(TARG));
s = RX_OFFS(rx)[0].end + orig;
s = RXp_OFFS(prog)[0].end + orig;
if (first) {
/* replacement already stringified */
if (clen)
Expand Down Expand Up @@ -3478,7 +3487,7 @@ PP(pp_subst)
((rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) ==
(SUBST_TAINT_STR|SUBST_TAINT_RETAINT))
)
(RX_MATCH_TAINTED_on(rx)); /* taint $1 et al */
(RXp_MATCH_TAINTED_on(prog)); /* taint $1 et al */

if (!(rxtainted & SUBST_TAINT_BOOLRET)
&& (rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_PAT))
Expand Down
18 changes: 9 additions & 9 deletions regexec.c
Expand Up @@ -707,7 +707,7 @@ Perl_re_intuit_start(pTHX_
goto fail;
}

RX_MATCH_UTF8_set(rx,utf8_target);
RXp_MATCH_UTF8_set(prog, utf8_target);
reginfo->is_utf8_target = cBOOL(utf8_target);
reginfo->info_aux = NULL;
reginfo->strbeg = strbeg;
Expand Down Expand Up @@ -841,7 +841,7 @@ Perl_re_intuit_start(pTHX_
#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
if (end_shift < 0)
Perl_croak(aTHX_ "panic: end_shift: %" IVdf " pattern:\n%s\n ",
(IV)end_shift, RX_PRECOMP(prog));
(IV)end_shift, RX_PRECOMP(rx));
#endif

restart:
Expand Down Expand Up @@ -2771,7 +2771,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx,
}
else {
/* create new COW SV to share string */
RX_MATCH_COPY_FREE(rx);
RXp_MATCH_COPY_FREE(prog);
prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
}
prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
Expand Down Expand Up @@ -2834,7 +2834,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx,
assert(min >= 0 && min <= max && min <= strend - strbeg);
sublen = max - min;

if (RX_MATCH_COPIED(rx)) {
if (RXp_MATCH_COPIED(prog)) {
if (sublen > prog->sublen)
prog->subbeg =
(char*)saferealloc(prog->subbeg, sublen+1);
Expand All @@ -2845,7 +2845,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx,
prog->subbeg[sublen] = '\0';
prog->suboffset = min;
prog->sublen = sublen;
RX_MATCH_COPIED_on(rx);
RXp_MATCH_COPIED_on(prog);
}
prog->subcoffset = prog->suboffset;
if (prog->suboffset && utf8_target) {
Expand All @@ -2872,7 +2872,7 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx,
}
}
else {
RX_MATCH_COPY_FREE(rx);
RXp_MATCH_COPY_FREE(prog);
prog->subbeg = strbeg;
prog->suboffset = 0;
prog->subcoffset = 0;
Expand Down Expand Up @@ -3029,7 +3029,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
/* match via INTUIT shouldn't have any captures.
* Let @-, @+, $^N know */
prog->lastparen = prog->lastcloseparen = 0;
RX_MATCH_UTF8_set(rx, utf8_target);
RXp_MATCH_UTF8_set(prog, utf8_target);
prog->offs[0].start = s - strbeg;
prog->offs[0].end = utf8_target
? (char*)utf8_hop((U8*)s, prog->minlenret) - strbeg
Expand All @@ -3056,8 +3056,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
Perl_croak(aTHX_ "corrupted regexp program");
}

RX_MATCH_TAINTED_off(rx);
RX_MATCH_UTF8_set(rx, utf8_target);
RXp_MATCH_TAINTED_off(prog);
RXp_MATCH_UTF8_set(prog, utf8_target);

reginfo->prog = rx; /* Yes, sorry that this is confusing. */
reginfo->intuit = 0;
Expand Down

0 comments on commit 196a02a

Please sign in to comment.