Skip to content

Commit

Permalink
[perl #116907] Allow //g matching past 2**31 threshold
Browse files Browse the repository at this point in the history
Change the internal fields for storing positions so that //g in scalar
context can move past the 2**31 character threshold.  Before this com-
mit, the numbers would wrap, resulting in assertion failures.

The changes in this commit are only enough to get the added test pass-
ing.  Stay tuned for more.
  • Loading branch information
Father Chrysostomos committed Aug 25, 2013
1 parent 389ecb5 commit 99a90e5
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 18 deletions.
2 changes: 1 addition & 1 deletion embed.fnc
Expand Up @@ -2091,7 +2091,7 @@ Es |U8 |regtail_study |NN struct RExC_state_t *pRExC_state \
#if defined(PERL_IN_REGEXEC_C)
ERs |bool |isFOO_lc |const U8 classnum|const U8 character
ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character
ERs |I32 |regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
ERs |SSize_t|regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
ERs |I32 |regrepeat |NN regexp *prog|NN char **startposp \
|NN const regnode *p \
|NN regmatch_info *const reginfo \
Expand Down
2 changes: 1 addition & 1 deletion pp_hot.c
Expand Up @@ -1326,7 +1326,7 @@ PP(pp_match)
PMOP *dynpm = pm;
const char *s;
const char *strend;
I32 curpos = 0; /* initial pos() or current $+[0] */
SSize_t curpos = 0; /* initial pos() or current $+[0] */
I32 global;
U8 r_flags = 0;
const char *truebase; /* Start of string */
Expand Down
2 changes: 1 addition & 1 deletion proto.h
Expand Up @@ -7000,7 +7000,7 @@ STATIC bool S_reginclass(pTHX_ regexp * const prog, const regnode * const n, con
#define PERL_ARGS_ASSERT_REGINCLASS \
assert(n); assert(p)

STATIC I32 S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
STATIC SSize_t S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
Expand Down
4 changes: 2 additions & 2 deletions regcomp.c
Expand Up @@ -6729,7 +6729,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
struct regexp *const rx = ReANY(r);
char *s = NULL;
I32 i = 0;
I32 s1, t1;
SSize_t s1, t1;
I32 n = paren;

PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
Expand Down Expand Up @@ -6787,7 +6787,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
}

assert(s >= rx->subbeg);
assert(rx->sublen >= (s - rx->subbeg) + i );
assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
if (i >= 0) {
#if NO_TAINT_SUPPORT
sv_setpvn(sv, s, i);
Expand Down
16 changes: 8 additions & 8 deletions regexec.c
Expand Up @@ -296,8 +296,8 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen)
);
for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
SSPUSHINT(rex->offs[p].end);
SSPUSHINT(rex->offs[p].start);
SSPUSHIV(rex->offs[p].end);
SSPUSHIV(rex->offs[p].start);
SSPUSHINT(rex->offs[p].start_tmp);
DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
Expand Down Expand Up @@ -371,8 +371,8 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p)
for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
I32 tmps;
rex->offs[paren].start_tmp = SSPOPINT;
rex->offs[paren].start = SSPOPINT;
tmps = SSPOPINT;
rex->offs[paren].start = SSPOPIV;
tmps = SSPOPIV;
if (paren <= rex->lastparen)
rex->offs[paren].end = tmps;
DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
Expand Down Expand Up @@ -2097,8 +2097,8 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx,
} else
#endif
{
I32 min = 0;
I32 max = strend - strbeg;
SSize_t min = 0;
SSize_t max = strend - strbeg;
I32 sublen;

if ( (flags & REXEC_COPY_SKIP_POST)
Expand Down Expand Up @@ -2938,7 +2938,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
CHECKPOINT lastcp;
REGEXP *const rx = reginfo->prog;
regexp *const prog = ReANY(rx);
I32 result;
SSize_t result;
RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;

Expand Down Expand Up @@ -3583,7 +3583,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
}

/* returns -1 on failure, $+[0] on success */
STATIC I32
STATIC SSize_t
S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
{
#if PERL_VERSION < 9 && !defined(PERL_CORE)
Expand Down
7 changes: 4 additions & 3 deletions regexp.h
Expand Up @@ -55,8 +55,8 @@ struct reg_substr_data {
/* offsets within a string of a particular /(.)/ capture */

typedef struct regexp_paren_pair {
I32 start;
I32 end;
SSize_t start;
SSize_t end;
/* 'start_tmp' records a new opening position before the matching end
* has been found, so that the old start and end values are still
* valid, e.g.
Expand Down Expand Up @@ -503,7 +503,8 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RX_SAVED_COPY(prog) (ReANY(prog)->saved_copy)
/* last match was zero-length */
#define RX_ZERO_LEN(prog) \
(RX_OFFS(prog)[0].start + RX_GOFS(prog) == (UV)RX_OFFS(prog)[0].end)
(RX_OFFS(prog)[0].start + (SSize_t)RX_GOFS(prog) \
== RX_OFFS(prog)[0].end)

#endif /* PLUGGABLE_RE_EXTENSION */

Expand Down
12 changes: 10 additions & 2 deletions t/bigmem/regexp.t
Expand Up @@ -12,11 +12,19 @@ $ENV{PERL_TEST_MEMORY} >= 2
$Config{ptrsize} >= 8
or skip_all("Need 64-bit pointers for this test");

plan(2);
plan(3);

# [perl #116907]
# ${\2} to defeat constant folding, which in this case actually slows
# things down
my $x=" "x(${\2}**31);
my $x=" "x(${\2}**31) . "abcdefg";
ok $x =~ /./, 'match against long string succeeded';
is "$-[0]-$+[0]", '0-1', '@-/@+ after match against long string';

pos $x = 2**31-1;
my $result;
for(1..5) {
$x =~ /./g;
$result .= "$&-";
}
is $result," -a-b-c-d-", 'scalar //g hopping past the 2**31 threshold';

0 comments on commit 99a90e5

Please sign in to comment.