Skip to content

Commit

Permalink
regcomp.c - extend REF to hold the paren it needs to regcppush
Browse files Browse the repository at this point in the history
This way we can avoid pushing every buffer; we only need to push
the nestroot of the ref.
  • Loading branch information
demerphq committed Jan 29, 2023
1 parent c825104 commit 7cbf8c2
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 83 deletions.
20 changes: 10 additions & 10 deletions pod/perldebguts.pod
Expand Up @@ -773,25 +773,25 @@ will be lost.
SROPEN none Same as OPEN, but for script run
SRCLOSE none Close preceding SROPEN

REF num 1 Match some already matched string
REFF num 1 Match already matched string, using /di
REF num 2 Match some already matched string
REFF num 2 Match already matched string, using /di
rules.
REFFL num 1 Match already matched string, using /li
REFFL num 2 Match already matched string, using /li
rules.
REFFU num 1 Match already matched string, using /ui.
REFFA num 1 Match already matched string, using /aai
REFFU num 2 Match already matched string, using /ui.
REFFA num 2 Match already matched string, using /aai
rules.

# Named references. Code in regcomp.c assumes that these all are after
# the numbered references
REFN no-sv 1 Match some already matched string
REFFN no-sv 1 Match already matched string, using /di
REFN no-sv 2 Match some already matched string
REFFN no-sv 2 Match already matched string, using /di
rules.
REFFLN no-sv 1 Match already matched string, using /li
REFFLN no-sv 2 Match already matched string, using /li
rules.
REFFUN num 1 Match already matched string, using /ui
REFFUN num 2 Match already matched string, using /ui
rules.
REFFAN num 1 Match already matched string, using /aai
REFFAN num 2 Match already matched string, using /aai
rules.

# Support for long RE
Expand Down
10 changes: 6 additions & 4 deletions regcomp.c
Expand Up @@ -2758,7 +2758,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
SvREFCNT_inc_simple_void_NN(sv_dat);
}
RExC_sawback = 1;
ret = reg1node(pRExC_state,
ret = reg2node(pRExC_state,
((! FOLD)
? REFN
: (ASCII_FOLD_RESTRICTED)
Expand All @@ -2768,7 +2768,9 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
: (LOC)
? REFFLN
: REFFN),
num);
num, RExC_nestroot);
if (RExC_nestroot && num >= (U32)RExC_nestroot)
REGNODE_p(ret)->flags = VOLATILE_REF;
*flagp |= HASWIDTH;

nextchar(pRExC_state);
Expand Down Expand Up @@ -6037,7 +6039,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
}
RExC_sawback = 1;
ret = reg1node(pRExC_state,
ret = reg2node(pRExC_state,
((! FOLD)
? REF
: (ASCII_FOLD_RESTRICTED)
Expand All @@ -6047,7 +6049,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
: (LOC)
? REFFL
: REFF),
num);
num, RExC_nestroot);
if (RExC_nestroot && num >= RExC_nestroot)
REGNODE_p(ret)->flags = VOLATILE_REF;
if (OP(REGNODE_p(ret)) == REFF) {
Expand Down
20 changes: 10 additions & 10 deletions regcomp.sym
Expand Up @@ -233,22 +233,22 @@ CLOSE CLOSE, num 1 ; Close corresponding OPEN of #n.
SROPEN SROPEN, none ; Same as OPEN, but for script run
SRCLOSE SRCLOSE, none ; Close preceding SROPEN

REF REF, num 1 V ; Match some already matched string
REFF REF, num 1 V ; Match already matched string, using /di rules.
REFFL REF, num 1 V ; Match already matched string, using /li rules.
REF REF, num 2 V ; Match some already matched string
REFF REF, num 2 V ; Match already matched string, using /di rules.
REFFL REF, num 2 V ; Match already matched string, using /li rules.
# N?REFF[AU] could have been implemented using the FLAGS field of the
# regnode, but by having a separate node type, we can use the existing switch
# statement to avoid some tests
REFFU REF, num 1 V ; Match already matched string, using /ui.
REFFA REF, num 1 V ; Match already matched string, using /aai rules.
REFFU REF, num 2 V ; Match already matched string, using /ui.
REFFA REF, num 2 V ; Match already matched string, using /aai rules.

#*Named references. Code in regcomp.c assumes that these all are after
#*the numbered references
REFN REF, no-sv 1 V ; Match some already matched string
REFFN REF, no-sv 1 V ; Match already matched string, using /di rules.
REFFLN REF, no-sv 1 V ; Match already matched string, using /li rules.
REFFUN REF, num 1 V ; Match already matched string, using /ui rules.
REFFAN REF, num 1 V ; Match already matched string, using /aai rules.
REFN REF, no-sv 2 V ; Match some already matched string
REFFN REF, no-sv 2 V ; Match already matched string, using /di rules.
REFFLN REF, no-sv 2 V ; Match already matched string, using /li rules.
REFFUN REF, num 2 V ; Match already matched string, using /ui rules.
REFFAN REF, num 2 V ; Match already matched string, using /aai rules.

#*Support for long RE
LONGJMP LONGJMP, off 1 . 1 ; Jump far away.
Expand Down
9 changes: 6 additions & 3 deletions regcomp_debug.c
Expand Up @@ -489,9 +489,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
|| k == GROUPP || op == ACCEPT)
{
AV *name_list= NULL;
U32 parno= (op == ACCEPT) ? (U32)ARG2i(o) :
(op == OPEN || op == CLOSE) ? (U32)PARNO(o) :
(U32)ARG1u(o);
U32 parno= (op == ACCEPT) ? ARG2u(o) :
(op == OPEN || op == CLOSE) ? PARNO(o) :
ARG1u(o);
if ( RXp_PAREN_NAMES(prog) ) {
name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
} else if ( pRExC_state ) {
Expand Down Expand Up @@ -542,6 +542,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
Perl_sv_catpvf(aTHX_ sv, "/%" UVuf, (UV)parno); /* Parenth number */

}
if ( k == REF ) {
Perl_sv_catpvf(aTHX_ sv, " <%" IVdf ">", (IV)ARG2i(o));
}
if ( k == REF && reginfo) {
U32 n = ARG1u(o); /* which paren pair */
I32 ln = RXp_OFFS_START(prog,n);
Expand Down
6 changes: 3 additions & 3 deletions regexec.c
Expand Up @@ -8115,7 +8115,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
}
ref_yes:
if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */
ST.cp = regcppush(rex, 0, maxopenparen);
ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(REF_next, next, locinput, loceol,
script_run_begin);
Expand Down Expand Up @@ -8152,7 +8152,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
U32 arg;

case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
arg= (U32)ARG1u(scan);
arg = ARG1u(scan);
if (cur_eval && cur_eval->locinput == locinput) {
if ( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_
Expand Down Expand Up @@ -8668,7 +8668,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
is_accepted = true;
if (scan->flags)
sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
utmp = (U32)ARG2i(scan);
utmp = ARG2u(scan);

if ( utmp ) {
regnode *cursor;
Expand Down
20 changes: 10 additions & 10 deletions regnodes.h
Expand Up @@ -94,16 +94,16 @@ typedef struct regnode tregnode_POSIXL;
typedef struct regnode tregnode_POSIXU;
typedef struct regnode_1 tregnode_PRUNE;
typedef struct regnode tregnode_PSEUDO;
typedef struct regnode_1 tregnode_REF;
typedef struct regnode_1 tregnode_REFF;
typedef struct regnode_1 tregnode_REFFA;
typedef struct regnode_1 tregnode_REFFAN;
typedef struct regnode_1 tregnode_REFFL;
typedef struct regnode_1 tregnode_REFFLN;
typedef struct regnode_1 tregnode_REFFN;
typedef struct regnode_1 tregnode_REFFU;
typedef struct regnode_1 tregnode_REFFUN;
typedef struct regnode_1 tregnode_REFN;
typedef struct regnode_2 tregnode_REF;
typedef struct regnode_2 tregnode_REFF;
typedef struct regnode_2 tregnode_REFFA;
typedef struct regnode_2 tregnode_REFFAN;
typedef struct regnode_2 tregnode_REFFL;
typedef struct regnode_2 tregnode_REFFLN;
typedef struct regnode_2 tregnode_REFFN;
typedef struct regnode_2 tregnode_REFFU;
typedef struct regnode_2 tregnode_REFFUN;
typedef struct regnode_2 tregnode_REFN;
typedef struct regnode_p tregnode_REGEX_SET;
typedef struct regnode tregnode_REG_ANY;
typedef struct regnode_1 tregnode_RENUM;
Expand Down
86 changes: 43 additions & 43 deletions t/re/pat_advanced.t
Expand Up @@ -2590,10 +2590,10 @@ Starting parse and generation
<\g{c}> | 6| brnc
| | piec
| | atom
<> | 8| tail~ OPEN1 'b' (4) -> REFN
| | Setting close paren #1 to 8
| 10| lsbr~ tying lastbr REFN (6) to ender CLOSE1 'b' (8) offset 2
| | tail~ REFN (6) -> CLOSE
<> | 9| tail~ OPEN1 'b' (4) -> REFN
| | Setting close paren #1 to 9
| 11| lsbr~ tying lastbr REFN <1> (6) to ender CLOSE1 'b' (9) offset 3
| | tail~ REFN <1> (6) -> CLOSE
Unmatched ( in regex; marked by <-- HERE in m/(?{a})( <-- HERE ?<b>\g{c}/ at - line 1.
Freeing REx: "(?{a})(?<b>\g{c}"
EOF_DEBUG_OUT
Expand All @@ -2618,35 +2618,35 @@ Starting parse and generation
<\g{c})(?<c>>...| 3| brnc
| | piec
| | atom
<)(?<c>x)(?&b)> | 5| tail~ OPEN1 'b' (1) -> REFN
| 7| lsbr~ tying lastbr REFN (3) to ender CLOSE1 'b' (5) offset 2
| | tail~ REFN (3) -> CLOSE
<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN
| 8| lsbr~ tying lastbr REFN <1> (3) to ender CLOSE1 'b' (6) offset 3
| | tail~ REFN <1> (3) -> CLOSE
<(?<c>x)(?&b)> | | piec
| | atom
<?<c>x)(?&b)> | | reg
<x)(?&b)> | 9| brnc
<x)(?&b)> | 10| brnc
| | piec
| | atom
<)(?&b)> | 11| tail~ OPEN2 'c' (7) -> EXACT
| 13| lsbr~ tying lastbr EXACT <x> (9) to ender CLOSE2 'c' (11) offset 2
| | tail~ EXACT <x> (9) -> CLOSE
<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT
| 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
| | tail~ EXACT <x> (10) -> CLOSE
<(?&b)> | | tail~ OPEN1 'b' (1)
| | ~ REFN (3)
| | ~ CLOSE1 'b' (5) -> OPEN
| | ~ REFN <1> (3)
| | ~ CLOSE1 'b' (6) -> OPEN
| | piec
| | atom
<?&b)> | | reg
<> | 16| tail~ OPEN2 'c' (7)
| | ~ EXACT <x> (9)
| | ~ CLOSE2 'c' (11) -> GOSUB
| 17| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (16) offset 15
<> | 17| tail~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12) -> GOSUB
| 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
| | tail~ OPEN1 'b' (1)
| | ~ REFN (3)
| | ~ CLOSE1 'b' (5)
| | ~ OPEN2 'c' (7)
| | ~ EXACT <x> (9)
| | ~ CLOSE2 'c' (11)
| | ~ GOSUB1[+0:13] 'b' (13) -> END
| | ~ REFN <1> (3)
| | ~ CLOSE1 'b' (6)
| | ~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12)
| | ~ GOSUB1[+0:14] 'b' (14) -> END
Need to redo parse
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
Starting parse and generation
Expand All @@ -2658,36 +2658,36 @@ Starting parse and generation
<\g{c})(?<c>>...| 3| brnc
| | piec
| | atom
<)(?<c>x)(?&b)> | 5| tail~ OPEN1 'b' (1) -> REFN
| 7| lsbr~ tying lastbr REFN2 'c' (3) to ender CLOSE1 'b' (5) offset 2
| | tail~ REFN2 'c' (3) -> CLOSE
<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN
| 8| lsbr~ tying lastbr REFN2 'c' <1> (3) to ender CLOSE1 'b' (6) offset 3
| | tail~ REFN2 'c' <1> (3) -> CLOSE
<(?<c>x)(?&b)> | | piec
| | atom
<?<c>x)(?&b)> | | reg
<x)(?&b)> | 9| brnc
<x)(?&b)> | 10| brnc
| | piec
| | atom
<)(?&b)> | 11| tail~ OPEN2 'c' (7) -> EXACT
| 13| lsbr~ tying lastbr EXACT <x> (9) to ender CLOSE2 'c' (11) offset 2
| | tail~ EXACT <x> (9) -> CLOSE
<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT
| 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
| | tail~ EXACT <x> (10) -> CLOSE
<(?&b)> | | tail~ OPEN1 'b' (1)
| | ~ REFN2 'c' (3)
| | ~ CLOSE1 'b' (5) -> OPEN
| | ~ REFN2 'c' <1> (3)
| | ~ CLOSE1 'b' (6) -> OPEN
| | piec
| | atom
<?&b)> | | reg
<> | 16| tail~ OPEN2 'c' (7)
| | ~ EXACT <x> (9)
| | ~ CLOSE2 'c' (11) -> GOSUB
| 17| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (16) offset 15
<> | 17| tail~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12) -> GOSUB
| 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
| | tail~ OPEN1 'b' (1)
| | ~ REFN2 'c' (3)
| | ~ CLOSE1 'b' (5)
| | ~ OPEN2 'c' (7)
| | ~ EXACT <x> (9)
| | ~ CLOSE2 'c' (11)
| | ~ GOSUB1[+0:13] 'b' (13) -> END
Required size 16 nodes
| | ~ REFN2 'c' <1> (3)
| | ~ CLOSE1 'b' (6)
| | ~ OPEN2 'c' (8)
| | ~ EXACT <x> (10)
| | ~ CLOSE2 'c' (12)
| | ~ GOSUB1[+0:14] 'b' (14) -> END
Required size 17 nodes
first at 3
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
EOF_DEBUG_OUT
Expand Down

0 comments on commit 7cbf8c2

Please sign in to comment.