Skip to content

Commit

Permalink
XXX regcomp.sym: ANYOF lengths
Browse files Browse the repository at this point in the history
Won't compile without more stuff
  • Loading branch information
khwilliamson committed Oct 11, 2018
1 parent d6132ad commit a007dd9
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 46 deletions.
15 changes: 9 additions & 6 deletions pod/perldebguts.pod
Original file line number Diff line number Diff line change
Expand Up @@ -601,12 +601,15 @@ will be lost.
# [Special] alternatives:
REG_ANY no Match any one character (except newline).
SANY no Match any one character.
ANYOF sv 1 Match character in (or not in) this class,
single char match only
ANYOFD sv 1 Like ANYOF, but /d is in effect
ANYOFL sv 1 Like ANYOF, but /l is in effect
ANYOFPOSIXL sv 1 Like ANYOFL, but matches [[:posix:]]
classes
ANYOF sv Match character in (or not in) this class,
charclass single char match only
ANYOFD sv Like ANYOF, but /d is in effect
charclass
ANYOFL sv Like ANYOF, but /l is in effect
charclass
ANYOFPOSIXL sv Like ANYOFL, but matches [[:posix:]]
charclass_ classes
posixl
ANYOFM byte 1 Like ANYOF, but matches an invariant byte
as determined by the mask and arg

Expand Down
13 changes: 2 additions & 11 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -15426,7 +15426,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
RExC_parse++;
if (nest_depth--) break;
node = reganode(pRExC_state, ANYOF, 0);
RExC_size += ANYOF_SKIP;
nextchar(pRExC_state);
Set_Node_Length(node,
RExC_parse - oregcomp_parse + 1); /* MJD */
Expand Down Expand Up @@ -17941,12 +17940,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
ret = reganode(pRExC_state, op, 0);

if (SIZE_ONLY) {
RExC_size += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP + 1;
return ret;
}

/****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
RExC_emit += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP;

ANYOF_FLAGS(ret) = anyof_flags;
if (posixl) {
Expand Down Expand Up @@ -19198,7 +19195,8 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)

PERL_ARGS_ASSERT_REGANODE;

assert(regarglen[op] == 1);
/* ANYOF are special cased to allow non-length 1 args */
assert(regarglen[op] == 1 || PL_regkind[op] == ANYOF);

if (PASS2) {
regnode *ptr = ret;
Expand Down Expand Up @@ -21485,13 +21483,6 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
else if ( op == PLUS || op == STAR) {
DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
}
else if (PL_regkind[(U8)op] == ANYOF) {
/* arglen 1 + class block */
node += 1 + ((ANYOF_FLAGS(node) & ANYOF_MATCHES_POSIXL)
? ANYOF_POSIXL_SKIP
: ANYOF_SKIP);
node = NEXTOPER(node);
}
else if (PL_regkind[(U8)op] == EXACT) {
/* Literal string, where present. */
node += NODE_SZ_STR(node) - 1;
Expand Down
35 changes: 14 additions & 21 deletions regcomp.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,9 @@ struct regnode_ssc {
STMT_START { \
ARG_SET(ptr, arg); \
FILL_ADVANCE_NODE(ptr, op); \
(ptr) += 1; \
/* This is used generically for other operations\
* that have a longer argument */ \
(ptr) += regarglen[op]; \
} STMT_END
#define FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2) \
STMT_START { \
Expand Down Expand Up @@ -466,22 +468,18 @@ struct regnode_ssc {
* handler function, as the macro REGINCLASS in regexec.c does now for other
* cases.
*
* Another possibility is to instead (or additionally) rename the ANYOF_POSIXL
* flag to be ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits
* beyond what a regular one does. That's what it effectively means now, with
* the extra space all for the POSIX class flags. But those classes actually
* only occupy 30 bits, so the ANYOFL_FOLD and
* ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags could be moved
* to that extra space. The 30 bits in the extra word would indicate if a
* posix class should be looked up or not. The downside of this is that ANYOFL
* nodes with folding would always have to have the extra space allocated, even
* if they didn't use the 30 posix bits. There isn't an SSC problem as all
* SSCs are this large anyway.
* Another possibility is based on the fact that ANYOF_MATCHES_POSIXL is
* redundant with the node type ANYOFPOSIXL. That flag could be removed, but
* at the expense of extra code in regexec.c. The flag has been retained
* because it lets a single test determine whether we need to call
* reginclass() or can just use the bitmap.
*
* One could completely remove ANYOFL_LARGE and make all ANYOFL nodes large.
* REGINCLASS would have to be modified so that if the node type were this, it
* would call reginclass(), as the flag bit that indicates to do this now would
* be gone.
* If this is done, an extension would be to make all ANYOFL nodes contain the
* extra 32 bits that ANYOFPOSIXL ones do. The posix flags only occupy 30
* bits, so the ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD and
* ANYOFL_FOLD flags could be moved to that extra space, but it would mean
* extra instructions, as there are currently places in the code that assume
* those two bits are zero.
*
* All told, 5 bits could be available for other uses if all of the above were
* done.
Expand Down Expand Up @@ -707,11 +705,6 @@ struct regnode_ssc {
#define ANYOF_BITMAP_CLEARALL(p) \
Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)

#define ANYOF_SKIP (EXTRA_SIZE(regnode_charclass) \
- EXTRA_SIZE(struct regnode_1))
#define ANYOF_POSIXL_SKIP (EXTRA_SIZE(regnode_charclass_posixl) \
- EXTRA_SIZE(struct regnode_1))

/*
* Utility definitions.
*/
Expand Down
8 changes: 4 additions & 4 deletions regcomp.sym
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ NBOUNDA NBOUND, no ; Match "" between any \w\w or \W\W, where \w
#* [Special] alternatives:
REG_ANY REG_ANY, no 0 S ; Match any one character (except newline).
SANY REG_ANY, no 0 S ; Match any one character.
ANYOF ANYOF, sv 1 S ; Match character in (or not in) this class, single char match only
ANYOFD ANYOF, sv 1 S ; Like ANYOF, but /d is in effect
ANYOFL ANYOF, sv 1 S ; Like ANYOF, but /l is in effect
ANYOFPOSIXL ANYOF, sv 1 S ; Like ANYOFL, but matches [[:posix:]] classes
ANYOF ANYOF, sv charclass S ; Match character in (or not in) this class, single char match only
ANYOFD ANYOF, sv charclass S ; Like ANYOF, but /d is in effect
ANYOFL ANYOF, sv charclass S ; Like ANYOF, but /l is in effect
ANYOFPOSIXL ANYOF, sv charclass_posixl S ; Like ANYOFL, but matches [[:posix:]] classes
ANYOFM ANYOFM byte 1 S ; Like ANYOF, but matches an invariant byte as determined by the mask and arg

#* POSIX Character Classes:
Expand Down
8 changes: 4 additions & 4 deletions regnodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,10 +324,10 @@ static const U8 regarglen[] = {
0, /* NBOUNDA */
0, /* REG_ANY */
0, /* SANY */
EXTRA_SIZE(struct regnode_1), /* ANYOF */
EXTRA_SIZE(struct regnode_1), /* ANYOFD */
EXTRA_SIZE(struct regnode_1), /* ANYOFL */
EXTRA_SIZE(struct regnode_1), /* ANYOFPOSIXL */
EXTRA_SIZE(struct regnode_charclass), /* ANYOF */
EXTRA_SIZE(struct regnode_charclass), /* ANYOFD */
EXTRA_SIZE(struct regnode_charclass), /* ANYOFL */
EXTRA_SIZE(struct regnode_charclass_posixl), /* ANYOFPOSIXL */
EXTRA_SIZE(struct regnode_1), /* ANYOFM */
0, /* POSIXD */
0, /* POSIXL */
Expand Down

0 comments on commit a007dd9

Please sign in to comment.