Permalink
Browse files

regcomp.c: Use regnode offsets during parsing

This changes the pattern parsing to use offsets from the first node in
the pattern program, rather than direct addresses of such nodes.  This
is in preparation for a later change in which more mallocs will be done,
which will change those addresses, whereas the offsets will remain
constant.  Once the final program space is allocated, real addresses are
used, as they are currently.  This limits the necessary changes to a few
functions.  Also, real addresses are used if they are constant across a
function; again this limits the changes.

Doing this introduces a new typedef, 'regnode_offset', for clarity; it
is not a pointer, but a count.  This necessitates changing a bunch of
things to use 0 instead of NULL to indicate an error.

A new boolean is also required to indicate whether we are in the first
or second pass over the pattern.  Also, separate heap space is allocated
for scratch during the first pass.
  • Loading branch information...
khwilliamson committed Sep 17, 2018
1 parent a01fadd commit f9db1e08c6c61b99622c496aca20d024ef11a3c3
Showing with 393 additions and 336 deletions.
  1. +17 −17 embed.fnc
  2. +17 −17 proto.h
  3. +335 −280 regcomp.c
  4. +22 −22 regcomp.h
  5. +2 −0 regexp.h
View
@@ -2362,21 +2362,21 @@ Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o|NULLO
Ep |int |re_printf |NN const char *fmt|...
#endif
#if defined(PERL_IN_REGCOMP_C)
Es |regnode*|reg |NN RExC_state_t *pRExC_state \
Es |regnode_offset|reg |NN RExC_state_t *pRExC_state \
|I32 paren|NN I32 *flagp|U32 depth
Es |regnode*|regnode_guts |NN RExC_state_t *pRExC_state \
Es |regnode_offset|regnode_guts|NN RExC_state_t *pRExC_state \
|const U8 op \
|const STRLEN extra_len \
|NN const char* const name
Es |regnode*|reganode |NN RExC_state_t *pRExC_state|U8 op \
Es |regnode_offset|reganode|NN RExC_state_t *pRExC_state|U8 op \
|U32 arg
Es |regnode*|reg2Lanode |NN RExC_state_t *pRExC_state \
Es |regnode_offset|reg2Lanode|NN RExC_state_t *pRExC_state \
|const U8 op \
|const U32 arg1 \
|const I32 arg2
Es |regnode*|regatom |NN RExC_state_t *pRExC_state \
Es |regnode_offset|regatom |NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
Es |regnode*|regbranch |NN RExC_state_t *pRExC_state \
Es |regnode_offset|regbranch |NN RExC_state_t *pRExC_state \
|NN I32 *flagp|I32 first|U32 depth
Es |void |set_ANYOF_arg |NN RExC_state_t* const pRExC_state \
|NN regnode* const node \
@@ -2392,7 +2392,7 @@ Es |void |output_or_return_posix_warnings \
Es |AV* |add_multi_match|NULLOK AV* multi_char_matches \
|NN SV* multi_string \
|const STRLEN cp_count
Es |regnode*|regclass |NN RExC_state_t *pRExC_state \
Es |regnode_offset|regclass|NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth|const bool stop_at_1 \
|bool allow_multi_fold \
|const bool silence_non_portable \
@@ -2402,12 +2402,12 @@ Es |regnode*|regclass |NN RExC_state_t *pRExC_state \
|NULLOK AV** return_posix_warnings
Es |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
|NN SV** invlist
Ei |regnode*|handle_named_backref|NN RExC_state_t *pRExC_state \
Ei |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state \
|NN I32 *flagp \
|NN char * parse_start \
|char ch
EsnR |unsigned int|regex_set_precedence|const U8 my_operator
Es |regnode*|handle_regex_sets|NN RExC_state_t *pRExC_state \
Es |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \
|NULLOK SV ** return_invlist \
|NN I32 *flagp|U32 depth \
|NN char * const oregcomp_parse
@@ -2418,21 +2418,21 @@ Es |void |dump_regex_sets_structures \
|const IV fence|NN AV * fence_stack
#endif
Es |void|parse_lparen_question_flags|NN RExC_state_t *pRExC_state
Es |regnode*|reg_node |NN RExC_state_t *pRExC_state|U8 op
Es |regnode*|regpiece |NN RExC_state_t *pRExC_state \
Es |regnode_offset|reg_node|NN RExC_state_t *pRExC_state|U8 op
Es |regnode_offset|regpiece|NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth
Es |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \
|NULLOK regnode** nodep \
|NULLOK regnode_offset* nodep \
|NULLOK UV *code_point_p \
|NULLOK int* cp_count \
|NN I32 *flagp \
|const bool strict \
|const U32 depth
Es |void |reginsert |NN RExC_state_t *pRExC_state \
|U8 op|NN regnode *operand|U32 depth
|U8 op|regnode_offset operand|U32 depth
Es |void |regtail |NN RExC_state_t * pRExC_state \
|NN const regnode * const p \
|NN const regnode * const val \
|NN const regnode_offset p \
|NN const regnode_offset val \
|const U32 depth
Es |SV * |reg_scan_name |NN RExC_state_t *pRExC_state \
|U32 flags
@@ -2441,7 +2441,7 @@ Es |U32 |join_exact |NN RExC_state_t *pRExC_state \
|NN bool *unfolded_multi_char \
|U32 flags|NULLOK regnode *val|U32 depth
Ei |void |alloc_maybe_populate_EXACT|NN RExC_state_t *pRExC_state \
|NN regnode *node|NN I32 *flagp|STRLEN len \
|NN regnode_offset node|NN I32 *flagp|STRLEN len \
|UV code_point|bool downgradable
Ein |U8 |compute_EXACTish|NN RExC_state_t *pRExC_state
Es |void |nextchar |NN RExC_state_t *pRExC_state
@@ -2553,7 +2553,7 @@ Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
|NULLOK HV* widecharmap|NN AV *revcharmap\
|U32 next_alloc|U32 depth
Es |U8 |regtail_study |NN RExC_state_t *pRExC_state \
|NN regnode *p|NN const regnode *val|U32 depth
|NN regnode_offset p|NN const regnode_offset val|U32 depth
# endif
#endif
View
34 proto.h
@@ -4420,7 +4420,7 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
assert(fmt)
STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val, U32 depth);
STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
assert(pRExC_state); assert(p); assert(val)
# endif
@@ -5390,7 +5390,7 @@ STATIC AV* S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, con
#define PERL_ARGS_ASSERT_ADD_MULTI_MATCH \
assert(multi_string)
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32 *flagp, STRLEN len, UV code_point, bool downgradable);
PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode_offset node, I32 *flagp, STRLEN len, UV code_point, bool downgradable);
#define PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT \
assert(pRExC_state); assert(node); assert(flagp)
#endif
@@ -5425,18 +5425,18 @@ PERL_STATIC_INLINE STRLEN* S_get_invlist_iter_addr(SV* invlist)
assert(invlist)
#endif
STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \
assert(pRExC_state); assert(flagp)
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE regnode* S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
PERL_STATIC_INLINE regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF \
assert(pRExC_state); assert(flagp); assert(parse_start)
#endif
STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \
assert(pRExC_state); assert(s)
STATIC regnode* S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \
assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
STATIC SV* S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
@@ -5507,13 +5507,13 @@ PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const cha
#define PERL_ARGS_ASSERT_RE_CROAK2 \
assert(pat1); assert(pat2)
STATIC regnode* S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth);
STATIC regnode_offset S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REG \
assert(pRExC_state); assert(flagp)
STATIC regnode* S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2);
STATIC regnode_offset S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2);
#define PERL_ARGS_ASSERT_REG2LANODE \
assert(pRExC_state)
STATIC regnode* S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op);
STATIC regnode_offset S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op);
#define PERL_ARGS_ASSERT_REG_NODE \
assert(pRExC_state)
STATIC SV * S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags);
@@ -5524,31 +5524,31 @@ PERL_STATIC_INLINE char * S_reg_skipcomment(RExC_state_t *pRExC_state, char * p)
#define PERL_ARGS_ASSERT_REG_SKIPCOMMENT \
assert(pRExC_state); assert(p)
#endif
STATIC regnode* S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg);
STATIC regnode_offset S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg);
#define PERL_ARGS_ASSERT_REGANODE \
assert(pRExC_state)
STATIC regnode* S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
STATIC regnode_offset S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGATOM \
assert(pRExC_state); assert(flagp)
STATIC regnode* S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth);
STATIC regnode_offset S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth);
#define PERL_ARGS_ASSERT_REGBRANCH \
assert(pRExC_state); assert(flagp)
STATIC regnode* S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, const bool strict, bool optimizable, SV** ret_invlist, AV** return_posix_warnings);
STATIC regnode_offset S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, const bool strict, bool optimizable, SV** ret_invlist, AV** return_posix_warnings);
#define PERL_ARGS_ASSERT_REGCLASS \
assert(pRExC_state); assert(flagp)
STATIC unsigned int S_regex_set_precedence(const U8 my_operator)
__attribute__warn_unused_result__;
STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth);
STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode_offset operand, U32 depth);
#define PERL_ARGS_ASSERT_REGINSERT \
assert(pRExC_state); assert(operand)
STATIC regnode* S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name);
assert(pRExC_state)
STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name);
#define PERL_ARGS_ASSERT_REGNODE_GUTS \
assert(pRExC_state); assert(name)
STATIC regnode* S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode * const p, const regnode * const val, const U32 depth);
STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL \
assert(pRExC_state); assert(p); assert(val)
STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
Oops, something went wrong.

0 comments on commit f9db1e0

Please sign in to comment.