Skip to content

Commit

Permalink
regcomp.c: Use regnode offsets during parsing
Browse files Browse the repository at this point in the history
This changes the pattern parsing to use offsets from the first node in
the pattern program, rather than direct addresses of such nodes.  This
is in preparation for a later change in which more mallocs will be done,
which will change those addresses, whereas the offsets will remain
constant.  Once the final program space is allocated, real addresses are
used, as they are currently.  This limits the necessary changes to a few
functions.  Also, real addresses are used if they are constant across a
function; again this limits the changes.

Doing this introduces a new typedef, 'regnode_offset', for clarity; it
is not a pointer, but a count.  This necessitates changing a bunch of
things to use 0 instead of NULL to indicate an error.

A new boolean is also required to indicate whether we are in the first
or second pass over the pattern.  Separate heap space is also allocated
for scratch use during the first pass.
  • Loading branch information
khwilliamson committed Oct 11, 2018
1 parent a007dd9 commit d97f5fc
Show file tree
Hide file tree
Showing 5 changed files with 393 additions and 336 deletions.
34 changes: 17 additions & 17 deletions embed.fnc
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -2362,21 +2362,21 @@ Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o|NULLO
Ep |int |re_printf |NN const char *fmt|... Ep |int |re_printf |NN const char *fmt|...
#endif #endif
#if defined(PERL_IN_REGCOMP_C) #if defined(PERL_IN_REGCOMP_C)
Es |regnode*|reg |NN RExC_state_t *pRExC_state \ Es |regnode_offset|reg |NN RExC_state_t *pRExC_state \
|I32 paren|NN I32 *flagp|U32 depth |I32 paren|NN I32 *flagp|U32 depth
Es |regnode*|regnode_guts |NN RExC_state_t *pRExC_state \ Es |regnode_offset|regnode_guts|NN RExC_state_t *pRExC_state \
|const U8 op \ |const U8 op \
|const STRLEN extra_len \ |const STRLEN extra_len \
|NN const char* const name |NN const char* const name
Es |regnode*|reganode |NN RExC_state_t *pRExC_state|U8 op \ Es |regnode_offset|reganode|NN RExC_state_t *pRExC_state|U8 op \
|U32 arg |U32 arg
Es |regnode*|reg2Lanode |NN RExC_state_t *pRExC_state \ Es |regnode_offset|reg2Lanode|NN RExC_state_t *pRExC_state \
|const U8 op \ |const U8 op \
|const U32 arg1 \ |const U32 arg1 \
|const I32 arg2 |const I32 arg2
Es |regnode*|regatom |NN RExC_state_t *pRExC_state \ Es |regnode_offset|regatom |NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth |NN I32 *flagp|U32 depth
Es |regnode*|regbranch |NN RExC_state_t *pRExC_state \ Es |regnode_offset|regbranch |NN RExC_state_t *pRExC_state \
|NN I32 *flagp|I32 first|U32 depth |NN I32 *flagp|I32 first|U32 depth
Es |void |set_ANYOF_arg |NN RExC_state_t* const pRExC_state \ Es |void |set_ANYOF_arg |NN RExC_state_t* const pRExC_state \
|NN regnode* const node \ |NN regnode* const node \
Expand All @@ -2392,7 +2392,7 @@ Es |void |output_or_return_posix_warnings \
Es |AV* |add_multi_match|NULLOK AV* multi_char_matches \ Es |AV* |add_multi_match|NULLOK AV* multi_char_matches \
|NN SV* multi_string \ |NN SV* multi_string \
|const STRLEN cp_count |const STRLEN cp_count
Es |regnode*|regclass |NN RExC_state_t *pRExC_state \ Es |regnode_offset|regclass|NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth|const bool stop_at_1 \ |NN I32 *flagp|U32 depth|const bool stop_at_1 \
|bool allow_multi_fold \ |bool allow_multi_fold \
|const bool silence_non_portable \ |const bool silence_non_portable \
Expand All @@ -2402,12 +2402,12 @@ Es |regnode*|regclass |NN RExC_state_t *pRExC_state \
|NULLOK AV** return_posix_warnings |NULLOK AV** return_posix_warnings
Es |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \ Es |void|add_above_Latin1_folds|NN RExC_state_t *pRExC_state|const U8 cp \
|NN SV** invlist |NN SV** invlist
Ei |regnode*|handle_named_backref|NN RExC_state_t *pRExC_state \ Ei |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state \
|NN I32 *flagp \ |NN I32 *flagp \
|NN char * parse_start \ |NN char * parse_start \
|char ch |char ch
EsnR |unsigned int|regex_set_precedence|const U8 my_operator EsnR |unsigned int|regex_set_precedence|const U8 my_operator
Es |regnode*|handle_regex_sets|NN RExC_state_t *pRExC_state \ Es |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \
|NULLOK SV ** return_invlist \ |NULLOK SV ** return_invlist \
|NN I32 *flagp|U32 depth \ |NN I32 *flagp|U32 depth \
|NN char * const oregcomp_parse |NN char * const oregcomp_parse
Expand All @@ -2418,21 +2418,21 @@ Es |void |dump_regex_sets_structures \
|const IV fence|NN AV * fence_stack |const IV fence|NN AV * fence_stack
#endif #endif
Es |void|parse_lparen_question_flags|NN RExC_state_t *pRExC_state Es |void|parse_lparen_question_flags|NN RExC_state_t *pRExC_state
Es |regnode*|reg_node |NN RExC_state_t *pRExC_state|U8 op Es |regnode_offset|reg_node|NN RExC_state_t *pRExC_state|U8 op
Es |regnode*|regpiece |NN RExC_state_t *pRExC_state \ Es |regnode_offset|regpiece|NN RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth |NN I32 *flagp|U32 depth
Es |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \ Es |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \
|NULLOK regnode** nodep \ |NULLOK regnode_offset* nodep \
|NULLOK UV *code_point_p \ |NULLOK UV *code_point_p \
|NULLOK int* cp_count \ |NULLOK int* cp_count \
|NN I32 *flagp \ |NN I32 *flagp \
|const bool strict \ |const bool strict \
|const U32 depth |const U32 depth
Es |void |reginsert |NN RExC_state_t *pRExC_state \ Es |void |reginsert |NN RExC_state_t *pRExC_state \
|U8 op|NN regnode *operand|U32 depth |U8 op|regnode_offset operand|U32 depth
Es |void |regtail |NN RExC_state_t * pRExC_state \ Es |void |regtail |NN RExC_state_t * pRExC_state \
|NN const regnode * const p \ |NN const regnode_offset p \
|NN const regnode * const val \ |NN const regnode_offset val \
|const U32 depth |const U32 depth
Es |SV * |reg_scan_name |NN RExC_state_t *pRExC_state \ Es |SV * |reg_scan_name |NN RExC_state_t *pRExC_state \
|U32 flags |U32 flags
Expand All @@ -2441,7 +2441,7 @@ Es |U32 |join_exact |NN RExC_state_t *pRExC_state \
|NN bool *unfolded_multi_char \ |NN bool *unfolded_multi_char \
|U32 flags|NULLOK regnode *val|U32 depth |U32 flags|NULLOK regnode *val|U32 depth
Ei |void |alloc_maybe_populate_EXACT|NN RExC_state_t *pRExC_state \ Ei |void |alloc_maybe_populate_EXACT|NN RExC_state_t *pRExC_state \
|NN regnode *node|NN I32 *flagp|STRLEN len \ |NN regnode_offset node|NN I32 *flagp|STRLEN len \
|UV code_point|bool downgradable |UV code_point|bool downgradable
Ein |U8 |compute_EXACTish|NN RExC_state_t *pRExC_state Ein |U8 |compute_EXACTish|NN RExC_state_t *pRExC_state
Es |void |nextchar |NN RExC_state_t *pRExC_state Es |void |nextchar |NN RExC_state_t *pRExC_state
Expand Down Expand Up @@ -2553,7 +2553,7 @@ Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
|NULLOK HV* widecharmap|NN AV *revcharmap\ |NULLOK HV* widecharmap|NN AV *revcharmap\
|U32 next_alloc|U32 depth |U32 next_alloc|U32 depth
Es |U8 |regtail_study |NN RExC_state_t *pRExC_state \ Es |U8 |regtail_study |NN RExC_state_t *pRExC_state \
|NN regnode *p|NN const regnode *val|U32 depth |NN regnode_offset p|NN const regnode_offset val|U32 depth
# endif # endif
#endif #endif


Expand Down
34 changes: 17 additions & 17 deletions proto.h
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -4420,7 +4420,7 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
assert(fmt) assert(fmt)
STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags); STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags); STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, const regnode *val, U32 depth); STATIC U8 S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \ #define PERL_ARGS_ASSERT_REGTAIL_STUDY \
assert(pRExC_state); assert(p); assert(val) assert(pRExC_state); assert(p); assert(val)
# endif # endif
Expand Down Expand Up @@ -5390,7 +5390,7 @@ STATIC AV* S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, con
#define PERL_ARGS_ASSERT_ADD_MULTI_MATCH \ #define PERL_ARGS_ASSERT_ADD_MULTI_MATCH \
assert(multi_string) assert(multi_string)
#ifndef PERL_NO_INLINE_FUNCTIONS #ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode *node, I32 *flagp, STRLEN len, UV code_point, bool downgradable); PERL_STATIC_INLINE void S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state, regnode_offset node, I32 *flagp, STRLEN len, UV code_point, bool downgradable);
#define PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT \ #define PERL_ARGS_ASSERT_ALLOC_MAYBE_POPULATE_EXACT \
assert(pRExC_state); assert(node); assert(flagp) assert(pRExC_state); assert(node); assert(flagp)
#endif #endif
Expand Down Expand Up @@ -5425,18 +5425,18 @@ PERL_STATIC_INLINE STRLEN* S_get_invlist_iter_addr(SV* invlist)
assert(invlist) assert(invlist)
#endif #endif


STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth); STATIC bool S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode_offset* nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const U32 depth);
#define PERL_ARGS_ASSERT_GROK_BSLASH_N \ #define PERL_ARGS_ASSERT_GROK_BSLASH_N \
assert(pRExC_state); assert(flagp) assert(pRExC_state); assert(flagp)
#ifndef PERL_NO_INLINE_FUNCTIONS #ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE regnode* S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch); PERL_STATIC_INLINE regnode_offset S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, char * parse_start, char ch);
#define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF \ #define PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF \
assert(pRExC_state); assert(flagp); assert(parse_start) assert(pRExC_state); assert(flagp); assert(parse_start)
#endif #endif
STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only); STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \ #define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \
assert(pRExC_state); assert(s) assert(pRExC_state); assert(s)
STATIC regnode* S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse); STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \ #define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \
assert(pRExC_state); assert(flagp); assert(oregcomp_parse) assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
STATIC SV* S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style) STATIC SV* S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
Expand Down Expand Up @@ -5507,13 +5507,13 @@ PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const cha
#define PERL_ARGS_ASSERT_RE_CROAK2 \ #define PERL_ARGS_ASSERT_RE_CROAK2 \
assert(pat1); assert(pat2) assert(pat1); assert(pat2)


STATIC regnode* S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth); STATIC regnode_offset S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REG \ #define PERL_ARGS_ASSERT_REG \
assert(pRExC_state); assert(flagp) assert(pRExC_state); assert(flagp)
STATIC regnode* S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2); STATIC regnode_offset S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2);
#define PERL_ARGS_ASSERT_REG2LANODE \ #define PERL_ARGS_ASSERT_REG2LANODE \
assert(pRExC_state) assert(pRExC_state)
STATIC regnode* S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op); STATIC regnode_offset S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op);
#define PERL_ARGS_ASSERT_REG_NODE \ #define PERL_ARGS_ASSERT_REG_NODE \
assert(pRExC_state) assert(pRExC_state)
STATIC SV * S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags); STATIC SV * S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags);
Expand All @@ -5524,31 +5524,31 @@ PERL_STATIC_INLINE char * S_reg_skipcomment(RExC_state_t *pRExC_state, char * p)
#define PERL_ARGS_ASSERT_REG_SKIPCOMMENT \ #define PERL_ARGS_ASSERT_REG_SKIPCOMMENT \
assert(pRExC_state); assert(p) assert(pRExC_state); assert(p)
#endif #endif
STATIC regnode* S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg); STATIC regnode_offset S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg);
#define PERL_ARGS_ASSERT_REGANODE \ #define PERL_ARGS_ASSERT_REGANODE \
assert(pRExC_state) assert(pRExC_state)
STATIC regnode* S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth); STATIC regnode_offset S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGATOM \ #define PERL_ARGS_ASSERT_REGATOM \
assert(pRExC_state); assert(flagp) assert(pRExC_state); assert(flagp)
STATIC regnode* S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth); STATIC regnode_offset S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth);
#define PERL_ARGS_ASSERT_REGBRANCH \ #define PERL_ARGS_ASSERT_REGBRANCH \
assert(pRExC_state); assert(flagp) assert(pRExC_state); assert(flagp)
STATIC regnode* S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, const bool strict, bool optimizable, SV** ret_invlist, AV** return_posix_warnings); STATIC regnode_offset S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, const bool strict, bool optimizable, SV** ret_invlist, AV** return_posix_warnings);
#define PERL_ARGS_ASSERT_REGCLASS \ #define PERL_ARGS_ASSERT_REGCLASS \
assert(pRExC_state); assert(flagp) assert(pRExC_state); assert(flagp)
STATIC unsigned int S_regex_set_precedence(const U8 my_operator) STATIC unsigned int S_regex_set_precedence(const U8 my_operator)
__attribute__warn_unused_result__; __attribute__warn_unused_result__;


STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth); STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode_offset operand, U32 depth);
#define PERL_ARGS_ASSERT_REGINSERT \ #define PERL_ARGS_ASSERT_REGINSERT \
assert(pRExC_state); assert(operand) assert(pRExC_state)
STATIC regnode* S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name); STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name);
#define PERL_ARGS_ASSERT_REGNODE_GUTS \ #define PERL_ARGS_ASSERT_REGNODE_GUTS \
assert(pRExC_state); assert(name) assert(pRExC_state); assert(name)
STATIC regnode* S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth); STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \ #define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp) assert(pRExC_state); assert(flagp)
STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode * const p, const regnode * const val, const U32 depth); STATIC void S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
#define PERL_ARGS_ASSERT_REGTAIL \ #define PERL_ARGS_ASSERT_REGTAIL \
assert(pRExC_state); assert(p); assert(val) assert(pRExC_state); assert(p); assert(val)
STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf); STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
Expand Down
Loading

0 comments on commit d97f5fc

Please sign in to comment.