Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/pcre2.h.generic
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_with_frames(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
Expand Down Expand Up @@ -868,6 +870,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_with_frames PCRE2_SUFFIX(pcre2_match_data_create_with_frames_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
Expand Down
3 changes: 3 additions & 0 deletions src/pcre2.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_with_frames(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
Expand Down Expand Up @@ -868,6 +870,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_with_frames PCRE2_SUFFIX(pcre2_match_data_create_with_frames_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
Expand Down
3 changes: 3 additions & 0 deletions src/pcre2_intmodedep.h
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,8 @@ here.) */
#undef LOOKBEHIND_MAX
#define LOOKBEHIND_MAX UINT16_MAX

struct heapframe; /* see below */

typedef struct pcre2_real_code {
pcre2_memctl memctl; /* Memory control fields */
const uint8_t *tables; /* The character tables */
Expand Down Expand Up @@ -661,6 +663,7 @@ typedef struct pcre2_real_match_data {
uint8_t flags; /* Various flags */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
struct heapframe *start_frames; /* Initial heap frames (NULL for stack) */
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} pcre2_real_match_data;

Expand Down
139 changes: 93 additions & 46 deletions src/pcre2_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -6268,35 +6268,10 @@ switch (Freturn_id)
#undef LBL
}


/*************************************************
* Match a Regular Expression *
*************************************************/

/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.

Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points a PCRE2 context

Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
= -1 => failed to match (PCRE2_ERROR_NOMATCH)
= -2 => partial match (PCRE2_ERROR_PARTIAL)
< -2 => some kind of unexpected problem
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
static int
match_start(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext)
pcre2_match_context *mcontext, heapframe *start_frames)
{
int rc;
int was_zero_terminated = 0;
Expand Down Expand Up @@ -6354,24 +6329,7 @@ pcre2_callout_block cb;
match_block actual_match_block;
match_block *mb = &actual_match_block;

/* Allocate an initial vector of backtracking frames on the stack. If this
proves to be too small, it is replaced by a larger one on the heap. To get a
vector of the size required that is aligned for pointers, allocate it as a
vector of pointers. */

PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
PCRE2_KEEP_UNINITIALIZED;
mb->stack_frames = (heapframe *)stack_frames_vector;

/* Recognize NULL, length 0 as an empty string. */

if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";

/* Plausibility checks */

if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (code == NULL || subject == NULL || match_data == NULL)
return PCRE2_ERROR_NULL;
mb->stack_frames = start_frames;

start_match = subject + start_offset;
req_cu_ptr = start_match - 1;
Expand Down Expand Up @@ -7533,6 +7491,95 @@ else match_data->rc = PCRE2_ERROR_NOMATCH;
return match_data->rc;
}

/* match_start_on_stack() declares a START_FRAMES_SIZE-byte vector as a local
variable. MATCH_START_ON_STACK_NOINLINE expands to the compiler-specific
"never inline" attribute so that the vector stays in that function's own frame
rather than being merged into the frame of its caller.
MATCH_START_ON_STACK_NOT_INLINABLE is 1 when such an attribute is available and
0 otherwise; in the latter case pcre2_match() calls the function through a
volatile function pointer instead. */

#if defined(__GNUC__) /* Works for clang/ICC too */
#define MATCH_START_ON_STACK_NOT_INLINABLE 1
#define MATCH_START_ON_STACK_NOINLINE __attribute__ ((noinline))
#elif defined(_MSC_VER)
#define MATCH_START_ON_STACK_NOT_INLINABLE 1
#define MATCH_START_ON_STACK_NOINLINE __declspec(noinline)
#else
/* No known attribute: the macro expands to nothing. */
#define MATCH_START_ON_STACK_NOT_INLINABLE 0
#define MATCH_START_ON_STACK_NOINLINE
#endif

/* Run match_start() with the initial vector of backtracking frames held in a
local (automatic) array. The array is declared as a vector of pointers so that
it has the required size and is aligned for pointers. If it proves to be too
small, it is replaced by a larger one on the heap. All arguments are passed
through to match_start() unchanged, and its result is returned. */

static MATCH_START_ON_STACK_NOINLINE int
match_start_on_stack(const pcre2_code *code, PCRE2_SPTR subject,
  PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options,
  pcre2_match_data *match_data, pcre2_match_context *mcontext)
{
PCRE2_SPTR local_frames[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
  PCRE2_KEEP_UNINITIALIZED;
heapframe *initial_frames = (heapframe *)local_frames;

return match_start(code, subject, length, start_offset, options, match_data,
  mcontext, initial_frames);
}

/*************************************************
* Match a Regular Expression *
*************************************************/

/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.

Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points a PCRE2 context

Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
= -1 => failed to match (PCRE2_ERROR_NOMATCH)
= -2 => partial match (PCRE2_ERROR_PARTIAL)
< -2 => some kind of unexpected problem
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  pcre2_match_context *mcontext)
{
/* A NULL subject with a zero length is treated as the empty string. */

if (length == 0 && subject == NULL) subject = (PCRE2_SPTR)"";

/* Reject unknown option bits and missing mandatory arguments before anything
is dereferenced. */

if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (code == NULL || subject == NULL || match_data == NULL)
  return PCRE2_ERROR_NULL;

/* A match data block that carries its own initial frames (see
pcre2_match_data_create_with_frames()) is used directly, so that no frame
vector is placed on the stack of this function. */

if (match_data->start_frames != NULL)
  return match_start(code, subject, length, start_offset, options,
    match_data, mcontext, match_data->start_frames);

/* Otherwise the initial frames come from the stack of a separate function.
That function must not be inlined here, or its frame vector would become part
of this function's frame even on the path above. */

#if MATCH_START_ON_STACK_NOT_INLINABLE /* guaranteed by the compiler */
return match_start_on_stack(code, subject, length, start_offset,
  options, match_data, mcontext);
#else /* indirection that prevents inlining */
  {
  int (*volatile indirect)(const pcre2_code*, PCRE2_SPTR, PCRE2_SIZE,
    PCRE2_SIZE, uint32_t, pcre2_match_data*,
    pcre2_match_context*) = &match_start_on_stack;
  return (*indirect)(code, subject, length, start_offset, options,
    match_data, mcontext);
  }
#endif
}

/* These #undefs are here to enable unity builds with CMake. */

#undef NLBLOCK /* Block containing newline information */
Expand Down
63 changes: 51 additions & 12 deletions src/pcre2_match_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,36 @@ POSSIBILITY OF SUCH DAMAGE.

#include "pcre2_internal.h"

static inline PCRE2_SIZE
start_frames_offset(uint32_t oveccount)
{
return ((2*oveccount*sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
~(HEAPFRAME_ALIGNMENT - 1));
}

/* Common allocator behind the public match data creation functions.

Arguments:
  oveccount    requested number of ovector pairs; forced into 1..UINT16_MAX
  with_frames  non-zero to reserve START_FRAMES_SIZE bytes for initial
                 backtracking frames after the (aligned) ovector
  gcontext     passed to the memory allocator as its memory control block

Returns:       the new block, or NULL if the allocation failed

The upper limit is needed because the count is stored in the block's oveccount
field, which is declared as uint16_t in pcre2_real_match_data; a larger value
would be truncated on assignment and no longer agree with the size that was
allocated. When with_frames is zero, start_frames is set to NULL, which makes
pcre2_match() use frames on its stack instead. */

static inline pcre2_match_data *
match_data_create(uint32_t oveccount, int with_frames,
  pcre2_general_context *gcontext)
{
pcre2_match_data *yield;
PCRE2_SIZE ovecsize;
PCRE2_SIZE frames_offset = 0;

if (oveccount < 1) oveccount = 1;
if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;

if (with_frames)
  {
  frames_offset = start_frames_offset(oveccount);
  ovecsize = frames_offset + START_FRAMES_SIZE;
  }
else
  ovecsize = 2*oveccount*sizeof(PCRE2_SIZE);

yield = PRIV(memctl_malloc)(
  offsetof(pcre2_match_data, ovector) + ovecsize, (pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;

yield->oveccount = (uint16_t)oveccount;
yield->flags = 0;

/* The frames, when present, start at the aligned offset past the ovector. */

yield->start_frames = with_frames?
  (heapframe *)((char *)yield->ovector + frames_offset) : NULL;
return yield;
}


/*************************************************
Expand All @@ -56,15 +86,22 @@ POSSIBILITY OF SUCH DAMAGE.
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
/* Delegate to the shared allocator, asking for no initial frames. This is the
only path that initialises start_frames (to NULL), which pcre2_match() tests
to decide where the initial backtracking frames come from. */

return match_data_create(oveccount, 0, gcontext);
}



/*************************************************
*  Create a match data block with initial frames *
*************************************************/

/* Like pcre2_match_data_create(), but the block also carries the initial
vector of backtracking frames, so pcre2_match() uses that vector instead of
one on its stack. A minimum of 1 is imposed on the number of ovector pairs.

Arguments:
  oveccount   number of ovector pairs
  gcontext    general context handed to the allocator

Returns:      the new block, or NULL if memory could not be obtained
*/

PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_with_frames(uint32_t oveccount,
  pcre2_general_context *gcontext)
{
pcre2_match_data *block = match_data_create(oveccount, 1, gcontext);
return block;
}


Expand All @@ -79,9 +116,9 @@ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
/* The ovector gets top_bracket + 1 pairs, as recorded in the compiled code. */

uint32_t oveccount = ((pcre2_real_code *)code)->top_bracket + 1;

/* With no general context, the compiled code itself is passed instead; the
allocator only uses the memory control fields, which are the first member of
the compiled code structure. */

if (gcontext == NULL) gcontext = (pcre2_general_context *)code;

/* A block created this way has no initial frames of its own. */

return match_data_create(oveccount, 0, gcontext);
}


Expand Down Expand Up @@ -160,7 +197,9 @@ PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_match_data_size(pcre2_match_data *match_data)
{
/* The variable part of the block is either the bare ovector, or, when the
block carries initial frames, the ovector padded to the frame alignment
followed by START_FRAMES_SIZE bytes of frames. This mirrors the size
computation used when the block is created. */

PCRE2_SIZE ovecsize = (match_data->start_frames != NULL)?
  start_frames_offset(match_data->oveccount) + START_FRAMES_SIZE :
  2 * match_data->oveccount * sizeof(PCRE2_SIZE);

return offsetof(pcre2_match_data, ovector) + ovecsize;
}

/* End of pcre2_match_data.c */
24 changes: 22 additions & 2 deletions src/pcre2test.c
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,14 @@ are supported. */
else \
G(a,32) = pcre2_match_data_create_32(b,c)

/* Three-mode variant: selects on test_mode and calls the 8-, 16- or 32-bit
pcre2_match_data_create_with_frames function with arguments (b,c), assigning
the result to the variable named by G(a,8), G(a,16) or G(a,32) respectively.
The expansion is an unbraced if/else chain without a trailing semicolon; the
caller supplies the semicolon. */

#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
if (test_mode == PCRE8_MODE) \
G(a,8) = pcre2_match_data_create_with_frames_8(b,c); \
else if (test_mode == PCRE16_MODE) \
G(a,16) = pcre2_match_data_create_with_frames_16(b,c); \
else \
G(a,32) = pcre2_match_data_create_with_frames_32(b,c)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
if (test_mode == PCRE8_MODE) \
G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
Expand Down Expand Up @@ -1766,6 +1774,12 @@ the three different cases. */
else \
G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)

/* Two-mode variant: if test_mode equals the mode constant built from BITONE,
the BITONE-width pcre2_match_data_create_with_frames function is called with
(b,c) and its result assigned to G(a,BITONE); otherwise the BITTWO-width
function and variable are used. The expansion is an unbraced if/else without
a trailing semicolon; the caller supplies the semicolon. */

#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(a,BITONE) = G(pcre2_match_data_create_with_frames_,BITONE)(b,c); \
else \
G(a,BITTWO) = G(pcre2_match_data_create_with_frames_,BITTWO)(b,c)

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
Expand Down Expand Up @@ -2071,6 +2085,8 @@ the three different cases. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
/* 8-bit only: assigns the result of pcre2_match_data_create_with_frames_8(b,c)
to G(a,8). */
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
G(a,8) = pcre2_match_data_create_with_frames_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
Expand Down Expand Up @@ -2178,6 +2194,8 @@ the three different cases. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
/* 16-bit only: assigns the result of
pcre2_match_data_create_with_frames_16(b,c) to G(a,16). */
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
G(a,16) = pcre2_match_data_create_with_frames_16(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
Expand Down Expand Up @@ -2285,6 +2303,8 @@ the three different cases. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
/* 32-bit only: assigns the result of
pcre2_match_data_create_with_frames_32(b,c) to G(a,32). */
#define PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(a,b,c) \
G(a,32) = pcre2_match_data_create_with_frames_32(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
Expand Down Expand Up @@ -7287,7 +7307,7 @@ else
{
max_oveccount = dat_datctl.oveccount;
PCRE2_MATCH_DATA_FREE(match_data);
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
PCRE2_MATCH_DATA_CREATE_WITH_FRAMES(match_data, max_oveccount, NULL);
}

if (CASTVAR(void *, match_data) == NULL)
Expand Down Expand Up @@ -9170,7 +9190,7 @@ max_oveccount = DEFAULT_OVECCOUNT;
G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
G(match_data,BITS) = G(pcre2_match_data_create_with_frames_,BITS)(max_oveccount, G(general_context,BITS))

#define CONTEXTTESTS \
(void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
Expand Down