Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 140 additions & 68 deletions src/pcre2_jit_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -10589,49 +10589,43 @@ else
tmp_offset = LOCAL2;
}

/* Handle fixed part first. */
if (opcode != OP_UPTO && opcode != OP_POSUPTO)
if (opcode == OP_EXACT)
{
if (exact > 1)
{
SLJIT_ASSERT(early_fail_ptr == 0);
SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);

if (common->mode == PCRE2_JIT_COMPLETE
if (common->mode == PCRE2_JIT_COMPLETE
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
&& !common->utf
&& !common->utf
#endif
&& type != OP_ANYNL && type != OP_EXTUNI)
{
OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));
&& type != OP_ANYNL && type != OP_EXTUNI)
{
OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
if (type == OP_ALLANY && !common->invalid_utf)
if (type == OP_ALLANY && !common->invalid_utf)
#else
if (type == OP_ALLANY)
if (type == OP_ALLANY)
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
else
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
else
{
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}
else if (exact == 1 && opcode != OP_STAR && opcode != OP_MINSTAR && opcode != OP_POSSTAR)
else
{
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}

if (early_fail_type == type_fail_range)
Expand All @@ -10649,8 +10643,8 @@ if (early_fail_type == type_fail_range)

switch(opcode)
{
case OP_UPTO:
case OP_STAR:
case OP_UPTO:
SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
max += exact;

Expand Down Expand Up @@ -11006,21 +11000,55 @@ switch(opcode)
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;

case OP_QUERY:
SLJIT_ASSERT(early_fail_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;

case OP_MINSTAR:
case OP_MINQUERY:
SLJIT_ASSERT(opcode == OP_MINSTAR || early_fail_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 1);

if (exact == 1)
if (exact >= 1)
{
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
if (exact >= 2)
{
/* Extuni has a separate exact opcode. */
SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
}

if (opcode == OP_MINQUERY)
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);

label = LABEL();
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;

compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

if (exact >= 2)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}

if (opcode == OP_MINQUERY)
OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);
else
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
}
else
{
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
}

if (early_fail_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
break;
Expand All @@ -11029,20 +11057,35 @@ switch(opcode)
SLJIT_ASSERT(early_fail_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);

OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;

case OP_QUERY:
case OP_MINQUERY:
SLJIT_ASSERT(early_fail_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 1);
if (exact == 0)
{
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
}

if (exact >= 2)
{
/* Extuni has a separate exact opcode. */
SLJIT_ASSERT(tmp_base == TMP3);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);
}

label = LABEL();
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;

compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);

if (exact >= 2)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}

OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode == OP_QUERY)
compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;

case OP_EXACT:
Expand Down Expand Up @@ -11820,15 +11863,32 @@ switch(opcode)
}
break;

case OP_QUERY:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
jump = JUMP(SLJIT_JUMP);
set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
JUMPHERE(jump);
if (private_data_ptr == 0)
free_stack(common, 1);
break;

case OP_MINSTAR:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
if (exact != 1)
if (exact == 0)
{
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
}
else if (exact > 1)
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);

JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(exact == 1 ? CURRENT_AS(char_iterator_backtrack)->u.backtracks : jumplist, LABEL());
set_jumps(exact > 0 ? CURRENT_AS(char_iterator_backtrack)->u.backtracks : jumplist, LABEL());
if (private_data_ptr == 0)
free_stack(common, 1);
break;
Expand All @@ -11837,40 +11897,52 @@ switch(opcode)
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

OP1(SLJIT_MOV, base, offset1, TMP1, 0);
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
if (exact == 0)
{
add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));

set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
free_stack(common, 2);
break;
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);

set_jumps(jumplist, LABEL());
}
else
{
if (exact > 1)
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath);

set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
}

case OP_QUERY:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
jump = JUMP(SLJIT_JUMP);
set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
JUMPHERE(jump);
if (private_data_ptr == 0)
free_stack(common, 1);
free_stack(common, 2);
break;

case OP_MINQUERY:
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
JUMPHERE(jump);

if (exact >= 1)
{
if (exact >= 2)
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
}
else
{
jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
JUMPHERE(jump);
}

if (private_data_ptr == 0)
free_stack(common, 1);
break;
Expand Down
11 changes: 11 additions & 0 deletions testdata/testinput1
Original file line number Diff line number Diff line change
Expand Up @@ -7056,10 +7056,21 @@ $/x
abcdefghbijb
abcdefghbij
abcdeb
abcdefghijx
\= Expect no match
abcdb
abcdefghijk

/[a-z]{1,6}?s|x/
asbs
abcdefs
abcdefghijkss
abcdefghijkx
ss
\= Expect no match
s
aaa

# --------------

# End of testinput1
9 changes: 9 additions & 0 deletions testdata/testinput4
Original file line number Diff line number Diff line change
Expand Up @@ -1869,6 +1869,15 @@
ab\x{600}\x{700}ijklh
ab\x{600}h\x{700}ijklmh

/([a-z\x{1000}\x{2000}]{1,2}?u)+$/utf
\x{1000}uu\x{2000}u
\x{1001}uuuu
\x{2001}uuuuu
uuuu\x{1fff}#u#\x{2000}\x{1000}u\x{2000}u
\= Expect no match
abuabuabuabu!
uuuuuuuuuuuu#

# --------------------------------------

/(ΣΆΜΟΣ) \1/i,utf
Expand Down
19 changes: 19 additions & 0 deletions testdata/testoutput1
Original file line number Diff line number Diff line change
Expand Up @@ -11095,12 +11095,31 @@ No match
0: abcdefghb
abcdeb
0: abcdeb
abcdefghijx
0: x
\= Expect no match
abcdb
No match
abcdefghijk
No match

/[a-z]{1,6}?s|x/
asbs
0: as
abcdefs
0: abcdefs
abcdefghijkss
0: fghijks
abcdefghijkx
0: x
ss
0: ss
\= Expect no match
s
No match
aaa
No match

# --------------

# End of testinput1
19 changes: 19 additions & 0 deletions testdata/testoutput4
Original file line number Diff line number Diff line change
Expand Up @@ -3055,6 +3055,25 @@ No match
ab\x{600}h\x{700}ijklmh
No match

/([a-z\x{1000}\x{2000}]{1,2}?u)+$/utf
\x{1000}uu\x{2000}u
0: \x{1000}uu\x{2000}u
1: u\x{2000}u
\x{1001}uuuu
0: uuuu
1: uu
\x{2001}uuuuu
0: uuuuu
1: uuu
uuuu\x{1fff}#u#\x{2000}\x{1000}u\x{2000}u
0: \x{2000}\x{1000}u\x{2000}u
1: \x{2000}u
\= Expect no match
abuabuabuabu!
No match
uuuuuuuuuuuu#
No match

# --------------------------------------

/(ΣΆΜΟΣ) \1/i,utf
Expand Down
Loading