Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 99 additions & 102 deletions src/pcre2_jit_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7929,6 +7929,7 @@ static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc,
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list *check_result = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset;
sljit_u32 max = 256, min = READ_CHAR_MAX;
Expand Down Expand Up @@ -8507,139 +8508,135 @@ SLJIT_ASSERT(ranges.range_count > 0);
#endif /* SUPPORT_UNICODE */

SLJIT_ASSERT(compares == 1);
if (charoffset != 0)
invertcmp = (list != backtracks);

if (ranges.range_count == 2)
{
range_start = ranges.ranges[0];
range_end = ranges.ranges[1];

if (range_start < range_end)
{
SET_CHAR_OFFSET(range_start);
jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
}
else
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));

add_jump(compiler, backtracks, jump);

SLJIT_ASSERT(ranges.stack == ranges.local_stack);
if (found != NULL)
set_jumps(found, LABEL());
return;
}

if (ranges.range_count > 6 && charoffset != 0)
{
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
charoffset = 0;
}

charoffset = 0;
depth = 0;
first_item = 0;
last_item = ranges.range_count;
compares = ranges.range_count;
last_item = ranges.range_count - 2;
has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;

while (TRUE)
{
/* At least two items are present. */
SLJIT_ASSERT(first_item < last_item);

if (first_item + 6 >= last_item)
if (first_item + 6 <= last_item)
{
range_start = ranges.ranges[first_item];
range_end = ranges.ranges[first_item + 1];
first_item += 2;
compares -= 2;
invertcmp = (compares == 0 && list != backtracks);
charoffset = 0;
jump = NULL;
SLJIT_ASSERT(charoffset == 0);
mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
SLJIT_ASSERT(last_item >= mid_item + 4);

range_end = ranges.ranges[mid_item + 1];
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)range_end);
ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2);
ranges.stack[depth].last_item = (sljit_u32)last_item;

depth++;
SLJIT_ASSERT(ranges.stack == ranges.local_stack ?
depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);

if (range_start < range_end)
last_item = mid_item;
continue;
}

range_start = ranges.ranges[first_item];
range_end = ranges.ranges[first_item + 1];

if (range_start < range_end)
{
SET_CHAR_OFFSET(range_start);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
}
else
{
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)range_start);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
}

if (first_item < last_item)
{
do
{
SET_CHAR_OFFSET(range_start);
first_item += 2;
range_start = ranges.ranges[first_item];
range_end = ranges.ranges[first_item + 1];

if (first_item < last_item)
if (range_start < range_end)
{
SET_CHAR_OFFSET(range_start);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
}
else
jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
}
else
{
if (first_item < last_item)
{
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)range_start);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

if (has_cmov)
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
else
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
}
else
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)range_start);
}

if (first_item < last_item)
{
do
{
range_start = ranges.ranges[first_item];
range_end = ranges.ranges[first_item + 1];
first_item += 2;
compares -= 2;
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));

if (range_start < range_end)
{
SET_CHAR_OFFSET(range_start);
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));

if (has_cmov)
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
else
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
}
if (has_cmov)
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
else
{
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));

if (has_cmov)
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
else
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
}
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
}
while (first_item < last_item);

invertcmp = (compares == 0 && list != backtracks);

if (has_cmov)
jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
else
jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp);
}

add_jump(compiler, compares > 0 ? list : backtracks, jump);

if (depth == 0) break;

add_jump(compiler, list == backtracks ? &found : backtracks, JUMP(SLJIT_JUMP));

/* The charoffset resets after the end of a branch is reached. */
depth--;
first_item = ranges.stack[depth].first_item;
last_item = ranges.stack[depth].last_item;
JUMPHERE(ranges.stack[depth].jump);
continue;
while (first_item < last_item);
}

mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
SLJIT_ASSERT(last_item > mid_item + 2);

range_start = ranges.ranges[mid_item];
range_end = ranges.ranges[mid_item + 1];

ranges.stack[depth].first_item = (sljit_u8)(mid_item + 2);
ranges.stack[depth].last_item = (sljit_u8)last_item;
if (depth == 0) break;

compares -= 2;
invertcmp = (compares == 0 && list != backtracks);
add_jump(compiler, &check_result, JUMP(SLJIT_JUMP));

if (range_start < range_end)
{
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)range_end);
jump = CMP(SLJIT_GREATER_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)range_start);
}
else
{
OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)range_start);
ranges.stack[depth].jump = JUMP(SLJIT_GREATER);
jump = JUMP(SLJIT_EQUAL ^ invertcmp);
}
/* The charoffset resets after the end of a branch is reached. */
charoffset = 0;
depth--;
first_item = ranges.stack[depth].first_item;
last_item = ranges.stack[depth].last_item;
JUMPHERE(ranges.stack[depth].jump);
}

depth++;
SLJIT_ASSERT(ranges.stack == ranges.local_stack ?
depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);
if (check_result != NULL)
set_jumps(check_result, LABEL());

add_jump(compiler, compares > 0 ? list : backtracks, jump);
last_item = mid_item;
if (has_cmov)
jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
else
{
sljit_set_current_flags(compiler, SLJIT_SET_Z);
jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp);
}

SLJIT_ASSERT(compares == 0);
add_jump(compiler, backtracks, jump);

if (found != NULL)
set_jumps(found, LABEL());

Expand Down
24 changes: 24 additions & 0 deletions testdata/testinput5
Original file line number Diff line number Diff line change
Expand Up @@ -2863,4 +2863,28 @@
/([\x{6535}\x{6536}\x{6538}\x{6539}\x{653b}\x{653c}\x{653e}\x{653f}\x{6541}\x{6542}\x{8000}-\x{ffff}]#)+/B,utf
\x{6534}#\x{6537}#\x{653a}#\x{653d}#\x{6540}#\x{6543}#\x{7fff}#\x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}#\x{ffff}

/[[:xdigit:]\x{400}-\x{600}]+/utf,ucp
!a0\x{400}\x{600}9\x{3ff}

/[^[:xdigit:]\x{400}-\x{600}]+/utf,ucp
\x{400}(\x{3ff}\x{601})\x{600}

/[[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp
!A0\x{700}9\x{601}

/[^[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp
\x{600}(\x{6ff}\x{701}\x{3ff}\x{601})\x{700}

/[[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp
!f0\x{800}\x{600}9\x{601}

/[^[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp
\x{700}[\x{3ff}\x{601}\x{6ff}\x{801}\x{8ff}\x{901}]\x{900}

/[[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp
!F0\x{400}\x{410}\x{500}\x{600}\x{610}\x{700}\x{800}\x{810}9\x{7ff}

/[^[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp
\x{800}<\x{3ff}\x{411}\x{4ff}\x{501}\x{5ff}\x{611}\x{6ff}\x{701}\x{7ff}\x{811}>\x{810}

# End of testinput5
Expand Down
32 changes: 32 additions & 0 deletions testdata/testoutput5
Original file line number Diff line number Diff line change
Expand Up @@ -6198,4 +6198,36 @@ Failed: error 115 at offset 52: reference to non-existent subpattern
0: \x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}#
1: \x{c246}#

/[[:xdigit:]\x{400}-\x{600}]+/utf,ucp
!a0\x{400}\x{600}9\x{3ff}
0: a0\x{400}\x{600}9

/[^[:xdigit:]\x{400}-\x{600}]+/utf,ucp
\x{400}(\x{3ff}\x{601})\x{600}
0: (\x{3ff}\x{601})

/[[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp
!A0\x{700}9\x{601}
0: A0\x{700}9

/[^[:xdigit:]\x{400}-\x{600}\x{700}]+/utf,ucp
\x{600}(\x{6ff}\x{701}\x{3ff}\x{601})\x{700}
0: (\x{6ff}\x{701}\x{3ff}\x{601})

/[[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp
!f0\x{800}\x{600}9\x{601}
0: f0\x{800}\x{600}9

/[^[:xdigit:]\x{400}-\x{600}\x{700}-\x{800}\x{900}]+/utf,ucp
\x{700}[\x{3ff}\x{601}\x{6ff}\x{801}\x{8ff}\x{901}]\x{900}
0: [\x{3ff}\x{601}\x{6ff}\x{801}\x{8ff}\x{901}]

/[[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp
!F0\x{400}\x{410}\x{500}\x{600}\x{610}\x{700}\x{800}\x{810}9\x{7ff}
0: F0\x{400}\x{410}\x{500}\x{600}\x{610}\x{700}\x{800}\x{810}9

/[^[:xdigit:]\x{400}-\x{410}\x{500}\x{600}-\x{610}\x{700}\x{800}-\x{810}]+/utf,ucp
\x{800}<\x{3ff}\x{411}\x{4ff}\x{501}\x{5ff}\x{611}\x{6ff}\x{701}\x{7ff}\x{811}>\x{810}
0: <\x{3ff}\x{411}\x{4ff}\x{501}\x{5ff}\x{611}\x{6ff}\x{701}\x{7ff}\x{811}>

# End of testinput5
Expand Down
Loading