Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/pcre2_compile_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -721,9 +721,13 @@ while (TRUE)
else
cranges->char_lists_types |= tmp1 << tmp2;

if (range_start < XCL_CHAR_LIST_LOW_16_START) break;
if (range_end < XCL_CHAR_LIST_LOW_16_START || tmp2 == 0)
{
PCRE2_ASSERT(range_start < XCL_CHAR_LIST_LOW_16_START);
break;
}

PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
PCRE2_ASSERT((tmp2 % XCL_TYPE_BIT_LEN) == 0);
char_list_end = char_list_start - 1;
char_list_start = *char_list_next++;
tmp1 = 0;
Expand Down
19 changes: 12 additions & 7 deletions src/pcre2_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1518,7 +1518,7 @@ High16 and High32: the highest bit is always one
The items are ordered in increasing order, so binary search can be
used to find the lower bound of an input character. The lower bound
is the highest item whose value is less than or equal to the input
character. If the lower bit of the item is cleard, or the character
character. If the lower bit of the item is cleared, or the character
stored in the item equals the input character, the input
character is in the character list. */

Expand All @@ -1539,14 +1539,19 @@ character is in the character list. */
#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff
#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000

/* Mask for getting the descriptors of character list ranges.
Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed
by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */
/* Mask and length values for getting the descriptors of
all character list ranges. The bit length of each descriptor
is XCL_TYPE_BIT_LEN so the total size is 4*XCL_TYPE_BIT_LEN
(currently 12 bits). This data is stored for all four character
lists, even if no characters are present in a list. */
#define XCL_TYPE_MASK 0xfff
#define XCL_TYPE_BIT_LEN 3
/* If this bit is set, the first item of the character list is the
end of a range, which started before the starting character of the
character list. */
/* If this bit is set for a character class, the first item of the
character list is the end of a range, which started before the
starting character of the character list. If this bit is set, and
no characters are present in the list, the whole character class
is part of a range. E.g.: [\x{500}-\x{12000}] covers the entire
0x8000-0xffff range. */
#define XCL_BEGIN_WITH_RANGE 0x4
/* Number of items in the character list: 0, 1, or 2. The value 3
represents that the item count is stored at the beginning of the
Expand Down
6 changes: 6 additions & 0 deletions testdata/testinput5
Original file line number Diff line number Diff line change
Expand Up @@ -2892,6 +2892,12 @@
/([\x{6535}\x{6536}\x{6538}\x{6539}\x{653b}\x{653c}\x{653e}\x{653f}\x{6541}\x{6542}\x{8000}-\x{ffff}]#)+/B,utf
\x{6534}#\x{6537}#\x{653a}#\x{653d}#\x{6540}#\x{6543}#\x{7fff}#\x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}#\x{ffff}

/[\x{ff}\x{100}\x{8000}\x{8002}\x{8004}\x{8006}\x{8008}\x{800a}\x{800c}\x{800e}]+/B,utf
\x{ff}\x{100}\x{8000}\x{800a}\x{800e}\x{101}

/[\x{ff}-\x{104}\x{8000}\x{8002}\x{8004}\x{8006}\x{8008}\x{800a}\x{800c}\x{800e}]+/B,utf
\x{ff}\x{100}\x{101}\x{104}\x{8000}\x{800a}\x{800e}\x{105}

/[[:xdigit:]\x{400}-\x{600}]+/utf,ucp
!a0\x{400}\x{600}9\x{3ff}

Expand Down
20 changes: 20 additions & 0 deletions testdata/testoutput5
Original file line number Diff line number Diff line change
Expand Up @@ -6339,6 +6339,26 @@ Failed: error 115 at offset 53: reference to non-existent subpattern
0: \x{6535}#\x{6536}#\x{6538}#\x{6539}#\x{653b}#\x{653c}#\x{653e}#\x{653f}#\x{6541}#\x{6542}#\x{8000}#\x{c246}#
1: \x{c246}#

/[\x{ff}\x{100}\x{8000}\x{8002}\x{8004}\x{8006}\x{8008}\x{800a}\x{800c}\x{800e}]+/B,utf
------------------------------------------------------------------
Bra
[\xff\x{100}\x{8000}\x{8002}\x{8004}\x{8006}\x{8008}\x{800a}\x{800c}\x{800e}]++
Ket
End
------------------------------------------------------------------
\x{ff}\x{100}\x{8000}\x{800a}\x{800e}\x{101}
0: \x{ff}\x{100}\x{8000}\x{800a}\x{800e}

/[\x{ff}-\x{104}\x{8000}\x{8002}\x{8004}\x{8006}\x{8008}\x{800a}\x{800c}\x{800e}]+/B,utf
------------------------------------------------------------------
Bra
[\xff\x{100}-\x{104}\x{8000}\x{8002}\x{8004}\x{8006}\x{8008}\x{800a}\x{800c}\x{800e}]++
Ket
End
------------------------------------------------------------------
\x{ff}\x{100}\x{101}\x{104}\x{8000}\x{800a}\x{800e}\x{105}
0: \x{ff}\x{100}\x{101}\x{104}\x{8000}\x{800a}\x{800e}

/[[:xdigit:]\x{400}-\x{600}]+/utf,ucp
!a0\x{400}\x{600}9\x{3ff}
0: a0\x{400}\x{600}9
Expand Down