Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -4338,6 +4338,18 @@ while (ptr < ptrend)
uint16_t ptype = 0, pdata = 0;
if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb))
goto FAILED;

/* In caseless matching, particular characteristics Lu, Ll, and Lt
get converted to the general characteristic L&. That is, upper,
lower, and title case letters are all conflated. */

if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC &&
(pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt))
{
ptype = PT_LAMP;
pdata = 0;
}

if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P;
*parsed_pattern++ = META_ESCAPE + escape;
*parsed_pattern++ = (ptype << 16) | pdata;
Expand Down
11 changes: 0 additions & 11 deletions src/pcre2_compile_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -1378,17 +1378,6 @@ while (TRUE)
continue;
}

/* In caseless matching, particular characteristics Lu, Ll, and Lt
get converted to the general characteristic L&. That is, upper,
lower, and title case letters are all conflated. */

if ((options & PCRE2_CASELESS) != 0 && ptype == PT_PC &&
(pdata == ucp_Lu || pdata == ucp_Ll || pdata == ucp_Lt))
{
ptype = PT_LAMP;
pdata = 0;
}

PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);

if ((xclass_props & XCLASS_HIGH_ANY) == 0)
Expand Down
7 changes: 4 additions & 3 deletions src/pcre2_jit_char_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,13 +626,17 @@ if (category_list == UCPCAT_ALL)
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
return;
}

if (category_list != 0)
compares++;
#endif

if (*cc != XCL_END)
{
#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS))
{
SLJIT_ASSERT(category_list == 0);
max = 0;
min = (ccbegin[-1] & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX;
xclass_update_min_max(common, cc, &min, &max);
Expand Down Expand Up @@ -701,9 +705,6 @@ if (status & XCLASS_NEEDS_UCD)

ccbegin = cc;

if (category_list != 0)
compares++;

if (status & XCLASS_HAS_BIDICL)
{
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
Expand Down
24 changes: 17 additions & 7 deletions testdata/testinput4
Original file line number Diff line number Diff line change
Expand Up @@ -2918,25 +2918,35 @@
/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/utf
>AbbD<
>Abb\x{01c5}<
\= Expect no match
\= Expect no match
>aBBd<
>aB!!<
>aB!!<

/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf
>aB!!<
>\x{01c5}B!!<
\= Expect no match
>aB!!<
>\x{01c5}B!!<
\= Expect no match
>AbbD<
>aBBd<
>Abb\x{01c5}<

/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf
>aB!!<
\= Expect no match
>aB!!<
\= Expect no match
>AbbD<
>aBBd<
>Abb\x{01c5}<

/[\p{Lt}\x{36b}][\P{Lt}\x{10a0}]/i,utf
>A!<
>\x{3c9}\x{58d}<
>\x{413}\x{940}<
\= Expect no match
\x{3c9}\x{3c9}
\x{58d}\x{58d}
\x{413}\x{413}
\x{940}\x{940}

/^\p{Lt}+/i,utf
\x{1c5}AB

Expand Down
2 changes: 2 additions & 0 deletions testdata/testinput5
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@

/[[:a\x{100}b:]]/utf

/[\p{InvalidOrBadProperty}]/

/a[^]b/utf,allow_empty_class,match_unset_backref
a\x{1234}b
a\nb
Expand Down
31 changes: 24 additions & 7 deletions testdata/testoutput4
Original file line number Diff line number Diff line change
Expand Up @@ -4681,18 +4681,18 @@ No match
0: AbbD
>Abb\x{01c5}<
0: Abb\x{1c5}
\= Expect no match
\= Expect no match
>aBBd<
No match
>aB!!<
>aB!!<
No match

/\p{Lu}\p{Ll}\P{Lu}\P{Ll}/i,utf
>aB!!<
>aB!!<
0: aB!!
>\x{01c5}B!!<
>\x{01c5}B!!<
0: \x{1c5}B!!
\= Expect no match
\= Expect no match
>AbbD<
No match
>aBBd<
Expand All @@ -4701,16 +4701,33 @@ No match
No match

/[.\p{Lu}][.\p{Ll}][.\P{Lu}][.\P{Ll}]/i,utf
>aB!!<
>aB!!<
0: aB!!
\= Expect no match
\= Expect no match
>AbbD<
No match
>aBBd<
No match
>Abb\x{01c5}<
No match

/[\p{Lt}\x{36b}][\P{Lt}\x{10a0}]/i,utf
>A!<
0: A!
>\x{3c9}\x{58d}<
0: \x{3c9}\x{58d}
>\x{413}\x{940}<
0: \x{413}\x{940}
\= Expect no match
\x{3c9}\x{3c9}
No match
\x{58d}\x{58d}
No match
\x{413}\x{413}
No match
\x{940}\x{940}
No match

/^\p{Lt}+/i,utf
\x{1c5}AB
0: \x{1c5}AB
Expand Down
3 changes: 3 additions & 0 deletions testdata/testoutput5
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,9 @@ No match
/[[:a\x{100}b:]]/utf
Failed: error 130 at offset 14: unknown POSIX class name

/[\p{InvalidOrBadProperty}]/
Failed: error 147 at offset 25: unknown property after \P or \p

/a[^]b/utf,allow_empty_class,match_unset_backref
a\x{1234}b
0: a\x{1234}b
Expand Down
Loading