Skip to content

Commit

Permalink
/\p{InFoo} should only match blocks, or be user-defined
Browse files Browse the repository at this point in the history
For a property \p{Block=Foo}, we allow the synonym \p{InFoo}, as
documented in various places, including perluniprops, even though this
usage is discouraged, because a new Unicode release used in a new version
of Perl could cause the synonym to no longer work.

Prior to this commit, we erroneously allowed the synonym for other
properties, such as \p{InKana} or \p{InS}.
  • Loading branch information
khwilliamson committed Jun 3, 2019
1 parent 3000ebb commit 74333e9
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 9 deletions.
2 changes: 1 addition & 1 deletion charclass_invlists.h
Original file line number Diff line number Diff line change
Expand Up @@ -395301,7 +395301,7 @@ static const U8 WB_table[23][23] = {
* 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
* 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
* 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
* b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables
* a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables
* a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
Expand Down
1 change: 1 addition & 0 deletions lib/unicore/mktables
Original file line number Diff line number Diff line change
Expand Up @@ -19133,6 +19133,7 @@ Test_GCB("1100 $nobreak 1161"); # Bug #70940
Expect(0, 0x2028, '\p{Print}', ""); # Bug # 71722
Expect(0, 0x2029, '\p{Print}', ""); # Bug # 71722
Expect(1, 0xFF10, '\p{XDigit}', ""); # Bug # 71726
Error('\p{InKana}'); # 'Kana' is not a block so InKana shouldn't compile

# Make sure this gets tested; it was not part of the official test suite at
# the time this was added. Note that this is as it would appear in the
Expand Down
2 changes: 1 addition & 1 deletion lib/unicore/uni_keywords.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1261,7 +1261,7 @@
# 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
# 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
# 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
# b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables
# a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables
# a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
# 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
# 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
Expand Down
2 changes: 1 addition & 1 deletion regcharclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -1901,7 +1901,7 @@
* 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
* 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
* 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
* b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables
* a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables
* a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 830144f6afdd047b009754ffa06134397268f6638837fe85283483eb0cfdd558 regen/regcharclass.pl
Expand Down
16 changes: 11 additions & 5 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -22574,8 +22574,7 @@ Perl_parse_uniprop_string(pTHX_
int slash_pos = -1; /* Where the '/' is found, or negative if none */
int table_index = 0; /* The entry number for this property in the table
of all Unicode property names */
bool starts_with_In_or_Is = FALSE; /* ? Does the name start with 'In' or
'Is' */
bool starts_with_Is = FALSE; /* ? Does the name start with 'Is' */
Size_t lookup_offset = 0; /* Used to ignore the first few characters of
the normalized name in certain situations */
Size_t non_pkg_begin = 0; /* Offset of first byte in 'name' that isn't
Expand Down Expand Up @@ -23053,7 +23052,11 @@ Perl_parse_uniprop_string(pTHX_
&& name[non_pkg_begin+0] == 'I'
&& (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
{
starts_with_In_or_Is = TRUE;
/* Names that start with In have different characteristics than those
* that start with Is */
if (name[non_pkg_begin+1] == 's') {
starts_with_Is = TRUE;
}
}
else {
could_be_user_defined = FALSE;
Expand Down Expand Up @@ -23392,8 +23395,11 @@ Perl_parse_uniprop_string(pTHX_
/* If it didn't find the property ... */
if (table_index == 0) {

/* Try again stripping off any initial 'In' or 'Is' */
if (starts_with_In_or_Is) {
/* Try again stripping off any initial 'Is'. This is because we
* promise that an initial Is is optional. The same isn't true of
* names that start with 'In'. Those can match only blocks, and the
* lookup table already has those accounted for. */
if (starts_with_Is) {
lookup_name += 2;
lookup_len -= 2;
equals_pos -= 2;
Expand Down
2 changes: 1 addition & 1 deletion uni_keywords.h
Original file line number Diff line number Diff line change
Expand Up @@ -7284,7 +7284,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
* 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
* 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
* b3d90fc23817ea4e33e9a90107c0a6c7b23314efd5712905ed172624d5524693 lib/unicore/mktables
* a53648677d262457dda0b22efba8820d2a45ca6ebae01f8c73d30db380eb83b5 lib/unicore/mktables
* a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
Expand Down

0 comments on commit 74333e9

Please sign in to comment.