diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index bffbba1a72351..bf7772492e5bc 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -229,6 +229,9 @@ impl char { '\r' => EscapeDefaultState::Backslash('r'), '\n' => EscapeDefaultState::Backslash('n'), '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + _ if self.is_grapheme_extended() => { + EscapeDefaultState::Unicode(self.escape_unicode()) + } _ if is_printable(self) => EscapeDefaultState::Char(self), _ => EscapeDefaultState::Unicode(self.escape_unicode()), }; @@ -692,16 +695,13 @@ impl char { general_category::Cc(self) } - /// Returns true if this `char` is a nonspacing mark code point, and false otherwise. + /// Returns true if this `char` is an extended grapheme character, and false otherwise. /// - /// 'Nonspacing mark code point' is defined in terms of the Unicode General - /// Category `Mn`. - #[unstable(feature = "rustc_private", - reason = "mainly needed for compiler internals", - issue = "27812")] + /// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering + /// Category `Grapheme_Extend`. #[inline] - pub fn is_nonspacing_mark(self) -> bool { - general_category::Mn(self) + pub(crate) fn is_grapheme_extended(self) -> bool { + derived_property::Grapheme_Extend(self) } /// Returns true if this `char` is numeric, and false otherwise. diff --git a/src/libcore/unicode/tables.rs b/src/libcore/unicode/tables.rs index 1fdd92cd47d3f..7f9c52e3aa562 100644 --- a/src/libcore/unicode/tables.rs +++ b/src/libcore/unicode/tables.rs @@ -38,126 +38,6 @@ pub mod general_category { Cc_table.lookup(c) } - pub const Mn_table: &super::BoolTrie = &super::BoolTrie { - r1: [ - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, - 0xffffffffffffffff, 0x0000ffffffffffff, 0x0000000000000000, 0x0000000000000000, - 0x0000000000000000, 0x0000000000000000, 0x00000000000000f8, 0x0000000000000000, - 0x0000000000000000, 0x0000000000000000, 0xbffffffffffe0000, 0x00000000000000b6, - 0x0000000007ff0000, 0x00010000fffff800, 0x0000000000000000, 0x00003d9f9fc00000, - 0xffff000000020000, 0x00000000000007ff, 0x0001ffc000000000, 0x000ff80000000000 - ], - r2: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 10, 11, 12, 13, 14, 15, 16, 11, 17, 18, 7, 2, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 31, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 32, 33, 34, 35, 36, 2, 37, 2, 38, 2, 2, 2, 39, 40, 41, 2, 42, - 43, 44, 45, 46, 2, 2, 47, 2, 2, 2, 48, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 49, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50, 2, 51, 2, 52, 2, 2, 2, 2, 2, 2, 2, 2, 53, - 2, 54, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 55, 56, 57, 2, 2, 2, 2, 58, 2, 2, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 2, 2, 2, 68, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 69, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, 2, 2, 2, 2, 2, 2, 2 - ], - r3: &[ - 0x00003eeffbc00000, 0x000000000e000000, 0x0000000000000000, 0xfffffffbfff00000, - 0x1400000000000007, 0x0000000c00fe21fe, 0x1000000000000002, 0x0000000c0000201e, - 0x1000000000000006, 0x0023000000023986, 0xfc00000c000021be, 0x9000000000000002, - 0x0000000c0040201e, 0x0000000000000004, 0x0000000000002001, 0xc000000000000001, - 0x0000000c00603dc1, 0x0000000c00003040, 0x1800000000000003, 0x00000000005c0400, - 0x07f2000000000000, 0x0000000000007f80, 0x1bf2000000000000, 0x0000000000003f00, - 0x02a0000003000000, 0x7ffe000000000000, 0x1ffffffffeffe0df, 0x0000000000000040, - 0x66fde00000000000, 0x001e0001c3000000, 0x0000000020002064, 0x00000000e0000000, - 0x001c0000001c0000, 0x000c0000000c0000, 0x3fb0000000000000, 0x00000000200ffe40, - 0x0000000000003800, 0x0000020000000060, 0x0e04018700000000, 0x0000000009800000, - 0x9ff81fe57f400000, 0x3fff000000000000, 0x17d000000000000f, 0x000ff80000000004, - 0x00003b3c00000003, 0x0003a34000000000, 0x00cff00000000000, 0x031021fdfff70000, - 0xfbffffffffffffff, 0x0001ffe21fff0000, 0x0003800000000000, 0x8000000000000000, - 0xffffffff00000000, 0x00003c0000000000, 0x0000000006000000, 0x3ff0800000000000, - 0x00000000c0000000, 0x0003000000000000, 0x0000006000000844, 0x0003ffff00000030, - 0x00003fc000000000, 0x000000000003ff80, 0x13c8000000000007, 0x0000002000000000, - 0x00667e0000000000, 0x1000000000001008, 0xc19d000000000000, 0x0040300000000002, - 0x0000212000000000, 0x0000000040000000, 0x0000ffff0000ffff - ], - r4: [ - 0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - ], - r5: &[ - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 10, 11, 12, 13, 0, 0, 14, 15, 16, 0, 0, 17, 18, 19, 20, - 0, 0, 21, 22, 23, 24, 25, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 28, 29, 0, 0, 0, - 0, 0, 30, 0, 31, 0, 32, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 39, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 46, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 47, - 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - ], - r6: &[ - 0x0000000000000000, 0x2000000000000000, 0x0000000100000000, 0x07c0000000000000, - 0x870000000000f06e, 0x0000006000000000, 0xff00000000000002, 0x800000000000007f, - 0x0678000000000003, 0x001fef8000000007, 0x0008000000000000, 0x7fc0000000000003, - 0x0000000000001c00, 0x40d3800000000000, 0x000007f880000000, 0x1000000000000003, - 0x001f1fc000000001, 0xff00000000000000, 0x000000000000005c, 0x85f8000000000000, - 0x000000000000000d, 0xb03c000000000000, 0x0000000030000001, 0xa7f8000000000000, - 0x0000000000000001, 0x00bf280000000000, 0x00000fbce0000000, 0x79f800000000067e, - 0x000000000e7e0080, 0x00000000037ffc00, 0xbf7f000000000000, 0x006dfcfffffc0000, - 0xb47e000000000000, 0x00000000000000bf, 0x001f000000000000, 0x007f000000000000, - 0x0000000000078000, 0x0000000060000000, 0xf800038000000000, 0x00003c0000000fe7, - 0x000000000000001c, 0xf87fffffffffffff, 0x00201fffffffffff, 0x0000fffef8000010, - 0x000007dbf9ffff7f, 0x00000000007f0000, 0x00000000000007f0, 0xffffffffffffffff, - 0x0000ffffffffffff - ], - }; - - pub fn Mn(c: char) -> bool { - Mn_table.lookup(c) - } - pub const N_table: &super::BoolTrie = &super::BoolTrie { r1: [ 0x03ff000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, @@ -213,7 +93,7 @@ pub mod general_category { 0x03ff000003ff0000 ], r4: [ - 0, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 5, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -235,17 +115,12 @@ pub mod general_category { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + 0 ], r6: &[ 0x0000000000000000, 0x001fffffffffffff, 0x0000000000000402, 0x00000000003e0000, 0x000003ff00000000, 0x0000ffc000000000, 0x03ff000000000000, 0xffc0000000000000, - 0x0000000003ff0000, 0x00000000000003ff, 0xffffffffffffffff, 0x00007fffffffffff, - 0xffffffffffffc000 + 0x0000000003ff0000, 0x00000000000003ff, 0xffffffffffffffff, 0x00007fffffffffff ], }; @@ -669,6 +544,127 @@ pub mod derived_property { Cased_table.lookup(c) } + pub const Grapheme_Extend_table: &super::BoolTrie = &super::BoolTrie { + r1: [ + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0xffffffffffffffff, 0x0000ffffffffffff, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x00000000000003f8, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0xbffffffffffe0000, 0x00000000000000b6, + 0x0000000007ff0000, 0x00010000fffff800, 0x0000000000000000, 0x00003d9f9fc00000, + 0xffff000000020000, 0x00000000000007ff, 0x0001ffc000000000, 0x000ff80000000000 + ], + r2: [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 7, 2, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 32, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 33, 34, 35, 36, 37, 2, 38, 2, 39, 2, 2, 2, 40, 41, 42, 2, 43, + 44, 45, 46, 47, 2, 2, 48, 2, 2, 2, 49, 2, 2, 2, 2, 2, 2, 2, 2, 50, 2, 2, 51, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 52, 2, 53, 2, 54, 2, 2, 2, 2, 2, 2, 2, 2, 55, + 2, 56, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 57, 58, 59, 2, 2, 2, 2, 60, 2, 2, 61, 62, 63, 64, 65, 66, 67, + 68, 69, 2, 2, 2, 70, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 71, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 72, 2, 2, 2, 2, 2, 58, 2 + ], + r3: &[ + 0x00003eeffbc00000, 0x000000000e000000, 0x0000000000000000, 0xfffffffbfff00000, + 0x1400000000000007, 0x0000000c00fe21fe, 0x5000000000000002, 0x0000000c0080201e, + 0x1000000000000006, 0x0023000000023986, 0xfc00000c000021be, 0xd000000000000002, + 0x0000000c00c0201e, 0x4000000000000004, 0x0000000000802001, 0xc000000000000001, + 0x0000000c00603dc1, 0x9000000000000002, 0x0000000c00603044, 0x5800000000000003, + 0x00000000805c8400, 0x07f2000000000000, 0x0000000000007f80, 0x1bf2000000000000, + 0x0000000000003f00, 0x02a0000003000000, 0x7ffe000000000000, 0x1ffffffffeffe0df, + 0x0000000000000040, 0x66fde00000000000, 0x001e0001c3000000, 0x0000000020002064, + 0x00000000e0000000, 0x001c0000001c0000, 0x000c0000000c0000, 0x3fb0000000000000, + 0x00000000200ffe40, 0x0000000000003800, 0x0000020000000060, 0x0e04018700000000, + 0x0000000009800000, 0x9ff81fe57f400000, 0x7fff000000000000, 0x17d000000000000f, + 0x000ff80000000004, 0x00003b3c00000003, 0x0003a34000000000, 0x00cff00000000000, + 0x031021fdfff70000, 0xfbffffffffffffff, 0x0000000000001000, 0x0001ffffffff0000, + 0x0003800000000000, 0x8000000000000000, 0xffffffff00000000, 0x0000fc0000000000, + 0x0000000006000000, 0x3ff7800000000000, 0x00000000c0000000, 0x0003000000000000, + 0x0000006000000844, 0x0003ffff00000030, 0x00003fc000000000, 0x000000000003ff80, + 0x13c8000000000007, 0x0000002000000000, 0x00667e0000000000, 0x1000000000001008, + 0xc19d000000000000, 0x0040300000000002, 0x0000212000000000, 0x0000000040000000, + 0x0000ffff0000ffff + ], + r4: [ + 0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + ], + r5: &[ + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 10, 11, 12, 13, 0, 0, 14, 15, 16, 0, 0, 17, 18, 19, 20, + 0, 0, 21, 22, 23, 24, 25, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 28, 29, 0, 0, 0, + 0, 0, 30, 0, 31, 0, 32, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 39, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 46, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 48, 0, 0, 48, 48, + 48, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + ], + r6: &[ + 0x0000000000000000, 0x2000000000000000, 0x0000000100000000, 0x07c0000000000000, + 0x870000000000f06e, 0x0000006000000000, 0xff00000000000002, 0x800000000000007f, + 0x0678000000000003, 0x001fef8000000007, 0x0008000000000000, 0x7fc0000000000003, + 0x0000000000001c00, 0x40d3800000000000, 0x000007f880000000, 0x5000000000000003, + 0x001f1fc000800001, 0xff00000000000000, 0x000000000000005c, 0xa5f9000000000000, + 0x000000000000000d, 0xb03c800000000000, 0x0000000030000001, 0xa7f8000000000000, + 0x0000000000000001, 0x00bf280000000000, 0x00000fbce0000000, 0x79f800000000067e, + 0x000000000e7e0080, 0x00000000037ffc00, 0xbf7f000000000000, 0x006dfcfffffc0000, + 0xb47e000000000000, 0x00000000000000bf, 0x001f000000000000, 0x007f000000000000, + 0x0000000000078000, 0x0000000060000000, 0xf807c3a000000000, 0x00003c0000000fe7, + 0x000000000000001c, 0xf87fffffffffffff, 0x00201fffffffffff, 0x0000fffef8000010, + 0x000007dbf9ffff7f, 0x00000000007f0000, 0x00000000000007f0, 0xffffffff00000000, + 0xffffffffffffffff, 0x0000ffffffffffff + ], + }; + + pub fn Grapheme_Extend(c: char) -> bool { + Grapheme_Extend_table.lookup(c) + } + pub const Lowercase_table: &super::BoolTrie = &super::BoolTrie { r1: [ 0x0000000000000000, 0x07fffffe00000000, 0x0420040000000000, 0xff7fffff80000000, @@ -1841,24 +1837,7 @@ pub mod conversions { ('\u{118b8}', ['\u{118d8}', '\0', '\0']), ('\u{118b9}', ['\u{118d9}', '\0', '\0']), ('\u{118ba}', ['\u{118da}', '\0', '\0']), ('\u{118bb}', ['\u{118db}', '\0', '\0']), ('\u{118bc}', ['\u{118dc}', '\0', '\0']), ('\u{118bd}', ['\u{118dd}', '\0', '\0']), - ('\u{118be}', ['\u{118de}', '\0', '\0']), ('\u{118bf}', ['\u{118df}', '\0', '\0']), - ('\u{1e900}', ['\u{1e922}', '\0', '\0']), ('\u{1e901}', ['\u{1e923}', '\0', '\0']), - ('\u{1e902}', ['\u{1e924}', '\0', '\0']), ('\u{1e903}', ['\u{1e925}', '\0', '\0']), - ('\u{1e904}', ['\u{1e926}', '\0', '\0']), ('\u{1e905}', ['\u{1e927}', '\0', '\0']), - ('\u{1e906}', ['\u{1e928}', '\0', '\0']), ('\u{1e907}', ['\u{1e929}', '\0', '\0']), - ('\u{1e908}', ['\u{1e92a}', '\0', '\0']), ('\u{1e909}', ['\u{1e92b}', '\0', '\0']), - ('\u{1e90a}', ['\u{1e92c}', '\0', '\0']), ('\u{1e90b}', ['\u{1e92d}', '\0', '\0']), - ('\u{1e90c}', ['\u{1e92e}', '\0', '\0']), ('\u{1e90d}', ['\u{1e92f}', '\0', '\0']), - ('\u{1e90e}', ['\u{1e930}', '\0', '\0']), ('\u{1e90f}', ['\u{1e931}', '\0', '\0']), - ('\u{1e910}', ['\u{1e932}', '\0', '\0']), ('\u{1e911}', ['\u{1e933}', '\0', '\0']), - ('\u{1e912}', ['\u{1e934}', '\0', '\0']), ('\u{1e913}', ['\u{1e935}', '\0', '\0']), - ('\u{1e914}', ['\u{1e936}', '\0', '\0']), ('\u{1e915}', ['\u{1e937}', '\0', '\0']), - ('\u{1e916}', ['\u{1e938}', '\0', '\0']), ('\u{1e917}', ['\u{1e939}', '\0', '\0']), - ('\u{1e918}', ['\u{1e93a}', '\0', '\0']), ('\u{1e919}', ['\u{1e93b}', '\0', '\0']), - ('\u{1e91a}', ['\u{1e93c}', '\0', '\0']), ('\u{1e91b}', ['\u{1e93d}', '\0', '\0']), - ('\u{1e91c}', ['\u{1e93e}', '\0', '\0']), ('\u{1e91d}', ['\u{1e93f}', '\0', '\0']), - ('\u{1e91e}', ['\u{1e940}', '\0', '\0']), ('\u{1e91f}', ['\u{1e941}', '\0', '\0']), - ('\u{1e920}', ['\u{1e942}', '\0', '\0']), ('\u{1e921}', ['\u{1e943}', '\0', '\0']) + ('\u{118be}', ['\u{118de}', '\0', '\0']), ('\u{118bf}', ['\u{118df}', '\0', '\0']) ]; const to_uppercase_table: &[(char, [char; 3])] = &[ @@ -2472,24 +2451,7 @@ pub mod conversions { ('\u{118d8}', ['\u{118b8}', '\0', '\0']), ('\u{118d9}', ['\u{118b9}', '\0', '\0']), ('\u{118da}', ['\u{118ba}', '\0', '\0']), ('\u{118db}', ['\u{118bb}', '\0', '\0']), ('\u{118dc}', ['\u{118bc}', '\0', '\0']), ('\u{118dd}', ['\u{118bd}', '\0', '\0']), - ('\u{118de}', ['\u{118be}', '\0', '\0']), ('\u{118df}', ['\u{118bf}', '\0', '\0']), - ('\u{1e922}', ['\u{1e900}', '\0', '\0']), ('\u{1e923}', ['\u{1e901}', '\0', '\0']), - ('\u{1e924}', ['\u{1e902}', '\0', '\0']), ('\u{1e925}', ['\u{1e903}', '\0', '\0']), - ('\u{1e926}', ['\u{1e904}', '\0', '\0']), ('\u{1e927}', ['\u{1e905}', '\0', '\0']), - ('\u{1e928}', ['\u{1e906}', '\0', '\0']), ('\u{1e929}', ['\u{1e907}', '\0', '\0']), - ('\u{1e92a}', ['\u{1e908}', '\0', '\0']), ('\u{1e92b}', ['\u{1e909}', '\0', '\0']), - ('\u{1e92c}', ['\u{1e90a}', '\0', '\0']), ('\u{1e92d}', ['\u{1e90b}', '\0', '\0']), - ('\u{1e92e}', ['\u{1e90c}', '\0', '\0']), ('\u{1e92f}', ['\u{1e90d}', '\0', '\0']), - ('\u{1e930}', ['\u{1e90e}', '\0', '\0']), ('\u{1e931}', ['\u{1e90f}', '\0', '\0']), - ('\u{1e932}', ['\u{1e910}', '\0', '\0']), ('\u{1e933}', ['\u{1e911}', '\0', '\0']), - ('\u{1e934}', ['\u{1e912}', '\0', '\0']), ('\u{1e935}', ['\u{1e913}', '\0', '\0']), - ('\u{1e936}', ['\u{1e914}', '\0', '\0']), ('\u{1e937}', ['\u{1e915}', '\0', '\0']), - ('\u{1e938}', ['\u{1e916}', '\0', '\0']), ('\u{1e939}', ['\u{1e917}', '\0', '\0']), - ('\u{1e93a}', ['\u{1e918}', '\0', '\0']), ('\u{1e93b}', ['\u{1e919}', '\0', '\0']), - ('\u{1e93c}', ['\u{1e91a}', '\0', '\0']), ('\u{1e93d}', ['\u{1e91b}', '\0', '\0']), - ('\u{1e93e}', ['\u{1e91c}', '\0', '\0']), ('\u{1e93f}', ['\u{1e91d}', '\0', '\0']), - ('\u{1e940}', ['\u{1e91e}', '\0', '\0']), ('\u{1e941}', ['\u{1e91f}', '\0', '\0']), - ('\u{1e942}', ['\u{1e920}', '\0', '\0']), ('\u{1e943}', ['\u{1e921}', '\0', '\0']) + ('\u{118de}', ['\u{118be}', '\0', '\0']), ('\u{118df}', ['\u{118bf}', '\0', '\0']) ]; } diff --git a/src/libcore/unicode/unicode.py b/src/libcore/unicode/unicode.py index ce22c6f226a44..1da5878c4c6d3 100755 --- a/src/libcore/unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -486,7 +486,7 @@ def emit_norm_module(f, canon, compat, combine, norm_props): to_upper, to_lower, to_title) = load_unicode_data("UnicodeData.txt") load_special_casing("SpecialCasing.txt", to_upper, to_lower, to_title) want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase", - "Cased", "Case_Ignorable"] + "Cased", "Case_Ignorable", "Grapheme_Extend"] derived = load_properties("DerivedCoreProperties.txt", want_derived) scripts = load_properties("Scripts.txt", []) props = load_properties("PropList.txt", @@ -495,7 +495,7 @@ def emit_norm_module(f, canon, compat, combine, norm_props): ["Full_Composition_Exclusion"]) # category tables - for (name, cat, pfuns) in ("general_category", gencats, ["N", "Cc", "Mn"]), \ + for (name, cat, pfuns) in ("general_category", gencats, ["N", "Cc"]), \ ("derived_property", derived, want_derived), \ ("property", props, ["White_Space", "Pattern_White_Space"]): emit_property_module(rf, name, cat, pfuns) @@ -503,3 +503,4 @@ def emit_norm_module(f, canon, compat, combine, norm_props): # normalizations and conversions module emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props) emit_conversions_module(rf, to_upper, to_lower, to_title) + print("Regenerated tables.rs.")