From dfe38dbca4b62132d7512f767bca6ebe6ddfe931 Mon Sep 17 00:00:00 2001 From: Florian Zeitz Date: Tue, 26 Nov 2013 06:15:45 +0100 Subject: [PATCH] Fix handling of upper/lowercase, and whitespace --- src/etc/unicode.py | 22 +- src/libstd/char.rs | 18 +- src/libstd/unicode.rs | 662 +++++++++++++++++++++++++ src/test/pretty/block-comment-wchar.pp | 11 +- src/test/pretty/block-comment-wchar.rs | 5 +- 5 files changed, 689 insertions(+), 29 deletions(-) diff --git a/src/etc/unicode.py b/src/etc/unicode.py index 7c49b9b57726a..d35263c100f4b 100755 --- a/src/etc/unicode.py +++ b/src/etc/unicode.py @@ -41,7 +41,7 @@ def load_unicode_data(f): continue [code, name, gencat, combine, bidi, decomp, deci, digit, num, mirror, - old, iso, upcase, lowcsae, titlecase ] = fields + old, iso, upcase, lowcase, titlecase ] = fields code = int(code, 16) @@ -89,11 +89,9 @@ def load_unicode_data(f): return (canon_decomp, compat_decomp, gencats, combines) - -def load_derived_core_properties(f): +def load_properties(f, interestingprops): fetch(f) - derivedprops = {} - interestingprops = ["XID_Start", "XID_Continue", "Alphabetic"] + props = {} re1 = re.compile("^([0-9A-F]+) +; (\w+)") re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+) +; (\w+)") @@ -118,10 +116,10 @@ def load_derived_core_properties(f): continue d_lo = int(d_lo, 16) d_hi = int(d_hi, 16) - if prop not in derivedprops: - derivedprops[prop] = [] - derivedprops[prop].append((d_lo, d_hi)) - return derivedprops + if prop not in props: + props[prop] = [] + props[prop].append((d_lo, d_hi)) + return props def escape_char(c): if c <= 0xff: @@ -376,5 +374,9 @@ def emit_decomp_module(f, canon, compat, combine): emit_decomp_module(rf, canon_decomp, compat_decomp, combines) -derived = load_derived_core_properties("DerivedCoreProperties.txt") +derived = load_properties("DerivedCoreProperties.txt", + ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]) emit_property_module(rf, "derived_property", derived) + +props = load_properties("PropList.txt", ["White_Space"]) +emit_property_module(rf, "property", props) diff --git a/src/libstd/char.rs b/src/libstd/char.rs index b372993e3e506..3019e47c9f93e 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -14,7 +14,7 @@ use cast::transmute; use option::{None, Option, Some}; use iter::{Iterator, range_step}; use str::StrSlice; -use unicode::{derived_property, general_category, decompose}; +use unicode::{derived_property, property, general_category, decompose}; use to_str::ToStr; use str; @@ -89,30 +89,28 @@ pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) } /// /// Indicates whether a character is in lower case, defined -/// in terms of the Unicode General Category 'Ll' +/// in terms of the Unicode Derived Core Property 'Lowercase'. /// #[inline] -pub fn is_lowercase(c: char) -> bool { general_category::Ll(c) } +pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) } /// /// Indicates whether a character is in upper case, defined -/// in terms of the Unicode General Category 'Lu'. +/// in terms of the Unicode Derived Core Property 'Uppercase'. /// #[inline] -pub fn is_uppercase(c: char) -> bool { general_category::Lu(c) } +pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) } /// /// Indicates whether a character is whitespace. Whitespace is defined in -/// terms of the Unicode General Categories 'Zs', 'Zl', 'Zp' -/// additional 'Cc'-category control codes in the range [0x09, 0x0d] +/// terms of the Unicode Property 'White_Space'. /// #[inline] pub fn is_whitespace(c: char) -> bool { + // As an optimization ASCII whitespace characters are checked separately c == ' ' || ('\x09' <= c && c <= '\x0d') - || general_category::Zs(c) - || general_category::Zl(c) - || general_category::Zp(c) + || property::White_Space(c) } /// diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs index 397bf8ae549dc..f19ce98ff1a33 100644 --- a/src/libstd/unicode.rs +++ b/src/libstd/unicode.rs @@ -4087,6 +4087,640 @@ pub mod derived_property { bsearch_range_table(c, Alphabetic_table) } + static Lowercase_table : &'static [(char,char)] = &[ + ('\x61', '\x7a'), ('\xaa', '\xaa'), + ('\xb5', '\xb5'), ('\xba', '\xba'), + ('\xdf', '\xf6'), ('\xf8', '\xff'), + ('\u0101', '\u0101'), ('\u0103', '\u0103'), + ('\u0105', '\u0105'), ('\u0107', '\u0107'), + ('\u0109', '\u0109'), ('\u010b', '\u010b'), + ('\u010d', '\u010d'), ('\u010f', '\u010f'), + ('\u0111', '\u0111'), ('\u0113', '\u0113'), + ('\u0115', '\u0115'), ('\u0117', '\u0117'), + ('\u0119', '\u0119'), ('\u011b', '\u011b'), + ('\u011d', '\u011d'), ('\u011f', '\u011f'), + ('\u0121', '\u0121'), ('\u0123', '\u0123'), + ('\u0125', '\u0125'), ('\u0127', '\u0127'), + ('\u0129', '\u0129'), ('\u012b', '\u012b'), + ('\u012d', '\u012d'), ('\u012f', '\u012f'), + ('\u0131', '\u0131'), ('\u0133', '\u0133'), + ('\u0135', '\u0135'), ('\u0137', '\u0138'), + ('\u013a', '\u013a'), ('\u013c', '\u013c'), + ('\u013e', '\u013e'), ('\u0140', '\u0140'), + ('\u0142', '\u0142'), ('\u0144', '\u0144'), + ('\u0146', '\u0146'), ('\u0148', '\u0149'), + ('\u014b', '\u014b'), ('\u014d', '\u014d'), + ('\u014f', '\u014f'), ('\u0151', '\u0151'), + ('\u0153', '\u0153'), ('\u0155', '\u0155'), + ('\u0157', '\u0157'), ('\u0159', '\u0159'), + ('\u015b', '\u015b'), ('\u015d', '\u015d'), + ('\u015f', '\u015f'), ('\u0161', '\u0161'), + ('\u0163', '\u0163'), ('\u0165', '\u0165'), + ('\u0167', '\u0167'), ('\u0169', '\u0169'), + ('\u016b', '\u016b'), ('\u016d', '\u016d'), + ('\u016f', '\u016f'), ('\u0171', '\u0171'), + ('\u0173', '\u0173'), ('\u0175', '\u0175'), + ('\u0177', '\u0177'), ('\u017a', '\u017a'), + ('\u017c', '\u017c'), ('\u017e', '\u0180'), + ('\u0183', '\u0183'), ('\u0185', '\u0185'), + ('\u0188', '\u0188'), ('\u018c', '\u018d'), + ('\u0192', '\u0192'), ('\u0195', '\u0195'), + ('\u0199', '\u019b'), ('\u019e', '\u019e'), + ('\u01a1', '\u01a1'), ('\u01a3', '\u01a3'), + ('\u01a5', '\u01a5'), ('\u01a8', '\u01a8'), + ('\u01aa', '\u01ab'), ('\u01ad', '\u01ad'), + ('\u01b0', '\u01b0'), ('\u01b4', '\u01b4'), + ('\u01b6', '\u01b6'), ('\u01b9', '\u01ba'), + ('\u01bd', '\u01bf'), ('\u01c6', '\u01c6'), + ('\u01c9', '\u01c9'), ('\u01cc', '\u01cc'), + ('\u01ce', '\u01ce'), ('\u01d0', '\u01d0'), + ('\u01d2', '\u01d2'), ('\u01d4', '\u01d4'), + ('\u01d6', '\u01d6'), ('\u01d8', '\u01d8'), + ('\u01da', '\u01da'), ('\u01dc', '\u01dd'), + ('\u01df', '\u01df'), ('\u01e1', '\u01e1'), + ('\u01e3', '\u01e3'), ('\u01e5', '\u01e5'), + ('\u01e7', '\u01e7'), ('\u01e9', '\u01e9'), + ('\u01eb', '\u01eb'), ('\u01ed', '\u01ed'), + ('\u01ef', '\u01f0'), ('\u01f3', '\u01f3'), + ('\u01f5', '\u01f5'), ('\u01f9', '\u01f9'), + ('\u01fb', '\u01fb'), ('\u01fd', '\u01fd'), + ('\u01ff', '\u01ff'), ('\u0201', '\u0201'), + ('\u0203', '\u0203'), ('\u0205', '\u0205'), + ('\u0207', '\u0207'), ('\u0209', '\u0209'), + ('\u020b', '\u020b'), ('\u020d', '\u020d'), + ('\u020f', '\u020f'), ('\u0211', '\u0211'), + ('\u0213', '\u0213'), ('\u0215', '\u0215'), + ('\u0217', '\u0217'), ('\u0219', '\u0219'), + ('\u021b', '\u021b'), ('\u021d', '\u021d'), + ('\u021f', '\u021f'), ('\u0221', '\u0221'), + ('\u0223', '\u0223'), ('\u0225', '\u0225'), + ('\u0227', '\u0227'), ('\u0229', '\u0229'), + ('\u022b', '\u022b'), ('\u022d', '\u022d'), + ('\u022f', '\u022f'), ('\u0231', '\u0231'), + ('\u0233', '\u0239'), ('\u023c', '\u023c'), + ('\u023f', '\u0240'), ('\u0242', '\u0242'), + ('\u0247', '\u0247'), ('\u0249', '\u0249'), + ('\u024b', '\u024b'), ('\u024d', '\u024d'), + ('\u024f', '\u0293'), ('\u0295', '\u02af'), + ('\u02b0', '\u02b8'), ('\u02c0', '\u02c1'), + ('\u02e0', '\u02e4'), ('\u0345', '\u0345'), + ('\u0371', '\u0371'), ('\u0373', '\u0373'), + ('\u0377', '\u0377'), ('\u037a', '\u037a'), + ('\u037b', '\u037d'), ('\u0390', '\u0390'), + ('\u03ac', '\u03ce'), ('\u03d0', '\u03d1'), + ('\u03d5', '\u03d7'), ('\u03d9', '\u03d9'), + ('\u03db', '\u03db'), ('\u03dd', '\u03dd'), + ('\u03df', '\u03df'), ('\u03e1', '\u03e1'), + ('\u03e3', '\u03e3'), ('\u03e5', '\u03e5'), + ('\u03e7', '\u03e7'), ('\u03e9', '\u03e9'), + ('\u03eb', '\u03eb'), ('\u03ed', '\u03ed'), + ('\u03ef', '\u03f3'), ('\u03f5', '\u03f5'), + ('\u03f8', '\u03f8'), ('\u03fb', '\u03fc'), + ('\u0430', '\u045f'), ('\u0461', '\u0461'), + ('\u0463', '\u0463'), ('\u0465', '\u0465'), + ('\u0467', '\u0467'), ('\u0469', '\u0469'), + ('\u046b', '\u046b'), ('\u046d', '\u046d'), + ('\u046f', '\u046f'), ('\u0471', '\u0471'), + ('\u0473', '\u0473'), ('\u0475', '\u0475'), + ('\u0477', '\u0477'), ('\u0479', '\u0479'), + ('\u047b', '\u047b'), ('\u047d', '\u047d'), + ('\u047f', '\u047f'), ('\u0481', '\u0481'), + ('\u048b', '\u048b'), ('\u048d', '\u048d'), + ('\u048f', '\u048f'), ('\u0491', '\u0491'), + ('\u0493', '\u0493'), ('\u0495', '\u0495'), + ('\u0497', '\u0497'), ('\u0499', '\u0499'), + ('\u049b', '\u049b'), ('\u049d', '\u049d'), + ('\u049f', '\u049f'), ('\u04a1', '\u04a1'), + ('\u04a3', '\u04a3'), ('\u04a5', '\u04a5'), + ('\u04a7', '\u04a7'), ('\u04a9', '\u04a9'), + ('\u04ab', '\u04ab'), ('\u04ad', '\u04ad'), + ('\u04af', '\u04af'), ('\u04b1', '\u04b1'), + ('\u04b3', '\u04b3'), ('\u04b5', '\u04b5'), + ('\u04b7', '\u04b7'), ('\u04b9', '\u04b9'), + ('\u04bb', '\u04bb'), ('\u04bd', '\u04bd'), + ('\u04bf', '\u04bf'), ('\u04c2', '\u04c2'), + ('\u04c4', '\u04c4'), ('\u04c6', '\u04c6'), + ('\u04c8', '\u04c8'), ('\u04ca', '\u04ca'), + ('\u04cc', '\u04cc'), ('\u04ce', '\u04cf'), + ('\u04d1', '\u04d1'), ('\u04d3', '\u04d3'), + ('\u04d5', '\u04d5'), ('\u04d7', '\u04d7'), + ('\u04d9', '\u04d9'), ('\u04db', '\u04db'), + ('\u04dd', '\u04dd'), ('\u04df', '\u04df'), + ('\u04e1', '\u04e1'), ('\u04e3', '\u04e3'), + ('\u04e5', '\u04e5'), ('\u04e7', '\u04e7'), + ('\u04e9', '\u04e9'), ('\u04eb', '\u04eb'), + ('\u04ed', '\u04ed'), ('\u04ef', '\u04ef'), + ('\u04f1', '\u04f1'), ('\u04f3', '\u04f3'), + ('\u04f5', '\u04f5'), ('\u04f7', '\u04f7'), + ('\u04f9', '\u04f9'), ('\u04fb', '\u04fb'), + ('\u04fd', '\u04fd'), ('\u04ff', '\u04ff'), + ('\u0501', '\u0501'), ('\u0503', '\u0503'), + ('\u0505', '\u0505'), ('\u0507', '\u0507'), + ('\u0509', '\u0509'), ('\u050b', '\u050b'), + ('\u050d', '\u050d'), ('\u050f', '\u050f'), + ('\u0511', '\u0511'), ('\u0513', '\u0513'), + ('\u0515', '\u0515'), ('\u0517', '\u0517'), + ('\u0519', '\u0519'), ('\u051b', '\u051b'), + ('\u051d', '\u051d'), ('\u051f', '\u051f'), + ('\u0521', '\u0521'), ('\u0523', '\u0523'), + ('\u0525', '\u0525'), ('\u0527', '\u0527'), + ('\u0561', '\u0587'), ('\u1d00', '\u1d2b'), + ('\u1d2c', '\u1d6a'), ('\u1d6b', '\u1d77'), + ('\u1d78', '\u1d78'), ('\u1d79', '\u1d9a'), + ('\u1d9b', '\u1dbf'), ('\u1e01', '\u1e01'), + ('\u1e03', '\u1e03'), ('\u1e05', '\u1e05'), + ('\u1e07', '\u1e07'), ('\u1e09', '\u1e09'), + ('\u1e0b', '\u1e0b'), ('\u1e0d', '\u1e0d'), + ('\u1e0f', '\u1e0f'), ('\u1e11', '\u1e11'), + ('\u1e13', '\u1e13'), ('\u1e15', '\u1e15'), + ('\u1e17', '\u1e17'), ('\u1e19', '\u1e19'), + ('\u1e1b', '\u1e1b'), ('\u1e1d', '\u1e1d'), + ('\u1e1f', '\u1e1f'), ('\u1e21', '\u1e21'), + ('\u1e23', '\u1e23'), ('\u1e25', '\u1e25'), + ('\u1e27', '\u1e27'), ('\u1e29', '\u1e29'), + ('\u1e2b', '\u1e2b'), ('\u1e2d', '\u1e2d'), + ('\u1e2f', '\u1e2f'), ('\u1e31', '\u1e31'), + ('\u1e33', '\u1e33'), ('\u1e35', '\u1e35'), + ('\u1e37', '\u1e37'), ('\u1e39', '\u1e39'), + ('\u1e3b', '\u1e3b'), ('\u1e3d', '\u1e3d'), + ('\u1e3f', '\u1e3f'), ('\u1e41', '\u1e41'), + ('\u1e43', '\u1e43'), ('\u1e45', '\u1e45'), + ('\u1e47', '\u1e47'), ('\u1e49', '\u1e49'), + ('\u1e4b', '\u1e4b'), ('\u1e4d', '\u1e4d'), + ('\u1e4f', '\u1e4f'), ('\u1e51', '\u1e51'), + ('\u1e53', '\u1e53'), ('\u1e55', '\u1e55'), + ('\u1e57', '\u1e57'), ('\u1e59', '\u1e59'), + ('\u1e5b', '\u1e5b'), ('\u1e5d', '\u1e5d'), + ('\u1e5f', '\u1e5f'), ('\u1e61', '\u1e61'), + ('\u1e63', '\u1e63'), ('\u1e65', '\u1e65'), + ('\u1e67', '\u1e67'), ('\u1e69', '\u1e69'), + ('\u1e6b', '\u1e6b'), ('\u1e6d', '\u1e6d'), + ('\u1e6f', '\u1e6f'), ('\u1e71', '\u1e71'), + ('\u1e73', '\u1e73'), ('\u1e75', '\u1e75'), + ('\u1e77', '\u1e77'), ('\u1e79', '\u1e79'), + ('\u1e7b', '\u1e7b'), ('\u1e7d', '\u1e7d'), + ('\u1e7f', '\u1e7f'), ('\u1e81', '\u1e81'), + ('\u1e83', '\u1e83'), ('\u1e85', '\u1e85'), + ('\u1e87', '\u1e87'), ('\u1e89', '\u1e89'), + ('\u1e8b', '\u1e8b'), ('\u1e8d', '\u1e8d'), + ('\u1e8f', '\u1e8f'), ('\u1e91', '\u1e91'), + ('\u1e93', '\u1e93'), ('\u1e95', '\u1e9d'), + ('\u1e9f', '\u1e9f'), ('\u1ea1', '\u1ea1'), + ('\u1ea3', '\u1ea3'), ('\u1ea5', '\u1ea5'), + ('\u1ea7', '\u1ea7'), ('\u1ea9', '\u1ea9'), + ('\u1eab', '\u1eab'), ('\u1ead', '\u1ead'), + ('\u1eaf', '\u1eaf'), ('\u1eb1', '\u1eb1'), + ('\u1eb3', '\u1eb3'), ('\u1eb5', '\u1eb5'), + ('\u1eb7', '\u1eb7'), ('\u1eb9', '\u1eb9'), + ('\u1ebb', '\u1ebb'), ('\u1ebd', '\u1ebd'), + ('\u1ebf', '\u1ebf'), ('\u1ec1', '\u1ec1'), + ('\u1ec3', '\u1ec3'), ('\u1ec5', '\u1ec5'), + ('\u1ec7', '\u1ec7'), ('\u1ec9', '\u1ec9'), + ('\u1ecb', '\u1ecb'), ('\u1ecd', '\u1ecd'), + ('\u1ecf', '\u1ecf'), ('\u1ed1', '\u1ed1'), + ('\u1ed3', '\u1ed3'), ('\u1ed5', '\u1ed5'), + ('\u1ed7', '\u1ed7'), ('\u1ed9', '\u1ed9'), + ('\u1edb', '\u1edb'), ('\u1edd', '\u1edd'), + ('\u1edf', '\u1edf'), ('\u1ee1', '\u1ee1'), + ('\u1ee3', '\u1ee3'), ('\u1ee5', '\u1ee5'), + ('\u1ee7', '\u1ee7'), ('\u1ee9', '\u1ee9'), + ('\u1eeb', '\u1eeb'), ('\u1eed', '\u1eed'), + ('\u1eef', '\u1eef'), ('\u1ef1', '\u1ef1'), + ('\u1ef3', '\u1ef3'), ('\u1ef5', '\u1ef5'), + ('\u1ef7', '\u1ef7'), ('\u1ef9', '\u1ef9'), + ('\u1efb', '\u1efb'), ('\u1efd', '\u1efd'), + ('\u1eff', '\u1f07'), ('\u1f10', '\u1f15'), + ('\u1f20', '\u1f27'), ('\u1f30', '\u1f37'), + ('\u1f40', '\u1f45'), ('\u1f50', '\u1f57'), + ('\u1f60', '\u1f67'), ('\u1f70', '\u1f7d'), + ('\u1f80', '\u1f87'), ('\u1f90', '\u1f97'), + ('\u1fa0', '\u1fa7'), ('\u1fb0', '\u1fb4'), + ('\u1fb6', '\u1fb7'), ('\u1fbe', '\u1fbe'), + ('\u1fc2', '\u1fc4'), ('\u1fc6', '\u1fc7'), + ('\u1fd0', '\u1fd3'), ('\u1fd6', '\u1fd7'), + ('\u1fe0', '\u1fe7'), ('\u1ff2', '\u1ff4'), + ('\u1ff6', '\u1ff7'), ('\u2071', '\u2071'), + ('\u207f', '\u207f'), ('\u2090', '\u209c'), + ('\u210a', '\u210a'), ('\u210e', '\u210f'), + ('\u2113', '\u2113'), ('\u212f', '\u212f'), + ('\u2134', '\u2134'), ('\u2139', '\u2139'), + ('\u213c', '\u213d'), ('\u2146', '\u2149'), + ('\u214e', '\u214e'), ('\u2170', '\u217f'), + ('\u2184', '\u2184'), ('\u24d0', '\u24e9'), + ('\u2c30', '\u2c5e'), ('\u2c61', '\u2c61'), + ('\u2c65', '\u2c66'), ('\u2c68', '\u2c68'), + ('\u2c6a', '\u2c6a'), ('\u2c6c', '\u2c6c'), + ('\u2c71', '\u2c71'), ('\u2c73', '\u2c74'), + ('\u2c76', '\u2c7b'), ('\u2c7c', '\u2c7d'), + ('\u2c81', '\u2c81'), ('\u2c83', '\u2c83'), + ('\u2c85', '\u2c85'), ('\u2c87', '\u2c87'), + ('\u2c89', '\u2c89'), ('\u2c8b', '\u2c8b'), + ('\u2c8d', '\u2c8d'), ('\u2c8f', '\u2c8f'), + ('\u2c91', '\u2c91'), ('\u2c93', '\u2c93'), + ('\u2c95', '\u2c95'), ('\u2c97', '\u2c97'), + ('\u2c99', '\u2c99'), ('\u2c9b', '\u2c9b'), + ('\u2c9d', '\u2c9d'), ('\u2c9f', '\u2c9f'), + ('\u2ca1', '\u2ca1'), ('\u2ca3', '\u2ca3'), + ('\u2ca5', '\u2ca5'), ('\u2ca7', '\u2ca7'), + ('\u2ca9', '\u2ca9'), ('\u2cab', '\u2cab'), + ('\u2cad', '\u2cad'), ('\u2caf', '\u2caf'), + ('\u2cb1', '\u2cb1'), ('\u2cb3', '\u2cb3'), + ('\u2cb5', '\u2cb5'), ('\u2cb7', '\u2cb7'), + ('\u2cb9', '\u2cb9'), ('\u2cbb', '\u2cbb'), + ('\u2cbd', '\u2cbd'), ('\u2cbf', '\u2cbf'), + ('\u2cc1', '\u2cc1'), ('\u2cc3', '\u2cc3'), + ('\u2cc5', '\u2cc5'), ('\u2cc7', '\u2cc7'), + ('\u2cc9', '\u2cc9'), ('\u2ccb', '\u2ccb'), + ('\u2ccd', '\u2ccd'), ('\u2ccf', '\u2ccf'), + ('\u2cd1', '\u2cd1'), ('\u2cd3', '\u2cd3'), + ('\u2cd5', '\u2cd5'), ('\u2cd7', '\u2cd7'), + ('\u2cd9', '\u2cd9'), ('\u2cdb', '\u2cdb'), + ('\u2cdd', '\u2cdd'), ('\u2cdf', '\u2cdf'), + ('\u2ce1', '\u2ce1'), ('\u2ce3', '\u2ce4'), + ('\u2cec', '\u2cec'), ('\u2cee', '\u2cee'), + ('\u2cf3', '\u2cf3'), ('\u2d00', '\u2d25'), + ('\u2d27', '\u2d27'), ('\u2d2d', '\u2d2d'), + ('\ua641', '\ua641'), ('\ua643', '\ua643'), + ('\ua645', '\ua645'), ('\ua647', '\ua647'), + ('\ua649', '\ua649'), ('\ua64b', '\ua64b'), + ('\ua64d', '\ua64d'), ('\ua64f', '\ua64f'), + ('\ua651', '\ua651'), ('\ua653', '\ua653'), + ('\ua655', '\ua655'), ('\ua657', '\ua657'), + ('\ua659', '\ua659'), ('\ua65b', '\ua65b'), + ('\ua65d', '\ua65d'), ('\ua65f', '\ua65f'), + ('\ua661', '\ua661'), ('\ua663', '\ua663'), + ('\ua665', '\ua665'), ('\ua667', '\ua667'), + ('\ua669', '\ua669'), ('\ua66b', '\ua66b'), + ('\ua66d', '\ua66d'), ('\ua681', '\ua681'), + ('\ua683', '\ua683'), ('\ua685', '\ua685'), + ('\ua687', '\ua687'), ('\ua689', '\ua689'), + ('\ua68b', '\ua68b'), ('\ua68d', '\ua68d'), + ('\ua68f', '\ua68f'), ('\ua691', '\ua691'), + ('\ua693', '\ua693'), ('\ua695', '\ua695'), + ('\ua697', '\ua697'), ('\ua723', '\ua723'), + ('\ua725', '\ua725'), ('\ua727', '\ua727'), + ('\ua729', '\ua729'), ('\ua72b', '\ua72b'), + ('\ua72d', '\ua72d'), ('\ua72f', '\ua731'), + ('\ua733', '\ua733'), ('\ua735', '\ua735'), + ('\ua737', '\ua737'), ('\ua739', '\ua739'), + ('\ua73b', '\ua73b'), ('\ua73d', '\ua73d'), + ('\ua73f', '\ua73f'), ('\ua741', '\ua741'), + ('\ua743', '\ua743'), ('\ua745', '\ua745'), + ('\ua747', '\ua747'), ('\ua749', '\ua749'), + ('\ua74b', '\ua74b'), ('\ua74d', '\ua74d'), + ('\ua74f', '\ua74f'), ('\ua751', '\ua751'), + ('\ua753', '\ua753'), ('\ua755', '\ua755'), + ('\ua757', '\ua757'), ('\ua759', '\ua759'), + ('\ua75b', '\ua75b'), ('\ua75d', '\ua75d'), + ('\ua75f', '\ua75f'), ('\ua761', '\ua761'), + ('\ua763', '\ua763'), ('\ua765', '\ua765'), + ('\ua767', '\ua767'), ('\ua769', '\ua769'), + ('\ua76b', '\ua76b'), ('\ua76d', '\ua76d'), + ('\ua76f', '\ua76f'), ('\ua770', '\ua770'), + ('\ua771', '\ua778'), ('\ua77a', '\ua77a'), + ('\ua77c', '\ua77c'), ('\ua77f', '\ua77f'), + ('\ua781', '\ua781'), ('\ua783', '\ua783'), + ('\ua785', '\ua785'), ('\ua787', '\ua787'), + ('\ua78c', '\ua78c'), ('\ua78e', '\ua78e'), + ('\ua791', '\ua791'), ('\ua793', '\ua793'), + ('\ua7a1', '\ua7a1'), ('\ua7a3', '\ua7a3'), + ('\ua7a5', '\ua7a5'), ('\ua7a7', '\ua7a7'), + ('\ua7a9', '\ua7a9'), ('\ua7f8', '\ua7f9'), + ('\ua7fa', '\ua7fa'), ('\ufb00', '\ufb06'), + ('\ufb13', '\ufb17'), ('\uff41', '\uff5a'), + ('\U00010428', '\U0001044f'), ('\U0001d41a', '\U0001d433'), + ('\U0001d44e', '\U0001d454'), ('\U0001d456', '\U0001d467'), + ('\U0001d482', '\U0001d49b'), ('\U0001d4b6', '\U0001d4b9'), + ('\U0001d4bb', '\U0001d4bb'), ('\U0001d4bd', '\U0001d4c3'), + ('\U0001d4c5', '\U0001d4cf'), ('\U0001d4ea', '\U0001d503'), + ('\U0001d51e', '\U0001d537'), ('\U0001d552', '\U0001d56b'), + ('\U0001d586', '\U0001d59f'), ('\U0001d5ba', '\U0001d5d3'), + ('\U0001d5ee', '\U0001d607'), ('\U0001d622', '\U0001d63b'), + ('\U0001d656', '\U0001d66f'), ('\U0001d68a', '\U0001d6a5'), + ('\U0001d6c2', '\U0001d6da'), ('\U0001d6dc', '\U0001d6e1'), + ('\U0001d6fc', '\U0001d714'), ('\U0001d716', '\U0001d71b'), + ('\U0001d736', '\U0001d74e'), ('\U0001d750', '\U0001d755'), + ('\U0001d770', '\U0001d788'), ('\U0001d78a', '\U0001d78f'), + ('\U0001d7aa', '\U0001d7c2'), ('\U0001d7c4', '\U0001d7c9'), + ('\U0001d7cb', '\U0001d7cb') + ]; + + pub fn Lowercase(c: char) -> bool { + bsearch_range_table(c, Lowercase_table) + } + + static Uppercase_table : &'static [(char,char)] = &[ + ('\x41', '\x5a'), ('\xc0', '\xd6'), + ('\xd8', '\xde'), ('\u0100', '\u0100'), + ('\u0102', '\u0102'), ('\u0104', '\u0104'), + ('\u0106', '\u0106'), ('\u0108', '\u0108'), + ('\u010a', '\u010a'), ('\u010c', '\u010c'), + ('\u010e', '\u010e'), ('\u0110', '\u0110'), + ('\u0112', '\u0112'), ('\u0114', '\u0114'), + ('\u0116', '\u0116'), ('\u0118', '\u0118'), + ('\u011a', '\u011a'), ('\u011c', '\u011c'), + ('\u011e', '\u011e'), ('\u0120', '\u0120'), + ('\u0122', '\u0122'), ('\u0124', '\u0124'), + ('\u0126', '\u0126'), ('\u0128', '\u0128'), + ('\u012a', '\u012a'), ('\u012c', '\u012c'), + ('\u012e', '\u012e'), ('\u0130', '\u0130'), + ('\u0132', '\u0132'), ('\u0134', '\u0134'), + ('\u0136', '\u0136'), ('\u0139', '\u0139'), + ('\u013b', '\u013b'), ('\u013d', '\u013d'), + ('\u013f', '\u013f'), ('\u0141', '\u0141'), + ('\u0143', '\u0143'), ('\u0145', '\u0145'), + ('\u0147', '\u0147'), ('\u014a', '\u014a'), + ('\u014c', '\u014c'), ('\u014e', '\u014e'), + ('\u0150', '\u0150'), ('\u0152', '\u0152'), + ('\u0154', '\u0154'), ('\u0156', '\u0156'), + ('\u0158', '\u0158'), ('\u015a', '\u015a'), + ('\u015c', '\u015c'), ('\u015e', '\u015e'), + ('\u0160', '\u0160'), ('\u0162', '\u0162'), + ('\u0164', '\u0164'), ('\u0166', '\u0166'), + ('\u0168', '\u0168'), ('\u016a', '\u016a'), + ('\u016c', '\u016c'), ('\u016e', '\u016e'), + ('\u0170', '\u0170'), ('\u0172', '\u0172'), + ('\u0174', '\u0174'), ('\u0176', '\u0176'), + ('\u0178', '\u0179'), ('\u017b', '\u017b'), + ('\u017d', '\u017d'), ('\u0181', '\u0182'), + ('\u0184', '\u0184'), ('\u0186', '\u0187'), + ('\u0189', '\u018b'), ('\u018e', '\u0191'), + ('\u0193', '\u0194'), ('\u0196', '\u0198'), + ('\u019c', '\u019d'), ('\u019f', '\u01a0'), + ('\u01a2', '\u01a2'), ('\u01a4', '\u01a4'), + ('\u01a6', '\u01a7'), ('\u01a9', '\u01a9'), + ('\u01ac', '\u01ac'), ('\u01ae', '\u01af'), + ('\u01b1', '\u01b3'), ('\u01b5', '\u01b5'), + ('\u01b7', '\u01b8'), ('\u01bc', '\u01bc'), + ('\u01c4', '\u01c4'), ('\u01c7', '\u01c7'), + ('\u01ca', '\u01ca'), ('\u01cd', '\u01cd'), + ('\u01cf', '\u01cf'), ('\u01d1', '\u01d1'), + ('\u01d3', '\u01d3'), ('\u01d5', '\u01d5'), + ('\u01d7', '\u01d7'), ('\u01d9', '\u01d9'), + ('\u01db', '\u01db'), ('\u01de', '\u01de'), + ('\u01e0', '\u01e0'), ('\u01e2', '\u01e2'), + ('\u01e4', '\u01e4'), ('\u01e6', '\u01e6'), + ('\u01e8', '\u01e8'), ('\u01ea', '\u01ea'), + ('\u01ec', '\u01ec'), ('\u01ee', '\u01ee'), + ('\u01f1', '\u01f1'), ('\u01f4', '\u01f4'), + ('\u01f6', '\u01f8'), ('\u01fa', '\u01fa'), + ('\u01fc', '\u01fc'), ('\u01fe', '\u01fe'), + ('\u0200', '\u0200'), ('\u0202', '\u0202'), + ('\u0204', '\u0204'), ('\u0206', '\u0206'), + ('\u0208', '\u0208'), ('\u020a', '\u020a'), + ('\u020c', '\u020c'), ('\u020e', '\u020e'), + ('\u0210', '\u0210'), ('\u0212', '\u0212'), + ('\u0214', '\u0214'), ('\u0216', '\u0216'), + ('\u0218', '\u0218'), ('\u021a', '\u021a'), + ('\u021c', '\u021c'), ('\u021e', '\u021e'), + ('\u0220', '\u0220'), ('\u0222', '\u0222'), + ('\u0224', '\u0224'), ('\u0226', '\u0226'), + ('\u0228', '\u0228'), ('\u022a', '\u022a'), + ('\u022c', '\u022c'), ('\u022e', '\u022e'), + ('\u0230', '\u0230'), ('\u0232', '\u0232'), + ('\u023a', '\u023b'), ('\u023d', '\u023e'), + ('\u0241', '\u0241'), ('\u0243', '\u0246'), + ('\u0248', '\u0248'), ('\u024a', '\u024a'), + ('\u024c', '\u024c'), ('\u024e', '\u024e'), + ('\u0370', '\u0370'), ('\u0372', '\u0372'), + ('\u0376', '\u0376'), ('\u0386', '\u0386'), + ('\u0388', '\u038a'), ('\u038c', '\u038c'), + ('\u038e', '\u038f'), ('\u0391', '\u03a1'), + ('\u03a3', '\u03ab'), ('\u03cf', '\u03cf'), + ('\u03d2', '\u03d4'), ('\u03d8', '\u03d8'), + ('\u03da', '\u03da'), ('\u03dc', '\u03dc'), + ('\u03de', '\u03de'), ('\u03e0', '\u03e0'), + ('\u03e2', '\u03e2'), ('\u03e4', '\u03e4'), + ('\u03e6', '\u03e6'), ('\u03e8', '\u03e8'), + ('\u03ea', '\u03ea'), ('\u03ec', '\u03ec'), + ('\u03ee', '\u03ee'), ('\u03f4', '\u03f4'), + ('\u03f7', '\u03f7'), ('\u03f9', '\u03fa'), + ('\u03fd', '\u042f'), ('\u0460', '\u0460'), + ('\u0462', '\u0462'), ('\u0464', '\u0464'), + ('\u0466', '\u0466'), ('\u0468', '\u0468'), + ('\u046a', '\u046a'), ('\u046c', '\u046c'), + ('\u046e', '\u046e'), ('\u0470', '\u0470'), + ('\u0472', '\u0472'), ('\u0474', '\u0474'), + ('\u0476', '\u0476'), ('\u0478', '\u0478'), + ('\u047a', '\u047a'), ('\u047c', '\u047c'), + ('\u047e', '\u047e'), ('\u0480', '\u0480'), + ('\u048a', '\u048a'), ('\u048c', '\u048c'), + ('\u048e', '\u048e'), ('\u0490', '\u0490'), + ('\u0492', '\u0492'), ('\u0494', '\u0494'), + ('\u0496', '\u0496'), ('\u0498', '\u0498'), + ('\u049a', '\u049a'), ('\u049c', '\u049c'), + ('\u049e', '\u049e'), ('\u04a0', '\u04a0'), + ('\u04a2', '\u04a2'), ('\u04a4', '\u04a4'), + ('\u04a6', '\u04a6'), ('\u04a8', '\u04a8'), + ('\u04aa', '\u04aa'), ('\u04ac', '\u04ac'), + ('\u04ae', '\u04ae'), ('\u04b0', '\u04b0'), + ('\u04b2', '\u04b2'), ('\u04b4', '\u04b4'), + ('\u04b6', '\u04b6'), ('\u04b8', '\u04b8'), + ('\u04ba', '\u04ba'), ('\u04bc', '\u04bc'), + ('\u04be', '\u04be'), ('\u04c0', '\u04c1'), + ('\u04c3', '\u04c3'), ('\u04c5', '\u04c5'), + ('\u04c7', '\u04c7'), ('\u04c9', '\u04c9'), + ('\u04cb', '\u04cb'), ('\u04cd', '\u04cd'), + ('\u04d0', '\u04d0'), ('\u04d2', '\u04d2'), + ('\u04d4', '\u04d4'), ('\u04d6', '\u04d6'), + ('\u04d8', '\u04d8'), ('\u04da', '\u04da'), + ('\u04dc', '\u04dc'), ('\u04de', '\u04de'), + ('\u04e0', '\u04e0'), ('\u04e2', '\u04e2'), + ('\u04e4', '\u04e4'), ('\u04e6', '\u04e6'), + ('\u04e8', '\u04e8'), ('\u04ea', '\u04ea'), + ('\u04ec', '\u04ec'), ('\u04ee', '\u04ee'), + ('\u04f0', '\u04f0'), ('\u04f2', '\u04f2'), + ('\u04f4', '\u04f4'), ('\u04f6', '\u04f6'), + ('\u04f8', '\u04f8'), ('\u04fa', '\u04fa'), + ('\u04fc', '\u04fc'), ('\u04fe', '\u04fe'), + ('\u0500', '\u0500'), ('\u0502', '\u0502'), + ('\u0504', '\u0504'), ('\u0506', '\u0506'), + ('\u0508', '\u0508'), ('\u050a', '\u050a'), + ('\u050c', '\u050c'), ('\u050e', '\u050e'), + ('\u0510', '\u0510'), ('\u0512', '\u0512'), + ('\u0514', '\u0514'), ('\u0516', '\u0516'), + ('\u0518', '\u0518'), ('\u051a', '\u051a'), + ('\u051c', '\u051c'), ('\u051e', '\u051e'), + ('\u0520', '\u0520'), ('\u0522', '\u0522'), + ('\u0524', '\u0524'), ('\u0526', '\u0526'), + ('\u0531', '\u0556'), ('\u10a0', '\u10c5'), + ('\u10c7', '\u10c7'), ('\u10cd', '\u10cd'), + ('\u1e00', '\u1e00'), ('\u1e02', '\u1e02'), + ('\u1e04', '\u1e04'), ('\u1e06', '\u1e06'), + ('\u1e08', '\u1e08'), ('\u1e0a', '\u1e0a'), + ('\u1e0c', '\u1e0c'), ('\u1e0e', '\u1e0e'), + ('\u1e10', '\u1e10'), ('\u1e12', '\u1e12'), + ('\u1e14', '\u1e14'), ('\u1e16', '\u1e16'), + ('\u1e18', '\u1e18'), ('\u1e1a', '\u1e1a'), + ('\u1e1c', '\u1e1c'), ('\u1e1e', '\u1e1e'), + ('\u1e20', '\u1e20'), ('\u1e22', '\u1e22'), + ('\u1e24', '\u1e24'), ('\u1e26', '\u1e26'), + ('\u1e28', '\u1e28'), ('\u1e2a', '\u1e2a'), + ('\u1e2c', '\u1e2c'), ('\u1e2e', '\u1e2e'), + ('\u1e30', '\u1e30'), ('\u1e32', '\u1e32'), + ('\u1e34', '\u1e34'), ('\u1e36', '\u1e36'), + ('\u1e38', '\u1e38'), ('\u1e3a', '\u1e3a'), + ('\u1e3c', '\u1e3c'), ('\u1e3e', '\u1e3e'), + ('\u1e40', '\u1e40'), ('\u1e42', '\u1e42'), + ('\u1e44', '\u1e44'), ('\u1e46', '\u1e46'), + ('\u1e48', '\u1e48'), ('\u1e4a', '\u1e4a'), + ('\u1e4c', '\u1e4c'), ('\u1e4e', '\u1e4e'), + ('\u1e50', '\u1e50'), ('\u1e52', '\u1e52'), + ('\u1e54', '\u1e54'), ('\u1e56', '\u1e56'), + ('\u1e58', '\u1e58'), ('\u1e5a', '\u1e5a'), + ('\u1e5c', '\u1e5c'), ('\u1e5e', '\u1e5e'), + ('\u1e60', '\u1e60'), ('\u1e62', '\u1e62'), + ('\u1e64', '\u1e64'), ('\u1e66', '\u1e66'), + ('\u1e68', '\u1e68'), ('\u1e6a', '\u1e6a'), + ('\u1e6c', '\u1e6c'), ('\u1e6e', '\u1e6e'), + ('\u1e70', '\u1e70'), ('\u1e72', '\u1e72'), + ('\u1e74', '\u1e74'), ('\u1e76', '\u1e76'), + ('\u1e78', '\u1e78'), ('\u1e7a', '\u1e7a'), + ('\u1e7c', '\u1e7c'), ('\u1e7e', '\u1e7e'), + ('\u1e80', '\u1e80'), ('\u1e82', '\u1e82'), + ('\u1e84', '\u1e84'), ('\u1e86', '\u1e86'), + ('\u1e88', '\u1e88'), ('\u1e8a', '\u1e8a'), + ('\u1e8c', '\u1e8c'), ('\u1e8e', '\u1e8e'), + ('\u1e90', '\u1e90'), ('\u1e92', '\u1e92'), + ('\u1e94', '\u1e94'), ('\u1e9e', '\u1e9e'), + ('\u1ea0', '\u1ea0'), ('\u1ea2', '\u1ea2'), + ('\u1ea4', '\u1ea4'), ('\u1ea6', '\u1ea6'), + ('\u1ea8', '\u1ea8'), ('\u1eaa', '\u1eaa'), + ('\u1eac', '\u1eac'), ('\u1eae', '\u1eae'), + ('\u1eb0', '\u1eb0'), ('\u1eb2', '\u1eb2'), + ('\u1eb4', '\u1eb4'), ('\u1eb6', '\u1eb6'), + ('\u1eb8', '\u1eb8'), ('\u1eba', '\u1eba'), + ('\u1ebc', '\u1ebc'), ('\u1ebe', '\u1ebe'), + ('\u1ec0', '\u1ec0'), ('\u1ec2', '\u1ec2'), + ('\u1ec4', '\u1ec4'), ('\u1ec6', '\u1ec6'), + ('\u1ec8', '\u1ec8'), ('\u1eca', '\u1eca'), + ('\u1ecc', '\u1ecc'), ('\u1ece', '\u1ece'), + ('\u1ed0', '\u1ed0'), ('\u1ed2', '\u1ed2'), + ('\u1ed4', '\u1ed4'), ('\u1ed6', '\u1ed6'), + ('\u1ed8', '\u1ed8'), ('\u1eda', '\u1eda'), + ('\u1edc', '\u1edc'), ('\u1ede', '\u1ede'), + ('\u1ee0', '\u1ee0'), ('\u1ee2', '\u1ee2'), + ('\u1ee4', '\u1ee4'), ('\u1ee6', '\u1ee6'), + ('\u1ee8', '\u1ee8'), ('\u1eea', '\u1eea'), + ('\u1eec', '\u1eec'), ('\u1eee', '\u1eee'), + ('\u1ef0', '\u1ef0'), ('\u1ef2', '\u1ef2'), + ('\u1ef4', '\u1ef4'), ('\u1ef6', '\u1ef6'), + ('\u1ef8', '\u1ef8'), ('\u1efa', '\u1efa'), + ('\u1efc', '\u1efc'), ('\u1efe', '\u1efe'), + ('\u1f08', '\u1f0f'), ('\u1f18', '\u1f1d'), + ('\u1f28', '\u1f2f'), ('\u1f38', '\u1f3f'), + ('\u1f48', '\u1f4d'), ('\u1f59', '\u1f59'), + ('\u1f5b', '\u1f5b'), ('\u1f5d', '\u1f5d'), + ('\u1f5f', '\u1f5f'), ('\u1f68', '\u1f6f'), + ('\u1fb8', '\u1fbb'), ('\u1fc8', '\u1fcb'), + ('\u1fd8', '\u1fdb'), ('\u1fe8', '\u1fec'), + ('\u1ff8', '\u1ffb'), ('\u2102', '\u2102'), + ('\u2107', '\u2107'), ('\u210b', '\u210d'), + ('\u2110', '\u2112'), ('\u2115', '\u2115'), + ('\u2119', '\u211d'), ('\u2124', '\u2124'), + ('\u2126', '\u2126'), ('\u2128', '\u2128'), + ('\u212a', '\u212d'), ('\u2130', '\u2133'), + ('\u213e', '\u213f'), ('\u2145', '\u2145'), + ('\u2160', '\u216f'), ('\u2183', '\u2183'), + ('\u24b6', '\u24cf'), ('\u2c00', '\u2c2e'), + ('\u2c60', '\u2c60'), ('\u2c62', '\u2c64'), + ('\u2c67', '\u2c67'), ('\u2c69', '\u2c69'), + ('\u2c6b', '\u2c6b'), ('\u2c6d', '\u2c70'), + ('\u2c72', '\u2c72'), ('\u2c75', '\u2c75'), + ('\u2c7e', '\u2c80'), ('\u2c82', '\u2c82'), + ('\u2c84', '\u2c84'), ('\u2c86', '\u2c86'), + ('\u2c88', '\u2c88'), ('\u2c8a', '\u2c8a'), + ('\u2c8c', '\u2c8c'), ('\u2c8e', '\u2c8e'), + ('\u2c90', '\u2c90'), ('\u2c92', '\u2c92'), + ('\u2c94', '\u2c94'), ('\u2c96', '\u2c96'), + ('\u2c98', '\u2c98'), ('\u2c9a', '\u2c9a'), + ('\u2c9c', '\u2c9c'), ('\u2c9e', '\u2c9e'), + ('\u2ca0', '\u2ca0'), ('\u2ca2', '\u2ca2'), + ('\u2ca4', '\u2ca4'), ('\u2ca6', '\u2ca6'), + ('\u2ca8', '\u2ca8'), ('\u2caa', '\u2caa'), + ('\u2cac', '\u2cac'), ('\u2cae', '\u2cae'), + ('\u2cb0', '\u2cb0'), ('\u2cb2', '\u2cb2'), + ('\u2cb4', '\u2cb4'), ('\u2cb6', '\u2cb6'), + ('\u2cb8', '\u2cb8'), ('\u2cba', '\u2cba'), + ('\u2cbc', '\u2cbc'), ('\u2cbe', '\u2cbe'), + ('\u2cc0', '\u2cc0'), ('\u2cc2', '\u2cc2'), + ('\u2cc4', '\u2cc4'), ('\u2cc6', '\u2cc6'), + ('\u2cc8', '\u2cc8'), ('\u2cca', '\u2cca'), + ('\u2ccc', '\u2ccc'), ('\u2cce', '\u2cce'), + ('\u2cd0', '\u2cd0'), ('\u2cd2', '\u2cd2'), + ('\u2cd4', '\u2cd4'), ('\u2cd6', '\u2cd6'), + ('\u2cd8', '\u2cd8'), ('\u2cda', '\u2cda'), + ('\u2cdc', '\u2cdc'), ('\u2cde', '\u2cde'), + ('\u2ce0', '\u2ce0'), ('\u2ce2', '\u2ce2'), + ('\u2ceb', '\u2ceb'), ('\u2ced', '\u2ced'), + ('\u2cf2', '\u2cf2'), ('\ua640', '\ua640'), + ('\ua642', '\ua642'), ('\ua644', '\ua644'), + ('\ua646', '\ua646'), ('\ua648', '\ua648'), + ('\ua64a', '\ua64a'), ('\ua64c', '\ua64c'), + ('\ua64e', '\ua64e'), ('\ua650', '\ua650'), + ('\ua652', '\ua652'), ('\ua654', '\ua654'), + ('\ua656', '\ua656'), ('\ua658', '\ua658'), + ('\ua65a', '\ua65a'), ('\ua65c', '\ua65c'), + ('\ua65e', '\ua65e'), ('\ua660', '\ua660'), + ('\ua662', '\ua662'), ('\ua664', '\ua664'), + ('\ua666', '\ua666'), ('\ua668', '\ua668'), + ('\ua66a', '\ua66a'), ('\ua66c', '\ua66c'), + ('\ua680', '\ua680'), ('\ua682', '\ua682'), + ('\ua684', '\ua684'), ('\ua686', '\ua686'), + ('\ua688', '\ua688'), ('\ua68a', '\ua68a'), + ('\ua68c', '\ua68c'), ('\ua68e', '\ua68e'), + ('\ua690', '\ua690'), ('\ua692', '\ua692'), + ('\ua694', '\ua694'), ('\ua696', '\ua696'), + ('\ua722', '\ua722'), ('\ua724', '\ua724'), + ('\ua726', '\ua726'), ('\ua728', '\ua728'), + ('\ua72a', '\ua72a'), ('\ua72c', '\ua72c'), + ('\ua72e', '\ua72e'), ('\ua732', '\ua732'), + ('\ua734', '\ua734'), ('\ua736', '\ua736'), + ('\ua738', '\ua738'), ('\ua73a', '\ua73a'), + ('\ua73c', '\ua73c'), ('\ua73e', '\ua73e'), + ('\ua740', '\ua740'), ('\ua742', '\ua742'), + ('\ua744', '\ua744'), ('\ua746', '\ua746'), + ('\ua748', '\ua748'), ('\ua74a', '\ua74a'), + ('\ua74c', '\ua74c'), ('\ua74e', '\ua74e'), + ('\ua750', '\ua750'), ('\ua752', '\ua752'), + ('\ua754', '\ua754'), ('\ua756', '\ua756'), + ('\ua758', '\ua758'), ('\ua75a', '\ua75a'), + ('\ua75c', '\ua75c'), ('\ua75e', '\ua75e'), + ('\ua760', '\ua760'), ('\ua762', '\ua762'), + ('\ua764', '\ua764'), ('\ua766', '\ua766'), + ('\ua768', '\ua768'), ('\ua76a', '\ua76a'), + ('\ua76c', '\ua76c'), ('\ua76e', '\ua76e'), + ('\ua779', '\ua779'), ('\ua77b', '\ua77b'), + ('\ua77d', '\ua77e'), ('\ua780', '\ua780'), + ('\ua782', '\ua782'), ('\ua784', '\ua784'), + ('\ua786', '\ua786'), ('\ua78b', '\ua78b'), + ('\ua78d', '\ua78d'), ('\ua790', '\ua790'), + ('\ua792', '\ua792'), ('\ua7a0', '\ua7a0'), + ('\ua7a2', '\ua7a2'), ('\ua7a4', '\ua7a4'), + ('\ua7a6', '\ua7a6'), ('\ua7a8', '\ua7a8'), + ('\ua7aa', '\ua7aa'), ('\uff21', '\uff3a'), + ('\U00010400', '\U00010427'), ('\U0001d400', '\U0001d419'), + ('\U0001d434', '\U0001d44d'), ('\U0001d468', '\U0001d481'), + ('\U0001d49c', '\U0001d49c'), ('\U0001d49e', '\U0001d49f'), + ('\U0001d4a2', '\U0001d4a2'), ('\U0001d4a5', '\U0001d4a6'), + ('\U0001d4a9', '\U0001d4ac'), ('\U0001d4ae', '\U0001d4b5'), + ('\U0001d4d0', '\U0001d4e9'), ('\U0001d504', '\U0001d505'), + ('\U0001d507', '\U0001d50a'), ('\U0001d50d', '\U0001d514'), + ('\U0001d516', '\U0001d51c'), ('\U0001d538', '\U0001d539'), + ('\U0001d53b', '\U0001d53e'), ('\U0001d540', '\U0001d544'), + ('\U0001d546', '\U0001d546'), ('\U0001d54a', '\U0001d550'), + ('\U0001d56c', '\U0001d585'), ('\U0001d5a0', '\U0001d5b9'), + ('\U0001d5d4', '\U0001d5ed'), ('\U0001d608', '\U0001d621'), + ('\U0001d63c', '\U0001d655'), ('\U0001d670', '\U0001d689'), + ('\U0001d6a8', '\U0001d6c0'), ('\U0001d6e2', '\U0001d6fa'), + ('\U0001d71c', '\U0001d734'), ('\U0001d756', '\U0001d76e'), + ('\U0001d790', '\U0001d7a8'), ('\U0001d7ca', '\U0001d7ca') + ]; + + pub fn Uppercase(c: char) -> bool { + bsearch_range_table(c, Uppercase_table) + } + static XID_Continue_table : &'static [(char,char)] = &[ ('\x30', '\x39'), ('\x41', '\x5a'), ('\x5f', '\x5f'), ('\x61', '\x7a'), @@ -4856,3 +5490,31 @@ pub mod derived_property { } } +pub mod property { + + fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { + use cmp::{Equal, Less, Greater}; + use vec::ImmutableVector; + use option::None; + r.bsearch(|&(lo,hi)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) != None + } + + + static White_Space_table : &'static [(char,char)] = &[ + ('\x09', '\x0d'), ('\x20', '\x20'), + ('\x85', '\x85'), ('\xa0', '\xa0'), + ('\u1680', '\u1680'), ('\u2000', '\u200a'), + ('\u2028', '\u2028'), ('\u2029', '\u2029'), + ('\u202f', '\u202f'), ('\u205f', '\u205f'), + ('\u3000', '\u3000') + ]; + + pub fn White_Space(c: char) -> bool { + bsearch_range_table(c, White_Space_table) + } + +} diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp index c60cc1a36caba..15e6e52e3baf3 100644 --- a/src/test/pretty/block-comment-wchar.pp +++ b/src/test/pretty/block-comment-wchar.pp @@ -51,7 +51,7 @@ CR4+2: (should align) */ /* - // (NEL deliberately omitted) + NEL4+2: (should align) */ /* Ogham Space Mark 4+2: (should align) @@ -103,11 +103,10 @@ fn main() { // Taken from http://www.unicode.org/Public/UNIDATA/PropList.txt let chars = - ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', - // '\x85', // for some reason Rust thinks NEL isn't whitespace - '\xA0', '\u1680', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', - '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u2028', - '\u2029', '\u202F', '\u205F', '\u3000']; + ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', '\x85', '\xA0', '\u1680', + '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', + '\u2007', '\u2008', '\u2009', '\u200A', '\u2028', '\u2029', '\u202F', + '\u205F', '\u3000']; for c in chars.iter() { let ws = c.is_whitespace(); println!("{:?} {:?}" , c , ws); diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs index 52936da41b29f..98b9fd0507799 100644 --- a/src/test/pretty/block-comment-wchar.rs +++ b/src/test/pretty/block-comment-wchar.rs @@ -51,7 +51,7 @@ fn f() { CR4+2: (should align) */ /* - // (NEL deliberately omitted) +………… NEL4+2: (should align) */ /*      Ogham Space Mark 4+2: (should align) @@ -97,8 +97,7 @@ fn f() { fn main() { // Taken from http://www.unicode.org/Public/UNIDATA/PropList.txt let chars = - ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', - // '\x85', // for some reason Rust thinks NEL isn't whitespace + ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', '\x85', '\xA0', '\u1680', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];