Auto merge of #33128 - xen0n:more-confusing-unicode-chars, r=nagisa

Add more aliases for Unicode confusable chars

Building upon #29837, this PR:

* added aliases for space characters,
* distinguished square brackets from parens, and
* added common CJK punctuation characters as aliases.

This will especially help CJK users who may have forgotten to switch off their IME when coding.
rust-lang · May 5, 2016 · 413bafd · 413bafd
2 parents 3f65afa + 496081c
commit 413bafd
Showing 1 changed file with 53 additions and 6 deletions.
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -16,6 +16,22 @@ use errors::DiagnosticBuilder;
 use super::StringReader;
 
 const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
+    (' ', "No-Break Space", ' '),
+    (' ', "Ogham Space Mark", ' '),
+    (' ', "En Quad", ' '),
+    (' ', "Em Quad", ' '),
+    (' ', "En Space", ' '),
+    (' ', "Em Space", ' '),
+    (' ', "Three-Per-Em Space", ' '),
+    (' ', "Four-Per-Em Space", ' '),
+    (' ', "Six-Per-Em Space", ' '),
+    (' ', "Figure Space", ' '),
+    (' ', "Punctuation Space", ' '),
+    (' ', "Thin Space", ' '),
+    (' ', "Hair Space", ' '),
+    (' ', "Narrow No-Break Space", ' '),
+    (' ', "Medium Mathematical Space", ' '),
+    ('　', "Ideographic Space", ' '),
     ('ߺ', "Nko Lajanyalan", '_'),
     ('﹍', "Dashed Low Line", '_'),
     ('﹎', "Centreline Low Line", '_'),
@@ -24,14 +40,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('‑', "Non-Breaking Hyphen", '-'),
     ('‒', "Figure Dash", '-'),
     ('–', "En Dash", '-'),
+    ('—', "Em Dash", '-'),
     ('﹘', "Small Em Dash", '-'),
     ('⁃', "Hyphen Bullet", '-'),
     ('˗', "Modifier Letter Minus Sign", '-'),
     ('−', "Minus Sign", '-'),
+    ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
     ('٫', "Arabic Decimal Separator", ','),
     ('‚', "Single Low-9 Quotation Mark", ','),
     ('ꓹ', "Lisu Letter Tone Na Po", ','),
+    ('，', "Fullwidth Comma", ','),
     (';', "Greek Question Mark", ';'),
+    ('；', "Fullwidth Semicolon", ';'),
     ('ः', "Devanagari Sign Visarga", ':'),
     ('ઃ', "Gujarati Sign Visarga", ':'),
     ('：', "Fullwidth Colon", ':'),
@@ -53,16 +73,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('ʔ', "Latin Letter Glottal Stop", '?'),
     ('ॽ', "Devanagari Letter Glottal Stop", '?'),
     ('Ꭾ', "Cherokee Letter He", '?'),
+    ('？', "Fullwidth Question Mark", '?'),
     ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
     ('․', "One Dot Leader", '.'),
     ('۔', "Arabic Full Stop", '.'),
     ('܁', "Syriac Supralinear Full Stop", '.'),
     ('܂', "Syriac Sublinear Full Stop", '.'),
     ('꘎', "Vai Full Stop", '.'),
     ('𐩐', "Kharoshthi Punctuation Dot", '.'),
+    ('·', "Middle Dot", '.'),
     ('٠', "Arabic-Indic Digit Zero", '.'),
     ('۰', "Extended Arabic-Indic Digit Zero", '.'),
     ('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
+    ('。', "Ideographic Full Stop", '.'),
+    ('・', "Katakana Middle Dot", '.'),
     ('՝', "Armenian Comma", '\''),
     ('＇', "Fullwidth Apostrophe", '\''),
     ('‘', "Left Single Quotation Mark", '\''),
@@ -108,16 +132,30 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
     ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
     ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
-    ('［', "Fullwidth Left Square Bracket", '('),
     ('❨', "Medium Left Parenthesis Ornament", '('),
-    ('❲', "Light Left Tortoise Shell Bracket Ornament", '('),
-    ('〔', "Left Tortoise Shell Bracket", '('),
     ('﴾', "Ornate Left Parenthesis", '('),
-    ('］', "Fullwidth Right Square Bracket", ')'),
+    ('（', "Fullwidth Left Parenthesis", '('),
     ('❩', "Medium Right Parenthesis Ornament", ')'),
-    ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'),
-    ('〕', "Right Tortoise Shell Bracket", ')'),
     ('﴿', "Ornate Right Parenthesis", ')'),
+    ('）', "Fullwidth Right Parenthesis", ')'),
+    ('［', "Fullwidth Left Square Bracket", '['),
+    ('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
+    ('「', "Left Corner Bracket", '['),
+    ('『', "Left White Corner Bracket", '['),
+    ('【', "Left Black Lenticular Bracket", '['),
+    ('〔', "Left Tortoise Shell Bracket", '['),
+    ('〖', "Left White Lenticular Bracket", '['),
+    ('〘', "Left White Tortoise Shell Bracket", '['),
+    ('〚', "Left White Square Bracket", '['),
+    ('］', "Fullwidth Right Square Bracket", ']'),
+    ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
+    ('」', "Right Corner Bracket", ']'),
+    ('』', "Right White Corner Bracket", ']'),
+    ('】', "Right Black Lenticular Bracket", ']'),
+    ('〕', "Right Tortoise Shell Bracket", ']'),
+    ('〗', "Right White Lenticular Bracket", ']'),
+    ('〙', "Right White Tortoise Shell Bracket", ']'),
+    ('〛', "Right White Square Bracket", ']'),
     ('❴', "Medium Left Curly Bracket Ornament", '{'),
     ('❵', "Medium Right Curly Bracket Ornament", '}'),
     ('⁎', "Low Asterisk", '*'),
@@ -140,6 +178,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('⟍', "Mathematical Falling Diagonal", '\\'),
     ('⧵', "Reverse Solidus Operator", '\\'),
     ('⧹', "Big Reverse Solidus", '\\'),
+    ('、', "Ideographic Comma", '\\'),
+    ('ヽ', "Katakana Iteration Mark", '\\'),
     ('㇔', "Cjk Stroke D", '\\'),
     ('丶', "Cjk Unified Ideograph-4E36", '\\'),
     ('⼂', "Kangxi Radical Dot", '\\'),
@@ -148,15 +188,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
     ('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
     ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
     ('˂', "Modifier Letter Left Arrowhead", '<'),
+    ('〈', "Left Angle Bracket", '<'),
+    ('《', "Left Double Angle Bracket", '<'),
     ('꓿', "Lisu Punctuation Full Stop", '='),
     ('›', "Single Right-Pointing Angle Quotation Mark", '>'),
     ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
     ('˃', "Modifier Letter Right Arrowhead", '>'),
+    ('〉', "Right Angle Bracket", '>'),
+    ('》', "Right Double Angle Bracket", '>'),
     ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
     ('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
     ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];
 
 const ASCII_ARRAY: &'static [(char, &'static str)] = &[
+    (' ', "Space"),
     ('_', "Underscore"),
     ('-', "Minus/Hyphen"),
     (',', "Comma"),
@@ -169,6 +214,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[
     ('"', "Quotation Mark"),
     ('(', "Left Parenthesis"),
     (')', "Right Parenthesis"),
+    ('[', "Left Square Bracket"),
+    (']', "Right Square Bracket"),
     ('{', "Left Curly Brace"),
     ('}', "Right Curly Brace"),
     ('*', "Asterisk"),