Skip to content

Commit

Permalink
Auto merge of #33128 - xen0n:more-confusing-unicode-chars, r=nagisa
Browse files Browse the repository at this point in the history
Add more aliases for Unicode confusable chars

Building upon #29837, this PR:

* added aliases for space characters,
* distinguished square brackets from parens, and
* added common CJK punctuation characters as aliases.

This will especially help CJK users who forget to switch off their IME (input method editor) while coding.
  • Loading branch information
bors committed May 5, 2016
2 parents 3f65afa + 496081c commit 413bafd
Showing 1 changed file with 53 additions and 6 deletions.
59 changes: 53 additions & 6 deletions src/libsyntax/parse/lexer/unicode_chars.rs
Expand Up @@ -16,6 +16,22 @@ use errors::DiagnosticBuilder;
use super::StringReader;

const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
(' ', "No-Break Space", ' '),
(' ', "Ogham Space Mark", ' '),
(' ', "En Quad", ' '),
(' ', "Em Quad", ' '),
(' ', "En Space", ' '),
(' ', "Em Space", ' '),
(' ', "Three-Per-Em Space", ' '),
(' ', "Four-Per-Em Space", ' '),
(' ', "Six-Per-Em Space", ' '),
(' ', "Figure Space", ' '),
(' ', "Punctuation Space", ' '),
(' ', "Thin Space", ' '),
(' ', "Hair Space", ' '),
(' ', "Narrow No-Break Space", ' '),
(' ', "Medium Mathematical Space", ' '),
(' ', "Ideographic Space", ' '),
('ߺ', "Nko Lajanyalan", '_'),
('﹍', "Dashed Low Line", '_'),
('﹎', "Centreline Low Line", '_'),
Expand All @@ -24,14 +40,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('‑', "Non-Breaking Hyphen", '-'),
('‒', "Figure Dash", '-'),
('–', "En Dash", '-'),
('—', "Em Dash", '-'),
('﹘', "Small Em Dash", '-'),
('⁃', "Hyphen Bullet", '-'),
('˗', "Modifier Letter Minus Sign", '-'),
('−', "Minus Sign", '-'),
('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'),
('٫', "Arabic Decimal Separator", ','),
('‚', "Single Low-9 Quotation Mark", ','),
('ꓹ', "Lisu Letter Tone Na Po", ','),
(',', "Fullwidth Comma", ','),
(';', "Greek Question Mark", ';'),
(';', "Fullwidth Semicolon", ';'),
('ः', "Devanagari Sign Visarga", ':'),
('ઃ', "Gujarati Sign Visarga", ':'),
(':', "Fullwidth Colon", ':'),
Expand All @@ -53,16 +73,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('ʔ', "Latin Letter Glottal Stop", '?'),
('ॽ', "Devanagari Letter Glottal Stop", '?'),
('Ꭾ', "Cherokee Letter He", '?'),
('?', "Fullwidth Question Mark", '?'),
('𝅭', "Musical Symbol Combining Augmentation Dot", '.'),
('․', "One Dot Leader", '.'),
('۔', "Arabic Full Stop", '.'),
('܁', "Syriac Supralinear Full Stop", '.'),
('܂', "Syriac Sublinear Full Stop", '.'),
('꘎', "Vai Full Stop", '.'),
('𐩐', "Kharoshthi Punctuation Dot", '.'),
('·', "Middle Dot", '.'),
('٠', "Arabic-Indic Digit Zero", '.'),
('۰', "Extended Arabic-Indic Digit Zero", '.'),
('ꓸ', "Lisu Letter Tone Mya Ti", '.'),
('。', "Ideographic Full Stop", '.'),
('・', "Katakana Middle Dot", '.'),
('՝', "Armenian Comma", '\''),
(''', "Fullwidth Apostrophe", '\''),
('‘', "Left Single Quotation Mark", '\''),
Expand Down Expand Up @@ -108,16 +132,30 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('ײ', "Hebrew Ligature Yiddish Double Yod", '"'),
('❞', "Heavy Double Comma Quotation Mark Ornament", '"'),
('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'),
('[', "Fullwidth Left Square Bracket", '('),
('❨', "Medium Left Parenthesis Ornament", '('),
('❲', "Light Left Tortoise Shell Bracket Ornament", '('),
('〔', "Left Tortoise Shell Bracket", '('),
('﴾', "Ornate Left Parenthesis", '('),
(']', "Fullwidth Right Square Bracket", ')'),
('(', "Fullwidth Left Parenthesis", '('),
('❩', "Medium Right Parenthesis Ornament", ')'),
('❳', "Light Right Tortoise Shell Bracket Ornament", ')'),
('〕', "Right Tortoise Shell Bracket", ')'),
('﴿', "Ornate Right Parenthesis", ')'),
(')', "Fullwidth Right Parenthesis", ')'),
('[', "Fullwidth Left Square Bracket", '['),
('❲', "Light Left Tortoise Shell Bracket Ornament", '['),
('「', "Left Corner Bracket", '['),
('『', "Left White Corner Bracket", '['),
('【', "Left Black Lenticular Bracket", '['),
('〔', "Left Tortoise Shell Bracket", '['),
('〖', "Left White Lenticular Bracket", '['),
('〘', "Left White Tortoise Shell Bracket", '['),
('〚', "Left White Square Bracket", '['),
(']', "Fullwidth Right Square Bracket", ']'),
('❳', "Light Right Tortoise Shell Bracket Ornament", ']'),
('」', "Right Corner Bracket", ']'),
('』', "Right White Corner Bracket", ']'),
('】', "Right Black Lenticular Bracket", ']'),
('〕', "Right Tortoise Shell Bracket", ']'),
('〗', "Right White Lenticular Bracket", ']'),
('〙', "Right White Tortoise Shell Bracket", ']'),
('〛', "Right White Square Bracket", ']'),
('❴', "Medium Left Curly Bracket Ornament", '{'),
('❵', "Medium Right Curly Bracket Ornament", '}'),
('⁎', "Low Asterisk", '*'),
Expand All @@ -140,6 +178,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('⟍', "Mathematical Falling Diagonal", '\\'),
('⧵', "Reverse Solidus Operator", '\\'),
('⧹', "Big Reverse Solidus", '\\'),
('、', "Ideographic Comma", '\\'),
('ヽ', "Katakana Iteration Mark", '\\'),
('㇔', "Cjk Stroke D", '\\'),
('丶', "Cjk Unified Ideograph-4E36", '\\'),
('⼂', "Kangxi Radical Dot", '\\'),
Expand All @@ -148,15 +188,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[
('‹', "Single Left-Pointing Angle Quotation Mark", '<'),
('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'),
('˂', "Modifier Letter Left Arrowhead", '<'),
('〈', "Left Angle Bracket", '<'),
('《', "Left Double Angle Bracket", '<'),
('꓿', "Lisu Punctuation Full Stop", '='),
('›', "Single Right-Pointing Angle Quotation Mark", '>'),
('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'),
('˃', "Modifier Letter Right Arrowhead", '>'),
('〉', "Right Angle Bracket", '>'),
('》', "Right Double Angle Bracket", '>'),
('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'),
('Ɂ', "Latin Capital Letter Glottal Stop", '?'),
('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ];

const ASCII_ARRAY: &'static [(char, &'static str)] = &[
(' ', "Space"),
('_', "Underscore"),
('-', "Minus/Hyphen"),
(',', "Comma"),
Expand All @@ -169,6 +214,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[
('"', "Quotation Mark"),
('(', "Left Parenthesis"),
(')', "Right Parenthesis"),
('[', "Left Square Bracket"),
(']', "Right Square Bracket"),
('{', "Left Curly Brace"),
('}', "Right Curly Brace"),
('*', "Asterisk"),
Expand Down

0 comments on commit 413bafd

Please sign in to comment.