Skip to content

Commit

Permalink
Only escape extended grapheme characters in the first position
Browse files Browse the repository at this point in the history
  • Loading branch information
varkor committed May 21, 2018
1 parent 8c89e7f commit c51f002
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 15 deletions.
5 changes: 4 additions & 1 deletion src/liballoc/str.rs
Expand Up @@ -372,12 +372,15 @@ impl str {

/// Escapes each char of this string slice with [`char::escape_debug`] and
/// collects the escaped chars into a new `String`.
///
/// Note: grapheme-extending codepoints are escaped only when they are the
/// first char of the string; at every later position that particular
/// escaping is switched off.
///
/// [`char::escape_debug`]: primitive.char.html#method.escape_debug
#[unstable(feature = "str_escape",
reason = "return type may change to be an iterator",
issue = "27791")]
pub fn escape_debug(&self) -> String {
// NOTE(review): the next line looks like the pre-change body retained by the
// diff view; confirm that only the `enumerate()` version below is meant to remain.
self.chars().flat_map(|c| c.escape_debug()).collect()
// `i == 0` is true only for the first char, so only that char is asked to
// escape grapheme-extending codepoints.
self.chars().enumerate().flat_map(|(i, c)| c.escape_debug_ext(i == 0)).collect()
}

/// Escapes each char in `s` with [`char::escape_default`].
Expand Down
2 changes: 1 addition & 1 deletion src/liballoc/tests/str.rs
Expand Up @@ -999,7 +999,7 @@ fn test_escape_debug() {
assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}");
assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}");
assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r");
assert_eq!("\u{301}a\u{301}\u{e000}".escape_debug(), "\\u{301}a\\u{301}bé\\u{e000}");
assert_eq!("\u{301}a\u{301}\u{e000}".escape_debug(), "\\u{301}a\u{301}\\u{e000}");
}

#[test]
Expand Down
34 changes: 22 additions & 12 deletions src/libcore/char/methods.rs
Expand Up @@ -187,6 +187,27 @@ impl char {
}
}

/// Variant of `escape_debug` with a switch controlling how grapheme-extending
/// codepoints are treated.
///
/// When `escape_grapheme_extended` is `true`, any char for which
/// `is_grapheme_extended()` holds is rendered through `escape_unicode()`,
/// whether or not it is printable. When it is `false`, such chars fall through
/// to the ordinary printable / non-printable decision. Callers formatting a
/// whole string can therefore escape e.g. a nonspacing mark only where it
/// starts the string.
#[doc(hidden)]
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug {
    let start = match self {
        // Chars that have a dedicated backslash form.
        '\t' => EscapeDefaultState::Backslash('t'),
        '\r' => EscapeDefaultState::Backslash('r'),
        '\n' => EscapeDefaultState::Backslash('n'),
        '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
        other => {
            // The grapheme check is consulted only when the caller asked for it.
            let force_unicode = escape_grapheme_extended && other.is_grapheme_extended();
            if force_unicode || !is_printable(other) {
                EscapeDefaultState::Unicode(other.escape_unicode())
            } else {
                EscapeDefaultState::Char(other)
            }
        }
    };
    EscapeDebug(EscapeDefault { state: start })
}

/// Returns an iterator that yields the literal escape code of a character
/// as `char`s.
///
Expand Down Expand Up @@ -224,18 +245,7 @@ impl char {
#[stable(feature = "char_escape_debug", since = "1.20.0")]
#[inline]
pub fn escape_debug(self) -> EscapeDebug {
// NOTE(review): the `let init_state = ...` statement and the
// `EscapeDebug(...)` expression below look like the pre-change body retained
// by the diff view; confirm that only the delegating call at the end remains.
let init_state = match self {
'\t' => EscapeDefaultState::Backslash('t'),
'\r' => EscapeDefaultState::Backslash('r'),
'\n' => EscapeDefaultState::Backslash('n'),
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
_ if self.is_grapheme_extended() => {
EscapeDefaultState::Unicode(self.escape_unicode())
}
_ if is_printable(self) => EscapeDefaultState::Char(self),
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
};
EscapeDebug(EscapeDefault { state: init_state })
// Delegate to `escape_debug_ext`, passing `true` so that grapheme-extending
// codepoints are always escaped for a standalone char.
self.escape_debug_ext(true)
}

/// Returns an iterator that yields the literal escape code of a character
Expand Down
2 changes: 1 addition & 1 deletion src/libcore/tests/char.rs
Expand Up @@ -181,7 +181,7 @@ fn test_escape_debug() {
assert_eq!(string('\u{ff}'), "\u{ff}");
assert_eq!(string('\u{11b}'), "\u{11b}");
assert_eq!(string('\u{1d4b6}'), "\u{1d4b6}");
assert_eq!(string('\u{301}'), "'\\u{301}'"); // combining character
assert_eq!(string('\u{301}'), "\\u{301}"); // combining character
assert_eq!(string('\u{200b}'),"\\u{200b}"); // zero width space
assert_eq!(string('\u{e000}'), "\\u{e000}"); // private use 1
assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2
Expand Down

0 comments on commit c51f002

Please sign in to comment.