From 9f145022ef9390e6cfb0b2416744a393d62d3350 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 18 Apr 2018 14:09:27 +1000 Subject: [PATCH] Avoid allocating when parsing \u{...} literals. `char_lit` uses an allocation in order to ignore '_' chars in \u{...} literals. This patch changes it to not do that by processing the chars more directly. This improves various rustc-perf benchmark measurements by up to 6%, particularly regex, futures, clap, coercions, hyper, and encoding. --- src/libsyntax/parse/mod.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index ff63c9a5c6d53..0397c3297db0a 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -271,8 +271,16 @@ pub fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) { 'u' => { assert_eq!(lit.as_bytes()[2], b'{'); let idx = lit.find('}').unwrap(); - let s = &lit[3..idx].chars().filter(|&c| c != '_').collect::<String>(); - let v = u32::from_str_radix(&s, 16).unwrap(); + + // All digits and '_' are ascii, so treat each byte as a char. + let mut v: u32 = 0; + for c in lit[3..idx].bytes() { + let c = char::from(c); + if c != '_' { + let x = c.to_digit(16).unwrap(); + v = v.checked_mul(16).unwrap().checked_add(x).unwrap(); + } + } let c = char::from_u32(v).unwrap_or_else(|| { if let Some((span, diag)) = diag { let mut diag = diag.struct_span_err(span, "invalid unicode character escape");