From e9348738fca8c88dca6e343702eaa12ffc8df34c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 18 Apr 2018 19:36:48 -0700 Subject: [PATCH] proc_macro: Stay on the "use the cache" path more Discovered in #50061 we're falling off the "happy path" of using a stringified token stream more often than we should. This was due to the fact that a user-written token like `0xf` is equality-different from the stringified token of `15` (despite being semantically equivalent). This patch updates the call to `eq_unspanned` with an even more awful solution, `probably_equal_for_proc_macro`, which ignores the value of each token and basically only compares the structure of the token stream, assuming that the AST doesn't change just one token at a time. While this is a step towards fixing #50061 there is still one regression from #49154 which needs to be fixed. --- src/libsyntax/parse/token.rs | 100 ++++++++++++++++-- src/libsyntax/tokenstream.rs | 34 ++++++ .../proc-macro/attribute-with-error.rs | 2 + 3 files changed, 127 insertions(+), 9 deletions(-) diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 2688a1adb5676..44394384c7a7e 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -26,6 +26,7 @@ use tokenstream::{TokenStream, TokenTree}; use tokenstream; use std::{cmp, fmt}; +use std::mem; use rustc_data_structures::sync::{Lrc, Lock}; #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)] @@ -88,6 +89,12 @@ impl Lit { ByteStr(_) | ByteStrRaw(..) => "byte string" } } + + // See comments in `interpolated_to_tokenstream` for why we care about + // *probably* equal here rather than actual equality + fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool { + mem::discriminant(self) == mem::discriminant(other) + } } pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool { @@ -530,14 +537,6 @@ impl Token { // stream they came from. Here we attempt to extract these // lossless token streams before we fall back to the // stringification. - // - // During early phases of the compiler, though, the AST could - // get modified directly (e.g. attributes added or removed) and - // the internal cache of tokens my not be invalidated or - // updated. Consequently if the "lossless" token stream - // disagrees with our actuall stringification (which has - // historically been much more battle-tested) then we go with - // the lossy stream anyway (losing span information). let mut tokens = None; match nt.0 { @@ -569,13 +568,96 @@ impl Token { let source = pprust::token_to_string(self); parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span)) }); + + // During early phases of the compiler the AST could get modified + // directly (e.g. attributes added or removed) and the internal cache + // of tokens my not be invalidated or updated. Consequently if the + // "lossless" token stream disagrees with our actual stringification + // (which has historically been much more battle-tested) then we go + // with the lossy stream anyway (losing span information). + // + // Note that the comparison isn't `==` here to avoid comparing spans, + // but it *also* is a "probable" equality which is a pretty weird + // definition. We mostly want to catch actual changes to the AST + // like a `#[cfg]` being processed or some weird `macro_rules!` + // expansion. + // + // What we *don't* want to catch is the fact that a user-defined + // literal like `0xf` is stringified as `15`, causing the cached token + // stream to not be literal `==` token-wise (ignoring spans) to the + // token stream we got from stringification. + // + // Instead the "probably equal" check here is "does each token + // recursively have the same discriminant?" We basically don't look at + // the token values here and assume that such fine grained modifications + // of token streams doesn't happen. if let Some(tokens) = tokens { - if tokens.eq_unspanned(&tokens_for_real) { + if tokens.probably_equal_for_proc_macro(&tokens_for_real) { return tokens } } return tokens_for_real } + + // See comments in `interpolated_to_tokenstream` for why we care about + // *probably* equal here rather than actual equality + pub fn probably_equal_for_proc_macro(&self, other: &Token) -> bool { + if mem::discriminant(self) != mem::discriminant(other) { + return false + } + match (self, other) { + (&Eq, &Eq) | + (&Lt, &Lt) | + (&Le, &Le) | + (&EqEq, &EqEq) | + (&Ne, &Ne) | + (&Ge, &Ge) | + (&Gt, &Gt) | + (&AndAnd, &AndAnd) | + (&OrOr, &OrOr) | + (&Not, &Not) | + (&Tilde, &Tilde) | + (&At, &At) | + (&Dot, &Dot) | + (&DotDot, &DotDot) | + (&DotDotDot, &DotDotDot) | + (&DotDotEq, &DotDotEq) | + (&DotEq, &DotEq) | + (&Comma, &Comma) | + (&Semi, &Semi) | + (&Colon, &Colon) | + (&ModSep, &ModSep) | + (&RArrow, &RArrow) | + (&LArrow, &LArrow) | + (&FatArrow, &FatArrow) | + (&Pound, &Pound) | + (&Dollar, &Dollar) | + (&Question, &Question) | + (&Whitespace, &Whitespace) | + (&Comment, &Comment) | + (&Eof, &Eof) => true, + + (&BinOp(a), &BinOp(b)) | + (&BinOpEq(a), &BinOpEq(b)) => a == b, + + (&OpenDelim(a), &OpenDelim(b)) | + (&CloseDelim(a), &CloseDelim(b)) => a == b, + + (&DocComment(a), &DocComment(b)) | + (&Shebang(a), &Shebang(b)) => a == b, + + (&Lifetime(a), &Lifetime(b)) => a.name == b.name, + (&Ident(a, b), &Ident(c, d)) => a.name == c.name && b == d, + + (&Literal(ref a, b), &Literal(ref c, d)) => { + b == d && a.probably_equal_for_proc_macro(c) + } + + (&Interpolated(_), &Interpolated(_)) => false, + + _ => panic!("forgot to add a token?"), + } + } } #[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)] diff --git a/src/libsyntax/tokenstream.rs b/src/libsyntax/tokenstream.rs index 6ac04b3cdf6c4..e2b5c4e1adfdb 100644 --- a/src/libsyntax/tokenstream.rs +++ b/src/libsyntax/tokenstream.rs @@ -124,6 +124,24 @@ impl TokenTree { } } + // See comments in `interpolated_to_tokenstream` for why we care about + // *probably* equal here rather than actual equality + // + // This is otherwise the same as `eq_unspanned`, only recursing with a + // different method. + pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool { + match (self, other) { + (&TokenTree::Token(_, ref tk), &TokenTree::Token(_, ref tk2)) => { + tk.probably_equal_for_proc_macro(tk2) + } + (&TokenTree::Delimited(_, ref dl), &TokenTree::Delimited(_, ref dl2)) => { + dl.delim == dl2.delim && + dl.stream().probably_equal_for_proc_macro(&dl2.stream()) + } + (_, _) => false, + } + } + /// Retrieve the TokenTree's span. pub fn span(&self) -> Span { match *self { @@ -250,6 +268,22 @@ impl TokenStream { t1.next().is_none() && t2.next().is_none() } + // See comments in `interpolated_to_tokenstream` for why we care about + // *probably* equal here rather than actual equality + // + // This is otherwise the same as `eq_unspanned`, only recursing with a + // different method. + pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool { + let mut t1 = self.trees(); + let mut t2 = other.trees(); + for (t1, t2) in t1.by_ref().zip(t2.by_ref()) { + if !t1.probably_equal_for_proc_macro(&t2) { + return false; + } + } + t1.next().is_none() && t2.next().is_none() + } + /// Precondition: `self` consists of a single token tree. /// Returns true if the token tree is a joint operation w.r.t. `proc_macro::TokenNode`. pub fn as_tree(self) -> (TokenTree, bool /* joint? */) { diff --git a/src/test/compile-fail-fulldeps/proc-macro/attribute-with-error.rs b/src/test/compile-fail-fulldeps/proc-macro/attribute-with-error.rs index 00a27818327f6..edfedebf870cf 100644 --- a/src/test/compile-fail-fulldeps/proc-macro/attribute-with-error.rs +++ b/src/test/compile-fail-fulldeps/proc-macro/attribute-with-error.rs @@ -21,6 +21,8 @@ use attribute_with_error::foo; fn test1() { let a: i32 = "foo"; //~^ ERROR: mismatched types + let b: i32 = "f'oo"; + //~^ ERROR: mismatched types } fn test2() {