From 2af0218bf1ffca0750a352554f20a07b760a30a8 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Wed, 20 May 2020 00:13:24 -0400 Subject: [PATCH] Recursively expand nonterminals --- src/librustc_ast/tokenstream.rs | 2 - src/librustc_parse/lib.rs | 109 ++++++++++++++++++++++++++++---- 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/src/librustc_ast/tokenstream.rs b/src/librustc_ast/tokenstream.rs index 7348c41dbe778..ff3469930c6d3 100644 --- a/src/librustc_ast/tokenstream.rs +++ b/src/librustc_ast/tokenstream.rs @@ -290,8 +290,6 @@ impl TokenStream { t1.next().is_none() && t2.next().is_none() } - - pub fn map_enumerated TokenTree>(self, mut f: F) -> TokenStream { TokenStream(Lrc::new( self.0 diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index 9d548fa0e81ad..352278d252616 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -7,20 +7,20 @@ #![feature(or_patterns)] use rustc_ast::ast; -use rustc_ast::token::{self, Nonterminal, Token, TokenKind, DelimToken}; -use rustc_ast::tokenstream::{self, TokenStream, TokenTree}; +use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind}; +use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree}; use rustc_ast_pretty::pprust; use rustc_data_structures::sync::Lrc; use rustc_errors::{Diagnostic, FatalError, Level, PResult}; use rustc_session::parse::ParseSess; -use rustc_span::{FileName, SourceFile, Span}; use rustc_span::symbol::kw; +use rustc_span::{FileName, SourceFile, Span}; +use std::mem; use std::path::Path; use std::str; -use std::mem; -use log::info; +use log::{debug, info}; pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments"); @@ -302,7 +302,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke // modifications, including adding/removing typically non-semantic // tokens such as extra braces and commas, don't happen. if let Some(tokens) = tokens { - if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real) { + if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) { return tokens; } info!( @@ -381,7 +381,11 @@ fn prepend_attrs( // // This is otherwise the same as `eq_unspanned`, only recursing with a // different method. -pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &TokenStream) -> bool { +pub fn tokenstream_probably_equal_for_proc_macro( + first: &TokenStream, + other: &TokenStream, + sess: &ParseSess, +) -> bool { // When checking for `probably_eq`, we ignore certain tokens that aren't // preserved in the AST. Because they are not preserved, the pretty // printer arbitrarily adds or removes them when printing as token @@ -409,10 +413,83 @@ pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &To true } - let mut t1 = first.trees().filter(semantic_tree); - let mut t2 = other.trees().filter(semantic_tree); + // When comparing two `TokenStream`s, we ignore the `IsJoint` information. + // + // However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will + // use `Token.glue` on adjacent tokens with the proper `IsJoint`. + // Since we are ignoreing `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`) + // and its 'split'/'unglued' compoenents (e.g. `Gt, Gt`) are equivalent + // when determining if two `TokenStream`s are 'probably equal'. + // + // Therefore, we use `break_two_token_op` to convert all tokens + // to the 'unglued' form (if it exists). This ensures that two + // `TokenStream`s which differ only in how their tokens are glued + // will be considered 'probably equal', which allows us to keep spans. + // + // This is important when the original `TokenStream` contained + // extra spaces (e.g. `f :: < Vec < _ > > ( ) ;'). These extra spaces + // will be omitted when we pretty-print, which can cause the original + // and reparsed `TokenStream`s to differ in the assignment of `IsJoint`, + // leading to some tokens being 'glued' together in one stream but not + // the other. See #68489 for more details. + fn break_tokens(tree: TokenTree) -> impl Iterator { + // In almost all cases, we should have either zero or one levels + // of 'unglueing'. However, in some unusual cases, we may need + // to iterate breaking tokens mutliple times. For example: + // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]' + let mut token_trees: SmallVec<[_; 2]>; + if let TokenTree::Token(token) = &tree { + let mut out = SmallVec::<[_; 2]>::new(); + out.push(token.clone()); + // Iterate to fixpoint: + // * We start off with 'out' containing our initial token, and `temp` empty + // * If we are able to break any tokens in `out`, then `out` will have + // at least one more element than 'temp', so we will try to break tokens + // again. + // * If we cannot break any tokens in 'out', we are done + loop { + let mut temp = SmallVec::<[_; 2]>::new(); + let mut changed = false; + + for token in out.into_iter() { + if let Some((first, second)) = token.kind.break_two_token_op() { + temp.push(Token::new(first, DUMMY_SP)); + temp.push(Token::new(second, DUMMY_SP)); + changed = true; + } else { + temp.push(token); + } + } + out = temp; + if !changed { + break; + } + } + token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect(); + if token_trees.len() != 1 { + debug!("break_tokens: broke {:?} to {:?}", tree, token_trees); + } + } else { + token_trees = SmallVec::new(); + token_trees.push(tree); + } + token_trees.into_iter() + } + + let expand_nt = |tree: TokenTree| { + if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree { + nt_to_tokenstream(nt, sess, *span).into_trees() + } else { + TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees() + } + }; + + // Break tokens after we expand any nonterminals, so that we break tokens + // that are produced as a result of nonterminal expansion. + let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens); + let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens); for (t1, t2) in t1.by_ref().zip(t2.by_ref()) { - if !tokentree_probably_equal_for_proc_macro(&t1, &t2) { + if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) { return false; } } @@ -471,25 +548,29 @@ crate fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bo b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate) } - (&Interpolated(_), &Interpolated(_)) => false, + // Expanded by `tokenstream_probably_equal_for_proc_macro` + (&Interpolated(_), &Interpolated(_)) => unreachable!(), _ => panic!("forgot to add a token?"), } } - // See comments in `Nonterminal::to_tokenstream` for why we care about // *probably* equal here rather than actual equality // // This is otherwise the same as `eq_unspanned`, only recursing with a // different method. -pub fn tokentree_probably_equal_for_proc_macro(first: &TokenTree, other: &TokenTree) -> bool { +pub fn tokentree_probably_equal_for_proc_macro( + first: &TokenTree, + other: &TokenTree, + sess: &ParseSess, +) -> bool { match (first, other) { (TokenTree::Token(token), TokenTree::Token(token2)) => { token_probably_equal_for_proc_macro(token, token2) } (TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => { - delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2) + delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess) } _ => false, }