Move 'probably equal' methods to librustc_parse

This is preparation for PR #73084
rust-lang · Aug 2, 2020 · 183947f · 183947f
1 parent 5ef872f
commit 183947f
Show file tree

Hide file tree

Showing 5 changed files with 187 additions and 184 deletions.
diff --git a/Cargo.lock b/Cargo.lock
@@ -3808,6 +3808,7 @@ dependencies = [
  "rustc_lexer",
  "rustc_session",
  "rustc_span",
+ "smallvec 1.4.0",
  "tracing",
  "unicode-normalization",
 ]

diff --git a/src/librustc_ast/token.rs b/src/librustc_ast/token.rs
@@ -673,62 +673,6 @@ impl Token {
 
         Some(Token::new(kind, self.span.to(joint.span)))
     }
-
-    // See comments in `Nonterminal::to_tokenstream` for why we care about
-    // *probably* equal here rather than actual equality
-    crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool {
-        if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) {
-            return false;
-        }
-        match (&self.kind, &other.kind) {
-            (&Eq, &Eq)
-            | (&Lt, &Lt)
-            | (&Le, &Le)
-            | (&EqEq, &EqEq)
-            | (&Ne, &Ne)
-            | (&Ge, &Ge)
-            | (&Gt, &Gt)
-            | (&AndAnd, &AndAnd)
-            | (&OrOr, &OrOr)
-            | (&Not, &Not)
-            | (&Tilde, &Tilde)
-            | (&At, &At)
-            | (&Dot, &Dot)
-            | (&DotDot, &DotDot)
-            | (&DotDotDot, &DotDotDot)
-            | (&DotDotEq, &DotDotEq)
-            | (&Comma, &Comma)
-            | (&Semi, &Semi)
-            | (&Colon, &Colon)
-            | (&ModSep, &ModSep)
-            | (&RArrow, &RArrow)
-            | (&LArrow, &LArrow)
-            | (&FatArrow, &FatArrow)
-            | (&Pound, &Pound)
-            | (&Dollar, &Dollar)
-            | (&Question, &Question)
-            | (&Whitespace, &Whitespace)
-            | (&Comment, &Comment)
-            | (&Eof, &Eof) => true,
-
-            (&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b,
-
-            (&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
-
-            (&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
-
-            (&Literal(a), &Literal(b)) => a == b,
-
-            (&Lifetime(a), &Lifetime(b)) => a == b,
-            (&Ident(a, b), &Ident(c, d)) => {
-                b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
-            }
-
-            (&Interpolated(..), &Interpolated(..)) => false,
-
-            _ => panic!("forgot to add a token?"),
-        }
-    }
 }
 
 impl PartialEq<TokenKind> for Token {

diff --git a/src/librustc_ast/tokenstream.rs b/src/librustc_ast/tokenstream.rs
@@ -21,8 +21,6 @@ use rustc_macros::HashStable_Generic;
 use rustc_span::{Span, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
-use log::debug;
-
 use std::{iter, mem};
 
 /// When the main rust parser encounters a syntax-extension invocation, it
@@ -68,23 +66,6 @@ impl TokenTree {
         }
     }
 
-    // See comments in `Nonterminal::to_tokenstream` for why we care about
-    // *probably* equal here rather than actual equality
-    //
-    // This is otherwise the same as `eq_unspanned`, only recursing with a
-    // different method.
-    pub fn probably_equal_for_proc_macro(&self, other: &TokenTree) -> bool {
-        match (self, other) {
-            (TokenTree::Token(token), TokenTree::Token(token2)) => {
-                token.probably_equal_for_proc_macro(token2)
-            }
-            (TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
-                delim == delim2 && tts.probably_equal_for_proc_macro(&tts2)
-            }
-            _ => false,
-        }
-    }
-
     /// Retrieves the TokenTree's span.
     pub fn span(&self) -> Span {
         match self {
@@ -307,112 +288,6 @@ impl TokenStream {
         t1.next().is_none() && t2.next().is_none()
     }
 
-    // See comments in `Nonterminal::to_tokenstream` for why we care about
-    // *probably* equal here rather than actual equality
-    //
-    // This is otherwise the same as `eq_unspanned`, only recursing with a
-    // different method.
-    pub fn probably_equal_for_proc_macro(&self, other: &TokenStream) -> bool {
-        // When checking for `probably_eq`, we ignore certain tokens that aren't
-        // preserved in the AST. Because they are not preserved, the pretty
-        // printer arbitrarily adds or removes them when printing as token
-        // streams, making a comparison between a token stream generated from an
-        // AST and a token stream which was parsed into an AST more reliable.
-        fn semantic_tree(tree: &TokenTree) -> bool {
-            if let TokenTree::Token(token) = tree {
-                if let
-                    // The pretty printer tends to add trailing commas to
-                    // everything, and in particular, after struct fields.
-                    | token::Comma
-                    // The pretty printer emits `NoDelim` as whitespace.
-                    | token::OpenDelim(DelimToken::NoDelim)
-                    | token::CloseDelim(DelimToken::NoDelim)
-                    // The pretty printer collapses many semicolons into one.
-                    | token::Semi
-                    // The pretty printer collapses whitespace arbitrarily and can
-                    // introduce whitespace from `NoDelim`.
-                    | token::Whitespace
-                    // The pretty printer can turn `$crate` into `::crate_name`
-                    | token::ModSep = token.kind {
-                    return false;
-                }
-            }
-            true
-        }
-
-        // When comparing two `TokenStream`s, we ignore the `IsJoint` information.
-        //
-        // However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
-        // use `Token.glue` on adjacent tokens with the proper `IsJoint`.
-        // Since we are ignoreing `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
-        // and its 'split'/'unglued' compoenents (e.g. `Gt, Gt`) are equivalent
-        // when determining if two `TokenStream`s are 'probably equal'.
-        //
-        // Therefore, we use `break_two_token_op` to convert all tokens
-        // to the 'unglued' form (if it exists). This ensures that two
-        // `TokenStream`s which differ only in how their tokens are glued
-        // will be considered 'probably equal', which allows us to keep spans.
-        //
-        // This is important when the original `TokenStream` contained
-        // extra spaces (e.g. `f :: < Vec < _ > > ( ) ;'). These extra spaces
-        // will be omitted when we pretty-print, which can cause the original
-        // and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
-        // leading to some tokens being 'glued' together in one stream but not
-        // the other. See #68489 for more details.
-        fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
-            // In almost all cases, we should have either zero or one levels
-            // of 'unglueing'. However, in some unusual cases, we may need
-            // to iterate breaking tokens mutliple times. For example:
-            // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
-            let mut token_trees: SmallVec<[_; 2]>;
-            if let TokenTree::Token(token) = &tree {
-                let mut out = SmallVec::<[_; 2]>::new();
-                out.push(token.clone());
-                // Iterate to fixpoint:
-                // * We start off with 'out' containing our initial token, and `temp` empty
-                // * If we are able to break any tokens in `out`, then `out` will have
-                //   at least one more element than 'temp', so we will try to break tokens
-                //   again.
-                // * If we cannot break any tokens in 'out', we are done
-                loop {
-                    let mut temp = SmallVec::<[_; 2]>::new();
-                    let mut changed = false;
-
-                    for token in out.into_iter() {
-                        if let Some((first, second)) = token.kind.break_two_token_op() {
-                            temp.push(Token::new(first, DUMMY_SP));
-                            temp.push(Token::new(second, DUMMY_SP));
-                            changed = true;
-                        } else {
-                            temp.push(token);
-                        }
-                    }
-                    out = temp;
-                    if !changed {
-                        break;
-                    }
-                }
-                token_trees = out.into_iter().map(TokenTree::Token).collect();
-                if token_trees.len() != 1 {
-                    debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
-                }
-            } else {
-                token_trees = SmallVec::new();
-                token_trees.push(tree);
-            }
-            token_trees.into_iter()
-        }
-
-        let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
-        let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
-        for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
-            if !t1.probably_equal_for_proc_macro(&t2) {
-                return false;
-            }
-        }
-        t1.next().is_none() && t2.next().is_none()
-    }
-
     pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
         TokenStream(Lrc::new(
             self.0

diff --git a/src/librustc_parse/Cargo.toml b/src/librustc_parse/Cargo.toml
@@ -21,3 +21,4 @@ rustc_session = { path = "../librustc_session" }
 rustc_span = { path = "../librustc_span" }
 rustc_ast = { path = "../librustc_ast" }
 unicode-normalization = "0.1.11"
+smallvec = { version = "1.0", features = ["union", "may_dangle"] }