Auto merge of #63709 - matklad:decomposed-tokens, r=petrochenkov

Move token gluing to token stream parsing. Work towards #63689: this moves token gluing from the lexer to the token tree layer. This is only a minimal step, but I like the negative diff here. r? @petrochenkov
rust-lang · Aug 20, 2019 · 1489095 · 1489095
2 parents 7858dc2 + 914e1f4
commit 1489095
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 209 deletions.
diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs
@@ -23,9 +23,6 @@ pub enum TokenKind {
     Lifetime { starts_with_number: bool },
     Semi,
     Comma,
-    DotDotDot,
-    DotDotEq,
-    DotDot,
     Dot,
     OpenParen,
     CloseParen,
@@ -37,41 +34,19 @@ pub enum TokenKind {
     Pound,
     Tilde,
     Question,
-    ColonColon,
     Colon,
     Dollar,
-    EqEq,
     Eq,
-    FatArrow,
-    Ne,
     Not,
-    Le,
-    LArrow,
     Lt,
-    ShlEq,
-    Shl,
-    Ge,
     Gt,
-    ShrEq,
-    Shr,
-    RArrow,
     Minus,
-    MinusEq,
     And,
-    AndAnd,
-    AndEq,
     Or,
-    OrOr,
-    OrEq,
-    PlusEq,
     Plus,
-    StarEq,
     Star,
-    SlashEq,
     Slash,
-    CaretEq,
     Caret,
-    PercentEq,
     Percent,
     Unknown,
 }
@@ -135,13 +110,7 @@ impl Cursor<'_> {
             '/' => match self.nth_char(0) {
                 '/' => self.line_comment(),
                 '*' => self.block_comment(),
-                _ => {
-                    if self.eat_assign() {
-                        SlashEq
-                    } else {
-                        Slash
-                    }
-                }
+                _ => Slash,
             },
             c if character_properties::is_whitespace(c) => self.whitespace(),
             'r' => match (self.nth_char(0), self.nth_char(1)) {
@@ -199,22 +168,7 @@ impl Cursor<'_> {
             }
             ';' => Semi,
             ',' => Comma,
-            '.' => {
-                if self.nth_char(0) == '.' {
-                    self.bump();
-                    if self.nth_char(0) == '.' {
-                        self.bump();
-                        DotDotDot
-                    } else if self.nth_char(0) == '=' {
-                        self.bump();
-                        DotDotEq
-                    } else {
-                        DotDot
-                    }
-                } else {
-                    Dot
-                }
-            }
+            '.' => Dot,
             '(' => OpenParen,
             ')' => CloseParen,
             '{' => OpenBrace,
@@ -225,112 +179,19 @@ impl Cursor<'_> {
             '#' => Pound,
             '~' => Tilde,
             '?' => Question,
-            ':' => {
-                if self.nth_char(0) == ':' {
-                    self.bump();
-                    ColonColon
-                } else {
-                    Colon
-                }
-            }
+            ':' => Colon,
             '$' => Dollar,
-            '=' => {
-                if self.nth_char(0) == '=' {
-                    self.bump();
-                    EqEq
-                } else if self.nth_char(0) == '>' {
-                    self.bump();
-                    FatArrow
-                } else {
-                    Eq
-                }
-            }
-            '!' => {
-                if self.nth_char(0) == '=' {
-                    self.bump();
-                    Ne
-                } else {
-                    Not
-                }
-            }
-            '<' => match self.nth_char(0) {
-                '=' => {
-                    self.bump();
-                    Le
-                }
-                '<' => {
-                    self.bump();
-                    if self.eat_assign() { ShlEq } else { Shl }
-                }
-                '-' => {
-                    self.bump();
-                    LArrow
-                }
-                _ => Lt,
-            },
-            '>' => match self.nth_char(0) {
-                '=' => {
-                    self.bump();
-                    Ge
-                }
-                '>' => {
-                    self.bump();
-                    if self.eat_assign() { ShrEq } else { Shr }
-                }
-                _ => Gt,
-            },
-            '-' => {
-                if self.nth_char(0) == '>' {
-                    self.bump();
-                    RArrow
-                } else {
-                    if self.eat_assign() { MinusEq } else { Minus }
-                }
-            }
-            '&' => {
-                if self.nth_char(0) == '&' {
-                    self.bump();
-                    AndAnd
-                } else {
-                    if self.eat_assign() { AndEq } else { And }
-                }
-            }
-            '|' => {
-                if self.nth_char(0) == '|' {
-                    self.bump();
-                    OrOr
-                } else {
-                    if self.eat_assign() { OrEq } else { Or }
-                }
-            }
-            '+' => {
-                if self.eat_assign() {
-                    PlusEq
-                } else {
-                    Plus
-                }
-            }
-            '*' => {
-                if self.eat_assign() {
-                    StarEq
-                } else {
-                    Star
-                }
-            }
-            '^' => {
-                if self.eat_assign() {
-                    CaretEq
-                } else {
-                    Caret
-                }
-            }
-            '%' => {
-                if self.eat_assign() {
-                    PercentEq
-                } else {
-                    Percent
-                }
-            }
+            '=' => Eq,
+            '!' => Not,
+            '<' => Lt,
+            '>' => Gt,
+            '-' => Minus,
+            '&' => And,
+            '|' => Or,
+            '+' => Plus,
+            '*' => Star,
+            '^' => Caret,
+            '%' => Percent,
             '\'' => self.lifetime_or_char(),
             '"' => {
                 let terminated = self.double_quoted_string();
@@ -643,15 +504,6 @@ impl Cursor<'_> {
             self.bump();
         }
     }
-
-    fn eat_assign(&mut self) -> bool {
-        if self.nth_char(0) == '=' {
-            self.bump();
-            true
-        } else {
-            false
-        }
-    }
 }
 
 pub mod character_properties {

diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
@@ -273,9 +273,6 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::TokenKind::Semi => token::Semi,
             rustc_lexer::TokenKind::Comma => token::Comma,
-            rustc_lexer::TokenKind::DotDotDot => token::DotDotDot,
-            rustc_lexer::TokenKind::DotDotEq => token::DotDotEq,
-            rustc_lexer::TokenKind::DotDot => token::DotDot,
             rustc_lexer::TokenKind::Dot => token::Dot,
             rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
             rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
@@ -287,42 +284,20 @@ impl<'a> StringReader<'a> {
             rustc_lexer::TokenKind::Pound => token::Pound,
             rustc_lexer::TokenKind::Tilde => token::Tilde,
             rustc_lexer::TokenKind::Question => token::Question,
-            rustc_lexer::TokenKind::ColonColon => token::ModSep,
             rustc_lexer::TokenKind::Colon => token::Colon,
             rustc_lexer::TokenKind::Dollar => token::Dollar,
-            rustc_lexer::TokenKind::EqEq => token::EqEq,
             rustc_lexer::TokenKind::Eq => token::Eq,
-            rustc_lexer::TokenKind::FatArrow => token::FatArrow,
-            rustc_lexer::TokenKind::Ne => token::Ne,
             rustc_lexer::TokenKind::Not => token::Not,
-            rustc_lexer::TokenKind::Le => token::Le,
-            rustc_lexer::TokenKind::LArrow => token::LArrow,
             rustc_lexer::TokenKind::Lt => token::Lt,
-            rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl),
-            rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl),
-            rustc_lexer::TokenKind::Ge => token::Ge,
             rustc_lexer::TokenKind::Gt => token::Gt,
-            rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr),
-            rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr),
-            rustc_lexer::TokenKind::RArrow => token::RArrow,
             rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
-            rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus),
             rustc_lexer::TokenKind::And => token::BinOp(token::And),
-            rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And),
-            rustc_lexer::TokenKind::AndAnd => token::AndAnd,
             rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
-            rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or),
-            rustc_lexer::TokenKind::OrOr => token::OrOr,
             rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
-            rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus),
             rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
-            rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star),
             rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
-            rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash),
             rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
-            rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret),
             rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
-            rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent),
 
             rustc_lexer::TokenKind::Unknown => {
                 let c = self.str_from(start).chars().next().unwrap();

diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs
@@ -75,42 +75,50 @@ fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind
 }
 
 #[test]
-fn doublecolonparsing() {
+fn doublecolon_parsing() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a b".to_string()),
-                        vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a b".to_string()),
+            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_2() {
+fn doublecolon_parsing_2() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a::b".to_string()),
-                        vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a::b".to_string()),
+            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_3() {
+fn doublecolon_parsing_3() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
-                        vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a ::b".to_string()),
+            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_4() {
+fn doublecolon_parsing_4() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
-                        vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a:: b".to_string()),
+            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
+        );
     })
 }