Rollup merge of rust-lang#62329 - matklad:no-peeking, r=petrochenkov

Remove support for 1-token lookahead from the lexer

`StringReader` maintained `peek_token` and `peek_span_src_raw` for lookahead.

`peek_token` was used only by rustdoc syntax coloring. After moving the peeking logic into the highlighter, I was able to remove `peek_token` from the lexer. I tried to use `iter::Peekable`, but that wasn't as pretty as I had hoped, due to buffered fatal errors, so I went with hand-rolled peeking.

After that I noticed that the only peeking behavior left was for raw tokens, to test tt jointness. I've rewritten it in terms of trivia tokens, and not just spans.

After that it became possible to simplify the awkward constructor of the lexer, which could return `Err` if the first peeked token contained an error.
Mark-Simulacrum · Jul 4, 2019 · 7854e9e · 7854e9e
2 parents 99366a9 + 3e362a4
commit 7854e9e
Show file tree

Hide file tree

Showing 7 changed files with 129 additions and 180 deletions.
diff --git a/src/librustc_save_analysis/span_utils.rs b/src/librustc_save_analysis/span_utils.rs
@@ -53,7 +53,7 @@ impl<'a> SpanUtils<'a> {
     pub fn sub_span_of_token(&self, span: Span, tok: TokenKind) -> Option<Span> {
         let mut toks = self.retokenise_span(span);
         loop {
-            let next = toks.real_token();
+            let next = toks.next_token();
             if next == token::Eof {
                 return None;
             }

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
@@ -38,17 +38,17 @@ pub fn render_with_highlighting(
         FileName::Custom(String::from("rustdoc-highlighting")),
         src.to_owned(),
     );
-    let highlight_result =
-        lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
-            let mut classifier = Classifier::new(lexer, sess.source_map());
-
-            let mut highlighted_source = vec![];
-            if classifier.write_source(&mut highlighted_source).is_err() {
-                Err(classifier.lexer.buffer_fatal_errors())
-            } else {
-                Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
-            }
-        });
+    let highlight_result = {
+        let lexer = lexer::StringReader::new(&sess, fm, None);
+        let mut classifier = Classifier::new(lexer, sess.source_map());
+
+        let mut highlighted_source = vec![];
+        if classifier.write_source(&mut highlighted_source).is_err() {
+            Err(classifier.lexer.buffer_fatal_errors())
+        } else {
+            Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+        }
+    };
 
     match highlight_result {
         Ok(highlighted_source) => {
@@ -79,6 +79,7 @@ pub fn render_with_highlighting(
 /// each span of text in sequence.
 struct Classifier<'a> {
     lexer: lexer::StringReader<'a>,
+    peek_token: Option<Token>,
     source_map: &'a SourceMap,
 
     // State of the classifier.
@@ -178,6 +179,7 @@ impl<'a> Classifier<'a> {
     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
         Classifier {
             lexer,
+            peek_token: None,
             source_map,
             in_attribute: false,
             in_macro: false,
@@ -187,10 +189,19 @@ impl<'a> Classifier<'a> {
 
     /// Gets the next token out of the lexer.
     fn try_next_token(&mut self) -> Result<Token, HighlightError> {
-        match self.lexer.try_next_token() {
-            Ok(token) => Ok(token),
-            Err(_) => Err(HighlightError::LexError),
+        if let Some(token) = self.peek_token.take() {
+            return Ok(token);
+        }
+        self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
+    }
+
+    fn peek(&mut self) -> Result<&Token, HighlightError> {
+        if self.peek_token.is_none() {
+            self.peek_token = Some(
+                self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
+            );
         }
+        Ok(self.peek_token.as_ref().unwrap())
     }
 
     /// Exhausts the `lexer` writing the output into `out`.
@@ -234,7 +245,7 @@ impl<'a> Classifier<'a> {
             // reference or dereference operator or a reference or pointer type, instead of the
             // bit-and or multiplication operator.
             token::BinOp(token::And) | token::BinOp(token::Star)
-                if self.lexer.peek() != &token::Whitespace => Class::RefKeyWord,
+                if self.peek()? != &token::Whitespace => Class::RefKeyWord,
 
             // Consider this as part of a macro invocation if there was a
             // leading identifier.
@@ -257,7 +268,7 @@ impl<'a> Classifier<'a> {
             token::Question => Class::QuestionMark,
 
             token::Dollar => {
-                if self.lexer.peek().is_ident() {
+                if self.peek()?.is_ident() {
                     self.in_macro_nonterminal = true;
                     Class::MacroNonTerminal
                 } else {
@@ -280,9 +291,9 @@ impl<'a> Classifier<'a> {
                 // as an attribute.
 
                 // Case 1: #![inner_attribute]
-                if self.lexer.peek() == &token::Not {
+                if self.peek()? == &token::Not {
                     self.try_next_token()?; // NOTE: consumes `!` token!
-                    if self.lexer.peek() == &token::OpenDelim(token::Bracket) {
+                    if self.peek()? == &token::OpenDelim(token::Bracket) {
                         self.in_attribute = true;
                         out.enter_span(Class::Attribute)?;
                     }
@@ -292,7 +303,7 @@ impl<'a> Classifier<'a> {
                 }
 
                 // Case 2: #[outer_attribute]
-                if self.lexer.peek() == &token::OpenDelim(token::Bracket) {
+                if self.peek()? == &token::OpenDelim(token::Bracket) {
                     self.in_attribute = true;
                     out.enter_span(Class::Attribute)?;
                 }
@@ -341,7 +352,7 @@ impl<'a> Classifier<'a> {
                         if self.in_macro_nonterminal {
                             self.in_macro_nonterminal = false;
                             Class::MacroNonTerminal
-                        } else if self.lexer.peek() == &token::Not {
+                        } else if self.peek()? == &token::Not {
                             self.in_macro = true;
                             Class::Macro
                         } else {

diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs
@@ -32,7 +32,8 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             dox[code_block.code].to_owned(),
         );
 
-        let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
+        let errors = {
+            let mut lexer = Lexer::new(&sess, source_file, None);
             while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
                 if kind == token::Eof {
                     break;
@@ -46,7 +47,7 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             } else {
                 Ok(())
             }
-        });
+        };
 
         if let Err(errors) = errors {
             let mut diag = if let Some(sp) =

diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
@@ -268,7 +268,7 @@ fn read_block_comment(rdr: &mut StringReader<'_>,
         while level > 0 {
             debug!("=== block comment level {}", level);
             if rdr.is_eof() {
-                rdr.fatal("unterminated block comment").raise();
+                rdr.fatal_span_(rdr.pos, rdr.pos, "unterminated block comment").raise();
             }
             if rdr.ch_is('\n') {
                 trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
@@ -346,7 +346,7 @@ pub fn gather_comments(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) ->
     srdr.read_to_string(&mut src).unwrap();
     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
     let source_file = cm.new_source_file(path, src);
-    let mut rdr = lexer::StringReader::new_raw(sess, source_file, None);
+    let mut rdr = lexer::StringReader::new(sess, source_file, None);
 
     let mut comments: Vec<Comment> = Vec::new();
     let mut code_to_the_left = false; // Only code