Rollup merge of rust-lang#56884 - euclio:codeblock-diagnostics, r=Qui…

…etMisdreavus rustdoc: overhaul code block lexing errors Fixes rust-lang#53919. This PR moves the reporting of code block lexing errors from rendering time to an early pass, so we can use the compiler's error reporting mechanisms. This dramatically improves the diagnostics in this situation: we now de-emphasize the lexing errors as a note under a warning that has a span and suggestion instead of just emitting errors at the top level. Additionally, this PR generalizes the markdown -> source span calculation function, which should allow other rustdoc warnings to use better spans in the future. Last, the PR makes sure that the code block is always emitted in the docs, even if it fails to highlight correctly. Of note: - The new pass unfortunately adds another pass over the docs to gather the doc blocks for syntax-checking. I wonder if this could be combined with the pass that looks for testable blocks? I'm not familiar with that code, so I don't know how feasible that is. - `pulldown_cmark` doesn't make it easy to find the spans of the code blocks, so the code that calculates the spans is a little nasty. It works for all the test cases I threw at it, but I wouldn't be surprised if an edge case would break it. Should have a thorough review. - This PR worsens the state of rust-lang#56885, since those certain fatal lexing errors are now emitted before docs get generated at all.
Centril · Jan 15, 2019 · 6b04db4 · 6b04db4
2 parents 14ea6e5 + 8c93798
commit 6b04db4
Show file tree

Hide file tree

Showing 10 changed files with 562 additions and 131 deletions.
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
@@ -25,40 +25,51 @@ pub fn render_with_highlighting(
     tooltip: Option<(&str, &str)>,
 ) -> String {
     debug!("highlighting: ================\n{}\n==============", src);
-    let sess = parse::ParseSess::new(FilePathMapping::empty());
-    let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
-                                               src.to_string());
-
     let mut out = Vec::new();
     if let Some((tooltip, class)) = tooltip {
         write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
                      class='tooltiptext'>{}</span></div></div>",
                class, tooltip).unwrap();
     }
-    write_header(class, &mut out).unwrap();
-
-    let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
-        Ok(l) => l,
-        Err(_) => {
-            let first_line = src.lines().next().unwrap_or_else(|| "");
-            let mut err = sess.span_diagnostic
-                              .struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
-                                                     (Ignoring this codeblock)",
-                                                    first_line));
-            err.emit();
-            return String::new();
+
+    let sess = parse::ParseSess::new(FilePathMapping::empty());
+    let fm = sess.source_map().new_source_file(
+        FileName::Custom(String::from("rustdoc-highlighting")),
+        src.to_owned(),
+    );
+    let highlight_result =
+        lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
+            let mut classifier = Classifier::new(lexer, sess.source_map());
+
+            let mut highlighted_source = vec![];
+            if classifier.write_source(&mut highlighted_source).is_err() {
+                Err(classifier.lexer.buffer_fatal_errors())
+            } else {
+                Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
+            }
+        });
+
+    match highlight_result {
+        Ok(highlighted_source) => {
+            write_header(class, &mut out).unwrap();
+            write!(out, "{}", highlighted_source).unwrap();
+            if let Some(extension) = extension {
+                write!(out, "{}", extension).unwrap();
+            }
+            write_footer(&mut out).unwrap();
         }
-    };
-    let mut classifier = Classifier::new(lexer, sess.source_map());
-    if classifier.write_source(&mut out).is_err() {
-        classifier.lexer.emit_fatal_errors();
-        return format!("<pre>{}</pre>", src);
-    }
+        Err(errors) => {
+            // If errors are encountered while trying to highlight, cancel the errors and just emit
+            // the unhighlighted source. The errors will have already been reported in the
+            // `check-code-block-syntax` pass.
+            for mut error in errors {
+                error.cancel();
+            }
 
-    if let Some(extension) = extension {
-        write!(out, "{}", extension).unwrap();
+            write!(out, "<pre><code>{}</code></pre>", src).unwrap();
+        }
     }
-    write_footer(&mut out).unwrap();
+
     String::from_utf8_lossy(&out[..]).into_owned()
 }
 
@@ -151,6 +162,17 @@ impl<U: Write> Writer for U {
     }
 }
 
+enum HighlightError {
+    LexError,
+    IoError(io::Error),
+}
+
+impl From<io::Error> for HighlightError {
+    fn from(err: io::Error) -> Self {
+        HighlightError::IoError(err)
+    }
+}
+
 impl<'a> Classifier<'a> {
     fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
         Classifier {
@@ -162,17 +184,11 @@ impl<'a> Classifier<'a> {
         }
     }
 
-    /// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
-    fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
+    /// Gets the next token out of the lexer.
+    fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
         match self.lexer.try_next_token() {
             Ok(tas) => Ok(tas),
-            Err(_) => {
-                let mut err = self.lexer.sess.span_diagnostic
-                                  .struct_warn("Backing out of syntax highlighting");
-                err.note("You probably did not intend to render this as a rust code-block");
-                err.emit();
-                Err(io::Error::new(io::ErrorKind::Other, ""))
-            }
+            Err(_) => Err(HighlightError::LexError),
         }
     }
 
@@ -185,7 +201,7 @@ impl<'a> Classifier<'a> {
     /// source.
     fn write_source<W: Writer>(&mut self,
                                    out: &mut W)
-                                   -> io::Result<()> {
+                                   -> Result<(), HighlightError> {
         loop {
             let next = self.try_next_token()?;
             if next.tok == token::Eof {
@@ -202,7 +218,7 @@ impl<'a> Classifier<'a> {
     fn write_token<W: Writer>(&mut self,
                               out: &mut W,
                               tas: TokenAndSpan)
-                              -> io::Result<()> {
+                              -> Result<(), HighlightError> {
         let klass = match tas.tok {
             token::Shebang(s) => {
                 out.string(Escape(&s.as_str()), Class::None)?;
@@ -341,7 +357,9 @@ impl<'a> Classifier<'a> {
 
         // Anything that didn't return above is the simple case where we the
         // class just spans a single token, so we can use the `string` method.
-        out.string(Escape(&self.snip(tas.sp)), klass)
+        out.string(Escape(&self.snip(tas.sp)), klass)?;
+
+        Ok(())
     }
 
     // Helper function to get a snippet from the source_map.

diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
@@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
     links
 }
 
+#[derive(Debug)]
+crate struct RustCodeBlock {
+    /// The range in the markdown that the code block occupies. Note that this includes the fences
+    /// for fenced code blocks.
+    pub range: Range<usize>,
+    /// The range in the markdown that the code within the code block occupies.
+    pub code: Range<usize>,
+    pub is_fenced: bool,
+    pub syntax: Option<String>,
+}
+
+/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
+/// untagged (and assumed to be rust).
+crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
+    let mut code_blocks = vec![];
+
+    if md.is_empty() {
+        return code_blocks;
+    }
+
+    let mut opts = Options::empty();
+    opts.insert(OPTION_ENABLE_TABLES);
+    opts.insert(OPTION_ENABLE_FOOTNOTES);
+    let mut p = Parser::new_ext(md, opts);
+
+    let mut code_block_start = 0;
+    let mut code_start = 0;
+    let mut is_fenced = false;
+    let mut previous_offset = 0;
+    let mut in_rust_code_block = false;
+    while let Some(event) = p.next() {
+        let offset = p.get_offset();
+
+        match event {
+            Event::Start(Tag::CodeBlock(syntax)) => {
+                let lang_string = if syntax.is_empty() {
+                    LangString::all_false()
+                } else {
+                    LangString::parse(&*syntax, ErrorCodes::Yes)
+                };
+
+                if lang_string.rust {
+                    in_rust_code_block = true;
+
+                    code_start = offset;
+                    code_block_start = match md[previous_offset..offset].find("```") {
+                        Some(fence_idx) => {
+                            is_fenced = true;
+                            previous_offset + fence_idx
+                        }
+                        None => offset,
+                    };
+                }
+            }
+            Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
+                in_rust_code_block = false;
+
+                let code_block_end = if is_fenced {
+                    let fence_str = &md[previous_offset..offset]
+                        .chars()
+                        .rev()
+                        .collect::<String>();
+                    fence_str
+                        .find("```")
+                        .map(|fence_idx| offset - fence_idx)
+                        .unwrap_or_else(|| offset)
+                } else if md
+                    .as_bytes()
+                    .get(offset)
+                    .map(|b| *b == b'\n')
+                    .unwrap_or_default()
+                {
+                    offset - 1
+                } else {
+                    offset
+                };
+
+                let code_end = if is_fenced {
+                    previous_offset
+                } else {
+                    code_block_end
+                };
+
+                code_blocks.push(RustCodeBlock {
+                    is_fenced,
+                    range: Range {
+                        start: code_block_start,
+                        end: code_block_end,
+                    },
+                    code: Range {
+                        start: code_start,
+                        end: code_end,
+                    },
+                    syntax: if !syntax.is_empty() {
+                        Some(syntax.into_owned())
+                    } else {
+                        None
+                    },
+                });
+            }
+            _ => (),
+        }
+
+        previous_offset = offset;
+    }
+
+    code_blocks
+}
+
 #[derive(Clone, Default, Debug)]
 pub struct IdMap {
     map: FxHashMap<String, usize>,

diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
@@ -3,6 +3,7 @@
        html_root_url = "https://doc.rust-lang.org/nightly/",
        html_playground_url = "https://play.rust-lang.org/")]
 
+#![feature(bind_by_move_pattern_guards)]
 #![feature(rustc_private)]
 #![feature(box_patterns)]
 #![feature(box_syntax)]

diff --git a/src/librustdoc/passes/check_code_block_syntax.rs b/src/librustdoc/passes/check_code_block_syntax.rs
@@ -0,0 +1,109 @@
+use errors::Applicability;
+use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
+use syntax::parse::{ParseSess, token};
+use syntax::source_map::FilePathMapping;
+use syntax_pos::FileName;
+
+use clean;
+use core::DocContext;
+use fold::DocFolder;
+use html::markdown::{self, RustCodeBlock};
+use passes::Pass;
+
+pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
+    Pass::early("check-code-block-syntax", check_code_block_syntax,
+                "validates syntax inside Rust code blocks");
+
+pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
+    SyntaxChecker { cx }.fold_crate(krate)
+}
+
+struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
+    cx: &'a DocContext<'a, 'tcx, 'rcx>,
+}
+
+impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
+    fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
+        let sess = ParseSess::new(FilePathMapping::empty());
+        let source_file = sess.source_map().new_source_file(
+            FileName::Custom(String::from("doctest")),
+            dox[code_block.code].to_owned(),
+        );
+
+        let errors = Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
+            while let Ok(TokenAndSpan { tok, .. }) = lexer.try_next_token() {
+                if tok == token::Eof {
+                    break;
+                }
+            }
+
+            let errors = lexer.buffer_fatal_errors();
+
+            if !errors.is_empty() {
+                Err(errors)
+            } else {
+                Ok(())
+            }
+        });
+
+        if let Err(errors) = errors {
+            let mut diag = if let Some(sp) =
+                super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
+            {
+                let mut diag = self
+                    .cx
+                    .sess()
+                    .struct_span_warn(sp, "could not parse code block as Rust code");
+
+                for mut err in errors {
+                    diag.note(&format!("error from rustc: {}", err.message()));
+                    err.cancel();
+                }
+
+                if code_block.syntax.is_none() && code_block.is_fenced {
+                    let sp = sp.from_inner_byte_pos(0, 3);
+                    diag.span_suggestion_with_applicability(
+                        sp,
+                        "mark blocks that do not contain Rust code as text",
+                        String::from("```text"),
+                        Applicability::MachineApplicable,
+                    );
+                }
+
+                diag
+            } else {
+                // We couldn't calculate the span of the markdown block that had the error, so our
+                // diagnostics are going to be a bit lacking.
+                let mut diag = self.cx.sess().struct_span_warn(
+                    super::span_of_attrs(&item.attrs),
+                    "doc comment contains an invalid Rust code block",
+                );
+
+                for mut err in errors {
+                    // Don't bother reporting the error, because we can't show where it happened.
+                    err.cancel();
+                }
+
+                if code_block.syntax.is_none() && code_block.is_fenced {
+                    diag.help("mark blocks that do not contain Rust code as text: ```text");
+                }
+
+                diag
+            };
+
+            diag.emit();
+        }
+    }
+}
+
+impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
+    fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
+        if let Some(dox) = &item.attrs.collapsed_doc_value() {
+            for code_block in markdown::rust_code_blocks(&dox) {
+                self.check_rust_syntax(&item, &dox, code_block);
+            }
+        }
+
+        self.fold_item_recur(item)
+    }
+}