Skip to content

Commit

Permalink
rustdoc: check code block syntax in early pass
Browse files Browse the repository at this point in the history
  • Loading branch information
euclio committed Jan 15, 2019
1 parent ee10d99 commit 8c93798
Show file tree
Hide file tree
Showing 10 changed files with 476 additions and 73 deletions.
92 changes: 55 additions & 37 deletions src/librustdoc/html/highlight.rs
Expand Up @@ -25,40 +25,51 @@ pub fn render_with_highlighting(
tooltip: Option<(&str, &str)>,
) -> String {
debug!("highlighting: ================\n{}\n==============", src);
let sess = parse::ParseSess::new(FilePathMapping::empty());
let fm = sess.source_map().new_source_file(FileName::Custom("stdin".to_string()),
src.to_string());

let mut out = Vec::new();
if let Some((tooltip, class)) = tooltip {
write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \
class='tooltiptext'>{}</span></div></div>",
class, tooltip).unwrap();
}
write_header(class, &mut out).unwrap();

let lexer = match lexer::StringReader::new_without_err(&sess, fm, None, "Output from rustc:") {
Ok(l) => l,
Err(_) => {
let first_line = src.lines().next().unwrap_or_else(|| "");
let mut err = sess.span_diagnostic
.struct_warn(&format!("Invalid doc comment starting with: `{}`\n\
(Ignoring this codeblock)",
first_line));
err.emit();
return String::new();

let sess = parse::ParseSess::new(FilePathMapping::empty());
let fm = sess.source_map().new_source_file(
FileName::Custom(String::from("rustdoc-highlighting")),
src.to_owned(),
);
let highlight_result =
lexer::StringReader::new_or_buffered_errs(&sess, fm, None).and_then(|lexer| {
let mut classifier = Classifier::new(lexer, sess.source_map());

let mut highlighted_source = vec![];
if classifier.write_source(&mut highlighted_source).is_err() {
Err(classifier.lexer.buffer_fatal_errors())
} else {
Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
}
});

match highlight_result {
Ok(highlighted_source) => {
write_header(class, &mut out).unwrap();
write!(out, "{}", highlighted_source).unwrap();
if let Some(extension) = extension {
write!(out, "{}", extension).unwrap();
}
write_footer(&mut out).unwrap();
}
};
let mut classifier = Classifier::new(lexer, sess.source_map());
if classifier.write_source(&mut out).is_err() {
classifier.lexer.emit_fatal_errors();
return format!("<pre>{}</pre>", src);
}
Err(errors) => {
// If errors are encountered while trying to highlight, cancel the errors and just emit
// the unhighlighted source. The errors will have already been reported in the
// `check-code-block-syntax` pass.
for mut error in errors {
error.cancel();
}

if let Some(extension) = extension {
write!(out, "{}", extension).unwrap();
write!(out, "<pre><code>{}</code></pre>", src).unwrap();
}
}
write_footer(&mut out).unwrap();

String::from_utf8_lossy(&out[..]).into_owned()
}

Expand Down Expand Up @@ -151,6 +162,17 @@ impl<U: Write> Writer for U {
}
}

/// Reasons why writing highlighted source can fail.
enum HighlightError {
/// The lexer could not produce the next token.
LexError,
/// An I/O error occurred, e.g. while writing to the output.
IoError(io::Error),
}

/// Lets `?` convert an `io::Error` into `HighlightError::IoError`.
impl From<io::Error> for HighlightError {
    fn from(io_err: io::Error) -> Self {
        HighlightError::IoError(io_err)
    }
}

impl<'a> Classifier<'a> {
fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
Classifier {
Expand All @@ -162,17 +184,11 @@ impl<'a> Classifier<'a> {
}
}

/// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
/// Gets the next token out of the lexer.
fn try_next_token(&mut self) -> Result<TokenAndSpan, HighlightError> {
match self.lexer.try_next_token() {
Ok(tas) => Ok(tas),
Err(_) => {
let mut err = self.lexer.sess.span_diagnostic
.struct_warn("Backing out of syntax highlighting");
err.note("You probably did not intend to render this as a rust code-block");
err.emit();
Err(io::Error::new(io::ErrorKind::Other, ""))
}
Err(_) => Err(HighlightError::LexError),
}
}

Expand All @@ -185,7 +201,7 @@ impl<'a> Classifier<'a> {
/// source.
fn write_source<W: Writer>(&mut self,
out: &mut W)
-> io::Result<()> {
-> Result<(), HighlightError> {
loop {
let next = self.try_next_token()?;
if next.tok == token::Eof {
Expand All @@ -202,7 +218,7 @@ impl<'a> Classifier<'a> {
fn write_token<W: Writer>(&mut self,
out: &mut W,
tas: TokenAndSpan)
-> io::Result<()> {
-> Result<(), HighlightError> {
let klass = match tas.tok {
token::Shebang(s) => {
out.string(Escape(&s.as_str()), Class::None)?;
Expand Down Expand Up @@ -341,7 +357,9 @@ impl<'a> Classifier<'a> {

// Anything that didn't return above is the simple case where the
// class just spans a single token, so we can use the `string` method.
out.string(Escape(&self.snip(tas.sp)), klass)
out.string(Escape(&self.snip(tas.sp)), klass)?;

Ok(())
}

// Helper function to get a snippet from the source_map.
Expand Down
109 changes: 109 additions & 0 deletions src/librustdoc/html/markdown.rs
Expand Up @@ -919,6 +919,115 @@ pub fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
links
}

/// Location and metadata of a single Rust code block found in a markdown document.
///
/// Both ranges are byte offsets into the markdown string the block was found in.
#[derive(Debug)]
crate struct RustCodeBlock {
/// The range in the markdown that the code block occupies. Note that this includes the fences
/// for fenced code blocks.
pub range: Range<usize>,
/// The range in the markdown that the code within the code block occupies.
pub code: Range<usize>,
/// Whether the block was detected as a fenced (```) code block.
pub is_fenced: bool,
/// The language string attached to the code block, or `None` if it was empty.
pub syntax: Option<String>,
}

/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
/// untagged (and assumed to be rust).
///
/// Each returned `RustCodeBlock` records the byte range of the whole block (including the
/// fences of a fenced block), the byte range of the code inside it, whether it was fenced,
/// and its language string (if any). An empty `md` yields an empty vector.
crate fn rust_code_blocks(md: &str) -> Vec<RustCodeBlock> {
    let mut code_blocks = vec![];

    if md.is_empty() {
        return code_blocks;
    }

    let mut opts = Options::empty();
    opts.insert(OPTION_ENABLE_TABLES);
    opts.insert(OPTION_ENABLE_FOOTNOTES);
    let mut p = Parser::new_ext(md, opts);

    let mut code_block_start = 0;
    let mut code_start = 0;
    let mut is_fenced = false;
    let mut previous_offset = 0;
    let mut in_rust_code_block = false;
    // A `for` loop cannot be used here: the parser is queried for its offset after every event.
    while let Some(event) = p.next() {
        let offset = p.get_offset();

        match event {
            Event::Start(Tag::CodeBlock(syntax)) => {
                let lang_string = if syntax.is_empty() {
                    LangString::all_false()
                } else {
                    LangString::parse(&*syntax, ErrorCodes::Yes)
                };

                if lang_string.rust {
                    in_rust_code_block = true;

                    code_start = offset;
                    // Look for an opening fence between the previous event and this one.
                    // `is_fenced` must be recomputed for every block: leaving it untouched
                    // when no fence is found would make an indented block that follows a
                    // fenced one be reported as fenced.
                    let fence_idx = md[previous_offset..offset].find("```");
                    is_fenced = fence_idx.is_some();
                    code_block_start = fence_idx.map_or(offset, |idx| previous_offset + idx);
                }
            }
            Event::End(Tag::CodeBlock(syntax)) if in_rust_code_block => {
                in_rust_code_block = false;

                let code_block_end = if is_fenced {
                    // The block ends right after the last "```" that precedes `offset`.
                    md[previous_offset..offset]
                        .rfind("```")
                        .map_or(offset, |fence_idx| previous_offset + fence_idx + "```".len())
                } else if md.as_bytes().get(offset) == Some(&b'\n') {
                    offset - 1
                } else {
                    offset
                };

                // For fenced blocks the code stops where the previous event ended, i.e.
                // before the closing fence; otherwise it runs to the end of the block.
                let code_end = if is_fenced {
                    previous_offset
                } else {
                    code_block_end
                };

                code_blocks.push(RustCodeBlock {
                    is_fenced,
                    range: Range {
                        start: code_block_start,
                        end: code_block_end,
                    },
                    code: Range {
                        start: code_start,
                        end: code_end,
                    },
                    syntax: if !syntax.is_empty() {
                        Some(syntax.into_owned())
                    } else {
                        None
                    },
                });
            }
            _ => (),
        }

        previous_offset = offset;
    }

    code_blocks
}

#[derive(Clone, Default, Debug)]
pub struct IdMap {
map: FxHashMap<String, usize>,
Expand Down
1 change: 1 addition & 0 deletions src/librustdoc/lib.rs
Expand Up @@ -3,6 +3,7 @@
html_root_url = "https://doc.rust-lang.org/nightly/",
html_playground_url = "https://play.rust-lang.org/")]

#![feature(bind_by_move_pattern_guards)]
#![feature(rustc_private)]
#![feature(box_patterns)]
#![feature(box_syntax)]
Expand Down
109 changes: 109 additions & 0 deletions src/librustdoc/passes/check_code_block_syntax.rs
@@ -0,0 +1,109 @@
use errors::Applicability;
use syntax::parse::lexer::{TokenAndSpan, StringReader as Lexer};
use syntax::parse::{ParseSess, token};
use syntax::source_map::FilePathMapping;
use syntax_pos::FileName;

use clean;
use core::DocContext;
use fold::DocFolder;
use html::markdown::{self, RustCodeBlock};
use passes::Pass;

/// Early pass, registered under the name `check-code-block-syntax`, that runs
/// `check_code_block_syntax` over the crate.
pub const CHECK_CODE_BLOCK_SYNTAX: Pass =
Pass::early("check-code-block-syntax", check_code_block_syntax,
"validates syntax inside Rust code blocks");

/// Entry point of the `check-code-block-syntax` pass: folds the whole crate through a
/// `SyntaxChecker` and returns the crate produced by the fold.
pub fn check_code_block_syntax(krate: clean::Crate, cx: &DocContext) -> clean::Crate {
    let mut checker = SyntaxChecker { cx };
    checker.fold_crate(krate)
}

/// Doc folder that lexes the Rust code blocks in each item's documentation and warns about
/// blocks that fail to lex.
struct SyntaxChecker<'a, 'tcx: 'a, 'rcx: 'a> {
/// Context used to emit warnings and to map markdown ranges back to source spans.
cx: &'a DocContext<'a, 'tcx, 'rcx>,
}

impl<'a, 'tcx, 'rcx> SyntaxChecker<'a, 'tcx, 'rcx> {
    /// Lexes the contents of a single code block and emits a warning if lexing fails.
    ///
    /// `dox` is the markdown the block was extracted from and `code_block` holds byte ranges
    /// into it. The lexer's own errors are never emitted directly: they are either folded
    /// into the warning as notes or cancelled.
    fn check_rust_syntax(&self, item: &clean::Item, dox: &str, code_block: RustCodeBlock) {
        let sess = ParseSess::new(FilePathMapping::empty());
        let source_file = sess.source_map().new_source_file(
            FileName::Custom(String::from("doctest")),
            dox[code_block.code].to_owned(),
        );

        let lex_result =
            Lexer::new_or_buffered_errs(&sess, source_file, None).and_then(|mut lexer| {
                // Pull tokens until the end of input or the first lexing failure.
                loop {
                    match lexer.try_next_token() {
                        Ok(TokenAndSpan { tok, .. }) => {
                            if tok == token::Eof {
                                break;
                            }
                        }
                        Err(_) => break,
                    }
                }

                let buffered = lexer.buffer_fatal_errors();

                if buffered.is_empty() {
                    Ok(())
                } else {
                    Err(buffered)
                }
            });

        // Nothing to report when the block lexed cleanly.
        let errors = match lex_result {
            Ok(()) => return,
            Err(errors) => errors,
        };

        // Only untagged fenced blocks get the hint to mark the block as `text`.
        let untagged_fence = code_block.syntax.is_none() && code_block.is_fenced;

        let mut diag = match super::source_span_for_markdown_range(
            self.cx,
            &dox,
            &code_block.range,
            &item.attrs,
        ) {
            Some(sp) => {
                let mut warning = self
                    .cx
                    .sess()
                    .struct_span_warn(sp, "could not parse code block as Rust code");

                for mut err in errors {
                    warning.note(&format!("error from rustc: {}", err.message()));
                    err.cancel();
                }

                if untagged_fence {
                    // Point the suggestion at the first three bytes of the block's span.
                    let fence_sp = sp.from_inner_byte_pos(0, 3);
                    warning.span_suggestion_with_applicability(
                        fence_sp,
                        "mark blocks that do not contain Rust code as text",
                        String::from("```text"),
                        Applicability::MachineApplicable,
                    );
                }

                warning
            }
            None => {
                // We couldn't calculate the span of the markdown block that had the error,
                // so our diagnostics are going to be a bit lacking.
                let mut warning = self.cx.sess().struct_span_warn(
                    super::span_of_attrs(&item.attrs),
                    "doc comment contains an invalid Rust code block",
                );

                for mut err in errors {
                    // Don't bother reporting the error, because we can't show where it
                    // happened.
                    err.cancel();
                }

                if untagged_fence {
                    warning.help("mark blocks that do not contain Rust code as text: ```text");
                }

                warning
            }
        };

        diag.emit();
    }
}

impl<'a, 'tcx, 'rcx> DocFolder for SyntaxChecker<'a, 'tcx, 'rcx> {
    /// Checks every Rust code block in the item's collapsed documentation (if it has any),
    /// then continues folding into the item.
    fn fold_item(&mut self, item: clean::Item) -> Option<clean::Item> {
        let collapsed_docs = item.attrs.collapsed_doc_value();

        if let Some(ref docs) = collapsed_docs {
            markdown::rust_code_blocks(docs)
                .into_iter()
                .for_each(|block| self.check_rust_syntax(&item, docs, block));
        }

        self.fold_item_recur(item)
    }
}

0 comments on commit 8c93798

Please sign in to comment.