Skip to content

Commit

Permalink
Render Markdown in search results
Browse files Browse the repository at this point in the history
Previously Markdown documentation was not rendered to HTML for search results,
which led to the output not being very readable, particularly for inline code.
This PR fixes that by rendering Markdown to HTML with the help of pulldown-cmark
(the library rustdoc uses to parse Markdown for the main text of documentation).
However, the text for the title attribute (the text shown when you hover over an
element) still uses the plain-text rendering since it is displayed in browsers
as plain-text.

Only these styles will be rendered; everything else is stripped away:

* *italics*
* **bold**
* `inline code`
  • Loading branch information
camelid committed Dec 3, 2020
1 parent b4def89 commit 5d4a712
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 50 deletions.
2 changes: 1 addition & 1 deletion src/librustdoc/clean/mod.rs
Expand Up @@ -1015,7 +1015,7 @@ impl<'tcx> Clean<FnDecl> for (DefId, ty::PolyFnSig<'tcx>) {
.iter()
.map(|t| Argument {
type_: t.clean(cx),
name: names.next().map_or(String::new(), |name| name.to_string()),
name: names.next().map_or_else(|| String::new(), |name| name.to_string()),
})
.collect(),
},
Expand Down
8 changes: 5 additions & 3 deletions src/librustdoc/formats/cache.rs
Expand Up @@ -14,13 +14,13 @@ use crate::config::RenderInfo;
use crate::fold::DocFolder;
use crate::formats::item_type::ItemType;
use crate::formats::Impl;
use crate::html::markdown::short_markdown_summary;
use crate::html::render::cache::{extern_location, get_index_search_type, ExternalLocation};
use crate::html::render::IndexItem;
use crate::html::render::{plain_text_summary, shorten};

thread_local!(crate static CACHE_KEY: RefCell<Arc<Cache>> = Default::default());

/// This cache is used to store information about the `clean::Crate` being
/// This cache is used to store information about the [`clean::Crate`] being
/// rendered in order to provide more useful documentation. This contains
/// information like all implementors of a trait, all traits a type implements,
/// documentation for all known traits, etc.
Expand Down Expand Up @@ -313,7 +313,9 @@ impl DocFolder for Cache {
ty: item.type_(),
name: s.to_string(),
path: path.join("::"),
desc: shorten(plain_text_summary(item.doc_value())),
desc: item
.doc_value()
.map_or_else(|| String::new(), short_markdown_summary),
parent,
parent_idx: None,
search_type: get_index_search_type(&item),
Expand Down
92 changes: 90 additions & 2 deletions src/librustdoc/html/markdown.rs
Expand Up @@ -17,8 +17,6 @@
//! // ... something using html
//! ```

#![allow(non_camel_case_types)]

use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def_id::DefId;
use rustc_hir::HirId;
Expand Down Expand Up @@ -1037,7 +1035,97 @@ impl MarkdownSummaryLine<'_> {
}
}

/// Renders a subset of Markdown in the first paragraph of the provided Markdown.
///
/// - *Italics*, **bold**, and `inline code` styles **are** rendered.
/// - Headings and links are stripped (though the text *is* rendered).
/// - HTML, code blocks, and everything else are ignored.
///
/// Returns a tuple of the rendered HTML string and whether the output was shortened
/// due to the provided `length_limit`.
fn markdown_summary_with_limit(md: &str, length_limit: Option<u16>) -> (String, bool) {
if md.is_empty() {
return (String::new(), false);
}

let length_limit = length_limit.unwrap_or(u16::MAX) as usize;

let mut s = String::with_capacity(md.len() * 3 / 2);
let mut text_length = 0;
let mut stopped_early = false;

fn push(s: &mut String, text_length: &mut usize, text: &str) {
s.push_str(text);
*text_length += text.len();
};

'outer: for event in Parser::new_ext(md, Options::ENABLE_STRIKETHROUGH) {
match &event {
Event::Text(text) => {
for word in text.split_inclusive(char::is_whitespace) {
if text_length + word.len() >= length_limit {
stopped_early = true;
break 'outer;
}

push(&mut s, &mut text_length, word);
}
}
Event::Code(code) => {
if text_length + code.len() >= length_limit {
stopped_early = true;
break;
}

s.push_str("<code>");
push(&mut s, &mut text_length, code);
s.push_str("</code>");
}
Event::Start(tag) => match tag {
Tag::Emphasis => s.push_str("<em>"),
Tag::Strong => s.push_str("<strong>"),
Tag::CodeBlock(..) => break,
_ => {}
},
Event::End(tag) => match tag {
Tag::Emphasis => s.push_str("</em>"),
Tag::Strong => s.push_str("</strong>"),
Tag::Paragraph => break,
_ => {}
},
Event::HardBreak | Event::SoftBreak => {
if text_length + 1 >= length_limit {
stopped_early = true;
break;
}

push(&mut s, &mut text_length, " ");
}
_ => {}
}
}

(s, stopped_early)
}

/// Renders a shortened first paragraph of the given Markdown as a subset of Markdown,
/// making it suitable for contexts like the search index.
///
/// Will shorten to 59 or 60 characters, including an ellipsis (…) if it was shortened.
///
/// See [`markdown_summary_with_limit`] for details about what is rendered and what is not.
crate fn short_markdown_summary(markdown: &str) -> String {
let (mut s, was_shortened) = markdown_summary_with_limit(markdown, Some(59));

if was_shortened {
s.push('…');
}

s
}

/// Renders the first paragraph of the provided markdown as plain text.
/// Useful for alt-text.
///
/// - Headings, links, and formatting are stripped.
/// - Inline code is rendered as-is, surrounded by backticks.
Expand Down
33 changes: 32 additions & 1 deletion src/librustdoc/html/markdown/tests.rs
@@ -1,4 +1,4 @@
use super::plain_text_summary;
use super::{plain_text_summary, short_markdown_summary};
use super::{ErrorCodes, IdMap, Ignore, LangString, Markdown, MarkdownHtml};
use rustc_span::edition::{Edition, DEFAULT_EDITION};
use std::cell::RefCell;
Expand Down Expand Up @@ -204,6 +204,33 @@ fn test_header_ids_multiple_blocks() {
);
}

#[test]
fn test_short_markdown_summary() {
fn t(input: &str, expect: &str) {
let output = short_markdown_summary(input);
assert_eq!(output, expect, "original: {}", input);
}

t("hello [Rust](https://www.rust-lang.org) :)", "hello Rust :)");
t("*italic*", "<em>italic</em>");
t("**bold**", "<strong>bold</strong>");
t("Multi-line\nsummary", "Multi-line summary");
t("Hard-break \nsummary", "Hard-break summary");
t("hello [Rust] :)\n\n[Rust]: https://www.rust-lang.org", "hello Rust :)");
t("hello [Rust](https://www.rust-lang.org \"Rust\") :)", "hello Rust :)");
t("code `let x = i32;` ...", "code <code>let x = i32;</code> ...");
t("type `Type<'static>` ...", "type <code>Type<'static></code> ...");
t("# top header", "top header");
t("## header", "header");
t("first paragraph\n\nsecond paragraph", "first paragraph");
t("```\nfn main() {}\n```", "");
t("<div>hello</div>", "");
t(
"a *very*, **very** long first paragraph. it has lots of `inline code: Vec<T>`. and it has a [link](https://www.rust-lang.org).\nthat was a soft line break! \nthat was a hard one\n\nsecond paragraph.",
"a <em>very</em>, <strong>very</strong> long first paragraph. it has lots of …",
);
}

#[test]
fn test_plain_text_summary() {
fn t(input: &str, expect: &str) {
Expand All @@ -224,6 +251,10 @@ fn test_plain_text_summary() {
t("first paragraph\n\nsecond paragraph", "first paragraph");
t("```\nfn main() {}\n```", "");
t("<div>hello</div>", "");
t(
"a *very*, **very** long first paragraph. it has lots of `inline code: Vec<T>`. and it has a [link](https://www.rust-lang.org).\nthat was a soft line break! \nthat was a hard one\n\nsecond paragraph.",
"a very, very long first paragraph. it has lots of `inline code: Vec<T>`. and it has a link. that was a soft line break! that was a hard one",
);
}

#[test]
Expand Down
6 changes: 3 additions & 3 deletions src/librustdoc/html/render/cache.rs
Expand Up @@ -9,7 +9,7 @@ use crate::clean::types::GetDefId;
use crate::clean::{self, AttributesExt};
use crate::formats::cache::Cache;
use crate::formats::item_type::ItemType;
use crate::html::render::{plain_text_summary, shorten};
use crate::html::markdown::short_markdown_summary;
use crate::html::render::{Generic, IndexItem, IndexItemFunctionType, RenderType, TypeWithKind};

/// Indicates where an external crate can be found.
Expand Down Expand Up @@ -78,7 +78,7 @@ crate fn build_index(krate: &clean::Crate, cache: &mut Cache) -> String {
ty: item.type_(),
name: item.name.clone().unwrap(),
path: fqp[..fqp.len() - 1].join("::"),
desc: shorten(plain_text_summary(item.doc_value())),
desc: item.doc_value().map_or_else(|| String::new(), short_markdown_summary),
parent: Some(did),
parent_idx: None,
search_type: get_index_search_type(&item),
Expand Down Expand Up @@ -127,7 +127,7 @@ crate fn build_index(krate: &clean::Crate, cache: &mut Cache) -> String {
let crate_doc = krate
.module
.as_ref()
.map(|module| shorten(plain_text_summary(module.doc_value())))
.map(|module| module.doc_value().map_or_else(|| String::new(), short_markdown_summary))
.unwrap_or_default();

#[derive(Serialize)]
Expand Down
41 changes: 7 additions & 34 deletions src/librustdoc/html/render/mod.rs
Expand Up @@ -76,7 +76,9 @@ use crate::html::format::fmt_impl_for_trait_page;
use crate::html::format::Function;
use crate::html::format::{href, print_default_space, print_generic_bounds, WhereClause};
use crate::html::format::{print_abi_with_space, Buffer, PrintWithSpace};
use crate::html::markdown::{self, ErrorCodes, IdMap, Markdown, MarkdownHtml, MarkdownSummaryLine};
use crate::html::markdown::{
self, plain_text_summary, ErrorCodes, IdMap, Markdown, MarkdownHtml, MarkdownSummaryLine,
};
use crate::html::sources;
use crate::html::{highlight, layout, static_files};
use cache::{build_index, ExternalLocation};
Expand Down Expand Up @@ -1604,9 +1606,10 @@ impl Context {
Some(ref s) => s.to_string(),
};
let short = short.to_string();
map.entry(short)
.or_default()
.push((myname, Some(plain_text_summary(item.doc_value()))));
map.entry(short).or_default().push((
myname,
Some(item.doc_value().map_or_else(|| String::new(), plain_text_summary)),
));
}

if self.shared.sort_modules_alphabetically {
Expand Down Expand Up @@ -1810,36 +1813,6 @@ fn full_path(cx: &Context, item: &clean::Item) -> String {
s
}

/// Renders the first paragraph of the given markdown as plain text, making it suitable for
/// contexts like alt-text or the search index.
///
/// If no markdown is supplied, the empty string is returned.
///
/// See [`markdown::plain_text_summary`] for further details.
#[inline]
crate fn plain_text_summary(s: Option<&str>) -> String {
s.map(markdown::plain_text_summary).unwrap_or_default()
}

crate fn shorten(s: String) -> String {
if s.chars().count() > 60 {
let mut len = 0;
let mut ret = s
.split_whitespace()
.take_while(|p| {
// + 1 for the added character after the word.
len += p.chars().count() + 1;
len < 60
})
.collect::<Vec<_>>()
.join(" ");
ret.push('…');
ret
} else {
s
}
}

fn document(w: &mut Buffer, cx: &Context, item: &clean::Item, parent: Option<&clean::Item>) {
if let Some(ref name) = item.name {
info!("Documenting {}", name);
Expand Down
25 changes: 23 additions & 2 deletions src/librustdoc/html/static/main.js
Expand Up @@ -1611,7 +1611,7 @@ function defocusSearchBar() {
item.displayPath + "<span class=\"" + type + "\">" +
name + "</span></a></td><td>" +
"<a href=\"" + item.href + "\">" +
"<span class=\"desc\">" + escape(item.desc) +
"<span class=\"desc\">" + item.desc +
"&nbsp;</span></a></td></tr>";
});
output += "</table>";
Expand Down Expand Up @@ -2013,7 +2013,9 @@ function defocusSearchBar() {
}
var link = document.createElement("a");
link.href = rootPath + crates[i] + "/index.html";
link.title = rawSearchIndex[crates[i]].doc;
// The summary in the search index has HTML, so we need to
// dynamically render it as plaintext.
link.title = convertHTMLToPlaintext(rawSearchIndex[crates[i]].doc);
link.className = klass;
link.textContent = crates[i];

Expand All @@ -2026,6 +2028,25 @@ function defocusSearchBar() {
}
};

/**
* Convert HTML to plaintext:
*
* * Replace "<code>foo</code>" with "`foo`"
* * Strip all other HTML tags
*
* Used by the dynamic sidebar crate list renderer.
*
* @param {[string]} html [The HTML to convert]
* @return {[string]} [The resulting plaintext]
*/
function convertHTMLToPlaintext(html) {
var dom = new DOMParser().parseFromString(
html.replace('<code>', '`').replace('</code>', '`'),
'text/html',
);
return dom.body.innerText;
}


// delayed sidebar rendering.
window.initSidebarItems = function(items) {
Expand Down
@@ -1,21 +1,22 @@
#![crate_type = "lib"]
#![crate_name = "summaries"]

//! This summary has a [link] and `code`.
//! This *summary* has a [link] and `code`.
//!
//! This is the second paragraph.
//!
//! [link]: https://example.com

// @has search-index.js 'This summary has a link and `code`.'
// @has search-index.js 'This <em>summary</em> has a link and <code>code</code>.'
// @!has - 'second paragraph'

/// This `code` should be in backticks.
/// This `code` will be rendered in a code tag.
///
/// This text should not be rendered.
pub struct Sidebar;

// @has summaries/sidebar-items.js 'This `code` should be in backticks.'
// @has search-index.js 'This <code>code</code> will be rendered in a code tag.'
// @has summaries/sidebar-items.js 'This `code` will be rendered in a code tag.'
// @!has - 'text should not be rendered'

/// ```text
Expand Down

0 comments on commit 5d4a712

Please sign in to comment.