Skip to content

Commit

Permalink
👌 IMPROVE: Allow for heading anchor links in docutils (#678)
Browse files Browse the repository at this point in the history
This aligns the treatment of `[](#target)` style links for docutils with sphinx, such that they are linked to a heading slug.

The core behaviour for sphinx is not changed,
except that failed reference resolution
now emits a `myst.xref_missing` warning (as opposed to a `std.ref` one), with a clearer warning message. Also on failure, the reference is still created,
for people who wish to suppress the warning (see e.g. #677)
  • Loading branch information
chrisjsewell committed Jan 11, 2023
1 parent 797af5f commit 8daa00b
Show file tree
Hide file tree
Showing 21 changed files with 274 additions and 216 deletions.
2 changes: 1 addition & 1 deletion docs/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ To parse single documents, see the [](docutils.md) section
## Write a CommonMark document

MyST is an extension of [CommonMark Markdown](https://commonmark.org/),
that includes [additional syntax](../syntax/syntax.md) for technical authoring,
that includes [additional syntax](syntax/syntax.md) for technical authoring,
which integrates with Docutils and Sphinx.

To start off, create an empty file called `myfile.md` and give it a markdown title and text.
Expand Down
9 changes: 7 additions & 2 deletions docs/syntax/syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ By default, MyST will resolve link destinations according to the following rules
3. Destinations which point to a local file path are treated as links to that file.
- The path must be relative and in [POSIX format](https://en.wikipedia.org/wiki/Path_(computing)#POSIX_and_Unix_paths) (i.e. `/` separators).
- If the path is to another source file in the project (e.g. a `.md` or `.rst` file),
then the link will be to the initial heading in that file.
then the link will be to the initial heading in that file or,
if the path is followed by a `#target`, to the heading "slug" in that file.
- If the path is to a non-source file (e.g. a `.png` or `.pdf` file),
then the link will be to the file itself, e.g. to download it.

Expand Down Expand Up @@ -290,10 +291,14 @@ Here are some examples:
- `[Non-source file](example.txt)`
- [Non-source file](example.txt)

* - Internal heading
* - Local heading
- `[Heading](#markdown-links-and-referencing)`
- [Heading](#markdown-links-and-referencing)

* - Heading in another file
- `[Heading](optional.md#auto-generated-header-anchors)`
- [Heading](optional.md#auto-generated-header-anchors)

:::

### Customising destination resolution
Expand Down
3 changes: 1 addition & 2 deletions myst_parser/config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ class MdParserConfig:
metadata={
"validator": optional(in_([1, 2, 3, 4, 5, 6, 7])),
"help": "Heading level depth to assign HTML anchors",
"sphinx_only": True,
},
)

Expand All @@ -180,7 +179,7 @@ class MdParserConfig:
"validator": optional(is_callable),
"help": "Function for creating heading anchors",
"global_only": True,
"sphinx_only": True,
"sphinx_only": True, # TODO docutils config doesn't handle callables
},
)

Expand Down
109 changes: 84 additions & 25 deletions myst_parser/mdit_to_docutils/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ def setup_render(
self._level_to_elem: dict[int, nodes.document | nodes.section] = {
0: self.document
}
# mapping of section slug to section node
self._slug_to_section: dict[str, nodes.section] = {}

@property
def sphinx_env(self) -> BuildEnvironment | None:
Expand Down Expand Up @@ -236,6 +238,37 @@ def _render_initialise(self) -> None:
def _render_finalise(self) -> None:
"""Finalise the render of the document."""

# attempt to replace id_link references with internal links
for refnode in findall(self.document)(nodes.reference):
if not refnode.get("id_link"):
continue
target = refnode["refuri"][1:]
if target in self._slug_to_section:
section_node = self._slug_to_section[target]
refnode["refid"] = section_node["ids"][0]

if not refnode.children:
implicit_text = clean_astext(section_node[0])
refnode += nodes.inline(
implicit_text, implicit_text, classes=["std", "std-ref"]
)
else:
self.create_warning(
f"local id not found: {refnode['refuri']!r}",
MystWarnings.XREF_MISSING,
line=refnode.line,
append_to=refnode,
)
refnode["refid"] = target
del refnode["refuri"]

if self._slug_to_section and self.sphinx_env:
# save for later reference resolution
self.sphinx_env.metadata[self.sphinx_env.docname]["myst_slugs"] = {
slug: (snode["ids"][0], clean_astext(snode[0]))
for slug, snode in self._slug_to_section.items()
}

# log warnings for duplicate reference definitions
# "duplicate_refs": [{"href": "ijk", "label": "B", "map": [4, 5], "title": ""}],
for dup_ref in self.md_env.get("duplicate_refs", []):
Expand Down Expand Up @@ -713,11 +746,29 @@ def render_heading(self, token: SyntaxTreeNode) -> None:
with self.current_node_context(title_node):
self.render_children(token)

# create a target reference for the section, based on the heading text
# create a target reference for the section, based on the heading text.
# Note, this is an implicit target, meaning that it is not prioritised,
# and is not stored by sphinx for ref resolution
name = nodes.fully_normalize_name(title_node.astext())
new_section["names"].append(name)
self.document.note_implicit_target(new_section, new_section)

# add possible reference slug, this may be different to the standard name above,
# and does not have to be normalised, so we treat it separately
if "id" in token.attrs:
slug = str(token.attrs["id"])
new_section["slug"] = slug
if slug in self._slug_to_section:
other_node = self._slug_to_section[slug]
self.create_warning(
f"duplicate heading slug {slug!r}, other at line {other_node.line}",
MystWarnings.ANCHOR_DUPE,
line=new_section.line,
)
else:
# we store this for later processing on finalise
self._slug_to_section[slug] = new_section

# set the section as the current node for subsequent rendering
self.current_node = new_section

Expand All @@ -736,19 +787,19 @@ def render_link(self, token: SyntaxTreeNode) -> None:
or self.md_config.gfm_only
or self.md_config.all_links_external
):
if token.info == "auto": # handles both autolink and linkify
return self.render_autolink(token)
else:
return self.render_external_url(token)
return self.render_external_url(token)

href = cast(str, token.attrGet("href") or "")

if href.startswith("#"):
return self.render_id_link(token)

# TODO ideally whether inv_link is enabled could be precomputed
if "inv_link" in self.md_config.enable_extensions and href.startswith("inv:"):
return self.create_inventory_link(token)

if token.info == "auto": # handles both autolink and linkify
return self.render_autolink(token)
return self.render_external_url(token)

# Check for external URL
url_scheme = urlparse(href).scheme
Expand All @@ -761,20 +812,27 @@ def render_link(self, token: SyntaxTreeNode) -> None:
return self.render_internal_link(token)

def render_external_url(self, token: SyntaxTreeNode) -> None:
"""Render link token `[text](link "title")`,
where the link has been identified as an external URL::
<reference refuri="link" title="title">
text
`text` can contain nested syntax, e.g. `[**bold**](url "title")`.
"""Render link token (including autolink and linkify),
where the link has been identified as an external URL.
"""
ref_node = nodes.reference()
self.add_line_and_source_path(ref_node, token)
self.copy_attributes(
token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
)
ref_node["refuri"] = cast(str, token.attrGet("href") or "")
ref_node["refuri"] = escapeHtml(token.attrGet("href") or "") # type: ignore[arg-type]
with self.current_node_context(ref_node, append=True):
self.render_children(token)

def render_id_link(self, token: SyntaxTreeNode) -> None:
    """Render a link token of the form `[text](#id)`, which points at a local target.

    The created reference keeps the raw ``href`` (including the leading ``#``)
    as its ``refuri`` and is flagged with ``id_link``, so that it can be picked
    out again when the render of the document is finalised.

    :param token: the link token; its children are rendered as the link text.
    """
    href = token.attrGet("href") or ""
    link_node = nodes.reference()
    self.add_line_and_source_path(link_node, token)
    # marker used to distinguish local-id links from ordinary references
    link_node["id_link"] = True
    link_node["refuri"] = href
    self.copy_attributes(
        token,
        link_node,
        ("class", "id", "reftitle"),
        aliases={"title": "reftitle"},
    )
    with self.current_node_context(link_node, append=True):
        self.render_children(token)

Expand All @@ -799,17 +857,6 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
with self.current_node_context(ref_node, append=True):
self.render_children(token)

def render_autolink(self, token: SyntaxTreeNode) -> None:
    """Render an autolink or linkify token as a reference node.

    The ``href`` of the token is HTML-escaped before being stored as the
    ``refuri`` of the reference; the token's children become the link text.

    :param token: the link token to render.
    """
    escaped_href = escapeHtml(token.attrGet("href") or "")  # type: ignore[arg-type]
    link_node = nodes.reference()
    self.copy_attributes(
        token,
        link_node,
        ("class", "id", "reftitle"),
        aliases={"title": "reftitle"},
    )
    link_node["refuri"] = escaped_href
    self.add_line_and_source_path(link_node, token)
    with self.current_node_context(link_node, append=True):
        self.render_children(token)

def create_inventory_link(self, token: SyntaxTreeNode) -> None:
r"""Create a link to an inventory object.
Expand Down Expand Up @@ -1641,3 +1688,15 @@ def html_meta_to_nodes(
output.append(pending)

return output


def clean_astext(node: nodes.Element) -> str:
    """Like ``node.astext()``, but with image alt text blanked and raw nodes removed.

    The clean-up is performed on a deep copy, so the node passed in is not modified.
    Copied from sphinx.

    :param node: the element to extract text from.
    :return: the text of the cleaned copy.
    """
    cleaned = node.deepcopy()
    for image_node in findall(cleaned)(nodes.image):
        image_node["alt"] = ""
    # collect the matches up front, since removing nodes alters the tree being walked
    raw_nodes = list(findall(cleaned)(nodes.raw))
    for raw_node in raw_nodes:
        raw_node.parent.remove(raw_node)
    return cleaned.astext()
99 changes: 29 additions & 70 deletions myst_parser/mdit_to_docutils/sphinx_.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,12 @@
from markdown_it.tree import SyntaxTreeNode
from sphinx import addnodes
from sphinx.domains.math import MathDomain
from sphinx.domains.std import StandardDomain
from sphinx.environment import BuildEnvironment
from sphinx.ext.intersphinx import InventoryAdapter
from sphinx.util import logging
from sphinx.util.nodes import clean_astext

from myst_parser import inventory
from myst_parser.mdit_to_docutils.base import DocutilsRenderer
from myst_parser.warnings_ import MystWarnings

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -49,38 +46,42 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
destination = os.path.relpath(
os.path.join(include_dir, os.path.normpath(destination)), source_dir
)

kwargs = {
"refdoc": self.sphinx_env.docname,
"reftype": "myst",
"refexplicit": len(token.children or []) > 0,
}
path_dest, *_path_ids = destination.split("#", maxsplit=1)
path_id = _path_ids[0] if _path_ids else None
potential_path = (
Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / destination
Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / path_dest
if self.sphinx_env.srcdir # not set in some test situations
else None
)
if (
potential_path
and potential_path.is_file()
and not any(
destination.endswith(suffix)
for suffix in self.sphinx_env.config.source_suffix
)
):
wrap_node = addnodes.download_reference(
refdoc=self.sphinx_env.docname,
reftarget=destination,
reftype="myst",
refdomain=None, # Added to enable cross-linking
refexplicit=len(token.children or []) > 0,
refwarn=False,
if path_dest == "./":
# this is a special case, where we want to reference the current document
potential_path = (
Path(self.sphinx_env.doc2path(self.sphinx_env.docname))
if self.sphinx_env.srcdir
else None
)
classes = ["xref", "download", "myst"]
text = destination if not token.children else ""
if potential_path and potential_path.is_file():
docname = self.sphinx_env.path2doc(str(potential_path))
if docname:
wrap_node = addnodes.pending_xref(
refdomain="doc", reftarget=docname, reftargetid=path_id, **kwargs
)
classes = ["xref", "myst"]
text = ""
else:
wrap_node = addnodes.download_reference(
refdomain=None, reftarget=path_dest, refwarn=False, **kwargs
)
classes = ["xref", "download", "myst"]
text = destination if not token.children else ""
else:
wrap_node = addnodes.pending_xref(
refdoc=self.sphinx_env.docname,
reftarget=destination,
reftype="myst",
refdomain=None, # Added to enable cross-linking
refexplicit=len(token.children or []) > 0,
refwarn=True,
refdomain=None, reftarget=destination, refwarn=True, **kwargs
)
classes = ["xref", "myst"]
text = ""
Expand Down Expand Up @@ -112,48 +113,6 @@ def get_inventory_matches(
)
)

def render_heading(self, token: SyntaxTreeNode) -> None:
    """Extend the docutils heading render, to register heading ids as labels.

    These ids are computed by the ``markdown-it-py`` ``anchors_plugin``
    as "slugs" which are unique to a document.
    Each slug is stored in the sphinx standard domain under the key
    ``<document path>#<slug>``, in an approach similar to
    ``sphinx.ext.autosectionlabel``.

    :param token: the heading token to render.
    """
    super().render_heading(token)

    section = self.current_node
    if not isinstance(section, nodes.section):
        # no section was created for this heading, so there is nothing to label
        return

    # the slug is only present if an anchor id was assigned to the heading
    slug = cast(str, token.attrGet("id"))
    if slug is None:
        return

    source_path = self.sphinx_env.doc2path(self.sphinx_env.docname, base=False)
    doc_slug = source_path + "#" + slug

    # register the label in the standard domain, so that it can be handled properly
    std_domain = cast(StandardDomain, self.sphinx_env.get_domain("std"))
    if doc_slug in std_domain.labels:
        existing_docname = std_domain.labels[doc_slug][0]
        other_doc = self.sphinx_env.doc2path(existing_docname)
        self.create_warning(
            f"duplicate label {doc_slug}, other instance in {other_doc}",
            MystWarnings.ANCHOR_DUPE,
            line=section.line,
        )

    # a duplicate is warned about above, but the newest entry still replaces it
    labelid = section["ids"][0]
    std_domain.anonlabels[doc_slug] = (self.sphinx_env.docname, labelid)
    std_domain.labels[doc_slug] = (
        self.sphinx_env.docname,
        labelid,
        clean_astext(section[0]),
    )

    # flag the document as containing anchors, and record the key on the section
    self.sphinx_env.metadata[self.sphinx_env.docname]["myst_anchors"] = True
    section["myst-anchor"] = doc_slug

def render_math_block_label(self, token: SyntaxTreeNode) -> None:
"""Render math with referencable labels, e.g. ``$a=1$ (label)``."""
label = token.info
Expand Down

0 comments on commit 8daa00b

Please sign in to comment.