Skip to content

Commit

Permalink
Add support for FrontMatter block tokens (initial YAML block) (#25)
Browse files Browse the repository at this point in the history
The classic use case for sphinx is:

```markdown
---
orphan: true
---

This is an orphan document, not specified in any toctrees.
```
  • Loading branch information
chrisjsewell committed Feb 13, 2020
1 parent 4f5df00 commit ae6c3c3
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org

### Block Tokens

- **FrontMatter**: A YAML block at the start of the document enclosed by `---`
- **HTMLBlock**: Any valid HTML (rendered in HTML output only)
- **LineComment**: `% this is a comment`
- **BlockCode**: indented text (4 spaces)
Expand Down
15 changes: 15 additions & 0 deletions docs/syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ For more information, also see the [CommonMark Spec](https://spec.commonmark.org

### Block Tokens

- **FrontMatter**: A YAML block at the start of the document enclosed by `---`
- **HTMLBlock**: Any valid HTML (rendered in HTML output only)
- **LineComment**: `% this is a comment`
- **BlockCode**: indented text (4 spaces)
Expand Down Expand Up @@ -259,6 +260,20 @@ most beautiful mathematical formulas.
Here is some extra markdown syntax which provides functionality in rST that doesn't
exist in CommonMark.

## Front Matter

This is a YAML block at the start of the document, as used, for example, in [jekyll](https://jekyllrb.com/docs/front-matter/).
Sphinx intercepts this data and stores it within the global environment (as discussed [here](https://www.sphinx-doc.org/en/master/usage/restructuredtext/field-lists.html)). A classic use case is to mark 'orphan' documents,
i.e. documents that are not included in any toctree.

```markdown
---
orphan: true
---

This is an orphan document, not specified in any toctrees.
```

### Comments

You may add comments by putting the `%` character at the beginning of a line. This will
Expand Down
61 changes: 61 additions & 0 deletions myst_parser/block_tokens.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import re

import yaml

from mistletoe import block_token, span_token
import mistletoe.block_tokenizer as tokenizer

from mistletoe.block_token import ( # noqa: F401
tokenize,
HTMLBlock,
ThematicBreak,
List,
Expand All @@ -25,13 +28,70 @@
"List",
"Table",
"Footnote",
"FrontMatter",
"Paragraph",
]

# TODO add FieldList block token, see:
# https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#field-lists


class FrontMatter(block_token.BlockToken):
    """Front matter YAML block.

    Not included in the parsing process, but called by Document.__init__.

    Attributes set by ``__init__``:

    - ``range``: ``(0, end_line)``, where ``end_line`` is the index of the
      first line *after* the front matter (exclusive end).
    - ``data``: the result of ``yaml.safe_load`` on the block content, or
      ``{}`` when the block is empty.
    - ``children``: always an empty list.
    """

    def __init__(self, lines):
        """Parse the leading ``---`` delimited YAML block from ``lines``.

        :param lines: the document lines; the first must start with ``---``.
        """
        # Internal invariant: the caller checks the opening fence first.
        assert lines and lines[0].startswith("---")

        # Index (into ``lines``) of the closing ``---`` fence, if any.
        closing_index = None
        for index, line in enumerate(lines[1:], start=1):
            if line.startswith("---"):
                closing_index = index
                break

        # TODO raise/report error if closing block not found
        if closing_index is None:
            # Unclosed block: every remaining line is YAML content
            # (previously the final line was silently dropped).
            content_lines = lines[1:]
            end_line = len(lines)
        else:
            content_lines = lines[1:closing_index]
            end_line = closing_index + 1

        self.range = (0, end_line)
        # Lines may or may not carry their own line ending; strip it before
        # joining so that no spurious blank lines are inserted (these would
        # change the value of YAML block / multi-line scalars).
        yaml_block = "\n".join(line.rstrip("\r\n") for line in content_lines)
        # NOTE(review): safe_load may return a non-mapping (e.g. a list or a
        # scalar) for valid YAML; consumers appear to expect a dict - confirm.
        self.data = yaml.safe_load(yaml_block) or {}
        self.children = []

    @classmethod
    def start(cls, line):
        """Never match: this token is created explicitly, not by the tokenizer."""
        return False

    @classmethod
    def read(cls, lines):
        """Not supported: this token is created explicitly, not by the tokenizer."""
        raise NotImplementedError()


class Document(block_token.BlockToken):
    """Document token."""

    def __init__(self, lines):
        """Tokenize ``lines`` into block tokens, stored in ``self.children``.

        :param lines: the source text, either as a single string or as a
            list of lines.

        If the first line starts with ``---``, a ``FrontMatter`` token is
        created from the leading block and added as the first child; the
        remaining lines are then tokenized.
        """
        self.footnotes = {}
        # Register this document as the root node for the duration of parsing.
        block_token._root_node = self
        span_token._root_node = self

        if isinstance(lines, str):
            lines = lines.splitlines(keepends=True)
        # Ensure that every line is terminated by a newline.
        lines = [line if line.endswith("\n") else "{}\n".format(line) for line in lines]
        start_line = 0
        self.children = []
        if lines and lines[0].startswith("---"):
            front_matter = FrontMatter(lines)
            self.children.append(front_matter)
            # Continue tokenizing from the first line after the front matter.
            start_line = front_matter.range[1]
            lines = lines[start_line:]
        self.children.extend(tokenize(lines, start_line))

        span_token._root_node = None
        block_token._root_node = None


class LineComment(block_token.BlockToken):
"""Line comment start with % """

Expand Down Expand Up @@ -119,6 +179,7 @@ def read(cls, lines):
while (
next_line is not None
and next_line.strip() != ""
# TODO transition checks should only be made on 'active' tokens
and not LineComment.start(next_line)
and not Heading.start(next_line)
and not CodeFence.start(next_line)
Expand Down
27 changes: 27 additions & 0 deletions myst_parser/docutils_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,33 @@ def render_document(self, token):
self.render_children(token)
return self.document

def render_front_matter(self, token):
    """Add the document front matter to the tree as a ``docinfo`` node.

    Each key of ``token.data`` whose value is a ``str``, ``int`` or ``float``
    becomes a ``field`` node (``field_name`` + ``field_body``), with the value
    converted to text; keys with any other type of value are skipped.

    Background (from the original implementation notes): for RST, field
    lists are captured by
    ``docutils.parsers.rst.states.Body.field_marker``; if one occurs at the
    start of the document it is transformed by
    ``docutils.transforms.frontmatter.DocInfo``, and finally intercepted by
    sphinx and added to the env in
    ``sphinx.environment.collectors.metadata.MetadataCollector.process_doc``.
    Technically the values should be parsed to AST, but this is redundant,
    since ``process_doc`` just converts them back to text.
    """
    # TODO this data could be used to support default option values for directives
    scalar_types = (str, int, float)
    info = nodes.docinfo()
    for name, raw in token.data.items():
        if isinstance(raw, scalar_types):
            text = str(raw)
            entry = nodes.field()
            entry.source = text
            entry.append(nodes.field_name(name, "", nodes.Text(name, name)))
            entry.append(nodes.field_body(text, nodes.Text(text, text)))
            info.append(entry)
    self.current_node.append(info)

def render_paragraph(self, token):
if len(token.children) == 1 and isinstance(
token.children[0], myst_span_tokens.Target
Expand Down
2 changes: 1 addition & 1 deletion myst_parser/sphinx_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from docutils import frontend, parsers
from mistletoe import Document

from myst_parser.docutils_renderer import DocutilsRenderer
from myst_parser.block_tokens import Document


class MystParser(parsers.Parser):
Expand Down
5 changes: 5 additions & 0 deletions tests/sphinx/sourcedirs/basic/orphan.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
orphan: true
---

This is an orphan document, not specified in any toctrees.
6 changes: 5 additions & 1 deletion tests/test_docutils_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

import pytest

from mistletoe import Document
from mistletoe.block_token import tokenize
from mistletoe.span_token import tokenize_inner

from myst_parser.block_tokens import Document
from myst_parser.docutils_renderer import DocutilsRenderer


Expand Down Expand Up @@ -279,6 +279,10 @@ def test_footnote(renderer):
def test_full_run(sphinx_renderer, file_regression):
string = dedent(
"""\
---
a: 1
---
(target)=
# header 1
## sub header 1
Expand Down
6 changes: 6 additions & 0 deletions tests/test_docutils_renderer/test_full_run.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
<document source="">
<docinfo>
<field>
<field_name>
a
<field_body>
1
<target ids="target" names="target">
<section ids="header-1" names="header\ 1">
<title>
Expand Down

0 comments on commit ae6c3c3

Please sign in to comment.