Merge 12709bf into 573d24f

executablebooks · Mar 10, 2020 · c3e0653 · c3e0653
2 parents 573d24f + 12709bf
commit c3e0653
Show file tree

Hide file tree

Showing 17 changed files with 272 additions and 118 deletions.
diff --git a/contrib/jira_renderer.py b/contrib/jira_renderer.py
@@ -22,7 +22,10 @@
 #
 
 import html
+from typing import Optional
+
 from mistletoe import block_tokens
+from mistletoe.parse_context import ParseContext
 from mistletoe.renderers.base import BaseRenderer
 from mistletoe.renderers.html import HTMLRenderer
 
@@ -37,14 +40,24 @@ class JIRARenderer(BaseRenderer):
     default_block_tokens = HTMLRenderer.default_block_tokens
     default_span_tokens = HTMLRenderer.default_span_tokens
 
-    def __init__(self, find_blocks=None, find_spans=None):
+    def __init__(
+        self,
+        parse_context: Optional[ParseContext] = None,
+        as_standalone: bool = False,
+        add_css: str = None,
+    ):
         """Initialise the renderer
 
-        :param find_blocks: override the default block tokens (classes or class paths)
-        :param find_spans: override the default span tokens (classes or class paths)
+        :param parse_context: the parse context stores global parsing variables,
+            such as the block/span tokens to search for,
+            and link/footnote definitions that have been collected.
+            If None, a new context will be instantiated, with the default
+            block/span tokens for this renderer.
+            These will be re-instantiated on ``__enter__``.
+        :type parse_context: mistletoe.parse_context.ParseContext
         """
+        super().__init__(parse_context=parse_context)
         self.listTokens = []
-        super().__init__(find_blocks=find_blocks, find_spans=find_spans)
 
     def render_strong(self, token):
         template = "*{}*"
@@ -196,7 +209,6 @@ def render_html_block(token):
         return token.content
 
     def render_document(self, token):
-        self.link_definitions.update(token.link_definitions)
         return self.render_inner(token)
 
 

diff --git a/contrib/mathjax.py b/contrib/mathjax.py
@@ -41,4 +41,6 @@ def render_document(self, token):
         """
         Append CDN link for MathJax to the end of <body>.
         """
+        # TODO incompatible with as_standalone
+        self.as_standalone = False
         return super().render_document(token) + self.mathjax_src
diff --git a/contrib/pygments_renderer.py b/contrib/pygments_renderer.py
@@ -9,8 +9,8 @@ class PygmentsRenderer(HTMLRenderer):
     formatter = HtmlFormatter()
     formatter.noclasses = True
 
-    def __init__(self, find_blocks=None, find_spans=None, style="default"):
-        super().__init__(find_blocks=find_blocks, find_spans=find_spans)
+    def __init__(self, parse_context=None, style="default"):
+        super().__init__(parse_context=parse_context)
         self.formatter.style = get_style(style)
 
     def render_block_code(self, token):

diff --git a/contrib/scheme.py b/contrib/scheme.py
@@ -98,8 +98,8 @@ class Scheme(BaseRenderer):
     default_block_tokens = (Program,)
     default_span_tokens = (Expr, Number, String, Variable, Whitespace)
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, parse_context=None):
+        super().__init__(parse_context=parse_context)
 
         self.env = ChainMap(
             {

diff --git a/contrib/toc_renderer.py b/contrib/toc_renderer.py
@@ -20,15 +20,8 @@ class TOCRenderer(HTMLRenderer):
         extras (list): allows subclasses to add even more custom tokens.
     """
 
-    def __init__(
-        self,
-        depth=5,
-        omit_title=True,
-        filter_conds=[],
-        find_blocks=None,
-        find_spans=None,
-    ):
-        super().__init__(find_blocks=find_blocks, find_spans=find_spans)
+    def __init__(self, depth=5, omit_title=True, filter_conds=[], parse_context=None):
+        super().__init__(parse_context=parse_context)
         self._headings = []
         self.depth = depth
         self.omit_title = omit_title

diff --git a/docs/api/index.rst b/docs/api/index.rst
@@ -1,3 +1,5 @@
+.. _api/main:
+
 mistletoe-EPB API
 =================
 

diff --git a/docs/index.md b/docs/index.md
@@ -38,6 +38,11 @@ which eventually, it is hoped, will be merged into mistletoe itself.
   to resolve ambiguities during parsing.
   Outputs are predictable and well-defined.
 
+* **Clear API**:
+  Documents can be built and assessed programmatically,
+  in an object-oriented manner.
+  See {ref}`intro/api_use` and {ref}`api/main` for details.
+
 * **Extensible**:
   Strikethrough and tables are supported natively,
   and custom block-level and span-level tokens can easily be added.
@@ -79,8 +84,8 @@ caption: Contents
 ---
 using/intro.md
 using/develop.md
-using/contributing.md
 api/index.rst
+using/contributing.md
 ```
 
 [mistune]: https://github.com/lepture/mistune

diff --git a/docs/using/intro.md b/docs/using/intro.md
@@ -45,9 +45,9 @@ with open('foo.md', 'r') as fin:
 
 ```
 
-`mistletoe.markdown()` uses mistletoe's default settings: allowing HTML mixins
-and rendering to HTML. The function also accepts an additional argument
-`renderer`. To produce LaTeX output:
+{py:func}`mistletoe.markdown` defaults to
+using the {py:class}`~mistletoe.renderers.html.HTMLRenderer`,
+but other renderers can be chosen, such as the ones listed in {ref}`renderers/core`. To produce LaTeX output:
 
 ```python
 import mistletoe
@@ -57,37 +57,6 @@ with open('foo.md', 'r') as fin:
     rendered = mistletoe.markdown(fin, LaTeXRenderer)
 ```
 
-Finally, here's how you would manually specify tokens sets and a renderer
-for mistletoe. In the following example, we use `HTMLRenderer` to render
-the AST; first parsing only tokens that are strictly CommonMark compliant
-(see {ref}`block tokens <tokens/block>` and {ref}`span tokens <tokens/span>`),
-then including an extended token set (see {ref}`extended tokens <tokens/extension>`).
-
-```python
-from mistletoe import Document, HTMLRenderer, token_sets
-
-cmark_block_tokens = token_sets.get_commonmark_block_tokens()
-cmark_span_tokens = token_sets.get_commonmark_span_tokens()
-extended_block_tokens = token_sets.get_extended_block_tokens()
-extended_span_tokens = token_sets.get_extended_span_tokens()
-
-with open('foo.md', 'r') as fin:
-    rendered1 = mistletoe.markdown(
-        fin, renderer=HTMLRenderer,
-        find_blocks=cmark_block_tokens, find_spans=cmark_span_tokens
-    )
-
-    rendered2 = mistletoe.markdown(
-        fin, renderer=HTMLRenderer,
-        find_blocks=extended_block_tokens, find_spans=extended_span_tokens
-    )
-
-```
-
-```{seealso}
-{ref}`api/utils`
-```
-
 ### From the command-line
 
 pip installation enables mistletoe's command-line utility. Type the following
@@ -144,6 +113,133 @@ and some \textit{italics}
 >>>
 ```
 
+### Customise the parse
+
+To exert even greater control over the parsing process,
+renderers can be initialised with an existing {py:class}`~mistletoe.parse_context.ParseContext` instance.
+This class stores global variables that are utilised during the parsing process, such as the block/span tokens to search for,
+and link/footnote definitions that have been collected.
+At any one time, one of these objects is set per thread;
+set by {py:func}`~mistletoe.parse_context.set_parse_context` and
+retrieved by {py:func}`~mistletoe.parse_context.get_parse_context`.
+
+In the following example, we use the {py:class}`~mistletoe.renderers.html.HTMLRenderer` to parse a file:
+
+- first parsing only tokens that are strictly CommonMark compliant
+(see {ref}`block tokens <tokens/block>` and {ref}`span tokens <tokens/span>`), then
+- including an extended token set (see {ref}`extended tokens <tokens/extension>`).
+
+```python
+from mistletoe import Document, HTMLRenderer, ParseContext, token_sets
+
+commonmark_context = ParseContext(
+    find_blocks=token_sets.get_commonmark_block_tokens(),
+    find_spans=token_sets.get_commonmark_span_tokens(),
+)
+extended_context = ParseContext(
+    find_blocks=token_sets.get_extended_block_tokens(),
+    find_spans=token_sets.get_extended_span_tokens(),
+)
+
+with open('foo.md', 'r') as fin:
+    rendered1 = mistletoe.markdown(
+        fin, renderer=HTMLRenderer,
+        parse_context=commonmark_context
+    )
+
+    rendered2 = mistletoe.markdown(
+        fin, renderer=HTMLRenderer,
+        parse_context=extended_context
+    )
+
+```
+
+```{seealso}
+{ref}`api/utils`
+```
+
+(intro/api_use)=
+
+### Programmatic Use
+
+To parse the text only to the mistletoe AST, the general entry point is the {py:meth}`mistletoe.block_tokens.Document.read` method
+(although actually all block tokens have a ``read`` method that can be used directly).
+
+```python
+from mistletoe import Document
+
+text = """
+Here's some *text*
+
+1. a list
+
+> a *quote*"""
+doc = Document.read(text)
+doc
+```
+
+```python
+Document(children=3, link_definitions=0, footnotes=0, footref_order=0, front_matter=None)
+```
+
+All tokens have a `children` attribute:
+
+```python
+doc.children
+```
+
+```python
+[Paragraph(children=2, position=(2, 2)),
+ List(children=1, loose=False, start_at=1, position=(3, 4)),
+ Quote(children=1, position=(6, 6))]
+```
+
+or you can walk through the entire syntax tree, using the
+{py:meth}`~mistletoe.base_elements.Token.walk` method:
+
+```python
+for item in doc.walk():
+    print(item)
+```
+
+```python
+WalkItem(node=Paragraph(children=2, position=(2, 2)), parent=Document(children=3, link_definitions=0, footnotes=0, footref_order=0, front_matter=None), index=0, depth=1)
+WalkItem(node=List(children=1, loose=False, start_at=1, position=(3, 4)), parent=Document(children=3, link_definitions=0, footnotes=0, footref_order=0, front_matter=None), index=1, depth=1)
+WalkItem(node=Quote(children=1, position=(6, 6)), parent=Document(children=3, link_definitions=0, footnotes=0, footref_order=0, front_matter=None), index=2, depth=1)
+WalkItem(node=RawText(), parent=Paragraph(children=2, position=(2, 2)), index=0, depth=2)
+WalkItem(node=Emphasis(children=1), parent=Paragraph(children=2, position=(2, 2)), index=1, depth=2)
+WalkItem(node=ListItem(children=1, loose=False, leader='1.', prepend=3, next_marker=None, position=(3, 4)), parent=List(children=1, loose=False, start_at=1, position=(3, 4)), index=0, depth=2)
+WalkItem(node=Paragraph(children=2, position=(7, 7)), parent=Quote(children=1, position=(6, 6)), index=0, depth=2)
+WalkItem(node=RawText(), parent=Emphasis(children=1), index=0, depth=3)
+WalkItem(node=Paragraph(children=1, position=(4, 4)), parent=ListItem(children=1, loose=False, leader='1.', prepend=3, next_marker=None, position=(3, 4)), index=0, depth=3)
+WalkItem(node=RawText(), parent=Paragraph(children=2, position=(7, 7)), index=0, depth=3)
+WalkItem(node=Emphasis(children=1), parent=Paragraph(children=2, position=(7, 7)), index=1, depth=3)
+WalkItem(node=RawText(), parent=Paragraph(children=1, position=(4, 4)), index=0, depth=4)
+WalkItem(node=RawText(), parent=Emphasis(children=1), index=0, depth=4)
+```
+
+Finally, you could even build your own AST programmatically!
+
+```python
+from mistletoe import block_tokens, span_tokens, HTMLRenderer
+
+doc = block_tokens.Document(children=[
+    block_tokens.Paragraph(
+        position=(0, 1),
+        children=[
+            span_tokens.Emphasis(
+                position=(0, 1),
+                children=[span_tokens.RawText("hallo")]
+            )
+    ])
+])
+HTMLRenderer().render(doc)
+```
+
+```html
+<p><em>hallo</em></p>
+```
+
 (intro/performance)=
 
 ## Performance

diff --git a/mistletoe/__init__.py b/mistletoe/__init__.py
@@ -15,18 +15,31 @@
 from mistletoe.block_tokens import Document
 from mistletoe.renderers.base import BaseRenderer  # noqa: F401
 from mistletoe.renderers.html import HTMLRenderer
+from mistletoe.parse_context import ParseContext  # noqa: F401
 
 
 def markdown(
-    iterable, renderer=HTMLRenderer, init_token=Document, read_kwargs=None, **kwargs
+    iterable,
+    renderer: BaseRenderer = HTMLRenderer,
+    parse_context=None,
+    init_token=Document,
+    read_kwargs=None,
+    **kwargs
 ):
     """
     Render text with a given renderer.
 
     :param iterable: string or list of strings
+    :param renderer: the renderer to use
+    :param parse_context: the parse context stores global parsing variables,
+        such as the block/span tokens to search for,
+        and link/footnote definitions that have been collected.
+        If None, a new context will be instantiated, with the default
+        block/span tokens for this renderer.
+    :type parse_context: mistletoe.parse_context.ParseContext
     :param init_token: The initial token to use for parsing the text `init_token.read`
     :param read_kwargs: keyword arguments to pass to the ``init_token.read`` method
     :param kwargs: keyword arguments to pass to the renderer initialisation
     """
-    with renderer(**kwargs) as renderer:
+    with renderer(parse_context=parse_context, **kwargs) as renderer:
         return renderer.render(init_token.read(iterable, **(read_kwargs or {})))
diff --git a/mistletoe/block_tokens.py b/mistletoe/block_tokens.py
@@ -85,7 +85,7 @@ def read(cls, lines):
 
 
 @autodoc
-@attr.s(slots=True, kw_only=True)
+@attr.s(slots=False, kw_only=True)
 class Document(BlockToken):
     """Document container."""
 
@@ -115,6 +115,8 @@ class Document(BlockToken):
     front_matter: Optional[FrontMatter] = attr.ib(
         default=None, metadata={"doc": "Front matter YAML block"}
     )
+    # TODO add is_nested parameter?
+    # or have a subclass of document specifically for nesting?
 
     @classmethod
     def read(