Update to mistletoe-ebp==0.9.4a2 dependency (#110)

This update allows us to drop most of the span token patching code, and we now use the `Math` token directly from mistletoe. It also improves how token sets are instantiated within the renderers, making it a lot easier to swap in/out tokens to be included in the parse.
executablebooks · Mar 8, 2020 · 9122b52 · 9122b52
1 parent 40b4de4
commit 9122b52
Show file tree

Hide file tree

Showing 12 changed files with 179 additions and 270 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -15,5 +15,6 @@
     "python.linting.pylintEnabled": false,
     "python.linting.flake8Enabled": true,
     "python.linting.enabled": true,
-    "autoDocstring.customTemplatePath": "docstring.fmt.mustache"
+    "autoDocstring.customTemplatePath": "docstring.fmt.mustache",
+    "python.pythonPath": "/anaconda/envs/ebp/bin/python"
 }
diff --git a/docs/api/tokens.rst b/docs/api/tokens.rst
@@ -43,16 +43,6 @@ Role
     :exclude-members: __init__
 
 
-Math
-....
-
-.. autoclass:: myst_parser.span_tokens.Math
-    :members:
-    :no-undoc-members:
-    :show-inheritance:
-    :exclude-members: __init__
-
-
 Target
 ......
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -110,6 +110,7 @@ def run_apidoc(app):
 autodoc_member_order = "bysource"
 
 nitpick_ignore = [
+    ("py:class", "Any"),
     ("py:class", "Tuple"),
     ("py:class", "ForwardRef"),
     ("py:class", "NoneType"),

diff --git a/myst_parser/__init__.py b/myst_parser/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.4.1"
+__version__ = "0.5.0a1"
 
 
 def text_to_tokens(text: str):

diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py
@@ -4,38 +4,9 @@
 import attr
 
 from mistletoe import block_tokens
-from mistletoe.block_tokens import (  # noqa: F401
-    FrontMatter,
-    HTMLBlock,
-    Heading,
-    LinkDefinition,
-    ThematicBreak,
-    Table,
-    TableRow,
-    BlockCode,
-    CodeFence,
-)
+from mistletoe.block_tokens import Heading, ThematicBreak, CodeFence
 from mistletoe.attr_doc import autodoc
 
-"""
-Tokens to be included in the parsing process, in the order specified.
-"""
-__all__ = [
-    "HTMLBlock",
-    "LineComment",
-    "BlockCode",
-    "Heading",
-    "Quote",
-    "CodeFence",
-    "ThematicBreak",
-    "BlockBreak",
-    "List",
-    "Table",
-    "LinkDefinition",
-    "Paragraph",
-    "FrontMatter",
-]
-
 
 @autodoc
 @attr.s(slots=True, kw_only=True)
@@ -69,10 +40,10 @@ def read(
             # TODO this is a placeholder for implementing span level range storage
             # (with start/end character attributes)
             for result in doc.walk():
-                if not hasattr(result.node, "position"):
+                if getattr(result.node, "position", None) is None:
                     try:
                         result.node.position = result.parent.position
-                    except AttributeError:
+                    except (AttributeError, TypeError):
                         raise
         return doc
 
@@ -157,7 +128,10 @@ def read(cls, lines):
 @autodoc
 @attr.s(slots=True, kw_only=True)
 class Quote(block_tokens.Quote):
-    """Quote token. (`["> # heading\\n", "> paragraph\\n"]`)."""
+    """Quote token. (`["> # heading\\n", "> paragraph\\n"]`).
+
+    MyST variant, that includes transitions to `LineComment` and `BlockBreak`.
+    """
 
     @classmethod
     def transition(cls, next_line):
@@ -179,6 +153,8 @@ class Paragraph(block_tokens.Paragraph):
     """Paragraph token. (`["some\\n", "continuous\\n", "lines\\n"]`)
 
     Boundary between span-level and block-level tokens.
+
+    MyST variant, that includes transitions to `LineComment` and `BlockBreak`.
     """
 
     @classmethod
@@ -197,7 +173,10 @@ def transition(cls, next_line):
 @autodoc
 @attr.s(slots=True, kw_only=True)
 class List(block_tokens.List):
-    """List token (unordered or ordered)"""
+    """List token (unordered or ordered)
+
+    MyST variant, that includes transitions to `LineComment` and `BlockBreak`.
+    """
 
     @classmethod
     def read(cls, lines):
@@ -244,6 +223,8 @@ class ListItem(block_tokens.ListItem):
     """List items.
 
     Not included in the parsing process, but called by List.
+
+    MyST variant, that includes transitions to `LineComment` and `BlockBreak`.
     """
 
     @staticmethod

diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py
@@ -1,6 +1,5 @@
 from contextlib import contextmanager
 import copy
-from itertools import chain
 from os.path import splitext
 from pathlib import Path
 import re
@@ -19,12 +18,11 @@
 from docutils.utils import new_document, Reporter
 import yaml
 
-from mistletoe import span_tokens
+from mistletoe import block_tokens, block_tokens_ext, span_tokens, span_tokens_ext
 from mistletoe.renderers.base import BaseRenderer
 
-from myst_parser import span_tokens as myst_span_tokens
 from myst_parser import block_tokens as myst_block_tokens
-from mistletoe.parse_context import ParseContext, set_parse_context, tokens_from_module
+from myst_parser import span_tokens as myst_span_tokens
 from myst_parser.parse_directives import parse_directive_text, DirectiveParsingError
 from myst_parser.utils import escape_url
 
@@ -35,19 +33,52 @@ class DocutilsRenderer(BaseRenderer):
     Note this renderer has no dependencies on Sphinx.
     """
 
+    default_block_tokens = (
+        block_tokens.HTMLBlock,
+        myst_block_tokens.LineComment,
+        block_tokens.BlockCode,
+        block_tokens.Heading,
+        myst_block_tokens.Quote,
+        block_tokens.CodeFence,
+        block_tokens.ThematicBreak,
+        myst_block_tokens.BlockBreak,
+        myst_block_tokens.List,
+        block_tokens_ext.Table,
+        block_tokens.LinkDefinition,
+        myst_block_tokens.Paragraph,
+    )
+
+    default_span_tokens = (
+        span_tokens.EscapeSequence,
+        myst_span_tokens.Role,
+        span_tokens.HTMLSpan,
+        span_tokens.AutoLink,
+        myst_span_tokens.Target,
+        span_tokens.CoreTokens,
+        span_tokens_ext.Math,
+        # TODO there is no matching core element in docutils for strikethrough
+        # span_tokens_ext.Strikethrough,
+        span_tokens.InlineCode,
+        span_tokens.LineBreak,
+        span_tokens.RawText,
+    )
+
     def __init__(
         self,
         document: Optional[nodes.document] = None,
         current_node: Optional[nodes.Element] = None,
         config: Optional[dict] = None,
+        find_blocks=None,
+        find_spans=None,
     ):
         """Initialise the renderer.
 
         :param document: The document to populate (or create a new one if None)
         :param current_node: The root node from which to begin populating
             (default is document, or should be an ancestor of document)
         :param config: contains configuration specific to the rendering process
-
+        :param find_blocks: override the default block tokens (classes or class paths)
+        :param find_spans: override the default span tokens (classes or class paths)
         """
         self.config = config or {}
         self.document = document or self.new_document()  # type: nodes.document
@@ -57,20 +88,7 @@ def __init__(
         get_language(self.language_module)
         self._level_to_elem = {0: self.document}
 
-        super().__init__()
-
-        _myst_span_tokens = tokens_from_module(myst_span_tokens)
-        _myst_block_tokens = tokens_from_module(myst_block_tokens)
-
-        for token in chain(_myst_span_tokens, _myst_block_tokens):
-            render_func = getattr(self, self._cls_to_func(token.__name__))
-            self.render_map[token.__name__] = render_func
-
-        parse_context = ParseContext(
-            block_tokens=_myst_block_tokens, span_tokens=_myst_span_tokens
-        )
-        set_parse_context(parse_context)
-        self.parse_context = parse_context.copy()
+        super().__init__(find_blocks=find_blocks, find_spans=find_spans)
 
     def new_document(self, source_path="notset") -> nodes.document:
         settings = OptionParser(components=(RSTParser,)).get_default_values()
@@ -80,7 +98,7 @@ def add_line_and_source_path(self, node, token):
         """Copy the line number and document source path to the docutils node."""
         try:
             node.line = token.position[0] + 1
-        except AttributeError:
+        except (AttributeError, TypeError):
             pass
         node.source = self.document["source"]
 
@@ -356,7 +374,7 @@ def render_image(self, token):
         img_node["uri"] = token.src
 
         img_node["alt"] = ""
-        if token.children and isinstance(token.children[0], myst_span_tokens.RawText):
+        if token.children and isinstance(token.children[0], span_tokens.RawText):
             img_node["alt"] = token.children[0].content
             token.children[0].content = ""
 
@@ -451,7 +469,7 @@ def render_role(self, token):
         # TODO role name white/black lists
         try:
             lineno = token.position[0]
-        except AttributeError:
+        except (AttributeError, TypeError):
             lineno = 0
         inliner = MockInliner(self, lineno)
         role_func, messages = roles.role(

diff --git a/myst_parser/html_renderer.py b/myst_parser/html_renderer.py
@@ -1,52 +1,67 @@
-import html
-from itertools import chain
-import re
 from textwrap import dedent
 
-from mistletoe.parse_context import ParseContext, set_parse_context, tokens_from_module
+from mistletoe import block_tokens, block_tokens_ext, span_tokens, span_tokens_ext
 from mistletoe.renderers import html as html_renderer
 
-from myst_parser import span_tokens
-from myst_parser import block_tokens
+from myst_parser.block_tokens import LineComment, BlockBreak, Quote, Paragraph, List
+from myst_parser.span_tokens import Role, Target
 
 
 class HTMLRenderer(html_renderer.HTMLRenderer):
-    """This HTML render uses the same block/span tokens as the docutils renderer.
+    """This HTML renderer uses the MyST spec block and span tokens.
 
     It is used to test compliance with the commonmark spec,
     and can be used for basic previews,
     but does not run roles/directives, resolve cross-references etc...
     """
 
-    def __init__(self, add_mathjax=False, as_standalone=False, add_css=None):
+    default_block_tokens = (
+        block_tokens.HTMLBlock,
+        LineComment,
+        block_tokens.BlockCode,
+        block_tokens.Heading,
+        Quote,
+        block_tokens.CodeFence,
+        block_tokens.ThematicBreak,
+        BlockBreak,
+        List,
+        block_tokens_ext.Table,
+        block_tokens.LinkDefinition,
+        Paragraph,
+    )
+
+    default_span_tokens = (
+        span_tokens.EscapeSequence,
+        Role,
+        span_tokens.HTMLSpan,
+        span_tokens.AutoLink,
+        Target,
+        span_tokens.CoreTokens,
+        span_tokens_ext.Math,
+        # TODO there is no matching core element in docutils for strikethrough
+        # span_tokens_ext.Strikethrough,
+        span_tokens.InlineCode,
+        span_tokens.LineBreak,
+        span_tokens.RawText,
+    )
+
+    def __init__(
+        self,
+        find_blocks=None,
+        find_spans=None,
+        add_mathjax=False,
+        as_standalone=False,
+        add_css=None,
+    ):
         """Intitalise HTML renderer
 
+        :param find_blocks: override the default block tokens (classes or class paths)
+        :param find_spans: override the default span tokens (classes or class paths)
         :param add_mathjax: add the mathjax CDN
         :param as_standalone: return the HTML body within a minmal HTML page
         :param add_css: if as_standalone=True, CSS to add to the header
         """
-        self._suppress_ptag_stack = [False]
-
-        super(html_renderer.HTMLRenderer, self).__init__()
-
-        myst_span_tokens = tokens_from_module(span_tokens)
-        myst_block_tokens = tokens_from_module(block_tokens)
-
-        for token in chain(myst_span_tokens, myst_block_tokens):
-            render_func = getattr(self, self._cls_to_func(token.__name__))
-            self.render_map[token.__name__] = render_func
-
-        parse_context = ParseContext(myst_block_tokens, myst_span_tokens)
-        set_parse_context(parse_context)
-        self.parse_context = parse_context.copy()
-
-        # html.entities.html5 includes entitydefs not ending with ';',
-        # CommonMark seems to hate them, so...
-        self._stdlib_charref = html._charref
-        _charref = re.compile(
-            r"&(#[0-9]+;" r"|#[xX][0-9a-fA-F]+;" r"|[^\t\n\f <&#;]{1,32};)"
-        )
-        html._charref = _charref
+        super().__init__(find_blocks=find_blocks, find_spans=find_spans)
 
         self.mathjax_src = ""
         if add_mathjax: