Merge 01b7bb1 into 9d27dee

executablebooks · Mar 4, 2020 · 4d4fb2b · 4d4fb2b
2 parents 9d27dee + 01b7bb1
commit 4d4fb2b
Show file tree

Hide file tree

Showing 54 changed files with 698 additions and 23 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -118,6 +118,7 @@ def run_apidoc(app):
     ("py:class", "mistletoe.block_token.Footnote"),
     ("py:class", "mistletoe.block_token.Paragraph"),
     ("py:class", "mistletoe.block_token.ThematicBreak"),
+    ("py:class", "mistletoe.block_token.HTMLBlock"),
     ("py:class", "mistletoe.base_renderer.BaseRenderer"),
     ("py:class", "mistletoe.html_renderer.HTMLRenderer"),
     ("py:class", "mistletoe.span_token.SpanToken"),

diff --git a/docs/develop/contributing.md b/docs/develop/contributing.md
@@ -25,6 +25,9 @@ Optionally you can run `black` and `flake8` separately:
 
 Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard.
 
+All functions and class methods should be annotated with types and include a docstring. The preferred docstring format is outlined in `MyST-Parser/docstring.fmt.mustache` and can be used automatically with the
+[autodocstring](https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring) VS Code extension.
+
 ## Testing
 
 For code tests:

diff --git a/docs/using/use_api.md b/docs/using/use_api.md
@@ -35,7 +35,7 @@ Here's some *text*
 
 1. a list
 
-> a quote""")
+> a *quote*""")
 root
 ```
 
@@ -66,6 +66,34 @@ list_token.__dict__
 {'children': [MyST.ListItem(children=1)], 'loose': False, 'start': 1}
 ```
 
+You can also recursively traverse the syntax tree, yielding `TraverseResult`s that contain the element, its parent and depth from the source token:
+
+```python
+from pprint import pprint
+from myst_parser import traverse
+tree = [
+    (t.parent.__class__.__name__, t.node.__class__.__name__, t.depth)
+    for t in traverse(root)
+]
+pprint(tree)
+```
+
+```python
+[('Document', 'Paragraph', 1),
+ ('Document', 'List', 1),
+ ('Document', 'Quote', 1),
+ ('Paragraph', 'RawText', 2),
+ ('Paragraph', 'Emphasis', 2),
+ ('List', 'ListItem', 2),
+ ('Quote', 'Paragraph', 2),
+ ('Emphasis', 'RawText', 3),
+ ('ListItem', 'Paragraph', 3),
+ ('Paragraph', 'RawText', 3),
+ ('Paragraph', 'Emphasis', 3),
+ ('Paragraph', 'RawText', 4),
+ ('Emphasis', 'RawText', 4)]
+```
+
 ## AST Renderer
 
 The `myst_parser.ast_renderer.AstRenderer` converts a token to a nested dictionary representation.

diff --git a/myst_parser/__init__.py b/myst_parser/__init__.py
@@ -1,4 +1,6 @@
-__version__ = "0.2.0"
+from .utils import traverse  # noqa: F401
+
+__version__ = "0.3.0"
 
 
 def text_to_tokens(text: str):

diff --git a/myst_parser/block_tokens.py b/myst_parser/block_tokens.py
@@ -1,9 +1,11 @@
 import re
+from typing import List, Union
 
 from mistletoe import block_token, span_token
 import mistletoe.block_tokenizer as tokenizer
+from mistletoe.block_token import tokenize, Footnote  # noqa: F401
 
-from mistletoe.block_token import tokenize, HTMLBlock, Footnote, TableRow  # noqa: F401
+from myst_parser import traverse
 
 """
 Tokens to be included in the parsing process, in the order specified.
@@ -73,7 +75,22 @@ def __repr__(self):
 class Document(block_token.BlockToken):
     """Document token."""
 
-    def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False):
+    def __init__(
+        self,
+        lines: Union[str, List[str]],
+        start_line: int = 0,
+        inc_front_matter: bool = True,
+        store_lines: bool = False,
+        propogate_range: bool = True,
+    ):
+        """Parse lines to a syntax token and its (recursive) children.
+
+        :param lines: string or list of strings
+        :param start_line: the initial line (used for nested parsing)
+        :param inc_front_matter: search for an initial YAML front matter block
+        :param store_lines: store the lines on the token (as `token._lines`)
+        :param propogate_range: traverse the final syntax tree and add missing ranges
+        """
 
         self.footnotes = {}
         self._start_line = start_line
@@ -96,22 +113,48 @@ def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False
             lines = lines[start_line:]
         self.children.extend(tokenize(lines, start_line))
 
+        if propogate_range:
+            # TODO this is a placeholder for implementing span level range storage
+            # (with start/end character attributes)
+            for result in traverse(self):
+                if not hasattr(result.node, "range"):
+                    try:
+                        result.node.range = result.parent.range
+                    except AttributeError:
+                        pass
+
         span_token._root_node = None
         block_token._root_node = None
 
     def __repr__(self):
         return "MyST.{}(blocks={})".format(self.__class__.__name__, len(self.children))
 
 
+class HTMLBlock(block_token.HTMLBlock):
+    """
+    Block-level HTML tokens.
+
+    Attributes:
+        content (str): literal strings rendered as-is.
+    """
+
+    # TODO range
+    def __repr__(self):
+        return "MyST.{}()".format(self.__class__.__name__)
+
+
 class LinkDefinition(Footnote):
-    """This has been renamed since, these actually refer to
+    """Link definition.
+
+    The constructor returns None, because the footnote information
+    is stored in Footnote.read.
+
+    Note: This has been renamed, since these actually refer to
     https://spec.commonmark.org/0.28/#link-reference-definitions,
     rather than what would generally be considered a footnote:
     https://www.markdownguide.org/extended-syntax/#footnotes
     """
 
-    pass
-
 
 class LineComment(block_token.BlockToken):
     """Line comment start with % """
@@ -411,7 +454,7 @@ def read(cls, lines):
 
     def __repr__(self):
         return "MyST.{}(range={},language={})".format(
-            self.__class__.__name__, self.range, self.language
+            self.__class__.__name__, self.range, self.language or "none"
         )
 
 
@@ -430,7 +473,8 @@ class CodeFence(block_token.CodeFence):
     def __init__(self, match):
         lines, open_info, self.range = match
         self.language = span_token.EscapeSequence.strip(open_info[2])
-        self.arguments = span_token.EscapeSequence.strip(open_info[3].splitlines()[0])
+        arg_lines = open_info[3].splitlines() or [""]
+        self.arguments = span_token.EscapeSequence.strip(arg_lines[0])
         self.children = (span_token.RawText("".join(lines)),)
 
     @classmethod
@@ -481,15 +525,21 @@ class Table(block_token.Table):
 
     def __init__(self, result):
         lines, self.range = result
+        # TODO why minimum of 3 `-`?
         if "---" in lines[1]:
             self.column_align = [
                 self.parse_align(column) for column in self.split_delimiter(lines[1])
             ]
-            self.header = TableRow(lines[0], self.column_align)
-            self.children = [TableRow(line, self.column_align) for line in lines[2:]]
+            self.header = TableRow(lines[0], self.range[0], self.column_align)
+            self.children = [
+                TableRow(line, self.range[0] + i, self.column_align)
+                for i, line in enumerate(lines[2:], 2)
+            ]
         else:
             self.column_align = [None]
-            self.children = [TableRow(line) for line in lines]
+            self.children = [
+                TableRow(line, self.range[0] + i) for i, line in enumerate(lines)
+            ]
 
     @staticmethod
     def read(lines):
@@ -498,8 +548,10 @@ def read(lines):
         line_buffer = [next(lines)]
         while lines.peek() is not None and "|" in lines.peek():
             line_buffer.append(next(lines))
+        # TODO why minimum of 3 `-`?
         if len(line_buffer) < 2 or "---" not in line_buffer[1]:
             lines.reset()
+            print("hi", line_buffer)
             return None
         return line_buffer, (start_line, lines.lineno)
 
@@ -509,7 +561,59 @@ def __repr__(self):
         )
 
 
+class TableRow(block_token.BlockToken):
+    """
+    Table row token.
+
+    Should only be called by Table.__init__().
+    """
+
+    def __init__(self, line, lineno, row_align=None):
+        self.range = [lineno, lineno]
+        self.row_align = row_align or [None]
+        cells = filter(None, line.strip().split("|"))
+        self.children = [
+            TableCell(cell.strip() if cell else "", lineno, align)
+            for cell, align in block_token.zip_longest(cells, self.row_align)
+        ]
+
+    def __repr__(self):
+        return "MyST.{}(range={},cells={})".format(
+            self.__class__.__name__, self.range, len(self.children)
+        )
+
+
+class TableCell(block_token.BlockToken):
+    """
+    Table cell token.
+    Boundary between span-level and block-level tokens.
+
+    Should only be called by TableRow.__init__().
+
+    Attributes:
+        align (bool): align option for current cell (default to None).
+        children (list): inner (span-)tokens.
+    """
+
+    def __init__(self, content, lineno, align=None):
+        self.align = align
+        self.range = [lineno, lineno]
+        super().__init__(content, span_token.tokenize_inner)
+
+    def __repr__(self):
+        return "MyST.{}(range={})".format(self.__class__.__name__, self.range)
+
+
 class List(block_token.List):
+    """
+    List token.
+
+    Attributes:
+        children (list): a list of ListItem tokens.
+        loose (bool): whether the list is loose.
+        start (NoneType or int): None if unordered, starting number if ordered.
+    """
+
     def __init__(self, matches):
         self.children = [ListItem(*match) for match in matches]
         self.loose = any(item.loose for item in self.children)