Skip to content

Commit

Permalink
Merge 01b7bb1 into 9d27dee
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisjsewell committed Mar 4, 2020
2 parents 9d27dee + 01b7bb1 commit 4d4fb2b
Show file tree
Hide file tree
Showing 54 changed files with 698 additions and 23 deletions.
1 change: 1 addition & 0 deletions docs/conf.py
Expand Up @@ -118,6 +118,7 @@ def run_apidoc(app):
("py:class", "mistletoe.block_token.Footnote"),
("py:class", "mistletoe.block_token.Paragraph"),
("py:class", "mistletoe.block_token.ThematicBreak"),
("py:class", "mistletoe.block_token.HTMLBlock"),
("py:class", "mistletoe.base_renderer.BaseRenderer"),
("py:class", "mistletoe.html_renderer.HTMLRenderer"),
("py:class", "mistletoe.span_token.SpanToken"),
Expand Down
3 changes: 3 additions & 0 deletions docs/develop/contributing.md
Expand Up @@ -25,6 +25,9 @@ Optionally you can run `black` and `flake8` separately:

Editors like VS Code also have automatic code reformat utilities, which can adhere to this standard.

All functions and class methods should be annotated with types and include a docstring. The preferred docstring format is outlined in `MyST-Parser/docstring.fmt.mustache` and can be used automatically with the
[autodocstring](https://marketplace.visualstudio.com/items?itemName=njpwerner.autodocstring) VS Code extension.

## Testing

For code tests:
Expand Down
30 changes: 29 additions & 1 deletion docs/using/use_api.md
Expand Up @@ -35,7 +35,7 @@ Here's some *text*
1. a list
> a quote""")
> a *quote*""")
root
```

Expand Down Expand Up @@ -66,6 +66,34 @@ list_token.__dict__
{'children': [MyST.ListItem(children=1)], 'loose': False, 'start': 1}
```

You can also recursively traverse the syntax tree, yielding `TraverseResult`s that contain the element, its parent and depth from the source token:

```python
from pprint import pprint
from myst_parser import traverse
tree = [
(t.parent.__class__.__name__, t.node.__class__.__name__, t.depth)
for t in traverse(root)
]
pprint(tree)
```

```python
[('Document', 'Paragraph', 1),
('Document', 'List', 1),
('Document', 'Quote', 1),
('Paragraph', 'RawText', 2),
('Paragraph', 'Emphasis', 2),
('List', 'ListItem', 2),
('Quote', 'Paragraph', 2),
('Emphasis', 'RawText', 3),
('ListItem', 'Paragraph', 3),
('Paragraph', 'RawText', 3),
('Paragraph', 'Emphasis', 3),
('Paragraph', 'RawText', 4),
('Emphasis', 'RawText', 4)]
```

## AST Renderer

The `myst_parser.ast_renderer.AstRenderer` converts a token to a nested dictionary representation.
Expand Down
4 changes: 3 additions & 1 deletion myst_parser/__init__.py
@@ -1,4 +1,6 @@
__version__ = "0.2.0"
from .utils import traverse # noqa: F401

__version__ = "0.3.0"


def text_to_tokens(text: str):
Expand Down
124 changes: 114 additions & 10 deletions myst_parser/block_tokens.py
@@ -1,9 +1,11 @@
import re
from typing import List, Union

from mistletoe import block_token, span_token
import mistletoe.block_tokenizer as tokenizer
from mistletoe.block_token import tokenize, Footnote # noqa: F401

from mistletoe.block_token import tokenize, HTMLBlock, Footnote, TableRow # noqa: F401
from myst_parser import traverse

"""
Tokens to be included in the parsing process, in the order specified.
Expand Down Expand Up @@ -73,7 +75,22 @@ def __repr__(self):
class Document(block_token.BlockToken):
"""Document token."""

def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False):
def __init__(
self,
lines: Union[str, List[str]],
start_line: int = 0,
inc_front_matter: bool = True,
store_lines: bool = False,
propogate_range: bool = True,
):
"""Parse lines to a syntax token and its (recursive) children.
:param lines: string or list of strings
:param start_line: the initial line (used for nested parsing)
:param inc_front_matter: search for an initial YAML block front matter block
:param store_lines: store the lines on the token (as `token._lines`)
:param propogate_range: traverse the final syntax tree and add missing ranges
"""

self.footnotes = {}
self._start_line = start_line
Expand All @@ -96,22 +113,48 @@ def __init__(self, lines, start_line=0, inc_front_matter=True, store_lines=False
lines = lines[start_line:]
self.children.extend(tokenize(lines, start_line))

if propogate_range:
# TODO this is a placeholder for implementing span level range storage
# (with start/end character attributes)
for result in traverse(self):
if not hasattr(result.node, "range"):
try:
result.node.range = result.parent.range
except AttributeError:
pass

span_token._root_node = None
block_token._root_node = None

def __repr__(self):
return "MyST.{}(blocks={})".format(self.__class__.__name__, len(self.children))


class HTMLBlock(block_token.HTMLBlock):
    """Block-level HTML token.

    Extends ``block_token.HTMLBlock`` with a ``MyST.``-prefixed ``repr``.

    Attributes:
        content (str): literal strings rendered as-is.
    """

    # TODO range
    def __repr__(self):
        """Return ``MyST.<ClassName>()``; no line range is included yet."""
        class_name = self.__class__.__name__
        return "MyST.{name}()".format(name=class_name)


class LinkDefinition(Footnote):
"""This has been renamed since, these actually refer to
"""Link definition.
The constructor returns None, because the footnote information
is stored in Footnote.read.
Note: This has been renamed since, these actually refer to
https://spec.commonmark.org/0.28/#link-reference-definitions,
rather than what would generally be considered a footnote:
https://www.markdownguide.org/extended-syntax/#footnotes
"""

pass


class LineComment(block_token.BlockToken):
"""Line comment start with % """
Expand Down Expand Up @@ -411,7 +454,7 @@ def read(cls, lines):

def __repr__(self):
return "MyST.{}(range={},language={})".format(
self.__class__.__name__, self.range, self.language
self.__class__.__name__, self.range, self.language or "none"
)


Expand All @@ -430,7 +473,8 @@ class CodeFence(block_token.CodeFence):
def __init__(self, match):
lines, open_info, self.range = match
self.language = span_token.EscapeSequence.strip(open_info[2])
self.arguments = span_token.EscapeSequence.strip(open_info[3].splitlines()[0])
arg_lines = open_info[3].splitlines() or [""]
self.arguments = span_token.EscapeSequence.strip(arg_lines[0])
self.children = (span_token.RawText("".join(lines)),)

@classmethod
Expand Down Expand Up @@ -481,15 +525,21 @@ class Table(block_token.Table):

def __init__(self, result):
lines, self.range = result
# TODO why minimum of 3 `-`?
if "---" in lines[1]:
self.column_align = [
self.parse_align(column) for column in self.split_delimiter(lines[1])
]
self.header = TableRow(lines[0], self.column_align)
self.children = [TableRow(line, self.column_align) for line in lines[2:]]
self.header = TableRow(lines[0], self.range[0], self.column_align)
self.children = [
TableRow(line, self.range[0] + i, self.column_align)
for i, line in enumerate(lines[2:], 2)
]
else:
self.column_align = [None]
self.children = [TableRow(line) for line in lines]
self.children = [
TableRow(line, self.range[0] + i) for i, line in enumerate(lines)
]

@staticmethod
def read(lines):
Expand All @@ -498,8 +548,10 @@ def read(lines):
line_buffer = [next(lines)]
while lines.peek() is not None and "|" in lines.peek():
line_buffer.append(next(lines))
# TODO why minimum of 3 `-`?
if len(line_buffer) < 2 or "---" not in line_buffer[1]:
lines.reset()
print("hi", line_buffer)
return None
return line_buffer, (start_line, lines.lineno)

Expand All @@ -509,7 +561,59 @@ def __repr__(self):
)


class TableRow(block_token.BlockToken):
    """A single table row, holding one ``TableCell`` per column.

    Should only be called by Table.__init__().
    """

    def __init__(self, line, lineno, row_align=None):
        """Split ``line`` on ``|`` and build a cell for each fragment.

        :param line: the source text of the row
        :param lineno: line number, stored as both ends of ``self.range``
        :param row_align: per-column alignments; ``[None]`` is used when falsy
        """
        self.range = [lineno, lineno]
        self.row_align = row_align if row_align else [None]
        # Empty fragments (e.g. those produced by leading/trailing pipes)
        # are discarded before the cells are built.
        fragments = (part for part in line.strip().split("|") if part)
        # NOTE(review): zip_longest is presumably itertools.zip_longest as
        # re-exported by mistletoe.block_token, padding the shorter side with
        # None; a padded cell is turned into empty content below.
        row_cells = []
        for text, alignment in block_token.zip_longest(fragments, self.row_align):
            content = text.strip() if text else ""
            row_cells.append(TableCell(content, lineno, alignment))
        self.children = row_cells

    def __repr__(self):
        """Return ``MyST.<ClassName>(range=...,cells=...)``."""
        return "MyST.{name}(range={span},cells={count})".format(
            name=self.__class__.__name__,
            span=self.range,
            count=len(self.children),
        )


class TableCell(block_token.BlockToken):
    """A single table cell.

    Boundary between span-level and block-level tokens.
    Should only be called by TableRow.__init__().

    Attributes:
        align: align option for the current cell (defaults to None).
        range (list): ``[lineno, lineno]`` for the row this cell belongs to.
        children (list): inner (span-)tokens; presumably populated by
            ``BlockToken.__init__`` via ``span_token.tokenize_inner``.
    """

    def __init__(self, content, lineno, align=None):
        """Store the alignment and line range, then tokenize ``content``.

        :param content: the text of the cell
        :param lineno: line number, stored as both ends of ``self.range``
        :param align: align option for the cell
        """
        self.align = align
        line_span = [lineno, lineno]
        self.range = line_span
        # The parent constructor receives the span-level tokenizer to apply
        # to the cell content.
        super().__init__(content, span_token.tokenize_inner)

    def __repr__(self):
        """Return ``MyST.<ClassName>(range=...)``."""
        class_name = self.__class__.__name__
        return "MyST.{name}(range={span})".format(name=class_name, span=self.range)


class List(block_token.List):
"""
List token.
Attributes:
children (list): a list of ListItem tokens.
loose (bool): whether the list is loose.
start (NoneType or int): None if unordered, starting number if ordered.
"""

def __init__(self, matches):
self.children = [ListItem(*match) for match in matches]
self.loose = any(item.loose for item in self.children)
Expand Down

0 comments on commit 4d4fb2b

Please sign in to comment.