Skip to content

Commit 50fb9a8

Browse files
authored
Zendesk helpcenter components diff (#4)
* adds parsing of Zendesk article md components * rename refactor * adds Zendesk components rendering * fix list comparison: sdiff didn't point out the difference between an ordered list and an unordered list * comparison tests * tests fixtures * refactor: more accurate naming * style: fix linting * bump version
1 parent 6f74eb1 commit 50fb9a8

16 files changed

+383
-26
lines changed

sdiff/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
from .parser import parse
1+
from typing import Type
2+
3+
from .parser import parse, MdParser, ZendeskHelpMdParser # noqa
24
from .renderer import TextRenderer
35
from .compare import diff_struct, diff_links # noqa
46

57

6-
def diff(md1, md2, renderer=TextRenderer()):
7-
tree1 = parse(md1)
8-
tree2 = parse(md2)
8+
def diff(md1, md2, renderer=TextRenderer(), parser_cls: Type[MdParser] = MdParser):
9+
tree1 = parse(md1, parser_cls)
10+
tree2 = parse(md2, parser_cls)
911

1012
tree1, tree2, struct_errors = diff_struct(tree1, tree2)
1113
# tree1, tree2, links_errors = diff_links(tree1, tree2)

sdiff/model.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1+
from abc import ABC
12
from enum import Enum
23

4+
import typing
5+
from typing import Union
6+
7+
if typing.TYPE_CHECKING:
8+
from sdiff.renderer import HtmlRenderer, TextRenderer # noqa
9+
310

411
class Symbols(Enum):
512
null = ''
@@ -14,12 +21,18 @@ class Symbols(Enum):
1421
new_line = 'n'
1522

1623

24+
class ZendeskArtSymbols(Enum):
    """One-letter symbols for the Zendesk help-center article components.

    Each value is used as the ``symbol`` class attribute of the matching
    ``ZendeskHelp*`` node class.
    """

    steps = 'S'    # symbol of the <steps> node
    tabs = 'T'     # symbol of the <tabs> node
    callout = 'C'  # symbol of the <callout> node
28+
29+
1730
class Node(object):
1831
symbol = Symbols.null.value
1932
name = ''
2033

2134
def __init__(self, nodes=None):
22-
self.nodes = nodes or []
35+
self.nodes: typing.List[Node] = nodes or []
2336
self.meta = {}
2437

2538
def __str__(self):
@@ -110,6 +123,14 @@ def __init__(self, ordered, nodes=None):
110123
super().__init__(nodes)
111124
self.ordered = ordered
112125

126+
def __hash__(self):
    """Hash on the node symbol and the ``ordered`` flag.

    These are the only two values hashed, so any two list nodes with the
    same ``ordered`` flag hash alike regardless of their children.
    """
    identity = (self.symbol, self.ordered)
    return hash(identity)
128+
129+
def __eq__(self, other):
    """Return True when ``other`` is a ``List`` with the same ``ordered`` flag.

    Child nodes are not compared; anything that is not a ``List`` is unequal.
    """
    return isinstance(other, List) and self.ordered == other.ordered
133+
113134
def __repr__(self):
114135
return repr({'type': self.name, 'meta': self.meta, 'nodes': self.nodes, 'ordered': self.ordered})
115136

@@ -202,3 +223,58 @@ def __repr__(self):
202223

203224
def original(self, renderer):
204225
return renderer.render_node(self, u' \u00B6\n')
226+
227+
228+
class ZendeskHelpNode(Node, ABC):
    """Base class for nodes that represent a Zendesk help-center tag.

    Subclasses set ``name`` to the tag name; the node's children are the
    nodes nested inside the tag.
    """

    def wrap(self, content: str) -> str:
        """Return ``content`` enclosed in this node's opening and closing tags."""
        tag = self.name
        return f'<{tag}>\n\n{content}</{tag}>\n'

    def original(self, renderer: Union['HtmlRenderer', 'TextRenderer']) -> str:
        """Render all child nodes, wrap them in the tag and pass the result to the renderer."""
        rendered_children = [child.original(renderer) for child in self.nodes]
        wrapped = self.wrap(''.join(rendered_children))
        return renderer.render_node(self, wrapped)
236+
237+
238+
class ZendeskHelpSteps(ZendeskHelpNode):
    """``<steps>`` component of a Zendesk help-center article.

    Tag wrapping (``wrap``) and rendering (``original``) are inherited from
    ``ZendeskHelpNode``. The overrides previously defined here were verbatim
    copies of the base-class methods, so they were dropped to keep a single
    implementation.
    """

    symbol = ZendeskArtSymbols.steps.value
    name = 'steps'
249+
250+
251+
class ZendeskHelpTabs(ZendeskHelpNode):
    """``<tabs>`` component of a Zendesk help-center article.

    Only the tag name and symbol are set here; everything else comes from
    ``ZendeskHelpNode``.
    """

    name = 'tabs'
    symbol = ZendeskArtSymbols.tabs.value
254+
255+
256+
class ZendeskHelpCallout(ZendeskHelpNode):
    """``<callout>`` component of a Zendesk help-center article.

    A callout may carry a ``style`` attribute, which is written after the tag
    name in the opening tag. Equality and hashing depend on the style only,
    not on the nested nodes.
    """

    symbol = ZendeskArtSymbols.callout.value
    name = 'callout'

    def __init__(self, style: str = None, nodes: typing.List[Node] = None):
        """Create a callout with an optional ``style`` and optional child ``nodes``."""
        super().__init__(nodes)
        self.style = style

    def __repr__(self):
        description = {
            'type': self.name,
            'meta': self.meta,
            'nodes': self.nodes,
            'style': self.style,
        }
        return repr(description)

    def __hash__(self):
        key = (self.name, self.style)
        return hash(key)

    def __eq__(self, other):
        # Only another callout can be equal; children are not compared.
        return isinstance(other, ZendeskHelpCallout) and self.style == other.style

    def wrap(self, content: str) -> str:
        """Return ``content`` inside ``<callout>`` tags, adding the style when one is set."""
        attr = f' {self.style}' if self.style else ''
        return f'<{self.name}{attr}>\n\n{content}</{self.name}>\n'

sdiff/parser.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Match, Type
2+
13
import mistune
24
import re
35

@@ -55,7 +57,7 @@ def parse_text(self, m):
5557
self.tokens.append(node)
5658

5759

58-
class BlockLexer(mistune.BlockLexer):
60+
class MdParser(mistune.BlockLexer):
5961
default_rules = [
6062
'newline', 'list_block', 'block_html',
6163
'heading', 'lheading',
@@ -67,6 +69,10 @@ class BlockLexer(mistune.BlockLexer):
6769
'hrule', 'list_block', 'text',
6870
)
6971

72+
@classmethod
73+
def get_lexer(cls):
74+
return cls()
75+
7076
def __init__(self):
7177
super().__init__()
7278
self.grammar_class.block_html = re.compile(
@@ -156,13 +162,48 @@ def _process_list_item(self, cap, bull):
156162
loose = _next
157163

158164
node = ListItem()
159-
block_lexer = BlockLexer()
165+
block_lexer = self.get_lexer()
160166
nodes = block_lexer.parse(item, self.list_rules)
161167
node.add_nodes(nodes)
162168
result.append(node)
163169
return result
164170

165171

172+
class ZendeskHelpMdParser(MdParser):
    """Markdown block parser that also recognises Zendesk help-center tags.

    On top of the ``MdParser`` rules it adds three block rules: ``<tabs>``,
    ``<steps>`` and ``<callout>`` (optionally followed by ``green``, ``red``
    or ``yellow``). The markdown inside a tag is parsed by a fresh lexer of
    the same class and attached to the tag's node as children.
    """

    TAG_CONTENT_GROUP = 'tag_content'
    TAG_PATTERN = r'^\s*(<{tag_name}{attr_re}>(?P<%s>[\s\S]+?)</{tag_name}>)\s*$' % TAG_CONTENT_GROUP
    CALLOUT_STYLE_GROUP = 'style'
    CALLOUT_ATTR_PATTERN = r'( (?P<%s>green|red|yellow))*' % CALLOUT_STYLE_GROUP

    def __init__(self):
        super().__init__()
        # The tag patterns are registered on the grammar class, the same way
        # MdParser.__init__ registers its own ``block_html`` pattern.
        self.grammar_class.callout = re.compile(self.TAG_PATTERN.format(tag_name='callout',
                                                                        attr_re=self.CALLOUT_ATTR_PATTERN))
        self.grammar_class.steps = re.compile(self.TAG_PATTERN.format(tag_name='steps', attr_re=''))
        self.grammar_class.tabs = re.compile(self.TAG_PATTERN.format(tag_name='tabs', attr_re=''))

        # Build a per-instance rule list instead of inserting into the list
        # inherited from MdParser. Inserting in place grew that shared list on
        # every instantiation (get_lexer() creates many instances) and leaked
        # the Zendesk rules into plain MdParser, which has no parse_callout /
        # parse_steps / parse_tabs handlers.
        # Rule order is the same as before: tabs, steps, callout, then the rest.
        self.default_rules = ['tabs', 'steps', 'callout', *self.default_rules]

    def parse_callout(self, m: Match[str]) -> None:
        """Append a callout node carrying the matched style (``None`` if absent)."""
        style = m.group(self.CALLOUT_STYLE_GROUP)
        self._parse_nested(ZendeskHelpCallout(style), m)

    def parse_steps(self, m: Match[str]) -> None:
        """Append a steps node built from the matched ``<steps>`` block."""
        self._parse_nested(ZendeskHelpSteps(), m)

    def parse_tabs(self, m: Match[str]) -> None:
        """Append a tabs node built from the matched ``<tabs>`` block."""
        self._parse_nested(ZendeskHelpTabs(), m)

    def _parse_nested(self, node: Node, m: Match[str]) -> None:
        """Parse the tag's inner content with a fresh lexer and append ``node`` to the tokens."""
        nested_content = m.group(self.TAG_CONTENT_GROUP)
        nested_nodes = self.get_lexer().parse(nested_content)
        node.add_nodes(nested_nodes)
        self.tokens.append(node)
205+
206+
166207
def _remove_spaces_from_empty_lines(text):
167208
return '\n'.join([re.sub(r'^( {1,}|\t{1,})$', '\n', line) for line in text.splitlines()])
168209

@@ -171,9 +212,9 @@ def _remove_ltr_rtl_marks(text):
171212
return re.sub(r'(\u200e|\u200f)', '', text)
172213

173214

174-
def parse(text):
215+
def parse(text, parser_cls: Type[MdParser] = MdParser):
175216
# HACK dirty hack to be consistent with Markdown list_block
176217
text = _remove_spaces_from_empty_lines(text)
177218
text = _remove_ltr_rtl_marks(text)
178-
block_lexer = BlockLexer()
219+
block_lexer = parser_cls()
179220
return Root(block_lexer.parse(text))

sdiff/renderer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
from sdiff.model import Root, Node
2+
3+
14
class HtmlRenderer(object):
25

3-
def render(self, tree):
6+
def render(self, tree: Root):
47
result = tree.original(self)
58
return '<pre>\n%s\n</pre>' % result.strip()
69

@@ -14,9 +17,9 @@ def render_node(self, node, text):
1417

1518
class TextRenderer(object):
1619

17-
def render(self, tree):
20+
def render(self, tree: Root):
1821
result = tree.original(self)
1922
return result.strip()
2023

21-
def render_node(self, node, text):
24+
def render_node(self, node: Node, text):
2225
return text

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from setuptools import setup, find_packages
33

44

5-
version = '0.3.0'
5+
version = '0.4.0'
66

77

88
def read(f):
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<tabs>
2+
3+
# Hallo
4+
5+
<callout green>
6+
7+
1. ein
8+
2. zwei
9+
10+
</callout>
11+
12+
</tabs>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<tabs>
2+
3+
# Hello
4+
5+
<callout red>
6+
7+
1. one
8+
2. two
9+
10+
</callout>
11+
12+
</tabs>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<steps>
2+
3+
* ein
4+
* zwei
5+
6+
</steps>
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<steps>
2+
3+
1. one
4+
2. two
5+
6+
</steps>
7+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<tabs>
2+
3+
# Hallo
4+
5+
<steps>
6+
7+
1. ein
8+
2. zwei
9+
10+
</steps>
11+
12+
# Zwei tab
13+
14+
content
15+
16+
</tabs>

0 commit comments

Comments
 (0)