diff --git a/sdiff/__init__.py b/sdiff/__init__.py index 3835f9c..fed9ad1 100644 --- a/sdiff/__init__.py +++ b/sdiff/__init__.py @@ -1,11 +1,13 @@ -from .parser import parse +from typing import Type + +from .parser import parse, MdParser, ZendeskHelpMdParser # noqa from .renderer import TextRenderer from .compare import diff_struct, diff_links # noqa -def diff(md1, md2, renderer=TextRenderer()): - tree1 = parse(md1) - tree2 = parse(md2) +def diff(md1, md2, renderer=TextRenderer(), parser_cls: Type[MdParser] = MdParser): + tree1 = parse(md1, parser_cls) + tree2 = parse(md2, parser_cls) tree1, tree2, struct_errors = diff_struct(tree1, tree2) # tree1, tree2, links_errors = diff_links(tree1, tree2) diff --git a/sdiff/model.py b/sdiff/model.py index 03060f5..492c84b 100644 --- a/sdiff/model.py +++ b/sdiff/model.py @@ -1,5 +1,12 @@ +from abc import ABC from enum import Enum +import typing +from typing import Union + +if typing.TYPE_CHECKING: + from sdiff.renderer import HtmlRenderer, TextRenderer # noqa + class Symbols(Enum): null = '' @@ -14,12 +21,18 @@ class Symbols(Enum): new_line = 'n' +class ZendeskArtSymbols(Enum): + steps = 'S' + tabs = 'T' + callout = 'C' + + class Node(object): symbol = Symbols.null.value name = '' def __init__(self, nodes=None): - self.nodes = nodes or [] + self.nodes: typing.List[Node] = nodes or [] self.meta = {} def __str__(self): @@ -110,6 +123,14 @@ def __init__(self, ordered, nodes=None): super().__init__(nodes) self.ordered = ordered + def __hash__(self): + return hash((self.symbol, self.ordered)) + + def __eq__(self, other): + if not isinstance(other, List): + return False + return self.ordered == other.ordered + def __repr__(self): return repr({'type': self.name, 'meta': self.meta, 'nodes': self.nodes, 'ordered': self.ordered}) @@ -202,3 +223,58 @@ def __repr__(self): def original(self, renderer): return renderer.render_node(self, u' \u00B6\n') + + +class ZendeskHelpNode(Node, ABC): + def wrap(self, content: str) -> str: + return f'<{self.name}>\n\n{content}\n' + + def original(self, renderer: Union['HtmlRenderer', 'TextRenderer']) -> str: + nested_content = ''.join(node.original(renderer) for node in self.nodes) + result = self.wrap(nested_content) + return renderer.render_node(self, result) + + +class ZendeskHelpSteps(ZendeskHelpNode): + symbol = ZendeskArtSymbols.steps.value + name = 'steps' + + def wrap(self, content: str) -> str: + return f'<{self.name}>\n\n{content}\n' + + def original(self, renderer: Union['HtmlRenderer', 'TextRenderer']) -> str: + nested_content = ''.join(node.original(renderer) for node in self.nodes) + result = self.wrap(nested_content) + return renderer.render_node(self, result) + + +class ZendeskHelpTabs(ZendeskHelpNode): + symbol = ZendeskArtSymbols.tabs.value + name = 'tabs' + + +class ZendeskHelpCallout(ZendeskHelpNode): + symbol = ZendeskArtSymbols.callout.value + name = 'callout' + + def __init__(self, style: str = None, nodes: typing.List[Node] = None): + super().__init__(nodes) + self.style = style + + def __repr__(self): + return repr({'type': self.name, 'meta': self.meta, 'nodes': self.nodes, 'style': self.style}) + + def __hash__(self): + return hash((self.name, self.style)) + + def __eq__(self, other): + if not isinstance(other, ZendeskHelpCallout): + return False + return self.style == other.style + + def wrap(self, content: str) -> str: + if self.style: + attr = f' {self.style}' + else: + attr = '' + return f'<{self.name}{attr}>\n\n{content}\n' diff --git a/sdiff/parser.py b/sdiff/parser.py index 6a1e1c9..634a08c 100644 --- a/sdiff/parser.py +++ b/sdiff/parser.py @@ -1,3 +1,5 @@ +from typing import Match, Type + import mistune import re @@ -55,7 +57,7 @@ def parse_text(self, m): self.tokens.append(node) -class BlockLexer(mistune.BlockLexer): +class MdParser(mistune.BlockLexer): default_rules = [ 'newline', 'list_block', 'block_html', 'heading', 'lheading', @@ -67,6 +69,10 @@ class BlockLexer(mistune.BlockLexer): 'hrule', 'list_block', 'text', ) + @classmethod + def get_lexer(cls): + return cls() + def __init__(self): super().__init__() self.grammar_class.block_html = re.compile( @@ -156,13 +162,48 @@ def _process_list_item(self, cap, bull): loose = _next node = ListItem() - block_lexer = BlockLexer() + block_lexer = self.get_lexer() nodes = block_lexer.parse(item, self.list_rules) node.add_nodes(nodes) result.append(node) return result +class ZendeskHelpMdParser(MdParser): + TAG_CONTENT_GROUP = 'tag_content' + TAG_PATTERN = r'^\s*(<{tag_name}{attr_re}>(?P<%s>[\s\S]+?))\s*$' % TAG_CONTENT_GROUP + CALLOUT_STYLE_GROUP = 'style' + CALLOUT_ATTR_PATTERN = r'( (?P<%s>green|red|yellow))*' % CALLOUT_STYLE_GROUP + + def __init__(self): + super().__init__() + self.grammar_class.callout = re.compile(self.TAG_PATTERN.format(tag_name='callout', + attr_re=self.CALLOUT_ATTR_PATTERN)) + self.default_rules.insert(0, 'callout') + + self.grammar_class.steps = re.compile(self.TAG_PATTERN.format(tag_name='steps', attr_re='')) + self.default_rules.insert(0, 'steps') + + self.grammar_class.tabs = re.compile(self.TAG_PATTERN.format(tag_name='tabs', attr_re='')) + self.default_rules.insert(0, 'tabs') + + def parse_callout(self, m: Match[str]) -> None: + style = m.group(self.CALLOUT_STYLE_GROUP) + self._parse_nested(ZendeskHelpCallout(style), m) + + def parse_steps(self, m: Match[str]) -> None: + self._parse_nested(ZendeskHelpSteps(), m) + + def parse_tabs(self, m: Match[str]) -> None: + self._parse_nested(ZendeskHelpTabs(), m) + + def _parse_nested(self, node: Node, m: Match[str]) -> None: + nested_content = m.group(self.TAG_CONTENT_GROUP) + nested_nodes = self.get_lexer().parse(nested_content) + node.add_nodes(nested_nodes) + self.tokens.append(node) + + def _remove_spaces_from_empty_lines(text): return '\n'.join([re.sub(r'^( {1,}|\t{1,})$', '\n', line) for line in text.splitlines()]) @@ -171,9 +212,9 @@ def _remove_ltr_rtl_marks(text): return re.sub(r'(\u200e|\u200f)', '', text) -def parse(text): +def parse(text, parser_cls: Type[MdParser] = MdParser): # HACK dirty hack to be consistent with Markdown list_block text = _remove_spaces_from_empty_lines(text) text = _remove_ltr_rtl_marks(text) - block_lexer = BlockLexer() + block_lexer = parser_cls() return Root(block_lexer.parse(text)) diff --git a/sdiff/renderer.py b/sdiff/renderer.py index b27e260..4adba59 100644 --- a/sdiff/renderer.py +++ b/sdiff/renderer.py @@ -1,6 +1,9 @@ +from sdiff.model import Root, Node + + class HtmlRenderer(object): - def render(self, tree): + def render(self, tree: Root): result = tree.original(self) return '
\n%s\n
' % result.strip() @@ -14,9 +17,9 @@ def render_node(self, node, text): class TextRenderer(object): - def render(self, tree): + def render(self, tree: Root): result = tree.original(self) return result.strip() - def render_node(self, node, text): + def render_node(self, node: Node, text): return text diff --git a/setup.py b/setup.py index 341372d..cc60c1c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages -version = '0.3.0' +version = '0.4.0' def read(f): diff --git a/tests/fixtures/different/zendesk_callout_in_tabs.de.md b/tests/fixtures/different/zendesk_callout_in_tabs.de.md new file mode 100644 index 0000000..d3519f4 --- /dev/null +++ b/tests/fixtures/different/zendesk_callout_in_tabs.de.md @@ -0,0 +1,12 @@ + + +# Hallo + + + +1. ein +2. zwei + + + + \ No newline at end of file diff --git a/tests/fixtures/different/zendesk_callout_in_tabs.en.md b/tests/fixtures/different/zendesk_callout_in_tabs.en.md new file mode 100644 index 0000000..9a15f31 --- /dev/null +++ b/tests/fixtures/different/zendesk_callout_in_tabs.en.md @@ -0,0 +1,12 @@ + + +# Hello + + + +1. one +2. two + + + + \ No newline at end of file diff --git a/tests/fixtures/different/zendesk_steps.de.md b/tests/fixtures/different/zendesk_steps.de.md new file mode 100644 index 0000000..7771550 --- /dev/null +++ b/tests/fixtures/different/zendesk_steps.de.md @@ -0,0 +1,6 @@ + + +* ein +* zwei + + diff --git a/tests/fixtures/different/zendesk_steps.en.md b/tests/fixtures/different/zendesk_steps.en.md new file mode 100644 index 0000000..c9a64d6 --- /dev/null +++ b/tests/fixtures/different/zendesk_steps.en.md @@ -0,0 +1,7 @@ + + +1. one +2. two + + + diff --git a/tests/fixtures/same/zendesk_steps_in_tabs.de.md b/tests/fixtures/same/zendesk_steps_in_tabs.de.md new file mode 100644 index 0000000..14425f2 --- /dev/null +++ b/tests/fixtures/same/zendesk_steps_in_tabs.de.md @@ -0,0 +1,16 @@ + + +# Hallo + + + +1. ein +2. zwei + + + +# Zwei tab + +content + + \ No newline at end of file diff --git a/tests/fixtures/same/zendesk_steps_in_tabs.en.md b/tests/fixtures/same/zendesk_steps_in_tabs.en.md new file mode 100644 index 0000000..66f8b2b --- /dev/null +++ b/tests/fixtures/same/zendesk_steps_in_tabs.en.md @@ -0,0 +1,16 @@ + + +# Steps tab + + + +1. one +2. two + + + +# Second tab + +content + + \ No newline at end of file diff --git a/tests/fixtures/trees.py b/tests/fixtures/trees.py index 0188a62..9fdea43 100644 --- a/tests/fixtures/trees.py +++ b/tests/fixtures/trees.py @@ -29,9 +29,9 @@ def r4t(): ]) -def lm2tm2t(): +def lm2tm2t(ordered=False): return Root([ - List([ + List(ordered, [ ListItem([ Header(2, [ Text('dummy text') @@ -93,3 +93,44 @@ def pta2t(): Text('heading') ]) ]) + + +def Slmtmt(): # noqa + return Root([ + ZendeskHelpSteps([ + List(True, [ + ListItem([ + Text('one') + ]), + ListItem([ + Text('two') + ]) + ]) + ]) + ]) + + +def T1tpt(): # noqa + return Root([ + ZendeskHelpTabs([ + Header(1, [ + Text('tab title') + ]), + Paragraph([ + Text('tab content') + ]) + ]) + ]) + + +def C1tpt(style=None): # noqa + return Root([ + ZendeskHelpCallout(style, [ + Header(1, [ + Text('callout title') + ]), + Paragraph([ + Text('callout content') + ]) + ]) + ]) diff --git a/tests/test_compare.py b/tests/test_compare.py index 9486637..f083a2f 100644 --- a/tests/test_compare.py +++ b/tests/test_compare.py @@ -1,6 +1,7 @@ from unittest import TestCase from sdiff.compare import diff_links, diff_struct +from sdiff.model import List from .fixtures import trees @@ -61,3 +62,17 @@ def test_missing_new_line(self): _, _, errors = diff_struct(trees.pt(), trees.ptnt()) self.assertEqual('n', errors[0].node.symbol) self.assertEqual('text', errors[1].node.text) + + def test_different_lists(self): + unordered = trees.lm2tm2t(False) + ordered = trees.lm2tm2t(True) + _, _, errors = diff_struct(unordered, ordered) + + with self.subTest('missing unordered list'): + actual = errors[0].node + self.assertEqual(actual, List(ordered=False)) + self.assertEqual(actual.meta.get('style'), 'del') + with self.subTest('additional ordered list'): + actual = errors[1].node + self.assertEqual(actual, List(ordered=True)) + self.assertEqual(actual.meta.get('style'), 'ins') diff --git a/tests/test_parser.py b/tests/test_parser.py index 982e16c..498c070 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,13 +1,22 @@ from unittest import TestCase -from sdiff import parser +from sdiff import parser, MdParser, ZendeskHelpMdParser +from sdiff.model import ZendeskHelpSteps -class TestParser(TestCase): +class ParserTestCase(TestCase): + def setUp(self) -> None: + super().setUp() + self.parser_cls = MdParser - def _run_and_assert(self, data, expected): - actual = parser.parse(data).print_all() + def _run_and_assert(self, data: str, expected: str): + actual = parser.parse(data, parser_cls=self.parser_cls).print_all() self.assertEqual(expected, actual) + def _parse(self, data: str): + return parser.parse(data, parser_cls=self.parser_cls) + + +class TestParser(ParserTestCase): def test_empty(self): self._run_and_assert('', '') @@ -36,7 +45,7 @@ def test_exclamation_mark(self): self._run_and_assert('Danke!', 'pt') def test_escape_html(self): - actual = parser.parse('text') + actual = self._parse('text') self.assertEqual('<sub>text</sub>', actual.nodes[0].nodes[0].text) def test_ignore_single_space(self): @@ -49,17 +58,98 @@ def test_space_new_line_saparated_as_single_text(self): self._run_and_assert(' \n', 'xpt') def test_lheading_text(self): - actual = parser.parse('heading\n=============') + actual = self._parse('heading\n=============') self.assertEqual('heading', actual.nodes[0].nodes[0].text) def test_heading_text(self): - actual = parser.parse('###heading') + actual = self._parse('### heading') self.assertEqual('heading', actual.nodes[0].nodes[0].text) def test_link_wrapped_in_text(self): self._run_and_assert('some text [link](url) new text', 'ptat') +class TestZendeskParser(ParserTestCase): + def setUp(self) -> None: + super().setUp() + self.parser_cls = ZendeskHelpMdParser + + def test_callout(self): + fixture = """ + + # title + content + + """ + self._run_and_assert(fixture, 'C1tpt') + + def test_callout_style(self): + fixture = """ + + # title + content + + """ + actual = self._parse(fixture) + self.assertEqual(actual.nodes[0].style, 'green') + + def test_callout_invalid_style(self): + fixture = """ + + # title + content + + """ + actual = self._parse(fixture) + self.assertNotEqual(actual.nodes[0].name, 'callout') + + def test_tabs(self): + fixture = """ + + # title 1 + content 1 + # title 2 + content 2 + + """ + self._run_and_assert(fixture, 'T1tpt1tpt') + + def test_steps(self): + steps_fixture = """ + + 1. one + 2. two + 3. tri + + """ + with self.subTest('happy path'): + self._run_and_assert(steps_fixture, 'Slmtmtmt') + with self.subTest('nested in tabs'): + fixture = """ + + # title 1 + content 1 + # title 2 + %s + + """ % steps_fixture + self._run_and_assert(fixture, 'T1tpt1tSlmtmtmt') + + def test_invalid_closing_tag(self): + fixture = """ + + 1. one + + """ + actual = self._parse(fixture) + self.assertNotEqual(actual.nodes[0], ZendeskHelpSteps()) + + def test_parses_with_invalid_formatting(self): + fixture = '1. one' + actual = self._parse(fixture) + self.assertEqual(actual.nodes[0], ZendeskHelpSteps()) + + class TestReplaceLines(TestCase): def test_single_empty_line(self): diff --git a/tests/test_renderer.py b/tests/test_renderer.py index 33e0d3d..4668b0a 100644 --- a/tests/test_renderer.py +++ b/tests/test_renderer.py @@ -22,6 +22,22 @@ def test_several_elements(self): actual = self.renderer.render(trees.pta2t()) self.assertEqual('test link\n\n##heading', actual) + def test_zendesk_steps(self): + actual = self.renderer.render(trees.Slmtmt()) + self.assertEqual('\n\n0. one\n1. two\n\n\n', actual) + + def test_zendesk_tabs(self): + actual = self.renderer.render(trees.T1tpt()) + self.assertEqual('\n\n#tab title\n\ntab content\n\n', actual) + + def test_zendesk_callout(self): + actual = self.renderer.render(trees.C1tpt()) + self.assertEqual('\n\n#callout title\n\ncallout content\n\n', actual) + + def test_zendesk_callout_styled(self): + actual = self.renderer.render(trees.C1tpt(style='awesome')) + self.assertEqual('\n\n#callout title\n\ncallout content\n\n', actual) + class TestHtmlRenderer(TestCase): diff --git a/tests/test_sdiff.py b/tests/test_sdiff.py index 27e56d9..8179659 100644 --- a/tests/test_sdiff.py +++ b/tests/test_sdiff.py @@ -4,6 +4,8 @@ import sdiff from pathlib import Path +from sdiff import ZendeskHelpMdParser + def _load_fixture(*path): return open(os.path.join('tests/fixtures', *path)).read() @@ -22,13 +24,15 @@ def test_same(self): for case in cases: with self.subTest(case=case): path1, path2 = case - _, _, errors = sdiff.diff(_load_fixture('same', path1), _load_fixture('same', path2)) - self.assertEqual([], errors) + _, _, errors = sdiff.diff(_load_fixture('same', path1), _load_fixture('same', path2), + parser_cls=ZendeskHelpMdParser) + self.assertEqual([], errors, msg=case) def test_different(self): cases = _read_test_files('different') for case in cases: with self.subTest(case=case): path1, path2 = case - _, _, errors = sdiff.diff(_load_fixture('different', path1), _load_fixture('different', path2)) - self.assertNotEqual([], errors) + _, _, errors = sdiff.diff(_load_fixture('different', path1), _load_fixture('different', path2), + parser_cls=ZendeskHelpMdParser) + self.assertNotEqual([], errors, msg=case)