Skip to content

Commit

Permalink
Merge pull request #8 from PolicyStat/issue_8
Browse files Browse the repository at this point in the history
If a 100% change of text in TD breaks the table layout
  • Loading branch information
jlward committed Jan 19, 2015
2 parents 432096f + 57aa2f2 commit f2f1a68
Show file tree
Hide file tree
Showing 5 changed files with 399 additions and 4 deletions.
50 changes: 47 additions & 3 deletions htmltreediff/html.py
@@ -1,11 +1,13 @@
from htmltreediff.util import (
check_text_similarity,
is_element,
minidom_tostring,
parse_minidom,
parse_text,
minidom_tostring,
remove_node,
unwrap,
wrap_inner,
remove_node,
check_text_similarity,
wrap_nodes,
)
from htmltreediff.changes import dom_diff, distribute

Expand Down Expand Up @@ -46,10 +48,50 @@ def diff(old_html, new_html, cutoff=0.0, plaintext=False, pretty=False):
return minidom_tostring(dom, pretty=pretty)


def _internalize_changes_markup(dom, child_tag_names):
# Delete tags are always ordered first.
for del_tag in list(dom.getElementsByTagName('del')):
ins_tag = del_tag.nextSibling
# The one child tag of `del_tag` should be child_tag_names
if len(del_tag.childNodes) != 1:
continue
if ins_tag is None or len(ins_tag.childNodes) != 1:
continue
if ins_tag.tagName != 'ins':
continue
deleted_tag = del_tag.firstChild
if not is_element(deleted_tag):
continue
if deleted_tag.tagName not in child_tag_names:
continue
# The one child tag of `ins_tag` should be child_tag_names
inserted_tag = ins_tag.firstChild
if not is_element(inserted_tag):
continue
if inserted_tag.tagName not in child_tag_names:
continue

attributes = dict(
[key, value] for key, value in
inserted_tag.attributes.items()
)
nodes_to_unwrap = [
deleted_tag,
inserted_tag,
]
for n in nodes_to_unwrap:
unwrap(n)
new_node = wrap_nodes([del_tag, ins_tag], inserted_tag.tagName)
for key, value in attributes.items():
new_node.setAttribute(key, value)


def fix_lists(dom):
# <ins> and <del> tags are not allowed within <ul> or <ol> tags.
# Move them to the nearest li, so that the numbering isn't interrupted.

_internalize_changes_markup(dom, set(['li']))

# Find all del > li and ins > li sets.
del_tags = set()
ins_tags = set()
Expand All @@ -73,6 +115,8 @@ def fix_lists(dom):


def fix_tables(dom):
_internalize_changes_markup(dom, set(['td', 'th']))

# Show table row insertions
tags = set()
for node in list(dom.getElementsByTagName('tr')):
Expand Down
152 changes: 152 additions & 0 deletions htmltreediff/test_html.py
Expand Up @@ -374,6 +374,158 @@ def test_fix_lists():
</ol>
'''
),
(
'LI full content change does not add another LI',
'''
<ol>
<del>
<li>AAA</li>
</del>
<ins>
<li>BBB</li>
</ins>
</ol>
''',
'''
<ol>
<li><del>AAA</del><ins>BBB</ins></li>
</ol>
'''
),
(
'LI full content change keeps attrs',
'''
<ol>
<del>
<li class="old" id="foo">AAA</li>
</del>
<ins>
<li class="new">BBB</li>
</ins>
</ol>
''',
'''
<ol>
<li class="new"><del>AAA</del><ins>BBB</ins></li>
</ol>
'''
),
(
'LI changes markup internalization fix not done if next tag is not an insert', # noqa
'''
<ol>
<del>
<li>AAA</li>
</del>
<li><strong>BBB</strong></li>
<ins>
<li>CCC</li>
</ins>
</ol>
''',
'''
<ol>
<li class="del-li">
<del>AAA</del>
</li>
<li><strong>BBB</strong></li>
<li><ins>CCC</ins></li>
</ol>
''',
),
(
'LI changes markup internalization fix not done if next tag is not an insert', # noqa
'''
<ol>
<del>
<li>AAA</li>
</del>
<li><strong>BBB</strong></li>
<ins>
<li>CCC</li>
</ins>
</ol>
''',
'''
<ol>
<li class="del-li">
<del>AAA</del>
</li>
<li><strong>BBB</strong></li>
<li><ins>CCC</ins></li>
</ol>
''',
),
(
'LI after del must be ins',
'''
<ol>
<del>
<li>AAA</li>
</del>
<del>
<li>BBB</li>
</del>
<ins>
<li>CCC</li>
</ins>
</ol>
''',
'''
<ol>
<li class="del-li">
<del>AAA</del>
</li>
<li><del>BBB</del><ins>CCC</ins></li>
</ol>
''',
),
(
'LI changes markup internalization fix not performed if next tags child is not li', # noqa
'''
<ol>
<del>
<li>AAA</li>
</del>
<ins>
<foo>BBB</foo>
</ins>
</ol>
''',
'''
<ol>
<li class="del-li">
<del>AAA</del>
</li>
<ins>
<foo>BBB</foo>
</ins>
</ol>
''',
),
(
'LI changes markup internalization fix not performed if next tags is text', # noqa
'''
<ol>
<del>
<li>AAA</li>
</del>
<ins>
BBB
</ins>
</ol>
''',
'''
<ol>
<li class="del-li">
<del>AAA</del>
</li>
<ins>
BBB
</ins>
</ol>
''',
),
]
for test_name, changes, fixed_changes in cases:
changes = collapse(changes)
Expand Down

0 comments on commit f2f1a68

Please sign in to comment.