Skip to content

Commit

Permalink
Merge pull request #35 from NextThought/unicode_disallowed_tag
Browse files Browse the repository at this point in the history
Ensure disallowed tags include unicode empty str
  • Loading branch information
jzuech3 committed Sep 14, 2020
2 parents 64adaba + b3e1581 commit c5a21ea
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 3 deletions.
3 changes: 2 additions & 1 deletion CHANGES.rst
Expand Up @@ -5,7 +5,8 @@
1.6.1 (unreleased)
==================

- Nothing changed yet.
- Ensure disallowed tags nested within anchors do not raise.
See `issue 34 <https://github.com/NextThought/nti.contentfragments/issues/34>`_.


1.6.0 (2020-09-02)
Expand Down
3 changes: 2 additions & 1 deletion src/nti/contentfragments/html.py
Expand Up @@ -185,6 +185,7 @@ def _in_anchor(self):
def __iter__(self):
for token in super(_SanitizerFilter, self).__iter__():
if token:
__traceback_info__ = token
token_type = token["type"]
if token_type == 'Characters' and not self._in_anchor:
for text_token in self._find_links_in_text(token):
Expand Down Expand Up @@ -267,7 +268,7 @@ def disallowed_token(self, token):

# Otherwise, don't escape the tag, simply drop the tag name, but
# preserve the contents.
token['data'] = ''
token['data'] = u''
token["type"] = "Characters"

del token["name"]
Expand Down
9 changes: 8 additions & 1 deletion src/nti/contentfragments/tests/test_html.py
Expand Up @@ -129,7 +129,6 @@ def test_sanitize_user_html_chat(self):
assert_that(frag_html._html_to_sanitized_text(plain_text),
is_(same_instance(plain_text)))


def test_sanitize_img(self):
html = '<html><body><img style="color: blue; text-align: left; max-width: 10px" href="foo"></body></html>'
exp = '<html><body><img href="foo" style="color: blue; text-align: left; max-width: 100%;" /></body></html>'
Expand Down Expand Up @@ -203,6 +202,14 @@ def test_nested_anchors(self):
'<a href="www.google.com">www.google.com</a></p></body></html>'
_check_sanitized(html, exp)

def test_disallowed(self):
html = '<div>'
_check_sanitized(html, u'')

def test_disallowed_within_anchor(self):
html = '<a href="www.nextthought.com"><div>test</div></a>'
_check_sanitized(html, u'<html><body><a href="www.nextthought.com">test</a></body></html>')


@contextlib.contextmanager
def _provide_utility(util):
Expand Down

0 comments on commit c5a21ea

Please sign in to comment.