Skip to content

Commit

Permalink
In linkify(), stop scanning when an RFC 1738-specified unsafe char is…
Browse files Browse the repository at this point in the history
… encountered in the path portion of the URL.

This keeps URLs from including garbage that happens to be sitting next to them, e.g. www.fred.com/stuff{don't include this}.
  • Loading branch information
erikrose committed Oct 11, 2010
1 parent d1b2a25 commit 002bcba
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
4 changes: 3 additions & 1 deletion bleach/__init__.py
Expand Up @@ -48,7 +48,9 @@

# Verbose-mode pattern used to find URLs in text: an optional "scheme:"
# followed by up to three slashes, one or more dot-terminated labels that are
# not directly preceded by "@", one of the TLDS alternatives, an optional
# path/query introduced by "/" or "?", and a closing word boundary.
# NOTE(review): this excerpt comes from a diff view, so it appears to show the
# old path rule (`\S*`) and its replacement (the negated character class) back
# to back; presumably only the second belongs in the file -- confirm.
url_re = re.compile(r"""\b(?:[\w-]+:/{0,3})? # http://
(?<!@)([\w-]+\.)+(?:%s) # xx.yy.tld
(?:[/?]\S*)? # /path/zz
(?:[/?][^\s\{\}\|\\\^\[\]`<>"\x80-\xFF\x00-\x1F\x7F]*)?
# /path/zz (excluding "unsafe" chars from RFC 1738,
# except for # and ~, which happen in practice)
\b # Break at a word boundary.
""" % u'|'.join(TLDS),
re.VERBOSE)
Expand Down
7 changes: 7 additions & 0 deletions bleach/tests/test_links.py
Expand Up @@ -134,3 +134,10 @@ def test_javascript_url():
"""javascript: urls should never be linkified."""
s = 'javascript:document.vulnerable'
eq_(s, b.linkify(s))


def test_unsafe_url():
    """Check that linkify() ends a URL at the first unsafe path character.

    Characters such as {}[]<> and the double quote are not part of the link,
    so the text surrounding the URL stays outside the generated anchor.
    """
    text = 'All your{"xx.yy.com/grover.png"}base are'
    expected = ('All your{"<a href="http://xx.yy.com/grover.png" '
                'rel="nofollow">xx.yy.com/grover.png</a>"}base are')
    eq_(expected, b.linkify(text))

0 comments on commit 002bcba

Please sign in to comment.