Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IDN support to colander.url #352

Merged
merged 6 commits into from Jan 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
@@ -1,3 +1,8 @@
1.8.4 (Unreleased)
==================

- Add IDN support to ``colander.url``.

1.8.3 (2020-11-28)
==================

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Expand Up @@ -16,6 +16,7 @@ exclude = '''
| dist
| build
| docs
| env.*
)/
'''

Expand Down
67 changes: 47 additions & 20 deletions src/colander/__init__.py
Expand Up @@ -607,26 +607,53 @@ def _luhnok(value):
return checksum


# Gingerly lifted from Django 1.3.x:
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
# <3 y'all!
# Source of the URL-matching pattern, built from adjacent raw-string pieces.
# The character classes list upper-case letters only.
# NOTE(review): this name is assigned again further down from
# _make_url_regex_src(); this literal looks like the removed side of the
# diff -- confirm before relying on it.
URL_REGEX = (
# Optional scheme, captured as group 1: http, https, ftp or ftps plus "://"
r'^((?:http|ftp)s?://)?'
# Domain: one or more dot-terminated labels of 1-63 characters that neither
# start nor end with a hyphen ...
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+'
# ... followed by a 2-6 letter TLD or a label of 2+ letters/digits/hyphens,
# either with an optional trailing dot
r'(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'
# Localhost
r'localhost|'
# IPv6 address: a bracketed run of hex digits and colons (structure is not
# checked)
r'\[[a-f0-9:]+\]|'
# IPv4 address: four dot-separated groups of 1-3 digits (range is not
# checked)
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
# Optional port: a colon followed by one or more digits
r'(?::\d+)?'
# Path: nothing, a lone "/", or "/" or "?" followed by one or more
# non-whitespace characters
r'(?:/?|[/?]\S+)$'
)
# Ported from Django 4.1.x:
# https://github.com/django/django/blob/stable/4.1.x/django/core/validators.py#L70


def _make_url_regex_src():
    """Build the source text of the regular expression used to match URLs.

    The returned pattern lists lower-case ``a-z`` only in its character
    classes, so it is meant to be compiled case-insensitively.  It accepts,
    in order: an optional http/https/ftp/ftps scheme (capture group 1),
    optional ``user[:password]@`` credentials, a host (IPv4 address,
    bracketed IPv6 address, fully qualified domain name or ``localhost``),
    an optional port of one to five digits and an optional resource part
    starting with ``/``, ``?`` or ``#``.

    :returns: the pattern as a string, anchored at both ends.
    """
    # Range of non-ASCII characters accepted in host labels.  This must be
    # a normal (non-raw) literal so the \u escapes become real characters.
    ul = u"\u00a1-\uffff"

    # Character classes shared by the host pieces below: the first/last
    # character of a label may not be a hyphen, inner characters may be.
    label_edge = r"[a-z" + ul + r"0-9]"
    label_inner = r"[a-z" + ul + r"0-9-]"

    # IPv4: four dot-separated octets, each 0-255 without leading zeros.
    octet = r"(?:0|25[0-5]|2[0-4][0-9]|1[0-9]?[0-9]?|[1-9][0-9]?)"
    ipv4_re = octet + r"(?:\." + octet + r"){3}"

    # IPv6: only a loose bracketed form; no structural validation here.
    ipv6_re = r"\[[0-9a-f:.]+\]"

    # First label of the host name: 1 to 63 characters.
    hostname_re = "".join(
        [label_edge, r"(?:", label_inner, r"{0,61}", label_edge, r")?"]
    )

    # Any number of further labels.  RFC 1034 sec. 3.1 caps a domain name
    # label at 63 characters.
    domain_re = "".join([r"(?:\.(?!-)", label_inner, r"{1,63}(?<!-))*"])

    tld_re = "".join(
        [
            r"\.",  # separating dot
            r"(?!-)",  # must not begin with a dash
            r"(?:[a-z",
            ul,
            r"-]{2,63}",  # ordinary label (no digits)
            r"|xn--[a-z0-9]{1,59})",  # or a punycode label
            r"(?<!-)",  # must not end with a dash
            r"\.?",  # optional trailing dot
        ]
    )

    # NB: this is a capturing group (group 2 of the final pattern).
    host_re = "".join(["(", hostname_re, domain_re, tld_re, "|localhost)"])

    pieces = [
        r"^((?:http|ftp)s?://)?",  # {http,ftp}s:// (not required)
        r"(?:[^\s:@/]+(?::[^\s:@/]*)?@)?",  # user:pass authentication
        r"(?:",
        ipv4_re,
        "|",
        ipv6_re,
        "|",
        host_re,
        ")",
        r"(?::[0-9]{1,5})?",  # port
        r"(?:[/?#][^\s]*)?",  # resource path
        r"\Z",
    ]
    return "".join(pieces)


# Build the pattern source once at import time and keep it under the
# module-level name URL_REGEX.
URL_REGEX = _make_url_regex_src()
# The builder is only needed for the assignment above, so drop it from the
# module namespace.
del _make_url_regex_src

# URL validator: applies URL_REGEX with re.IGNORECASE and uses
# 'Must be a URL' as its message.  The tests call it as url(None, value) and
# expect None for a valid value and colander.Invalid otherwise.
url = Regex(URL_REGEX, msg=_('Must be a URL'), flags=re.IGNORECASE)

Expand Down
107 changes: 84 additions & 23 deletions tests/test_colander.py
Expand Up @@ -640,50 +640,111 @@ def _callFUT(self, val):

return url(None, val)

def test_it_success(self):
val = 'http://example.com'
def _assert_success(self, val):
result = self._callFUT(val)
self.assertEqual(result, None)

def test_it_failure(self):
val = 'not-a-url'
def _assert_failure(self, val):
from colander import Invalid

self.assertRaises(Invalid, self._callFUT, val)

def test_it_success(self):
self._assert_success('http://example.com')

def test_it_failure(self):
self._assert_failure('not-a-url')

def test_add_sample_dos(self):
# With the old regex (colander <=1.6) this input triggered catastrophic
# backtracking, which made the regex engine take so long that it
# effectively hung.
val = "http://www.mysite.com/(tttttttttttttttttttttt.jpg"
self._assert_success(
"http://www.mysite.com/(tttttttttttttttttttttt.jpg"
)

result = self._callFUT(val)
self.assertEqual(result, None)
def test_no_scheme(self):
self._assert_success("www.mysite.com")

def test_website_no_scheme(self):
val = "www.mysite.com"
def test_file_scheme_raises(self):
self._assert_failure("file:///this/is/a/file.jpg")

result = self._callFUT(val)
self.assertEqual(result, None)
def test_auth_user(self):
self._assert_success("http://user@mysite.com")

def test_ipv6(self):
val = "http://[2001:db8::0]/"
def test_auth_user_blank_password(self):
self._assert_success("http://user:@mysite.com")

result = self._callFUT(val)
self.assertEqual(result, None)
def test_auth_user_password(self):
self._assert_success("http://user:password@mysite.com")

def test_ipv4(self):
val = "http://192.0.2.1/"
def test_auth_user_password_with_quoted_atmark(self):
self._assert_success("http://user:pass%40word@mysite.com")

result = self._callFUT(val)
self.assertEqual(result, None)
def test_host_ipv6(self):
self._assert_success("http://[2001:db8::0]/")

def test_file_raises(self):
from colander import Invalid
def test_host_ipv4(self):
self._assert_success("http://192.0.2.1/")

val = "file:///this/is/a/file.jpg"
def test_host_fqdn_dot_finished(self):
self._assert_success("http://www.mysite.com.")

self.assertRaises(Invalid, self._callFUT, val)
def test_host_fqdn_dot_started_raises(self):
self._assert_failure("http://.mysite.com")

def test_host_fqdn_hyphen_contains(self):
self._assert_success("http://www.my-site.com")

def test_host_fqdn_hyphen_finished_raises(self):
self._assert_failure("http://www.mysite-.com")

def test_host_fqdn_hyphen_started_raises(self):
self._assert_failure("http://www.-mysite.com")

def test_host_i18n_idna(self):
self._assert_success("http://xn--vck8cuc4a.com")

def test_host_i18n_raw(self):
self._assert_success(
text_(
b"http://\xe3\x82\xb5\xe3\x83\xb3\xe3\x83\x97\xe3\x83\xab.com",
"utf-8",
)
)

def test_host_localhost(self):
self._assert_success("http://localhost/")

def test_host_no_fqdn_failure(self):
self._assert_failure("http://mysite")

def test_port(self):
self._assert_success("http://mysite.com:8080")

def test_no_port_raises(self):
self._assert_failure("http://mysite.com:/path")

def test_wrong_port_raises(self):
self._assert_failure("http://mysite.com:aaa")

def test_qs(self):
self._assert_success("http://mysite.com/path?k=v")

def test_fragment(self):
self._assert_success("http://mysite.com/path#fragment")

def test_qs_fragment(self):
self._assert_success("http://mysite.com/path?k=v#fragment")

def test_slashless_qs(self):
self._assert_success("http://mysite.com?k=v")

def test_slashless_fragment(self):
self._assert_success("http://mysite.com#fragment")

def test_trailing_space_raises(self):
self._assert_failure("http://mysite.com ")


class Test_file_uri_validator(unittest.TestCase):
Expand Down