From 601b79865dc6beb50f8500697dfc6e3563e791c5 Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Sun, 9 Apr 2017 22:33:11 -0400 Subject: [PATCH 1/8] support unicode in EmailField --- mongoengine/fields.py | 101 +++++++++++++++++++++++++++++++++++++---- tests/fields/fields.py | 56 +++++++++++++++++------ 2 files changed, 134 insertions(+), 23 deletions(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index 06c56f06c..fe1e40f60 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -4,6 +4,7 @@ import re import time import uuid +import socket import warnings from collections import Mapping from operator import itemgetter @@ -154,21 +155,103 @@ class EmailField(StringField): .. versionadded:: 0.4 """ + USER_REGEX = re.compile( + # `dot-atom` defined in RFC 5322 Section 3.2.3. + r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z" + # `quoted-string` defined in RFC 5322 Section 3.2.4. + r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)', + re.IGNORECASE + ) + + UTF8_USER_REGEX = re.compile( + # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to include + # `UTF8-non-ascii`. + ur"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z" + # `quoted-string` + ur'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)', + re.IGNORECASE | re.UNICODE + ) - EMAIL_REGEX = re.compile( - # dot-atom - r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*" - # quoted-string - r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*"' - # domain (max length of an ICAAN TLD is 22 characters) - r')@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}|[A-Z0-9-]{2,}(? Date: Mon, 10 Apr 2017 08:18:56 -0400 Subject: [PATCH 2/8] flake8 fixes --- mongoengine/fields.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index fe1e40f60..d364185df 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -2,9 +2,9 @@ import decimal import itertools import re +import socket import time import uuid -import socket import warnings from collections import Mapping from operator import itemgetter @@ -223,7 +223,7 @@ def validate_domain_part(self, domain_part): if domain_part[0] == '[' and domain_part[-1] == ']': for addr_family in socket.AF_INET, socket.AF_INET6: try: - return socket.inet_pton(addr_family , domain_part[1:-1]) + socket.inet_pton(addr_family, domain_part[1:-1]) return True except (socket.error, UnicodeEncodeError): pass From 41371e5fc5a751446fa98939eb562a8d6c022244 Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Mon, 10 Apr 2017 10:00:49 -0400 Subject: [PATCH 3/8] empty whitelist by default + allow_ip_domain option --- mongoengine/fields.py | 23 ++++++------- tests/document/instance.py | 2 +- tests/fields/fields.py | 67 +++++++++++++++++++++++++++++--------- 3 files changed, 64 insertions(+), 28 deletions(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index d364185df..2c5835511 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -177,27 +177,24 @@ class EmailField(StringField): re.IGNORECASE ) - domain_whitelist = ['localhost'] - error_msg = u'Invalid email address: %s' - def __init__(self, domain_whitelist=None, allow_utf8_user=False, *args, - **kwargs): + def __init__(self, domain_whitelist=None, allow_utf8_user=False, + allow_ip_domain=False, *args, **kwargs): """Initialize the EmailField. Args: domain_whitelist (list) - list of otherwise invalid domain names which you'd like to support. - Includes "localhost" by default. allow_utf8_user (bool) - if True, the user part of the email address can contain UTF8 characters. False by default. + allow_ip_domain (bool) - if True, the domain part of the email + can be a valid IPv4 or IPv6 address. """ - if domain_whitelist is not None: - self.domain_whitelist = domain_whitelist - + self.domain_whitelist = domain_whitelist or [] self.allow_utf8_user = allow_utf8_user - + self.allow_ip_domain = allow_ip_domain super(EmailField, self).__init__(*args, **kwargs) def validate_user_part(self, user_part): @@ -220,8 +217,12 @@ def validate_domain_part(self, domain_part): return True # Validate IPv4/IPv6, e.g. user@[192.168.0.1] - if domain_part[0] == '[' and domain_part[-1] == ']': - for addr_family in socket.AF_INET, socket.AF_INET6: + if ( + self.allow_ip_domain and + domain_part[0] == '[' and + domain_part[-1] == ']' + ): + for addr_family in (socket.AF_INET, socket.AF_INET6): try: socket.inet_pton(addr_family, domain_part[1:-1]) return True diff --git a/tests/document/instance.py b/tests/document/instance.py index 78d326fc6..c59de96fe 100644 --- a/tests/document/instance.py +++ b/tests/document/instance.py @@ -844,7 +844,7 @@ def test_save(self): class Recipient(Document): email = EmailField(required=True) - recipient = Recipient(email='root@localhost') + recipient = Recipient(email='not-an-email') self.assertRaises(ValidationError, recipient.save) recipient.save(validate=False) diff --git a/tests/fields/fields.py b/tests/fields/fields.py index 78c1ccb5b..87a1a21a1 100644 --- a/tests/fields/fields.py +++ b/tests/fields/fields.py @@ -3468,22 +3468,6 @@ class User(Document): user = User(email='ross@example.com.') self.assertRaises(ValidationError, user.validate) - # localhost should be whitelisted by default - user = User(email='me@localhost') - user.validate() - - # valid IPv4 domain - user = User(email='email@[127.0.0.1]') - user.validate() - - # valid IPv6 domain - user = User(email='email@[2001:dB8::1]') - user.validate() - - # invalid IP - user = User(email='email@[324.0.0.1]') - self.assertRaises(ValidationError, user.validate) - # unicode domain user = User(email=u'user@пример.рф') user.validate() @@ -3492,6 +3476,10 @@ class User(Document): user = User(email=u'user@пример') self.assertRaises(ValidationError, user.validate) + def test_email_field_unicode_user(self): + class User(Document): + email = EmailField() + # unicode user shouldn't validate by default... user = User(email=u'Dörte@Sörensen.example.com') self.assertRaises(ValidationError, user.validate) @@ -3503,6 +3491,53 @@ class User(Document): user = User(email=u'Dörte@Sörensen.example.com') user.validate() + def test_email_field_domain_whitelist(self): + class User(Document): + email = EmailField() + + # localhost domain shouldn't validate by default... + user = User(email='me@localhost') + self.assertRaises(ValidationError, user.validate) + + # ...but it should be fine if it's whitelisted + class User(Document): + email = EmailField(domain_whitelist=['localhost']) + + user = User(email='me@localhost') + user.validate() + + def test_email_field_ip_domain(self): + class User(Document): + email = EmailField() + + valid_ipv4 = 'email@[127.0.0.1]' + valid_ipv6 = 'email@[2001:dB8::1]' + invalid_ip = 'email@[324.0.0.1]' + + # IP address as a domain shouldn't validate by default... + user = User(email=valid_ipv4) + self.assertRaises(ValidationError, user.validate) + + user = User(email=valid_ipv6) + self.assertRaises(ValidationError, user.validate) + + user = User(email=invalid_ip) + self.assertRaises(ValidationError, user.validate) + + # ...but it should be fine with allow_ip_domain set to True + class User(Document): + email = EmailField(allow_ip_domain=True) + + user = User(email=valid_ipv4) + user.validate() + + user = User(email=valid_ipv6) + user.validate() + + # invalid IP should still fail validation + user = User(email=invalid_ip) + self.assertRaises(ValidationError, user.validate) + def test_email_field_honors_regex(self): class User(Document): email = EmailField(regex=r'\w+@example.com') From 33e50e48c14a9233796befdc9c34a900e3b63175 Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Sat, 15 Apr 2017 16:46:36 -0400 Subject: [PATCH 4/8] use six.u --- mongoengine/fields.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index 2c5835511..b631fc48f 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -164,12 +164,13 @@ class EmailField(StringField): ) UTF8_USER_REGEX = re.compile( - # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to include - # `UTF8-non-ascii`. - ur"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z" - # `quoted-string` - ur'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)', - re.IGNORECASE | re.UNICODE + six.u( + # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to + # include `UTF8-non-ascii`. + r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z" + # `quoted-string` + r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)' + ), re.IGNORECASE | re.UNICODE ) DOMAIN_REGEX = re.compile( From dee5465440e665e21e9cfb913cf9c03609750f34 Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Sat, 15 Apr 2017 18:07:32 -0400 Subject: [PATCH 5/8] dont run the unicode email test on pypy3 --- tests/fields/fields.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/fields/fields.py b/tests/fields/fields.py index 87a1a21a1..3241879bf 100644 --- a/tests/fields/fields.py +++ b/tests/fields/fields.py @@ -6,6 +6,7 @@ import itertools import re import pymongo +import sys from nose.plugins.skip import SkipTest from collections import OrderedDict @@ -3477,6 +3478,11 @@ class User(Document): self.assertRaises(ValidationError, user.validate) def test_email_field_unicode_user(self): + # Don't run this test on pypy3, which doesn't support unicode regex: + # https://bitbucket.org/pypy/pypy/issues/1821/regular-expression-doesnt-find-unicode + if sys.version_info[:2] == (3, 2): + raise SkipTest('unicode email addresses are not supported on PyPy 3') + class User(Document): email = EmailField() From 7d5caf836855cc9b843d50df8b17aa4ff897bd6d Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Sat, 15 Apr 2017 22:20:19 -0400 Subject: [PATCH 6/8] switch from octal to hex for consistency --- mongoengine/fields.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index b631fc48f..7a8b359d8 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -159,7 +159,7 @@ class EmailField(StringField): # `dot-atom` defined in RFC 5322 Section 3.2.3. r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z" # `quoted-string` defined in RFC 5322 Section 3.2.4. - r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)', + r'|^"([\x01-\x08\x0b\x0c\x0e-\x1f!#-\[\]-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*"\Z)', re.IGNORECASE ) @@ -167,9 +167,9 @@ class EmailField(StringField): six.u( # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to # include `UTF8-non-ascii`. - r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z" + r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\x80-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\x80-\U0010FFFF]+)*\Z" # `quoted-string` - r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)' + r'|^"([\x01-\x08\x0b\x0c\x0e-\x1f!#-\[\]-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*"\Z)', ), re.IGNORECASE | re.UNICODE ) From a0a3805e2d6961c9e794e1cf0517727902186fb2 Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Sat, 15 Apr 2017 23:14:19 -0400 Subject: [PATCH 7/8] Revert "switch from octal to hex for consistency" (because of pypy3) This reverts commit 7d5caf836855cc9b843d50df8b17aa4ff897bd6d. --- mongoengine/fields.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index 7a8b359d8..b631fc48f 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -159,7 +159,7 @@ class EmailField(StringField): # `dot-atom` defined in RFC 5322 Section 3.2.3. r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z" # `quoted-string` defined in RFC 5322 Section 3.2.4. - r'|^"([\x01-\x08\x0b\x0c\x0e-\x1f!#-\[\]-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*"\Z)', + r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)', re.IGNORECASE ) @@ -167,9 +167,9 @@ class EmailField(StringField): six.u( # RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to # include `UTF8-non-ascii`. - r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\x80-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\x80-\U0010FFFF]+)*\Z" + r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z" # `quoted-string` - r'|^"([\x01-\x08\x0b\x0c\x0e-\x1f!#-\[\]-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*"\Z)', + r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)' ), re.IGNORECASE | re.UNICODE ) From 5e0b97e90cc687805d6311f0cc6315440b53e954 Mon Sep 17 00:00:00 2001 From: Stefan Wojcik Date: Sun, 16 Apr 2017 11:50:28 -0400 Subject: [PATCH 8/8] add a test with an invalid data type --- tests/fields/fields.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/fields/fields.py b/tests/fields/fields.py index 3241879bf..806062561 100644 --- a/tests/fields/fields.py +++ b/tests/fields/fields.py @@ -3477,6 +3477,10 @@ class User(Document): user = User(email=u'user@пример') self.assertRaises(ValidationError, user.validate) + # invalid data type + user = User(email=123) + self.assertRaises(ValidationError, user.validate) + def test_email_field_unicode_user(self): # Don't run this test on pypy3, which doesn't support unicode regex: # https://bitbucket.org/pypy/pypy/issues/1821/regular-expression-doesnt-find-unicode