Skip to content

Commit

Permalink
Add possibility to cache dns lookups (#58)
Browse files Browse the repository at this point in the history
Add optional argument dns_resolver to validate_email. If provided it will be used instead of the default resolver. The provided resolver can have a configured cache and custom timeout.

Co-authored-by: Joshua Tauberer <jt@occams.info>
  • Loading branch information
HeyHugo and JoshData authored Nov 15, 2020
1 parent ad53fb4 commit 1431de0
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 25 deletions.
29 changes: 18 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Key features:
login forms or other uses related to identifying users.
* Gives friendly error messages when validation fails (appropriate to show
to end users).
* (optionally) Checks deliverability: Does the domain name resolve?
* (optionally) Checks deliverability: Does the domain name resolve? And you can override the default DNS resolver.
* Supports internationalized domain names and (optionally)
internationalized local parts.
* Normalizes email addresses (super important for internationalized
Expand Down Expand Up @@ -69,23 +69,27 @@ This validates the address and gives you its normalized form. You should
put the normalized form in your database and always normalize before
checking if an address is in your database.

The validator will accept internationalized email addresses, but email
addresses with non-ASCII characters in the *local* part of the address
(before the @-sign) require the
[SMTPUTF8](https://tools.ietf.org/html/rfc6531) extension which may not
be supported by your mail submission library or your outbound mail
server. If you know ahead of time that SMTPUTF8 is not supported then
**add the keyword argument allow\_smtputf8=False to fail validation for
addresses that would require SMTPUTF8**:
When validating many email addresses or to control the timeout (the default is 15 seconds), create a caching [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to reuse in each call:

```python
valid = validate_email(email, allow_smtputf8=False)
from email_validator import validate_email, caching_resolver

resolver = caching_resolver(timeout=10)

while True:
valid = validate_email(email, dns_resolver=resolver)
```

The validator will accept internationalized email addresses, but not all
mail systems can send email to an addresses with non-ASCII characters in
the *local* part of the address (before the @-sign). See the `allow_smtputf8`
option below.


Overview
--------

The module provides a single function `validate_email(email_address)` which
The module provides a function `validate_email(email_address)` which
takes an email address (either a `str` or ASCII `bytes`) and:

- Raises a `EmailNotValidError` with a helpful, human-readable error
Expand Down Expand Up @@ -128,6 +132,9 @@ shown):

`allow_empty_local=False`: Set to `True` to allow an empty local part (i.e.
`@example.com`), e.g. for validating Postfix aliases.

`dns_resolver=None`: Pass an instance of [dns.resolver.Resolver](https://dnspython.readthedocs.io/en/latest/resolver-class.html) to control the DNS resolver including setting a timeout and [a cache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html). The `caching_resolver` function shown above is a helper function to construct a dns.resolver.Resolver with a [LRUCache](https://dnspython.readthedocs.io/en/latest/resolver-caching.html#dns.resolver.LRUCache). Reuse the same resolver instance across calls to `validate_email` to make use of the cache.


Internationalized email addresses
---------------------------------
Expand Down
38 changes: 25 additions & 13 deletions email_validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,20 @@ def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH):
return reason.format(prefix, diff, suffix)


def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None):
resolver = dns.resolver.Resolver()
resolver.cache = cache or dns.resolver.LRUCache()
resolver.lifetime = timeout # timeout, in seconds
return resolver


def validate_email(
email,
allow_smtputf8=True,
allow_empty_local=False,
check_deliverability=True,
timeout=DEFAULT_TIMEOUT,
dns_resolver=None
):
"""
Validates an email address, raising an EmailNotValidError if the address is not valid or returning a dict of
Expand Down Expand Up @@ -273,7 +281,9 @@ def validate_email(
if check_deliverability:
# Validate the email address's deliverability and update the
# return dict with metadata.
deliverability_info = validate_email_deliverability(ret["domain"], ret["domain_i18n"], timeout)
deliverability_info = validate_email_deliverability(
ret["domain"], ret["domain_i18n"], timeout, dns_resolver
)
if "mx" in deliverability_info:
ret.mx = deliverability_info["mx"]
ret.mx_fallback_type = deliverability_info["mx-fallback"]
Expand Down Expand Up @@ -443,15 +453,22 @@ def validate_email_domain_part(domain):
}


def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT):
def validate_email_deliverability(domain, domain_i18n, timeout=DEFAULT_TIMEOUT, dns_resolver=None):
# Check that the domain resolves to an MX record. If there is no MX record,
# try an A or AAAA record which is a deprecated fallback for deliverability.

def dns_resolver_resolve_shim(resolver, domain, record):
# If no dns.resolver.Resolver was given, get dnspython's default resolver.
# Override the default resolver's timeout. This may affect other uses of
# dnspython in this process.
if dns_resolver is None:
dns_resolver = dns.resolver.get_default_resolver()
dns_resolver.lifetime = timeout

def dns_resolver_resolve_shim(domain, record):
try:
# dns.resolver.Resolver.resolve is new to dnspython 2.x.
# https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.resolve
return resolver.resolve(domain, record)
return dns_resolver.resolve(domain, record)
except AttributeError:
# dnspython 2.x is only available in Python 3.6 and later. For earlier versions
# of Python, we maintain compatibility with dnspython 1.x which has a
Expand All @@ -460,7 +477,7 @@ def dns_resolver_resolve_shim(resolver, domain, record):
# which we prevent by adding a "." to the domain name to make it absolute.
# dns.resolver.Resolver.query is deprecated in dnspython version 2.x.
# https://dnspython.readthedocs.io/en/latest/resolver-class.html#dns.resolver.Resolver.query
return resolver.query(domain + ".", record)
return dns_resolver.query(domain + ".", record)

try:
# We need a way to check how timeouts are handled in the tests. So we
Expand All @@ -469,28 +486,23 @@ def dns_resolver_resolve_shim(resolver, domain, record):
if getattr(validate_email_deliverability, 'TEST_CHECK_TIMEOUT', False):
raise dns.exception.Timeout()

resolver = dns.resolver.get_default_resolver()

if timeout:
resolver.lifetime = timeout

try:
# Try resolving for MX records and get them in sorted priority order.
response = dns_resolver_resolve_shim(resolver, domain, "MX")
response = dns_resolver_resolve_shim(domain, "MX")
mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response])
mx_fallback = None
except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):

# If there was no MX record, fall back to an A record.
try:
response = dns_resolver_resolve_shim(resolver, domain, "A")
response = dns_resolver_resolve_shim(domain, "A")
mtas = [(0, str(r)) for r in response]
mx_fallback = "A"
except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):

# If there was no A record, fall back to an AAAA record.
try:
response = dns_resolver_resolve_shim(resolver, domain, "AAAA")
response = dns_resolver_resolve_shim(domain, "AAAA")
mtas = [(0, str(r)) for r in response]
mx_fallback = "AAAA"
except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
Expand Down
28 changes: 27 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from unittest import mock
import dns.resolver
import pytest
from email_validator import EmailSyntaxError, EmailUndeliverableError, \
validate_email, validate_email_deliverability, \
ValidatedEmail
caching_resolver, ValidatedEmail
# Let's test main but rename it to be clear
from email_validator import main as validator_main

Expand Down Expand Up @@ -344,3 +346,27 @@ def test_main_output_shim(monkeypatch, capsys):
# The \n is part of the print statement, not part of the string, which is what the b'...' is
# Since we're mocking py 2.7 here instead of actually using 2.7, this was the closest I could get
assert stdout == "b'An email address cannot have a period immediately after the @-sign.'\n"


@mock.patch("dns.resolver.LRUCache.put")
def test_validate_email__with_caching_resolver(mocked_put):
dns_resolver = caching_resolver()
validate_email("test@gmail.com", dns_resolver=dns_resolver)
assert mocked_put.called

with mock.patch("dns.resolver.LRUCache.get") as mocked_get:
validate_email("test@gmail.com", dns_resolver=dns_resolver)
assert mocked_get.called


@mock.patch("dns.resolver.LRUCache.put")
def test_validate_email__with_configured_resolver(mocked_put):
dns_resolver = dns.resolver.Resolver()
dns_resolver.lifetime = 10
dns_resolver.cache = dns.resolver.LRUCache(max_size=1000)
validate_email("test@gmail.com", dns_resolver=dns_resolver)
assert mocked_put.called

with mock.patch("dns.resolver.LRUCache.get") as mocked_get:
validate_email("test@gmail.com", dns_resolver=dns_resolver)
assert mocked_get.called

0 comments on commit 1431de0

Please sign in to comment.