Skip to content

Commit

Permalink
Use same regexp everywhere; downcase domain list in generator
Browse files Browse the repository at this point in the history
- Use the same valid email regexp everywhere - based on PHP's
  FILTER_VALIDATE_EMAIL,
- Lowercase the list entries in the generator, rather than in each
  match implementation.
  • Loading branch information
owst committed Mar 18, 2016
1 parent 6b91eb0 commit fcbc296
Show file tree
Hide file tree
Showing 17 changed files with 54 additions and 62 deletions.
24 changes: 14 additions & 10 deletions lib/generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ var p = require("path");
var mu = require('mu2');
var _ = require('lodash');
var EventEmitter2 = require('eventemitter2').EventEmitter2;
var multiline = require('multiline');
var loader = require("./loader");

var Generator = new EventEmitter2({
Expand All @@ -31,19 +30,24 @@ Generator.parsePath = function (fullpath) {
};
};

// Extracted from validator
var emailRegexp = multiline(function () {
/*
/^((([a-z]|\d|[!#\$%&'\*\+\-\/=\?\^_`{\|}~]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])+(\.([a-z]|\d|[!#\$%&'\*\+\-\/=\?\^_`{\|}~]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])+)*)|((\x22)((((\x20|\x09)*(\x0d\x0a))?(\x20|\x09)+)?(([\x01-\x08\x0b\x0c\x0e-\x1f\x7f]|\x21|[\x23-\x5b]|[\x5d-\x7e]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(\\([\x01-\x09\x0b\x0c\x0d-\x7f]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]))))*(((\x20|\x09)*(\x0d\x0a))?(\x20|\x09)+)?(\x22)))@((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))$/i
*/
}).trim();
// Based on PHP FILTER_VALIDATE_EMAIL
// See https://fightingforalostcause.net/content/misc/2006/compare-email-regex.php
/**
 * Returns the regular expression used to decide whether a string is a
 * syntactically valid email address.
 *
 * The pattern is anchored with ^ and $, so it must match the whole input.
 * The literal carries no flags, so as written it is case-sensitive: the
 * domain labels are matched with [a-z0-9] and the address-literal prefix
 * is spelled exactly "IPv6:".
 *
 * @returns {RegExp} a RegExp object created from the literal below.
 */
Generator.getEmailRegexp = function() {
return /^(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){255,})(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){65,}@)(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22))(?:\.(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\]))$/;
};

Generator.getEmailRegexp = _.partial(_.identity, emailRegexp);
/**
 * Returns the domain list supplied by the loader with every entry
 * lowercased, so consumers do not each have to normalise case themselves.
 *
 * @returns {string[]} the result of mapping `loader.getList()` through
 *   `toLowerCase()`; the list returned by the loader is not modified.
 */
Generator.getDomainList = function () {
  return loader.getList().map(function (domain) {
    return domain.toLowerCase();
  });
};

Generator.compile = function (fn) {
var templates = this.getTemplatesSync();
var regexp = this.getEmailRegexp();
var list = loader.getList();
// Remove /'s from the string representation to allow language-specific
// regexp delimiters to be used.
var regexp = Generator.getEmailRegexp().toString().slice(1, -1);
var list = Generator.getDomainList();
var listJSON = JSON.stringify(list);
var listSTR = listJSON.substring(1, listJSON.length - 1);

Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
"glob": "^5.0.14",
"lodash": "3.x.x",
"mu2": "~0.5.20",
"multiline": "1.0.x",
"node-range": "0.1.x"
},
"devDependencies": {
Expand Down
7 changes: 2 additions & 5 deletions platform/clojure/mailchecker.clj

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions platform/clojure/mailchecker.tmpl.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,10 @@

(def ^:const blacklist (set [{{& listSTR }}]))

; Source: https://github.com/scstarkey/noir/blob/998e846dd44f42b8e01a6977e6d22a3eff5e4542/src/noir/validation.clj#L37-L40
; Modified to return true/false
(defn is-email?
"Returns true if email is an email address"
[email]
(if (re-matches #"(?i)[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?" email)
true false))
(if (re-matches #"{{& regexp }}" email) true false))

(defn at-split
"Returns list from string splitted on @ char"
Expand Down
2 changes: 1 addition & 1 deletion platform/elixir/mail_checker.ex

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion platform/elixir/mail_checker.tmpl.ex
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ defmodule MailChecker do
end

def valid_address?(email) do
Regex.match?(~r/^(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){255,})(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){65,}@)(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22))(?:\.(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\]))$/i, email)
Regex.match?(~r/{{& regexp}}/i, email)
end
end
7 changes: 4 additions & 3 deletions platform/javascript/MailChecker.js

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions platform/javascript/MailChecker.tmpl.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
*
* <script type="text/javascript" src="mailchecker/platform/javascript/mailchecker.js"></script>
* <script type="text/javascript">
* alert(MailChecker("plop@plop.33mail.com"));
* alert(MailChecker.is_valid("plop@plop.33mail.com"));
* </script>
*/

(function(global){
var isValidEmail = {{& regexp }};
var isValidEmail = /{{& regexp }}/;
var blacklist = [{{& listSTR }}];

function mapRange(start, endExclusive, f) {
Expand Down Expand Up @@ -40,6 +40,7 @@

global.MailChecker = {
is_valid: function (email){
email = email.toLowerCase();
if(!isValidEmail.test(email)){return false;}
return !is_blacklisted(email);
},
Expand Down
5 changes: 3 additions & 2 deletions platform/node/index.js

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion platform/node/index.tmpl.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
var range = require('node-range');

var blacklist = [{{& listSTR }}];
var isValidEmail = {{& regexp }};
var isValidEmail = /{{& regexp }}/;

function all_domain_suffixes(email) {
var domain_components = email.split('@')[1].split('.');
Expand All @@ -28,6 +28,7 @@ function is_blacklisted(email) {

module.exports = {
is_valid: function (email){
email = email.toLowerCase();
if(!isValidEmail.test(email)){return false;}
return !is_blacklisted(email);
},
Expand Down
5 changes: 1 addition & 4 deletions platform/php/MailChecker.php

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions platform/php/MailChecker.tmpl.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ class MailChecker {
private $blacklist;

public function __construct() {
$this->blacklist = array_map(
'strtolower',
array_unique(array({{& listSTR }}))
);
$this->blacklist = array_unique(array({{& listSTR }}));
}

public function isValid($email) {
Expand Down
8 changes: 2 additions & 6 deletions platform/python/MailChecker.py

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions platform/python/MailChecker.tmpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,8 @@
class MailChecker(object):

def __init__(self):
self.blacklist = set([s.lower() for s in [{{& listSTR}}]])

# Based on PHP FILTER_VALIDATE_EMAIL
# See https://fightingforalostcause.net/content/misc/2006/compare-email-regex.php
self.email_regex = r"^(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){255,})(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){65,}@)(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22))(?:\.(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\]))$"
self.valid_matcher = re.compile(self.email_regex)
self.blacklist = set([{{& listSTR}}])
self.valid_matcher = re.compile(r"{{& regexp}}")

def is_valid(self, email):
email = email.lower().strip()
Expand Down
6 changes: 1 addition & 5 deletions platform/ruby/mail_checker.rb

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions platform/ruby/mail_checker.tmpl.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
require 'set'

module MailChecker
# Based on PHP FILTER_VALIDATE_EMAIL
# See https://fightingforalostcause.net/content/misc/2006/compare-email-regex.php
EMAIL_REGEX = /^(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){255,})(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){65,}@)(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22))(?:\.(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\]))$/i
EMAIL_REGEX = /{{& regexp}}/i
# Blacklisted domains
BLACKLIST = [{{& listSTR }}].
map(&:downcase).
to_set
BLACKLIST = [{{& listSTR }}].to_set

def self.valid?(email)
return false unless valid_email?(email)
Expand Down
15 changes: 14 additions & 1 deletion test/generator.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,23 @@ suite('Generator', function () {
});
});

suite('.getDomainList', function () {
  test('should return a non-empty list of lowercase strings', function () {
    var domains = Generator.getDomainList();

    domains.length.should.be.above(0);

    // forEach, not map: the callback only asserts, and no mapped array is
    // needed. An entry is lowercase when lowercasing leaves it unchanged.
    domains.forEach(function (domain) {
      domain.should.eql(domain.toLowerCase());
    });
  });
});

suite('.getEmailRegexp', function () {
test('should return the regexp', function () {
var r = Generator.getEmailRegexp();
r.should.eql("/^((([a-z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])+(\\.([a-z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])+)*)|((\\x22)((((\\x20|\\x09)*(\\x0d\\x0a))?(\\x20|\\x09)+)?(([\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]|\\x21|[\\x23-\\x5b]|[\\x5d-\\x7e]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(\\\\([\\x01-\\x09\\x0b\\x0c\\x0d-\\x7f]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF]))))*(((\\x20|\\x09)*(\\x0d\\x0a))?(\\x20|\\x09)+)?(\\x22)))@((([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])*([a-z]|\\d|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])))\\.)+(([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])|(([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])([a-z]|\\d|-|\\.|_|~|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])*([a-z]|[\\u00A0-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFEF])))$/i");

r.should.eql(/^(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){255,})(?!(?:(?:\x22?\x5C[\x00-\x7E]\x22?)|(?:\x22?[^\x5C\x22]\x22?)){65,}@)(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22))(?:\.(?:(?:[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x5E-\x7E]+)|(?:\x22(?:[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]|(?:\x5C[\x00-\x7F]))*\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-[a-z0-9]+)*\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-[a-z0-9]+)*)|(?:\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\]))$/);
});
});

Expand Down

0 comments on commit fcbc296

Please sign in to comment.