public
Description: A Python module for managing a list of rules describing effective top-level domains.
Homepage:
Clone URL: git://github.com/jpwatts/effectivetlds.git
effectivetlds / effectivetlds.py
100644 57 lines (44 sloc) 1.671 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class Rules(list):
    """A list of rules for identifying effective top-level domains.

    Rules are formatted as described by the `Public Suffix List`_.

    >>> tlds = Rules()

    Add rules to the list using the ``Rules.load`` method:

    >>> tlds.load(['com', '*.org'])
    >>> 'com' in tlds
    True
    >>> 'example.com' in tlds
    False
    >>> 'example.org' in tlds
    True

    A single rule may also be passed as a plain string:

    >>> tlds.load('info')
    >>> 'info' in tlds
    True

    ``Rules.load`` will ignore comments and empty rules. This allows the
    `Public Suffix List`_ to be loaded unmodified.

    >>> tlds.load(['// comment', '', '*.net'])
    >>> '// comment' in tlds
    False
    >>> '' in tlds
    False
    >>> 'example.net' in tlds
    True

    A rule prefixed with ``!`` marks an exception to a wildcard rule:

    >>> tlds.load(['!www.net'])
    >>> 'www.net' in tlds
    False

    .. _`Public Suffix List`: http://publicsuffix.org/

    """

    def __contains__(self, domain):
        """Check if `domain` matches any of the rules.

        The checks run in this order:

        1. `domain` is stored verbatim as a rule -> ``True``.
        2. An exception rule ``!<domain>`` is stored -> ``False``.
        3. Otherwise the first label of `domain` is replaced with ``*`` and
           the result is looked up as a wildcard rule.
        """
        # Use the plain ``list`` membership test for the literal lookups;
        # calling ``in self`` here would recurse into this method.
        has_rule = super(Rules, self).__contains__
        if has_rule(domain):
            return True
        exception = '!%s' % domain
        if has_rule(exception):
            return False
        # TODO: The rules allow wildcards at any level of the domain; this
        # only handles wildcards at the beginning of the domain.
        # Fix that.
        parent_labels = domain.split('.')[1:]
        wildcard = '*.%s' % '.'.join(parent_labels)
        return has_rule(wildcard)

    def load(self, rules):
        """Append the rules found in `rules` to this list.

        `rules` is either a single rule string or an iterable of rule
        strings. Each entry is stripped of surrounding whitespace; empty
        entries and entries starting with ``//`` (comments) are skipped.
        Only the text up to the first whitespace of an entry is stored.

        Returns ``None``; the list is extended in place.
        """
        # ``basestring`` exists only on Python 2 and raises NameError on
        # Python 3. ``(str, type(u''))`` is (str, unicode) on Python 2 and
        # collapses to plain ``str`` on Python 3.
        if isinstance(rules, (str, type(u''))):
            rules = [rules]
        for rule in rules:
            rule = rule.strip()
            if not rule or rule.startswith('//'):
                continue
            # Read only up to the first whitespace.
            self.append(rule.split()[0])
 
 
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module's
    # docstrings.
    from doctest import testmod
    testmod()