Permalink
Browse files

Shave some API yaks.

API changes:

* as_css is now a method everywhere
* Add ContainerToken.unit (always None)
* Remove ContainerToken.__iter__ and .__len__
* Add a TokenList type
* Make Declaration.value and RuleSet.selector TokenList instances
  • Loading branch information...
1 parent 9e0eaad commit 8ceae2844fbfb0bb91fe7f0ce5a590f0faec1a40 @SimonSapin SimonSapin committed Apr 5, 2012
View
@@ -1,2 +1,10 @@
[run]
branch = True
+
+[report]
+exclude_lines =
+ pragma: no cover
+ def __repr__
+ except ImportError
+omit =
+ tinycss/tests/speed.py
View
@@ -83,14 +83,13 @@ are not parsed by tinycss. They appear as tokens instead.
.. module:: tinycss.token_data
+.. autoclass:: TokenList()
+ :member-order: bysource
+ :members:
.. autoclass:: Token()
+ :members:
.. autoclass:: tinycss.speedups.CToken()
.. autoclass:: ContainerToken()
-
- .. autoattribute:: as_css
-
- .. method:: __iter__, __len__
-
- Shortcuts for accessing :attr:`content`.
+ :members:
.. autoclass:: FunctionToken()
View
@@ -14,9 +14,10 @@
from itertools import chain, islice
from .decoding import decode
+from .token_data import TokenList
from .tokenizer import tokenize_grouped
-from .parsing import (strip_whitespace, validate_value, validate_block,
- validate_any, ParseError)
+from .parsing import (strip_whitespace, remove_whitespace, split_on_comma,
+ validate_value, validate_block, validate_any, ParseError)
# stylesheet : [ CDO | CDC | S | statement ]*;
@@ -63,7 +64,7 @@ def __init__(self, rules, errors, encoding):
self.errors = errors
self.encoding = encoding
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
self, len(self.rules), len(self.errors))
@@ -78,13 +79,15 @@ class AtRule(object):
.. attribute:: head
- The "head" of the at-rule until ``;`` or ``{``: a list of tokens
- (:class:`~.token_data.Token` or :class:`~.token_data.ContainerToken`)
+ The part of the at-rule between the at-keyword and the ``{``
+ marking the body, or the ``;`` marking the end of an at-rule without
+ a body. A :class:`~.token_data.TokenList`.
.. attribute:: body
- A block as a :class:`~.token_data.ContainerToken` with
- ``token.type == '{'``, or ``None`` if the at-rule ends with ``;``.
+ The content of the body between ``{`` and ``}`` as a
+ :class:`~.token_data.TokenList`, or ``None`` if there is no body
+ (i.e. if the rule ends with ``;``).
The head was validated against the core grammar but **not** the body,
as the body might contain declarations. In case of an error in a
@@ -98,12 +101,12 @@ class AtRule(object):
"""
def __init__(self, at_keyword, head, body, line, column):
self.at_keyword = at_keyword
- self.head = head
- self.body = body
+ self.head = TokenList(head)
+ self.body = TokenList(body) if body is not None else body
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
.format(self))
@@ -117,8 +120,7 @@ class RuleSet(object):
.. attribute:: selector
- The selector as a list of :class:`~.token_data.Token` or
- :class:`~.token_data.ContainerToken`.
+ The selector as a :class:`~.token_data.TokenList`.
In CSS 3, this is actually called a selector group.
.. attribute:: declarations
@@ -130,12 +132,12 @@ class RuleSet(object):
at_keyword = None
def __init__(self, selector, declarations, line, column):
- self.selector = selector
+ self.selector = TokenList(selector)
self.declarations = declarations
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} at {0.line}:{0.column}'
' {0.selector.as_css}>'.format(self))
@@ -149,8 +151,7 @@ class Declaration(object):
.. attribute:: value
- The property value as a list of :class:`~.token_data.Token` or
- :class:`~.token_data.ContainerToken`.
+ The property value as a :class:`~.token_data.TokenList`.
The value is not parsed. UAs using tinycss may only support
some properties or some values and tinycss does not know which.
@@ -168,12 +169,12 @@ class Declaration(object):
"""
def __init__(self, name, value, priority, line, column):
self.name = name
- self.value = value
+ self.value = TokenList(value)
self.priority = priority
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
priority = ' !' + self.priority if self.priority else ''
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.name}: {0.value.as_css}{1}>'.format(self, priority))
@@ -219,7 +220,7 @@ def __init__(self, selector, specificity, declarations, at_rules,
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.selector}>'.format(self))
@@ -250,7 +251,7 @@ def __init__(self, media, rules, line, column):
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.media}>'.format(self))
@@ -283,7 +284,7 @@ def __init__(self, uri, media, line, column):
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.uri}>'.format(self))
@@ -303,7 +304,7 @@ def _remove_at_charset(tokens):
if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
atkw, space, string, semicolon = header
if ((atkw.value, space.value) == ('@charset', ' ')
- and string.as_css[0] == '"'):
+ and string.as_css()[0] == '"'):
# Found a valid @charset rule, only keep what’s after it.
return tokens
return chain(header, tokens)
@@ -469,7 +470,7 @@ def read_at_rule(self, at_keyword_token, tokens):
for head_token in head:
validate_any(head_token, 'at-rule head')
if token.type == '{':
- body = token
+ body = token.content
else:
body = None
return AtRule(at_keyword, head, body,
@@ -512,8 +513,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
declarations, at_rules, rule_errors = \
- self.parse_declarations_and_at_rules(
- rule.body.content, '@page')
+ self.parse_declarations_and_at_rules(rule.body, '@page')
errors.extend(rule_errors)
return PageRule(selector, specificity, declarations, at_rules,
rule.line, rule.column)
@@ -522,12 +522,12 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule, '@media rule not allowed in ' + context)
if not rule.head:
- raise ParseError(rule.body, 'expected media types for @media')
+ raise ParseError(rule, 'expected media types for @media')
media = self.parse_media(rule.head)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
- rules, rule_errors = self.parse_rules(rule.body.content, '@media')
+ rules, rule_errors = self.parse_rules(rule.body, '@media')
errors.extend(rule_errors)
return MediaRule(media, rules, rule.line, rule.column)
@@ -558,7 +558,9 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
else:
media = ['all']
if rule.body is not None:
- raise ParseError(rule.body, "expected ';', got a block")
+ # The position of the ';' token would be best, but we don’t
+ # have it anymore here.
+ raise ParseError(head[-1], "expected ';', got a block")
return ImportRule(uri, media, rule.line, rule.column)
elif rule.at_keyword == '@charset':
@@ -574,34 +576,21 @@ def parse_media(self, tokens):
Media Queries are expected to override this.
:param tokens:
- An non-empty iterable of tokens
+ A non-empty list of tokens
:raises:
:class:`~.parsing.ParseError` on invalid media types/queries
:returns:
For CSS 2.1, a list of media types as strings
"""
media_types = []
- tokens = iter(tokens)
- token = next(tokens)
- while 1:
- if token.type == 'IDENT':
- media_types.append(token.value.lower())
+ for part in split_on_comma(remove_whitespace(tokens)):
+ types = [token.type for token in part]
+ if types == ['IDENT']:
+ media_types.append(part[0].value)
else:
- raise ParseError(token,
- 'expected a media type, got {0}'.format(token.type))
- token = next(tokens, None)
- if not token:
- return media_types
- if not (token.type == 'DELIM' and token.value == ','):
- raise ParseError(token,
- 'expected a comma, got {0}'.format(token.type))
- while 1:
- next_token = next(tokens, None)
- if not next_token:
- raise ParseError(token, 'expected a media type')
- token = next_token
- if token.type != 'S':
- break
+ raise ParseError(tokens[0], 'expected a media type'
+ + ((', got ' + ', '.join(types)) if types else ''))
+ return media_types
def parse_page_selector(self, tokens):
"""Parse an @page selector.
View
@@ -21,14 +21,6 @@
__all__ = ['decode'] # Everything else is implementation detail
-if sys.version_info[0] < 3: # pragma: no cover
- def _unicode_to_native(string):
- return string.encode('utf8')
-else: # pragma: no cover
- def _unicode_to_native(string):
- return string
-
-
def decode(css_bytes, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""
@@ -63,11 +55,17 @@ def decode(css_bytes, protocol_encoding=None,
if has_at_charset:
extract, endianness = encoding
encoding = extract(match.group(1))
+ # Get an ASCII-only unicode value.
+ # This is the only thing that works on both Python 2 and 3
+ # for bytes.decode()
+ # Non-ASCII encoding names are invalid anyway,
+ # but make sure they stay invalid.
encoding = encoding.decode('ascii', 'replace')
+ encoding = encoding.replace('\ufffd', '?')
if encoding.replace('-', '').replace('_', '').lower() in [
'utf16', 'utf32']:
encoding += endianness
- encoding = _unicode_to_native(encoding)
+ encoding = encoding.encode('ascii', 'replace').decode('ascii')
css_unicode = try_encoding(css_bytes, encoding)
if css_unicode and not (has_at_charset and not
css_unicode.startswith('@charset "')):
@@ -85,13 +83,14 @@ def decode(css_bytes, protocol_encoding=None,
def try_encoding(css_bytes, encoding, fallback=True):
- try:
+ if fallback:
+ try:
+ css_unicode = css_bytes.decode(encoding)
+ # LookupError means unknown encoding
+ except (UnicodeDecodeError, LookupError):
+ return None
+ else:
css_unicode = css_bytes.decode(encoding)
- # LookupError means unknown encoding
- except (UnicodeDecodeError, LookupError):
- if not fallback:
- raise
- return None
if css_unicode and css_unicode[0] == '\ufeff':
# Remove any Byte Order Mark
css_unicode = css_unicode[1:]
View
@@ -114,8 +114,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule.head[0],
'unexpected %s token in %s rule header'
% (rule.head[0].type, rule.at_keyword))
- declarations, body_errors = self.parse_declaration_list(
- rule.body.content)
+ declarations, body_errors = self.parse_declaration_list(rule.body)
errors.extend(body_errors)
return MarginRule(rule.at_keyword, declarations,
rule.line, rule.column)
View
@@ -165,5 +165,5 @@ def __init__(self, subject, reason):
'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))
super(ParseError, self).__init__(self.message)
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__}: {0.message}>'.format(self))
View
@@ -21,7 +21,7 @@
try:
from lxml import cssselect
-except ImportError as exc: # pragma: no cover
+except ImportError as exc:
exc.message = exc.msg = (
__name__ + ' depends on lxml.cssselect. Please install lxml '
'with "pip install lxml" or from http://lxml.de/')
@@ -116,7 +116,7 @@ def parse_selector_group_string(css_string):
def _parse_selector_group_tokens(group_tokens):
- return [parse_selector_string(''.join(t.as_css for t in tokens))
+ return [parse_selector_string(''.join(t.as_css() for t in tokens))
for tokens in split_on_comma(group_tokens)]
@@ -236,5 +236,5 @@ def parse_ruleset(self, first_token, tokens):
except InvalidSelectorError as exc:
# Invalidate the whole ruleset even if some selectors
# in the selector group are valid.
- raise ParseError(ruleset, exc.args[0])
+ raise ParseError(ruleset.selector, exc.args[0])
return ruleset, errors
View
@@ -29,18 +29,25 @@ cdef class CToken:
"""
is_container = False
- cdef public object type, as_css, value, unit
+ cdef public object type, _as_css, value, unit
cdef public Py_ssize_t line, column
def __init__(self, type_, css_value, value, unit, line, column):
self.type = type_
- self.as_css = css_value
+ self._as_css = css_value
self.value = value
self.unit = unit
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def as_css(self):
+ """
+ Return as a Unicode string the CSS representation of the token,
+ as parsed in the source.
+ """
+ return self._as_css
+
+ def __repr__(self):
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
.format(self, self.unit or ''))
View
@@ -57,7 +57,7 @@ def parse(tokenizer_name):
stylesheet = CSS21Parser().parse_stylesheet_bytes(CSS)
result = []
for rule in stylesheet.rules:
- selector = ''.join(s.as_css for s in rule.selector)
+ selector = rule.selector.as_css()
declarations = [
(declaration.name, len(list(remove_whitespace(declaration.value))))
for declaration in rule.declarations]
Oops, something went wrong.

0 comments on commit 8ceae28

Please sign in to comment.