Permalink
Browse files

Shave some API yaks.

API changes:

* as_css is now a method everywhere
* Add ContainerToken.unit (always None)
* Remove ContainerToken.__iter__ and .__len__
* Add a TokenList type
* Make Declaration.value and RuleSet.selector TokenList instances
  • Loading branch information...
1 parent 9e0eaad commit 8ceae2844fbfb0bb91fe7f0ce5a590f0faec1a40 @SimonSapin SimonSapin committed Apr 5, 2012
View
@@ -1,2 +1,10 @@
[run]
branch = True
+
+[report]
+exclude_lines =
+ pragma: no cover
+ def __repr__
+ except ImportError
+omit =
+ tinycss/tests/speed.py
View
@@ -83,14 +83,13 @@ are not parsed by tinycss. They appear as tokens instead.
.. module:: tinycss.token_data
+.. autoclass:: TokenList()
+ :member-order: bysource
+ :members:
.. autoclass:: Token()
+ :members:
.. autoclass:: tinycss.speedups.CToken()
.. autoclass:: ContainerToken()
-
- .. autoattribute:: as_css
-
- .. method:: __iter__, __len__
-
- Shortcuts for accessing :attr:`content`.
+ :members:
.. autoclass:: FunctionToken()
View
@@ -14,9 +14,10 @@
from itertools import chain, islice
from .decoding import decode
+from .token_data import TokenList
from .tokenizer import tokenize_grouped
-from .parsing import (strip_whitespace, validate_value, validate_block,
- validate_any, ParseError)
+from .parsing import (strip_whitespace, remove_whitespace, split_on_comma,
+ validate_value, validate_block, validate_any, ParseError)
# stylesheet : [ CDO | CDC | S | statement ]*;
@@ -63,7 +64,7 @@ def __init__(self, rules, errors, encoding):
self.errors = errors
self.encoding = encoding
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
self, len(self.rules), len(self.errors))
@@ -78,13 +79,15 @@ class AtRule(object):
.. attribute:: head
- The "head" of the at-rule until ``;`` or ``{``: a list of tokens
- (:class:`~.token_data.Token` or :class:`~.token_data.ContainerToken`)
+ The part of the at-rule between the at-keyword and the ``{``
+ marking the body, or the ``;`` marking the end of an at-rule without
+ a body. A :class:`~.token_data.TokenList`.
.. attribute:: body
- A block as a :class:`~.token_data.ContainerToken` with
- ``token.type == '{'``, or ``None`` if the at-rule ends with ``;``.
+ The content of the body between ``{`` and ``}`` as a
+ :class:`~.token_data.TokenList`, or ``None`` if there is no body
+ (i.e. if the rule ends with ``;``).
The head was validated against the core grammar but **not** the body,
as the body might contain declarations. In case of an error in a
@@ -98,12 +101,12 @@ class AtRule(object):
"""
def __init__(self, at_keyword, head, body, line, column):
self.at_keyword = at_keyword
- self.head = head
- self.body = body
+ self.head = TokenList(head)
+ self.body = TokenList(body) if body is not None else body
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
.format(self))
@@ -117,8 +120,7 @@ class RuleSet(object):
.. attribute:: selector
- The selector as a list of :class:`~.token_data.Token` or
- :class:`~.token_data.ContainerToken`.
+ The selector as a :class:`~.token_data.TokenList`.
In CSS 3, this is actually called a selector group.
.. attribute:: declarations
@@ -130,12 +132,12 @@ class RuleSet(object):
at_keyword = None
def __init__(self, selector, declarations, line, column):
- self.selector = selector
+ self.selector = TokenList(selector)
self.declarations = declarations
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} at {0.line}:{0.column}'
' {0.selector.as_css}>'.format(self))
@@ -149,8 +151,7 @@ class Declaration(object):
.. attribute:: value
- The property value as a list of :class:`~.token_data.Token` or
- :class:`~.token_data.ContainerToken`.
+ The property value as a :class:`~.token_data.TokenList`.
The value is not parsed. UAs using tinycss may only support
some properties or some values and tinycss does not know which.
@@ -168,12 +169,12 @@ class Declaration(object):
"""
def __init__(self, name, value, priority, line, column):
self.name = name
- self.value = value
+ self.value = TokenList(value)
self.priority = priority
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
priority = ' !' + self.priority if self.priority else ''
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.name}: {0.value.as_css}{1}>'.format(self, priority))
@@ -219,7 +220,7 @@ def __init__(self, selector, specificity, declarations, at_rules,
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.selector}>'.format(self))
@@ -250,7 +251,7 @@ def __init__(self, media, rules, line, column):
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.media}>'.format(self))
@@ -283,7 +284,7 @@ def __init__(self, uri, media, line, column):
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.uri}>'.format(self))
@@ -303,7 +304,7 @@ def _remove_at_charset(tokens):
if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
atkw, space, string, semicolon = header
if ((atkw.value, space.value) == ('@charset', ' ')
- and string.as_css[0] == '"'):
+ and string.as_css()[0] == '"'):
# Found a valid @charset rule, only keep what’s after it.
return tokens
return chain(header, tokens)
@@ -469,7 +470,7 @@ def read_at_rule(self, at_keyword_token, tokens):
for head_token in head:
validate_any(head_token, 'at-rule head')
if token.type == '{':
- body = token
+ body = token.content
else:
body = None
return AtRule(at_keyword, head, body,
@@ -512,8 +513,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
declarations, at_rules, rule_errors = \
- self.parse_declarations_and_at_rules(
- rule.body.content, '@page')
+ self.parse_declarations_and_at_rules(rule.body, '@page')
errors.extend(rule_errors)
return PageRule(selector, specificity, declarations, at_rules,
rule.line, rule.column)
@@ -522,12 +522,12 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule, '@media rule not allowed in ' + context)
if not rule.head:
- raise ParseError(rule.body, 'expected media types for @media')
+ raise ParseError(rule, 'expected media types for @media')
media = self.parse_media(rule.head)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
- rules, rule_errors = self.parse_rules(rule.body.content, '@media')
+ rules, rule_errors = self.parse_rules(rule.body, '@media')
errors.extend(rule_errors)
return MediaRule(media, rules, rule.line, rule.column)
@@ -558,7 +558,9 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
else:
media = ['all']
if rule.body is not None:
- raise ParseError(rule.body, "expected ';', got a block")
+ # The position of the ';' token would be best, but we don’t
+ # have it anymore here.
+ raise ParseError(head[-1], "expected ';', got a block")
return ImportRule(uri, media, rule.line, rule.column)
elif rule.at_keyword == '@charset':
@@ -574,34 +576,21 @@ def parse_media(self, tokens):
Media Queries are expected to override this.
:param tokens:
- An non-empty iterable of tokens
+ A non-empty list of tokens
:raises:
:class:`~.parsing.ParseError` on invalid media types/queries
:returns:
For CSS 2.1, a list of media types as strings
"""
media_types = []
- tokens = iter(tokens)
- token = next(tokens)
- while 1:
- if token.type == 'IDENT':
- media_types.append(token.value.lower())
+ for part in split_on_comma(remove_whitespace(tokens)):
+ types = [token.type for token in part]
+ if types == ['IDENT']:
+ media_types.append(part[0].value)
else:
- raise ParseError(token,
- 'expected a media type, got {0}'.format(token.type))
- token = next(tokens, None)
- if not token:
- return media_types
- if not (token.type == 'DELIM' and token.value == ','):
- raise ParseError(token,
- 'expected a comma, got {0}'.format(token.type))
- while 1:
- next_token = next(tokens, None)
- if not next_token:
- raise ParseError(token, 'expected a media type')
- token = next_token
- if token.type != 'S':
- break
+ raise ParseError(tokens[0], 'expected a media type'
+ + ((', got ' + ', '.join(types)) if types else ''))
+ return media_types
def parse_page_selector(self, tokens):
"""Parse an @page selector.
View
@@ -21,14 +21,6 @@
__all__ = ['decode'] # Everything else is implementation detail
-if sys.version_info[0] < 3: # pragma: no cover
- def _unicode_to_native(string):
- return string.encode('utf8')
-else: # pragma: no cover
- def _unicode_to_native(string):
- return string
-
-
def decode(css_bytes, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""
@@ -63,11 +55,17 @@ def decode(css_bytes, protocol_encoding=None,
if has_at_charset:
extract, endianness = encoding
encoding = extract(match.group(1))
+ # Get an ASCII-only unicode value.
+ # This is the only thing that works on both Python 2 and 3
+ # for bytes.decode()
+ # Non-ASCII encoding names are invalid anyway,
+ # but make sure they stay invalid.
encoding = encoding.decode('ascii', 'replace')
+ encoding = encoding.replace('\ufffd', '?')
if encoding.replace('-', '').replace('_', '').lower() in [
'utf16', 'utf32']:
encoding += endianness
- encoding = _unicode_to_native(encoding)
+ encoding = encoding.encode('ascii', 'replace').decode('ascii')
css_unicode = try_encoding(css_bytes, encoding)
if css_unicode and not (has_at_charset and not
css_unicode.startswith('@charset "')):
@@ -85,13 +83,14 @@ def decode(css_bytes, protocol_encoding=None,
def try_encoding(css_bytes, encoding, fallback=True):
- try:
+ if fallback:
+ try:
+ css_unicode = css_bytes.decode(encoding)
+ # LookupError means unknown encoding
+ except (UnicodeDecodeError, LookupError):
+ return None
+ else:
css_unicode = css_bytes.decode(encoding)
- # LookupError means unknown encoding
- except (UnicodeDecodeError, LookupError):
- if not fallback:
- raise
- return None
if css_unicode and css_unicode[0] == '\ufeff':
# Remove any Byte Order Mark
css_unicode = css_unicode[1:]
View
@@ -114,8 +114,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule.head[0],
'unexpected %s token in %s rule header'
% (rule.head[0].type, rule.at_keyword))
- declarations, body_errors = self.parse_declaration_list(
- rule.body.content)
+ declarations, body_errors = self.parse_declaration_list(rule.body)
errors.extend(body_errors)
return MarginRule(rule.at_keyword, declarations,
rule.line, rule.column)
View
@@ -165,5 +165,5 @@ def __init__(self, subject, reason):
'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))
super(ParseError, self).__init__(self.message)
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__}: {0.message}>'.format(self))
View
@@ -21,7 +21,7 @@
try:
from lxml import cssselect
-except ImportError as exc: # pragma: no cover
+except ImportError as exc:
exc.message = exc.msg = (
__name__ + ' depends on lxml.cssselect. Please install lxml '
'with "pip install lxml" or from http://lxml.de/')
@@ -116,7 +116,7 @@ def parse_selector_group_string(css_string):
def _parse_selector_group_tokens(group_tokens):
- return [parse_selector_string(''.join(t.as_css for t in tokens))
+ return [parse_selector_string(''.join(t.as_css() for t in tokens))
for tokens in split_on_comma(group_tokens)]
@@ -236,5 +236,5 @@ def parse_ruleset(self, first_token, tokens):
except InvalidSelectorError as exc:
# Invalidate the whole ruleset even if some selectors
# in the selector group are valid.
- raise ParseError(ruleset, exc.args[0])
+ raise ParseError(ruleset.selector, exc.args[0])
return ruleset, errors
View
@@ -29,18 +29,25 @@ cdef class CToken:
"""
is_container = False
- cdef public object type, as_css, value, unit
+ cdef public object type, _as_css, value, unit
cdef public Py_ssize_t line, column
def __init__(self, type_, css_value, value, unit, line, column):
self.type = type_
- self.as_css = css_value
+ self._as_css = css_value
self.value = value
self.unit = unit
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def as_css(self):
+ """
+ Return as a Unicode string the CSS representation of the token,
+ as parsed in the source.
+ """
+ return self._as_css
+
+ def __repr__(self):
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
.format(self, self.unit or ''))
View
@@ -57,7 +57,7 @@ def parse(tokenizer_name):
stylesheet = CSS21Parser().parse_stylesheet_bytes(CSS)
result = []
for rule in stylesheet.rules:
- selector = ''.join(s.as_css for s in rule.selector)
+ selector = rule.selector.as_css()
declarations = [
(declaration.name, len(list(remove_whitespace(declaration.value))))
for declaration in rule.declarations]
Oops, something went wrong.

0 comments on commit 8ceae28

Please sign in to comment.