Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Shave some API yaks.

API changes:

* as_css is now a method everywhere
* Add ContainerToken.unit (always None)
* Remove ContainerToken.__iter__ and .__len__
* Add a TokenList type
* Make Declaration.value and RuleSet.selector TokenLists
  • Loading branch information...
commit 8ceae2844fbfb0bb91fe7f0ce5a590f0faec1a40 1 parent 9e0eaad
@SimonSapin authored
View
8 .coveragerc
@@ -1,2 +1,10 @@
[run]
branch = True
+
+[report]
+exclude_lines =
+ pragma: no cover
+ def __repr__
+ except ImportError
+omit =
+ tinycss/tests/speed.py
View
11 docs/parsing.rst
@@ -83,14 +83,13 @@ are not parsed by tinycss. They appear as tokens instead.
.. module:: tinycss.token_data
+.. autoclass:: TokenList()
+ :member-order: bysource
+ :members:
.. autoclass:: Token()
+ :members:
.. autoclass:: tinycss.speedups.CToken()
.. autoclass:: ContainerToken()
-
- .. autoattribute:: as_css
-
- .. method:: __iter__, __len__
-
- Shortcuts for accessing :attr:`content`.
+ :members:
.. autoclass:: FunctionToken()
View
87 tinycss/css21.py
@@ -14,9 +14,10 @@
from itertools import chain, islice
from .decoding import decode
+from .token_data import TokenList
from .tokenizer import tokenize_grouped
-from .parsing import (strip_whitespace, validate_value, validate_block,
- validate_any, ParseError)
+from .parsing import (strip_whitespace, remove_whitespace, split_on_comma,
+ validate_value, validate_block, validate_any, ParseError)
# stylesheet : [ CDO | CDC | S | statement ]*;
@@ -63,7 +64,7 @@ def __init__(self, rules, errors, encoding):
self.errors = errors
self.encoding = encoding
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
self, len(self.rules), len(self.errors))
@@ -78,13 +79,15 @@ class AtRule(object):
.. attribute:: head
- The "head" of the at-rule until ``;`` or ``{``: a list of tokens
- (:class:`~.token_data.Token` or :class:`~.token_data.ContainerToken`)
+ The part of the at-rule between the at-keyword and the ``{``
+ marking the body, or the ``;`` marking the end of an at-rule without
+ a body. A :class:`~.token_data.TokenList`.
.. attribute:: body
- A block as a :class:`~.token_data.ContainerToken` with
- ``token.type == '{'``, or ``None`` if the at-rule ends with ``;``.
+ The content of the body between ``{`` and ``}`` as a
+ :class:`~.token_data.TokenList`, or ``None`` if there is no body
+ (i.e. if the rule ends with ``;``).
The head was validated against the core grammar but **not** the body,
as the body might contain declarations. In case of an error in a
@@ -98,12 +101,12 @@ class AtRule(object):
"""
def __init__(self, at_keyword, head, body, line, column):
self.at_keyword = at_keyword
- self.head = head
- self.body = body
+ self.head = TokenList(head)
+ self.body = TokenList(body) if body is not None else body
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
.format(self))
@@ -117,8 +120,7 @@ class RuleSet(object):
.. attribute:: selector
- The selector as a list of :class:`~.token_data.Token` or
- :class:`~.token_data.ContainerToken`.
+ The selector as a :class:`~.token_data.TokenList`.
In CSS 3, this is actually called a selector group.
.. attribute:: declarations
@@ -130,12 +132,12 @@ class RuleSet(object):
at_keyword = None
def __init__(self, selector, declarations, line, column):
- self.selector = selector
+ self.selector = TokenList(selector)
self.declarations = declarations
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} at {0.line}:{0.column}'
' {0.selector.as_css}>'.format(self))
@@ -149,8 +151,7 @@ class Declaration(object):
.. attribute:: value
- The property value as a list of :class:`~.token_data.Token` or
- :class:`~.token_data.ContainerToken`.
+ The property value as a :class:`~.token_data.TokenList`.
The value is not parsed. UAs using tinycss may only support
some properties or some values and tinycss does not know which.
@@ -168,12 +169,12 @@ class Declaration(object):
"""
def __init__(self, name, value, priority, line, column):
self.name = name
- self.value = value
+ self.value = TokenList(value)
self.priority = priority
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
priority = ' !' + self.priority if self.priority else ''
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.name}: {0.value.as_css}{1}>'.format(self, priority))
@@ -219,7 +220,7 @@ def __init__(self, selector, specificity, declarations, at_rules,
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.selector}>'.format(self))
@@ -250,7 +251,7 @@ def __init__(self, media, rules, line, column):
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.media}>'.format(self))
@@ -283,7 +284,7 @@ def __init__(self, uri, media, line, column):
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.uri}>'.format(self))
@@ -303,7 +304,7 @@ def _remove_at_charset(tokens):
if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
atkw, space, string, semicolon = header
if ((atkw.value, space.value) == ('@charset', ' ')
- and string.as_css[0] == '"'):
+ and string.as_css()[0] == '"'):
# Found a valid @charset rule, only keep what’s after it.
return tokens
return chain(header, tokens)
@@ -469,7 +470,7 @@ def read_at_rule(self, at_keyword_token, tokens):
for head_token in head:
validate_any(head_token, 'at-rule head')
if token.type == '{':
- body = token
+ body = token.content
else:
body = None
return AtRule(at_keyword, head, body,
@@ -512,8 +513,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
declarations, at_rules, rule_errors = \
- self.parse_declarations_and_at_rules(
- rule.body.content, '@page')
+ self.parse_declarations_and_at_rules(rule.body, '@page')
errors.extend(rule_errors)
return PageRule(selector, specificity, declarations, at_rules,
rule.line, rule.column)
@@ -522,12 +522,12 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule, '@media rule not allowed in ' + context)
if not rule.head:
- raise ParseError(rule.body, 'expected media types for @media')
+ raise ParseError(rule, 'expected media types for @media')
media = self.parse_media(rule.head)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
- rules, rule_errors = self.parse_rules(rule.body.content, '@media')
+ rules, rule_errors = self.parse_rules(rule.body, '@media')
errors.extend(rule_errors)
return MediaRule(media, rules, rule.line, rule.column)
@@ -558,7 +558,9 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
else:
media = ['all']
if rule.body is not None:
- raise ParseError(rule.body, "expected ';', got a block")
+ # The position of the ';' token would be best, but we don’t
+ # have it anymore here.
+ raise ParseError(head[-1], "expected ';', got a block")
return ImportRule(uri, media, rule.line, rule.column)
elif rule.at_keyword == '@charset':
@@ -574,34 +576,21 @@ def parse_media(self, tokens):
Media Queries are expected to override this.
:param tokens:
- An non-empty iterable of tokens
+ A non-empty list of tokens
:raises:
:class:`~.parsing.ParseError` on invalid media types/queries
:returns:
For CSS 2.1, a list of media types as strings
"""
media_types = []
- tokens = iter(tokens)
- token = next(tokens)
- while 1:
- if token.type == 'IDENT':
- media_types.append(token.value.lower())
+ for part in split_on_comma(remove_whitespace(tokens)):
+ types = [token.type for token in part]
+ if types == ['IDENT']:
+ media_types.append(part[0].value)
else:
- raise ParseError(token,
- 'expected a media type, got {0}'.format(token.type))
- token = next(tokens, None)
- if not token:
- return media_types
- if not (token.type == 'DELIM' and token.value == ','):
- raise ParseError(token,
- 'expected a comma, got {0}'.format(token.type))
- while 1:
- next_token = next(tokens, None)
- if not next_token:
- raise ParseError(token, 'expected a media type')
- token = next_token
- if token.type != 'S':
- break
+ raise ParseError(tokens[0], 'expected a media type'
+ + ((', got ' + ', '.join(types)) if types else ''))
+ return media_types
def parse_page_selector(self, tokens):
"""Parse an @page selector.
View
29 tinycss/decoding.py
@@ -21,14 +21,6 @@
__all__ = ['decode'] # Everything else is implementation detail
-if sys.version_info[0] < 3: # pragma: no cover
- def _unicode_to_native(string):
- return string.encode('utf8')
-else: # pragma: no cover
- def _unicode_to_native(string):
- return string
-
-
def decode(css_bytes, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""
@@ -63,11 +55,17 @@ def decode(css_bytes, protocol_encoding=None,
if has_at_charset:
extract, endianness = encoding
encoding = extract(match.group(1))
+ # Get an ASCII-only unicode value.
+ # This is the only thing that works on both Python 2 and 3
+ # for bytes.decode()
+ # Non-ASCII encoding names are invalid anyway,
+ # but make sure they stay invalid.
encoding = encoding.decode('ascii', 'replace')
+ encoding = encoding.replace('\ufffd', '?')
if encoding.replace('-', '').replace('_', '').lower() in [
'utf16', 'utf32']:
encoding += endianness
- encoding = _unicode_to_native(encoding)
+ encoding = encoding.encode('ascii', 'replace').decode('ascii')
css_unicode = try_encoding(css_bytes, encoding)
if css_unicode and not (has_at_charset and not
css_unicode.startswith('@charset "')):
@@ -85,13 +83,14 @@ def decode(css_bytes, protocol_encoding=None,
def try_encoding(css_bytes, encoding, fallback=True):
- try:
+ if fallback:
+ try:
+ css_unicode = css_bytes.decode(encoding)
+ # LookupError means unknown encoding
+ except (UnicodeDecodeError, LookupError):
+ return None
+ else:
css_unicode = css_bytes.decode(encoding)
- # LookupError means unknown encoding
- except (UnicodeDecodeError, LookupError):
- if not fallback:
- raise
- return None
if css_unicode and css_unicode[0] == '\ufeff':
# Remove any Byte Order Mark
css_unicode = css_unicode[1:]
View
3  tinycss/page3.py
@@ -114,8 +114,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule.head[0],
'unexpected %s token in %s rule header'
% (rule.head[0].type, rule.at_keyword))
- declarations, body_errors = self.parse_declaration_list(
- rule.body.content)
+ declarations, body_errors = self.parse_declaration_list(rule.body)
errors.extend(body_errors)
return MarginRule(rule.at_keyword, declarations,
rule.line, rule.column)
View
2  tinycss/parsing.py
@@ -165,5 +165,5 @@ def __init__(self, subject, reason):
'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))
super(ParseError, self).__init__(self.message)
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return ('<{0.__class__.__name__}: {0.message}>'.format(self))
View
6 tinycss/selectors3.py
@@ -21,7 +21,7 @@
try:
from lxml import cssselect
-except ImportError as exc: # pragma: no cover
+except ImportError as exc:
exc.message = exc.msg = (
__name__ + ' depends on lxml.cssselect. Please install lxml '
'with "pip install lxml" or from http://lxml.de/')
@@ -116,7 +116,7 @@ def parse_selector_group_string(css_string):
def _parse_selector_group_tokens(group_tokens):
- return [parse_selector_string(''.join(t.as_css for t in tokens))
+ return [parse_selector_string(''.join(t.as_css() for t in tokens))
for tokens in split_on_comma(group_tokens)]
@@ -236,5 +236,5 @@ def parse_ruleset(self, first_token, tokens):
except InvalidSelectorError as exc:
# Invalidate the whole ruleset even if some selectors
# in the selector group are valid.
- raise ParseError(ruleset, exc.args[0])
+ raise ParseError(ruleset.selector, exc.args[0])
return ruleset, errors
View
13 tinycss/speedups.pyx
@@ -29,18 +29,25 @@ cdef class CToken:
"""
is_container = False
- cdef public object type, as_css, value, unit
+ cdef public object type, _as_css, value, unit
cdef public Py_ssize_t line, column
def __init__(self, type_, css_value, value, unit, line, column):
self.type = type_
- self.as_css = css_value
+ self._as_css = css_value
self.value = value
self.unit = unit
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def as_css(self):
+ """
+ Return as a Unicode string the CSS representation of the token,
+ as parsed in the source.
+ """
+ return self._as_css
+
+ def __repr__(self):
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
.format(self, self.unit or ''))
View
2  tinycss/tests/speed.py
@@ -57,7 +57,7 @@ def parse(tokenizer_name):
stylesheet = CSS21Parser().parse_stylesheet_bytes(CSS)
result = []
for rule in stylesheet.rules:
- selector = ''.join(s.as_css for s in rule.selector)
+ selector = rule.selector.as_css()
declarations = [
(declaration.name, len(list(remove_whitespace(declaration.value))))
for declaration in rule.declarations]
View
13 tinycss/tests/test_css21.py
@@ -114,7 +114,7 @@ def test_at_rules(css_source, expected_rules, expected_errors):
('foo @page {} bar {}', [('bar', [])],
['unexpected ATKEYWORD token in selector']),
- ('foo { content: "unclosed string;\n color:red; ; margin/**/: 2cm; }',
+ ('foo { content: "unclosed string;\n color:red; ; margin/**/\n: 2cm; }',
[('foo', [('margin', [('DIMENSION', 2)])])],
['unexpected BAD_STRING token in property value']),
@@ -151,10 +151,10 @@ def parse_at_rule(self, rule, stylesheet_rules, errors, context):
assert_errors(stylesheet.errors, expected_errors)
result = [
(rule.at_keyword, list(jsonify(rule.head)),
- list(jsonify(rule.body.content))
+ list(jsonify(rule.body))
if rule.body is not None else None)
if rule.at_keyword else
- (''.join(s.as_css for s in rule.selector), [
+ (rule.selector.as_css(), [
(decl.name, list(jsonify(decl.value)))
for decl in rule.declarations])
for rule in stylesheet.rules
@@ -318,9 +318,10 @@ def test_at_page(css, expected_result, expected_errors):
('@media all;', [], ['invalid @media rule: missing block']),
('@media {}', [], ['expected media types for @media']),
('@media 4 {}', [], ['expected a media type, got INTEGER']),
- ('@media , screen {}', [], ['expected a media type, got DELIM']),
+ ('@media , screen {}', [], ['expected a media type']),
('@media screen, {}', [], ['expected a media type']),
- ('@media screen print {}', [], ['expected a comma, got S']),
+ ('@media screen print {}', [],
+ ['expected a media type, got IDENT, IDENT']),
('@media all { @page { a: 1 } @media; @import; foo { a: 1 } }',
[(['all'], [('foo', [('a', [('INTEGER', 1)])])])],
@@ -337,7 +338,7 @@ def test_at_media(css_source, expected_rules, expected_errors):
assert rule.at_keyword == '@media'
result = [
(rule.media, [
- (''.join(s.as_css for s in sub_rule.selector), [
+ (sub_rule.selector.as_css(), [
(decl.name, list(jsonify(decl.value)))
for decl in sub_rule.declarations])
for sub_rule in rule.rules
View
2  tinycss/tests/test_decoding.py
@@ -32,7 +32,7 @@ def params(css, encoding, use_bom=False, expect_error=False, **kwargs):
params('£', 'ShiftJIS', protocol_encoding='utf8',
document_encoding='ShiftJIS'),
params('@charset "utf8"; £', 'ShiftJIS', expect_error=True),
- params('@charset "utf£"; £', 'ShiftJIS', expect_error=True),
+ params('@charset "utf£8"; £', 'ShiftJIS', expect_error=True),
params('@charset "unknown-encoding"; £', 'ShiftJIS', expect_error=True),
params('@charset "utf8"; £', 'ShiftJIS', document_encoding='ShiftJIS'),
params('£', 'ShiftJIS', linking_encoding='utf8',
View
2  tinycss/tests/test_selectors3.py
@@ -17,7 +17,7 @@
try:
import lxml.cssselect
-except ImportError: # pragma: no cover
+except ImportError:
LXML_INSTALLED = False
else:
LXML_INSTALLED = True
View
5 tinycss/tests/test_tokenizer.py
@@ -277,7 +277,7 @@ def test_token_serialize_css(tokenize, css_source):
pytest.skip('Speedups not available')
for _regroup in [regroup, lambda x: x]:
tokens = _regroup(tokenize(css_source, ignore_comments=False))
- result = ''.join(token.as_css for token in tokens)
+ result = ''.join(token.as_css() for token in tokens)
assert result == css_source
@@ -295,7 +295,4 @@ def test_token_api(tokenize, css_source):
assert len(tokens) == 1
token = tokens[0]
expected_len = 7 # 2 spaces, 2 commas, 3 others.
- assert len(token) == expected_len
assert len(token.content) == expected_len
- for a, b in zip(iter(token), token.content):
- assert a is b
View
93 tinycss/token_data.py
@@ -124,7 +124,7 @@
try:
unichr
-except NameError: # pragma: no cover
+except NameError:
# Python 3
unichr = chr
unicode = str
@@ -274,10 +274,6 @@ class Token(object):
but these are ignored, are syntax errors, or are later transformed
into :class:`ContainerToken` or :class:`FunctionToken`.
- .. attribute:: as_css
-
- The string as it was read from the CSS source
-
.. attribute:: value
The parsed value:
@@ -303,25 +299,32 @@ class Token(object):
.. attribute:: line
- The line number of this token in the CSS source
+ The line number in the CSS source of the start of this token.
.. attribute:: column
- The column number inside a line of this token in the CSS source
+ The column number (inside a source line) of the start of this token.
"""
is_container = False
- __slots__ = 'type', 'as_css', 'value', 'unit', 'line', 'column'
+ __slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'
def __init__(self, type_, css_value, value, unit, line, column):
self.type = type_
- self.as_css = css_value
+ self._as_css = css_value
self.value = value
self.unit = unit
self.line = line
self.column = column
- def __repr__(self): # pragma: no cover
+ def as_css(self):
+ """
+ Return as a Unicode string the CSS representation of the token,
+ as parsed in the source.
+ """
+ return self._as_css
+
+ def __repr__(self):
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
.format(self, self.unit or ''))
@@ -340,15 +343,10 @@ class ContainerToken(object):
``FUNCTION``. For ``FUNCTION``, the object is actually a
:class:`FunctionToken`.
- .. attribute:: css_start
-
- The string for the opening token as it was read from the CSS source.
- Eg: ``{``
-
- .. attribute:: css_end
+ .. attribute:: unit
- The string for the closing token as it was read from the CSS source
- Eg: ``}``
+ Always ``None``. Included to make :class:`ContainerToken` behave
+ more like :class:`Token`.
.. attribute:: content
@@ -357,47 +355,40 @@ class ContainerToken(object):
.. attribute:: line
- The line number of the opening token in the CSS source
+ The line number in the CSS source of the start of this token.
.. attribute:: column
- The column number inside a line of the opening token in the CSS source
+ The column number (inside a source line) of the start of this token.
"""
is_container = True
- __slots__ = 'type', 'css_start', 'css_end', 'content', 'line', 'column'
+ unit = None
+ __slots__ = 'type', '_css_start', '_css_end', 'content', 'line', 'column'
def __init__(self, type_, css_start, css_end, content, line, column):
self.type = type_
- self.css_start = css_start
- self.css_end = css_end
+ self._css_start = css_start
+ self._css_end = css_end
self.content = content
self.line = line
self.column = column
- @property
def as_css(self):
- """The (recursive) CSS representation of the token,
+ """
+ Return as a Unicode string the CSS representation of the token,
as parsed in the source.
"""
- parts = [self.css_start]
- parts.extend(token.as_css for token in self.content)
- parts.append(self.css_end)
+ parts = [self._css_start]
+ parts.extend(token.as_css() for token in self.content)
+ parts.append(self._css_end)
return ''.join(parts)
format_string = '<ContainerToken {0.type} at {0.line}:{0.column}>'
- def __repr__(self): # pragma: no cover
+ def __repr__(self):
return (self.format_string + ' {0.content}').format(self)
- # Sequence-like API (not the full collections.Sequence ABC, though)
-
- def __iter__(self):
- return iter(self.content)
-
- def __len__(self):
- return len(self.content)
-
class FunctionToken(ContainerToken):
"""A specialized :class:`ContainerToken` for a ``FUNCTION`` group.
@@ -419,3 +410,31 @@ def __init__(self, type_, css_start, css_end, function_name, content,
format_string = ('<FunctionToken {0.function_name}() at '
'{0.line}:{0.column}>')
+
+
+class TokenList(list):
+ """
+ A mixed list of :class:`~.token_data.Token` and
+ :class:`~.token_data.ContainerToken` objects.
+
+ This is a subclass of the builtin :class:`~builtins.list` type.
+ It can be iterated, indexed and sliced as usual, but also has some
+ additional API:
+
+ """
+ @property
+ def line(self):
+ """The line number in the CSS source of the first token."""
+ return self[0].line
+
+ @property
+ def column(self):
+ """The column number (inside a source line) of the first token."""
+ return self[0].column
+
+ def as_css(self):
+ """
+ Return as a Unicode string the CSS representation of the tokens,
+ as parsed in the source.
+ """
+ return ''.join(token.as_css() for token in self)
View
6 tinycss/tokenizer.py
@@ -177,11 +177,11 @@ def _regroup_inner(stop_at=None,
if eof[0]:
end = '' # Implicit end of structure at EOF.
if type_ == 'FUNCTION':
- yield FunctionToken(token.type, token.as_css, end,
+ yield FunctionToken(token.type, token.as_css(), end,
token.value, content,
token.line, token.column)
else:
- yield ContainerToken(token.type, token.as_css, end,
+ yield ContainerToken(token.type, token.as_css(), end,
content,
token.line, token.column)
else:
@@ -208,7 +208,7 @@ def tokenize_grouped(css_source, ignore_comments=True):
python_tokenize_flat = tokenize_flat
try:
from . import speedups
-except ImportError: # pragma: no cover
+except ImportError:
cython_tokenize_flat = None
else:
cython_tokenize_flat = speedups.tokenize_flat
Please sign in to comment.
Something went wrong with that request. Please try again.