Skip to content
This repository has been archived by the owner on Sep 18, 2019. It is now read-only.

Commit

Permalink
Shave some API yaks.
Browse files Browse the repository at this point in the history
API changes:

* as_css is now a method everywhere
* Add ContainerToken.unit (always None)
* Remove ContainerToken.__iter__ and .__len__
* Add a TokenList type
* Make Declaration.value and RuleSet.selector TokenList instances
  • Loading branch information
SimonSapin committed Apr 5, 2012
1 parent 9e0eaad commit 8ceae28
Show file tree
Hide file tree
Showing 15 changed files with 150 additions and 132 deletions.
8 changes: 8 additions & 0 deletions .coveragerc
@@ -1,2 +1,10 @@
[run]
branch = True

[report]
exclude_lines =
pragma: no cover
def __repr__
except ImportError
omit =
tinycss/tests/speed.py
11 changes: 5 additions & 6 deletions docs/parsing.rst
Expand Up @@ -83,14 +83,13 @@ are not parsed by tinycss. They appear as tokens instead.

.. module:: tinycss.token_data

.. autoclass:: TokenList()
:member-order: bysource
:members:
.. autoclass:: Token()
:members:
.. autoclass:: tinycss.speedups.CToken()
.. autoclass:: ContainerToken()

.. autoattribute:: as_css

.. method:: __iter__, __len__

Shortcuts for accessing :attr:`content`.
:members:

.. autoclass:: FunctionToken()
87 changes: 38 additions & 49 deletions tinycss/css21.py
Expand Up @@ -14,9 +14,10 @@
from itertools import chain, islice

from .decoding import decode
from .token_data import TokenList
from .tokenizer import tokenize_grouped
from .parsing import (strip_whitespace, validate_value, validate_block,
validate_any, ParseError)
from .parsing import (strip_whitespace, remove_whitespace, split_on_comma,
validate_value, validate_block, validate_any, ParseError)


# stylesheet : [ CDO | CDC | S | statement ]*;
Expand Down Expand Up @@ -63,7 +64,7 @@ def __init__(self, rules, errors, encoding):
self.errors = errors
self.encoding = encoding

def __repr__(self): # pragma: no cover
def __repr__(self):
return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
self, len(self.rules), len(self.errors))

Expand All @@ -78,13 +79,15 @@ class AtRule(object):
.. attribute:: head
The "head" of the at-rule until ``;`` or ``{``: a list of tokens
(:class:`~.token_data.Token` or :class:`~.token_data.ContainerToken`)
The part of the at-rule between the at-keyword and the ``{``
marking the body, or the ``;`` marking the end of an at-rule without
a body. A :class:`~.token_data.TokenList`.
.. attribute:: body
A block as a :class:`~.token_data.ContainerToken` with
``token.type == '{'``, or ``None`` if the at-rule ends with ``;``.
The content of the body between ``{`` and ``}`` as a
:class:`~.token_data.TokenList`, or ``None`` if there is no body
(ie. if the rule ends with ``;``).
The head was validated against the core grammar but **not** the body,
as the body might contain declarations. In case of an error in a
Expand All @@ -98,12 +101,12 @@ class AtRule(object):
"""
def __init__(self, at_keyword, head, body, line, column):
self.at_keyword = at_keyword
self.head = head
self.body = body
self.head = TokenList(head)
self.body = TokenList(body) if body is not None else body
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
.format(self))

Expand All @@ -117,8 +120,7 @@ class RuleSet(object):
.. attribute:: selector
The selector as a list of :class:`~.token_data.Token` or
:class:`~.token_data.ContainerToken`.
The selector as a :class:`~.token_data.TokenList`.
In CSS 3, this is actually called a selector group.
.. attribute:: declarations
Expand All @@ -130,12 +132,12 @@ class RuleSet(object):
at_keyword = None

def __init__(self, selector, declarations, line, column):
self.selector = selector
self.selector = TokenList(selector)
self.declarations = declarations
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def __repr__(self):
return ('<{0.__class__.__name__} at {0.line}:{0.column}'
' {0.selector.as_css}>'.format(self))

Expand All @@ -149,8 +151,7 @@ class Declaration(object):
.. attribute:: value
The property value as a list of :class:`~.token_data.Token` or
:class:`~.token_data.ContainerToken`.
The property value as a :class:`~.token_data.TokenList`.
The value is not parsed. UAs using tinycss may only support
some properties or some values and tinycss does not know which.
Expand All @@ -168,12 +169,12 @@ class Declaration(object):
"""
def __init__(self, name, value, priority, line, column):
self.name = name
self.value = value
self.value = TokenList(value)
self.priority = priority
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def __repr__(self):
priority = ' !' + self.priority if self.priority else ''
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.name}: {0.value.as_css}{1}>'.format(self, priority))
Expand Down Expand Up @@ -219,7 +220,7 @@ def __init__(self, selector, specificity, declarations, at_rules,
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.selector}>'.format(self))

Expand Down Expand Up @@ -250,7 +251,7 @@ def __init__(self, media, rules, line, column):
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.media}>'.format(self))

Expand Down Expand Up @@ -283,7 +284,7 @@ def __init__(self, uri, media, line, column):
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.uri}>'.format(self))

Expand All @@ -303,7 +304,7 @@ def _remove_at_charset(tokens):
if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
atkw, space, string, semicolon = header
if ((atkw.value, space.value) == ('@charset', ' ')
and string.as_css[0] == '"'):
and string.as_css()[0] == '"'):
# Found a valid @charset rule, only keep what’s after it.
return tokens
return chain(header, tokens)
Expand Down Expand Up @@ -469,7 +470,7 @@ def read_at_rule(self, at_keyword_token, tokens):
for head_token in head:
validate_any(head_token, 'at-rule head')
if token.type == '{':
body = token
body = token.content
else:
body = None
return AtRule(at_keyword, head, body,
Expand Down Expand Up @@ -512,8 +513,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
declarations, at_rules, rule_errors = \
self.parse_declarations_and_at_rules(
rule.body.content, '@page')
self.parse_declarations_and_at_rules(rule.body, '@page')
errors.extend(rule_errors)
return PageRule(selector, specificity, declarations, at_rules,
rule.line, rule.column)
Expand All @@ -522,12 +522,12 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule, '@media rule not allowed in ' + context)
if not rule.head:
raise ParseError(rule.body, 'expected media types for @media')
raise ParseError(rule, 'expected media types for @media')
media = self.parse_media(rule.head)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
rules, rule_errors = self.parse_rules(rule.body.content, '@media')
rules, rule_errors = self.parse_rules(rule.body, '@media')
errors.extend(rule_errors)
return MediaRule(media, rules, rule.line, rule.column)

Expand Down Expand Up @@ -558,7 +558,9 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
else:
media = ['all']
if rule.body is not None:
raise ParseError(rule.body, "expected ';', got a block")
# The position of the ';' token would be best, but we don’t
# have it anymore here.
raise ParseError(head[-1], "expected ';', got a block")
return ImportRule(uri, media, rule.line, rule.column)

elif rule.at_keyword == '@charset':
Expand All @@ -574,34 +576,21 @@ def parse_media(self, tokens):
Media Queries are expected to override this.
:param tokens:
An non-empty iterable of tokens
A non-empty list of tokens
:raises:
:class:`~.parsing.ParseError` on invalid media types/queries
:returns:
For CSS 2.1, a list of media types as strings
"""
media_types = []
tokens = iter(tokens)
token = next(tokens)
while 1:
if token.type == 'IDENT':
media_types.append(token.value.lower())
for part in split_on_comma(remove_whitespace(tokens)):
types = [token.type for token in part]
if types == ['IDENT']:
media_types.append(part[0].value)
else:
raise ParseError(token,
'expected a media type, got {0}'.format(token.type))
token = next(tokens, None)
if not token:
return media_types
if not (token.type == 'DELIM' and token.value == ','):
raise ParseError(token,
'expected a comma, got {0}'.format(token.type))
while 1:
next_token = next(tokens, None)
if not next_token:
raise ParseError(token, 'expected a media type')
token = next_token
if token.type != 'S':
break
raise ParseError(tokens[0], 'expected a media type'
+ ((', got ' + ', '.join(types)) if types else ''))
return media_types

def parse_page_selector(self, tokens):
"""Parse an @page selector.
Expand Down
29 changes: 14 additions & 15 deletions tinycss/decoding.py
Expand Up @@ -21,14 +21,6 @@
__all__ = ['decode'] # Everything else is implementation detail


if sys.version_info[0] < 3: # pragma: no cover
def _unicode_to_native(string):
return string.encode('utf8')
else: # pragma: no cover
def _unicode_to_native(string):
return string


def decode(css_bytes, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""
Expand Down Expand Up @@ -63,11 +55,17 @@ def decode(css_bytes, protocol_encoding=None,
if has_at_charset:
extract, endianness = encoding
encoding = extract(match.group(1))
# Get an ASCII-only unicode value.
# This is the only thing that works on both Python 2 and 3
# for bytes.decode()
# Non-ASCII encoding names are invalid anyway,
# but make sure they stay invalid.
encoding = encoding.decode('ascii', 'replace')
encoding = encoding.replace('\ufffd', '?')
if encoding.replace('-', '').replace('_', '').lower() in [
'utf16', 'utf32']:
encoding += endianness
encoding = _unicode_to_native(encoding)
encoding = encoding.encode('ascii', 'replace').decode('ascii')
css_unicode = try_encoding(css_bytes, encoding)
if css_unicode and not (has_at_charset and not
css_unicode.startswith('@charset "')):
Expand All @@ -85,13 +83,14 @@ def decode(css_bytes, protocol_encoding=None,


def try_encoding(css_bytes, encoding, fallback=True):
try:
if fallback:
try:
css_unicode = css_bytes.decode(encoding)
# LookupError means unknown encoding
except (UnicodeDecodeError, LookupError):
return None
else:
css_unicode = css_bytes.decode(encoding)
# LookupError means unknown encoding
except (UnicodeDecodeError, LookupError):
if not fallback:
raise
return None
if css_unicode and css_unicode[0] == '\ufeff':
# Remove any Byte Order Mark
css_unicode = css_unicode[1:]
Expand Down
3 changes: 1 addition & 2 deletions tinycss/page3.py
Expand Up @@ -114,8 +114,7 @@ def parse_at_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule.head[0],
'unexpected %s token in %s rule header'
% (rule.head[0].type, rule.at_keyword))
declarations, body_errors = self.parse_declaration_list(
rule.body.content)
declarations, body_errors = self.parse_declaration_list(rule.body)
errors.extend(body_errors)
return MarginRule(rule.at_keyword, declarations,
rule.line, rule.column)
Expand Down
2 changes: 1 addition & 1 deletion tinycss/parsing.py
Expand Up @@ -165,5 +165,5 @@ def __init__(self, subject, reason):
'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))
super(ParseError, self).__init__(self.message)

def __repr__(self): # pragma: no cover
def __repr__(self):
return ('<{0.__class__.__name__}: {0.message}>'.format(self))
6 changes: 3 additions & 3 deletions tinycss/selectors3.py
Expand Up @@ -21,7 +21,7 @@

try:
from lxml import cssselect
except ImportError as exc: # pragma: no cover
except ImportError as exc:
exc.message = exc.msg = (
__name__ + ' depends on lxml.cssselect. Please install lxml '
'with "pip install lxml" or from http://lxml.de/')
Expand Down Expand Up @@ -116,7 +116,7 @@ def parse_selector_group_string(css_string):


def _parse_selector_group_tokens(group_tokens):
return [parse_selector_string(''.join(t.as_css for t in tokens))
return [parse_selector_string(''.join(t.as_css() for t in tokens))
for tokens in split_on_comma(group_tokens)]


Expand Down Expand Up @@ -236,5 +236,5 @@ def parse_ruleset(self, first_token, tokens):
except InvalidSelectorError as exc:
# Invalidate the whole ruleset even if some selectors
# in the selector group are valid.
raise ParseError(ruleset, exc.args[0])
raise ParseError(ruleset.selector, exc.args[0])
return ruleset, errors
13 changes: 10 additions & 3 deletions tinycss/speedups.pyx
Expand Up @@ -29,18 +29,25 @@ cdef class CToken:
"""
is_container = False

cdef public object type, as_css, value, unit
cdef public object type, _as_css, value, unit
cdef public Py_ssize_t line, column

def __init__(self, type_, css_value, value, unit, line, column):
self.type = type_
self.as_css = css_value
self._as_css = css_value
self.value = value
self.unit = unit
self.line = line
self.column = column

def __repr__(self): # pragma: no cover
def as_css(self):
"""
Return as an Unicode string the CSS representation of the token,
as parsed in the source.
"""
return self._as_css

def __repr__(self):
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
.format(self, self.unit or ''))

Expand Down
2 changes: 1 addition & 1 deletion tinycss/tests/speed.py
Expand Up @@ -57,7 +57,7 @@ def parse(tokenizer_name):
stylesheet = CSS21Parser().parse_stylesheet_bytes(CSS)
result = []
for rule in stylesheet.rules:
selector = ''.join(s.as_css for s in rule.selector)
selector = rule.selector.as_css()
declarations = [
(declaration.name, len(list(remove_whitespace(declaration.value))))
for declaration in rule.declarations]
Expand Down

0 comments on commit 8ceae28

Please sign in to comment.