Skip to content

Commit

Permalink
Add parser support for functional pseudo-elements. See #29.
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonSapin committed Sep 15, 2013
1 parent 039a844 commit a4b12ae
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 32 deletions.
63 changes: 46 additions & 17 deletions cssselect/parser.py
Expand Up @@ -57,7 +57,8 @@ class Selector(object):
"""
def __init__(self, tree, pseudo_element=None):
self.parsed_tree = tree
if pseudo_element is not None:
if pseudo_element is not None and not isinstance(
pseudo_element, FunctionalPseudoElement):
pseudo_element = ascii_lower(pseudo_element)
#: The identifier for the pseudo-element as a string, or ``None``.
#:
Expand All @@ -78,6 +79,8 @@ def __init__(self, tree, pseudo_element=None):
self.pseudo_element = pseudo_element

def __repr__(self):
if isinstance(self.pseudo_element, FunctionalPseudoElement):
pseudo_element = repr(self.pseudo_element)
if self.pseudo_element:
pseudo_element = '::%s' % self.pseudo_element
else:
Expand Down Expand Up @@ -115,6 +118,28 @@ def specificity(self):
return a, b, c


class FunctionalPseudoElement(object):
    """
    Represents selector::name(expr)

    ``name`` is the pseudo-element name, normalised with ``ascii_lower``.
    ``arguments`` is the sequence of argument tokens found between the
    parentheses; each token is expected to expose ``.type`` and ``.value``.
    """
    def __init__(self, name, arguments):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self):
        # Only the token values are shown, e.g.
        # FunctionalPseudoElement[::attr(['name'])]
        return '%s[::%s(%r)]' % (
            self.__class__.__name__, self.name,
            [token.value for token in self.arguments])

    def argument_types(self):
        """Return the list of ``.type`` values of the argument tokens."""
        return [token.type for token in self.arguments]

    def specificity(self):
        """Return this pseudo-element's own ``(a, b, c)`` contribution.

        This object holds no reference to the selector it is attached to
        (the previous implementation read a ``self.selector`` attribute
        that is never set and therefore always raised AttributeError).
        A pseudo-element counts once in the ``c`` component, exactly like
        a type selector, so the contribution is ``(0, 0, 1)``.
        """
        return 0, 0, 1


class Function(object):
"""
Represents selector:name(expr)
Expand Down Expand Up @@ -398,6 +423,10 @@ def parse_simple_selector(stream, inside_negation=False):
if stream.peek() == ('DELIM', ':'):
stream.next()
pseudo_element = stream.next_ident()
if stream.peek() == ('DELIM', '('):
stream.next()
pseudo_element = FunctionalPseudoElement(
pseudo_element, parse_arguments(stream))
continue
ident = stream.next_ident()
if ident.lower() in ('first-line', 'first-letter',
Expand Down Expand Up @@ -425,22 +454,7 @@ def parse_simple_selector(stream, inside_negation=False):
raise SelectorSyntaxError("Expected ')', got %s" % (next,))
result = Negation(result, argument)
else:
arguments = []
while 1:
stream.skip_whitespace()
next = stream.next()
if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
('DELIM', '+'), ('DELIM', '-')]:
arguments.append(next)
elif next == ('DELIM', ')'):
break
else:
raise SelectorSyntaxError(
"Expected an argument, got %s" % (next,))
if not arguments:
raise SelectorSyntaxError(
"Expected at least one argument, got %s" % (next,))
result = Function(result, ident, arguments)
result = Function(result, ident, parse_arguments(stream))
else:
raise SelectorSyntaxError(
"Expected selector, got %s" % (peek,))
Expand All @@ -450,6 +464,21 @@ def parse_simple_selector(stream, inside_negation=False):
return result, pseudo_element


def parse_arguments(stream):
    """Read function-style arguments from ``stream`` up to the closing ')'.

    Whitespace is skipped before each token.  IDENT, STRING and NUMBER
    tokens, as well as the '+' and '-' delimiters, are collected in order.
    The ')' delimiter ends the list and is consumed; the collected tokens
    (possibly none) are returned.  Any other token raises
    SelectorSyntaxError.
    """
    value_types = ('IDENT', 'STRING', 'NUMBER')
    sign_delims = [('DELIM', '+'), ('DELIM', '-')]
    collected = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token.type in value_types or token in sign_delims:
            collected.append(token)
            continue
        if token == ('DELIM', ')'):
            return collected
        raise SelectorSyntaxError(
            "Expected an argument, got %s" % (token,))


def parse_attrib(selector, stream):
stream.skip_whitespace()
attrib = stream.next_ident_or_star()
Expand Down
76 changes: 73 additions & 3 deletions cssselect/tests.py
Expand Up @@ -23,7 +23,9 @@
from lxml import etree, html
from cssselect import (parse, GenericTranslator, HTMLTranslator,
SelectorSyntaxError, ExpressionError)
from cssselect.parser import tokenize, parse_series, _unicode
from cssselect.parser import (tokenize, parse_series, _unicode,
FunctionalPseudoElement)
from cssselect.xpath import _unicode_safe_getattr, XPathExpr


if sys.version_info[0] < 3:
Expand Down Expand Up @@ -150,6 +152,7 @@ def parse_pseudo(css):
result = []
for selector in parse(css):
pseudo = selector.pseudo_element
pseudo = _unicode(pseudo) if pseudo else pseudo
# No Symbol here
assert pseudo is None or type(pseudo) is _unicode
selector = repr(selector.parsed_tree).replace("(u'", "('")
Expand All @@ -176,6 +179,10 @@ def parse_one(css):
assert parse_one('::firsT-linE') == ('Element[*]', 'first-line')
assert parse_one('::firsT-letteR') == ('Element[*]', 'first-letter')

assert parse_one('::text-content') == ('Element[*]', 'text-content')
assert parse_one('::attr(name)') == (
"Element[*]", "FunctionalPseudoElement[::attr(['name'])]")

assert parse_one('::Selection') == ('Element[*]', 'selection')
assert parse_one('foo:after') == ('Element[foo]', 'after')
assert parse_one('foo::selection') == ('Element[foo]', 'selection')
Expand Down Expand Up @@ -264,8 +271,6 @@ def get_error(css):
"Expected ident or '*', got <DELIM '#' at 1>")
assert get_error('[foo=#]') == (
"Expected string or ident, got <DELIM '#' at 5>")
assert get_error(':nth-child()') == (
"Expected at least one argument, got <DELIM ')' at 11>")
assert get_error('[href]a') == (
"Expected selector, got <IDENT 'a' at 6>")
assert get_error('[rel=stylesheet]') == None
Expand Down Expand Up @@ -436,6 +441,71 @@ def test_unicode_escapes(self):
assert css_to_xpath('*[aval="\'\\20\r\n \'"]') == (
'''descendant-or-self::*[@aval = "' '"]''')

def test_xpath_pseudo_elements(self):
    """Check that a translator sub-class can add pseudo-element support.

    A custom translator overrides ``xpath_pseudo_element`` to dispatch to
    per-name methods, for both simple (``::name``) and functional
    (``::name(args)``) pseudo-elements, next to custom pseudo-classes.
    """
    class CustomTranslator(GenericTranslator):
        def xpath_pseudo_element(self, xpath, pseudo_element):
            if isinstance(pseudo_element, FunctionalPseudoElement):
                method = 'xpath_%s_functional_pseudo_element' % (
                    pseudo_element.name.replace('-', '_'))
                method = _unicode_safe_getattr(self, method, None)
                if not method:
                    # Use the name of the pseudo-element being translated;
                    # no other name is in scope here.
                    raise ExpressionError(
                        "The functional pseudo-element ::%s() is unknown"
                        % pseudo_element.name)
                xpath = method(xpath, pseudo_element.arguments)
            else:
                method = 'xpath_%s_simple_pseudo_element' % (
                    pseudo_element.replace('-', '_'))
                method = _unicode_safe_getattr(self, method, None)
                if not method:
                    raise ExpressionError(
                        "The pseudo-element ::%s is unknown"
                        % pseudo_element)
                xpath = method(xpath)
            return xpath

        # functional pseudo-class:
        # elements that have a certain number of attributes
        def xpath_nb_attr_function(self, xpath, function):
            nb_attributes = int(function.arguments[0].value)
            return xpath.add_condition(
                "count(@*)=%d" % nb_attributes)

        # pseudo-class:
        # elements that have 5 attributes
        def xpath_five_attributes_pseudo(self, xpath):
            return xpath.add_condition("count(@*)=5")

        # functional pseudo-element:
        # element's attribute by name
        def xpath_attr_functional_pseudo_element(self, xpath, arguments):
            attribute_name = arguments[0].value
            other = XPathExpr('@%s' % attribute_name, '', )
            return xpath.join('/', other)

        # pseudo-element:
        # element's text() nodes
        def xpath_text_node_simple_pseudo_element(self, xpath):
            other = XPathExpr('text()', '', )
            return xpath.join('/', other)

        # pseudo-element:
        # element's href attribute
        def xpath_attr_href_simple_pseudo_element(self, xpath):
            other = XPathExpr('@href', '', )
            return xpath.join('/', other)

    def xpath(css):
        return _unicode(CustomTranslator().css_to_xpath(css))

    assert xpath(':five-attributes') == "descendant-or-self::*[count(@*)=5]"
    assert xpath(':nb-attr(3)') == "descendant-or-self::*[count(@*)=3]"
    assert xpath('::attr(href)') == "descendant-or-self::*/@href"
    assert xpath('::text-node') == "descendant-or-self::*/text()"
    assert xpath('::attr-href') == "descendant-or-self::*/@href"
    assert xpath('p img::attr(src)') == (
        "descendant-or-self::p/descendant-or-self::*/img/@src")

def test_series(self):
def series(css):
selector, = parse(':nth-child(%s)' % css)
Expand Down
34 changes: 22 additions & 12 deletions cssselect/xpath.py
Expand Up @@ -26,6 +26,12 @@
_unicode = str


def _unicode_safe_getattr(obj, name, default=None):
# getattr() with a non-ASCII name fails on Python 2.x
name = name.encode('ascii', 'replace').decode('ascii')
return getattr(obj, name, default)


class ExpressionError(SelectorError, RuntimeError):
"""Unknown or unsupported selector (eg. pseudo-class)."""

Expand Down Expand Up @@ -178,14 +184,9 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
The equivalent XPath 1.0 expression as an Unicode string.
"""
selectors = parse(css)
for selector in selectors:
if selector.pseudo_element:
raise ExpressionError('Pseudo-elements are not supported.')

return ' | '.join(
self.selector_to_xpath(selector, prefix)
for selector in selectors)
for selector in parse(css))

def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
"""Translate a parsed selector to XPath.
Expand All @@ -207,8 +208,21 @@ def selector_to_xpath(self, selector, prefix='descendant-or-self::'):
raise TypeError('Expected a parsed selector, got %r' % (selector,))
xpath = self.xpath(tree)
assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
if selector.pseudo_element:
xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
return (prefix or '') + _unicode(xpath)

def xpath_pseudo_element(self, xpath, pseudo_element):
    """Apply a pseudo-element to an already translated XPath expression.

    This base implementation supports no pseudo-element: a falsy
    ``pseudo_element`` returns ``xpath`` unchanged, anything else raises
    ExpressionError.  Sub-classes may override this method to add
    support.
    """
    if not pseudo_element:
        return xpath
    raise ExpressionError('Pseudo-elements are not supported.')

@staticmethod
def xpath_literal(s):
s = _unicode(s)
Expand Down Expand Up @@ -253,9 +267,7 @@ def xpath_negation(self, negation):
def xpath_function(self, function):
"""Translate a functional pseudo-class."""
method = 'xpath_%s_function' % function.name.replace('-', '_')
# getattr() with a non-ASCII name fails on Python 2.x
method = method.encode('ascii', 'replace').decode('ascii')
method = getattr(self, method, None)
method = _unicode_safe_getattr(self, method, None)
if not method:
raise ExpressionError(
"The pseudo-class :%s() is unknown" % function.name)
Expand All @@ -264,9 +276,7 @@ def xpath_function(self, function):
def xpath_pseudo(self, pseudo):
"""Translate a pseudo-class."""
method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
# getattr() with a non-ASCII name fails on Python 2.x
method = method.encode('ascii', 'replace').decode('ascii')
method = getattr(self, method, None)
method = _unicode_safe_getattr(self, method, None)
if not method:
# TODO: better error message for pseudo-elements?
raise ExpressionError(
Expand Down

0 comments on commit a4b12ae

Please sign in to comment.