Skip to content
Newer
Older
100644 695 lines (583 sloc) 24.4 KB
4112258 change coding: utf8 to utf-8
James Salter authored Jan 26, 2015
1 # coding: utf-8
065140c @SimonSapin Split cssselect into sub-modules.
authored Apr 15, 2012
2 """
3 cssselect.xpath
4 ===============
5
6 Translation of parsed CSS selectors to XPath expressions.
7
8
9 :copyright: (c) 2007-2012 Ian Bicking and contributors.
10 See AUTHORS for more details.
11 :license: BSD, see LICENSE for more details.
12
13 """
14
b551594 @SimonSapin Better test coverage.
authored Apr 20, 2012
15 import sys
065140c @SimonSapin Split cssselect into sub-modules.
authored Apr 15, 2012
16 import re
b551594 @SimonSapin Better test coverage.
authored Apr 20, 2012
17
d405f89 @SimonSapin Add tests for series with whitespace
authored Jun 14, 2012
18 from cssselect.parser import parse, parse_series, SelectorError
065140c @SimonSapin Split cssselect into sub-modules.
authored Apr 15, 2012
19
20
b551594 @SimonSapin Better test coverage.
authored Apr 20, 2012
21 if sys.version_info[0] < 3:
3e9fcf1 @SimonSapin Move the prefix logic and the fast path into Translator.
authored Apr 15, 2012
22 _basestring = basestring
23 _unicode = unicode
b551594 @SimonSapin Better test coverage.
authored Apr 20, 2012
24 else:
3e9fcf1 @SimonSapin Move the prefix logic and the fast path into Translator.
authored Apr 15, 2012
25 _basestring = str
26 _unicode = str
27
28
a4b12ae @SimonSapin Add parser support for functional pseudo-elements. See #29.
authored Sep 15, 2013
def _unicode_safe_getattr(obj, name, default=None):
    """Fetch attribute ``name`` from ``obj`` using an ASCII-only name.

    ``getattr()`` rejects non-ASCII names on Python 2.x, so every
    non-ASCII character in ``name`` is substituted before the lookup.
    ``default`` is returned when the resulting attribute does not exist.
    """
    ascii_name = name.encode('ascii', 'replace').decode('ascii')
    return getattr(obj, ascii_name, default)
33
34
dfad9fc @SimonSapin Better error handling
authored Apr 15, 2012
class ExpressionError(SelectorError, RuntimeError):
    """Raised for a selector that is unknown or unsupported.

    An unrecognised pseudo-class is the typical example.
    """
dfad9fc @SimonSapin Better error handling
authored Apr 15, 2012
37
38
3e9fcf1 @SimonSapin Move the prefix logic and the fast path into Translator.
authored Apr 15, 2012
39 #### XPath Helpers
065140c @SimonSapin Split cssselect into sub-modules.
authored Apr 15, 2012
40
class XPathExpr(object):
    """Mutable XPath fragment made of ``path``, ``element`` and ``condition``.

    Rendered as ``path + element``, followed by ``[condition]`` when a
    condition is set.
    """

    def __init__(self, path='', element='*', condition='', star_prefix=False):
        # ``star_prefix`` is accepted but never read by this class.
        self.path, self.element, self.condition = path, element, condition

    def __str__(self):
        rendered = _unicode(self.path) + _unicode(self.element)
        if not self.condition:
            return rendered
        return rendered + '[%s]' % self.condition

    def __repr__(self):
        return '%s[%s]' % (self.__class__.__name__, self)

    def add_condition(self, condition):
        """AND ``condition`` onto the current predicate and return ``self``."""
        existing = self.condition
        if existing:
            # The new term is parenthesised so that an ``or`` inside it
            # cannot leak into the existing condition.
            condition = '%s and (%s)' % (existing, condition)
        self.condition = condition
        return self

    def add_name_test(self):
        """Move the element name into a ``name() = ...`` condition.

        Does nothing when the element is already the wildcard ``*``.
        """
        if self.element != '*':
            literal = GenericTranslator.xpath_literal(self.element)
            self.add_condition("name() = %s" % literal)
            self.element = '*'

    def add_star_prefix(self):
        """
        Append '*/' to the path to keep the context constrained
        to a single parent.
        """
        self.path = self.path + '*/'

    def join(self, combiner, other):
        """Absorb ``other`` after this expression, glued with ``combiner``.

        The text rendered so far becomes the new path; element and
        condition are taken over from ``other``.  Returns ``self``.
        """
        joined = _unicode(self) + combiner
        # Any "star prefix" is redundant when joining.
        suffix = '' if other.path == '*/' else other.path
        self.path = joined + suffix
        self.element = other.element
        self.condition = other.condition
        return self
88
065140c @SimonSapin Split cssselect into sub-modules.
authored Apr 15, 2012
89
90 split_at_single_quotes = re.compile("('+)").split
91
c221b7b @SimonSapin Workaround element/attribute names with special characters
authored Jun 14, 2012
92 # The spec is actually more permissive than that, but don’t bother.
93 # This is just for the fast path.
94 # http://www.w3.org/TR/REC-xml/#NT-NameStartChar
95 is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
96
e48ecc6 @SimonSapin Fix #10: '~=', '^=' and '*=' attribute operators with an empty string
authored Jun 14, 2012
97 # Test that the string is not empty and does not contain whitespace
98 is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
99
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
100
101 #### Translation
102
f70f60e @SimonSapin Make a new HTMLTranslator and move :checked there.
authored Apr 17, 2012
103 class GenericTranslator(object):
104 """
105 Translator for "generic" XML documents.
35a2f57 @SimonSapin Fix case-sensitivity issues
authored Apr 19, 2012
106
107 Everything is case-sensitive, no assumption is made on the meaning
108 of element names and attribute names.
109
f70f60e @SimonSapin Make a new HTMLTranslator and move :checked there.
authored Apr 17, 2012
110 """
eac05a4 @SimonSapin HERE BE DRAGONS
authored Jan 10, 2013
111
112 ####
113 #### HERE BE DRAGONS
114 ####
115 #### You are welcome to hook into this to change some behavior,
116 #### but do so at your own risks.
117 #### Until it has received a lot more work and review,
118 #### I reserve the right to change this API in backward-incompatible ways
119 #### with any minor version of cssselect.
120 #### See https://github.com/SimonSapin/cssselect/pull/22
121 #### -- Simon Sapin.
122 ####
123
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
124 combinator_mapping = {
125 ' ': 'descendant',
126 '>': 'child',
127 '+': 'direct_adjacent',
128 '~': 'indirect_adjacent',
129 }
130
131 attribute_operator_mapping = {
132 'exists': 'exists',
133 '=': 'equals',
134 '~=': 'includes',
135 '|=': 'dashmatch',
136 '^=': 'prefixmatch',
137 '$=': 'suffixmatch',
138 '*=': 'substringmatch',
139 '!=': 'different', # XXX Not in Level 3 but meh
140 }
141
baebdf3 @SimonSapin Make the attribute for ID selectors overridable.
authored Apr 18, 2012
142 #: The attribute used for ID selectors depends on the document language:
143 #: http://www.w3.org/TR/selectors/#id-selectors
144 id_attribute = 'id'
145
856621a @SimonSapin Add support for :lang(), close #3
authored Apr 24, 2012
146 #: The attribute used for ``:lang()`` depends on the document language:
147 #: http://www.w3.org/TR/selectors/#lang-pseudo
148 lang_attribute = 'xml:lang'
149
35a2f57 @SimonSapin Fix case-sensitivity issues
authored Apr 19, 2012
150 #: The case sensitivity of document language element names,
151 #: attribute names, and attribute values in selectors depends
152 #: on the document language.
153 #: http://www.w3.org/TR/selectors/#casesens
154 #:
155 #: When a document language defines one of these as case-insensitive,
156 #: cssselect assumes that the document parser makes the parsed values
157 #: lower-case. Making the selector lower-case too makes the comparison
158 #: case-insensitive.
159 #:
160 #: In HTML, element names and attributes names (but not attribute values)
161 #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
162 #: and HTMLParser make them lower-case in their parse result, so
163 #: the assumption holds.
164 lower_case_element_names = False
165 lower_case_attribute_names = False
166 lower_case_attribute_values = False
167
91e752d @dangra Let extended translators override what XPathExpr class is used
dangra authored Jan 10, 2013
168 # class used to represent an XPath expression
169 xpathexpr_cls = XPathExpr
170
7e8c0ce @SimonSapin Add proper parser support for pseudo-elements.
authored Apr 18, 2012
def css_to_xpath(self, css, prefix='descendant-or-self::'):
    """Translate a *group of selectors* to XPath.

    Each selector of the group is translated on its own and the
    results are combined with the XPath union operator ``|``.

    Pseudo-elements are not supported here since XPath only knows
    about "real" elements.

    :param css:
        A *group of selectors* as an Unicode string.
    :param prefix:
        This string is prepended to the XPath expression for each selector.
        The default makes selectors scoped to the context node's subtree.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors,
        :class:`ExpressionError` on unknown/unsupported selectors,
        including pseudo-elements.
    :returns:
        The equivalent XPath 1.0 expression as an Unicode string.

    """
    translated = (
        self.selector_to_xpath(
            selector, prefix, translate_pseudo_elements=True)
        for selector in parse(css)
    )
    return ' | '.join(translated)
3e9fcf1 @SimonSapin Move the prefix logic and the fast path into Translator.
authored Apr 15, 2012
193
d29ac49 @SimonSapin Switch back to default to ignoring pseudo-elements
authored Oct 17, 2013
def selector_to_xpath(self, selector, prefix='descendant-or-self::',
                      translate_pseudo_elements=False):
    """Translate a parsed selector to XPath.

    :param selector:
        A parsed :class:`Selector` object.
    :param prefix:
        This string is prepended to the resulting XPath expression.
        The default makes selectors scoped to the context node's subtree.
    :param translate_pseudo_elements:
        Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
        the :attr:`~Selector.pseudo_element` attribute of the selector
        is ignored.
        It is the caller's responsibility to reject selectors
        with pseudo-elements, or to account for them somehow.
    :raises:
        :class:`ExpressionError` on unknown/unsupported selectors,
        :class:`TypeError` when ``selector`` has no (truthy)
        ``parsed_tree`` attribute.
    :returns:
        The equivalent XPath 1.0 expression as an Unicode string.

    """
    parsed = getattr(selector, 'parsed_tree', None)
    if not parsed:
        raise TypeError('Expected a parsed selector, got %r' % (selector,))
    expression = self.xpath(parsed)
    # Helps debugging a translation method that forgot its 'return'.
    assert isinstance(expression, self.xpathexpr_cls)
    if translate_pseudo_elements and selector.pseudo_element:
        expression = self.xpath_pseudo_element(
            expression, selector.pseudo_element)
    leading = prefix or ''
    return leading + _unicode(expression)
817c1c5 @SimonSapin New API:parse, Selector, selector_to_xpath
authored Apr 18, 2012
224
a4b12ae @SimonSapin Add parser support for functional pseudo-elements. See #29.
authored Sep 15, 2013
def xpath_pseudo_element(self, xpath, pseudo_element):
    """Translate a pseudo-element.

    This base implementation rejects every pseudo-element by raising
    :class:`ExpressionError`; sub-classes may override it.

    """
    message = 'Pseudo-elements are not supported.'
    raise ExpressionError(message)
a4b12ae @SimonSapin Add parser support for functional pseudo-elements. See #29.
authored Sep 15, 2013
233
fe8acda @SimonSapin Make xpath_literal a staticmethod of translators.
authored Apr 17, 2012
@staticmethod
def xpath_literal(s):
    """Return ``s`` rendered as an XPath 1.0 string literal.

    XPath 1.0 string literals have no escape mechanism, so the quoting
    style depends on which quote characters ``s`` contains:

    * no single quote: wrapped in single quotes;
    * single quotes but no double quote: wrapped in double quotes;
    * both: a ``concat(...)`` call whose arguments alternate between
      runs of single quotes (double-quoted) and the text in between
      (single-quoted).
    """
    s = _unicode(s)
    if "'" not in s:
        return "'%s'" % s
    if '"' not in s:
        return '"%s"' % s
    pieces = []
    for part in split_at_single_quotes(s):
        if not part:
            # The split yields empty strings at the edges; skip them.
            continue
        # A part is either a run of single quotes or quote-free text.
        template = '"%s"' if "'" in part else "'%s'"
        pieces.append(template % part)
    return "concat(%s)" % ','.join(pieces)
247
935351a @SimonSapin Simplify the XPath translation.
authored Apr 18, 2012
def xpath(self, parsed_selector):
    """Translate any parsed selector object.

    Dispatches to the method named ``xpath_<lower-cased type name>``
    and raises :class:`ExpressionError` when no such method exists.
    """
    type_name = type(parsed_selector).__name__
    handler = getattr(self, 'xpath_%s' % type_name.lower(), None)
    if handler is not None:
        return handler(parsed_selector)
    raise ExpressionError('%s is not supported.' % type_name)
255
256
257 # Dispatched by parsed object type
258
def xpath_combinedselector(self, combined):
    """Translate a combined selector.

    The combinator token is mapped to a name through
    ``combinator_mapping`` and handled by ``xpath_<name>_combinator``.
    """
    combinator_name = self.combinator_mapping[combined.combinator]
    handler = getattr(self, 'xpath_%s_combinator' % combinator_name)
    left = self.xpath(combined.selector)
    right = self.xpath(combined.subselector)
    return handler(left, right)
265
86f3e7e @SimonSapin Add a special parsed object for negation. (Prepare for specificity.)
authored Apr 18, 2012
def xpath_negation(self, negation):
    """Translate ``:not(...)``.

    The negated sub-selector is reduced to a condition (its element
    name becomes a ``name()`` test) which is wrapped in ``not(...)``.
    When that leaves no condition, as for ``:not(*)``, the never-true
    condition ``0`` is added instead.
    """
    xpath = self.xpath(negation.selector)
    negated = self.xpath(negation.subselector)
    negated.add_name_test()
    if not negated.condition:
        return xpath.add_condition('0')
    return xpath.add_condition('not(%s)' % negated.condition)
86f3e7e @SimonSapin Add a special parsed object for negation. (Prepare for specificity.)
authored Apr 18, 2012
274
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
def xpath_function(self, function):
    """Translate a functional pseudo-class.

    ``:foo-bar(...)`` is handled by the method ``xpath_foo_bar_function``;
    :class:`ExpressionError` is raised when no such method exists.
    """
    method_name = 'xpath_%s_function' % function.name.replace('-', '_')
    handler = _unicode_safe_getattr(self, method_name, None)
    if handler:
        return handler(self.xpath(function.selector), function)
    raise ExpressionError(
        "The pseudo-class :%s() is unknown" % function.name)
283
def xpath_pseudo(self, pseudo):
    """Translate a pseudo-class.

    ``:foo-bar`` is handled by the method ``xpath_foo_bar_pseudo``;
    :class:`ExpressionError` is raised when no such method exists.
    """
    method_name = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
    handler = _unicode_safe_getattr(self, method_name, None)
    if handler:
        return handler(self.xpath(pseudo.selector))
    # TODO: better error message for pseudo-elements?
    raise ExpressionError(
        "The pseudo-class :%s is unknown" % pseudo.ident)
293
294
def xpath_attrib(self, selector):
    """Translate an attribute selector.

    The selector's operator is mapped through
    ``attribute_operator_mapping`` to an ``xpath_attrib_<name>`` method,
    which receives the translated inner selector, the XPath spelling
    of the attribute and the value to compare against.

    Attribute names (and namespace prefixes) that do not pass
    ``is_safe_name`` are matched with a ``name() = ...`` test instead
    of the ``@name`` shorthand.
    """
    operator = self.attribute_operator_mapping[selector.operator]
    method = getattr(self, 'xpath_attrib_%s' % operator)
    name = selector.attrib
    if self.lower_case_attribute_names:
        name = name.lower()
    safe = is_safe_name(name)
    if selector.namespace:
        name = '%s:%s' % (selector.namespace, name)
        safe = safe and is_safe_name(selector.namespace)
    if safe:
        attrib = '@' + name
    else:
        attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
    value = selector.value
    # A bare ``[attr]`` selector has no value to compare (the "exists"
    # handler asserts it is falsy); it may be None, so only lower-case
    # values that are actually present.
    if self.lower_case_attribute_values and value is not None:
        value = value.lower()
    return method(self.xpath(selector.selector), attrib, value)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
316
def xpath_class(self, class_selector):
    """Translate a class selector.

    The spec defines ``.foo`` as ``[class~=foo]``, so this reuses the
    ``~=`` ("includes") translation on the ``class`` attribute.
    """
    inner = self.xpath(class_selector.selector)
    class_name = class_selector.class_name
    return self.xpath_attrib_includes(inner, '@class', class_name)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
323
def xpath_hash(self, id_selector):
    """Translate an ID selector.

    ``#foo`` becomes an equality test on the attribute named by
    ``self.id_attribute`` (``'id'`` unless a sub-class overrides it),
    so translators for other document languages can change which
    attribute carries the ID.
    """
    xpath = self.xpath(id_selector.selector)
    attribute = '@%s' % self.id_attribute
    return self.xpath_attrib_equals(xpath, attribute, id_selector.id)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
328
def xpath_element(self, selector):
    """Translate a type or universal selector.

    A missing element name means the universal selector ``*``.  Names
    or namespace prefixes that do not pass ``is_safe_name`` are matched
    through a ``name() = ...`` test instead of a plain node test.
    """
    element = selector.element
    if element:
        safe = is_safe_name(element)
        if self.lower_case_element_names:
            element = element.lower()
    else:
        element = '*'
        safe = True
    if selector.namespace:
        # Namespace prefixes are case-sensitive.
        # http://www.w3.org/TR/css3-namespace/#prefixes
        element = '%s:%s' % (selector.namespace, element)
        safe = safe and is_safe_name(selector.namespace)
    xpath = self.xpathexpr_cls(element=element)
    if not safe:
        xpath.add_name_test()
    return xpath
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
348
349
350 # CombinedSelector: dispatch by combinator
351
def xpath_descendant_combinator(self, left, right):
    """Join so that ``right`` is a descendant, at any depth, of ``left``."""
    axis = '/descendant-or-self::*/'
    return left.join(axis, right)
355
def xpath_child_combinator(self, left, right):
    """Join so that ``right`` is an immediate child of ``left``."""
    separator = '/'
    return left.join(separator, right)
359
def xpath_direct_adjacent_combinator(self, left, right):
    """Join so that ``right`` is the sibling immediately after ``left``.

    The element name is turned into a ``name()`` test so that
    ``position() = 1`` counts among all following siblings.
    """
    joined = left.join('/following-sibling::', right)
    joined.add_name_test()
    return joined.add_condition('position() = 1')
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
365
def xpath_indirect_adjacent_combinator(self, left, right):
    """Join so that ``right`` is any later sibling of ``left``."""
    axis = '/following-sibling::'
    return left.join(axis, right)
369
370
371 # Function: dispatch by function/pseudo-class name
372
def xpath_nth_child_function(self, xpath, function, last=False,
                             add_name_test=True):
    """Translate ``:nth-child(an+b)`` and its relatives.

    An element whose 1-based index among its siblings is ``i`` matches
    when ``i = a*n + b`` for some integer ``n >= 0``:

    * ``0n+b`` matches the single index ``b``;
    * ``2n+1`` matches odd indexes, ``2n`` even ones;
    * ``n+0`` (``a = 1``) matches every element;
    * ``-n+6`` matches index 6 and everything before it.

    :param xpath:
        The translated expression the pseudo-class applies to;
        it is modified in place and returned.
    :param function:
        The parsed function; its ``arguments`` are handed to
        ``parse_series`` to obtain ``a`` and ``b``.
    :param last:
        When true the index is counted from the last sibling
        (``:nth-last-child``, ``:nth-last-of-type``).
    :param add_name_test:
        When true the element name is moved into a ``name()`` test so
        that the index counts all sibling elements; when false only
        siblings with the same name are counted (``*-of-type``).
    :raises:
        :class:`ExpressionError` when the series cannot be parsed.
    """
    try:
        a, b = parse_series(function.arguments)
    except ValueError:
        raise ExpressionError("Invalid series: '%r'" % function.arguments)
    if add_name_test:
        xpath.add_name_test()
    xpath.add_star_prefix()
    # 1-based index of the candidate, counted from whichever end applies.
    if last:
        index = '(last() - position() + 1)'
    else:
        index = 'position()'
    if a == 0:
        return xpath.add_condition('%s = %s' % (index, b))
    conditions = []
    step = abs(a)
    if step != 1:
        # (index - b) must be a multiple of the step.
        if b > 0:
            offset = '-%s' % b
        else:
            offset = '+%s' % (-b)
        conditions.append('(%s %s) mod %s = 0' % (index, offset, step))
    if a > 0:
        # n >= 0 means index >= b; trivially true when b is negative.
        if b >= 0:
            conditions.append('%s >= %s' % (index, b))
    else:
        # With a negative step the series runs downwards from b.
        conditions.append('%s <= %s' % (index, b))
    if conditions:
        xpath.add_condition(' and '.join(conditions))
    return xpath
413
def xpath_nth_last_child_function(self, xpath, function):
    """Translate ``:nth-last-child()`` via the ``last=True`` variant."""
    translate = self.xpath_nth_child_function
    return translate(xpath, function, last=True)
416
def xpath_nth_of_type_function(self, xpath, function):
    """Translate ``:nth-of-type()``.

    Only supported on a named element: the position is counted among
    same-named siblings, so no ``name()`` test is added.
    """
    if xpath.element != '*':
        return self.xpath_nth_child_function(xpath, function,
                                             add_name_test=False)
    raise ExpressionError(
        "*:nth-of-type() is not implemented")
423
def xpath_nth_last_of_type_function(self, xpath, function):
    """Translate ``:nth-last-of-type()``.

    Only supported on a named element: the position is counted from
    the end among same-named siblings, so no ``name()`` test is added.

    :raises:
        :class:`ExpressionError` when applied to the universal
        selector ``*``.
    """
    if xpath.element == '*':
        # Name the pseudo-class that was actually used.
        raise ExpressionError(
            "*:nth-last-of-type() is not implemented")
    return self.xpath_nth_child_function(xpath, function, last=True,
                                         add_name_test=False)
430
5491775 @SimonSapin Add :contains() back, but case-sensitive.
authored Apr 17, 2012
def xpath_contains_function(self, xpath, function):
    """Translate ``:contains(text)`` into an XPath ``contains()`` test.

    Accepts exactly one string or identifier argument.
    """
    # Defined there, removed in later drafts:
    # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
    accepted = (['STRING'], ['IDENT'])
    if function.argument_types() not in accepted:
        raise ExpressionError(
            "Expected a single string or ident for :contains(), got %r"
            % function.arguments)
    needle = function.arguments[0].value
    return xpath.add_condition(
        'contains(., %s)' % self.xpath_literal(needle))
5491775 @SimonSapin Add :contains() back, but case-sensitive.
authored Apr 17, 2012
441
856621a @SimonSapin Add support for :lang(), close #3
authored Apr 24, 2012
def xpath_lang_function(self, xpath, function):
    """Translate ``:lang(code)`` into a call to the XPath ``lang()`` function.

    Accepts exactly one string or identifier argument.
    """
    accepted = (['STRING'], ['IDENT'])
    if function.argument_types() not in accepted:
        raise ExpressionError(
            "Expected a single string or ident for :lang(), got %r"
            % function.arguments)
    language = function.arguments[0].value
    return xpath.add_condition(
        "lang(%s)" % (self.xpath_literal(language)))
1b31e70 @SimonSapin Fix error messages for know but unsupported stuff.
authored Apr 16, 2012
450
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
451
452 # Pseudo: dispatch by pseudo-class name
453
def xpath_root_pseudo(self, xpath):
    """Translate ``:root``: keep only elements without a parent element."""
    no_parent = "not(parent::*)"
    return xpath.add_condition(no_parent)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
456
def xpath_first_child_pseudo(self, xpath):
    """Translate ``:first-child`` as position 1 among all sibling elements."""
    xpath.add_star_prefix()
    xpath.add_name_test()
    is_first = 'position() = 1'
    return xpath.add_condition(is_first)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
461
def xpath_last_child_pseudo(self, xpath):
    """Translate ``:last-child`` as the last position among all sibling elements."""
    xpath.add_star_prefix()
    xpath.add_name_test()
    is_last = 'position() = last()'
    return xpath.add_condition(is_last)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
466
def xpath_first_of_type_pseudo(self, xpath):
    """Translate ``:first-of-type``; only supported on a named element."""
    if xpath.element != '*':
        xpath.add_star_prefix()
        return xpath.add_condition('position() = 1')
    raise ExpressionError(
        "*:first-of-type is not implemented")
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
473
def xpath_last_of_type_pseudo(self, xpath):
    """Translate ``:last-of-type``; only supported on a named element."""
    if xpath.element != '*':
        xpath.add_star_prefix()
        return xpath.add_condition('position() = last()')
    raise ExpressionError(
        "*:last-of-type is not implemented")
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
480
def xpath_only_child_pseudo(self, xpath):
    """Translate ``:only-child``: the element has no sibling elements."""
    xpath.add_name_test()
    xpath.add_star_prefix()
    is_alone = 'last() = 1'
    return xpath.add_condition(is_alone)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
485
def xpath_only_of_type_pseudo(self, xpath):
    """Translate ``:only-of-type``; only supported on a named element."""
    if xpath.element != '*':
        return xpath.add_condition('last() = 1')
    raise ExpressionError(
        "*:only-of-type is not implemented")
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
491
def xpath_empty_pseudo(self, xpath):
    """Translate ``:empty``: no child elements and a zero-length string value."""
    is_empty = "not(*) and not(string-length())"
    return xpath.add_condition(is_empty)
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
494
f70f60e @SimonSapin Make a new HTMLTranslator and move :checked there.
authored Apr 17, 2012
def pseudo_never_matches(self, xpath):
    """Common implementation for pseudo-classes that never match.

    Adds the always-false condition ``0`` to the expression.
    """
    always_false = "0"
    return xpath.add_condition(always_false)
1b31e70 @SimonSapin Fix error messages for know but unsupported stuff.
authored Apr 16, 2012
498
f70f60e @SimonSapin Make a new HTMLTranslator and move :checked there.
authored Apr 17, 2012
499 xpath_link_pseudo = pseudo_never_matches
500 xpath_visited_pseudo = pseudo_never_matches
501 xpath_hover_pseudo = pseudo_never_matches
502 xpath_active_pseudo = pseudo_never_matches
503 xpath_focus_pseudo = pseudo_never_matches
504 xpath_target_pseudo = pseudo_never_matches
505 xpath_enabled_pseudo = pseudo_never_matches
506 xpath_disabled_pseudo = pseudo_never_matches
507 xpath_checked_pseudo = pseudo_never_matches
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
508
509 # Attrib: dispatch by attribute operator
510
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_exists(self, xpath, name, value):
    """Translate ``[name]``: the attribute merely has to be present.

    :param xpath: the expression object being built.
    :param name: attribute reference, used verbatim as the condition.
    :param value: must be falsy; this operator carries no value.
    :returns: the same *xpath* object that was passed in.
    """
    # Internal sanity check: the "exists" operator never has a value.
    assert not value
    presence_test = name
    xpath.add_condition(presence_test)
    return xpath
515
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_equals(self, xpath, name, value):
    """Translate ``[name=value]``: the attribute equals *value* exactly.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: string to compare against; quoted through
        ``self.xpath_literal()``.
    :returns: the same *xpath* object that was passed in.
    """
    literal = self.xpath_literal(value)
    condition = '%s = %s' % (name, literal)
    xpath.add_condition(condition)
    return xpath
519
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_different(self, xpath, name, value):
    """Translate ``[name!=value]``: the attribute differs from *value*.

    With a non-empty *value* the condition also accepts elements that
    lack the attribute altogether; with an empty *value* only the plain
    inequality is emitted.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: string to compare against; quoted through
        ``self.xpath_literal()``.
    :returns: the same *xpath* object that was passed in.
    """
    # FIXME: this seems like a weird hack...
    literal = self.xpath_literal(value)
    condition = '%s != %s' % (name, literal)
    if value:
        condition = 'not(%s) or %s' % (name, condition)
    xpath.add_condition(condition)
    return xpath
529
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_includes(self, xpath, name, value):
    """Translate ``[name~=value]``: *value* is one of the attribute's words.

    The attribute is whitespace-normalised and padded with a space on
    each side, then searched for *value* padded the same way. When
    ``is_non_whitespace(value)`` is false, the never-matching condition
    ``0`` is added instead.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: the word to look for.
    :returns: the same *xpath* object that was passed in.
    """
    if not is_non_whitespace(value):
        xpath.add_condition('0')
        return xpath

    padded_word = self.xpath_literal(' ' + value + ' ')
    template = "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
    xpath.add_condition(template % (name, name, padded_word))
    return xpath
538
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_dashmatch(self, xpath, name, value):
    """Translate ``[name|=value]``.

    The attribute must be present and either equal *value* exactly or
    start with *value* followed by a hyphen.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: the exact value / hyphen-terminated prefix to match.
    :returns: the same *xpath* object that was passed in.
    """
    # Weird, but true...
    parts = {
        'attr': name,
        'exact': self.xpath_literal(value),
        'prefix': self.xpath_literal(value + '-'),
    }
    xpath.add_condition(
        '%(attr)s and (%(attr)s = %(exact)s '
        'or starts-with(%(attr)s, %(prefix)s))' % parts)
    return xpath
546
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_prefixmatch(self, xpath, name, value):
    """Translate ``[name^=value]``: the attribute starts with *value*.

    An empty *value* yields the never-matching condition ``0``.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: the required prefix.
    :returns: the same *xpath* object that was passed in.
    """
    if not value:
        xpath.add_condition('0')
        return xpath

    prefix = self.xpath_literal(value)
    xpath.add_condition(
        '%s and starts-with(%s, %s)' % (name, name, prefix))
    return xpath
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
554
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_suffixmatch(self, xpath, name, value):
    """Translate ``[name$=value]``: the attribute ends with *value*.

    An empty *value* yields the never-matching condition ``0``.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: the required suffix.
    :returns: the same *xpath* object that was passed in.
    """
    if not value:
        xpath.add_condition('0')
        return xpath

    # Oddly there is a starts-with in XPath 1.0, but not ends-with.
    # substring() is 1-based, so starting at string-length - (n - 1)
    # selects the last n characters of the attribute.
    offset = len(value) - 1
    suffix = self.xpath_literal(value)
    xpath.add_condition(
        '%s and substring(%s, string-length(%s)-%s) = %s'
        % (name, name, name, offset, suffix))
    return xpath
899aff8 @SimonSapin Move the XPath translation into a new Translator class.
authored Apr 15, 2012
564
a9dc855 @SimonSapin Remove an unused parameter.
authored Apr 17, 2012
def xpath_attrib_substringmatch(self, xpath, name, value):
    """Translate ``[name*=value]``: the attribute contains *value*.

    An empty *value* yields the never-matching condition ``0``.

    :param xpath: the expression object being built.
    :param name: attribute reference inserted verbatim in the condition.
    :param value: the substring to look for.
    :returns: the same *xpath* object that was passed in.
    """
    if not value:
        xpath.add_condition('0')
        return xpath

    # Attribute selectors are case sensitive
    needle = self.xpath_literal(value)
    xpath.add_condition(
        '%s and contains(%s, %s)' % (name, name, needle))
    return xpath
f70f60e @SimonSapin Make a new HTMLTranslator and move :checked there.
authored Apr 17, 2012
573
574
class HTMLTranslator(GenericTranslator):
    """
    Translator for (X)HTML documents.

    Has a more useful implementation of some pseudo-classes based on
    HTML-specific element names and attribute names, as described in
    the `HTML5 specification`_. It assumes no-quirks mode.
    The API is the same as :class:`GenericTranslator`.

    .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors

    :param xhtml:
        If false (the default), element names and attribute names
        are case-insensitive.

    """

    # Name of the attribute that :lang() inspects on the element and its
    # ancestors.
    lang_attribute = 'lang'

    def __init__(self, xhtml=False):
        self.xhtml = xhtml  # Might be useful for sub-classes?
        if not xhtml:
            # See their definition in GenericTranslator.
            self.lower_case_element_names = True
            self.lower_case_attribute_names = True

    def xpath_checked_pseudo(self, xpath):
        """Translate ``:checked``.

        Matches ``option`` elements carrying ``@selected`` and
        ``input``/``command`` elements carrying ``@checked`` whose type is
        ``checkbox`` or ``radio``.
        """
        # FIXME: is this really all the elements?
        return xpath.add_condition(
            "(@selected and name(.) = 'option') or "
            "(@checked "
            "and (name(.) = 'input' or name(.) = 'command')"
            "and (@type = 'checkbox' or @type = 'radio'))")

    def xpath_lang_function(self, xpath, function):
        """Translate ``:lang(value)``.

        Looks at the nearest ancestor-or-self element that carries the
        ``lang_attribute`` attribute and tests, case-insensitively for
        ASCII letters, whether its value followed by ``-`` starts with
        *value* followed by ``-``.

        :raises ExpressionError: unless the function received exactly one
            STRING or IDENT argument.
        """
        if function.argument_types() not in (['STRING'], ['IDENT']):
            raise ExpressionError(
                "Expected a single string or ident for :lang(), got %r"
                % function.arguments)
        value = function.arguments[0].value
        # Use the configurable attribute name in BOTH places; the ancestor
        # predicate used to hard-code @lang, which disagreed with the
        # translate() call as soon as lang_attribute was overridden.
        attribute = self.lang_attribute
        return xpath.add_condition(
            "ancestor-or-self::*[@%s][1][starts-with(concat("
            # XPath 1.0 has no lower-case function...
            "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
            "'abcdefghijklmnopqrstuvwxyz'), "
            "'-'), %s)]"
            % (attribute, attribute,
               self.xpath_literal(value.lower() + '-')))

    def xpath_link_pseudo(self, xpath):
        """Translate ``:link``: ``a``, ``link`` or ``area`` with ``@href``."""
        return xpath.add_condition(
            "@href and "
            "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")

    # Links are never visited, the implementation for :visited is the same
    # as in GenericTranslator

    def xpath_disabled_pseudo(self, xpath):
        """Translate ``:disabled``.

        Matches form-related elements that carry ``@disabled`` themselves,
        and form controls that have a ``fieldset[@disabled]`` ancestor.
        """
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        # FIXME: in the second half, add "and is not a descendant of that
        # fieldset element's first legend element child, if any."
        #
        # not(@type = 'hidden') is used rather than @type != 'hidden':
        # in XPath 1.0 the latter is false when the attribute is absent,
        # which would wrongly exclude <input> elements without a type.
        return xpath.add_condition('''
        (
            @disabled and
            (
                (name(.) = 'input' and not(@type = 'hidden')) or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup' or
                name(.) = 'option'
            )
        ) or (
            (
                (name(.) = 'input' and not(@type = 'hidden')) or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea'
            )
            and ancestor::fieldset[@disabled]
        )
        ''')

    def xpath_enabled_pseudo(self, xpath):
        """Translate ``:enabled``.

        Matches links with ``@href``, non-disabled ``command``/``fieldset``/
        ``optgroup`` elements, form controls that are neither disabled nor
        inside a disabled ``fieldset``, and ``option`` elements that are
        neither disabled nor inside a disabled ``optgroup``.
        """
        # http://www.w3.org/TR/html5/section-index.html#attributes-1
        # FIXME: ... or "li elements that are children of menu elements,
        # and that have a child element that defines a command, if the first
        # such element's Disabled State facet is false (not disabled)".
        # FIXME: after ancestor::fieldset[@disabled], add "and is not a
        # descendant of that fieldset element's first legend element child,
        # if any."
        #
        # not(@type = 'hidden') is used rather than @type != 'hidden':
        # in XPath 1.0 the latter is false when the attribute is absent,
        # which would wrongly exclude <input> elements without a type.
        return xpath.add_condition('''
        (
            @href and (
                name(.) = 'a' or
                name(.) = 'link' or
                name(.) = 'area'
            )
        ) or (
            (
                name(.) = 'command' or
                name(.) = 'fieldset' or
                name(.) = 'optgroup'
            )
            and not(@disabled)
        ) or (
            (
                (name(.) = 'input' and not(@type = 'hidden')) or
                name(.) = 'button' or
                name(.) = 'select' or
                name(.) = 'textarea' or
                name(.) = 'keygen'
            )
            and not (@disabled or ancestor::fieldset[@disabled])
        ) or (
            name(.) = 'option' and not(
                @disabled or ancestor::optgroup[@disabled]
            )
        )
        ''')
Something went wrong with that request. Please try again.