Skip to content
This repository
  • 9 commits
  • 6 files changed
  • 0 comments
  • 1 contributor
31  CHANGES
@@ -4,27 +4,32 @@ Changelog
4 4
 Planned changes
5 5
 ---------------
6 6
 
7  
-* Implement ``:target``, ``:hover``, ``:focus`` and ``:active``
8  
-  as never matching
9  
-* Make a new HTML-specific ``Translator`` subclass. There, implement
10  
-  ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
11  
-  (with all links  "not visited"). Move ``:checked`` to this new
12  
-  ``HTMLTranslator``
13  
-* Make all of these never match for "generic" XML. (Unless otherwise specified,
14  
-  there is no link, checkbox, etc.)
15 7
 * Add some support for pseudo-elements by separating them from the
16 8
   rest of the selector.
17 9
 * Add specificity calculation.
18 10
 
19  
-Discussion is open if anyone is interested in implementing eg. ``:target``
20  
-or ``:visited`` differently, but they can always do it in a ``Translator``
21  
-subclass.
22  
-
23 11
 
24 12
 Version 0.3
25 13
 -----------
26 14
 
27  
-Fix many parsing bugs.
  15
+Released on 2012-04-17.
  16
+
  17
+* Fix many parsing bugs.
  18
+* Rename the :class:`Translator` class to :class:`GenericTranslator`.
  19
+* There, implement ``:target``, ``:hover``, ``:focus``, ``:active``,
  20
+  ``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
  21
+  as never matching.
  22
+* Make a new HTML-specific ``HTMLTranslator`` subclass. There, implement
  23
+  ``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
  24
+  as appropriate for HTML, with all links "not visited".
  25
+* Remove the :func:`css_to_xpath` function. The translator classes
  26
+  are the new API.
  27
+* Add support for ``:contains()`` back, but case-sensitive. lxml will
  28
+  override it to be case-insensitive for backward-compatibility.
  29
+
  30
+Discussion is open if anyone is interested in implementing e.g. ``:target``
  31
+or ``:visited`` differently, but they can always do it in a ``Translator``
  32
+subclass.
28 33
 
29 34
 
30 35
 Version 0.2
6  cssselect/__init__.py
@@ -13,10 +13,8 @@
13 13
 """
14 14
 
15 15
 from cssselect.parser import SelectorError, SelectorSyntaxError
16  
-from cssselect.xpath import Translator, ExpressionError
  16
+from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
17 17
 
18 18
 
19  
-VERSION = '0.2'
  19
+VERSION = '0.3'
20 20
 __version__ = VERSION
21  
-
22  
-css_to_xpath = Translator().css_to_xpath
1  cssselect/parser.py
@@ -326,7 +326,6 @@ def parse_simple_selector(stream, inside_negation=False):
326 326
         else:
327 327
             raise SelectorSyntaxError(
328 328
                 "Expected selector, got '%s'" % peek)
329  
-        # FIXME: not sure what "negation" is
330 329
     if consumed == len(stream.used):
331 330
         raise SelectorSyntaxError(
332 331
             "Expected selector, got '%s'" % stream.peek())
57  cssselect/tests.py
@@ -22,8 +22,7 @@
22 22
 
23 23
 from lxml import html
24 24
 from cssselect.parser import tokenize, parse, parse_series, SelectorSyntaxError
25  
-from cssselect.xpath import Translator, ExpressionError
26  
-from cssselect import css_to_xpath
  25
+from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
27 26
 
28 27
 
29 28
 class TestCssselect(unittest.TestCase):
@@ -187,7 +186,7 @@ def get_error(css):
187 186
 
188 187
     def test_translation(self):
189 188
         def xpath(css):
190  
-            return str(Translator().xpath(parse(css)))
  189
+            return str(GenericTranslator().xpath(parse(css)))
191 190
 
192 191
         assert xpath('*') == "*"
193 192
         assert xpath('E') == "e"
@@ -238,6 +237,10 @@ def xpath(css):
238 237
             "e[not(*) and not(normalize-space())]")
239 238
         assert xpath('E:root') == (
240 239
             "e[not(parent::*)]")
  240
+        assert xpath('E:contains("foo")') == (
  241
+            "e[contains(string(.), 'foo')]")
  242
+        assert xpath('E:contains(foo)') == (
  243
+            "e[contains(string(.), 'foo')]")
241 244
         assert xpath('E.warning') == (
242 245
             "e[@class and contains("
243 246
                "concat(' ', normalize-space(@class), ' '), ' warning ')]")
@@ -263,7 +266,7 @@ def test_unicode(self):
263 266
         else:
264 267
             css = '.a\xc1b'.decode('ISO-8859-1')
265 268
 
266  
-        xpath = css_to_xpath(css)
  269
+        xpath = GenericTranslator().css_to_xpath(css)
267 270
         assert css[1:] in xpath
268 271
         xpath = xpath.encode('ascii', 'xmlcharrefreplace').decode('ASCII')
269 272
         assert xpath == (
@@ -271,6 +274,7 @@ def test_unicode(self):
271 274
             "concat(' ', normalize-space(@class), ' '), ' aÁb ')]")
272 275
 
273 276
     def test_quoting(self):
  277
+        css_to_xpath = GenericTranslator().css_to_xpath
274 278
         assert css_to_xpath('*[aval="\'"]') == (
275 279
             '''descendant-or-self::*[@aval = "'"]''')
276 280
         assert css_to_xpath('*[aval="\'\'\'"]') == (
@@ -282,6 +286,7 @@ def test_quoting(self):
282 286
 
283 287
     def test_unicode_escapes(self):
284 288
         # \22 == '"'  \20 == ' '
  289
+        css_to_xpath = GenericTranslator().css_to_xpath
285 290
         assert css_to_xpath(r'*[aval="\'\22\'"]') == (
286 291
             '''descendant-or-self::*[@aval = concat("'",'"',"'")]''')
287 292
         assert css_to_xpath(r'*[aval="\'\22 2\'"]') == (
@@ -305,17 +310,24 @@ def test_select(self):
305 310
         sort_key = dict(
306 311
             (el, count) for count, el in enumerate(document.getiterator())
307 312
         ).__getitem__
  313
+        css_to_xpath = GenericTranslator().css_to_xpath
  314
+        html_css_to_xpath = HTMLTranslator().css_to_xpath
308 315
 
309  
-        def select_ids(selector):
  316
+        def select_ids(selector, html_only):
310 317
             xpath = css_to_xpath(selector)
311 318
             items = document.xpath(xpath)
  319
+            if html_only:
  320
+                assert items == []
  321
+                xpath = html_css_to_xpath(selector)
  322
+                items = document.xpath(xpath)
312 323
             items.sort(key=sort_key)
313 324
             return [element.get('id', 'nil') for element in items]
314 325
 
315  
-        def pcss(main, *selectors):
316  
-            result = select_ids(main)
  326
+        def pcss(main, *selectors, **kwargs):
  327
+            html_only = kwargs.pop('html_only', False)
  328
+            result = select_ids(main, html_only)
317 329
             for selector in selectors:
318  
-                assert select_ids(selector) == result
  330
+                assert select_ids(selector, html_only) == result
319 331
             return result
320 332
 
321 333
         all_ids = pcss('*')
@@ -357,7 +369,8 @@ def pcss(main, *selectors):
357 369
         assert pcss('ol:nth-last-of-type(1)') == ['first-ol']
358 370
         assert pcss('span:only-child') == ['foobar-span']
359 371
         assert pcss('li div:only-child') == ['li-div']
360  
-        assert pcss('div *:only-child') == ['li-div', 'foobar-span']
  372
+        assert pcss('div *:only-child') == [
  373
+            'li-div', 'checkbox-disabled', 'foobar-span']
361 374
         self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
362 375
         self.assertRaises(ExpressionError, pcss, 'p:lang(fr)')
363 376
         assert pcss('p:only-of-type') == ['paragraph']
@@ -366,6 +379,13 @@ def pcss(main, *selectors):
366 379
             'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li']
367 380
         assert pcss(':root', 'html:root') == ['html']
368 381
         assert pcss('li:root', '* :root') == []
  382
+        assert pcss('*:contains("link")') == [
  383
+            'html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor']
  384
+        assert pcss('*:contains("LInk")') == []  # case sensitive
  385
+        assert pcss('*:contains("e")') == [
  386
+            'html', 'nil', 'outer-div', 'first-ol', 'first-li',
  387
+            'paragraph', 'p-em']
  388
+        assert pcss('*:contains("E")') == []  # case-sensitive
369 389
         assert pcss('.a', '.b', '*.a', 'ol.a') == ['first-ol']
370 390
         assert pcss('.c', '*.c') == ['first-ol', 'third-li', 'fourth-li']
371 391
         assert pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c') == [
@@ -382,11 +402,19 @@ def pcss(main, *selectors):
382 402
         assert pcss('ol#first-ol *:last-child') == ['li-div', 'seventh-li']
383 403
         assert pcss('#outer-div:first-child') == ['outer-div']
384 404
         assert pcss('#outer-div :first-child') == [
385  
-            'name-anchor', 'first-li', 'li-div', 'p-b']
  405
+            'name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-disabled']
  406
+        assert pcss('a[href]') == ['tag-anchor', 'nofollow-anchor']
  407
+        assert pcss(':link', html_only=True) == pcss('a[href]')
  408
+        assert pcss(':checked', html_only=True) == ['checkbox-checked']
  409
+        assert pcss(':disabled', html_only=True) == [
  410
+            'fieldset', 'checkbox-disabled']
  411
+        assert pcss(':enabled', html_only=True) == [
  412
+            'checkbox-unchecked', 'checkbox-checked']
386 413
 
387 414
     def test_select_shakespeare(self):
388 415
         document = html.document_fromstring(HTML_SHAKESPEARE)
389 416
         body = document.xpath('//body')[0]
  417
+        css_to_xpath = GenericTranslator().css_to_xpath
390 418
 
391 419
         try:
392 420
             basestring_ = basestring
@@ -410,6 +438,7 @@ def count(selector):
410 438
         ## searching the body.
411 439
         #assert count('*') == 252
412 440
         assert count('*') == 246
  441
+        assert count('div:contains(CELIA)') == 26
413 442
         assert count('div:only-child') == 22 # ?
414 443
         assert count('div:nth-child(even)') == 106
415 444
         assert count('div:nth-child(2n)') == 106
@@ -477,7 +506,13 @@ def count(selector):
477 506
  </ol>
478 507
  <p id="paragraph">
479 508
    <b id="p-b">hi</b> <em id="p-em">there</em>
480  
-   <b id="p-b2">guy</b></p>
  509
+   <b id="p-b2">guy</b>
  510
+   <input type="checkbox" id="checkbox-unchecked">
  511
+   <input type="checkbox" id="checkbox-checked" checked="checked">
  512
+   <fieldset id="fieldset" disabled="disabled">
  513
+     <input type="checkbox" id="checkbox-disabled">
  514
+   </fieldset>
  515
+ </p>
481 516
  <ol id="second-ol">
482 517
  </ol>
483 518
 </div>
210  cssselect/xpath.py
@@ -78,7 +78,8 @@ def add_name_test(self):
78 78
         if self.element == '*':
79 79
             # We weren't doing a test anyway
80 80
             return
81  
-        self.add_condition("name() = %s" % xpath_literal(self.element))
  81
+        self.add_condition(
  82
+            "name() = %s" % GenericTranslator.xpath_literal(self.element))
82 83
         self.element = '*'
83 84
 
84 85
     def add_star_prefix(self):
@@ -126,27 +127,13 @@ def __str__(self):
126 127
 
127 128
 split_at_single_quotes = re.compile("('+)").split
128 129
 
129  
-def xpath_literal(s):
130  
-    if isinstance(s, Element):
131  
-        # This is probably a symbol that looks like an expression...
132  
-        s = s._format_element()
133  
-    else:
134  
-        s = _unicode(s)
135  
-    if "'" not in s:
136  
-        s = "'%s'" % s
137  
-    elif '"' not in s:
138  
-        s = '"%s"' % s
139  
-    else:
140  
-        s = "concat(%s)" % ','.join([
141  
-            (("'" in part) and '"%s"' or "'%s'") % part
142  
-            for part in split_at_single_quotes(s) if part
143  
-            ])
144  
-    return s
145  
-
146 130
 
147 131
 #### Translation
148 132
 
149  
-class Translator(object):
  133
+class GenericTranslator(object):
  134
+    """
  135
+    Translator for "generic" XML documents.
  136
+    """
150 137
     combinator_mapping = {
151 138
         ' ': 'descendant',
152 139
         '>': 'child',
@@ -173,8 +160,8 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
173 160
 
174 161
         .. sourcecode:: pycon
175 162
 
176  
-            >>> from cssselect import css_to_xpath
177  
-            >>> expression = css_to_xpath('div.content')
  163
+            >>> from cssselect import GenericTranslator
  164
+            >>> expression = GenericTranslator().css_to_xpath('div.content')
178 165
             >>> print(expression)
179 166
             descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')]
180 167
 
@@ -201,6 +188,24 @@ def css_to_xpath(self, css, prefix='descendant-or-self::'):
201 188
         xpath.add_prefix(prefix or '')
202 189
         return _unicode(xpath)
203 190
 
  191
+    @staticmethod
  192
+    def xpath_literal(s):
  193
+        if isinstance(s, Element):
  194
+            # This is probably a symbol that looks like an expression...
  195
+            s = s._format_element()
  196
+        else:
  197
+            s = _unicode(s)
  198
+        if "'" not in s:
  199
+            s = "'%s'" % s
  200
+        elif '"' not in s:
  201
+            s = '"%s"' % s
  202
+        else:
  203
+            s = "concat(%s)" % ','.join([
  204
+                (("'" in part) and '"%s"' or "'%s'") % part
  205
+                for part in split_at_single_quotes(s) if part
  206
+                ])
  207
+        return s
  208
+
204 209
     def xpath(self, parsed_selector, prefix=None):
205 210
         """Translate any parsed selector object."""
206 211
         type_name = type(parsed_selector).__name__
@@ -232,7 +237,7 @@ def xpath_function(self, function):
232 237
         method = getattr(self, method, None)
233 238
         if not method:
234 239
             raise ExpressionError(
235  
-                "The pseudo-class :%r is unknown" % function.name)
  240
+                "The pseudo-class :%s() is unknown" % function.name)
236 241
         return method(self.xpath(function.selector), function)
237 242
 
238 243
     def xpath_pseudo(self, pseudo):
@@ -242,7 +247,7 @@ def xpath_pseudo(self, pseudo):
242 247
         if not method:
243 248
             # TODO: better error message for pseudo-elements?
244 249
             raise ExpressionError(
245  
-                "The pseudo-class :%r is unknown" % pseudo.ident)
  250
+                "The pseudo-class :%s is unknown" % pseudo.ident)
246 251
         return method(self.xpath(pseudo.selector))
247 252
 
248 253
 
@@ -258,23 +263,19 @@ def xpath_attrib(self, selector):
258 263
             name = '@' + selector.attrib
259 264
         else:
260 265
             name = '@%s:%s' % (selector.namespace, selector.attrib)
261  
-        return method(self.xpath(selector.selector),
262  
-                      name, operator, selector.value)
  266
+        return method(self.xpath(selector.selector), name, selector.value)
263 267
 
264 268
     def xpath_class(self, class_selector):
265 269
         """Translate a class selector."""
266  
-        # FIXME: is this HTML-specific?
  270
+        # .foo is defined as [class~=foo] in the spec.
267 271
         xpath = self.xpath(class_selector.selector)
268  
-        xpath.add_condition(
269  
-            "@class and "
270  
-            "contains(concat(' ', normalize-space(@class), ' '), %s)"
271  
-            % xpath_literal(' ' + class_selector.class_name + ' '))
272  
-        return xpath
  272
+        return self.xpath_attrib_includes(
  273
+            xpath, '@class', class_selector.class_name)
273 274
 
274 275
     def xpath_hash(self, id_selector):
275 276
         """Translate an ID selector."""
276 277
         xpath = self.xpath(id_selector.selector)
277  
-        xpath.add_condition('@id = %s' % xpath_literal(id_selector.id))
  278
+        xpath.add_condition('@id = %s' % self.xpath_literal(id_selector.id))
278 279
         return xpath
279 280
 
280 281
     def xpath_element(self, selector):
@@ -377,6 +378,11 @@ def xpath_not_function(self, xpath, function):
377 378
         xpath.add_condition('not(%s)' % condition)
378 379
         return xpath
379 380
 
  381
+    def xpath_contains_function(self, xpath, function):
  382
+        xpath.add_condition('contains(string(.), %s)'
  383
+                            % self.xpath_literal(function.arguments))
  384
+        return xpath
  385
+
380 386
     def function_unsupported(self, xpath, pseudo):
381 387
         raise ExpressionError(
382 388
             "The pseudo-class :%s() is not supported" % pseudo.name)
@@ -386,12 +392,6 @@ def function_unsupported(self, xpath, pseudo):
386 392
 
387 393
     # Pseudo: dispatch by pseudo-class name
388 394
 
389  
-    def xpath_checked_pseudo(self, xpath):
390  
-        # FIXME: is this really all the elements?
391  
-        xpath.add_condition("(@selected or @checked) and "
392  
-                            "(name(.) = 'input' or name(.) = 'option')")
393  
-        return xpath
394  
-
395 395
     def xpath_root_pseudo(self, xpath):
396 396
         xpath.add_condition("not(parent::*)")
397 397
         return xpath
@@ -441,68 +441,146 @@ def xpath_empty_pseudo(self, xpath):
441 441
         xpath.add_condition("not(*) and not(normalize-space())")
442 442
         return xpath
443 443
 
444  
-    def pseudo_unsupported(self, xpath, pseudo):
445  
-        raise ExpressionError(
446  
-            "The pseudo-class :%s is not supported" % pseudo.name)
  444
+    def pseudo_never_matches(self, xpath):
  445
+        """Common implementation for pseudo-classes that never match."""
  446
+        xpath.add_condition("0")
  447
+        return xpath
447 448
 
448  
-    xpath_link_pseudo = pseudo_unsupported
449  
-    xpath_visited_pseudo = pseudo_unsupported
450  
-    xpath_hover_pseudo = pseudo_unsupported
451  
-    xpath_active_pseudo = pseudo_unsupported
452  
-    xpath_focus_pseudo = pseudo_unsupported
453  
-    xpath_target_pseudo = pseudo_unsupported
454  
-    xpath_enabled_pseudo = pseudo_unsupported
455  
-    xpath_disabled_pseudo = pseudo_unsupported
  449
+    xpath_link_pseudo = pseudo_never_matches
  450
+    xpath_visited_pseudo = pseudo_never_matches
  451
+    xpath_hover_pseudo = pseudo_never_matches
  452
+    xpath_active_pseudo = pseudo_never_matches
  453
+    xpath_focus_pseudo = pseudo_never_matches
  454
+    xpath_target_pseudo = pseudo_never_matches
  455
+    xpath_enabled_pseudo = pseudo_never_matches
  456
+    xpath_disabled_pseudo = pseudo_never_matches
  457
+    xpath_checked_pseudo = pseudo_never_matches
456 458
 
457 459
     # Attrib: dispatch by attribute operator
458 460
 
459  
-    def xpath_attrib_exists(self, xpath, name, operator, value):
  461
+    def xpath_attrib_exists(self, xpath, name, value):
460 462
         assert not value
461 463
         xpath.add_condition(name)
462 464
         return xpath
463 465
 
464  
-    def xpath_attrib_equals(self, xpath, name, operator, value):
465  
-        xpath.add_condition('%s = %s' % (name, xpath_literal(value)))
  466
+    def xpath_attrib_equals(self, xpath, name, value):
  467
+        xpath.add_condition('%s = %s' % (name, self.xpath_literal(value)))
466 468
         return xpath
467 469
 
468  
-    def xpath_attrib_different(self, xpath, name, operator, value):
  470
+    def xpath_attrib_different(self, xpath, name, value):
469 471
         # FIXME: this seems like a weird hack...
470 472
         if value:
471 473
             xpath.add_condition('not(%s) or %s != %s'
472  
-                                % (name, name, xpath_literal(value)))
  474
+                                % (name, name, self.xpath_literal(value)))
473 475
         else:
474 476
             xpath.add_condition('%s != %s'
475  
-                                % (name, xpath_literal(value)))
  477
+                                % (name, self.xpath_literal(value)))
476 478
         return xpath
477 479
 
478  
-    def xpath_attrib_includes(self, xpath, name, operator, value):
  480
+    def xpath_attrib_includes(self, xpath, name, value):
479 481
         xpath.add_condition(
480 482
             "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
481  
-            % (name, name, xpath_literal(' '+value+' ')))
  483
+            % (name, name, self.xpath_literal(' '+value+' ')))
482 484
         return xpath
483 485
 
484  
-    def xpath_attrib_dashmatch(self, xpath, name, operator, value):
  486
+    def xpath_attrib_dashmatch(self, xpath, name, value):
485 487
         # Weird, but true...
486 488
         xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % (
487 489
             name,
488  
-            name, xpath_literal(value),
489  
-            name, xpath_literal(value + '-')))
  490
+            name, self.xpath_literal(value),
  491
+            name, self.xpath_literal(value + '-')))
490 492
         return xpath
491 493
 
492  
-    def xpath_attrib_prefixmatch(self, xpath, name, operator, value):
  494
+    def xpath_attrib_prefixmatch(self, xpath, name, value):
493 495
         xpath.add_condition('%s and starts-with(%s, %s)' % (
494  
-            name, name, xpath_literal(value)))
  496
+            name, name, self.xpath_literal(value)))
495 497
         return xpath
496 498
 
497  
-    def xpath_attrib_suffixmatch(self, xpath, name, operator, value):
  499
+    def xpath_attrib_suffixmatch(self, xpath, name, value):
498 500
         # Oddly there is a starts-with in XPath 1.0, but not ends-with
499 501
         xpath.add_condition(
500 502
             '%s and substring(%s, string-length(%s)-%s) = %s'
501  
-            % (name, name, name, len(value)-1, xpath_literal(value)))
  503
+            % (name, name, name, len(value)-1, self.xpath_literal(value)))
502 504
         return xpath
503 505
 
504  
-    def xpath_attrib_substringmatch(self, xpath, name, operator, value):
  506
+    def xpath_attrib_substringmatch(self, xpath, name, value):
505 507
         # Attribute selectors are case sensitive
506 508
         xpath.add_condition('%s and contains(%s, %s)' % (
507  
-            name, name, xpath_literal(value)))
  509
+            name, name, self.xpath_literal(value)))
  510
+        return xpath
  511
+
  512
+
  513
+class HTMLTranslator(GenericTranslator):
  514
+    """
  515
+    Translator for HTML documents.
  516
+    """
  517
+    def xpath_checked_pseudo(self, xpath):
  518
+        # FIXME: is this really all the elements?
  519
+        xpath.add_condition(
  520
+            "(@selected and name(.) = 'option') or "
  521
+            "(@checked and name(.) = 'input')")
  522
+        return xpath
  523
+
  524
+    def xpath_link_pseudo(self, xpath):
  525
+        xpath.add_condition("@href and name(.) = 'a'")
  526
+        return xpath
  527
+
  528
+    # Links are never visited, the implementation for :visited is the same
  529
+    # as in GenericTranslator
  530
+
  531
+    def xpath_disabled_pseudo(self, xpath):
  532
+        # http://www.w3.org/TR/html5/section-index.html#attributes-1
  533
+        xpath.add_condition('''
  534
+        (
  535
+            @disabled and
  536
+            (
  537
+                name(.) = 'input' or
  538
+                name(.) = 'button' or
  539
+                name(.) = 'select' or
  540
+                name(.) = 'textarea' or
  541
+                name(.) = 'keygen' or
  542
+                name(.) = 'command' or
  543
+                name(.) = 'fieldset' or
  544
+                name(.) = 'optgroup' or
  545
+                name(.) = 'option'
  546
+            )
  547
+        ) or (
  548
+            (
  549
+                name(.) = 'input' or
  550
+                name(.) = 'button' or
  551
+                name(.) = 'select' or
  552
+                name(.) = 'textarea' or
  553
+                name(.) = 'keygen'
  554
+            )
  555
+            and ancestor::fieldset[@disabled]
  556
+        )
  557
+        ''')
  558
+        # FIXME: in the second half, add "and is not a descendant of that
  559
+        # fieldset element's first legend element child, if any."
  560
+        return xpath
  561
+
  562
+    def xpath_enabled_pseudo(self, xpath):
  563
+        # http://www.w3.org/TR/html5/section-index.html#attributes-1
  564
+        xpath.add_condition('''
  565
+        (
  566
+            (
  567
+                name(.) = 'command' or
  568
+                name(.) = 'fieldset' or
  569
+                name(.) = 'optgroup' or
  570
+                name(.) = 'option'
  571
+            )
  572
+            and not(@disabled)
  573
+        ) or (
  574
+            (
  575
+                name(.) = 'input' or
  576
+                name(.) = 'button' or
  577
+                name(.) = 'select' or
  578
+                name(.) = 'textarea' or
  579
+                name(.) = 'keygen'
  580
+            )
  581
+            and not (@disabled or ancestor::fieldset[@disabled])
  582
+        )
  583
+        ''')
  584
+        # FIXME: in the second half, add "and is not a descendant of that
  585
+        # fieldset element's first legend element child, if any."
508 586
         return xpath
89  docs/index.rst
Source Rendered
... ...
@@ -1,53 +1,98 @@
  1
+.. module:: cssselect
1 2
 .. include:: ../README.rst
2 3
 
3 4
 User API
4 5
 ========
5 6
 
6  
-.. module:: cssselect
  7
+The ``cssselect`` module provides two classes:
7 8
 
8  
-Currently, the only public API is the ``css_to_xpath()`` function. This API
  9
+* :class:`GenericTranslator` for "generic" XML documents.
  10
+* :class:`HTMLTranslator` for HTML documents.
  11
+
  12
+Both are instantiated without arguments.
  13
+Currently, their only public API is the :meth:`~GenericTranslator.css_to_xpath`
  14
+method. This API is
9 15
 expected to expand to provide more information about the parsed selectors,
10 16
 and to allow customization of the translation.
11 17
 
12  
-.. autofunction:: css_to_xpath(css, prefix='descendant-or-self::')
13 18
 
14  
-
15  
-Namespaces
16  
-==========
17  
-
18  
-In CSS you can use ``namespace-prefix|element``, similar to
19  
-``namespace-prefix:element`` in an XPath expression.  In fact, it maps
20  
-one-to-one. How prefixes are mapped to namespace URIs depends on the
21  
-XPath implementation.
  19
+.. automethod:: GenericTranslator.css_to_xpath
22 20
 
23 21
 
24 22
 Limitations and supported selectors
25 23
 ===================================
26 24
 
27  
-This library attempts to implement CSS3 selectors as described in
28  
-`the W3C specification
29  
-<http://www.w3.org/TR/2011/REC-css3-selectors-20110929/>`_. Some of
30  
-the pseudo-classes do not apply in this context.
31  
-In particular these will not be available:
  25
+This library implements CSS3 selectors as described in `the W3C specification
  26
+<http://www.w3.org/TR/2011/REC-css3-selectors-20110929/>`_.
  27
+In this context however, there is no interactivity or history of visited links.
  28
+Therefore, these pseudo-classes are accepted but never match anything:
32 29
 
33  
-* link state: ``:link``, ``:visited``, ``:target``
34  
-* actions: ``:hover``, ``:active``, ``:focus``
35  
-* UI states: ``:enabled``, ``:disabled`` (``:checked`` *is* available)
  30
+* ``:hover``
  31
+* ``:active``
  32
+* ``:focus``
  33
+* ``:target``
  34
+* ``:visited``
36 35
 
37  
-Also, none of the pseudo-elements apply since XPath only knows about “real”
38  
-elements.
  36
+Additionally, these depend on document knowledge and only have a useful
  37
+implementation in :class:`HTMLTranslator`. In :class:`GenericTranslator`,
  38
+they never match:
39 39
 
  40
+* ``:link``
  41
+* ``:enabled``
  42
+* ``:disabled``
  43
+* ``:checked``
40 44
 
41  
-These applicable pseudoclasses are not yet implemented:
  45
+These applicable pseudo-classes are not yet implemented:
42 46
 
43 47
 * ``:lang(language)``
44 48
 * ``*:first-of-type``, ``*:last-of-type``, ``*:nth-of-type``,
45 49
   ``*:nth-last-of-type``, ``*:only-of-type``.  All of these work when
46 50
   you specify an element type, but not with ``*``
47 51
 
  52
+None of the pseudo-elements apply since XPath only knows about “real”
  53
+elements.
  54
+
  55
+..
  56
+    The following claim was copied from lxml.
  57
+    TODO: is this true? What kind of situation could cause trouble?
  58
+    Maybe add an example?
  59
+
48 60
 XPath has underspecified string quoting rules (there seems to be no
49 61
 string quoting at all), so if you use expressions that contain
50 62
 characters that require quoting you might have problems with the
51 63
 translation from CSS to XPath.
52 64
 
  65
+On the other hand, *cssselect* supports some selectors that are not
  66
+in the Level 3 specification:
  67
+
  68
+* The ``:contains(text)`` pseudo-class that `existed in an early draft
  69
+  <http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors>`_
  70
+  but was then removed.
  71
+* The ``!=`` attribute operator. ``[foo!=bar]`` is the same as
  72
+  ``:not([foo=bar])``
  73
+
  74
+
  75
+Customizing the translation
  76
+===========================
  77
+
  78
+Just like :class:`HTMLTranslator` is a subclass of :class:`GenericTranslator`,
  79
+you can make new sub-classes of either of them and override some methods.
  80
+This way, you can customize how e.g. some pseudo-class is implemented or change
  81
+some other detail of the XPath translation, without forking or monkey-patching
  82
+cssselect.
  83
+
  84
+The "customization API" is the set of methods in translation classes
  85
+and their signature. You can look at the source code to see how it works.
  86
+However, be aware that this API is not very stable yet. It might change
  87
+and break your sub-class.
  88
+
  89
+
  90
+Namespaces
  91
+==========
  92
+
  93
+In CSS you can use ``namespace-prefix|element``, similar to
  94
+``namespace-prefix:element`` in an XPath expression.  In fact, it maps
  95
+one-to-one. How prefixes are mapped to namespace URIs depends on the
  96
+XPath implementation.
  97
+
53 98
 .. include:: ../CHANGES

No commit comments for this range

Something went wrong with that request. Please try again.