Skip to content
This repository
Browse code

Add support for :lang(), close #3

  • Loading branch information...
commit 856621a21479fd51daebfce42d5d94ddabef13e3 1 parent acb6c39
Simon Sapin authored
11 CHANGES
... ... @@ -1,6 +1,17 @@
1 1 Changelog
2 2 =========
3 3
  4 +Version 0.6
  5 +-----------
  6 +
  7 +* In ``setup.py`` use setuptools/distribute if available, but fall back
  8 + on distutils.
  9 +* Implement the ``:lang()`` pseudo-class, although it is only based on
  10 + ``xml:lang`` or ``lang`` attributes. If the document language is known from
  11 + some other meta-data (like a ``Content-Language`` HTTP header or ``<meta>``
  12 + element), a workaround is to set a lang attribute on the root element.
  13 +
  14 +
4 15 Version 0.5
5 16 -----------
6 17
2  cssselect/__init__.py
@@ -18,5 +18,5 @@
18 18 from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
19 19
20 20
21   -VERSION = '0.5'
  21 +VERSION = '0.6'
22 22 __version__ = VERSION
13 cssselect/tests.py
@@ -18,7 +18,6 @@
18 18 """
19 19
20 20 import sys
21   -import operator
22 21 import unittest
23 22
24 23 from lxml import etree, html
@@ -392,7 +391,6 @@ def xpath(css):
392 391 self.assertRaises(ExpressionError, xpath, ':last-of-type')
393 392 self.assertRaises(ExpressionError, xpath, ':nth-of-type(1)')
394 393 self.assertRaises(ExpressionError, xpath, ':nth-last-of-type(1)')
395   - self.assertRaises(ExpressionError, xpath, ':lang(fr)')
396 394 self.assertRaises(ExpressionError, xpath, ':nth-child(n-)')
397 395 self.assertRaises(ExpressionError, xpath, ':after')
398 396 self.assertRaises(ExpressionError, xpath, ':lorem-ipsum')
@@ -497,8 +495,14 @@ def pcss(main, *selectors, **kwargs):
497 495 assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == [
498 496 'foobar-div']
499 497 assert pcss('div[foobar~="cd"]') == []
500   - assert pcss('*[lang|="en"]', '[lang|="en-US"]') == ['second-li']
  498 + assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ['second-li']
  499 + # Attribute values are case sensitive
  500 + assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
501 501 assert pcss('*[lang|="e"]') == []
  502 + # ... :lang() is not.
  503 + assert pcss(':lang("EN")', '*:lang(en-US)', html_only=True) == [
  504 + 'second-li', 'li-div']
  505 + assert pcss(':lang("e")', html_only=True) == []
502 506 assert pcss('li:nth-child(3)') == ['third-li']
503 507 assert pcss('li:nth-child(10)') == []
504 508 assert pcss('li:nth-child(2n)', 'li:nth-child(even)',
@@ -524,7 +528,6 @@ def pcss(main, *selectors, **kwargs):
524 528 assert pcss('li div:only-child') == ['li-div']
525 529 assert pcss('div *:only-child') == ['li-div', 'foobar-span']
526 530 self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
527   - self.assertRaises(ExpressionError, pcss, 'p:lang(fr)')
528 531 assert pcss('p:only-of-type') == ['paragraph']
529 532 assert pcss('a:empty', 'a:EMpty') == ['name-anchor']
530 533 assert pcss('li:empty') == [
@@ -661,7 +664,7 @@ def count(selector):
661 664 link</a>
662 665 <ol id="first-ol" class="a b c">
663 666 <li id="first-li">content</li>
664   - <li id="second-li" lang="en-US">
  667 + <li id="second-li" lang="En-us">
665 668 <div id="li-div">
666 669 </div>
667 670 </li>
21 cssselect/xpath.py
@@ -116,6 +116,10 @@ class GenericTranslator(object):
116 116 #: http://www.w3.org/TR/selectors/#id-selectors
117 117 id_attribute = 'id'
118 118
  119 + #: The attribute used for ``:lang()`` depends on the document language:
  120 + #: http://www.w3.org/TR/selectors/#lang-pseudo
  121 + lang_attribute = 'xml:lang'
  122 +
119 123 #: The case sensitivity of document language element names,
120 124 #: attribute names, and attribute values in selectors depends
121 125 #: on the document language.
@@ -366,11 +370,15 @@ def xpath_contains_function(self, xpath, function):
366 370 return xpath.add_condition('contains(string(.), %s)'
367 371 % self.xpath_literal(function.arguments))
368 372
369   - def function_unsupported(self, xpath, pseudo):
370   - raise ExpressionError(
371   - "The pseudo-class :%s() is not supported" % pseudo.name)
372   -
373   - xpath_lang_function = function_unsupported
  373 + def xpath_lang_function(self, xpath, function):
  374 + return xpath.add_condition(
  375 + "ancestor-or-self::*[@lang][1][starts-with(concat("
  376 + # XPath 1.0 has no lower-case function...
  377 + "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
  378 + "'abcdefghijklmnopqrstuvwxyz'), "
  379 + "'-'), %s)]"
  380 + % (self.lang_attribute, self.xpath_literal(
  381 + function.arguments.lower() + '-')))
374 382
375 383
376 384 # Pseudo: dispatch by pseudo-class name
@@ -497,6 +505,9 @@ class HTMLTranslator(GenericTranslator):
497 505 are case-insensitive.
498 506
499 507 """
  508 +
  509 + lang_attribute = 'lang'
  510 +
500 511 def __init__(self, xhtml=False):
501 512 self.xhtml = xhtml # Might be useful for sub-classes?
502 513 if not xhtml:
3  docs/index.rst
Source Rendered
@@ -92,7 +92,6 @@ they never match:
92 92
93 93 These applicable pseudo-classes are not yet implemented:
94 94
95   -* ``:lang(language)``
96 95 * ``*:first-of-type``, ``*:last-of-type``, ``*:nth-of-type``,
97 96 ``*:nth-last-of-type``, ``*:only-of-type``. All of these work when
98 97 you specify an element type, but not with ``*``
@@ -136,7 +135,7 @@ implemented without forking or monkey-patching cssselect.
136 135 The "customization API" is the set of methods in translation classes
137 136 and their signature. You can look at the `source code`_ to see how it works.
138 137 However, be aware that this API is not very stable yet. It might change
139   -and break you sub-class.
  138 +and break your sub-class.
140 139
141 140 .. _source code: https://github.com/SimonSapin/cssselect/blob/master/cssselect/xpath.py
142 141

0 comments on commit 856621a

Please sign in to comment.
Something went wrong with that request. Please try again.