Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Use XPath 'lang()' in XML docs.

  • Loading branch information...
commit 9846271380f1187221d5b404409a54e81a7e84bf 1 parent 3e5abd8
@sjp sjp authored
Showing with 53 additions and 6 deletions.
  1. +38 −0 cssselect/tests.py
  2. +15 −6 cssselect/xpath.py
View
38 cssselect/tests.py
@@ -464,6 +464,30 @@ def series(css):
assert series('foo') == None
assert series('n+') == None
+ def test_lang(self):
+ document = etree.fromstring(XMLLANG_IDS)
+ sort_key = dict(
+ (el, count) for count, el in enumerate(document.getiterator())
+ ).__getitem__
+ css_to_xpath = GenericTranslator().css_to_xpath
+
+ def langid(selector):
+ xpath = css_to_xpath(selector)
+ items = document.xpath(xpath)
+ items.sort(key=sort_key)
+ return [element.get('id', 'nil') for element in items]
+
+ assert langid(':lang("EN")') == ['first', 'second', 'third', 'fourth']
+ assert langid(':lang("en-us")') == ['second', 'fourth']
+ assert langid(':lang(en-nz)') == ['third']
+ assert langid(':lang(fr)') == ['fifth']
+ assert langid(':lang(ru)') == ['sixth']
+ assert langid(":lang('ZH')") == ['eighth']
+ assert langid(':lang(de) :lang(zh)') == ['eighth']
+ assert langid(':lang(en), :lang(zh)') == [
+ 'first', 'second', 'third', 'fourth', 'eighth']
+ assert langid(':lang(es)') == []
+
def test_select(self):
document = etree.fromstring(HTML_IDS)
sort_key = dict(
@@ -675,6 +699,20 @@ def count(selector):
assert count('div[class!=madeup]') == 243 # ? Seems right
assert count('div[class~=dialog]') == 51 # ? Seems right
+XMLLANG_IDS = '''
+<test>
+ <a id="first" xml:lang="en">a</a>
+ <b id="second" xml:lang="en-US">b</b>
+ <c id="third" xml:lang="en-Nz">c</c>
+ <d id="fourth" xml:lang="En-us">d</d>
+ <e id="fifth" xml:lang="fr">e</e>
+ <f id="sixth" xml:lang="ru">f</f>
+ <g id="seventh" xml:lang="de">
+ <h id="eighth" xml:lang="zh"/>
+ </g>
+</test>
+'''
+
HTML_IDS = '''
<html id="html"><head>
<link id="link-href" href="foo" />
View
21 cssselect/xpath.py
@@ -410,12 +410,7 @@ def xpath_lang_function(self, xpath, function):
% function.arguments)
value = function.arguments[0].value
return xpath.add_condition(
- "ancestor-or-self::*[@lang][1][starts-with(concat("
- # XPath 1.0 has no lower-case function...
- "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
- "'abcdefghijklmnopqrstuvwxyz'), "
- "'-'), %s)]"
- % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
+ "lang(%s)" % (self.xpath_literal(value)))
# Pseudo: dispatch by pseudo-class name
@@ -575,6 +570,20 @@ def xpath_checked_pseudo(self, xpath):
"and (name(.) = 'input' or name(.) = 'command')"
"and (@type = 'checkbox' or @type = 'radio'))")
+ def xpath_lang_function(self, xpath, function):
+ if function.argument_types() not in (['STRING'], ['IDENT']):
+ raise ExpressionError(
+ "Expected a single string or ident for :lang(), got %r"
+ % function.arguments)
+ value = function.arguments[0].value
+ return xpath.add_condition(
+ "ancestor-or-self::*[@lang][1][starts-with(concat("
+ # XPath 1.0 has no lower-case function...
+ "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+ "'abcdefghijklmnopqrstuvwxyz'), "
+ "'-'), %s)]"
+ % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
+
def xpath_link_pseudo(self, xpath):
return xpath.add_condition("@href and "
"(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
Please sign in to comment.
Something went wrong with that request. Please try again.