Skip to content

Commit

Permalink
[svn r4209] r5239@delle: sbehnel | 2009-09-10 06:45:11 +0200
Browse files: browse the repository at this point in the history
 cssselect: fix error reporting and infinite loop on syntax error

--HG--
branch : trunk
  • Loading branch information
scoder committed Sep 10, 2009
1 parent 3fbdcfc commit e36e4ae
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
26 changes: 17 additions & 9 deletions src/lxml/cssselect.py
Expand Up @@ -649,8 +649,12 @@ def parse(string):
except SelectorSyntaxError:
import sys
e = sys.exc_info()[1]
e.args = tuple(["%s at %s -> %s" % (
e, stream.used, list(stream))])
message = "%s at %s -> %r" % (
e, stream.used, stream.peek())
e.msg = message
if sys.version_info < (2,6):
e.message = message
e.args = tuple([message])
raise

def parse_selector_group(stream):
Expand All @@ -677,7 +681,11 @@ def parse_selector(stream):
combinator = stream.next()
else:
combinator = ' '
consumed = len(stream.used)
next_selector = parse_simple_selector(stream)
if consumed == len(stream.used):
raise SelectorSyntaxError(
"Expected selector, got '%s'" % stream.peek())
result = CombinedSelector(result, combinator, next_selector)
return result

Expand All @@ -689,14 +697,14 @@ def parse_simple_selector(stream):
next = stream.next()
if next != '*' and not isinstance(next, Symbol):
raise SelectorSyntaxError(
"Expected symbol, got %r" % next)
"Expected symbol, got '%s'" % next)
if stream.peek() == '|':
namespace = next
stream.next()
element = stream.next()
if element != '*' and not isinstance(next, Symbol):
raise SelectorSyntaxError(
"Expected symbol, got %r" % next)
"Expected symbol, got '%s'" % next)
else:
namespace = '*'
element = next
Expand All @@ -723,14 +731,14 @@ def parse_simple_selector(stream):
next = stream.next()
if not next == ']':
raise SelectorSyntaxError(
"] expected, got %r" % next)
"] expected, got '%s'" % next)
continue
elif peek == ':' or peek == '::':
type = stream.next()
ident = stream.next()
if not isinstance(ident, Symbol):
raise SelectorSyntaxError(
"Expected symbol, got %r" % ident)
"Expected symbol, got '%s'" % ident)
if stream.peek() == '(':
stream.next()
peek = stream.peek()
Expand All @@ -744,7 +752,7 @@ def parse_simple_selector(stream):
next = stream.next()
if not next == ')':
raise SelectorSyntaxError(
"Expected ), got %r and %r"
"Expected ')', got '%s' and '%s'"
% (next, selector))
result = Function(result, type, ident, selector)
else:
Expand Down Expand Up @@ -778,11 +786,11 @@ def parse_attrib(selector, stream):
op = stream.next()
if not op in ('^=', '$=', '*=', '=', '~=', '|=', '!='):
raise SelectorSyntaxError(
"Operator expected, got %r" % op)
"Operator expected, got '%s'" % op)
value = stream.next()
if not isinstance(value, (Symbol, String)):
raise SelectorSyntaxError(
"Expected string or symbol, got %r" % value)
"Expected string or symbol, got '%s'" % value)
return Attrib(selector, namespace, attrib, op, value)

def parse_series(s):
Expand Down
8 changes: 8 additions & 0 deletions src/lxml/tests/test_css.txt
Expand Up @@ -49,6 +49,14 @@ Then of parsing:
>>> parse('td ~ th')
CombinedSelector[Element[td] ~ Element[th]]

Some parse error tests:

>>> try: parse('attributes(href)/html/body/a')
... except: # Py2, Py3, ...
... import sys
... print(str(sys.exc_info()[1]).replace("(u'", "('"))
Expected selector, got '(' at [Symbol('attributes', 0)] -> Token('(', 10)

Now of translation:

>>> def xpath(css):
Expand Down

0 comments on commit e36e4ae

Please sign in to comment.