Skip to content

Commit

Permalink
[CssSelector] fully rewritten component
Browse files Browse the repository at this point in the history
Squashed commits:
[CssSelector] removed previous implementation
[CssSelector] rewriting, step 1
[CssSelector] rewriting, step 2
[CssSelector] rewriting, step 3
[CssSelector] rewriting, step 4
[CssSelector] rewriting, step 5
[CssSelector] rewriting, step 6
[CssSelector] fixed shortcuts regex
[CssSelector] tests, step1
[CssSelector] tests, step2
[CssSelector] tests, step3
[CssSelector] tests, step4
[CssSelector] fixed problems based on @stof's feedback
[CssSelector] tests, step5
[CssSelector] tests, step6
[CssSelector] tests, step7
[CssSelector] added my name in composer.json
  • Loading branch information
jfsimon committed Mar 23, 2013
1 parent bd53382 commit c6f87d0
Show file tree
Hide file tree
Showing 91 changed files with 6,298 additions and 2,161 deletions.
301 changes: 23 additions & 278 deletions src/Symfony/Component/CssSelector/CssSelector.php
Expand Up @@ -11,16 +11,22 @@

namespace Symfony\Component\CssSelector;

use Symfony\Component\CssSelector\Exception\ParseException;
use Symfony\Component\CssSelector\Exception;
use Symfony\Component\CssSelector\Parser\Shortcut\ClassParser;
use Symfony\Component\CssSelector\Parser\Shortcut\ElementParser;
use Symfony\Component\CssSelector\Parser\Shortcut\EmptyStringParser;
use Symfony\Component\CssSelector\Parser\Shortcut\HashParser;
use Symfony\Component\CssSelector\XPath\Extension\HtmlExtension;
use Symfony\Component\CssSelector\XPath\Translator;

/**
* CssSelector is the main entry point of the component and can convert CSS
* selectors to XPath expressions.
*
* $xpath = CssSelector::toXpath('h1.foo');
*
* This component is a port of the Python lxml library,
* which is copyright Infrae and distributed under the BSD license.
* This component is a port of the Python cssselect library,
* which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
*
* @author Fabien Potencier <fabien@symfony.com>
*
Expand All @@ -33,290 +39,29 @@ class CssSelector
* Optionally, a prefix can be added to the resulting XPath
* expression with the $prefix parameter.
*
* @param mixed $cssExpr The CSS expression.
* @param string $prefix An optional prefix for the XPath expression.
* @param mixed $cssExpr The CSS expression.
* @param string $prefix An optional prefix for the XPath expression.
* @param boolean $html Enables HTML extension.
*
* @return string
*
* @throws ParseException When got None for xpath expression
*
* @api
*/
public static function toXPath($cssExpr, $prefix = 'descendant-or-self::')
{
    if (is_string($cssExpr)) {
        // Fast paths: the most common selectors are translated directly,
        // without running the tokenizer and parser at all.

        // Empty expression: any element.
        if (!$cssExpr) {
            return $prefix.'*';
        }

        // Bare element name, e.g. "h1".
        if (preg_match('#^\w+\s*$#u', $cssExpr, $parts)) {
            return $prefix.trim($parts[0]);
        }

        // Optional element name followed by an id, e.g. "h1#foo" or "#foo".
        if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $parts)) {
            $element = $parts[1] ?: '*';

            return sprintf("%s%s[@id = '%s']", $prefix, $element, $parts[2]);
        }

        // Optional element name followed by a class, e.g. "h1.foo" or ".foo".
        if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $parts)) {
            $element = $parts[1] ?: '*';

            return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $element, $parts[2]);
        }

        // Anything else goes through the full parser.
        $parser = new self();
        $cssExpr = $parser->parse($cssExpr);
    }

    // From here on $cssExpr is a node: either one handed in by the caller
    // or the result of the parse above.
    $xpath = $cssExpr->toXpath();

    // @codeCoverageIgnoreStart
    if (!$xpath) {
        throw new ParseException(sprintf('Got None for xpath expression from %s.', $cssExpr));
    }
    // @codeCoverageIgnoreEnd

    if ($prefix) {
        $xpath->addPrefix($prefix);
    }

    return (string) $xpath;
}

/**
* Parses an expression and returns the Node object that represents
* the parsed expression.
*
* @param string $string The expression to parse
*
* @return Node\NodeInterface
*
* @throws \Exception When tokenizer throws it while parsing
*/
public function parse($string)
{
    // Tokenize the whole expression up front and wrap the tokens in a stream;
    // the original string is handed to the stream as well.
    $tokenizer = new Tokenizer();

    $stream = new TokenStream($tokenizer->tokenize($string), $string);

    try {
        return $this->parseSelectorGroup($stream);
    } catch (\Exception $e) {
        // Re-throw an exception of the same class, with the message extended
        // by the tokens consumed so far and the token the parser stopped on.
        // The original exception is chained as "previous".
        $class = get_class($e);

        // implode() takes the separator first: the legacy (array, separator)
        // argument order was removed in PHP 8.0 and would raise a TypeError
        // here, hiding the real parse error.
        throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode('', $stream->getUsed()), $stream->peek()), 0, $e);
    }
}

/**
* Parses a selector group contained in $stream and returns
* the Node object that represents the expression.
*
* @param TokenStream $stream The stream to parse.
*
* @return Node\NodeInterface
*/
private function parseSelectorGroup($stream)
{
    // A group is one selector followed by any number of ", selector" parts.
    $selectors = array($this->parseSelector($stream));

    while ($stream->peek() == ',') {
        // Consume the comma, then parse the selector that follows it.
        $stream->next();
        $selectors[] = $this->parseSelector($stream);
    }

    // A single selector is returned as is; several are combined in an OrNode.
    if (1 == count($selectors)) {
        return $selectors[0];
    }

    return new Node\OrNode($selectors);
}

/**
* Parses a selector contained in $stream and returns the Node
* object that represents it.
*
* @param TokenStream $stream The stream containing the selector.
*
* @return Node\NodeInterface
*
* @throws ParseException When expected selector but got something else
*/
private function parseSelector($stream)
{
    $selector = $this->parseSimpleSelector($stream);

    for (;;) {
        $token = $stream->peek();

        // A comma or the end of the stream terminates this selector.
        if (',' == $token || null === $token) {
            return $selector;
        }

        // Anything that is not an explicit combinator is treated as the
        // descendant combinator (a single space).
        $combinator = ' ';
        if (in_array($token, array('+', '>', '~'))) {
            // A combinator
            $combinator = (string) $stream->next();

            // Ignore optional whitespace after a combinator
            while (' ' == $stream->peek()) {
                $stream->next();
            }
        }

        // The right-hand side must consume at least one token; otherwise the
        // loop would never advance.
        $usedBefore = count($stream->getUsed());
        $rightHandSide = $this->parseSimpleSelector($stream);
        if (count($stream->getUsed()) == $usedBefore) {
            throw new ParseException(sprintf("Expected selector, got '%s'", $stream->peek()));
        }

        $selector = new Node\CombinedSelectorNode($selector, $combinator, $rightHandSide);
    }
}

/**
* Parses a simple selector (the current token) from $stream and returns
* the resulting Node object.
*
* @param TokenStream $stream The stream containing the selector.
*
* @return Node\NodeInterface
*
* @throws ParseException When expected symbol but got something else
*/
private function parseSimpleSelector($stream)
{
    // Step 1: optional leading type selector, written as "element", "*"
    // or "namespace|element".
    $peek = $stream->peek();
    if ('*' != $peek && !$peek->isType('Symbol')) {
        // No element name given (e.g. ".foo"): any element, any namespace.
        $element = $namespace = '*';
    } else {
        $next = $stream->next();
        if ('*' != $next && !$next->isType('Symbol')) {
            throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
        }

        if ($stream->peek() == '|') {
            $namespace = $next;
            $stream->next();
            $element = $stream->next();
            // Validate the element token itself. The previous code re-checked
            // $next (the namespace, already validated above), so an invalid
            // element after "|" was silently accepted and the error message
            // would have named the wrong token.
            // The !$element guard mirrors the pseudo-class branch below;
            // presumably next() yields null at the end of the stream.
            if ('*' != $element && (!$element || !$element->isType('Symbol'))) {
                throw new ParseException(sprintf("Expected symbol, got '%s'", $element));
            }
        } else {
            $namespace = '*';
            $element = $next;
        }
    }

    // Step 2: wrap the element node in one node per trailing qualifier
    // (#id, .class, [attribute], :pseudo or :function(...)).
    $result = new Node\ElementNode($namespace, $element);
    $hasHash = false;
    while (true) {
        $peek = $stream->peek();
        if ('#' == $peek) {
            if ($hasHash) {
                /* You can't have two hashes
                (FIXME: is there some more general rule I'm missing?) */
                // @codeCoverageIgnoreStart
                break;
                // @codeCoverageIgnoreEnd
            }
            $stream->next();
            $result = new Node\HashNode($result, $stream->next());
            $hasHash = true;

            continue;
        } elseif ('.' == $peek) {
            $stream->next();
            $result = new Node\ClassNode($result, $stream->next());

            continue;
        } elseif ('[' == $peek) {
            $stream->next();
            $result = $this->parseAttrib($result, $stream);
            $next = $stream->next();
            if (']' != $next) {
                throw new ParseException(sprintf("] expected, got '%s'", $next));
            }

            continue;
        } elseif (':' == $peek || '::' == $peek) {
            $type = $stream->next();
            $ident = $stream->next();
            if (!$ident || !$ident->isType('Symbol')) {
                throw new ParseException(sprintf("Expected symbol, got '%s'", $ident));
            }

            if ($stream->peek() == '(') {
                // Functional pseudo-class: parse the single argument.
                $stream->next();
                $peek = $stream->peek();
                if ($peek->isType('String')) {
                    $selector = $stream->next();
                } elseif ($peek->isType('Symbol') && is_int($peek)) {
                    // NOTE(review): $peek is an object here (isType() was just
                    // called on it), so is_int() looks like it can never be
                    // true. Left unchanged; confirm before relying on it.
                    $selector = intval($stream->next());
                } else {
                    // FIXME: parseSimpleSelector, or selector, or...?
                    $selector = $this->parseSimpleSelector($stream);
                }
                $next = $stream->next();
                if (')' != $next) {
                    throw new ParseException(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
                }

                $result = new Node\FunctionNode($result, $type, $ident, $selector);
            } else {
                $result = new Node\PseudoNode($result, $type, $ident);
            }

            continue;
        } else {
            // Not a qualifier: swallow one trailing space and stop, leaving
            // the next token for the caller.
            if (' ' == $peek) {
                $stream->next();
            }

            break;
        }
        // FIXME: not sure what "negation" is
    }

    return $result;
}

/**
* Parses an attribute from a selector contained in $stream and returns
* the resulting AttribNode object.
*
* @param Node\NodeInterface $selector The selector object whose attribute
* is to be parsed.
* @param TokenStream $stream The container token stream.
*
* @return Node\AttribNode
*
* @throws ParseException When encountered unexpected selector
*/
private function parseAttrib($selector, $stream)
public static function toXPath($cssExpr, $prefix = 'descendant-or-self::', $html = true)
{
$attrib = $stream->next();
if ($stream->peek() == '|') {
$namespace = $attrib;
$stream->next();
$attrib = $stream->next();
} else {
$namespace = '*';
}
$translator = new Translator();

if ($stream->peek() == ']') {
return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
if ($html) {
$translator->registerExtension(new HtmlExtension($translator));
}

$op = $stream->next();
if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!='))) {
throw new ParseException(sprintf("Operator expected, got '%s'", $op));
}

$value = $stream->next();
if (!$value->isType('Symbol') && !$value->isType('String')) {
throw new ParseException(sprintf("Expected string or symbol, got '%s'", $value));
}
$translator
->registerParserShortcut(new EmptyStringParser())
->registerParserShortcut(new ElementParser())
->registerParserShortcut(new ClassParser())
->registerParserShortcut(new HashParser())
;

return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
return $translator->cssToXPath($cssExpr, $prefix);
}
}
62 changes: 62 additions & 0 deletions src/Symfony/Component/CssSelector/CssSelectorTest.php
@@ -0,0 +1,62 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\CssSelector;

/**
 * Functional tests for the CssSelector::toXPath() entry point.
 */
class CssSelectorTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Simple selectors are translated with the default "descendant-or-self::" prefix.
     */
    public function testCssToXPath()
    {
        $this->assertEquals('descendant-or-self::*', CssSelector::toXPath(''));
        $this->assertEquals('descendant-or-self::h1', CssSelector::toXPath('h1'));
        $this->assertEquals("descendant-or-self::h1[@id = 'foo']", CssSelector::toXPath('h1#foo'));
        $this->assertEquals("descendant-or-self::h1[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]", CssSelector::toXPath('h1.foo'));
        $this->assertEquals('descendant-or-self::foo:h1', CssSelector::toXPath('foo|h1'));
    }

    /**
     * Selectors are translated with an empty prefix.
     *
     * @dataProvider getCssToXPathWithoutPrefixTestData
     */
    public function testCssToXPathWithoutPrefix($css, $xpath)
    {
        $this->assertEquals($xpath, CssSelector::toXPath($css, ''), '::toXPath() converts a CSS selector to an XPath expression');
    }

    /**
     * An invalid selector raises a ParseException with a precise message.
     */
    public function testParseExceptions()
    {
        // Catch only ParseException. With catch (\Exception), the exception
        // thrown by $this->fail() is itself caught, and a missing exception
        // is reported as a confusing "not an instance of ParseException".
        // Any other exception type now surfaces as a test error.
        try {
            CssSelector::toXPath('h1:');
        } catch (\Symfony\Component\CssSelector\Exception\ParseException $e) {
            $this->assertEquals("Expected identifier, but <eof at 3> found.", $e->getMessage(), '::toXPath() throws a ParseException if the css selector is not valid');

            return;
        }

        $this->fail('::toXPath() throws a ParseException if the css selector is not valid');
    }

    /**
     * Provides pairs of array(CSS selector, expected XPath without prefix).
     */
    public function getCssToXPathWithoutPrefixTestData()
    {
        return array(
            array('h1', "h1"),
            array('foo|h1', "foo:h1"),
            array('h1, h2, h3', "h1 | h2 | h3"),
            array('h1:nth-child(3n+1)', "*/*[name() = 'h1' and ((position() -1) mod 3 = 0 and position() >= 1)]"),
            array('h1 > p', "h1/p"),
            array('h1#foo', "h1[@id = 'foo']"),
            array('h1.foo', "h1[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
            array('h1[class*="foo bar"]', "h1[@class and contains(@class, 'foo bar')]"),
            array('h1[foo|class*="foo bar"]', "h1[@foo:class and contains(@foo:class, 'foo bar')]"),
            array('h1[class]', "h1[@class]"),
            array('h1 .foo', "h1/descendant-or-self::*/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
            array('h1 #foo', "h1/descendant-or-self::*/*[@id = 'foo']"),
            array('h1 [class*=foo]', "h1/descendant-or-self::*/*[@class and contains(@class, 'foo')]"),
            array('div>.foo', "div/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
            array('div > .foo', "div/*[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]"),
        );
    }
}

0 comments on commit c6f87d0

Please sign in to comment.