Skip to content

Commit

Permalink
[DomCrawler] Allow pipe (|) character in link tags when using Xpath e…
Browse files Browse the repository at this point in the history
…xpressions
  • Loading branch information
klausi authored and nicolas-grekas committed Oct 18, 2016
1 parent ef48f59 commit 5b26e33
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
44 changes: 42 additions & 2 deletions src/Symfony/Component/DomCrawler/Crawler.php
Expand Up @@ -856,13 +856,12 @@ private function relativize($xpath)
{
$expressions = array();

$unionPattern = '/\|(?![^\[]*\])/';
// An expression which will never match to replace expressions which cannot match in the crawler
// We cannot simply drop
$nonMatchingExpression = 'a[name() = "b"]';

// Split any unions into individual expressions.
foreach (preg_split($unionPattern, $xpath) as $expression) {
foreach ($this->splitUnionParts($xpath) as $expression) {
$expression = trim($expression);
$parenthesis = '';

Expand Down Expand Up @@ -912,6 +911,47 @@ private function relativize($xpath)
return implode(' | ', $expressions);
}

/**
* Splits the XPath into parts that are separated by the union operator.
*
* @param string $xpath
*
* @return string[]
*/
private function splitUnionParts($xpath)
{
    // A regular expression cannot reliably tell a union operator apart from
    // a "|" that is part of a string literal or sits inside a predicate, so
    // the expression is scanned one byte at a time with a tiny state machine.
    $parts = array();

    // Offset of the first byte of the part currently being collected.
    $segmentStart = 0;

    // Number of "[" seen minus number of "]" seen outside string literals.
    $predicateDepth = 0;

    // Delimiter (' or ") of the string literal being scanned, or null when
    // the scanner is outside any literal.
    $activeQuote = null;

    $length = strlen($xpath);
    for ($offset = 0; $offset < $length; ++$offset) {
        $current = $xpath[$offset];

        if (null !== $activeQuote) {
            // Inside a literal only the matching delimiter is significant;
            // every other byte (including the other quote type) is skipped.
            if ($current === $activeQuote) {
                $activeQuote = null;
            }

            continue;
        }

        switch ($current) {
            case "'":
            case '"':
                $activeQuote = $current;
                break;

            case '[':
                ++$predicateDepth;
                break;

            case ']':
                --$predicateDepth;
                break;

            case '|':
                // Only a pipe outside every predicate separates union parts.
                if (0 === $predicateDepth) {
                    $parts[] = substr($xpath, $segmentStart, $offset - $segmentStart);
                    $segmentStart = $offset + 1;
                }
                break;
        }
    }

    // Whatever follows the last separator (or the whole input when there
    // was none) is the final part.
    $parts[] = substr($xpath, $segmentStart);

    return $parts;
}

/**
* @param int $position
*
Expand Down
5 changes: 4 additions & 1 deletion src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Expand Up @@ -387,6 +387,7 @@ public function testFilterXpathComplexQueries()
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
}

public function testFilterXPath()
Expand Down Expand Up @@ -548,7 +549,7 @@ public function testFilterXPathWithSelfAxes()

$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
}

public function testFilter()
Expand Down Expand Up @@ -969,6 +970,8 @@ public function createTestCrawler($uri = null)
<a href="?get=param">GetLink</a>
<a href="/example">Klausi|Claudiu</a>
<form action="foo" id="FooFormId">
<input type="text" value="TextValue" name="TextName" />
<input type="submit" value="FooValue" name="FooName" id="FooId" />
Expand Down

0 comments on commit 5b26e33

Please sign in to comment.