Skip to content

Commit

Permalink
bug #20235 [DomCrawler] Allow pipe (|) character in link tags when us…
Browse files Browse the repository at this point in the history
…ing Xpath expressions (klausi, nicolas-grekas)

This PR was merged into the 2.7 branch.

Discussion
----------

[DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #20229
| License       | MIT
| Doc PR        | -

@klausi could you please validate this patch? Is it an improvement over yours? (sorry I don't have the proper use case to test.)

Commits
-------

17757d8 [DomCrawler] Optimize DomCrawler::relativize()
5b26e33 [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions
  • Loading branch information
nicolas-grekas committed Oct 18, 2016
2 parents 0e8b2a3 + 17757d8 commit 8dee4be
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 14 deletions.
58 changes: 45 additions & 13 deletions src/Symfony/Component/DomCrawler/Crawler.php
Expand Up @@ -856,22 +856,47 @@ private function relativize($xpath)
{
$expressions = array();

$unionPattern = '/\|(?![^\[]*\])/';
// An expression which will never match to replace expressions which cannot match in the crawler
// We cannot simply drop
$nonMatchingExpression = 'a[name() = "b"]';

// Split any unions into individual expressions.
foreach (preg_split($unionPattern, $xpath) as $expression) {
$expression = trim($expression);
$parenthesis = '';
$xpathLen = strlen($xpath);
$openedBrackets = 0;
$startPosition = strspn($xpath, " \t\n\r\0\x0B");

// If the union is inside some braces, we need to preserve the opening braces and apply
// the change only inside it.
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
$parenthesis = $matches[0];
$expression = substr($expression, strlen($parenthesis));
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
$i += strcspn($xpath, '"\'[]|', $i);

if ($i < $xpathLen) {
switch ($xpath[$i]) {
case '"':
case "'":
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
return $xpath; // The XPath expression is invalid
}
continue 2;
case '[':
++$openedBrackets;
continue 2;
case ']':
--$openedBrackets;
continue 2;
}
}
if ($openedBrackets) {
continue;
}

if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
// If the union is inside some braces, we need to preserve the opening braces and apply
// the change only inside it.
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
$parenthesis = substr($xpath, $startPosition, $j);
$startPosition += $j;
} else {
$parenthesis = '';
}
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));

// BC for Symfony 2.4 and lower were elements were adding in a fake _root parent
if (0 === strpos($expression, '/_root/')) {
Expand All @@ -881,7 +906,7 @@ private function relativize($xpath)
}

// add prefix before absolute element selector
if (empty($expression)) {
if ('' === $expression) {
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, '//')) {
$expression = 'descendant-or-self::'.substr($expression, 2);
Expand All @@ -899,17 +924,24 @@ private function relativize($xpath)
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, 'descendant::')) {
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
$expression = 'descendant-or-self::'.substr($expression, 12);
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
$expression = $nonMatchingExpression;
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
$expression = 'self::'.$expression;
}
$expressions[] = $parenthesis.$expression;

if ($i === $xpathLen) {
return implode(' | ', $expressions);
}

$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
$startPosition = $i + 1;
}

return implode(' | ', $expressions);
return $xpath; // The XPath expression is invalid
}

/**
Expand Down
5 changes: 4 additions & 1 deletion src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Expand Up @@ -387,6 +387,7 @@ public function testFilterXpathComplexQueries()
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
}

public function testFilterXPath()
Expand Down Expand Up @@ -548,7 +549,7 @@ public function testFilterXPathWithSelfAxes()

$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
}

public function testFilter()
Expand Down Expand Up @@ -969,6 +970,8 @@ public function createTestCrawler($uri = null)
<a href="?get=param">GetLink</a>
<a href="/example">Klausi|Claudiu</a>
<form action="foo" id="FooFormId">
<input type="text" value="TextValue" name="TextName" />
<input type="submit" value="FooValue" name="FooName" id="FooId" />
Expand Down

0 comments on commit 8dee4be

Please sign in to comment.