diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 37822e53c2a0..a1ddffd797a1 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -856,13 +856,12 @@ private function relativize($xpath) { $expressions = array(); - $unionPattern = '/\|(?![^\[]*\])/'; // An expression which will never match to replace expressions which cannot match in the crawler // We cannot simply drop $nonMatchingExpression = 'a[name() = "b"]'; // Split any unions into individual expressions. - foreach (preg_split($unionPattern, $xpath) as $expression) { + foreach ($this->splitUnionParts($xpath) as $expression) { $expression = trim($expression); $parenthesis = ''; @@ -912,6 +911,47 @@ private function relativize($xpath) return implode(' | ', $expressions); } + /** + * Splits the XPath into parts that are separated by the union operator; a "|" found inside a quoted string or inside a [...] predicate is not treated as a separator. + * + * @param string $xpath The XPath expression to split + * + * @return string[] The parts in their original order, not trimmed + */ + private function splitUnionParts($xpath) + { + // Split any unions into individual expressions. We need to iterate + // through the string to correctly parse opening/closing quotes and + // square brackets, which is not possible with regular expressions. 
+ $unionParts = array(); + $inSingleQuotedString = false; + $inDoubleQuotedString = false; + $openedBrackets = 0; + $lastUnion = 0; + $xpathLength = strlen($xpath); + for ($i = 0; $i < $xpathLength; ++$i) { + $char = $xpath[$i]; + + if ($char === "'" && !$inDoubleQuotedString) { + $inSingleQuotedString = !$inSingleQuotedString; + } elseif ($char === '"' && !$inSingleQuotedString) { + $inDoubleQuotedString = !$inDoubleQuotedString; + } elseif (!$inSingleQuotedString && !$inDoubleQuotedString) { + if ($char === '[') { + ++$openedBrackets; + } elseif ($char === ']') { + --$openedBrackets; + } elseif ($char === '|' && $openedBrackets === 0) { + $unionParts[] = substr($xpath, $lastUnion, $i - $lastUnion); + $lastUnion = $i + 1; + } + } + } + $unionParts[] = substr($xpath, $lastUnion); + + return $unionParts; + } + /** * @param int $position * diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php index 45bbb2f8e599..65e2a90e8751 100755 --- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php @@ -387,6 +387,7 @@ public function testFilterXpathComplexQueries() $this->assertCount(5, $crawler->filterXPath('(//a | //div)//img')); $this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)')); $this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )')); + $this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]")); } public function testFilterXPath() @@ -548,7 +549,7 @@ public function testFilterXPathWithSelfAxes() $this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name'); $this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name'); - 
$this->assertCount(9, $crawler->filterXPath('self::*/a')); + $this->assertCount(10, $crawler->filterXPath('self::*/a')); } public function testFilter() @@ -969,6 +970,8 @@ public function createTestCrawler($uri = null) GetLink + Klausi|Claudiu +