Skip to content

Commit

Permalink
[DomCrawler] added a way to get parsing errors for Crawler::addHtmlCo…
Browse files Browse the repository at this point in the history
…ntent() and Crawler::addXmlContent() via libxml functions
  • Loading branch information
fabpot committed Sep 28, 2011
1 parent 382a421 commit a57a4af
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG-2.1.md
Expand Up @@ -35,6 +35,7 @@ To get the diff between two versions, go to https://github.com/symfony/symfony/c

### DomCrawler

* added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions
* added support for submitting a form without a submit button

### Finder
Expand Down
20 changes: 20 additions & 0 deletions src/Symfony/Component/DomCrawler/Crawler.php
Expand Up @@ -107,6 +107,13 @@ public function addContent($content, $type = null)
/**
* Adds HTML content to the list of nodes.
*
* libxml error output is suppressed while the content is parsed.
*
* To retrieve parsing errors, enable internal errors via
* libxml_use_internal_errors(true) before calling this method,
* then fetch them with libxml_get_errors(). Be sure to clear
* the errors with libxml_clear_errors() afterward.
*
* @param string $content The HTML content
* @param string $charset The charset
*
Expand All @@ -117,7 +124,10 @@ public function addHtmlContent($content, $charset = 'UTF-8')
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;

$current = libxml_use_internal_errors(true);
@$dom->loadHTML($content);
libxml_use_internal_errors($current);

$this->addDocument($dom);

$base = $this->filter('base')->extract(array('href'));
Expand All @@ -130,6 +140,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')
/**
* Adds XML content to the list of nodes.
*
* libxml error output is suppressed while the content is parsed.
*
* To retrieve parsing errors, enable internal errors via
* libxml_use_internal_errors(true) before calling this method,
* then fetch them with libxml_get_errors(). Be sure to clear
* the errors with libxml_clear_errors() afterward.
*
* @param string $content The XML content
* @param string $charset The charset
*
Expand All @@ -141,7 +158,10 @@ public function addXmlContent($content, $charset = 'UTF-8')
$dom->validateOnParse = true;

// remove the default namespace to make XPath expressions simpler
$current = libxml_use_internal_errors(true);
@$dom->loadXML(str_replace('xmlns', 'ns', $content));
libxml_use_internal_errors($current);

$this->addDocument($dom);
}

Expand Down
54 changes: 54 additions & 0 deletions tests/Symfony/Tests/Component/DomCrawler/CrawlerTest.php
Expand Up @@ -69,6 +69,34 @@ public function testAddHtmlContent()
$this->assertEquals('http://symfony.com/contact', $crawler->filter('a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
}

/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/
public function testAddHtmlContentWithErrors()
{
    // Buffer libxml errors internally and remember the previous setting so
    // it can be restored instead of being forced to false.
    $internalErrors = libxml_use_internal_errors(true);

    // Drop anything already sitting in the buffer so the exact-count
    // assertion below only reflects the parse performed by this test.
    libxml_clear_errors();

    $crawler = new Crawler();
    $crawler->addHtmlContent(<<<EOF
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<nav><a href="#"><a href="#"></nav>
</body>
</html>
EOF
    , 'UTF-8');

    $errors = libxml_get_errors();

    // Restore the global libxml state before asserting: a failing assertion
    // throws, and cleanup placed after it would never run.
    libxml_clear_errors();
    libxml_use_internal_errors($internalErrors);

    $this->assertEquals(1, count($errors));
    $this->assertEquals("Tag nav invalid\n", $errors[0]->message);
}

/**
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
*/
Expand All @@ -80,6 +108,32 @@ public function testAddXmlContent()
$this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
}

/**
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
*/
public function testAddXmlContentWithErrors()
{
    // Buffer libxml errors internally and remember the previous setting so
    // it can be restored instead of being forced to false.
    $internalErrors = libxml_use_internal_errors(true);

    // Start from an empty buffer so errors left behind by earlier code
    // cannot satisfy the assertion on their own.
    libxml_clear_errors();

    $crawler = new Crawler();
    $crawler->addXmlContent(<<<EOF
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<nav><a href="#"><a href="#"></nav>
</body>
</html>
EOF
    , 'UTF-8');

    $errors = libxml_get_errors();

    // Restore the global libxml state before asserting: a failing assertion
    // throws, and cleanup placed after it would never run.
    libxml_clear_errors();
    libxml_use_internal_errors($internalErrors);

    // The markup is not well-formed XML, so more than one error is expected.
    $this->assertGreaterThan(1, count($errors));
}

/**
* @covers Symfony\Component\DomCrawler\Crawler::addContent
*/
Expand Down

0 comments on commit a57a4af

Please sign in to comment.