Skip to content

Commit

Permalink
Fix #9321: add GBK encoding support to Crawler::addHtmlContent
Browse files Browse the repository at this point in the history
  • Loading branch information
bronze1man authored and fabpot committed Dec 29, 2013
1 parent 0285bfd commit acb2df0
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
14 changes: 12 additions & 2 deletions src/Symfony/Component/DomCrawler/Crawler.php
Expand Up @@ -147,8 +147,18 @@ public function addHtmlContent($content, $charset = 'UTF-8')
$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;

if (function_exists('mb_convert_encoding') && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()))) {
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
if (function_exists('mb_convert_encoding')) {
$has_error = false;
$previous = set_error_handler(function()use(&$has_error){
$has_error = true;
});
$tmpContent = @mb_convert_encoding($content, 'HTML-ENTITIES', $charset);

set_error_handler($previous);

if (!$has_error) {
$content = $tmpContent;
}
}

@$dom->loadHTML($content);
Expand Down
12 changes: 12 additions & 0 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Expand Up @@ -112,6 +112,18 @@ public function testAddHtmlContentUnsupportedCharset()
$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->text());
}

/**
 * Feeds GBK-encoded markup to addHtmlContent() with the charset given as
 * 'gbk' and checks that the paragraph text is read back as the expected
 * Chinese characters.
 *
 * @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
 */
public function testAddHtmlContentCharsetGbk()
{
    // Base64 form of the GBK-encoded bytes of <html><p>中文</p></html>;
    // kept as base64 so this source file itself can stay UTF-8.
    $gbkHtml = base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+');

    $crawler = new Crawler();
    $crawler->addHtmlContent($gbkHtml, 'gbk');

    $paragraphText = $crawler->filterXPath('//p')->text();

    $this->assertEquals('中文', $paragraphText);
}

/**
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
*/
Expand Down

0 comments on commit acb2df0

Please sign in to comment.