Skip to content

Commit

Permalink
Fix only first child being saved, improve UTF8 support
Browse files (browse the repository at this point in the history)
  • Loading branch information
loranmutafov committed Mar 13, 2017
1 parent 12267b8 commit bfff619
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 10 deletions.
21 changes: 16 additions & 5 deletions src/Amara/Varcon/HtmlCrawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,26 @@ public function crawlAndModify($content, callable $callable)
$this->stripDoctypeHtmlBodyAndHeadElements($document);

$xpath = new DOMXPath($document);
$nodes = $xpath->query(implode('|', $this->xpathExpressions));

$textNodes = $xpath->query(implode('|', $this->xpathExpressions));
$isContentModified = false;

/** @var DOMNode $textNode */
foreach ($textNodes as $textNode) {
$textNode->nodeValue = $callable($textNode->nodeValue);
/** @var DOMNode $node */
foreach ($nodes as $node) {
$modifiedNodeValue = $callable($node->nodeValue);

if ($node->nodeValue != $modifiedNodeValue) {
$isContentModified = true;

$node->nodeValue = $modifiedNodeValue;
}
}

if (false === $isContentModified) {
return $content;
}

return $document->saveHTML($document->documentElement);
return trim($document->saveHTML());
}

/**
Expand Down
4 changes: 4 additions & 0 deletions tests/Amara/Varcon/HtmlCrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ public function provideCrawlAndModifyWithChangedXpathExpressions()
$html = '<span unsupported="Text">Text text text</span>',
$changedHtml = '<span unsupported="Changed">Text text text</span>',
],
[
$html = '<span>Unchanged</span>',
$changedHtml = '<span>Unchanged</span>',
],
];
}
}
9 changes: 4 additions & 5 deletions tests/Amara/Varcon/HtmlTranslatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function testTranslatePreservesWhitespace()
$htmlTranslator = new HtmlTranslator();

$this->assertSame(
'<p>Colour <strong>pyjama</strong> паралелепипед</p>', // Tests UTF8 characters as well
'<p>Colour <strong>pyjama</strong> &#1087;&#1072;&#1088;&#1072;&#1083;&#1077;&#1083;&#1077;&#1087;&#1080;&#1087;&#1077;&#1076;</p>', // Tests UTF8 characters as well
$htmlTranslator->translate(
'<p>Color <strong>pajama</strong> паралелепипед</p>',
'A',
Expand Down Expand Up @@ -58,13 +58,12 @@ public function provideTranslate()
{
return [
[
$html = '<p>Text & text text</p>',
$translatedHtml = '<p>Translated &amp; translated translated</p>',
$html = '<p>Text & text text</p><p>More text</p><p>More text</p>',
$translatedHtml = '<p>Translated &amp; translated translated</p><p>More translated</p><p>More translated</p>',
],
[
// Some day, this will stay as &bull; ..some ..day
$html = '<p>&bull; Text &amp; <strong>text</strong>: text</p>',
$translatedHtml = '<p> Translated &amp; <strong>translated</strong>: translated</p>',
$translatedHtml = '<p>&bull; Translated &amp; <strong>translated</strong>: translated</p>',
],
[
$html = '<img src="#" alt="Text text text">',
Expand Down

0 comments on commit bfff619

Please sign in to comment.