Skip to content

Commit

Permalink
Improve tests and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
loranmutafov committed Mar 13, 2017
1 parent 35a7b00 commit 12267b8
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 16 deletions.
14 changes: 10 additions & 4 deletions src/Amara/Varcon/HtmlCrawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace Amara\Varcon;

use DOMDocument;
use DOMText;
use DOMNode;
use DOMXPath;

/**
Expand Down Expand Up @@ -40,8 +40,8 @@ public function setXpathExpressions(array $xpathExpressions)
public function crawlAndModify($content, callable $callable)
{
$document = new DOMDocument();
$document->loadHTML(mb_convert_encoding(
sprintf('<div>%s</div>', $content),
@$document->loadHTML(mb_convert_encoding(
sprintf('<div>%s</div>', $content), /** @see stripDoctypeHtmlBodyAndHeadElements */
'HTML-ENTITIES',
'UTF-8'
));
Expand All @@ -51,7 +51,7 @@ public function crawlAndModify($content, callable $callable)

$textNodes = $xpath->query(implode('|', $this->xpathExpressions));

/** @var DOMText $textNode */
/** @var DOMNode $textNode */
foreach ($textNodes as $textNode) {
$textNode->nodeValue = $callable($textNode->nodeValue);
}
Expand All @@ -63,17 +63,23 @@ public function crawlAndModify($content, callable $callable)
* This method is a short hack to avoid incompatibilities between different PHP and Libxml setups. It has the same
* effect as passing the LIBXML_HTML_NOIMPLIED and LIBXML_HTML_NODEFDTD flags to loadHtml's options
*
* It works by first wrapping all of the content in a div, and then moving only that div's children back into the
* DOM document. This way, we get rid of the doctype and the html/head/body tags that loadHTML inserts automatically
*
* @param DOMDocument $document
*/
private function stripDoctypeHtmlBodyAndHeadElements(DOMDocument $document)
{
// First step - extract the div wrapper from the document
$container = $document->getElementsByTagName('div')->item(0);
$container = $container->parentNode->removeChild($container);

// Remove all document children
while ($document->firstChild) {
$document->removeChild($document->firstChild);
}

// Append the div wrapper's children as children of the document
while ($container->firstChild) {
$document->appendChild($container->firstChild);
}
Expand Down
17 changes: 17 additions & 0 deletions src/Amara/Varcon/TranslatorInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,25 @@
*/
interface TranslatorInterface
{
/**
* In case of multiple translations available, don't translate (recommended)
*
* @var int
*/
const QUESTIONABLE_IGNORE = 0;

/**
* In case of multiple translations available, use the first
*
* @var int
*/
const QUESTIONABLE_INCLUDE = 1;

/**
* In case of multiple translations available, mark them ?like/so? (useful for debugging)
*
* @var int
*/
const QUESTIONABLE_MARK = 2;

/**
Expand Down
33 changes: 21 additions & 12 deletions tests/Amara/Varcon/HtmlTranslatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,19 @@ public function testTranslate($html, $translatedHtml)
{
$translator = $this->prophesize(Translator::class);
$translator->translate(
Argument::any(),
Argument::any(),
Argument::any(),
Argument::any()
)->willReturn('Translated'); // Keep in mind we ignore whitespace this way
Argument::type('string'),
'A',
'B',
0
)->will(function ($arguments) {
$string = array_shift($arguments);

return str_replace(
['Text', 'text'],
['Translated', 'translated'],
$string
);
});

$htmlTranslator = new HtmlTranslator($translator->reveal());

Expand All @@ -50,24 +58,25 @@ public function provideTranslate()
{
return [
[
$html = '<p>Text text text</p>',
$translatedHtml = '<p>Translated</p>',
$html = '<p>Text & text text</p>',
$translatedHtml = '<p>Translated &amp; translated translated</p>',
],
[
$html = '<p>Text <strong>text</strong> text</p>',
$translatedHtml = '<p>Translated<strong>Translated</strong>Translated</p>',
// Some day, this will stay as &bull; ..some ..day
$html = '<p>&bull; Text &amp; <strong>text</strong>: text</p>',
$translatedHtml = '<p>• Translated &amp; <strong>translated</strong>: translated</p>',
],
[
$html = '<img src="#" alt="Text text text">',
$translatedHtml = '<img src="#" alt="Translated">',
$translatedHtml = '<img src="#" alt="Translated translated translated">',
],
[
$html = '<img src="#" title="Text text text">',
$translatedHtml = '<img src="#" title="Translated">',
$translatedHtml = '<img src="#" title="Translated translated translated">',
],
[
$html = '<meta name="description" content="Text text text">',
$translatedHtml = '<meta name="description" content="Translated">',
$translatedHtml = '<meta name="description" content="Translated translated translated">',
],
];
}
Expand Down

0 comments on commit 12267b8

Please sign in to comment.