diff --git a/src/Amara/Varcon/HtmlCrawler.php b/src/Amara/Varcon/HtmlCrawler.php
index ebf7114..1ac8d1e 100644
--- a/src/Amara/Varcon/HtmlCrawler.php
+++ b/src/Amara/Varcon/HtmlCrawler.php
@@ -3,7 +3,7 @@
namespace Amara\Varcon;
use DOMDocument;
-use DOMText;
+use DOMNode;
use DOMXPath;
/**
@@ -40,8 +40,8 @@ public function setXpathExpressions(array $xpathExpressions)
public function crawlAndModify($content, callable $callable)
{
$document = new DOMDocument();
- $document->loadHTML(mb_convert_encoding(
- sprintf('
%s
', $content),
+ @$document->loadHTML(mb_convert_encoding(
+ sprintf('%s
', $content), /** @see stripDoctypeHtmlBodyAndHeadElements */
'HTML-ENTITIES',
'UTF-8'
));
@@ -51,7 +51,7 @@ public function crawlAndModify($content, callable $callable)
$textNodes = $xpath->query(implode('|', $this->xpathExpressions));
- /** @var DOMText $textNode */
+ /** @var DOMNode $textNode */
foreach ($textNodes as $textNode) {
$textNode->nodeValue = $callable($textNode->nodeValue);
}
@@ -63,17 +63,23 @@ public function crawlAndModify($content, callable $callable)
* This method is a short hack to avoid incompatibilities between different PHP and Libxml setups. It has the same
* effect as passing the LIBXML_HTML_NOIMPLIED and LIBXML_HTML_NODEFDTD flags to loadHtml's options
*
+ * It works by first wrapping all of the contents in a div and then moving only the div's children back into the
+ * DOM document. This way, we get rid of the doctype and the html, head and body tags that loadHtml inserts
+ *
* @param DOMDocument $document
*/
private function stripDoctypeHtmlBodyAndHeadElements(DOMDocument $document)
{
+ // First step - extract the div wrapper from the document
$container = $document->getElementsByTagName('div')->item(0);
$container = $container->parentNode->removeChild($container);
+ // Remove all document children
while ($document->firstChild) {
$document->removeChild($document->firstChild);
}
+ // Append the div wrapper's children as children of the document
while ($container->firstChild) {
$document->appendChild($container->firstChild);
}
diff --git a/src/Amara/Varcon/TranslatorInterface.php b/src/Amara/Varcon/TranslatorInterface.php
index 334ea15..f407c76 100644
--- a/src/Amara/Varcon/TranslatorInterface.php
+++ b/src/Amara/Varcon/TranslatorInterface.php
@@ -7,8 +7,25 @@
*/
interface TranslatorInterface
{
+ /**
+ * In case of multiple translations available, don't translate (recommended)
+ *
+ * @var int
+ */
const QUESTIONABLE_IGNORE = 0;
+
+ /**
+ * In case of multiple translations available, use the first
+ *
+ * @var int
+ */
const QUESTIONABLE_INCLUDE = 1;
+
+ /**
+ * In case of multiple translations available, mark them ?like/so? (useful for debugging)
+ *
+ * @var int
+ */
const QUESTIONABLE_MARK = 2;
/**
diff --git a/tests/Amara/Varcon/HtmlTranslatorTest.php b/tests/Amara/Varcon/HtmlTranslatorTest.php
index f21e855..a386b64 100644
--- a/tests/Amara/Varcon/HtmlTranslatorTest.php
+++ b/tests/Amara/Varcon/HtmlTranslatorTest.php
@@ -32,11 +32,19 @@ public function testTranslate($html, $translatedHtml)
{
$translator = $this->prophesize(Translator::class);
$translator->translate(
- Argument::any(),
- Argument::any(),
- Argument::any(),
- Argument::any()
- )->willReturn('Translated'); // Keep in mind we ignore whitespace this way
+ Argument::type('string'),
+ 'A',
+ 'B',
+ 0
+ )->will(function ($arguments) {
+ $string = array_shift($arguments);
+
+ return str_replace(
+ ['Text', 'text'],
+ ['Translated', 'translated'],
+ $string
+ );
+ });
$htmlTranslator = new HtmlTranslator($translator->reveal());
@@ -50,24 +58,25 @@ public function provideTranslate()
{
return [
[
- $html = 'Text text text
',
- $translatedHtml = 'Translated
',
+ $html = 'Text & text text
',
+ $translatedHtml = 'Translated & translated translated
',
],
[
- $html = 'Text text text
',
- $translatedHtml = 'TranslatedTranslatedTranslated
',
+ // Some day, this will stay as • ..some ..day
+ $html = '• Text & text: text
',
+ $translatedHtml = '• Translated & translated: translated
',
],
[
$html = '',
- $translatedHtml = '',
+ $translatedHtml = '',
],
[
$html = '',
- $translatedHtml = '',
+ $translatedHtml = '',
],
[
$html = '',
- $translatedHtml = '',
+ $translatedHtml = '',
],
];
}