Skip to content

Commit

Permalink
Fix EZP-29116 : Conversions from ezxmltext to ezrichtext fails if ezx…
Browse files Browse the repository at this point in the history
…mtext have duplicate xhtml ids (ezsystems#36)

* Added warning if ezxmltext includes duplicates ids

* Refactoring : Moved XmlTextToRichText conversion to separate converter

* Added tests for duplicate ids in ezxmltext to richtext conversion

* fixup! Added tests for duplicate ids in ezxmltext to richtext conversion

* fixup! Refactoring : Moved XmlTextToRichText conversion to separate converter

* fixup! Refactoring : Moved XmlTextToRichText conversion to separate converter

* fixup! fixup! Refactoring : Moved XmlTextToRichText conversion to separate converter

* fixup! Added tests for duplicate ids in ezxmltext to richtext conversion

* Added more ezxmltext->richtext conversion tests
  • Loading branch information
vidarl authored and andrerom committed Apr 25, 2018
1 parent 4f71a7f commit 9ffe1e7
Show file tree
Hide file tree
Showing 35 changed files with 695 additions and 81 deletions.
94 changes: 13 additions & 81 deletions bundle/Command/ConvertXmlTextToRichTextCommand.php
Expand Up @@ -5,22 +5,15 @@
namespace EzSystems\EzPlatformXmlTextFieldTypeBundle\Command;

use DOMDocument;
use DOMXPath;
use eZ\Publish\Core\FieldType\RichText\Converter;
use eZ\Publish\Core\Persistence\Database\DatabaseHandler;
use PDO;
use Psr\Log\LoggerInterface;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use eZ\Publish\Core\FieldType\RichText\Converter\Aggregate;
use eZ\Publish\Core\FieldType\XmlText\Converter\Expanding;
use eZ\Publish\Core\FieldType\RichText\Converter\Ezxml\ToRichTextPreNormalize;
use eZ\Publish\Core\FieldType\XmlText\Converter\EmbedLinking;
use eZ\Publish\Core\FieldType\RichText\Converter\Xslt;
use eZ\Publish\Core\FieldType\RichText\Validator;
use eZ\Publish\Core\FieldType\XmlText\Value;
use eZ\Publish\Core\FieldType\XmlText\Converter\RichText as RichTextConverter;

class ConvertXmlTextToRichTextCommand extends ContainerAwareCommand
{
Expand All @@ -29,16 +22,6 @@ class ConvertXmlTextToRichTextCommand extends ContainerAwareCommand
*/
private $db;

/**
* @var \eZ\Publish\Core\FieldType\RichText\Converter
*/
private $converter;

/**
* @var \eZ\Publish\Core\FieldType\RichText\Validator
*/
private $validator;

/**
* @var \Psr\Log\LoggerInterface
*/
Expand All @@ -50,28 +33,6 @@ public function __construct(DatabaseHandler $db, LoggerInterface $logger = null)

$this->db = $db;
$this->logger = $logger;

$this->converter = new Aggregate(
array(
new ToRichTextPreNormalize(new Expanding(), new EmbedLinking()),
new Xslt(
'./vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/stylesheets/ezxml/docbook/docbook.xsl',
array(
array(
'path' => './vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/stylesheets/ezxml/docbook/core.xsl',
'priority' => 99,
),
)
),
)
);

$this->validator = new Validator(
array(
'./vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/schemas/docbook/ezpublish.rng',
'./vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/schemas/docbook/docbook.iso.sch.xsl',
)
);
}

protected function configure()
Expand All @@ -92,6 +53,12 @@ protected function configure()
InputOption::VALUE_NONE,
'Run the converter without writing anything to the database'
)
->addOption(
'disable-duplicate-id-check',
null,
InputOption::VALUE_NONE,
'Disable the check for duplicate html ids in every attribute. This might increase execution time on large databases'
)
->addOption(
'test-content-object',
null,
Expand All @@ -115,10 +82,10 @@ protected function execute(InputInterface $input, OutputInterface $output)
} else {
$dryRun = true;
}
$this->convertFields($dryRun, $testContentObjectId, $output);
$this->convertFields($dryRun, $testContentObjectId, !$input->getOption('disable-duplicate-id-check'), $output);
}

function convertFieldDefinitions($dryRun, OutputInterface $output)
protected function convertFieldDefinitions($dryRun, OutputInterface $output)
{
$query = $this->db->createSelectQuery();
$query->select($query->expr->count('*'));
Expand Down Expand Up @@ -174,8 +141,9 @@ function convertFieldDefinitions($dryRun, OutputInterface $output)
$output->writeln("Converted $count ezxmltext field definitions to ezrichtext");
}

function convertFields($dryRun, $contentObjectId, OutputInterface $output)
protected function convertFields($dryRun, $contentObjectId, $checkDuplicateIds, OutputInterface $output)
{
$converter = new RichTextConverter($this->logger);
$query = $this->db->createSelectQuery();
$query->select($query->expr->count('*'));
$query->from('ezcontentobject_attribute');
Expand Down Expand Up @@ -238,7 +206,7 @@ function convertFields($dryRun, $contentObjectId, OutputInterface $output)
$inputValue = $row['data_text'];
}

$converted = $this->convert($inputValue);
$converted = $converter->convert($this->createDocument($inputValue), $checkDuplicateIds, $row['id']);

$updateQuery = $this->db->createUpdateQuery();
$updateQuery->update($this->db->quoteIdentifier('ezcontentobject_attribute'));
Expand Down Expand Up @@ -279,7 +247,7 @@ function convertFields($dryRun, $contentObjectId, OutputInterface $output)
$output->writeln("Converted $count ezxmltext fields to richtext");
}

function createDocument($xmlString)
protected function createDocument($xmlString)
{
$document = new DOMDocument();

Expand All @@ -290,40 +258,4 @@ function createDocument($xmlString)

return $document;
}

function removeComments(DOMDocument $document)
{
$xpath = new DOMXpath($document);
$nodes = $xpath->query('//comment()');

for ($i = 0; $i < $nodes->length; ++$i) {
$nodes->item($i)->parentNode->removeChild($nodes->item($i));
}
}

function convert($xmlString)
{
$inputDocument = $this->createDocument($xmlString);

$this->removeComments($inputDocument);

$convertedDocument = $this->converter->convert($inputDocument);

// Needed by some disabled output escaping (eg. legacy ezxml paragraph <line/> elements)
$convertedDocumentNormalized = new DOMDocument();
$convertedDocumentNormalized->loadXML($convertedDocument->saveXML());

$errors = $this->validator->validate($convertedDocument);

$result = $convertedDocumentNormalized->saveXML();

if (!empty($errors)) {
$this->logger->error(
"Validation errors when converting xmlstring",
['result' => $result, 'errors' => $errors, 'xmlString' => $xmlString]
);
}

return $result;
}
}
110 changes: 110 additions & 0 deletions lib/FieldType/XmlText/Converter/RichText.php
@@ -0,0 +1,110 @@
<?php

/**
* This file is part of the eZ Platform XmlText Field Type package.
*
* @copyright Copyright (C) eZ Systems AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
namespace eZ\Publish\Core\FieldType\XmlText\Converter;

use eZ\Publish\Core\FieldType\XmlText\Converter;
use DOMDocument;
use DOMXPath;
use Psr\Log\LoggerInterface;
use eZ\Publish\Core\FieldType\RichText\Converter\Aggregate;
use eZ\Publish\Core\FieldType\RichText\Converter\Ezxml\ToRichTextPreNormalize;
use eZ\Publish\Core\FieldType\RichText\Converter\Xslt;
use eZ\Publish\Core\FieldType\RichText\Validator;

class RichText implements Converter
{
/**
* @var \eZ\Publish\Core\FieldType\RichText\Converter
*/
private $converter;

/**
* @var \eZ\Publish\Core\FieldType\RichText\Validator
*/
private $validator;

public function __construct(LoggerInterface $logger = null)
{
$this->logger = $logger;

$this->converter = new Aggregate(
array(
new ToRichTextPreNormalize(new Expanding(), new EmbedLinking()),
new Xslt(
'./vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/stylesheets/ezxml/docbook/docbook.xsl',
array(
array(
'path' => './vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/stylesheets/ezxml/docbook/core.xsl',
'priority' => 99,
),
)
),
)
);

$this->validator = new Validator(
array(
'./vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/schemas/docbook/ezpublish.rng',
'./vendor/ezsystems/ezpublish-kernel/eZ/Publish/Core/FieldType/RichText/Resources/schemas/docbook/docbook.iso.sch.xsl',
)
);
}

protected function removeComments(DOMDocument $document)
{
$xpath = new DOMXpath($document);
$nodes = $xpath->query('//comment()');

for ($i = 0; $i < $nodes->length; ++$i) {
$nodes->item($i)->parentNode->removeChild($nodes->item($i));
}
}

protected function reportNonUniqueIds(DOMDocument $document, $contentObjectAttributeId)
{
$xpath = new DOMXPath($document);
$ns = $document->documentElement->namespaceURI;
$nodes = $xpath->query("//*[contains(@xml:id, 'duplicated_id_')]");
foreach ($nodes as $node) {
$id=$node->attributes->getNamedItem('id')->nodeValue;
// id has format "duplicated_id_foo_bar_idm45226413447104" where "foo_bar" is the duplicated id
$duplicatedId = substr($id, strlen('duplicated_id_'), strrpos($id, '_') - strlen('duplicated_id_'));
if ($this->logger !== null) {
$this->logger->warning("Duplicated id in original ezxmltext for contentobject_attribute.id=$contentObjectAttributeId, automatically generated new id : $duplicatedId --> $id");
}
}
}

public function convert(DOMDocument $inputDocument, $checkDuplicateIds = false, $contentObjectAttributeId = null)
{
$this->removeComments($inputDocument);

$convertedDocument = $this->converter->convert($inputDocument);
if ($checkDuplicateIds) {
$this->reportNonUniqueIds($convertedDocument, $contentObjectAttributeId);
}

// Needed by some disabled output escaping (eg. legacy ezxml paragraph <line/> elements)
$convertedDocumentNormalized = new DOMDocument();
$convertedDocumentNormalized->loadXML($convertedDocument->saveXML());

$errors = $this->validator->validate($convertedDocument);

$result = $convertedDocumentNormalized->saveXML();

if (!empty($errors) && $this->logger !== null) {
$this->logger->error(
"Validation errors when converting ezxmltext for contentobject_attribute.id=$contentObjectAttributeId",
['result' => $result, 'errors' => $errors, 'xmlString' => $inputDocument->saveXML()]
);
}

return $result;
}
}
95 changes: 95 additions & 0 deletions tests/lib/FieldType/Converter/RichTextTest.php
@@ -0,0 +1,95 @@
<?php

/**
* This file is part of the eZ Platform XmlText Field Type package.
*
* @copyright Copyright (C) eZ Systems AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
namespace EzSystems\EzPlatformXmlTextFieldType\Tests\FieldType\Converter;

use eZ\Publish\Core\FieldType\XmlText\Converter\RichText;
use DOMDocument;
use DOMXPath;
use PHPUnit\Framework\TestCase;

class RichTextTest extends TestCase
{
/**
* @param string $xml
* @param bool $isPath
*
* @return \DOMDocument
*/
protected function createDocument($xml, $isPath = true)
{
$document = new DOMDocument();

$document->preserveWhiteSpace = false;
$document->formatOutput = false;

if ($isPath === true) {
$xml = file_get_contents($xml);
}

$document->loadXml($xml);

return $document;
}

/**
* Provider for conversion test.
*
* @return array
*/
public function providerForTestConvert()
{
$map = array();

foreach (glob(__DIR__ . '/_fixtures/richtext/input/*.xml') as $inputFilePath) {
$basename = basename($inputFilePath, '.xml');
$outputFilePath = __DIR__ . "/_fixtures/richtext/output/{$basename}.xml";

$map[] = array($inputFilePath, $outputFilePath);
}

return $map;
}

protected function normalizeRewrittenIds(DOMDocument $xmlDoc)
{
$counter = 0;
$xpath = new DOMXPath($xmlDoc);
$nodes = $xpath->query("//*[contains(@xml:id, 'duplicated_id_')]");
foreach ($nodes as $node) {
$id=$node->attributes->getNamedItem('id')->nodeValue;
$node->attributes->getNamedItem('id')->nodeValue = "duplicated_id_foobar$counter";
++$counter;
}
}

/**
* @param string $inputFilePath
* @param string $outputFilePath
*
* @dataProvider providerForTestConvert
*/
public function testConvert($inputFilePath, $outputFilePath)
{
$inputDocument = $this->createDocument($inputFilePath);
$richText = new RichText(null);

$result = $richText->convert($inputDocument, true);

$convertedDocument = $this->createDocument($result, false);
$expectedDocument = $this->createDocument($outputFilePath);

// since duplicate ids are rewritten with random values, we need to normalize those
$this->normalizeRewrittenIds($convertedDocument);

$this->assertEquals(
$expectedDocument,
$convertedDocument
);
}
}
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<section xmlns:image="http://ez.no/namespaces/ezpublish3/image/" xmlns:xhtml="http://ez.no/namespaces/ezpublish3/xhtml/" xmlns:custom="http://ez.no/namespaces/ezpublish3/custom/">
<paragraph xmlns:tmp="http://ez.no/namespaces/ezpublish3/temporary/">
<ul>
<li>
<paragraph xmlns:tmp="http://ez.no/namespaces/ezpublish3/temporary/">
<line><link target="_blank" xhtml:id="inv5" url_id="2309">link with id inv5</link></line>
<line> </line>
</paragraph>
</li>
<li>
<paragraph xmlns:tmp="http://ez.no/namespaces/ezpublish3/temporary/">
<line><link target="_blank" xhtml:id="inv5" url_id="2309">another link with id inv5</link></line>
<line> </line>
</paragraph>
</li>
</ul>
</paragraph>
</section>

0 comments on commit 9ffe1e7

Please sign in to comment.