From 42aab01e71c3e1c5aec8dcb4dff8233146b275a5 Mon Sep 17 00:00:00 2001 From: Fernando Herrero Date: Sat, 2 Jun 2018 02:56:46 +0200 Subject: [PATCH] Create method for importing HTML: loadHtml parses an HTML string and creates either a SimpleXMLElement object or a DOMDocument. Use empty($options['loadEntities']) instead of !$options['loadEntities'] --- src/Utility/Xml.php | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/Utility/Xml.php b/src/Utility/Xml.php index 01346d60861..f120c995ddd 100644 --- a/src/Utility/Xml.php +++ b/src/Utility/Xml.php @@ -142,7 +142,7 @@ protected static function _loadXml($input, $options) { $hasDisable = function_exists('libxml_disable_entity_loader'); $internalErrors = libxml_use_internal_errors(true); - if ($hasDisable && !$options['loadEntities']) { + if ($hasDisable && empty($options['loadEntities'])) { libxml_disable_entity_loader(true); } $flags = 0; @@ -162,7 +162,46 @@ protected static function _loadXml($input, $options) } catch (Exception $e) { throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e); } finally { - if ($hasDisable && !$options['loadEntities']) { + if ($hasDisable && empty($options['loadEntities'])) { + libxml_disable_entity_loader(false); + } + libxml_use_internal_errors($internalErrors); + } + } + + /** + * Parse the input html string and create either a SimpleXmlElement object or a DOMDocument. + * + * @param string $input The input html string to load. + * @param array $options The options to use. 
See Xml::build() + * @return \SimpleXMLElement|\DOMDocument + * @throws \Cake\Utility\Exception\XmlException + */ + public static function loadHtml($input, $options) + { + $hasDisable = function_exists('libxml_disable_entity_loader'); + $internalErrors = libxml_use_internal_errors(true); + if ($hasDisable && empty($options['loadEntities'])) { + libxml_disable_entity_loader(true); + } + $flags = 0; + if (!empty($options['parseHuge'])) { + $flags |= LIBXML_PARSEHUGE; + } + try { + $xml = new DOMDocument(); + $xml->loadHTML($input, $flags); + + if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') { + $flags |= LIBXML_NOCDATA; + $xml = simplexml_import_dom($xml); + } + + return $xml; + } catch (Exception $e) { + throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e); + } finally { + if ($hasDisable && empty($options['loadEntities'])) { libxml_disable_entity_loader(false); } libxml_use_internal_errors($internalErrors);