Skip to content

Commit 42aab01

Browse files
authored
Create method for importing HTML
loadHtml parses an HTML string and creates either a SimpleXMLElement object or a DOMDocument. Use empty($options['loadEntities']) instead of !$options['loadEntities'].
1 parent d9a61c3 commit 42aab01

File tree

1 file changed

+41
-2
lines changed

1 file changed

+41
-2
lines changed

src/Utility/Xml.php

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ protected static function _loadXml($input, $options)
142142
{
143143
$hasDisable = function_exists('libxml_disable_entity_loader');
144144
$internalErrors = libxml_use_internal_errors(true);
145-
if ($hasDisable && !$options['loadEntities']) {
145+
if ($hasDisable && empty($options['loadEntities'])) {
146146
libxml_disable_entity_loader(true);
147147
}
148148
$flags = 0;
@@ -162,7 +162,46 @@ protected static function _loadXml($input, $options)
162162
} catch (Exception $e) {
163163
throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
164164
} finally {
165-
if ($hasDisable && !$options['loadEntities']) {
165+
if ($hasDisable && empty($options['loadEntities'])) {
166+
libxml_disable_entity_loader(false);
167+
}
168+
libxml_use_internal_errors($internalErrors);
169+
}
170+
}
171+
172+
/**
173+
* Parse the input HTML string and create either a SimpleXMLElement object or a DOMDocument.
174+
*
175+
* @param string $input The input HTML string to load.
176+
* @param array $options The options to use. See Xml::build()
177+
* @return \SimpleXMLElement|\DOMDocument
178+
* @throws \Cake\Utility\Exception\XmlException
179+
*/
180+
public static function loadHtml($input, $options)
181+
{
182+
$hasDisable = function_exists('libxml_disable_entity_loader');
183+
$internalErrors = libxml_use_internal_errors(true);
184+
if ($hasDisable && empty($options['loadEntities'])) {
185+
libxml_disable_entity_loader(true);
186+
}
187+
$flags = 0;
188+
if (!empty($options['parseHuge'])) {
189+
$flags |= LIBXML_PARSEHUGE;
190+
}
191+
try {
192+
$xml = new DOMDocument();
193+
$xml->loadHTML($input, $flags);
194+
195+
if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
196+
$flags |= LIBXML_NOCDATA;
197+
$xml = simplexml_import_dom($xml);
198+
}
199+
200+
return $xml;
201+
} catch (Exception $e) {
202+
throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
203+
} finally {
204+
if ($hasDisable && empty($options['loadEntities'])) {
166205
libxml_disable_entity_loader(false);
167206
}
168207
libxml_use_internal_errors($internalErrors);

0 commit comments

Comments
 (0)