diff --git a/rdflib/term.py b/rdflib/term.py index ff357d4de..0a136684c 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -1641,16 +1641,17 @@ def _parseXML(xmlstring: str) -> xml.dom.minidom.Document: # noqa: N802 def _parseHTML(htmltext: str) -> xml.dom.minidom.DocumentFragment: # noqa: N802 try: import html5lib - - parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom")) - retval = parser.parseFragment(htmltext) - retval.normalize() - return retval except ImportError: raise ImportError( "HTML5 parser not available. Try installing" + " html5lib " ) + parser = html5lib.HTMLParser( + tree=html5lib.treebuilders.getTreeBuilder("dom"), strict=True + ) + retval = parser.parseFragment(htmltext) + retval.normalize() + return retval def _writeXML( # noqa: N802 diff --git a/test/test_literal/test_xmlliterals.py b/test/test_literal/test_xmlliterals.py index 626755e24..ab0e8e5f5 100644 --- a/test/test_literal/test_xmlliterals.py +++ b/test/test_literal/test_xmlliterals.py @@ -100,6 +100,11 @@ def testHTML(): assert l2.value is not None, "xml must have been parsed" assert l2.datatype == RDF.HTML, "literal must have right datatype" + l3 = Literal("