Skip to content

Commit

Permalink
Streaming loader: catch severe XML syntax errors without an exception trace
Browse files (browse the repository at this point in the history)
  • Loading branch information
Herm Fischer authored and Herm Fischer committed May 2, 2015
1 parent f8c1402 commit 789a9c5
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 36 deletions.
6 changes: 4 additions & 2 deletions arelle/ModelDocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ def load(modelXbrl, uri, base=None, referringElement=None, isEntry=False, isDisc
for pluginMethod in pluginClassMethods("ModelDocument.PullLoader"):
# assumes not possible to check file in string format or not all available at once
modelDocument = pluginMethod(modelXbrl, normalizedUri, filepath, isEntry=isEntry, namespace=namespace, **kwargs)
if isinstance(modelDocument, Exception):
return None
if modelDocument is not None:
return modelDocument
if (modelXbrl.modelManager.validateDisclosureSystem and
Expand Down Expand Up @@ -161,7 +163,7 @@ def load(modelXbrl, uri, base=None, referringElement=None, isEntry=False, isDisc
modelObject=referringElement, fileName=os.path.basename(uri), error=str(err))
modelXbrl.urlUnloadableDocs[normalizedUri] = True # not loadable due to IO issue
return None
except (etree.LxmlError,
except (etree.LxmlError, etree.XMLSyntaxError,
SAXParseException,
ValueError) as err: # ValueError raised on bad format of qnames, xmlns'es, or parameters
if file:
Expand Down Expand Up @@ -779,7 +781,7 @@ def baseForElement(self, element):
baseAttr = baseElt.get("{http://www.w3.org/XML/1998/namespace}base")
if baseAttr:
if self.modelXbrl.modelManager.validateDisclosureSystem:
self.modelXbrl.error(("EFM.6.03.11", "GFM.1.1.7", "EBA.2.1"),
self.modelXbrl.error(("EFM.6.03.11", "GFM.1.1.7", "EBA.2.1", "EIOPA.2.1"),
_("Prohibited base attribute: %(attribute)s"),
modelObject=element, attribute=baseAttr, element=element.qname)
else:
Expand Down
73 changes: 39 additions & 34 deletions arelle/plugin/streamingExtensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,40 +137,45 @@ def close(self):
numElts = 0
elt = None
instInfoContext = etree.iterparse(_file, events=("start","end"), huge_tree=True)
for event, elt in instInfoContext:
if event == "start":
if elt.getparent() is not None:
if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
if not foundInstance:
foundInstance = True
pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
if pi is None:
break
else:
streamingAspects = dict(pi.attrib.copy())
if creationSoftwareComment is None:
creationSoftwareComment = precedingComment(elt)
if not elt.tag.startswith("{http://www.xbrl.org/"):
instInfoNumRootFacts += 1
if instInfoNumRootFacts % 1000 == 0:
modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
elif not foundInstance:
break
elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
creationSoftwareComment = precedingComment(elt)
if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
modelXbrl.error("streamingExtensions:headerMisplaced",
_("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
modelObject=elt)
elif event == "end":
elt.clear()
numElts += 1
if numElts % 1000 == 0 and elt.getparent() is not None:
while elt.getprevious() is not None and elt.getparent() is not None:
del elt.getparent()[0]
if elt is not None:
elt.clear()

try:
for event, elt in instInfoContext:
if event == "start":
if elt.getparent() is not None:
if elt.getparent().tag == "{http://www.xbrl.org/2003/instance}xbrl":
if not foundInstance:
foundInstance = True
pi = precedingProcessingInstruction(elt, "xbrl-streamable-instance")
if pi is None:
break
else:
streamingAspects = dict(pi.attrib.copy())
if creationSoftwareComment is None:
creationSoftwareComment = precedingComment(elt)
if not elt.tag.startswith("{http://www.xbrl.org/"):
instInfoNumRootFacts += 1
if instInfoNumRootFacts % 1000 == 0:
modelXbrl.profileActivity("... streaming tree check", minTimeToShow=20.0)
elif not foundInstance:
break
elif elt.tag == "{http://www.xbrl.org/2003/instance}xbrl":
creationSoftwareComment = precedingComment(elt)
if precedingProcessingInstruction(elt, "xbrl-streamable-instance") is not None:
modelXbrl.error("streamingExtensions:headerMisplaced",
_("Header is misplaced: %(error)s, must follow xbrli:xbrl element"),
modelObject=elt)
elif event == "end":
elt.clear()
numElts += 1
if numElts % 1000 == 0 and elt.getparent() is not None:
while elt.getprevious() is not None and elt.getparent() is not None:
del elt.getparent()[0]
except etree.XMLSyntaxError as err:
modelXbrl.error("xmlSchema:syntax",
_("Unrecoverable error: %(error)s"),
error=err)
_file.close()
return err

_file.seek(0,io.SEEK_SET) # allow reparsing
if not foundInstance or streamingAspects is None:
del elt
Expand Down

0 comments on commit 789a9c5

Please sign in to comment.