Skip to content

Commit

Permalink
Add a safe_minidom_parse_string function.
Browse files Browse the repository at this point in the history
Adds a new utils.safe_minidom_parse_string function and
updates external API facing Nova modules to use it.
This ensures we have safe defaults on our incoming API XML parsing.

Internally safe_minidom_parse_string uses a ProtectedExpatParser
class to disable DTDs and entities from being parsed when using
minidom.

Fixes LP Bug #1100282.

Change-Id: Ib90d6379320ff1d007f8a661f7ddaa286ba6918e
  • Loading branch information
dprince committed Feb 19, 2013
1 parent 1b9b66b commit 5993324
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 24 deletions.
10 changes: 5 additions & 5 deletions nova/api/openstack/common.py
Expand Up @@ -21,7 +21,6 @@
import urlparse

import webob
from xml.dom import minidom

from nova.api.openstack import wsgi
from nova.api.openstack import xmlutil
Expand All @@ -32,6 +31,7 @@
from nova.openstack.common import cfg
from nova.openstack.common import log as logging
from nova import quota
from nova import utils

osapi_opts = [
cfg.IntOpt('osapi_max_limit',
Expand Down Expand Up @@ -356,15 +356,15 @@ def raise_http_conflict_for_instance_invalid_state(exc, action):

class MetadataDeserializer(wsgi.MetadataXMLDeserializer):
def deserialize(self, text):
dom = minidom.parseString(text)
dom = utils.safe_minidom_parse_string(text)
metadata_node = self.find_first_child_named(dom, "metadata")
metadata = self.extract_metadata(metadata_node)
return {'body': {'metadata': metadata}}


class MetaItemDeserializer(wsgi.MetadataXMLDeserializer):
def deserialize(self, text):
dom = minidom.parseString(text)
dom = utils.safe_minidom_parse_string(text)
metadata_item = self.extract_metadata(dom)
return {'body': {'meta': metadata_item}}

Expand All @@ -382,7 +382,7 @@ def extract_metadata(self, metadata_node):
return metadata

def _extract_metadata_container(self, datastring):
dom = minidom.parseString(datastring)
dom = utils.safe_minidom_parse_string(datastring)
metadata_node = self.find_first_child_named(dom, "metadata")
metadata = self.extract_metadata(metadata_node)
return {'body': {'metadata': metadata}}
Expand All @@ -394,7 +394,7 @@ def update_all(self, datastring):
return self._extract_metadata_container(datastring)

def update(self, datastring):
dom = minidom.parseString(datastring)
dom = utils.safe_minidom_parse_string(datastring)
metadata_item = self.extract_metadata(dom)
return {'body': {'meta': metadata_item}}

Expand Down
4 changes: 2 additions & 2 deletions nova/api/openstack/compute/contrib/cells.py
Expand Up @@ -16,7 +16,6 @@
# under the License.

"""The cells extension."""
from xml.dom import minidom
from xml.parsers import expat

from webob import exc
Expand All @@ -32,6 +31,7 @@
from nova.openstack.common import cfg
from nova.openstack.common import log as logging
from nova.openstack.common import timeutils
from nova import utils


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -99,7 +99,7 @@ def _extract_cell(self, node):
def default(self, string):
"""Deserialize an xml-formatted cell create request."""
try:
node = minidom.parseString(string)
node = utils.safe_minidom_parse_string(string)
except expat.ExpatError:
msg = _("cannot understand XML")
raise exception.MalformedRequestBody(reason=msg)
Expand Down
4 changes: 2 additions & 2 deletions nova/api/openstack/compute/contrib/hosts.py
Expand Up @@ -16,7 +16,6 @@
"""The hosts admin extension."""

import webob.exc
from xml.dom import minidom
from xml.parsers import expat

from nova.api.openstack import extensions
Expand All @@ -25,6 +24,7 @@
from nova import compute
from nova import exception
from nova.openstack.common import log as logging
from nova import utils

LOG = logging.getLogger(__name__)
authorize = extensions.extension_authorizer('compute', 'hosts')
Expand Down Expand Up @@ -72,7 +72,7 @@ def construct(self):
class HostUpdateDeserializer(wsgi.XMLDeserializer):
def default(self, string):
try:
node = minidom.parseString(string)
node = utils.safe_minidom_parse_string(string)
except expat.ExpatError:
msg = _("cannot understand XML")
raise exception.MalformedRequestBody(reason=msg)
Expand Down
7 changes: 3 additions & 4 deletions nova/api/openstack/compute/contrib/security_groups.py
Expand Up @@ -16,8 +16,6 @@

"""The security groups extension."""

from xml.dom import minidom

import webob
from webob import exc

Expand All @@ -30,6 +28,7 @@
from nova import db
from nova import exception
from nova.openstack.common import log as logging
from nova import utils
from nova.virt import netutils

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -109,7 +108,7 @@ class SecurityGroupXMLDeserializer(wsgi.MetadataXMLDeserializer):
"""
def default(self, string):
"""Deserialize an xml-formatted security group create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
security_group = {}
sg_node = self.find_first_child_named(dom,
'security_group')
Expand All @@ -130,7 +129,7 @@ class SecurityGroupRulesXMLDeserializer(wsgi.MetadataXMLDeserializer):

def default(self, string):
"""Deserialize an xml-formatted security group create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
security_group_rule = self._extract_security_group_rule(dom)
return {'body': {'security_group_rule': security_group_rule}}

Expand Down
3 changes: 1 addition & 2 deletions nova/api/openstack/compute/contrib/volumes.py
Expand Up @@ -17,7 +17,6 @@

import webob
from webob import exc
from xml.dom import minidom

from nova.api.openstack import common
from nova.api.openstack import extensions
Expand Down Expand Up @@ -155,7 +154,7 @@ class CreateDeserializer(CommonDeserializer):

def default(self, string):
"""Deserialize an xml-formatted volume create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
vol = self._extract_volume(dom)
return {'body': {'volume': vol}}

Expand Down
5 changes: 2 additions & 3 deletions nova/api/openstack/compute/servers.py
Expand Up @@ -21,7 +21,6 @@

import webob
from webob import exc
from xml.dom import minidom

from nova.api.openstack import common
from nova.api.openstack.compute import ips
Expand Down Expand Up @@ -312,7 +311,7 @@ class ActionDeserializer(CommonDeserializer):
"""

def default(self, string):
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
action_node = dom.childNodes[0]
action_name = action_node.tagName

Expand Down Expand Up @@ -419,7 +418,7 @@ class CreateDeserializer(CommonDeserializer):

def default(self, string):
"""Deserialize an xml-formatted server create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
server = self._extract_server(dom)
return {'body': {'server': server}}

Expand Down
13 changes: 7 additions & 6 deletions nova/api/openstack/wsgi.py
Expand Up @@ -27,6 +27,7 @@
from nova import exception
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova import utils
from nova import wsgi


Expand Down Expand Up @@ -217,7 +218,7 @@ def _from_xml(self, datastring):
plurals = set(self.metadata.get('plurals', {}))

try:
node = minidom.parseString(datastring).childNodes[0]
node = utils.safe_minidom_parse_string(datastring).childNodes[0]
return {node.nodeName: self._from_xml_node(node, plurals)}
except expat.ExpatError:
msg = _("cannot understand XML")
Expand Down Expand Up @@ -268,11 +269,11 @@ def find_children_named(self, parent, name):

def extract_text(self, node):
"""Get the text field contained by the given node."""
if len(node.childNodes) == 1:
child = node.childNodes[0]
ret_val = ""
for child in node.childNodes:
if child.nodeType == child.TEXT_NODE:
return child.nodeValue
return ""
ret_val += child.nodeValue
return ret_val

def extract_elements(self, node):
"""Get only Element type childs from node."""
Expand Down Expand Up @@ -633,7 +634,7 @@ def action_peek_json(body):
def action_peek_xml(body):
"""Determine action to invoke."""

dom = minidom.parseString(body)
dom = utils.safe_minidom_parse_string(body)
action_node = dom.childNodes[0]

return action_node.tagName
Expand Down
33 changes: 33 additions & 0 deletions nova/tests/test_utils.py
Expand Up @@ -449,6 +449,39 @@ def fake_execute(*args, **kwargs):
self.assertEqual(fake_execute.uid, 2)
self.assertEqual(fake_execute.uid, os.getuid())

def test_safe_parse_xml(self):

normal_body = ("""
<?xml version="1.0" ?><foo>
<bar>
<v1>hey</v1>
<v2>there</v2>
</bar>
</foo>""").strip()

def killer_body():
return (("""<!DOCTYPE x [
<!ENTITY a "%(a)s">
<!ENTITY b "%(b)s">
<!ENTITY c "%(c)s">]>
<foo>
<bar>
<v1>%(d)s</v1>
</bar>
</foo>""") % {
'a': 'A' * 10,
'b': '&a;' * 10,
'c': '&b;' * 10,
'd': '&c;' * 9999,
}).strip()

dom = utils.safe_minidom_parse_string(normal_body)
self.assertEqual(normal_body, str(dom.toxml()))

self.assertRaises(ValueError,
utils.safe_minidom_parse_string,
killer_body())

def test_xhtml_escape(self):
self.assertEqual('&quot;foo&quot;', utils.xhtml_escape('"foo"'))
self.assertEqual('&apos;foo&apos;', utils.xhtml_escape("'foo'"))
Expand Down
44 changes: 44 additions & 0 deletions nova/utils.py
Expand Up @@ -36,6 +36,10 @@
import sys
import tempfile
import time
from xml.dom import minidom
from xml.parsers import expat
from xml import sax
from xml.sax import expatreader
from xml.sax import saxutils

from eventlet import event
Expand Down Expand Up @@ -652,6 +656,46 @@ def _inner():
return self.done


class ProtectedExpatParser(expatreader.ExpatParser):
"""An expat parser which disables DTD's and entities by default."""

def __init__(self, forbid_dtd=True, forbid_entities=True,
*args, **kwargs):
# Python 2.x old style class
expatreader.ExpatParser.__init__(self, *args, **kwargs)
self.forbid_dtd = forbid_dtd
self.forbid_entities = forbid_entities

def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
raise ValueError("Inline DTD forbidden")

def entity_decl(self, entityName, is_parameter_entity, value, base,
systemId, publicId, notationName):
raise ValueError("<!ENTITY> forbidden")

def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
# expat 1.2
raise ValueError("<!ENTITY> forbidden")

def reset(self):
expatreader.ExpatParser.reset(self)
if self.forbid_dtd:
self._parser.StartDoctypeDeclHandler = self.start_doctype_decl
if self.forbid_entities:
self._parser.EntityDeclHandler = self.entity_decl
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl


def safe_minidom_parse_string(xml_string):
"""Parse an XML string using minidom safely.
"""
try:
return minidom.parseString(xml_string, parser=ProtectedExpatParser())
except sax.SAXParseException as se:
raise expat.ExpatError()


def xhtml_escape(value):
"""Escapes a string so it is valid within XML or XHTML.
Expand Down

0 comments on commit 5993324

Please sign in to comment.