Skip to content

Commit

Permalink
Add a safe_minidom_parse_string function.
Browse files Browse the repository at this point in the history
Adds a new utils.safe_minidom_parse_string function and
updates external API facing Nova modules to use it.
This ensures we have safe defaults on our incoming API XML parsing.

Internally safe_minidom_parse_string uses a ProtectedExpatParser
class to disable DTDs and entities from being parsed when using
minidom.

Fixes LP Bug #1100282 for Folsom.

Change-Id: I6a4051b5e66f3ce5a330b2589c42e6e9e5b9268e
  • Loading branch information
dprince committed Feb 4, 2013
1 parent e5d0f4b commit 2ae74f8
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 26 deletions.
10 changes: 5 additions & 5 deletions nova/api/openstack/common.py
Expand Up @@ -21,7 +21,6 @@
import urlparse

import webob
from xml.dom import minidom

from nova.api.openstack import wsgi
from nova.api.openstack import xmlutil
Expand All @@ -32,6 +31,7 @@
from nova import flags
from nova.openstack.common import log as logging
from nova import quota
from nova import utils


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -341,15 +341,15 @@ def raise_http_conflict_for_instance_invalid_state(exc, action):

class MetadataDeserializer(wsgi.MetadataXMLDeserializer):
def deserialize(self, text):
dom = minidom.parseString(text)
dom = utils.safe_minidom_parse_string(text)
metadata_node = self.find_first_child_named(dom, "metadata")
metadata = self.extract_metadata(metadata_node)
return {'body': {'metadata': metadata}}


class MetaItemDeserializer(wsgi.MetadataXMLDeserializer):
def deserialize(self, text):
dom = minidom.parseString(text)
dom = utils.safe_minidom_parse_string(text)
metadata_item = self.extract_metadata(dom)
return {'body': {'meta': metadata_item}}

Expand All @@ -367,7 +367,7 @@ def extract_metadata(self, metadata_node):
return metadata

def _extract_metadata_container(self, datastring):
dom = minidom.parseString(datastring)
dom = utils.safe_minidom_parse_string(datastring)
metadata_node = self.find_first_child_named(dom, "metadata")
metadata = self.extract_metadata(metadata_node)
return {'body': {'metadata': metadata}}
Expand All @@ -379,7 +379,7 @@ def update_all(self, datastring):
return self._extract_metadata_container(datastring)

def update(self, datastring):
dom = minidom.parseString(datastring)
dom = utils.safe_minidom_parse_string(datastring)
metadata_item = self.extract_metadata(dom)
return {'body': {'meta': metadata_item}}

Expand Down
4 changes: 2 additions & 2 deletions nova/api/openstack/compute/contrib/hosts.py
Expand Up @@ -16,7 +16,6 @@
"""The hosts admin extension."""

import webob.exc
from xml.dom import minidom
from xml.parsers import expat

from nova.api.openstack import extensions
Expand All @@ -27,6 +26,7 @@
from nova import exception
from nova import flags
from nova.openstack.common import log as logging
from nova import utils


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -80,7 +80,7 @@ def construct(self):
class HostDeserializer(wsgi.XMLDeserializer):
def default(self, string):
try:
node = minidom.parseString(string)
node = utils.safe_minidom_parse_string(string)
except expat.ExpatError:
msg = _("cannot understand XML")
raise exception.MalformedRequestBody(reason=msg)
Expand Down
7 changes: 3 additions & 4 deletions nova/api/openstack/compute/contrib/security_groups.py
Expand Up @@ -16,8 +16,6 @@

"""The security groups extension."""

from xml.dom import minidom

import webob
from webob import exc

Expand All @@ -30,6 +28,7 @@
from nova import exception
from nova import flags
from nova.openstack.common import log as logging
from nova import utils


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -110,7 +109,7 @@ class SecurityGroupXMLDeserializer(wsgi.MetadataXMLDeserializer):
"""
def default(self, string):
"""Deserialize an xml-formatted security group create request"""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
security_group = {}
sg_node = self.find_first_child_named(dom,
'security_group')
Expand All @@ -131,7 +130,7 @@ class SecurityGroupRulesXMLDeserializer(wsgi.MetadataXMLDeserializer):

def default(self, string):
"""Deserialize an xml-formatted security group create request"""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
security_group_rule = self._extract_security_group_rule(dom)
return {'body': {'security_group_rule': security_group_rule}}

Expand Down
3 changes: 1 addition & 2 deletions nova/api/openstack/compute/contrib/volumes.py
Expand Up @@ -17,7 +17,6 @@

import webob
from webob import exc
from xml.dom import minidom

from nova.api.openstack import common
from nova.api.openstack import extensions
Expand Down Expand Up @@ -155,7 +154,7 @@ class CreateDeserializer(CommonDeserializer):

def default(self, string):
"""Deserialize an xml-formatted volume create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
volume = self._extract_volume(dom)
return {'body': {'volume': volume}}

Expand Down
5 changes: 2 additions & 3 deletions nova/api/openstack/compute/servers.py
Expand Up @@ -21,7 +21,6 @@

import webob
from webob import exc
from xml.dom import minidom

from nova.api.openstack import common
from nova.api.openstack.compute import ips
Expand Down Expand Up @@ -297,7 +296,7 @@ class ActionDeserializer(CommonDeserializer):
"""

def default(self, string):
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
action_node = dom.childNodes[0]
action_name = action_node.tagName

Expand Down Expand Up @@ -404,7 +403,7 @@ class CreateDeserializer(CommonDeserializer):

def default(self, string):
"""Deserialize an xml-formatted server create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
server = self._extract_server(dom)
return {'body': {'server': server}}

Expand Down
4 changes: 2 additions & 2 deletions nova/api/openstack/volume/contrib/volume_actions.py
Expand Up @@ -13,7 +13,6 @@
# under the License.

import webob
from xml.dom import minidom

from nova.api.openstack import extensions
from nova.api.openstack import wsgi
Expand All @@ -22,6 +21,7 @@
from nova import flags
from nova.openstack.common import log as logging
from nova.openstack.common.rpc import common as rpc_common
from nova import utils
from nova import volume


Expand Down Expand Up @@ -54,7 +54,7 @@ def construct(self):
class VolumeToImageDeserializer(wsgi.XMLDeserializer):
"""Deserializer to handle xml-formatted requests"""
def default(self, string):
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
action_node = dom.childNodes[0]
action_name = action_node.tagName

Expand Down
3 changes: 1 addition & 2 deletions nova/api/openstack/volume/volumes.py
Expand Up @@ -17,7 +17,6 @@

import webob
from webob import exc
from xml.dom import minidom

from nova.api.openstack import common
from nova.api.openstack import wsgi
Expand Down Expand Up @@ -191,7 +190,7 @@ class CreateDeserializer(CommonDeserializer):

def default(self, string):
"""Deserialize an xml-formatted volume create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
volume = self._extract_volume(dom)
return {'body': {'volume': volume}}

Expand Down
13 changes: 7 additions & 6 deletions nova/api/openstack/wsgi.py
Expand Up @@ -27,6 +27,7 @@
from nova import exception
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova import utils
from nova import wsgi


Expand Down Expand Up @@ -217,7 +218,7 @@ def _from_xml(self, datastring):
plurals = set(self.metadata.get('plurals', {}))

try:
node = minidom.parseString(datastring).childNodes[0]
node = utils.safe_minidom_parse_string(datastring).childNodes[0]
return {node.nodeName: self._from_xml_node(node, plurals)}
except expat.ExpatError:
msg = _("cannot understand XML")
Expand Down Expand Up @@ -268,11 +269,11 @@ def find_children_named(self, parent, name):

def extract_text(self, node):
"""Get the text field contained by the given node"""
if len(node.childNodes) == 1:
child = node.childNodes[0]
ret_val = ""
for child in node.childNodes:
if child.nodeType == child.TEXT_NODE:
return child.nodeValue
return ""
ret_val += child.nodeValue
return ret_val

def extract_elements(self, node):
"""Get only Element type childs from node"""
Expand Down Expand Up @@ -631,7 +632,7 @@ def action_peek_json(body):
def action_peek_xml(body):
"""Determine action to invoke."""

dom = minidom.parseString(body)
dom = utils.safe_minidom_parse_string(body)
action_node = dom.childNodes[0]

return action_node.tagName
Expand Down
33 changes: 33 additions & 0 deletions nova/tests/test_utils.py
Expand Up @@ -457,6 +457,39 @@ def test_service_is_up(self):
result = utils.service_is_up(service)
self.assertFalse(result)

def test_safe_parse_xml(self):

normal_body = ("""
<?xml version="1.0" ?><foo>
<bar>
<v1>hey</v1>
<v2>there</v2>
</bar>
</foo>""").strip()

def killer_body():
return (("""<!DOCTYPE x [
<!ENTITY a "%(a)s">
<!ENTITY b "%(b)s">
<!ENTITY c "%(c)s">]>
<foo>
<bar>
<v1>%(d)s</v1>
</bar>
</foo>""") % {
'a': 'A' * 10,
'b': '&a;' * 10,
'c': '&b;' * 10,
'd': '&c;' * 9999,
}).strip()

dom = utils.safe_minidom_parse_string(normal_body)
self.assertEqual(normal_body, str(dom.toxml()))

self.assertRaises(ValueError,
utils.safe_minidom_parse_string,
killer_body())

def test_xhtml_escape(self):
self.assertEqual('&quot;foo&quot;', utils.xhtml_escape('"foo"'))
self.assertEqual('&apos;foo&apos;', utils.xhtml_escape("'foo'"))
Expand Down
44 changes: 44 additions & 0 deletions nova/utils.py
Expand Up @@ -39,6 +39,10 @@
import time
import uuid
import weakref
from xml.dom import minidom
from xml.parsers import expat
from xml import sax
from xml.sax import expatreader
from xml.sax import saxutils

from eventlet import event
Expand Down Expand Up @@ -567,6 +571,46 @@ def wait(self):
return self.done.wait()


class ProtectedExpatParser(expatreader.ExpatParser):
"""An expat parser which disables DTD's and entities by default."""

def __init__(self, forbid_dtd=True, forbid_entities=True,
*args, **kwargs):
# Python 2.x old style class
expatreader.ExpatParser.__init__(self, *args, **kwargs)
self.forbid_dtd = forbid_dtd
self.forbid_entities = forbid_entities

def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
raise ValueError("Inline DTD forbidden")

def entity_decl(self, entityName, is_parameter_entity, value, base,
systemId, publicId, notationName):
raise ValueError("<!ENTITY> forbidden")

def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
# expat 1.2
raise ValueError("<!ENTITY> forbidden")

def reset(self):
expatreader.ExpatParser.reset(self)
if self.forbid_dtd:
self._parser.StartDoctypeDeclHandler = self.start_doctype_decl
if self.forbid_entities:
self._parser.EntityDeclHandler = self.entity_decl
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl


def safe_minidom_parse_string(xml_string):
"""Parse an XML string using minidom safely.
"""
try:
return minidom.parseString(xml_string, parser=ProtectedExpatParser())
except sax.SAXParseException as se:
raise expat.ExpatError()


def xhtml_escape(value):
"""Escapes a string so it is valid within XML or XHTML.
Expand Down

0 comments on commit 2ae74f8

Please sign in to comment.