-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use defusedxml
for Parsing XML
#2
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
|
||
|
||
from __future__ import with_statement | ||
import defusedxml.ElementTree | ||
|
||
|
||
|
||
|
@@ -30,21 +31,20 @@ | |
'get_storage_class', | ||
] | ||
|
||
import logging | ||
import StringIO | ||
import urllib | ||
import os | ||
import itertools | ||
import types | ||
import xml.etree.cElementTree as ET | ||
from . import api_utils | ||
from . import common | ||
from . import errors | ||
from . import storage_api | ||
|
||
|
||
|
||
def open(filename, | ||
Check warning on line 47 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualityredefined-builtin
Check warning on line 47 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
mode='r', | ||
content_type=None, | ||
options=None, | ||
|
@@ -92,7 +92,7 @@ | |
account_id=_account_id) | ||
filename = api_utils._quote_filename(filename) | ||
|
||
if mode == 'w': | ||
common.validate_options(options) | ||
return storage_api.StreamingBuffer(api, filename, content_type, options) | ||
elif mode == 'r': | ||
|
@@ -104,10 +104,10 @@ | |
buffer_size=read_buffer_size, | ||
offset=offset) | ||
else: | ||
raise ValueError('Invalid mode %s.' % mode) | ||
|
||
|
||
def delete(filename, retry_params=None, _account_id=None): | ||
Check warning on line 110 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
"""Delete a Google Cloud Storage file. | ||
|
||
Args: | ||
|
@@ -128,7 +128,7 @@ | |
body=content) | ||
|
||
|
||
def get_location(bucket, retry_params=None, _account_id=None): | ||
Check warning on line 131 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
"""Returns the location for the given bucket. | ||
|
||
https://cloud.google.com/storage/docs/bucket-locations | ||
|
@@ -154,7 +154,7 @@ | |
_account_id=_account_id) | ||
|
||
|
||
def get_storage_class(bucket, retry_params=None, _account_id=None): | ||
Check warning on line 157 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
"""Returns the storage class for the given bucket. | ||
|
||
https://cloud.google.com/storage/docs/storage-classes | ||
|
@@ -180,7 +180,7 @@ | |
_account_id=_account_id) | ||
|
||
|
||
def _get_bucket_attribute(bucket, | ||
Check warning on line 183 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
query_param, | ||
xml_response_tag, | ||
retry_params=None, | ||
|
@@ -206,17 +206,17 @@ | |
api = storage_api._get_storage_api(retry_params=retry_params, | ||
account_id=_account_id) | ||
common.validate_bucket_path(bucket) | ||
status, headers, content = api.get_bucket('%s?%s' % (bucket, query_param)) | ||
|
||
errors.check_status(status, [200], bucket, resp_headers=headers, body=content) | ||
|
||
root = ET.fromstring(content) | ||
root = defusedxml.ElementTree.fromstring(content) | ||
if root.tag == xml_response_tag and root.text: | ||
return root.text | ||
return None | ||
|
||
|
||
def stat(filename, retry_params=None, _account_id=None): | ||
Check warning on line 219 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
"""Get GCSFileStat of a Google Cloud storage file. | ||
|
||
Args: | ||
|
@@ -250,7 +250,7 @@ | |
return file_stat | ||
|
||
|
||
def copy2(src, dst, metadata=None, retry_params=None): | ||
"""Copy the file content from src to dst. | ||
|
||
Args: | ||
|
@@ -282,7 +282,7 @@ | |
errors.check_status(status, [200], src, metadata, resp_headers, body=content) | ||
|
||
|
||
def listbucket(path_prefix, marker=None, prefix=None, max_keys=None, | ||
Check warning on line 285 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
delimiter=None, retry_params=None, _account_id=None): | ||
"""Returns a GCSFileStat iterator over a bucket. | ||
|
||
|
@@ -369,7 +369,7 @@ | |
|
||
return _Bucket(api, bucket, options) | ||
|
||
def compose(list_of_files, destination_file, files_metadata=None, | ||
Check warning on line 372 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
|
||
content_type=None, retry_params=None, _account_id=None): | ||
"""Runs the GCS Compose on the given files. | ||
|
||
|
@@ -413,7 +413,7 @@ | |
compose_object(file_list, destination_file, content_type) | ||
|
||
|
||
def _file_exists(destination): | ||
"""Checks if a file exists. | ||
|
||
Tries to open the file. | ||
|
@@ -432,7 +432,7 @@ | |
return False | ||
|
||
|
||
def _validate_compose_list(destination_file, file_list, | ||
Check warning on line 435 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-param-doc
Check warning on line 435 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitymissing-type-doc
Check warning on line 435 in cloudstorage/cloudstorage_api.py Codeac.io / Codeac Code Qualitydiffering-param-doc
|
||
files_metadata=None, number_of_files=32): | ||
"""Validates the file_list and merges the file_list, files_metadata. | ||
|
||
|
@@ -450,37 +450,37 @@ | |
common.validate_file_path(destination_file) | ||
bucket = destination_file[0:(destination_file.index('/', 1) + 1)] | ||
try: | ||
if isinstance(file_list, types.StringTypes): | ||
raise TypeError | ||
list_len = len(file_list) | ||
except TypeError: | ||
raise TypeError('file_list must be a list') | ||
|
||
if list_len > number_of_files: | ||
raise ValueError( | ||
'Compose attempted to create composite with too many' | ||
'(%i) components; limit is (%i).' % (list_len, number_of_files)) | ||
if list_len <= 1: | ||
raise ValueError('Compose operation requires at' | ||
' least two components; %i provided.' % list_len) | ||
|
||
if files_metadata is None: | ||
files_metadata = [] | ||
elif len(files_metadata) > list_len: | ||
raise ValueError('files_metadata contains more entries(%i)' | ||
' than file_list(%i)' | ||
% (len(files_metadata), list_len)) | ||
list_of_files = [] | ||
for source_file, meta_data in itertools.izip_longest(file_list, | ||
files_metadata): | ||
if not isinstance(source_file, str): | ||
raise TypeError('Each item of file_list must be a string') | ||
if source_file.startswith('/'): | ||
logging.warn('Detected a "/" at the start of the file, ' | ||
'Unless the file name contains a "/" it ' | ||
' may cause files to be misread') | ||
if source_file.startswith(bucket): | ||
logging.warn('Detected bucket name at the start of the file, ' | ||
'must not specify the bucket when listing file_names.' | ||
' May cause files to be misread') | ||
common.validate_file_path(bucket + source_file) | ||
|
@@ -495,7 +495,7 @@ | |
return list_of_files, bucket | ||
|
||
|
||
class _Bucket(object): | ||
"""A wrapper for a GCS bucket as the return value of listbucket.""" | ||
|
||
def __init__(self, api, path, options): | ||
|
@@ -513,7 +513,7 @@ | |
self._path = path | ||
self._options = options.copy() | ||
self._get_bucket_fut = self._api.get_bucket_async( | ||
self._path + '?' + urllib.urlencode(self._options)) | ||
self._last_yield = None | ||
self._new_max_keys = self._options.get('max-keys') | ||
|
||
|
@@ -551,27 +551,27 @@ | |
else: | ||
self._get_bucket_fut = None | ||
|
||
root = ET.fromstring(content) | ||
root = defusedxml.ElementTree.fromstring(content) | ||
dirs = self._next_dir_gen(root) | ||
files = self._next_file_gen(root) | ||
next_file = files.next() | ||
Comment on lines
+554
to
557
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The method |
||
next_dir = dirs.next() | ||
|
||
while ((max_keys is None or total < max_keys) and | ||
not (next_file is None and next_dir is None)): | ||
total += 1 | ||
if next_file is None: | ||
self._last_yield = next_dir | ||
next_dir = dirs.next() | ||
elif next_dir is None: | ||
self._last_yield = next_file | ||
next_file = files.next() | ||
elif next_dir < next_file: | ||
self._last_yield = next_dir | ||
next_dir = dirs.next() | ||
elif next_file < next_dir: | ||
self._last_yield = next_file | ||
next_file = files.next() | ||
else: | ||
logging.error( | ||
'Should never reach. next file is %r. next dir is %r.', | ||
|
@@ -580,7 +580,7 @@ | |
self._new_max_keys -= 1 | ||
yield self._last_yield | ||
|
||
def _next_file_gen(self, root): | ||
"""Generator for next file element in the document. | ||
|
||
Args: | ||
|
@@ -605,7 +605,7 @@ | |
e.clear() | ||
yield None | ||
|
||
def _next_dir_gen(self, root): | ||
"""Generator for next directory element in the document. | ||
|
||
Args: | ||
|
@@ -621,7 +621,7 @@ | |
e.clear() | ||
yield None | ||
|
||
def _should_get_another_batch(self, content): | ||
"""Whether to issue another GET bucket call. | ||
|
||
Args: | ||
|
@@ -648,7 +648,7 @@ | |
self._options['marker'] = next_marker | ||
return True | ||
|
||
def _find_elements(self, result, elements): | ||
"""Find interesting elements from XML. | ||
|
||
This function tries to only look for specified elements | ||
|
@@ -664,7 +664,7 @@ | |
""" | ||
element_mapping = {} | ||
result = StringIO.StringIO(result) | ||
for _, e in ET.iterparse(result, events=('end',)): | ||
for _, e in defusedxml.ElementTree.iterparse(result, events=('end',)): | ||
if not elements: | ||
break | ||
if e.tag in elements: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code is using
defusedxml.ElementTree.fromstring(content)
to parse XML content without any apparent validation of the content before parsing. This could potentially lead to XML External Entity (XXE) attacks if the content includes malicious external entities. Althoughdefusedxml
is designed to mitigate such attacks, it is still good practice to validate or sanitize the input before parsing to ensure that the content is trusted. Recommended solution is to implement proper content validation before parsing the XML.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Micro-Learning Topic: External entity injection (Detected by phrase)
Matched on "XXE"
What is this? (2min video)
An XML External Entity attack is a type of attack against an application that parses XML input. This attack occurs when XML input containing a reference to an external entity is processed by a weakly configured XML parser. This attack may lead to the disclosure of confidential data, denial of service, server-side request forgery, port scanning from the perspective of the machine where the parser is located, and other system impacts.
Try a challenge in Secure Code Warrior
Helpful references