Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use defusedxml for Parsing XML #2

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cloudstorage/cloudstorage_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@


from __future__ import with_statement
import defusedxml.ElementTree



Expand All @@ -30,21 +31,20 @@
'get_storage_class',
]

import logging

Check warning on line 34 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

wrong-import-order

standard import "import logging" should be placed before "import defusedxml.ElementTree"
import StringIO
import urllib

Check warning on line 36 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

wrong-import-order

standard import "import urllib" should be placed before "import defusedxml.ElementTree"
import os

Check warning on line 37 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

wrong-import-order

standard import "import os" should be placed before "import defusedxml.ElementTree"
import itertools

Check warning on line 38 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

wrong-import-order

standard import "import itertools" should be placed before "import defusedxml.ElementTree"
import types

Check warning on line 39 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

wrong-import-order

standard import "import types" should be placed before "import defusedxml.ElementTree"
import xml.etree.cElementTree as ET
from . import api_utils

Check failure on line 40 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-name-in-module

No name 'api_utils' in module 'cloudstorage'
from . import common
from . import errors
from . import storage_api

Check failure on line 43 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-name-in-module

No name 'storage_api' in module 'cloudstorage'



def open(filename,

Check warning on line 47 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

redefined-builtin

Redefining built-in 'open'

Check warning on line 47 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"content_type, filename, mode, offset, options, read_buffer_size, retry_params" missing in parameter type documentation

Check warning on line 47 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
mode='r',
content_type=None,
options=None,
Expand Down Expand Up @@ -92,7 +92,7 @@
account_id=_account_id)
filename = api_utils._quote_filename(filename)

if mode == 'w':

Check warning on line 95 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-else-return

Unnecessary "elif" after "return", remove the leading "el" from "elif"
common.validate_options(options)
return storage_api.StreamingBuffer(api, filename, content_type, options)
elif mode == 'r':
Expand All @@ -104,10 +104,10 @@
buffer_size=read_buffer_size,
offset=offset)
else:
raise ValueError('Invalid mode %s.' % mode)

Check warning on line 107 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

consider-using-f-string

Formatting a regular string which could be a f-string


def delete(filename, retry_params=None, _account_id=None):

Check warning on line 110 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"filename, retry_params" missing in parameter type documentation

Check warning on line 110 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
"""Delete a Google Cloud Storage file.

Args:
Expand All @@ -128,7 +128,7 @@
body=content)


def get_location(bucket, retry_params=None, _account_id=None):

Check warning on line 131 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"bucket, retry_params" missing in parameter type documentation

Check warning on line 131 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
"""Returns the location for the given bucket.

https://cloud.google.com/storage/docs/bucket-locations
Expand All @@ -154,7 +154,7 @@
_account_id=_account_id)


def get_storage_class(bucket, retry_params=None, _account_id=None):

Check warning on line 157 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"bucket, retry_params" missing in parameter type documentation

Check warning on line 157 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
"""Returns the storage class for the given bucket.

https://cloud.google.com/storage/docs/storage-classes
Expand All @@ -180,7 +180,7 @@
_account_id=_account_id)


def _get_bucket_attribute(bucket,

Check warning on line 183 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"bucket, query_param, retry_params, xml_response_tag" missing in parameter type documentation

Check warning on line 183 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
query_param,
xml_response_tag,
retry_params=None,
Expand All @@ -206,17 +206,17 @@
api = storage_api._get_storage_api(retry_params=retry_params,
account_id=_account_id)
common.validate_bucket_path(bucket)
status, headers, content = api.get_bucket('%s?%s' % (bucket, query_param))

Check warning on line 209 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

consider-using-f-string

Formatting a regular string which could be a f-string

errors.check_status(status, [200], bucket, resp_headers=headers, body=content)

root = ET.fromstring(content)
root = defusedxml.ElementTree.fromstring(content)
if root.tag == xml_response_tag and root.text:
return root.text
Comment on lines +213 to 215

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code is using defusedxml.ElementTree.fromstring(content) to parse XML content without any apparent validation of the content before parsing. This could potentially lead to XML External Entity (XXE) attacks if the content includes malicious external entities. Although defusedxml is designed to mitigate such attacks, it is still good practice to validate or sanitize the input before parsing to ensure that the content is trusted. Recommended solution is to implement proper content validation before parsing the XML.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Micro-Learning Topic: External entity injection (Detected by phrase)

Matched on "XXE"

What is this? (2min video)

An XML External Entity attack is a type of attack against an application that parses XML input. This attack occurs when XML input containing a reference to an external entity is processed by a weakly configured XML parser. This attack may lead to the disclosure of confidential data, denial of service, server-side request forgery, port scanning from the perspective of the machine where the parser is located, and other system impacts.

Try a challenge in Secure Code Warrior

Helpful references

return None


def stat(filename, retry_params=None, _account_id=None):

Check warning on line 219 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"filename, retry_params" missing in parameter type documentation

Check warning on line 219 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
"""Get GCSFileStat of a Google Cloud storage file.

Args:
Expand Down Expand Up @@ -250,7 +250,7 @@
return file_stat


def copy2(src, dst, metadata=None, retry_params=None):

Check warning on line 253 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"dst, metadata, retry_params, src" missing in parameter type documentation
"""Copy the file content from src to dst.

Args:
Expand Down Expand Up @@ -282,7 +282,7 @@
errors.check_status(status, [200], src, metadata, resp_headers, body=content)


def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,

Check warning on line 285 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"delimiter, marker, max_keys, path_prefix, prefix, retry_params" missing in parameter type documentation

Check warning on line 285 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
delimiter=None, retry_params=None, _account_id=None):
"""Returns a GCSFileStat iterator over a bucket.

Expand Down Expand Up @@ -369,7 +369,7 @@

return _Bucket(api, bucket, options)

def compose(list_of_files, destination_file, files_metadata=None,

Check warning on line 372 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"content_type, destination_file, files_metadata, list_of_files, retry_params" missing in parameter type documentation

Check warning on line 372 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-param-doc

"_account_id" useless ignored parameter documentation
content_type=None, retry_params=None, _account_id=None):
"""Runs the GCS Compose on the given files.

Expand Down Expand Up @@ -413,7 +413,7 @@
compose_object(file_list, destination_file, content_type)


def _file_exists(destination):

Check warning on line 416 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"destination" missing in parameter type documentation
"""Checks if a file exists.

Tries to open the file.
Expand All @@ -432,7 +432,7 @@
return False


def _validate_compose_list(destination_file, file_list,

Check warning on line 435 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-param-doc

"destination_file" missing in parameter documentation

Check warning on line 435 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"destination_file, file_list, files_metadata, number_of_files" missing in parameter type documentation

Check warning on line 435 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

differing-param-doc

"destination" differing in parameter documentation

Check warning on line 435 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

too-complex

'_validate_compose_list' is too complex. The McCabe rating is 13
files_metadata=None, number_of_files=32):
"""Validates the file_list and merges the file_list, files_metadata.

Expand All @@ -450,37 +450,37 @@
common.validate_file_path(destination_file)
bucket = destination_file[0:(destination_file.index('/', 1) + 1)]
try:
if isinstance(file_list, types.StringTypes):

Check failure on line 453 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Module 'types' has no 'StringTypes' member
raise TypeError
list_len = len(file_list)
except TypeError:
raise TypeError('file_list must be a list')

Check warning on line 457 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

raise-missing-from

Consider explicitly re-raising using 'except TypeError as exc' and 'raise TypeError('file_list must be a list') from exc'

if list_len > number_of_files:
raise ValueError(
'Compose attempted to create composite with too many'

Check warning on line 461 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

consider-using-f-string

Formatting a regular string which could be a f-string
'(%i) components; limit is (%i).' % (list_len, number_of_files))
if list_len <= 1:
raise ValueError('Compose operation requires at'

Check warning on line 464 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

consider-using-f-string

Formatting a regular string which could be a f-string
' least two components; %i provided.' % list_len)

if files_metadata is None:
files_metadata = []
elif len(files_metadata) > list_len:
raise ValueError('files_metadata contains more entries(%i)'

Check warning on line 470 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

consider-using-f-string

Formatting a regular string which could be a f-string
' than file_list(%i)'
% (len(files_metadata), list_len))
list_of_files = []
for source_file, meta_data in itertools.izip_longest(file_list,

Check failure on line 474 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Module 'itertools' has no 'izip_longest' member; maybe 'zip_longest'?
files_metadata):
if not isinstance(source_file, str):
raise TypeError('Each item of file_list must be a string')
if source_file.startswith('/'):
logging.warn('Detected a "/" at the start of the file, '

Check warning on line 479 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

deprecated-method

Using deprecated method warn()
'Unless the file name contains a "/" it '
' may cause files to be misread')
if source_file.startswith(bucket):
logging.warn('Detected bucket name at the start of the file, '

Check warning on line 483 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

deprecated-method

Using deprecated method warn()
'must not specify the bucket when listing file_names.'
' May cause files to be misread')
common.validate_file_path(bucket + source_file)
Expand All @@ -495,7 +495,7 @@
return list_of_files, bucket


class _Bucket(object):

Check warning on line 498 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

useless-object-inheritance

Class '_Bucket' inherits from object, can be safely removed from bases in python3
"""A wrapper for a GCS bucket as the return value of listbucket."""

def __init__(self, api, path, options):
Expand All @@ -513,7 +513,7 @@
self._path = path
self._options = options.copy()
self._get_bucket_fut = self._api.get_bucket_async(
self._path + '?' + urllib.urlencode(self._options))

Check failure on line 516 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Module 'urllib' has no 'urlencode' member
self._last_yield = None
self._new_max_keys = self._options.get('max-keys')

Expand Down Expand Up @@ -551,27 +551,27 @@
else:
self._get_bucket_fut = None

root = ET.fromstring(content)
root = defusedxml.ElementTree.fromstring(content)
dirs = self._next_dir_gen(root)
files = self._next_file_gen(root)
next_file = files.next()

Check failure on line 557 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Generator 'generator' has no 'next' member
Comment on lines +554 to 557

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method files.next() is used, which is not compatible with Python 3. In Python 3, next() is a built-in function and should be used as next(files). This code will raise an AttributeError in Python 3. The recommended solution is to update the code to use the built-in next() function to ensure compatibility with Python 3.

next_dir = dirs.next()

Check failure on line 558 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Generator 'generator' has no 'next' member

while ((max_keys is None or total < max_keys) and
not (next_file is None and next_dir is None)):
total += 1
if next_file is None:
self._last_yield = next_dir
next_dir = dirs.next()

Check failure on line 565 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Generator 'generator' has no 'next' member
elif next_dir is None:
self._last_yield = next_file
next_file = files.next()

Check failure on line 568 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Generator 'generator' has no 'next' member
elif next_dir < next_file:
self._last_yield = next_dir
next_dir = dirs.next()

Check failure on line 571 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Generator 'generator' has no 'next' member
elif next_file < next_dir:
self._last_yield = next_file
next_file = files.next()

Check failure on line 574 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

no-member

Generator 'generator' has no 'next' member
else:
logging.error(
'Should never reach. next file is %r. next dir is %r.',
Expand All @@ -580,7 +580,7 @@
self._new_max_keys -= 1
yield self._last_yield

def _next_file_gen(self, root):

Check warning on line 583 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"root" missing in parameter type documentation
"""Generator for next file element in the document.

Args:
Expand All @@ -605,7 +605,7 @@
e.clear()
yield None

def _next_dir_gen(self, root):

Check warning on line 608 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"root" missing in parameter type documentation
"""Generator for next directory element in the document.

Args:
Expand All @@ -621,7 +621,7 @@
e.clear()
yield None

def _should_get_another_batch(self, content):

Check warning on line 624 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"content" missing in parameter type documentation
"""Whether to issue another GET bucket call.

Args:
Expand All @@ -648,7 +648,7 @@
self._options['marker'] = next_marker
return True

def _find_elements(self, result, elements):

Check warning on line 651 in cloudstorage/cloudstorage_api.py

View check run for this annotation

Codeac.io / Codeac Code Quality

missing-type-doc

"elements, result" missing in parameter type documentation
"""Find interesting elements from XML.

This function tries to only look for specified elements
Expand All @@ -664,7 +664,7 @@
"""
element_mapping = {}
result = StringIO.StringIO(result)
for _, e in ET.iterparse(result, events=('end',)):
for _, e in defusedxml.ElementTree.iterparse(result, events=('end',)):
if not elements:
break
if e.tag in elements:
Expand Down
Loading