Fixed #24452 -- Fixed HashedFilesMixin correctness with nested paths.
dsanders11 authored and timgraham committed Jan 11, 2017
1 parent 7156a6c commit 53bffe8
Showing 6 changed files with 342 additions and 50 deletions.
150 changes: 124 additions & 26 deletions django/contrib/staticfiles/storage.py
@@ -18,6 +18,7 @@
from django.utils.encoding import force_bytes, force_text
from django.utils.functional import LazyObject
from django.utils.six import iteritems
from django.utils.six.moves import range
from django.utils.six.moves.urllib.parse import (
unquote, urldefrag, urlsplit, urlunsplit,
)
@@ -54,6 +55,7 @@ def path(self, name):

class HashedFilesMixin(object):
default_template = """url("%s")"""
max_post_process_passes = 5
patterns = (
("*.css", (
r"""(url\(['"]{0,1}\s*(.*?)["']{0,1}\))""",
@@ -85,16 +87,20 @@ def file_hash(self, name, content=None):
md5.update(chunk)
return md5.hexdigest()[:12]

def hashed_name(self, name, content=None):
def hashed_name(self, name, content=None, filename=None):
# `filename` is the name of file to hash if `content` isn't given.
# `name` is the base name to construct the new hashed filename from.
parsed_name = urlsplit(unquote(name))
clean_name = parsed_name.path.strip()
if filename:
filename = urlsplit(unquote(filename)).path.strip()
filename = filename or clean_name
opened = False
if content is None:
if not self.exists(clean_name):
raise ValueError("The file '%s' could not be found with %r." %
(clean_name, self))
if not self.exists(filename):
raise ValueError("The file '%s' could not be found with %r." % (filename, self))
try:
content = self.open(clean_name)
content = self.open(filename)
except IOError:
# Handle directory paths and fragments
return name
@@ -118,9 +124,9 @@ def hashed_name(self, name, content=None):
unparsed_name[2] += '?'
return urlunsplit(unparsed_name)

def url(self, name, force=False):
def _url(self, hashed_name_func, name, force=False, hashed_files=None):
"""
Return the real URL in DEBUG mode.
Return the non-hashed URL in DEBUG mode.
"""
if settings.DEBUG and not force:
hashed_name, fragment = name, ''
@@ -129,7 +135,10 @@ def url(self, name, force=False):
if urlsplit(clean_name).path.endswith('/'): # don't hash paths
hashed_name = name
else:
hashed_name = self.stored_name(clean_name)
args = (clean_name,)
if hashed_files is not None:
args += (hashed_files,)
hashed_name = hashed_name_func(*args)

final_url = super(HashedFilesMixin, self).url(hashed_name)

@@ -146,7 +155,13 @@

return unquote(final_url)

def url_converter(self, name, template=None):
def url(self, name, force=False):
"""
Return the non-hashed URL in DEBUG mode.
"""
return self._url(self.stored_name, name, force)

def url_converter(self, name, hashed_files, template=None):
"""
Return the custom URL converter for the given file name.
"""
@@ -184,7 +199,10 @@ def converter(matchobj):
target_name = posixpath.join(posixpath.dirname(source_name), url_path)

# Determine the hashed name of the target file with the storage backend.
hashed_url = self.url(unquote(target_name), force=True)
hashed_url = self._url(
self._stored_name, unquote(target_name),
force=True, hashed_files=hashed_files,
)

transformed_url = '/'.join(url_path.split('/')[:-1] + hashed_url.split('/')[-1:])
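# For example, an illustrative url_path '../images/logo.png' with hashed_url
# '/static/images/logo.f4ba1ab2ba01.png' yields '../images/logo.f4ba1ab2ba01.png':
# the relative prefix is kept and only the final component is swapped.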

@@ -223,21 +241,48 @@ def post_process(self, paths, dry_run=False, **options):
path for path in paths
if matches_patterns(path, self._patterns.keys())
]
# Do a single pass first. Post-process all files once, then repeat for
# adjustable files.
for name, hashed_name, processed, _ in self._post_process(paths, adjustable_paths, hashed_files):
yield name, hashed_name, processed

paths = {path: paths[path] for path in adjustable_paths}

# then sort the files by the directory level
for i in range(self.max_post_process_passes):
substitutions = False
for name, hashed_name, processed, subst in self._post_process(paths, adjustable_paths, hashed_files):
yield name, hashed_name, processed
substitutions = substitutions or subst

if not substitutions:
break

if substitutions:
yield 'All', None, RuntimeError('Max post-process passes exceeded.')

# Store the processed paths
self.hashed_files.update(hashed_files)

def _post_process(self, paths, adjustable_paths, hashed_files):
# Sort the files by directory level
def path_level(name):
return len(name.split(os.sep))

for name in sorted(paths.keys(), key=path_level, reverse=True):

substitutions = True
# use the original, local file, not the copied-but-unprocessed
# file, which might be somewhere far away, like S3
storage, path = paths[name]
with storage.open(path) as original_file:
cleaned_name = self.clean_name(name)
hash_key = self.hash_key(cleaned_name)

# generate the hash with the original content, even for
# adjustable files.
hashed_name = self.hashed_name(name, original_file)
if hash_key not in hashed_files:
hashed_name = self.hashed_name(name, original_file)
else:
hashed_name = hashed_files[hash_key]

# then get the original's file content..
if hasattr(original_file, 'seek'):
@@ -248,23 +293,35 @@ def path_level(name):

# ..to apply each replacement pattern to the content
if name in adjustable_paths:
old_hashed_name = hashed_name
content = original_file.read().decode(settings.FILE_CHARSET)
for extension, patterns in iteritems(self._patterns):
if matches_patterns(path, (extension,)):
for pattern, template in patterns:
converter = self.url_converter(name, template)
converter = self.url_converter(name, hashed_files, template)
try:
content = pattern.sub(converter, content)
except ValueError as exc:
yield name, None, exc
yield name, None, exc, False
if hashed_file_exists:
self.delete(hashed_name)
# then save the processed result
content_file = ContentFile(force_bytes(content))
# Save intermediate file for reference
saved_name = self._save(hashed_name, content_file)
hashed_name = self.hashed_name(name, content_file)

if self.exists(hashed_name):
self.delete(hashed_name)

saved_name = self._save(hashed_name, content_file)
hashed_name = force_text(self.clean_name(saved_name))
# If the file hash stayed the same, this file didn't change
if old_hashed_name == hashed_name:
substitutions = False
processed = True
else:

if not processed:
# or handle the case in which neither processing nor
# a change to the original file happened
if not hashed_file_exists:
@@ -273,32 +330,56 @@ def path_level(name):
hashed_name = force_text(self.clean_name(saved_name))

# and then set the cache accordingly
hashed_files[self.hash_key(name)] = hashed_name
yield name, hashed_name, processed
hashed_files[hash_key] = hashed_name

# Finally store the processed paths
self.hashed_files.update(hashed_files)
yield name, hashed_name, processed, substitutions

def clean_name(self, name):
return name.replace('\\', '/')

def hash_key(self, name):
return name

def stored_name(self, name):
hash_key = self.hash_key(name)
cache_name = self.hashed_files.get(hash_key)
def _stored_name(self, name, hashed_files):
# Normalize the path to avoid multiple names for the same file like
# ../foo/bar.css and ../foo/../foo/bar.css which normalize to the same
# path.
name = posixpath.normpath(name)
cleaned_name = self.clean_name(name)
hash_key = self.hash_key(cleaned_name)
cache_name = hashed_files.get(hash_key)
if cache_name is None:
cache_name = self.clean_name(self.hashed_name(name))
# store the hashed name if there was a miss, e.g.
# when the files are still processed
self.hashed_files[hash_key] = cache_name
return cache_name

def stored_name(self, name):
cleaned_name = self.clean_name(name)
hash_key = self.hash_key(cleaned_name)
cache_name = self.hashed_files.get(hash_key)
if cache_name:
return cache_name
# No cached name found, recalculate it from the files.
intermediate_name = name
for i in range(self.max_post_process_passes + 1):
cache_name = self.clean_name(
self.hashed_name(name, content=None, filename=intermediate_name)
)
if intermediate_name == cache_name:
# Store the hashed name if there was a miss.
self.hashed_files[hash_key] = cache_name
return cache_name
else:
# Move on to the next intermediate file.
intermediate_name = cache_name
# If the cache name can't be determined after the max number of passes,
# the intermediate files on disk may be corrupt; avoid an infinite loop.
raise ValueError("The name '%s' could not be hashed with %r." % (name, self))


class ManifestFilesMixin(HashedFilesMixin):
manifest_version = '1.0' # the manifest format standard
manifest_name = 'staticfiles.json'
manifest_strict = True

def __init__(self, *args, **kwargs):
super(ManifestFilesMixin, self).__init__(*args, **kwargs)
@@ -341,6 +422,23 @@ def save_manifest(self):
contents = json.dumps(payload).encode('utf-8')
self._save(self.manifest_name, ContentFile(contents))
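# The saved staticfiles.json then looks like (entries illustrative):
# {"version": "1.0", "paths": {"css/base.css": "css/base.27e20196a850.css"}}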

def stored_name(self, name):
parsed_name = urlsplit(unquote(name))
clean_name = parsed_name.path.strip()
hash_key = self.hash_key(clean_name)
cache_name = self.hashed_files.get(hash_key)
if cache_name is None:
if self.manifest_strict:
raise ValueError("Missing staticfiles manifest entry for '%s'" % clean_name)
cache_name = self.clean_name(self.hashed_name(name))
unparsed_name = list(parsed_name)
unparsed_name[2] = cache_name
# Special casing for a @font-face hack, like url("myfont.eot?#iefix")
# http://www.fontspring.com/blog/the-new-bulletproof-font-face-syntax
if '?#' in name and not unparsed_name[3]:
unparsed_name[2] += '?'
return urlunsplit(unparsed_name)


class _MappingCache(object):
"""
40 changes: 40 additions & 0 deletions docs/ref/contrib/staticfiles.txt
@@ -293,6 +293,24 @@

@import url("../admin/css/base.27e20196a850.css");

.. attribute:: storage.ManifestStaticFilesStorage.max_post_process_passes

Since static files might reference other static files that need to have their
paths replaced, multiple passes of replacing paths may be needed until the file
hashes converge. To prevent an infinite loop due to hashes not converging (for
example, if ``'foo.css'`` references ``'bar.css'`` which references
``'foo.css'``) there is a maximum number of passes before post-processing is
abandoned. In cases with a large number of references, a higher number of
passes might be needed. Increase the maximum number of passes by subclassing
``ManifestStaticFilesStorage`` and setting the ``max_post_process_passes``
attribute. It defaults to 5.
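
A minimal subclass might look like this (module path and class name are
illustrative)::

    # myproject/storage.py
    from django.contrib.staticfiles.storage import ManifestStaticFilesStorage

    class DeepManifestStaticFilesStorage(ManifestStaticFilesStorage):
        # Allow longer chains of nested references to converge.
        max_post_process_passes = 10

Point the :setting:`STATICFILES_STORAGE` setting at the subclass to use it.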

.. versionchanged:: 1.11

Previous versions didn't make multiple passes to ensure file hashes
converged, so file hashes often weren't correct. The
``max_post_process_passes`` attribute was added.

To enable the ``ManifestStaticFilesStorage`` you have to make sure the
following requirements are met:

@@ -315,6 +333,18 @@ hashed names for all processed files in a file called ``staticfiles.json``.
This happens once when you run the :djadmin:`collectstatic` management
command.

.. attribute:: storage.ManifestStaticFilesStorage.manifest_strict

If a file isn't found in the ``staticfiles.json`` manifest at runtime, a
``ValueError`` is raised. This behavior can be disabled by subclassing
``ManifestStaticFilesStorage`` and setting the ``manifest_strict`` attribute to
``False`` -- nonexistent paths will remain unchanged.
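
For example (the class name is illustrative)::

    from django.contrib.staticfiles.storage import ManifestStaticFilesStorage

    class LenientManifestStaticFilesStorage(ManifestStaticFilesStorage):
        # Leave paths missing from the manifest unchanged instead of
        # raising ValueError.
        manifest_strict = False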

.. versionchanged:: 1.11

The ``manifest_strict`` attribute was added. In older versions, the
behavior is the same as ``manifest_strict=False``.

Due to the requirement of running :djadmin:`collectstatic`, this storage
typically shouldn't be used when running tests as ``collectstatic`` isn't run
as part of the normal test setup. During testing, ensure that the
@@ -350,6 +380,16 @@ If you want to override certain options of the cache backend the storage uses,
simply specify a custom entry in the :setting:`CACHES` setting named
``'staticfiles'``. It falls back to using the ``'default'`` cache backend.

.. warning::

``CachedStaticFilesStorage`` isn't recommended -- in almost all cases
``ManifestStaticFilesStorage`` is a better choice. There are several
performance penalties when using ``CachedStaticFilesStorage`` since a cache
miss requires hashing files at runtime. Remote file storage requires several
round-trips to hash a file on a cache miss, as several file accesses are
required to ensure that the file hash is correct in the case of nested file
paths.

Finders Module
==============

15 changes: 15 additions & 0 deletions docs/releases/1.11.txt
@@ -481,6 +481,21 @@ Backwards incompatible changes in 1.11
updated. Check your project if you subclass these widgets or extend the
templates.

:mod:`django.contrib.staticfiles`
---------------------------------

* ``collectstatic`` may now fail during post-processing when using a hashed
static files storage if a reference loop exists (e.g. ``'foo.css'``
references ``'bar.css'`` which itself references ``'foo.css'``) or if the
chain of files referencing other files is too deep to resolve in several
passes. In the latter case, increase the number of passes using
:attr:`.ManifestStaticFilesStorage.max_post_process_passes`.

* When using ``ManifestStaticFilesStorage``, static files not found in the
manifest at runtime now raise a ``ValueError`` instead of returning an
unchanged path. You can revert to the old behavior by setting
:attr:`.ManifestStaticFilesStorage.manifest_strict` to ``False``.

Database backend API
--------------------

1 change: 1 addition & 0 deletions tests/staticfiles_tests/project/loop/bar.css
@@ -0,0 +1 @@
@import url("foo.css")
1 change: 1 addition & 0 deletions tests/staticfiles_tests/project/loop/foo.css
@@ -0,0 +1 @@
@import url("bar.css")
