Skip to content

Commit

Permalink
add back filename parameter to content-disposition header
Browse files Browse the repository at this point in the history
 * Add the `filename` parameter to the Content-Disposition header
   returned by WB for file downloads. Since `filename` cannot
   correctly encode multibyte characters, those characters will be
   transformed into their nearest ascii equivalent or removed if no
   such equivalent exists.

   When WB standardized the format of the header in c804e04 to
   support multibyte filenames, it was decided to only provide
   `filename*`, the extended-value form of the parameter.  While this
   works for modern browsers, it inadvertantly broke scripts[1] that
   expected the old form of the parameter.

   Thanks, @jcohenadad, for the report!

   [1] spinalcordtoolbox/spinalcordtoolbox#2056
  • Loading branch information
felliott committed Dec 13, 2018
1 parent bf8fe6d commit 1e47eb1
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 23 deletions.
16 changes: 14 additions & 2 deletions tests/core/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,22 @@ async def test_all_retry(self):

class TestContentDisposition:

@pytest.mark.parametrize("filename,expected", [
('meow.txt', 'meow.txt'),
('résumé.txt', 'resume.txt'),
(' ¿.surprise', ' .surprise'),
('a "file"', 'a \\"file\\"'),
('yes\\no', 'yes\\\\no'),
('ctrl\x09ch\x08ar', 'ctrl_ch_ar'),
])
def test_strip_for_disposition(self, filename, expected):
disposition = utils.strip_for_disposition(filename)
assert disposition == expected

@pytest.mark.parametrize("filename,expected", [
(None, 'attachment'),
('foo.txt', "attachment; filename*=UTF-8''foo.txt"),
(' ¿.surprise', "attachment; filename*=UTF-8''%20%C2%BF.surprise"),
('foo.txt', "attachment; filename=\"foo.txt\"; filename*=UTF-8''foo.txt"),
(' ¿.surprise', "attachment; filename=\" .surprise\"; filename*=UTF-8''%20%C2%BF.surprise"),
])
def test_content_disposition(self, filename, expected):
disposition = utils.make_disposition(filename)
Expand Down
12 changes: 9 additions & 3 deletions tests/providers/googlecloud/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,10 @@ async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_
@pytest.mark.aiohttpretty
async def test_download_file_with_accept_url(self, mock_time, mock_provider, file_wb_path):
file_obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
query = {'response-content-disposition': 'attachment; filename*=UTF-8\'\'text-file-1.txt'}
query = {
'response-content-disposition': ('attachment; filename="text-file-1.txt"; '
'filename*=UTF-8\'\'text-file-1.txt')
}
signed_url = mock_provider._build_and_sign_url('GET', file_obj_name, **query)
return_url = await mock_provider.download(file_wb_path, accept_url=True)

Expand All @@ -233,8 +236,11 @@ async def test_download_file_with_accept_url(self, mock_time, mock_provider, fil
async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path,
display_name_arg, expected_name):
file_obj_name = utils.get_obj_name(file_wb_path, is_folder=False)
query = {'response-content-disposition':
'attachment; filename*=UTF-8\'\'{}'.format(expected_name)}
query = {
'response-content-disposition': ('attachment; filename="{}"; '
'filename*=UTF-8\'\'{}').format(expected_name,
expected_name)
}
signed_url = mock_provider._build_and_sign_url('GET', file_obj_name, **query)
return_url = await mock_provider.download(file_wb_path, accept_url=True,
display_name=display_name_arg)
Expand Down
17 changes: 10 additions & 7 deletions tests/providers/s3/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class TestCRUD:
async def test_download(self, provider, mock_time):
path = WaterButlerPath('/muhtriangle')
response_headers = {'response-content-disposition':
'attachment; filename*=UTF-8\'\'muhtriangle'}
'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'}
url = provider.bucket.new_key(path.path).generate_url(100,
response_headers=response_headers)
aiohttpretty.register_uri('GET', url, body=b'delicious', auto_length=True)
Expand All @@ -311,7 +311,7 @@ async def test_download(self, provider, mock_time):
async def test_download_range(self, provider, mock_time):
path = WaterButlerPath('/muhtriangle')
response_headers = {'response-content-disposition':
'attachment; filename*=UTF-8\'\'muhtriangle'}
'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'}
url = provider.bucket.new_key(path.path).generate_url(100,
response_headers=response_headers)
aiohttpretty.register_uri('GET', url, body=b'de', auto_length=True, status=206)
Expand All @@ -327,7 +327,7 @@ async def test_download_range(self, provider, mock_time):
async def test_download_version(self, provider, mock_time):
path = WaterButlerPath('/muhtriangle')
response_headers = {'response-content-disposition':
'attachment; filename*=UTF-8\'\'muhtriangle'}
'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'}
url = provider.bucket.new_key(path.path).generate_url(
100,
query_parameters={'versionId': 'someversion'},
Expand All @@ -350,8 +350,11 @@ async def test_download_version(self, provider, mock_time):
async def test_download_with_display_name(self, provider, mock_time, display_name_arg,
expected_name):
path = WaterButlerPath('/muhtriangle')
response_headers = {'response-content-disposition':
"attachment; filename*=UTF-8''{}".format(expected_name)}
response_headers = {
'response-content-disposition': ('attachment; filename="{}"; '
'filename*=UTF-8\'\'{}').format(expected_name,
expected_name)
}
url = provider.bucket.new_key(path.path).generate_url(100,
response_headers=response_headers)
aiohttpretty.register_uri('GET', url, body=b'delicious', auto_length=True)
Expand All @@ -366,7 +369,7 @@ async def test_download_with_display_name(self, provider, mock_time, display_nam
async def test_download_not_found(self, provider, mock_time):
path = WaterButlerPath('/muhtriangle')
response_headers = {'response-content-disposition':
'attachment; filename*=UTF-8\'\'muhtriangle'}
'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'}
url = provider.bucket.new_key(path.path).generate_url(100,
response_headers=response_headers)
aiohttpretty.register_uri('GET', url, status=404)
Expand Down Expand Up @@ -998,7 +1001,7 @@ async def test_large_folder_delete(self, provider, mock_time):
async def test_accepts_url(self, provider, mock_time):
path = WaterButlerPath('/my-image')
response_headers = {'response-content-disposition':
'attachment; filename*=UTF-8\'\'my-image'}
'attachment; filename="my-image"; filename*=UTF-8\'\'my-image'}
url = provider.bucket.new_key(path.path).generate_url(100,
'GET',
response_headers=response_headers)
Expand Down
20 changes: 10 additions & 10 deletions tests/server/api/v1/test_metadata_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def test_download_file_headers(self, handler, mock_stream):

assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1')
assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1')
disposition = b'attachment; filename*=UTF-8\'\'peanut-butter.docx'
disposition = b'attachment; filename="peanut-butter.docx"; filename*=UTF-8\'\'peanut-butter.docx'
assert handler._headers['Content-Disposition'] == disposition

handler.write_stream.assert_awaited_once()
Expand All @@ -104,19 +104,19 @@ async def test_download_file_headers_no_stream_name(self, handler, mock_stream):

assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1')
assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1')
disposition = b'attachment; filename*=UTF-8\'\'test_file'
disposition = b'attachment; filename="test_file"; filename*=UTF-8\'\'test_file'
assert handler._headers['Content-Disposition'] == disposition

handler.write_stream.assert_awaited_once()

@pytest.mark.asyncio
@pytest.mark.parametrize("given_arg,expected_name", [
(['résumé.doc'], b'r%C3%A9sum%C3%A9.doc'),
([''], b'test_file'),
([], b'test_file'),
@pytest.mark.parametrize("given_arg,expected_name,filtered_name", [
(['résumé.doc'], b'r%C3%A9sum%C3%A9.doc', b'resume.doc'),
([''], b'test_file', b'test_file'),
([], b'test_file', b'test_file'),
])
async def test_download_file_with_display_name(self, handler, mock_stream, given_arg,
expected_name):
expected_name, filtered_name):

handler.provider.download = MockCoroutine(return_value=mock_stream)
handler.path = WaterButlerPath('/test_file')
Expand All @@ -126,7 +126,7 @@ async def test_download_file_with_display_name(self, handler, mock_stream, given

assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1')
assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1')
disposition = b'attachment; filename*=UTF-8\'\'' + expected_name
disposition = b'attachment; filename="' + filtered_name + b'"; filename*=UTF-8\'\'' + expected_name
assert handler._headers['Content-Disposition'] == disposition

handler.write_stream.assert_awaited_once()
Expand Down Expand Up @@ -212,7 +212,7 @@ async def test_download_folder_as_zip(self, handler, mock_stream):
await handler.download_folder_as_zip()

assert handler._headers['Content-Type'] == bytes('application/zip', 'latin-1')
expected = b'attachment; filename*=UTF-8\'\'test_file.zip'
expected = b'attachment; filename="test_file.zip"; filename*=UTF-8\'\'test_file.zip'
assert handler._headers['Content-Disposition'] == expected

handler.write_stream.assert_called_once_with(mock_stream)
Expand All @@ -226,7 +226,7 @@ async def test_download_folder_as_zip_root(self, handler, mock_stream):
await handler.download_folder_as_zip()

assert handler._headers['Content-Type'] == bytes('application/zip', 'latin-1')
expected = b'attachment; filename*=UTF-8\'\'MockProvider-archive.zip'
expected = b'attachment; filename="MockProvider-archive.zip"; filename*=UTF-8\'\'MockProvider-archive.zip'
assert handler._headers['Content-Disposition'] == expected

handler.write_stream.assert_called_once_with(mock_stream)
23 changes: 22 additions & 1 deletion waterbutler/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import re
import json
import pytz
import asyncio
import logging
import functools
import unicodedata
import dateutil.parser
from urllib import parse
# from concurrent.futures import ProcessPoolExecutor TODO Get this working
Expand Down Expand Up @@ -117,6 +119,23 @@ def normalize_datetime(date_string):
return parsed_datetime.isoformat()


def strip_for_disposition(filename):
"""Convert given filename to a form useable by a non-extended parameter.
The permitted characters allowed in a non-extended parameter are defined in RFC-2616, Section
2.2. This is a subset of the ascii character set. This function converts non-ascii characters
to their nearest ascii equivalent or strips them if there is no equivalent. It then replaces
control characters with underscores and escapes blackslashes and double quotes.
:param str filename: a filename to encode
"""

nfkd_form = unicodedata.normalize('NFKD', filename)
only_ascii = nfkd_form.encode('ASCII', 'ignore')
no_ctrl = re.sub(r'[\x00-\x1f]', '_', only_ascii.decode('ascii'))
return no_ctrl.replace('\\', '\\\\').replace('"', '\\"')


def encode_for_disposition(filename):
"""Convert given filename into utf-8 octets, then percent encode them.
Expand Down Expand Up @@ -149,8 +168,10 @@ def make_disposition(filename):
if not filename:
return 'attachment'
else:
stripped_filename = strip_for_disposition(filename)
encoded_filename = encode_for_disposition(filename)
return 'attachment; filename*=UTF-8\'\'{}'.format(encoded_filename)
return 'attachment; filename="{}"; filename*=UTF-8\'\'{}'.format(stripped_filename,
encoded_filename)


class ZipStreamGenerator:
Expand Down

0 comments on commit 1e47eb1

Please sign in to comment.