From 1e47eb155d263d3d4916205f65a56770ffacaab6 Mon Sep 17 00:00:00 2001 From: Fitz Elliott Date: Mon, 10 Dec 2018 17:51:25 -0500 Subject: [PATCH] add back filename parameter to content-disposition header * Add the `filename` parameter to the Content-Disposition header returned by WB for file downloads. Since `filename` cannot correctly encode multibyte characters, those characters will be transformed into their nearest ascii equivalent or removed if no such equivalent exists. When WB standardized the format of the header in c804e04789b2 to support multibyte filenames, it was decided to only provide `filename*`, the extended-value form of the parameter. While this works for modern browsers, it inadvertantly broke scripts[1] that expected the old form of the parameter. Thanks, @jcohenadad, for the report! [1] https://github.com/neuropoly/spinalcordtoolbox/issues/2056 --- tests/core/test_utils.py | 16 ++++++++++++-- tests/providers/googlecloud/test_provider.py | 12 +++++++--- tests/providers/s3/test_provider.py | 17 +++++++++------ tests/server/api/v1/test_metadata_mixin.py | 20 ++++++++--------- waterbutler/core/utils.py | 23 +++++++++++++++++++- 5 files changed, 65 insertions(+), 23 deletions(-) diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index d4eb86c50..91a55c40d 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -89,10 +89,22 @@ async def test_all_retry(self): class TestContentDisposition: + @pytest.mark.parametrize("filename,expected", [ + ('meow.txt', 'meow.txt'), + ('résumé.txt', 'resume.txt'), + (' ¿.surprise', ' .surprise'), + ('a "file"', 'a \\"file\\"'), + ('yes\\no', 'yes\\\\no'), + ('ctrl\x09ch\x08ar', 'ctrl_ch_ar'), + ]) + def test_strip_for_disposition(self, filename, expected): + disposition = utils.strip_for_disposition(filename) + assert disposition == expected + @pytest.mark.parametrize("filename,expected", [ (None, 'attachment'), - ('foo.txt', "attachment; filename*=UTF-8''foo.txt"), - (' ¿.surprise', "attachment; filename*=UTF-8''%20%C2%BF.surprise"), + ('foo.txt', "attachment; filename=\"foo.txt\"; filename*=UTF-8''foo.txt"), + (' ¿.surprise', "attachment; filename=\" .surprise\"; filename*=UTF-8''%20%C2%BF.surprise"), ]) def test_content_disposition(self, filename, expected): disposition = utils.make_disposition(filename) diff --git a/tests/providers/googlecloud/test_provider.py b/tests/providers/googlecloud/test_provider.py index 4a6233768..6d22e1850 100644 --- a/tests/providers/googlecloud/test_provider.py +++ b/tests/providers/googlecloud/test_provider.py @@ -215,7 +215,10 @@ async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_ @pytest.mark.aiohttpretty async def test_download_file_with_accept_url(self, mock_time, mock_provider, file_wb_path): file_obj_name = utils.get_obj_name(file_wb_path, is_folder=False) - query = {'response-content-disposition': 'attachment; filename*=UTF-8\'\'text-file-1.txt'} + query = { + 'response-content-disposition': ('attachment; filename="text-file-1.txt"; ' + 'filename*=UTF-8\'\'text-file-1.txt') + } signed_url = mock_provider._build_and_sign_url('GET', file_obj_name, **query) return_url = await mock_provider.download(file_wb_path, accept_url=True) @@ -233,8 +236,11 @@ async def test_download_file_with_accept_url(self, mock_time, mock_provider, fil async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path, display_name_arg, expected_name): file_obj_name = utils.get_obj_name(file_wb_path, is_folder=False) - query = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'{}'.format(expected_name)} + query = { + 'response-content-disposition': ('attachment; filename="{}"; ' + 'filename*=UTF-8\'\'{}').format(expected_name, + expected_name) + } signed_url = mock_provider._build_and_sign_url('GET', file_obj_name, **query) return_url = await mock_provider.download(file_wb_path, accept_url=True, display_name=display_name_arg) diff --git a/tests/providers/s3/test_provider.py b/tests/providers/s3/test_provider.py index fb60c9c6e..a0fc71d42 100644 --- a/tests/providers/s3/test_provider.py +++ b/tests/providers/s3/test_provider.py @@ -296,7 +296,7 @@ class TestCRUD: async def test_download(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, body=b'delicious', auto_length=True) @@ -311,7 +311,7 @@ async def test_download(self, provider, mock_time): async def test_download_range(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, body=b'de', auto_length=True, status=206) @@ -327,7 +327,7 @@ async def test_download_range(self, provider, mock_time): async def test_download_version(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url( 100, query_parameters={'versionId': 'someversion'}, @@ -350,8 +350,11 @@ async def test_download_version(self, provider, mock_time): async def test_download_with_display_name(self, provider, mock_time, display_name_arg, expected_name): path = WaterButlerPath('/muhtriangle') - response_headers = {'response-content-disposition': - "attachment; filename*=UTF-8''{}".format(expected_name)} + response_headers = { + 'response-content-disposition': ('attachment; filename="{}"; ' + 'filename*=UTF-8\'\'{}').format(expected_name, + expected_name) + } url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, body=b'delicious', auto_length=True) @@ -366,7 +369,7 @@ async def test_download_with_display_name(self, provider, mock_time, display_nam async def test_download_not_found(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, status=404) @@ -998,7 +1001,7 @@ async def test_large_folder_delete(self, provider, mock_time): async def test_accepts_url(self, provider, mock_time): path = WaterButlerPath('/my-image') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'my-image'} + 'attachment; filename="my-image"; filename*=UTF-8\'\'my-image'} url = provider.bucket.new_key(path.path).generate_url(100, 'GET', response_headers=response_headers) diff --git a/tests/server/api/v1/test_metadata_mixin.py b/tests/server/api/v1/test_metadata_mixin.py index 7633f024d..ba9dff481 100644 --- a/tests/server/api/v1/test_metadata_mixin.py +++ b/tests/server/api/v1/test_metadata_mixin.py @@ -89,7 +89,7 @@ async def test_download_file_headers(self, handler, mock_stream): assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1') assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1') - disposition = b'attachment; filename*=UTF-8\'\'peanut-butter.docx' + disposition = b'attachment; filename="peanut-butter.docx"; filename*=UTF-8\'\'peanut-butter.docx' assert handler._headers['Content-Disposition'] == disposition handler.write_stream.assert_awaited_once() @@ -104,19 +104,19 @@ async def test_download_file_headers_no_stream_name(self, handler, mock_stream): assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1') assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1') - disposition = b'attachment; filename*=UTF-8\'\'test_file' + disposition = b'attachment; filename="test_file"; filename*=UTF-8\'\'test_file' assert handler._headers['Content-Disposition'] == disposition handler.write_stream.assert_awaited_once() @pytest.mark.asyncio - @pytest.mark.parametrize("given_arg,expected_name", [ - (['résumé.doc'], b'r%C3%A9sum%C3%A9.doc'), - ([''], b'test_file'), - ([], b'test_file'), + @pytest.mark.parametrize("given_arg,expected_name,filtered_name", [ + (['résumé.doc'], b'r%C3%A9sum%C3%A9.doc', b'resume.doc'), + ([''], b'test_file', b'test_file'), + ([], b'test_file', b'test_file'), ]) async def test_download_file_with_display_name(self, handler, mock_stream, given_arg, - expected_name): + expected_name, filtered_name): handler.provider.download = MockCoroutine(return_value=mock_stream) handler.path = WaterButlerPath('/test_file') @@ -126,7 +126,7 @@ async def test_download_file_with_display_name(self, handler, mock_stream, given assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1') assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1') - disposition = b'attachment; filename*=UTF-8\'\'' + expected_name + disposition = b'attachment; filename="' + filtered_name + b'"; filename*=UTF-8\'\'' + expected_name assert handler._headers['Content-Disposition'] == disposition handler.write_stream.assert_awaited_once() @@ -212,7 +212,7 @@ async def test_download_folder_as_zip(self, handler, mock_stream): await handler.download_folder_as_zip() assert handler._headers['Content-Type'] == bytes('application/zip', 'latin-1') - expected = b'attachment; filename*=UTF-8\'\'test_file.zip' + expected = b'attachment; filename="test_file.zip"; filename*=UTF-8\'\'test_file.zip' assert handler._headers['Content-Disposition'] == expected handler.write_stream.assert_called_once_with(mock_stream) @@ -226,7 +226,7 @@ async def test_download_folder_as_zip_root(self, handler, mock_stream): await handler.download_folder_as_zip() assert handler._headers['Content-Type'] == bytes('application/zip', 'latin-1') - expected = b'attachment; filename*=UTF-8\'\'MockProvider-archive.zip' + expected = b'attachment; filename="MockProvider-archive.zip"; filename*=UTF-8\'\'MockProvider-archive.zip' assert handler._headers['Content-Disposition'] == expected handler.write_stream.assert_called_once_with(mock_stream) diff --git a/waterbutler/core/utils.py b/waterbutler/core/utils.py index bc264be77..1598ac24c 100644 --- a/waterbutler/core/utils.py +++ b/waterbutler/core/utils.py @@ -1,8 +1,10 @@ +import re import json import pytz import asyncio import logging import functools +import unicodedata import dateutil.parser from urllib import parse # from concurrent.futures import ProcessPoolExecutor TODO Get this working @@ -117,6 +119,23 @@ def normalize_datetime(date_string): return parsed_datetime.isoformat() +def strip_for_disposition(filename): + """Convert given filename to a form useable by a non-extended parameter. + + The permitted characters allowed in a non-extended parameter are defined in RFC-2616, Section + 2.2. This is a subset of the ascii character set. This function converts non-ascii characters + to their nearest ascii equivalent or strips them if there is no equivalent. It then replaces + control characters with underscores and escapes blackslashes and double quotes. + + :param str filename: a filename to encode + """ + + nfkd_form = unicodedata.normalize('NFKD', filename) + only_ascii = nfkd_form.encode('ASCII', 'ignore') + no_ctrl = re.sub(r'[\x00-\x1f]', '_', only_ascii.decode('ascii')) + return no_ctrl.replace('\\', '\\\\').replace('"', '\\"') + + def encode_for_disposition(filename): """Convert given filename into utf-8 octets, then percent encode them. @@ -149,8 +168,10 @@ def make_disposition(filename): if not filename: return 'attachment' else: + stripped_filename = strip_for_disposition(filename) encoded_filename = encode_for_disposition(filename) - return 'attachment; filename*=UTF-8\'\'{}'.format(encoded_filename) + return 'attachment; filename="{}"; filename*=UTF-8\'\'{}'.format(stripped_filename, + encoded_filename) class ZipStreamGenerator: