diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index d4eb86c50..91a55c40d 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -89,10 +89,22 @@ async def test_all_retry(self): class TestContentDisposition: + @pytest.mark.parametrize("filename,expected", [ + ('meow.txt', 'meow.txt'), + ('résumé.txt', 'resume.txt'), + (' ¿.surprise', ' .surprise'), + ('a "file"', 'a \\"file\\"'), + ('yes\\no', 'yes\\\\no'), + ('ctrl\x09ch\x08ar', 'ctrl_ch_ar'), + ]) + def test_strip_for_disposition(self, filename, expected): + disposition = utils.strip_for_disposition(filename) + assert disposition == expected + @pytest.mark.parametrize("filename,expected", [ (None, 'attachment'), - ('foo.txt', "attachment; filename*=UTF-8''foo.txt"), - (' ¿.surprise', "attachment; filename*=UTF-8''%20%C2%BF.surprise"), + ('foo.txt', "attachment; filename=\"foo.txt\"; filename*=UTF-8''foo.txt"), + (' ¿.surprise', "attachment; filename=\" .surprise\"; filename*=UTF-8''%20%C2%BF.surprise"), ]) def test_content_disposition(self, filename, expected): disposition = utils.make_disposition(filename) diff --git a/tests/providers/googlecloud/test_provider.py b/tests/providers/googlecloud/test_provider.py index 4a6233768..6d22e1850 100644 --- a/tests/providers/googlecloud/test_provider.py +++ b/tests/providers/googlecloud/test_provider.py @@ -215,7 +215,10 @@ async def test_download_file(self, mock_time, mock_provider, file_wb_path, file_ @pytest.mark.aiohttpretty async def test_download_file_with_accept_url(self, mock_time, mock_provider, file_wb_path): file_obj_name = utils.get_obj_name(file_wb_path, is_folder=False) - query = {'response-content-disposition': 'attachment; filename*=UTF-8\'\'text-file-1.txt'} + query = { + 'response-content-disposition': ('attachment; filename="text-file-1.txt"; ' + 'filename*=UTF-8\'\'text-file-1.txt') + } signed_url = mock_provider._build_and_sign_url('GET', file_obj_name, **query) return_url = await mock_provider.download(file_wb_path, accept_url=True) @@ -233,8 +236,11 @@ async def test_download_file_with_accept_url(self, mock_time, mock_provider, fil async def test_download_file_with_display_name(self, mock_time, mock_provider, file_wb_path, display_name_arg, expected_name): file_obj_name = utils.get_obj_name(file_wb_path, is_folder=False) - query = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'{}'.format(expected_name)} + query = { + 'response-content-disposition': ('attachment; filename="{}"; ' + 'filename*=UTF-8\'\'{}').format(expected_name, + expected_name) + } signed_url = mock_provider._build_and_sign_url('GET', file_obj_name, **query) return_url = await mock_provider.download(file_wb_path, accept_url=True, display_name=display_name_arg) diff --git a/tests/providers/s3/test_provider.py b/tests/providers/s3/test_provider.py index fb60c9c6e..a0fc71d42 100644 --- a/tests/providers/s3/test_provider.py +++ b/tests/providers/s3/test_provider.py @@ -296,7 +296,7 @@ class TestCRUD: async def test_download(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, body=b'delicious', auto_length=True) @@ -311,7 +311,7 @@ async def test_download(self, provider, mock_time): async def test_download_range(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, body=b'de', auto_length=True, status=206) @@ -327,7 +327,7 @@ async def test_download_range(self, provider, mock_time): async def test_download_version(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url( 100, query_parameters={'versionId': 'someversion'}, @@ -350,8 +350,11 @@ async def test_download_version(self, provider, mock_time): async def test_download_with_display_name(self, provider, mock_time, display_name_arg, expected_name): path = WaterButlerPath('/muhtriangle') - response_headers = {'response-content-disposition': - "attachment; filename*=UTF-8''{}".format(expected_name)} + response_headers = { + 'response-content-disposition': ('attachment; filename="{}"; ' + 'filename*=UTF-8\'\'{}').format(expected_name, + expected_name) + } url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, body=b'delicious', auto_length=True) @@ -366,7 +369,7 @@ async def test_download_with_display_name(self, provider, mock_time, display_nam async def test_download_not_found(self, provider, mock_time): path = WaterButlerPath('/muhtriangle') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'muhtriangle'} + 'attachment; filename="muhtriangle"; filename*=UTF-8\'\'muhtriangle'} url = provider.bucket.new_key(path.path).generate_url(100, response_headers=response_headers) aiohttpretty.register_uri('GET', url, status=404) @@ -998,7 +1001,7 @@ async def test_large_folder_delete(self, provider, mock_time): async def test_accepts_url(self, provider, mock_time): path = WaterButlerPath('/my-image') response_headers = {'response-content-disposition': - 'attachment; filename*=UTF-8\'\'my-image'} + 'attachment; filename="my-image"; filename*=UTF-8\'\'my-image'} url = provider.bucket.new_key(path.path).generate_url(100, 'GET', response_headers=response_headers) diff --git a/tests/server/api/v1/test_metadata_mixin.py b/tests/server/api/v1/test_metadata_mixin.py index 7633f024d..ba9dff481 100644 --- a/tests/server/api/v1/test_metadata_mixin.py +++ b/tests/server/api/v1/test_metadata_mixin.py @@ -89,7 +89,7 @@ async def test_download_file_headers(self, handler, mock_stream): assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1') assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1') - disposition = b'attachment; filename*=UTF-8\'\'peanut-butter.docx' + disposition = b'attachment; filename="peanut-butter.docx"; filename*=UTF-8\'\'peanut-butter.docx' assert handler._headers['Content-Disposition'] == disposition handler.write_stream.assert_awaited_once() @@ -104,19 +104,19 @@ async def test_download_file_headers_no_stream_name(self, handler, mock_stream): assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1') assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1') - disposition = b'attachment; filename*=UTF-8\'\'test_file' + disposition = b'attachment; filename="test_file"; filename*=UTF-8\'\'test_file' assert handler._headers['Content-Disposition'] == disposition handler.write_stream.assert_awaited_once() @pytest.mark.asyncio - @pytest.mark.parametrize("given_arg,expected_name", [ - (['résumé.doc'], b'r%C3%A9sum%C3%A9.doc'), - ([''], b'test_file'), - ([], b'test_file'), + @pytest.mark.parametrize("given_arg,expected_name,filtered_name", [ + (['résumé.doc'], b'r%C3%A9sum%C3%A9.doc', b'resume.doc'), + ([''], b'test_file', b'test_file'), + ([], b'test_file', b'test_file'), ]) async def test_download_file_with_display_name(self, handler, mock_stream, given_arg, - expected_name): + expected_name, filtered_name): handler.provider.download = MockCoroutine(return_value=mock_stream) handler.path = WaterButlerPath('/test_file') @@ -126,7 +126,7 @@ async def test_download_file_with_display_name(self, handler, mock_stream, given assert handler._headers['Content-Length'] == bytes(str(mock_stream.size), 'latin-1') assert handler._headers['Content-Type'] == bytes(mock_stream.content_type, 'latin-1') - disposition = b'attachment; filename*=UTF-8\'\'' + expected_name + disposition = b'attachment; filename="' + filtered_name + b'"; filename*=UTF-8\'\'' + expected_name assert handler._headers['Content-Disposition'] == disposition handler.write_stream.assert_awaited_once() @@ -212,7 +212,7 @@ async def test_download_folder_as_zip(self, handler, mock_stream): await handler.download_folder_as_zip() assert handler._headers['Content-Type'] == bytes('application/zip', 'latin-1') - expected = b'attachment; filename*=UTF-8\'\'test_file.zip' + expected = b'attachment; filename="test_file.zip"; filename*=UTF-8\'\'test_file.zip' assert handler._headers['Content-Disposition'] == expected handler.write_stream.assert_called_once_with(mock_stream) @@ -226,7 +226,7 @@ async def test_download_folder_as_zip_root(self, handler, mock_stream): await handler.download_folder_as_zip() assert handler._headers['Content-Type'] == bytes('application/zip', 'latin-1') - expected = b'attachment; filename*=UTF-8\'\'MockProvider-archive.zip' + expected = b'attachment; filename="MockProvider-archive.zip"; filename*=UTF-8\'\'MockProvider-archive.zip' assert handler._headers['Content-Disposition'] == expected handler.write_stream.assert_called_once_with(mock_stream) diff --git a/waterbutler/core/utils.py b/waterbutler/core/utils.py index bc264be77..1598ac24c 100644 --- a/waterbutler/core/utils.py +++ b/waterbutler/core/utils.py @@ -1,8 +1,10 @@ +import re import json import pytz import asyncio import logging import functools +import unicodedata import dateutil.parser from urllib import parse # from concurrent.futures import ProcessPoolExecutor TODO Get this working @@ -117,6 +119,23 @@ def normalize_datetime(date_string): return parsed_datetime.isoformat() +def strip_for_disposition(filename): + """Convert given filename to a form useable by a non-extended parameter. + + The permitted characters allowed in a non-extended parameter are defined in RFC-2616, Section + 2.2. This is a subset of the ascii character set. This function converts non-ascii characters + to their nearest ascii equivalent or strips them if there is no equivalent. It then replaces + control characters with underscores and escapes blackslashes and double quotes. + + :param str filename: a filename to encode + """ + + nfkd_form = unicodedata.normalize('NFKD', filename) + only_ascii = nfkd_form.encode('ASCII', 'ignore') + no_ctrl = re.sub(r'[\x00-\x1f]', '_', only_ascii.decode('ascii')) + return no_ctrl.replace('\\', '\\\\').replace('"', '\\"') + + def encode_for_disposition(filename): """Convert given filename into utf-8 octets, then percent encode them. @@ -149,8 +168,10 @@ def make_disposition(filename): if not filename: return 'attachment' else: + stripped_filename = strip_for_disposition(filename) encoded_filename = encode_for_disposition(filename) - return 'attachment; filename*=UTF-8\'\'{}'.format(encoded_filename) + return 'attachment; filename="{}"; filename*=UTF-8\'\'{}'.format(stripped_filename, + encoded_filename) class ZipStreamGenerator: