diff --git a/doc/changelog.rst b/doc/changelog.rst index 91ef9add..c5082234 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -5,6 +5,7 @@ What's New * Updated certificate bundle. * Fixed TypeError crash on bad Meta Refresh HTML element. +* Fixed unable to fetch FTP files with spaces and other special characters. * Added ``--no-cache``. * Added ``--report-speed``. diff --git a/wpull/app_test.py b/wpull/app_test.py index 53d67277..0561c074 100644 --- a/wpull/app_test.py +++ b/wpull/app_test.py @@ -1525,7 +1525,7 @@ def test_basic(self): def test_login(self): arg_parser = AppArgumentParser() args = arg_parser.parse_args([ - self.get_url('/example.txt'), + self.get_url('/example (copy).txt'), '--user', 'smaug', '--password', 'gold1', ]) @@ -1542,7 +1542,7 @@ def test_login(self): def test_login_fail(self): arg_parser = AppArgumentParser() args = arg_parser.parse_args([ - self.get_url('/example.txt'), + self.get_url('/example (copy).txt'), '--user', 'smaug', '--password', 'hunter2', '--tries', '1' @@ -1582,7 +1582,7 @@ def test_args(self): print(os.listdir('.')) self.assertTrue(os.path.exists('.listing')) - self.assertTrue(os.path.exists('example.txt')) + self.assertTrue(os.path.exists('example (copy).txt')) self.assertTrue(os.path.exists('readme.txt')) self.assertFalse(os.path.islink('readme.txt')) self.assertTrue(os.path.exists('example1/.listing')) @@ -1618,7 +1618,7 @@ def test_retr_symlinks_off(self): print(os.listdir('.')) - self.assertTrue(os.path.exists('example.txt')) + self.assertTrue(os.path.exists('example (copy).txt')) self.assertTrue(os.path.exists('readme.txt')) self.assertTrue(os.path.islink('readme.txt')) diff --git a/wpull/ftp/client.py b/wpull/ftp/client.py index 0b611b35..97293122 100644 --- a/wpull/ftp/client.py +++ b/wpull/ftp/client.py @@ -125,7 +125,7 @@ def fetch(self, request): yield From(self._open_data_stream()) - command = Command('RETR', request.url_info.path) + command = Command('RETR', request.file_path) yield From(self._begin_stream(command)) @@ -147,8 +147,8 @@ def fetch_file_listing(self, request): yield From(self._prepare_fetch(request, response)) yield From(self._open_data_stream()) - mlsd_command = Command('MLSD', self._request.url_info.path) - list_command = Command('LIST', self._request.url_info.path) + mlsd_command = Command('MLSD', self._request.file_path) + list_command = Command('LIST', self._request.file_path) try: yield From(self._begin_stream(mlsd_command)) @@ -343,7 +343,7 @@ def _fetch_size(self, request): Coroutine. ''' try: - size = yield From(self._commander.size(request.url_info.path)) + size = yield From(self._commander.size(request.file_path)) raise Return(size) except FTPServerError: return diff --git a/wpull/ftp/client_test.py b/wpull/ftp/client_test.py index a17acc3b..f0ca95eb 100644 --- a/wpull/ftp/client_test.py +++ b/wpull/ftp/client_test.py @@ -25,7 +25,7 @@ def test_fetch_file(self): with client.session() as session: response = yield From( - session.fetch(Request(self.get_url('/example.txt'))) + session.fetch(Request(self.get_url('/example (copy).txt'))) ) yield From(session.read_content(file)) @@ -67,7 +67,7 @@ def test_fetch_file_restart(self): file = io.BytesIO() with client.session() as session: - request = Request(self.get_url('/example.txt')) + request = Request(self.get_url('/example (copy).txt')) request.set_continue(10) response = yield From(session.fetch(request)) self.assertEqual(10, response.restart_value) @@ -84,7 +84,7 @@ def test_fetch_file_restart_not_supported(self): file = io.BytesIO() with client.session() as session: - request = Request(self.get_url('/example.txt')) + request = Request(self.get_url('/example (copy).txt')) request.set_continue(99999) # Magic value in the test server response = yield From(session.fetch(request)) self.assertFalse(response.restart_value) @@ -110,7 +110,7 @@ def test_fetch_listing(self): self.assertEqual('junk', response.files[0].name) self.assertEqual('example1', response.files[1].name) self.assertEqual('example2', response.files[2].name) - self.assertEqual('example.txt', response.files[3].name) + self.assertEqual('example (copy).txt', response.files[3].name) self.assertEqual('readme.txt', response.files[4].name) @wpull.testing.async.async_test(timeout=DEFAULT_TIMEOUT) @@ -131,5 +131,5 @@ def override_func(): with self.assertRaises(ProtocolError): yield From( - session.fetch(Request(self.get_url('/example.txt'))) + session.fetch(Request(self.get_url('/example (copy).txt'))) ) diff --git a/wpull/ftp/request.py b/wpull/ftp/request.py index 60368a1e..32dbea23 100644 --- a/wpull/ftp/request.py +++ b/wpull/ftp/request.py @@ -1,5 +1,6 @@ '''FTP conversation classes''' import re +import urllib.parse from wpull.abstract.request import SerializableMixin, DictableMixin, \ URLPropertyMixin, ProtocolResponseMixin @@ -120,6 +121,7 @@ class Request(URLPropertyMixin): username (str): Username for login. password (str): Password for login. restart_value (int): Optional value for ``REST`` command. + file_path (str): Path of the file. ''' def __init__(self, url): super().__init__() @@ -130,6 +132,10 @@ def __init__(self, url): self.password = None self.restart_value = None + @property + def file_path(self): + return urllib.parse.unquote(self.url_info.path) + def to_dict(self): return { 'protocol': 'ftp', @@ -138,6 +144,7 @@ def to_dict(self): 'username': self.username, 'password': self.password, 'restart_value': self.restart_value, + 'file_path': self.file_path, } def set_continue(self, offset): diff --git a/wpull/ftp/stream_test.py b/wpull/ftp/stream_test.py index 7648b653..d698f4fa 100644 --- a/wpull/ftp/stream_test.py +++ b/wpull/ftp/stream_test.py @@ -49,7 +49,7 @@ def log_cb(data_type, data): data_stream = DataStream(data_connection) - yield From(control_stream.write_command(Command('RETR', 'example.txt'))) + yield From(control_stream.write_command(Command('RETR', 'example (copy).txt'))) reply = yield From(control_stream.read_reply()) self.assertEqual(150, reply.code) diff --git a/wpull/path.py b/wpull/path.py index 8a9091a1..9acda66b 100644 --- a/wpull/path.py +++ b/wpull/path.py @@ -72,6 +72,9 @@ def get_filename(self, url_info): alt_char=alt_char )) + if url_info.scheme == 'ftp': + parts = [urllib.parse.unquote(part) for part in parts] + parts = [self.safe_filename(part) for part in parts] return os.path.join(self._root, *parts) diff --git a/wpull/testing/ftp.py b/wpull/testing/ftp.py index dd5bac92..baf62bdf 100644 --- a/wpull/testing/ftp.py +++ b/wpull/testing/ftp.py @@ -42,14 +42,14 @@ def __init__(self, reader, writer): self.routes = { '/': ('dir', - b'junk\nexample1\nexample2\nexample.txt\nreadme.txt\n', + b'junk\nexample1\nexample2\nexample (copy).txt\nreadme.txt\n', ('drw-r--r-- 1 smaug smaug 0 Apr 01 00:00 junk\r\n' 'drw-r--r-- 1 smaug smaug 0 Apr 01 00:00 example1\r\n' 'drw-r--r-- 1 smaug smaug 0 Apr 01 00:00 example2\r\n' - '-rw-r--r-- 1 smaug smaug 42 Apr 01 00:00 example.txt\r\n' - 'lrwxrwxrwx 1 smaug smaug 4 Apr 01 00:00 readme.txt -> example.txt\r\n' + '-rw-r--r-- 1 smaug smaug 42 Apr 01 00:00 example (copy).txt\r\n' + 'lrwxrwxrwx 1 smaug smaug 4 Apr 01 00:00 readme.txt -> example (copy).txt\r\n' ).encode('utf-8')), - '/example.txt': + '/example (copy).txt': ('file', 'The real treasure is in Smaug’s heart 💗.\n'.encode('utf-8')), '/readme.txt': diff --git a/wpull/writer_test.py b/wpull/writer_test.py index b15f1c12..470abf1b 100644 --- a/wpull/writer_test.py +++ b/wpull/writer_test.py @@ -276,11 +276,11 @@ class TestWriterFTPApp(FTPTestCase): @wpull.testing.async.async_test(timeout=DEFAULT_TIMEOUT) def test_file_continue(self): arg_parser = AppArgumentParser() - args = arg_parser.parse_args([self.get_url('/example.txt'), + args = arg_parser.parse_args([self.get_url('/example (copy).txt'), '--continue', '--debug']) with cd_tempdir() as temp_dir: - filename = os.path.join(temp_dir, 'example.txt') + filename = os.path.join(temp_dir, 'example (copy).txt') with open(filename, 'wb') as out_file: out_file.write(b'The')