From e8a2c93773b26a6b8b735c9a7ddfbde1f2cc65ab Mon Sep 17 00:00:00 2001 From: Vadim Date: Wed, 11 Jun 2025 15:49:27 -0400 Subject: [PATCH] =?UTF-8?q?Revert=20"PILOT-7878:=20CLI=20download=20of=20l?= =?UTF-8?q?arge=20folder=20on=20bd2data=20resulting=20in=20glob=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bc0772dc6f75cb11c30307ebc0e0d057068780f9. --- .../file_download/download_client.py | 113 +++++++----------- app/services/output_manager/error_handler.py | 1 - docs/compatible_version.ndjson | 1 - pyproject.toml | 2 +- .../test_file_download_client.py | 45 ------- 5 files changed, 45 insertions(+), 117 deletions(-) diff --git a/app/services/file_manager/file_download/download_client.py b/app/services/file_manager/file_download/download_client.py index 6b3e546e..ecb451f2 100644 --- a/app/services/file_manager/file_download/download_client.py +++ b/app/services/file_manager/file_download/download_client.py @@ -244,77 +244,52 @@ def check_download_preparing_status(self): executor.shutdown(wait=False) @require_valid_token() - def download_file(self, url, local_filename, download_mode='single'): # noqa: C901 + def download_file(self, url, local_filename, download_mode='single'): logger.info('start downloading...') filename = local_filename.split('/')[-1] - max_retries = 3 - retry_count = 0 - - while retry_count <= max_retries: - try: - with httpx.stream('GET', url, timeout=httpx.Timeout(30.0, read=300.0)) as r: - r.raise_for_status() - - if r.headers.get('Content-Type') == 'application/zip' or download_mode == 'batch': - size = r.headers.get('Content-length') - self.total_size = int(size) if size else self.total_size - - stream_iter = r.iter_bytes(chunk_size=1024) - if self.total_size: - downloaded_size = 0 - with open(local_filename, 'wb') as file, tqdm( - desc=f'Downloading {filename}', - total=self.total_size, - unit='iB', - unit_scale=True, - unit_divisor=1024, - bar_format='{desc} |{bar:30} {percentage:3.0f}% {remaining}', - ) as bar: - try: - for data in stream_iter: - size = file.write(data) - bar.update(size) - downloaded_size += len(data) - except httpx.ReadTimeout: - # Re-raise so it's caught by the outer exception handler - raise - - if downloaded_size != self.total_size: - SrvErrorHandler.customized_handle( - ECustomizedError.DOWNLOAD_SIZE_MISMATCH, - if_exit=self.interactive, - value=(self.total_size, downloaded_size), - ) - else: - with open(local_filename, 'wb') as file: - part = 0 - for data in r.iter_bytes(chunk_size=1024): - size = file.write(data) - progress = '.' * part - click.echo(f'Downloading{progress}\r', nl=False) - if part > 5: - part = 0 - else: - part += 1 - logger.info('Download complete') - - return local_filename - - except httpx.ReadTimeout: - retry_count += 1 - if retry_count > max_retries: - logger.error(f'Download failed after {max_retries} attempts due to read timeout') - SrvErrorHandler.customized_handle( - ECustomizedError.DOWNLOAD_TIMEOUT, if_exit=self.interactive, value=filename - ) - return None - wait_time = 2**retry_count - logger.warning(f'Read timeout encountered, retrying in {wait_time}s ({retry_count}/{max_retries})') - time.sleep(wait_time) - - except Exception as e: - logger.error(f'Error downloading: {e}') - return None + try: + with httpx.stream('GET', url) as r: + r.raise_for_status() + if r.headers.get('Content-Type') == 'application/zip' or download_mode == 'batch': + size = r.headers.get('Content-length') + self.total_size = int(size) if size else self.total_size + if self.total_size: + downloaded_size = 0 + with open(local_filename, 'wb') as file, tqdm( + desc='Downloading {}'.format(filename), + total=self.total_size, + unit='iB', + unit_scale=True, + unit_divisor=1024, + bar_format='{desc} |{bar:30} {percentage:3.0f}% {remaining}', + ) as bar: + for data in r.iter_bytes(chunk_size=1024): + size = file.write(data) + bar.update(size) + downloaded_size += len(data) + + # integrity check for downloaded file + if downloaded_size != self.total_size: + SrvErrorHandler.customized_handle( + ECustomizedError.DOWNLOAD_SIZE_MISMATCH, + if_exit=self.interactive, + value=(self.total_size, downloaded_size), + ) + else: + with open(local_filename, 'wb') as file: + part = 0 + for data in r.iter_bytes(chunk_size=1024): + size = file.write(data) + progress = '.' * part + click.echo(f'Downloading{progress}\r', nl=False) + if part > 5: + part = 0 + else: + part += 1 + logger.info('Download complete') + except Exception as e: + logger.error(f'Error downloading: {e}') + return local_filename @require_valid_token() def group_file_geid_by_project(self, file_info): diff --git a/app/services/output_manager/error_handler.py b/app/services/output_manager/error_handler.py index f9f9da8a..07e86f96 100644 --- a/app/services/output_manager/error_handler.py +++ b/app/services/output_manager/error_handler.py @@ -43,7 +43,6 @@ class ECustomizedError(enum.Enum): UPLOAD_FAIL = 'UPLOAD_FAIL' UPLOAD_SKIP_DUPLICATION = 'UPLOAD_SKIP_DUPLICATION' UPLOAD_TIMEOUT = 'UPLOAD_TIMEOUT' - DOWNLOAD_TIMEOUT = 'DOWNLOAD_TIMEOUT' # the error when multipart upload id is not exist UPLOAD_ID_NOT_EXIST = 'UPLOAD_ID_NOT_EXIST' MANIFEST_OF_FOLDER_FILE_EXIST = 'MANIFEST_OF_FOLDER_FILE_EXIST' diff --git a/docs/compatible_version.ndjson b/docs/compatible_version.ndjson index 08339c06..c39b2f33 100644 --- a/docs/compatible_version.ndjson +++ b/docs/compatible_version.ndjson @@ -11,4 +11,3 @@ {"Cli Version": "3.19.2", "Pilot Release Version": "2.15.2", "Compatible Version": "2.15.2"} {"Cli Version": "3.19.3", "Pilot Release Version": "2.15.2", "Compatible Version": "2.15.2"} {"Cli Version": "3.19.4", "Pilot Release Version": "2.15.2", "Compatible Version": "2.15.2"} -{"Cli Version": "3.19.5", "Pilot Release Version": "2.15.2", "Compatible Version": "2.15.2"} diff --git a/pyproject.toml b/pyproject.toml index 24ed0290..48bea3cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "app" -version = "3.19.5" +version = "3.19.4" description = "This service is designed to support pilot platform" authors = ["Indoc Systems"] diff --git a/tests/app/services/file_manager/file_download/test_file_download_client.py b/tests/app/services/file_manager/file_download/test_file_download_client.py index e7e650c9..d3475715 100644 --- a/tests/app/services/file_manager/file_download/test_file_download_client.py +++ b/tests/app/services/file_manager/file_download/test_file_download_client.py @@ -6,7 +6,6 @@ import time import click -import httpx import jwt import pytest from pytest_httpx import IteratorStream @@ -316,47 +315,3 @@ def mock_long_running_task(): assert result == EFileStatus.FAILED assert test_client.check_point is True error_handler_mock.assert_called_once_with(ECustomizedError.DOWNLOAD_STATUS_CHECK_FAILED, if_exit=True) - - -def test_download_file_read_timeout_retry_and_failure(mocker): - """Test that file download properly handles ReadTimeout with retries and eventual failure.""" - test_client = SrvFileDownload(ItemZone.GREENROOM.value, True) - test_client.total_size = 1000 # Set some total size for the download - - mocker.patch( - 'app.services.user_authentication.token_manager.SrvTokenManager.decode_access_token', - return_value=decoded_token(), - ) - - logger_mock = mocker.patch('app.services.logger_services.log_functions.warning') - error_logger_mock = mocker.patch('app.services.logger_services.log_functions.error') - - error_handler_mock = mocker.patch( - 'app.services.output_manager.error_handler.SrvErrorHandler.customized_handle', - ) - - mocker.patch('time.sleep') - - mock_response = mocker.MagicMock() - mock_response.raise_for_status.return_value = None - mock_response.headers = {'Content-Type': 'application/zip', 'Content-length': '1000'} - mock_response.__enter__ = mocker.MagicMock(return_value=mock_response) - mock_response.__exit__ = mocker.MagicMock(return_value=None) - - def iter_bytes_side_effect(*args, **kwargs): - raise httpx.ReadTimeout('Simulated timeout during download', request=mocker.MagicMock()) - - mock_response.iter_bytes.side_effect = iter_bytes_side_effect - - stream_mock = mocker.patch('httpx.stream', return_value=mock_response) - - result = test_client.download_file('http://test-url.com', 'test_file.zip', download_mode='single') - - assert result is None - assert stream_mock.call_count == 4 - - assert logger_mock.call_count == 3 - - error_logger_mock.assert_called_with('Download failed after 3 attempts due to read timeout') - - error_handler_mock.assert_called_once_with(ECustomizedError.DOWNLOAD_TIMEOUT, if_exit=True, value='test_file.zip')