From f953e85cfec845c598781f5b027b48e2f04ea851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Urba=C5=84ski?= <122983254+mjurbanski-reef@users.noreply.github.com> Date: Mon, 19 Feb 2024 14:39:07 +0100 Subject: [PATCH] allow bigger min_part_size without having to explicitly set recommened_part_size as well (#476) * allow bigger min_part_size without having to explicitly set recommended_part_size as well * fix min_part_size param docstrings --- b2sdk/bucket.py | 56 +++++++++++----------- b2sdk/transfer/emerge/planner/planner.py | 40 +++++++++++----- changelog.d/+min_part_size.added.md | 1 + test/unit/internal/test_emerge_planner.py | 58 +++++++++++++++++++++++ 4 files changed, 117 insertions(+), 38 deletions(-) create mode 100644 changelog.d/+min_part_size.added.md diff --git a/b2sdk/bucket.py b/b2sdk/bucket.py index 36a2ed7e6..6b729f457 100644 --- a/b2sdk/bucket.py +++ b/b2sdk/bucket.py @@ -602,10 +602,10 @@ def upload_local_file( self, local_file, file_name, - content_type=None, + content_type: str | None = None, file_info: dict | None = None, - sha1_sum=None, - min_part_size=None, + sha1_sum: str | None = None, + min_part_size: int | None = None, progress_listener=None, encryption: EncryptionSetting | None = None, file_retention: FileRetentionSetting | None = None, @@ -630,10 +630,10 @@ def upload_local_file( :param str local_file: a path to a file on local disk :param str file_name: a file name of the new B2 file - :param str,None content_type: the MIME type, or ``None`` to accept the default based on file extension of the B2 file name - :param dict,None file_info: a file info to store with the file or ``None`` to not store anything - :param str,None sha1_sum: file SHA1 hash or ``None`` to compute it automatically - :param int min_part_size: a minimum size of a part + :param content_type: the MIME type, or ``None`` to accept the default based on file extension of the B2 file name + :param file_info: a file info to store with the file or ``None`` to not store anything + :param sha1_sum: file SHA1 hash or ``None`` to compute it automatically + :param min_part_size: lower limit of part size for the transfer planner, in bytes :param b2sdk.v2.AbstractProgressListener,None progress_listener: a progress listener object to use, or ``None`` to not report progress :param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown) :param b2sdk.v2.FileRetentionSetting file_retention: file retention setting @@ -770,10 +770,10 @@ def upload_unbound_stream( :param encryption: encryption settings (``None`` if unknown) :param file_retention: file retention setting :param legal_hold: legal hold setting - :param min_part_size: a minimum size of a part :param recommended_upload_part_size: the recommended part size to use for uploading local sources or ``None`` to determine automatically - :param max_part_size: a maximum size of a part + :param min_part_size: lower limit of part size for the transfer planner, in bytes + :param max_part_size: upper limit of part size for the transfer planner, in bytes :param large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown :param buffers_count: desired number of buffers allocated, cannot be smaller than 2 :param buffer_size: size of a single buffer that we pull data to or upload data to B2. If ``None``, @@ -849,7 +849,7 @@ def upload( file_name, content_type=None, file_info=None, - min_part_size=None, + min_part_size: int | None = None, progress_listener=None, encryption: EncryptionSetting | None = None, file_retention: FileRetentionSetting | None = None, @@ -880,7 +880,7 @@ def upload( :param str file_name: the file name of the new B2 file :param str,None content_type: the MIME type, or ``None`` to accept the default based on file extension of the B2 file name :param dict,None file_info: a file info to store with the file or ``None`` to not store anything - :param int,None min_part_size: the smallest part size to use or ``None`` to determine automatically + :param min_part_size: lower limit of part size for the transfer planner, in bytes :param b2sdk.v2.AbstractProgressListener,None progress_listener: a progress listener object to use, or ``None`` to not report progress :param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown) :param b2sdk.v2.FileRetentionSetting file_retention: file retention setting @@ -933,8 +933,8 @@ def create_file( encryption: EncryptionSetting | None = None, file_retention: FileRetentionSetting | None = None, legal_hold: LegalHold | None = None, - min_part_size=None, - max_part_size=None, + min_part_size: int | None = None, + max_part_size: int | None = None, large_file_sha1=None, custom_upload_timestamp: int | None = None, cache_control: str | None = None, @@ -969,8 +969,8 @@ def create_file( :param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown) :param b2sdk.v2.FileRetentionSetting file_retention: file retention setting :param bool legal_hold: legal hold setting - :param int min_part_size: lower limit of part size for the transfer planner, in bytes - :param int max_part_size: upper limit of part size for the transfer planner, in bytes + :param min_part_size: lower limit of part size for the transfer planner, in bytes + :param max_part_size: upper limit of part size for the transfer planner, in bytes :param Sha1HexDigest,None large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown :param int,None custom_upload_timestamp: override object creation date, expressed as a number of milliseconds since epoch :param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616. @@ -1021,8 +1021,8 @@ def create_file_stream( encryption: EncryptionSetting | None = None, file_retention: FileRetentionSetting | None = None, legal_hold: LegalHold | None = None, - min_part_size=None, - max_part_size=None, + min_part_size: int | None = None, + max_part_size: int | None = None, large_file_sha1=None, custom_upload_timestamp: int | None = None, cache_control: str | None = None, @@ -1059,8 +1059,8 @@ def create_file_stream( :param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown) :param b2sdk.v2.FileRetentionSetting file_retention: file retention setting :param bool legal_hold: legal hold setting - :param int min_part_size: lower limit of part size for the transfer planner, in bytes - :param int max_part_size: upper limit of part size for the transfer planner, in bytes + :param min_part_size: lower limit of part size for the transfer planner, in bytes + :param max_part_size: upper limit of part size for the transfer planner, in bytes :param Sha1HexDigest,None large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown :param int,None custom_upload_timestamp: override object creation date, expressed as a number of milliseconds since epoch :param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616. @@ -1112,8 +1112,8 @@ def _create_file( encryption: EncryptionSetting | None = None, file_retention: FileRetentionSetting | None = None, legal_hold: LegalHold | None = None, - min_part_size=None, - max_part_size=None, + min_part_size: int | None = None, + max_part_size: int | None = None, large_file_sha1=None, cache_control: str | None = None, expires: str | dt.datetime | None = None, @@ -1163,8 +1163,8 @@ def concatenate( encryption: EncryptionSetting | None = None, file_retention: FileRetentionSetting | None = None, legal_hold: LegalHold | None = None, - min_part_size=None, - max_part_size=None, + min_part_size: int | None = None, + max_part_size: int | None = None, large_file_sha1=None, custom_upload_timestamp: int | None = None, cache_control: str | None = None, @@ -1196,8 +1196,8 @@ def concatenate( :param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown) :param b2sdk.v2.FileRetentionSetting file_retention: file retention setting :param bool legal_hold: legal hold setting - :param int min_part_size: lower limit of part size for the transfer planner, in bytes - :param int max_part_size: upper limit of part size for the transfer planner, in bytes + :param min_part_size: lower limit of part size for the transfer planner, in bytes + :param max_part_size: upper limit of part size for the transfer planner, in bytes :param Sha1HexDigest,None large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown :param int,None custom_upload_timestamp: override object creation date, expressed as a number of milliseconds since epoch :param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616. @@ -1350,8 +1350,8 @@ def copy( file_retention: FileRetentionSetting | None = None, legal_hold: LegalHold | None = None, cache_control: str | None = None, - min_part_size=None, - max_part_size=None, + min_part_size: int | None = None, + max_part_size: int | None = None, expires: str | dt.datetime | None = None, content_disposition: str | None = None, content_encoding: str | None = None, @@ -1383,6 +1383,8 @@ def copy( :param bool legal_hold: legal hold setting for the new file. :param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616. Example string value: 'public, max-age=86400, s-maxage=3600, no-transform'. + :param min_part_size: lower limit of part size for the transfer planner, in bytes + :param max_part_size: upper limit of part size for the transfer planner, in bytes :param str,datetime.datetime,None expires: an optional cache expiration setting. If this argument is a string, its syntax must be based on the section 14.21 of RFC 2616. Example string value: 'Thu, 01 Dec 2050 16:00:00 GMT'. If this argument is a datetime, diff --git a/b2sdk/transfer/emerge/planner/planner.py b/b2sdk/transfer/emerge/planner/planner.py index ab195db34..2b7edfc80 100644 --- a/b2sdk/transfer/emerge/planner/planner.py +++ b/b2sdk/transfer/emerge/planner/planner.py @@ -11,6 +11,7 @@ import hashlib import json +import typing from abc import ABCMeta, abstractmethod from collections import deque from math import ceil @@ -32,6 +33,9 @@ ) from b2sdk.utils import iterator_peek +if typing.TYPE_CHECKING: + from b2sdk.account_info.abstract import AbstractAccountInfo + class UploadBuffer: """ data container used by EmergePlanner for temporary storage of write intents """ @@ -83,18 +87,30 @@ def get_slice(self, start_idx=None, end_idx=None, start_offset=None): return self.__class__(start_offset, buff_slice) +def _filter_out_none(*args): + return (arg for arg in args if arg is not None) + + class EmergePlanner: """ Creates a list of actions required for advanced creation of an object in the cloud from an iterator of write intent objects """ def __init__( self, - min_part_size=None, - recommended_upload_part_size=None, - max_part_size=None, + min_part_size: int | None = None, + recommended_upload_part_size: int | None = None, + max_part_size: int | None = None, ): - self.min_part_size = min_part_size or DEFAULT_MIN_PART_SIZE - self.recommended_upload_part_size = recommended_upload_part_size or DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE - self.max_part_size = max_part_size or DEFAULT_MAX_PART_SIZE + # ensure default values do not break min<=recommended<=max condition, + # while respecting user input and not auto fixing if something was provided explicitly + self.min_part_size = min( + DEFAULT_MIN_PART_SIZE, *_filter_out_none(recommended_upload_part_size, max_part_size) + ) if min_part_size is None else min_part_size + self.recommended_upload_part_size = recommended_upload_part_size or max( + DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE, self.min_part_size + ) + self.max_part_size = max_part_size or max( + DEFAULT_MAX_PART_SIZE, self.recommended_upload_part_size + ) if self.min_part_size > self.recommended_upload_part_size: raise InvalidUserInput( f"min_part_size value ({self.min_part_size}) exceeding recommended_upload_part_size value ({self.recommended_upload_part_size})" @@ -107,17 +123,19 @@ def __init__( @classmethod def from_account_info( cls, - account_info, + account_info: AbstractAccountInfo, min_part_size=None, recommended_upload_part_size=None, max_part_size=None ): if recommended_upload_part_size is None: recommended_upload_part_size = account_info.get_recommended_part_size() - if min_part_size is None and recommended_upload_part_size < DEFAULT_MIN_PART_SIZE: - min_part_size = recommended_upload_part_size - if max_part_size is None and recommended_upload_part_size > DEFAULT_MAX_PART_SIZE: - max_part_size = recommended_upload_part_size + # AccountInfo defaults should not break the min<=recommended<=max condition when + # other params were provided explicitly + if min_part_size is not None: + recommended_upload_part_size = max(recommended_upload_part_size, min_part_size) + if max_part_size is not None: + recommended_upload_part_size = min(recommended_upload_part_size, max_part_size) kwargs = { 'min_part_size': min_part_size, 'recommended_upload_part_size': recommended_upload_part_size, diff --git a/changelog.d/+min_part_size.added.md b/changelog.d/+min_part_size.added.md new file mode 100644 index 000000000..588fbab2c --- /dev/null +++ b/changelog.d/+min_part_size.added.md @@ -0,0 +1 @@ +Allow for `min_part_size` that is greater than default `recommended_part_size` value, without having to explicitly set `recommended_part_size` value. diff --git a/test/unit/internal/test_emerge_planner.py b/test/unit/internal/test_emerge_planner.py index d74ec86df..d5810ba79 100644 --- a/test/unit/internal/test_emerge_planner.py +++ b/test/unit/internal/test_emerge_planner.py @@ -9,7 +9,15 @@ ###################################################################### from __future__ import annotations +from unittest.mock import create_autospec + +import pytest + +from b2sdk.account_info.abstract import AbstractAccountInfo from b2sdk.http_constants import ( + DEFAULT_MAX_PART_SIZE, + DEFAULT_MIN_PART_SIZE, + DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE, GIGABYTE, MEGABYTE, ) @@ -624,3 +632,53 @@ def assert_same_part_definitions(self, emerge_plan, expected_part_defs): self.assertIs(emerge_part_def.copy_source, expected_part_def.copy_source) self.assertEqual(emerge_part_def.relative_offset, expected_part_def.relative_offset) self.assertEqual(emerge_part_def.length, expected_part_def.length) + + +@pytest.fixture +def account_info(): + mock_account_info = create_autospec(AbstractAccountInfo) + mock_account_info.get_recommended_part_size.return_value = 150 * MEGABYTE + return mock_account_info + + +def test_emerge_planner_from_account_info(account_info): + planner = EmergePlanner.from_account_info(account_info=account_info) + assert planner.min_part_size == DEFAULT_MIN_PART_SIZE + assert planner.recommended_upload_part_size == 150 * MEGABYTE + assert planner.max_part_size == DEFAULT_MAX_PART_SIZE + + +@pytest.mark.parametrize( + 'min_part_size, recommended_upload_part_size, max_part_size, expected', + [ + (100 * MEGABYTE, None, None, { + 'min_part_size': 100 * MEGABYTE + }), + ( + GIGABYTE, GIGABYTE, None, { + 'min_part_size': GIGABYTE, + 'recommended_upload_part_size': GIGABYTE + } + ), + ( + None, None, DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE // 2, { + 'recommended_upload_part_size': DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE // 2, + 'max_part_size': DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE // 2 + } + ), + ], +) +def test_emerge_planner_from_account_info__with_explicitly_set_params( + min_part_size, recommended_upload_part_size, max_part_size, expected, account_info +): + planner = EmergePlanner.from_account_info( + account_info=account_info, + min_part_size=min_part_size, + recommended_upload_part_size=recommended_upload_part_size, + max_part_size=max_part_size + ) + assert planner.min_part_size == expected.get('min_part_size', DEFAULT_MIN_PART_SIZE) + assert planner.recommended_upload_part_size == expected.get( + 'recommended_upload_part_size', 150 * MEGABYTE + ) + assert planner.max_part_size == expected.get('max_part_size', DEFAULT_MAX_PART_SIZE)