Skip to content

Commit

Permalink
allow bigger min_part_size without having to explicitly set recommene…
Browse files Browse the repository at this point in the history
…d_part_size as well (#476)

* allow bigger min_part_size without having to explicitly set recommended_part_size as well

* fix min_part_size param docstrings
  • Loading branch information
mjurbanski-reef committed Feb 19, 2024
1 parent d22f37c commit f953e85
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 38 deletions.
56 changes: 29 additions & 27 deletions b2sdk/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,10 +602,10 @@ def upload_local_file(
self,
local_file,
file_name,
content_type=None,
content_type: str | None = None,
file_info: dict | None = None,
sha1_sum=None,
min_part_size=None,
sha1_sum: str | None = None,
min_part_size: int | None = None,
progress_listener=None,
encryption: EncryptionSetting | None = None,
file_retention: FileRetentionSetting | None = None,
Expand All @@ -630,10 +630,10 @@ def upload_local_file(
:param str local_file: a path to a file on local disk
:param str file_name: a file name of the new B2 file
:param str,None content_type: the MIME type, or ``None`` to accept the default based on file extension of the B2 file name
:param dict,None file_info: a file info to store with the file or ``None`` to not store anything
:param str,None sha1_sum: file SHA1 hash or ``None`` to compute it automatically
:param int min_part_size: a minimum size of a part
:param content_type: the MIME type, or ``None`` to accept the default based on file extension of the B2 file name
:param file_info: a file info to store with the file or ``None`` to not store anything
:param sha1_sum: file SHA1 hash or ``None`` to compute it automatically
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param b2sdk.v2.AbstractProgressListener,None progress_listener: a progress listener object to use, or ``None`` to not report progress
:param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown)
:param b2sdk.v2.FileRetentionSetting file_retention: file retention setting
Expand Down Expand Up @@ -770,10 +770,10 @@ def upload_unbound_stream(
:param encryption: encryption settings (``None`` if unknown)
:param file_retention: file retention setting
:param legal_hold: legal hold setting
:param min_part_size: a minimum size of a part
:param recommended_upload_part_size: the recommended part size to use for uploading local sources
or ``None`` to determine automatically
:param max_part_size: a maximum size of a part
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param max_part_size: upper limit of part size for the transfer planner, in bytes
:param large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown
:param buffers_count: desired number of buffers allocated, cannot be smaller than 2
:param buffer_size: size of a single buffer that we pull data to or upload data to B2. If ``None``,
Expand Down Expand Up @@ -849,7 +849,7 @@ def upload(
file_name,
content_type=None,
file_info=None,
min_part_size=None,
min_part_size: int | None = None,
progress_listener=None,
encryption: EncryptionSetting | None = None,
file_retention: FileRetentionSetting | None = None,
Expand Down Expand Up @@ -880,7 +880,7 @@ def upload(
:param str file_name: the file name of the new B2 file
:param str,None content_type: the MIME type, or ``None`` to accept the default based on file extension of the B2 file name
:param dict,None file_info: a file info to store with the file or ``None`` to not store anything
:param int,None min_part_size: the smallest part size to use or ``None`` to determine automatically
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param b2sdk.v2.AbstractProgressListener,None progress_listener: a progress listener object to use, or ``None`` to not report progress
:param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown)
:param b2sdk.v2.FileRetentionSetting file_retention: file retention setting
Expand Down Expand Up @@ -933,8 +933,8 @@ def create_file(
encryption: EncryptionSetting | None = None,
file_retention: FileRetentionSetting | None = None,
legal_hold: LegalHold | None = None,
min_part_size=None,
max_part_size=None,
min_part_size: int | None = None,
max_part_size: int | None = None,
large_file_sha1=None,
custom_upload_timestamp: int | None = None,
cache_control: str | None = None,
Expand Down Expand Up @@ -969,8 +969,8 @@ def create_file(
:param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown)
:param b2sdk.v2.FileRetentionSetting file_retention: file retention setting
:param bool legal_hold: legal hold setting
:param int min_part_size: lower limit of part size for the transfer planner, in bytes
:param int max_part_size: upper limit of part size for the transfer planner, in bytes
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param max_part_size: upper limit of part size for the transfer planner, in bytes
:param Sha1HexDigest,None large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown
:param int,None custom_upload_timestamp: override object creation date, expressed as a number of milliseconds since epoch
:param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616.
Expand Down Expand Up @@ -1021,8 +1021,8 @@ def create_file_stream(
encryption: EncryptionSetting | None = None,
file_retention: FileRetentionSetting | None = None,
legal_hold: LegalHold | None = None,
min_part_size=None,
max_part_size=None,
min_part_size: int | None = None,
max_part_size: int | None = None,
large_file_sha1=None,
custom_upload_timestamp: int | None = None,
cache_control: str | None = None,
Expand Down Expand Up @@ -1059,8 +1059,8 @@ def create_file_stream(
:param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown)
:param b2sdk.v2.FileRetentionSetting file_retention: file retention setting
:param bool legal_hold: legal hold setting
:param int min_part_size: lower limit of part size for the transfer planner, in bytes
:param int max_part_size: upper limit of part size for the transfer planner, in bytes
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param max_part_size: upper limit of part size for the transfer planner, in bytes
:param Sha1HexDigest,None large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown
:param int,None custom_upload_timestamp: override object creation date, expressed as a number of milliseconds since epoch
:param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616.
Expand Down Expand Up @@ -1112,8 +1112,8 @@ def _create_file(
encryption: EncryptionSetting | None = None,
file_retention: FileRetentionSetting | None = None,
legal_hold: LegalHold | None = None,
min_part_size=None,
max_part_size=None,
min_part_size: int | None = None,
max_part_size: int | None = None,
large_file_sha1=None,
cache_control: str | None = None,
expires: str | dt.datetime | None = None,
Expand Down Expand Up @@ -1163,8 +1163,8 @@ def concatenate(
encryption: EncryptionSetting | None = None,
file_retention: FileRetentionSetting | None = None,
legal_hold: LegalHold | None = None,
min_part_size=None,
max_part_size=None,
min_part_size: int | None = None,
max_part_size: int | None = None,
large_file_sha1=None,
custom_upload_timestamp: int | None = None,
cache_control: str | None = None,
Expand Down Expand Up @@ -1196,8 +1196,8 @@ def concatenate(
:param b2sdk.v2.EncryptionSetting encryption: encryption settings (``None`` if unknown)
:param b2sdk.v2.FileRetentionSetting file_retention: file retention setting
:param bool legal_hold: legal hold setting
:param int min_part_size: lower limit of part size for the transfer planner, in bytes
:param int max_part_size: upper limit of part size for the transfer planner, in bytes
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param max_part_size: upper limit of part size for the transfer planner, in bytes
:param Sha1HexDigest,None large_file_sha1: SHA-1 hash of the result file or ``None`` if unknown
:param int,None custom_upload_timestamp: override object creation date, expressed as a number of milliseconds since epoch
:param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616.
Expand Down Expand Up @@ -1350,8 +1350,8 @@ def copy(
file_retention: FileRetentionSetting | None = None,
legal_hold: LegalHold | None = None,
cache_control: str | None = None,
min_part_size=None,
max_part_size=None,
min_part_size: int | None = None,
max_part_size: int | None = None,
expires: str | dt.datetime | None = None,
content_disposition: str | None = None,
content_encoding: str | None = None,
Expand Down Expand Up @@ -1383,6 +1383,8 @@ def copy(
:param bool legal_hold: legal hold setting for the new file.
:param str,None cache_control: an optional cache control setting. Syntax based on the section 14.9 of RFC 2616.
Example string value: 'public, max-age=86400, s-maxage=3600, no-transform'.
:param min_part_size: lower limit of part size for the transfer planner, in bytes
:param max_part_size: upper limit of part size for the transfer planner, in bytes
:param str,datetime.datetime,None expires: an optional cache expiration setting.
If this argument is a string, its syntax must be based on the section 14.21 of RFC 2616.
Example string value: 'Thu, 01 Dec 2050 16:00:00 GMT'. If this argument is a datetime,
Expand Down
40 changes: 29 additions & 11 deletions b2sdk/transfer/emerge/planner/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import hashlib
import json
import typing
from abc import ABCMeta, abstractmethod
from collections import deque
from math import ceil
Expand All @@ -32,6 +33,9 @@
)
from b2sdk.utils import iterator_peek

if typing.TYPE_CHECKING:
from b2sdk.account_info.abstract import AbstractAccountInfo


class UploadBuffer:
""" data container used by EmergePlanner for temporary storage of write intents """
Expand Down Expand Up @@ -83,18 +87,30 @@ def get_slice(self, start_idx=None, end_idx=None, start_offset=None):
return self.__class__(start_offset, buff_slice)


def _filter_out_none(*args):
return (arg for arg in args if arg is not None)


class EmergePlanner:
""" Creates a list of actions required for advanced creation of an object in the cloud from an iterator of write intent objects """

def __init__(
self,
min_part_size=None,
recommended_upload_part_size=None,
max_part_size=None,
min_part_size: int | None = None,
recommended_upload_part_size: int | None = None,
max_part_size: int | None = None,
):
self.min_part_size = min_part_size or DEFAULT_MIN_PART_SIZE
self.recommended_upload_part_size = recommended_upload_part_size or DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE
self.max_part_size = max_part_size or DEFAULT_MAX_PART_SIZE
# ensure default values do not break min<=recommended<=max condition,
# while respecting user input and not auto fixing if something was provided explicitly
self.min_part_size = min(
DEFAULT_MIN_PART_SIZE, *_filter_out_none(recommended_upload_part_size, max_part_size)
) if min_part_size is None else min_part_size
self.recommended_upload_part_size = recommended_upload_part_size or max(
DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE, self.min_part_size
)
self.max_part_size = max_part_size or max(
DEFAULT_MAX_PART_SIZE, self.recommended_upload_part_size
)
if self.min_part_size > self.recommended_upload_part_size:
raise InvalidUserInput(
f"min_part_size value ({self.min_part_size}) exceeding recommended_upload_part_size value ({self.recommended_upload_part_size})"
Expand All @@ -107,17 +123,19 @@ def __init__(
@classmethod
def from_account_info(
cls,
account_info,
account_info: AbstractAccountInfo,
min_part_size=None,
recommended_upload_part_size=None,
max_part_size=None
):
if recommended_upload_part_size is None:
recommended_upload_part_size = account_info.get_recommended_part_size()
if min_part_size is None and recommended_upload_part_size < DEFAULT_MIN_PART_SIZE:
min_part_size = recommended_upload_part_size
if max_part_size is None and recommended_upload_part_size > DEFAULT_MAX_PART_SIZE:
max_part_size = recommended_upload_part_size
# AccountInfo defaults should not break the min<=recommended<=max condition when
# other params were provided explicitly
if min_part_size is not None:
recommended_upload_part_size = max(recommended_upload_part_size, min_part_size)
if max_part_size is not None:
recommended_upload_part_size = min(recommended_upload_part_size, max_part_size)
kwargs = {
'min_part_size': min_part_size,
'recommended_upload_part_size': recommended_upload_part_size,
Expand Down
1 change: 1 addition & 0 deletions changelog.d/+min_part_size.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow for `min_part_size` that is greater than default `recommended_part_size` value, without having to explicitly set `recommended_part_size` value.
58 changes: 58 additions & 0 deletions test/unit/internal/test_emerge_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@
######################################################################
from __future__ import annotations

from unittest.mock import create_autospec

import pytest

from b2sdk.account_info.abstract import AbstractAccountInfo
from b2sdk.http_constants import (
DEFAULT_MAX_PART_SIZE,
DEFAULT_MIN_PART_SIZE,
DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE,
GIGABYTE,
MEGABYTE,
)
Expand Down Expand Up @@ -624,3 +632,53 @@ def assert_same_part_definitions(self, emerge_plan, expected_part_defs):
self.assertIs(emerge_part_def.copy_source, expected_part_def.copy_source)
self.assertEqual(emerge_part_def.relative_offset, expected_part_def.relative_offset)
self.assertEqual(emerge_part_def.length, expected_part_def.length)


@pytest.fixture
def account_info():
mock_account_info = create_autospec(AbstractAccountInfo)
mock_account_info.get_recommended_part_size.return_value = 150 * MEGABYTE
return mock_account_info


def test_emerge_planner_from_account_info(account_info):
planner = EmergePlanner.from_account_info(account_info=account_info)
assert planner.min_part_size == DEFAULT_MIN_PART_SIZE
assert planner.recommended_upload_part_size == 150 * MEGABYTE
assert planner.max_part_size == DEFAULT_MAX_PART_SIZE


@pytest.mark.parametrize(
'min_part_size, recommended_upload_part_size, max_part_size, expected',
[
(100 * MEGABYTE, None, None, {
'min_part_size': 100 * MEGABYTE
}),
(
GIGABYTE, GIGABYTE, None, {
'min_part_size': GIGABYTE,
'recommended_upload_part_size': GIGABYTE
}
),
(
None, None, DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE // 2, {
'recommended_upload_part_size': DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE // 2,
'max_part_size': DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE // 2
}
),
],
)
def test_emerge_planner_from_account_info__with_explicitly_set_params(
min_part_size, recommended_upload_part_size, max_part_size, expected, account_info
):
planner = EmergePlanner.from_account_info(
account_info=account_info,
min_part_size=min_part_size,
recommended_upload_part_size=recommended_upload_part_size,
max_part_size=max_part_size
)
assert planner.min_part_size == expected.get('min_part_size', DEFAULT_MIN_PART_SIZE)
assert planner.recommended_upload_part_size == expected.get(
'recommended_upload_part_size', 150 * MEGABYTE
)
assert planner.max_part_size == expected.get('max_part_size', DEFAULT_MAX_PART_SIZE)

0 comments on commit f953e85

Please sign in to comment.