Skip to content

Commit

Permalink
Merge pull request #495 from reef-technologies/fix_ls_file
Browse files Browse the repository at this point in the history
Add `folder_to_list_can_be_a_file` parameter to `b2sdk.v2.Bucket.ls`
  • Loading branch information
mjurbanski-reef committed May 15, 2024
2 parents 9fbe73c + bdafd30 commit 9c84727
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 5 deletions.
27 changes: 22 additions & 5 deletions b2sdk/_internal/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import datetime as dt
import fnmatch
import itertools
import logging
import pathlib
from contextlib import suppress
Expand Down Expand Up @@ -334,7 +335,7 @@ def list_parts(self, file_id, start_part_number=None, batch_size=None):

def list_file_versions(
self, file_name: str, fetch_count: int | None = LIST_FILE_NAMES_MAX_LIMIT
):
) -> Iterable[FileVersion]:
"""
Lists all of the versions for a single file.
Expand Down Expand Up @@ -366,13 +367,13 @@ def list_file_versions(

def ls(
self,
folder_to_list: str = '',
path: str = '',
latest_only: bool = True,
recursive: bool = False,
fetch_count: int | None = LIST_FILE_NAMES_MAX_LIMIT,
with_wildcard: bool = False,
filters: Sequence[Filter] = (),
):
) -> Iterable[tuple[FileVersion, str]]:
"""
Pretend that folders exist and yields the information about the files in a folder.
Expand All @@ -384,8 +385,10 @@ def ls(
When the `recursive` flag is set, lists all of the files in the given
folder, and all of its sub-folders.
:param folder_to_list: the name of the folder to list; must not start with "/".
Empty string means top-level folder
:param path: Path to list.
To reduce the number of API calls, if path points to a folder, it should end with "/".
Must not start with "/".
Empty string means top-level folder.
:param latest_only: when ``False`` returns info about all versions of a file,
when ``True``, just returns info about the most recent versions
:param recursive: if ``True``, list folders recursively
Expand All @@ -404,6 +407,20 @@ def ls(
if with_wildcard and not recursive:
raise ValueError('with_wildcard requires recursive to be turned on as well')

# check if path points to an object instead of a folder
if path and not with_wildcard and not path.endswith('/'):
file_versions = self.list_file_versions(path, 1 if latest_only else fetch_count)
if latest_only:
file_versions = itertools.islice(file_versions, 1)
path_pointed_to_file = False
for file_version in file_versions:
path_pointed_to_file = True
if not latest_only or file_version.action == 'upload':
yield file_version, None
if path_pointed_to_file:
return

folder_to_list = path
# Every file returned must have a name that starts with the
# folder name and a "/".
prefix = folder_to_list
Expand Down
60 changes: 60 additions & 0 deletions b2sdk/v2/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@
from b2sdk import _v3 as v3
from b2sdk._v3.exception import BucketIdNotFound as v3BucketIdNotFound
from b2sdk.v2._compat import _file_infos_rename
from b2sdk._internal.http_constants import LIST_FILE_NAMES_MAX_LIMIT
from .exception import BucketIdNotFound
from .file_version import FileVersionFactory

if typing.TYPE_CHECKING:
from b2sdk._internal.utils import Sha1HexDigest
from b2sdk._internal.filter import Filter
from .file_version import FileVersion


# Overridden to raise old style BucketIdNotFound exception
Expand Down Expand Up @@ -102,6 +105,63 @@ def upload_local_file(
**kwargs,
)

def ls(
    self,
    folder_to_list: str = '',
    latest_only: bool = True,
    recursive: bool = False,
    fetch_count: int | None = LIST_FILE_NAMES_MAX_LIMIT,
    with_wildcard: bool = False,
    filters: typing.Sequence[Filter] = (),
    folder_to_list_can_be_a_file: bool = False,
    **kwargs
) -> typing.Iterable[tuple[FileVersion, str]]:
    """
    List the files and "folders" found under a given folder of the bucket.

    B2 keeps all files of a bucket in a flat namespace; "folders" are only a
    naming convention based on the "/" separator. This method walks that flat
    namespace and reports the entries that live within ``folder_to_list``.

    With `recursive` set, the files of all sub-folders are listed as well.

    This v2 wrapper keeps the old "argument is always a folder" behavior by
    appending "/" to ``folder_to_list`` before delegating to the parent
    implementation, unless ``folder_to_list_can_be_a_file`` or
    ``with_wildcard`` is set.

    :param folder_to_list: the name of the folder to list; must not start with "/".
                           Empty string means top-level folder
    :param latest_only: when ``False`` returns info about all versions of a file,
                        when ``True``, just returns info about the most recent versions
    :param recursive: if ``True``, list folders recursively
    :param fetch_count: how many entries to list per API call or ``None`` to use the default. Acceptable values: 1 - 10000
    :param with_wildcard: Accepts "*", "?", "[]" and "[!]" in folder_to_list, similarly to what shell does.
                          As of 1.19.0 it can only be enabled when recursive is also enabled.
                          Also, in this mode, folder_to_list is considered to be a filename or a pattern.
    :param filters: list of filters to apply to the files returned by the server.
    :param folder_to_list_can_be_a_file: if ``True``, folder_to_list can be a file, not just a folder.
                          This enables the default behavior of b2sdk.v3.Bucket.ls, in which for all
                          paths that do not end with '/', first we try to check if a file with this
                          exact name exists, and only if it does not then we try to list files with
                          this prefix.
    :param kwargs: any additional keyword arguments are forwarded unchanged to the parent ``ls``
    :rtype: generator[tuple[b2sdk.v2.FileVersion, str]]
    :returns: generator of (file_version, folder_name) tuples

    .. note::
        In case of `recursive=True`, folder_name is not returned.
    """
    # A trailing "/" is what stops the parent implementation from probing for
    # an exact file match, so add it whenever the caller has not opted in to
    # file matching and the argument is a plain, non-empty folder name.
    needs_trailing_slash = not (
        folder_to_list_can_be_a_file
        or not folder_to_list
        or folder_to_list.endswith('/')
        or with_wildcard
    )
    listing_path = folder_to_list + '/' if needs_trailing_slash else folder_to_list

    yield from super().ls(
        path=listing_path,
        latest_only=latest_only,
        recursive=recursive,
        fetch_count=fetch_count,
        with_wildcard=with_wildcard,
        filters=filters,
        **kwargs
    )


# Overridden to use old style Bucket
class BucketFactory(v3.BucketFactory):
Expand Down
2 changes: 2 additions & 0 deletions changelog.d/+ls_file.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add `folder_to_list_can_be_a_file` parameter to `b2sdk.v2.Bucket.ls`; when set to `True`, it allows listing the versions of a file if the path is an exact match.
This parameter won't be included in `b2sdk.v3.Bucket.ls`; there, unless the supplied `path` ends with `/`, the possibility of the path pointing to a file will be considered first.
59 changes: 59 additions & 0 deletions test/unit/bucket/test_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,65 @@ def bucket_ls(bucket, *args, show_versions=False, **kwargs):
return bucket.ls(*args, **ls_all_versions_kwarg, **kwargs)


@pytest.fixture
def exact_filename_match_ls_setup(bucket):
    """
    Upload three files whose names share the ``hello.txt`` prefix and hide one of them.

    :param bucket: bucket fixture the files are uploaded to
    :returns: list of the upload results, in order: ``hello.txt``,
              ``hello.txtpostfix`` (hidden afterwards) and ``hello.txtpostfix3``
    """
    payload = b'hello world'
    base_name = 'hello.txt'
    hidden_name = base_name + 'postfix'
    names = (base_name, hidden_name, base_name + 'postfix3')

    uploaded = [bucket.upload_bytes(payload, name) for name in names]

    # Hide only after all uploads, so the hidden name ends up with two versions
    # (the upload and the hide marker).
    bucket.hide_file(hidden_name)
    return uploaded


@pytest.mark.apiver(from_ver=2, to_ver=2)
def test_bucket_ls__pre_v3_does_not_match_exact_filename(bucket, exact_filename_match_ls_setup):
    """In apiver 2, ``ls`` called with an exact file name and no opt-in flag yields nothing."""
    exact_name = exact_filename_match_ls_setup[0].file_name
    listed = list(bucket.ls(exact_name))
    assert listed == []


@pytest.mark.apiver(from_ver=2)
def test_bucket_ls__matches_exact_filename(bucket, exact_filename_match_ls_setup, apiver_int):
    """``ls`` given an exact file name lists that file (opt-in flag required below apiver 3)."""
    # Sanity-check the fixture: two visible files, four versions in total.
    assert len(list(bucket.ls())) == 2
    assert len(list(bucket.ls(latest_only=False))) == 4

    # Below apiver 3 the exact-match behavior has to be requested explicitly.
    extra = {'folder_to_list_can_be_a_file': True} if apiver_int < 3 else {}

    visible_name = exact_filename_match_ls_setup[0].file_name
    listed_names = [entry.file_name for entry, _ in bucket.ls(visible_name, **extra)]
    assert listed_names == ['hello.txt']

    # hidden file should not be returned unless latest_only is False
    hidden_name = exact_filename_match_ls_setup[1].file_name
    assert list(bucket.ls(hidden_name, **extra)) == []
    all_hidden_versions = list(bucket.ls(hidden_name, **extra, latest_only=False))
    assert len(all_hidden_versions) == 2


@pytest.mark.apiver(from_ver=2)
def test_bucket_ls__matches_exact_filename__wildcard(
    bucket, exact_filename_match_ls_setup, apiver_int
):
    """Exact file names are also matched when ``ls`` runs in recursive wildcard mode."""
    options = {'with_wildcard': True, 'recursive': True}
    # Below apiver 3 the exact-match behavior has to be requested explicitly.
    if apiver_int < 3:
        options['folder_to_list_can_be_a_file'] = True

    visible_name = exact_filename_match_ls_setup[0].file_name
    listed_names = [entry.file_name for entry, _ in bucket.ls(visible_name, **options)]
    assert listed_names == ['hello.txt']

    # hidden file should not be returned unless latest_only is False
    hidden_name = exact_filename_match_ls_setup[1].file_name
    assert list(bucket.ls(hidden_name, **options)) == []
    all_hidden_versions = list(bucket.ls(hidden_name, **options, latest_only=False))
    assert len(all_hidden_versions) == 2


class TestCaseWithBucket(TestBase):
RAW_SIMULATOR_CLASS = RawSimulator
CACHE_CLASS = DummyCache
Expand Down

0 comments on commit 9c84727

Please sign in to comment.