From e0fa6775695a3a3587a1b1463b4ca238758fd61c Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Wed, 12 Jun 2024 18:07:56 +0900 Subject: [PATCH 01/19] move exlusions up before reading file/dir --- b2sdk/_internal/scan/folder.py | 35 +++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 653df4d2..1641f031 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -244,6 +244,32 @@ def _walk_relative_paths( for local_path in local_dir.iterdir(): name = local_path.name relative_file_path = join_b2_path(relative_dir_path, name) + + if policies_manager.exclude_all_symlinks and local_path.is_symlink(): + if reporter is not None: + reporter.symlink_skipped(str(local_path)) + continue + + if local_path.is_dir(): + if policies_manager.should_exclude_local_directory(str(relative_file_path)): + continue + else: + try: + file_mod_time = get_file_mtime(str(local_path)) + file_size = local_path.stat().st_size + except OSError: + # Skip broken symlinks or other inaccessible files + file_mod_time = 0 + file_size = 0 + + local_scan_path = LocalPath( + absolute_path=str(local_path.absolute()), + relative_path=str(relative_file_path), + mod_time=file_mod_time, + size=file_size, + ) + if policies_manager.should_exclude_local_path(local_scan_path): + continue try: validate_b2_file_name(name) @@ -256,15 +282,9 @@ def _walk_relative_paths( if not is_file_readable(str(local_path), reporter): continue - if policies_manager.exclude_all_symlinks and local_path.is_symlink(): - if reporter is not None: - reporter.symlink_skipped(str(local_path)) - continue if local_path.is_dir(): name += '/' - if policies_manager.should_exclude_local_directory(str(relative_file_path)): - continue # remove the leading './' from the relative path to ensure backward compatibility relative_file_path_str = str(relative_file_path) @@ -299,9 +319,6 @@ def _walk_relative_paths( size=file_size, ) - if policies_manager.should_exclude_local_path(local_scan_path): - continue - yield local_scan_path @classmethod From c814a2b5137549c489da932d5b783049c856c264 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Wed, 12 Jun 2024 19:06:19 +0900 Subject: [PATCH 02/19] fix linting --- b2sdk/_internal/scan/folder.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 1641f031..7ce16d97 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -244,7 +244,7 @@ def _walk_relative_paths( for local_path in local_dir.iterdir(): name = local_path.name relative_file_path = join_b2_path(relative_dir_path, name) - + if policies_manager.exclude_all_symlinks and local_path.is_symlink(): if reporter is not None: reporter.symlink_skipped(str(local_path)) @@ -261,7 +261,7 @@ def _walk_relative_paths( # Skip broken symlinks or other inaccessible files file_mod_time = 0 file_size = 0 - + local_scan_path = LocalPath( absolute_path=str(local_path.absolute()), relative_path=str(relative_file_path), @@ -282,7 +282,6 @@ def _walk_relative_paths( if not is_file_readable(str(local_path), reporter): continue - if local_path.is_dir(): name += '/' From b6d19ae9bc8a0e967fed8e7820de8a79958878e8 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Wed, 12 Jun 2024 19:30:51 +0900 Subject: [PATCH 03/19] add changelog --- changelog.d/456.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/456.fixed.md diff --git a/changelog.d/456.fixed.md b/changelog.d/456.fixed.md new file mode 100644 index 00000000..d4be57c3 --- /dev/null +++ b/changelog.d/456.fixed.md @@ -0,0 +1 @@ +Move exclusions up before reading file/dir in _walk_relative_paths \ No newline at end of file From 10f9e3162fd5c0bec4a5716c5518550b4ab89ba6 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Wed, 12 Jun 2024 19:33:37 +0900 Subject: [PATCH 04/19] update changelog --- changelog.d/456.fixed.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/changelog.d/456.fixed.md b/changelog.d/456.fixed.md index d4be57c3..fb101adc 100644 --- a/changelog.d/456.fixed.md +++ b/changelog.d/456.fixed.md @@ -1 +1,2 @@ -Move exclusions up before reading file/dir in _walk_relative_paths \ No newline at end of file +Move exclusions up before reading file/dir in _walk_relative_paths. +erform exclusion checks before file access attempts. This will prevent unnecessary warnings and IO operations on paths that are not relevant to the operation. \ No newline at end of file From 4c15ed62a9d9a831d1f6a439aae8eb98ce23cb14 Mon Sep 17 00:00:00 2001 From: Luca Medeiros <167838694+luca-medeiros-reef@users.noreply.github.com> Date: Wed, 12 Jun 2024 23:43:55 +0900 Subject: [PATCH 05/19] Change default file meta to -1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Maciej Urbański <122983254+mjurbanski-reef@users.noreply.github.com> --- b2sdk/_internal/scan/folder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 7ce16d97..225561da 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -259,8 +259,8 @@ def _walk_relative_paths( file_size = local_path.stat().st_size except OSError: # Skip broken symlinks or other inaccessible files - file_mod_time = 0 - file_size = 0 + file_mod_time = -1 + file_size = -1 local_scan_path = LocalPath( absolute_path=str(local_path.absolute()), From 0b21bea5b8df6c7bdcda1217d2dc5ad73a50c2e0 Mon Sep 17 00:00:00 2001 From: Luca Medeiros <167838694+luca-medeiros-reef@users.noreply.github.com> Date: Wed, 12 Jun 2024 23:45:34 +0900 Subject: [PATCH 06/19] Update changelog.d/456.fixed.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Maciej Urbański <122983254+mjurbanski-reef@users.noreply.github.com> --- changelog.d/456.fixed.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/changelog.d/456.fixed.md b/changelog.d/456.fixed.md index fb101adc..a88e1010 100644 --- a/changelog.d/456.fixed.md +++ b/changelog.d/456.fixed.md @@ -1,2 +1 @@ -Move exclusions up before reading file/dir in _walk_relative_paths. -erform exclusion checks before file access attempts. This will prevent unnecessary warnings and IO operations on paths that are not relevant to the operation. \ No newline at end of file +Move scan filters before a read on filesystem access attempt. This will prevent unnecessary warnings and IO operations on paths that are not relevant to the operation. \ No newline at end of file From 0815b884640753359d8e6663f5b837ec56ba1154 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 00:51:37 +0900 Subject: [PATCH 07/19] add unit tests for excluded directory os access --- test/unit/scan/test_folder_traversal.py | 55 ++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/test/unit/scan/test_folder_traversal.py b/test/unit/scan/test_folder_traversal.py index bd52a8d4..85f1031b 100644 --- a/test/unit/scan/test_folder_traversal.py +++ b/test/unit/scan/test_folder_traversal.py @@ -14,7 +14,7 @@ import platform import re import sys -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest @@ -654,3 +654,56 @@ def test_folder_all_files__dir_excluded_by_regex(self, tmp_path): assert absolute_paths == [ fix_windows_path_limit(str(d1_dir / "file1.txt")), ] + + def test_excluded_folder_no_access_check(self, tmp_path): + """Test that a directory is not checked for access if it is excluded.""" + # Create directories and files + excluded_dir = tmp_path / "excluded_no_access" + excluded_dir.mkdir() + excluded_file = excluded_dir / "should_not_access.txt" + excluded_file.touch() + + # Setup exclusion regex that matches the directory name + scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_no_access$"]) + reporter = ProgressReport(sys.stdout, False) + + # Patch os.access to monitor if it is called on the excluded file + with patch('os.access', MagicMock(return_value=True)) as mocked_access: + folder = LocalFolder(str(tmp_path)) + list(folder.all_files(reporter=reporter, policies_manager=scan_policy)) + + # Verify os.access was not called for the excluded file + mocked_access.assert_not_called() + + reporter.close() + + def test_excluded_folder_without_permissions(self, tmp_path): + """Test that a excluded directory without permissions is not processed and no warning is issued.""" + excluded_dir = tmp_path / "excluded_dir" + excluded_dir.mkdir() + (excluded_dir / "file.txt").touch() + + # Modify directory permissions to simulate lack of access + excluded_dir.chmod(0o000) + + scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"]) + reporter = ProgressReport(sys.stdout, False) + + folder = LocalFolder(str(tmp_path)) + local_paths = folder.all_files(reporter=reporter, policies_manager=scan_policy) + absolute_paths = [path.absolute_path for path in local_paths] + + # Restore directory permissions to clean up + excluded_dir.chmod(0o755) + + # Check that no files from the excluded directory are processed + assert not any( + "excluded_dir" in path for path in absolute_paths + ), "Files from the excluded directory were processed" + + # Check that no access warnings are issued for the excluded directory + assert not reporter.warnings == [ + f"WARNING: {tmp_path}/excluded_dir could not be accessed (no permissions to read?)" + ], "Access warning was issued for the excluded directory" + + reporter.close() From 8d243ccd6f810f50bb5fdd025161dd2074bb378d Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 01:50:57 +0900 Subject: [PATCH 08/19] refactor _walk_relative_paths for performance and readability --- b2sdk/_internal/scan/folder.py | 95 ++++++++-------------------------- test/unit/v1/test_sync.py | 10 ++-- 2 files changed, 28 insertions(+), 77 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 225561da..707fc09a 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -219,107 +219,58 @@ def _walk_relative_paths( # a0.txt # # This is because in Unicode '.' comes before '/', which comes before '0'. - names = [] # list of (name, local_path, relative_file_path) - visited_symlinks = visited_symlinks or set() if local_dir.is_symlink(): - real_path = local_dir.resolve() - inode_number = real_path.stat().st_ino - - visited_symlinks_count = len(visited_symlinks) - - # Add symlink to visited_symlinks to prevent infinite symlink loops - visited_symlinks.add(inode_number) - - # Check if set size has changed, if not, symlink has already been visited - if len(visited_symlinks) == visited_symlinks_count: - # Infinite symlink loop detected, report warning and skip symlink - if reporter is not None: + inode_number = local_dir.resolve().stat().st_ino + if inode_number in visited_symlinks: + if reporter: reporter.circular_symlink_skipped(str(local_dir)) - return - + return # Skip if symlink already visited visited_symlinks.add(inode_number) - for local_path in local_dir.iterdir(): + for local_path in sorted(local_dir.iterdir(), key=lambda x: x.name): name = local_path.name relative_file_path = join_b2_path(relative_dir_path, name) - + if policies_manager.exclude_all_symlinks and local_path.is_symlink(): if reporter is not None: reporter.symlink_skipped(str(local_path)) continue + try: + validate_b2_file_name(name) + except ValueError as e: + if reporter is not None: + reporter.invalid_name(str(local_path), str(e)) + continue if local_path.is_dir(): if policies_manager.should_exclude_local_directory(str(relative_file_path)): - continue + continue # Skip excluded directories + # Recurse into directories + yield from self._walk_relative_paths( + local_path, relative_file_path, reporter, policies_manager, visited_symlinks) else: try: file_mod_time = get_file_mtime(str(local_path)) file_size = local_path.stat().st_size except OSError: - # Skip broken symlinks or other inaccessible files - file_mod_time = -1 - file_size = -1 - + file_mod_time, file_size = -1, -1 local_scan_path = LocalPath( - absolute_path=str(local_path.absolute()), + absolute_path=self.make_full_path(str(relative_file_path)), relative_path=str(relative_file_path), mod_time=file_mod_time, - size=file_size, + size=file_size ) if policies_manager.should_exclude_local_path(local_scan_path): - continue + continue # Skip excluded files - try: - validate_b2_file_name(name) - except ValueError as e: - if reporter is not None: - reporter.invalid_name(str(local_path), str(e)) - continue - - # Skip broken symlinks or other inaccessible files - if not is_file_readable(str(local_path), reporter): - continue - - if local_path.is_dir(): - name += '/' - - # remove the leading './' from the relative path to ensure backward compatibility - relative_file_path_str = str(relative_file_path) - if relative_file_path_str.startswith("./"): - relative_file_path_str = relative_file_path_str[2:] - names.append((name, local_path, relative_file_path_str)) - - # Yield all of the answers. - # - # Sorting the list of triples puts them in the right order because 'name', - # the sort key, is the first thing in the triple. - for (name, local_path, relative_file_path) in sorted(names): - if name.endswith('/'): - yield from self._walk_relative_paths( - local_path, - relative_file_path, - reporter, - policies_manager, - visited_symlinks, - ) - else: - # Check that the file still exists and is accessible, since it can take a long time - # to iterate through large folders if is_file_readable(str(local_path), reporter): - file_mod_time = get_file_mtime(str(local_path)) - file_size = local_path.stat().st_size - - local_scan_path = LocalPath( - absolute_path=self.make_full_path(str(relative_file_path)), - relative_path=str(relative_file_path), - mod_time=file_mod_time, - size=file_size, - ) - yield local_scan_path + else: + continue # Skip inaccessible files + @classmethod def _handle_non_unicode_file_name(cls, name): """ diff --git a/test/unit/v1/test_sync.py b/test/unit/v1/test_sync.py index 444f6b3f..9192de0f 100644 --- a/test/unit/v1/test_sync.py +++ b/test/unit/v1/test_sync.py @@ -69,10 +69,10 @@ class TestFolder(TestSync): NAMES = [ '.dot_file', - 'hello.', os.path.join('hello', 'a', '1'), os.path.join('hello', 'a', '2'), os.path.join('hello', 'b'), + 'hello.', 'hello0', os.path.join('inner', 'a.bin'), os.path.join('inner', 'a.txt'), @@ -109,10 +109,10 @@ def assert_filtered_files(self, scan_results, expected_scan_results): def test_exclusions(self): expected_list = [ '.dot_file', - 'hello.', 'hello/a/1', 'hello/a/2', 'hello/b', + 'hello.', 'hello0', 'inner/a.txt', 'inner/b.txt', @@ -132,10 +132,10 @@ def test_exclude_all(self): def test_exclusions_inclusions(self): expected_list = [ '.dot_file', - 'hello.', 'hello/a/1', 'hello/a/2', 'hello/b', + 'hello.', 'hello0', 'inner/a.bin', 'inner/a.txt', @@ -154,8 +154,8 @@ def test_exclusions_inclusions(self): def test_exclude_matches_prefix(self): expected_list = [ '.dot_file', - 'hello.', 'hello/b', + 'hello.', 'hello0', 'inner/b.bin', 'inner/b.txt', @@ -207,10 +207,10 @@ def test_exclude_directory_trailing_slash_does_not_match(self): def test_exclusion_with_exact_match(self): expected_list = [ '.dot_file', - 'hello.', 'hello/a/1', 'hello/a/2', 'hello/b', + 'hello.', 'inner/a.bin', 'inner/a.txt', 'inner/b.bin', From ccfba8de480ace37732d26a4b5a8781c89967cc4 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 01:52:37 +0900 Subject: [PATCH 09/19] remove lambda sorting by path.name --- b2sdk/_internal/scan/folder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 707fc09a..74faa24f 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -229,7 +229,7 @@ def _walk_relative_paths( return # Skip if symlink already visited visited_symlinks.add(inode_number) - for local_path in sorted(local_dir.iterdir(), key=lambda x: x.name): + for local_path in sorted(local_dir.iterdir()): name = local_path.name relative_file_path = join_b2_path(relative_dir_path, name) From d373050fe806bccdb09d340c350e62509a2d6768 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 01:54:32 +0900 Subject: [PATCH 10/19] fix linting --- b2sdk/_internal/scan/folder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 74faa24f..874e99e7 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -232,7 +232,7 @@ def _walk_relative_paths( for local_path in sorted(local_dir.iterdir()): name = local_path.name relative_file_path = join_b2_path(relative_dir_path, name) - + if policies_manager.exclude_all_symlinks and local_path.is_symlink(): if reporter is not None: reporter.symlink_skipped(str(local_path)) @@ -249,7 +249,8 @@ def _walk_relative_paths( continue # Skip excluded directories # Recurse into directories yield from self._walk_relative_paths( - local_path, relative_file_path, reporter, policies_manager, visited_symlinks) + local_path, relative_file_path, reporter, policies_manager, visited_symlinks + ) else: try: file_mod_time = get_file_mtime(str(local_path)) From 700a4c75d24a6f165d84dae470d22550b704f2b5 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 02:02:05 +0900 Subject: [PATCH 11/19] fix v0 tests --- test/unit/v0/test_sync.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/unit/v0/test_sync.py b/test/unit/v0/test_sync.py index 273fccdf..0cc7009f 100644 --- a/test/unit/v0/test_sync.py +++ b/test/unit/v0/test_sync.py @@ -66,10 +66,10 @@ class TestFolder(TestSync): NAMES = [ '.dot_file', - 'hello.', os.path.join('hello', 'a', '1'), os.path.join('hello', 'a', '2'), os.path.join('hello', 'b'), + 'hello.', 'hello0', os.path.join('inner', 'a.bin'), os.path.join('inner', 'a.txt'), @@ -106,10 +106,10 @@ def assert_filtered_files(self, scan_results, expected_scan_results): def test_exclusions(self): expected_list = [ '.dot_file', - 'hello.', 'hello/a/1', 'hello/a/2', 'hello/b', + 'hello.', 'hello0', 'inner/a.txt', 'inner/b.txt', @@ -129,10 +129,10 @@ def test_exclude_all(self): def test_exclusions_inclusions(self): expected_list = [ '.dot_file', - 'hello.', 'hello/a/1', 'hello/a/2', 'hello/b', + 'hello.', 'hello0', 'inner/a.bin', 'inner/a.txt', @@ -151,8 +151,8 @@ def test_exclusions_inclusions(self): def test_exclude_matches_prefix(self): expected_list = [ '.dot_file', - 'hello.', 'hello/b', + 'hello.', 'hello0', 'inner/b.bin', 'inner/b.txt', @@ -204,10 +204,10 @@ def test_exclude_directory_trailing_slash_does_not_match(self): def test_exclusion_with_exact_match(self): expected_list = [ '.dot_file', - 'hello.', 'hello/a/1', 'hello/a/2', 'hello/b', + 'hello.', 'inner/a.bin', 'inner/a.txt', 'inner/b.bin', From 80c4d716510a0880e768f5974a2654b4b70fe72c Mon Sep 17 00:00:00 2001 From: Luca Medeiros <167838694+luca-medeiros-reef@users.noreply.github.com> Date: Thu, 13 Jun 2024 13:33:01 +0900 Subject: [PATCH 12/19] Update b2sdk/_internal/scan/folder.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Maciej Urbański <122983254+mjurbanski-reef@users.noreply.github.com> --- b2sdk/_internal/scan/folder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 874e99e7..b8fd7c9b 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -269,9 +269,6 @@ def _walk_relative_paths( if is_file_readable(str(local_path), reporter): yield local_scan_path - else: - continue # Skip inaccessible files - @classmethod def _handle_non_unicode_file_name(cls, name): """ From ec424b831662396edceb481cd770d858f370cf52 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 13:45:54 +0900 Subject: [PATCH 13/19] remove and replace is_file_readable --- b2sdk/_internal/scan/folder.py | 15 +++++++++++---- b2sdk/_internal/utils/__init__.py | 20 -------------------- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index b8fd7c9b..e4a3e366 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -18,7 +18,7 @@ from pathlib import Path from typing import Iterator -from ..utils import fix_windows_path_limit, get_file_mtime, is_file_readable, validate_b2_file_name +from ..utils import fix_windows_path_limit, get_file_mtime, validate_b2_file_name from .exception import ( EmptyDirectory, EnvironmentEncodingError, @@ -256,7 +256,10 @@ def _walk_relative_paths( file_mod_time = get_file_mtime(str(local_path)) file_size = local_path.stat().st_size except OSError: - file_mod_time, file_size = -1, -1 + if reporter is not None: + reporter.local_access_error(str(local_path)) + continue + local_scan_path = LocalPath( absolute_path=self.make_full_path(str(relative_file_path)), relative_path=str(relative_file_path), @@ -266,8 +269,12 @@ def _walk_relative_paths( if policies_manager.should_exclude_local_path(local_scan_path): continue # Skip excluded files - if is_file_readable(str(local_path), reporter): - yield local_scan_path + if not os.access(local_path, os.R_OK): + if reporter is not None: + reporter.local_permission_error(str(local_path)) + continue + + yield local_scan_path @classmethod def _handle_non_unicode_file_name(cls, name): diff --git a/b2sdk/_internal/utils/__init__.py b/b2sdk/_internal/utils/__init__.py index a06f4efd..f4ae98ce 100644 --- a/b2sdk/_internal/utils/__init__.py +++ b/b2sdk/_internal/utils/__init__.py @@ -252,26 +252,6 @@ def validate_b2_file_name(name): raise ValueError("file names segments (between '/') can be at most 250 utf-8 bytes") -def is_file_readable(local_path, reporter=None): - """ - Check if the local file has read permissions. - - :param local_path: a file path - :type local_path: str - :param reporter: reporter object to put errors on - :rtype: bool - """ - if not os.path.exists(local_path): - if reporter is not None: - reporter.local_access_error(local_path) - return False - elif not os.access(local_path, os.R_OK): - if reporter is not None: - reporter.local_permission_error(local_path) - return False - return True - - def get_file_mtime(local_path): """ Get modification time of a file in milliseconds. From 95c2a251a22f4d18f1636766cf5d0bca93afcce1 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Thu, 13 Jun 2024 18:15:33 +0900 Subject: [PATCH 14/19] add test for file without permission --- test/unit/scan/test_folder_traversal.py | 27 +++++++++++++++---------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/test/unit/scan/test_folder_traversal.py b/test/unit/scan/test_folder_traversal.py index 85f1031b..044b85cf 100644 --- a/test/unit/scan/test_folder_traversal.py +++ b/test/unit/scan/test_folder_traversal.py @@ -678,15 +678,21 @@ def test_excluded_folder_no_access_check(self, tmp_path): reporter.close() def test_excluded_folder_without_permissions(self, tmp_path): - """Test that a excluded directory without permissions is not processed and no warning is issued.""" + """Test that a excluded directory/file without permissions is not processed and no warning is issued.""" excluded_dir = tmp_path / "excluded_dir" excluded_dir.mkdir() (excluded_dir / "file.txt").touch() + + included_dir = tmp_path / "included_dir" + included_dir.mkdir() + (included_dir / "excluded_file.txt").touch() + (included_dir / "included_file.txt").touch() # Modify directory permissions to simulate lack of access + (included_dir / "excluded_file.txt").chmod(0o000) excluded_dir.chmod(0o000) - scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"]) + scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r"excluded_file.txt"]) reporter = ProgressReport(sys.stdout, False) folder = LocalFolder(str(tmp_path)) @@ -694,16 +700,15 @@ def test_excluded_folder_without_permissions(self, tmp_path): absolute_paths = [path.absolute_path for path in local_paths] # Restore directory permissions to clean up + (included_dir / "excluded_file.txt").chmod(0o755) excluded_dir.chmod(0o755) - # Check that no files from the excluded directory are processed - assert not any( - "excluded_dir" in path for path in absolute_paths - ), "Files from the excluded directory were processed" - - # Check that no access warnings are issued for the excluded directory - assert not reporter.warnings == [ - f"WARNING: {tmp_path}/excluded_dir could not be accessed (no permissions to read?)" - ], "Access warning was issued for the excluded directory" + # Check that only included_dir/included_file.txt was return + assert absolute_paths == [f"{tmp_path}/included_dir/included_file.txt"] + # Check that no access warnings are issued for the excluded directory/file + assert not any(re.match( + r"WARNING: '.+excluded_.+' could not be accessed (no permissions to read?)", + warning, + ) for warning in reporter.warnings), "Access warning was issued for the excluded directory/file" reporter.close() From 40e8df1078838723f8065d095e43f2e49a531db6 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Fri, 14 Jun 2024 14:27:10 +0900 Subject: [PATCH 15/19] split metadata based filtering from filename based --- b2sdk/_internal/scan/folder.py | 2 ++ b2sdk/_internal/scan/policies.py | 6 +++--- b2sdk/v1/sync/scan_policies.py | 5 ++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index e4a3e366..0be2ec87 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -252,6 +252,8 @@ def _walk_relative_paths( local_path, relative_file_path, reporter, policies_manager, visited_symlinks ) else: + if policies_manager.should_exclude_relative_path(relative_file_path): + continue # Skip excluded files try: file_mod_time = get_file_mtime(str(local_path)) file_size = local_path.stat().st_size diff --git a/b2sdk/_internal/scan/policies.py b/b2sdk/_internal/scan/policies.py index 6befa857..93921cb5 100644 --- a/b2sdk/_internal/scan/policies.py +++ b/b2sdk/_internal/scan/policies.py @@ -184,7 +184,7 @@ def __init__( exclude_uploaded_before, exclude_uploaded_after ) - def _should_exclude_relative_path(self, relative_path: str): + def should_exclude_relative_path(self, relative_path: str): if self._include_file_set.matches(relative_path): return False return self._exclude_file_set.matches(relative_path) @@ -197,7 +197,7 @@ def should_exclude_local_path(self, local_path: LocalPath): """ if local_path.mod_time not in self._include_mod_time_range: return True - return self._should_exclude_relative_path(local_path.relative_path) + return self.should_exclude_relative_path(local_path.relative_path) def should_exclude_b2_file_version(self, file_version: FileVersion, relative_path: str): """ @@ -209,7 +209,7 @@ def should_exclude_b2_file_version(self, file_version: FileVersion, relative_pat return True if file_version.mod_time_millis not in self._include_mod_time_range: return True - return self._should_exclude_relative_path(relative_path) + return self.should_exclude_relative_path(relative_path) def should_exclude_b2_directory(self, dir_path: str): """ diff --git a/b2sdk/v1/sync/scan_policies.py b/b2sdk/v1/sync/scan_policies.py index a287bfe0..a1999d02 100644 --- a/b2sdk/v1/sync/scan_policies.py +++ b/b2sdk/v1/sync/scan_policies.py @@ -133,7 +133,10 @@ def __init__(self, scan_policies_manager: ScanPoliciesManager): def __repr__(self): return f"{self.__class__.__name__}({self.scan_policies_manager})" - + + def should_exclude_relative_path(self, relative_path: str): + self.scan_policies_manager.should_exclude_file(relative_path) + def should_exclude_local_path(self, local_path: v2.LocalSyncPath): if self.scan_policies_manager.should_exclude_file_version( _translate_local_path_to_file(local_path).latest_version() From 8a4c505e9162848fe2a00445c9181a58c1eacabd Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Fri, 14 Jun 2024 14:27:25 +0900 Subject: [PATCH 16/19] add file tests --- test/unit/scan/test_folder_traversal.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/test/unit/scan/test_folder_traversal.py b/test/unit/scan/test_folder_traversal.py index 044b85cf..2dc0089d 100644 --- a/test/unit/scan/test_folder_traversal.py +++ b/test/unit/scan/test_folder_traversal.py @@ -655,16 +655,19 @@ def test_folder_all_files__dir_excluded_by_regex(self, tmp_path): fix_windows_path_limit(str(d1_dir / "file1.txt")), ] - def test_excluded_folder_no_access_check(self, tmp_path): - """Test that a directory is not checked for access if it is excluded.""" + def test_excluded_no_access_check(self, tmp_path): + """Test that a directory/file is not checked for access if it is excluded.""" # Create directories and files - excluded_dir = tmp_path / "excluded_no_access" + excluded_dir = tmp_path / "excluded_dir" excluded_dir.mkdir() - excluded_file = excluded_dir / "should_not_access.txt" + excluded_file = excluded_dir / "excluded_file.txt" excluded_file.touch() + included_dir = tmp_path / "included_dir" + included_dir.mkdir() + (included_dir / "excluded_file.txt").touch() # Setup exclusion regex that matches the directory name - scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_no_access$"]) + scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r'.*excluded_file.txt']) reporter = ProgressReport(sys.stdout, False) # Patch os.access to monitor if it is called on the excluded file @@ -692,7 +695,7 @@ def test_excluded_folder_without_permissions(self, tmp_path): (included_dir / "excluded_file.txt").chmod(0o000) excluded_dir.chmod(0o000) - scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r"excluded_file.txt"]) + scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r'.*excluded_file.txt']) reporter = ProgressReport(sys.stdout, False) folder = LocalFolder(str(tmp_path)) @@ -702,13 +705,14 @@ def test_excluded_folder_without_permissions(self, tmp_path): # Restore directory permissions to clean up (included_dir / "excluded_file.txt").chmod(0o755) excluded_dir.chmod(0o755) - + print(reporter.warnings) # Check that only included_dir/included_file.txt was return assert absolute_paths == [f"{tmp_path}/included_dir/included_file.txt"] # Check that no access warnings are issued for the excluded directory/file assert not any(re.match( - r"WARNING: '.+excluded_.+' could not be accessed (no permissions to read?)", + r"WARNING: .*excluded_.* could not be accessed \(no permissions to read\?\)", warning, ) for warning in reporter.warnings), "Access warning was issued for the excluded directory/file" + reporter.close() From 421dca9608958e63959ad59a48764c77e5a64ae1 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Fri, 14 Jun 2024 14:31:24 +0900 Subject: [PATCH 17/19] linting --- b2sdk/v1/sync/scan_policies.py | 4 ++-- test/unit/scan/test_folder_traversal.py | 26 +++++++++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/b2sdk/v1/sync/scan_policies.py b/b2sdk/v1/sync/scan_policies.py index a1999d02..d94c78c0 100644 --- a/b2sdk/v1/sync/scan_policies.py +++ b/b2sdk/v1/sync/scan_policies.py @@ -133,10 +133,10 @@ def __init__(self, scan_policies_manager: ScanPoliciesManager): def __repr__(self): return f"{self.__class__.__name__}({self.scan_policies_manager})" - + def should_exclude_relative_path(self, relative_path: str): self.scan_policies_manager.should_exclude_file(relative_path) - + def should_exclude_local_path(self, local_path: v2.LocalSyncPath): if self.scan_policies_manager.should_exclude_file_version( _translate_local_path_to_file(local_path).latest_version() diff --git a/test/unit/scan/test_folder_traversal.py b/test/unit/scan/test_folder_traversal.py index 2dc0089d..83bd7075 100644 --- a/test/unit/scan/test_folder_traversal.py +++ b/test/unit/scan/test_folder_traversal.py @@ -666,8 +666,10 @@ def test_excluded_no_access_check(self, tmp_path): included_dir.mkdir() (included_dir / "excluded_file.txt").touch() - # Setup exclusion regex that matches the directory name - scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r'.*excluded_file.txt']) + # Setup exclusion regex that matches the excluded directory/file name + scan_policy = ScanPoliciesManager( + exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r'.*excluded_file.txt'] + ) reporter = ProgressReport(sys.stdout, False) # Patch os.access to monitor if it is called on the excluded file @@ -675,17 +677,17 @@ def test_excluded_no_access_check(self, tmp_path): folder = LocalFolder(str(tmp_path)) list(folder.all_files(reporter=reporter, policies_manager=scan_policy)) - # Verify os.access was not called for the excluded file + # Verify os.access was not called for the excluded directory/file mocked_access.assert_not_called() reporter.close() - def test_excluded_folder_without_permissions(self, tmp_path): + def test_excluded_without_permissions(self, tmp_path): """Test that a excluded directory/file without permissions is not processed and no warning is issued.""" excluded_dir = tmp_path / "excluded_dir" excluded_dir.mkdir() (excluded_dir / "file.txt").touch() - + included_dir = tmp_path / "included_dir" included_dir.mkdir() (included_dir / "excluded_file.txt").touch() @@ -695,7 +697,9 @@ def test_excluded_folder_without_permissions(self, tmp_path): (included_dir / "excluded_file.txt").chmod(0o000) excluded_dir.chmod(0o000) - scan_policy = ScanPoliciesManager(exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r'.*excluded_file.txt']) + scan_policy = ScanPoliciesManager( + exclude_dir_regexes=[r"excluded_dir$"], exclude_file_regexes=[r'.*excluded_file.txt'] + ) reporter = ProgressReport(sys.stdout, False) folder = LocalFolder(str(tmp_path)) @@ -710,9 +714,11 @@ def test_excluded_folder_without_permissions(self, tmp_path): assert absolute_paths == [f"{tmp_path}/included_dir/included_file.txt"] # Check that no access warnings are issued for the excluded directory/file - assert not any(re.match( - r"WARNING: .*excluded_.* could not be accessed \(no permissions to read\?\)", - warning, - ) for warning in reporter.warnings), "Access warning was issued for the excluded directory/file" + assert not any( + re.match( + r"WARNING: .*excluded_.* could not be accessed \(no permissions to read\?\)", + warning, + ) for warning in reporter.warnings + ), "Access warning was issued for the excluded directory/file" reporter.close() From 08011a490a1314db58bb50ad3fa1358827fd1779 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Fri, 14 Jun 2024 14:41:18 +0900 Subject: [PATCH 18/19] fix windows testing --- test/unit/scan/test_folder_traversal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/scan/test_folder_traversal.py b/test/unit/scan/test_folder_traversal.py index 83bd7075..078a1393 100644 --- a/test/unit/scan/test_folder_traversal.py +++ b/test/unit/scan/test_folder_traversal.py @@ -709,9 +709,9 @@ def test_excluded_without_permissions(self, tmp_path): # Restore directory permissions to clean up (included_dir / "excluded_file.txt").chmod(0o755) excluded_dir.chmod(0o755) - print(reporter.warnings) + # Check that only included_dir/included_file.txt was return - assert absolute_paths == [f"{tmp_path}/included_dir/included_file.txt"] + assert any('included_file.txt' in path for path in absolute_paths) # Check that no access warnings are issued for the excluded directory/file assert not any( From 4ea9a57a9872ec1297d766c1951df0e1e47b3fe6 Mon Sep 17 00:00:00 2001 From: luca-medeiros-reef Date: Fri, 14 Jun 2024 14:55:15 +0900 Subject: [PATCH 19/19] linting --- test/unit/scan/test_folder_traversal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/scan/test_folder_traversal.py b/test/unit/scan/test_folder_traversal.py index 078a1393..d791dcd0 100644 --- a/test/unit/scan/test_folder_traversal.py +++ b/test/unit/scan/test_folder_traversal.py @@ -709,7 +709,7 @@ def test_excluded_without_permissions(self, tmp_path): # Restore directory permissions to clean up (included_dir / "excluded_file.txt").chmod(0o755) excluded_dir.chmod(0o755) - + # Check that only included_dir/included_file.txt was return assert any('included_file.txt' in path for path in absolute_paths)