Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing circular symlinks (issue 513) #390

Merged
merged 26 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d770d0e
Fixing circular symlinks (issue 513)
przadka Apr 27, 2023
0ac51c0
Improved symlinks compatibility with Windows
przadka May 1, 2023
6559deb
Changelog updated properly
przadka May 2, 2023
447d706
Refactored _walk_relative_paths to use pathlib
przadka May 2, 2023
e0b71d8
Improved _walk_relative_paths signature
przadka May 2, 2023
0375f8c
Removed redundant asserts
przadka May 2, 2023
1e0e43f
Test refactoring after PR
przadka May 2, 2023
502e970
PR feedback implemented
przadka May 4, 2023
4a0e1df
Directory traversal refactored
przadka May 5, 2023
1cc6d2a
Tests refactored to improved readability
przadka May 5, 2023
74e1c07
Typo fixed
przadka May 5, 2023
20d475f
Refactoring - moved tests to a separate file
przadka May 5, 2023
bab887b
PR feedback implemented
przadka May 9, 2023
2ce6eb7
Disabling yapf to allow long lines
przadka May 10, 2023
b6ecacf
Lines with paths are now longer and more readable
przadka May 10, 2023
64be896
Refactored symlinks set to improve performance
przadka May 15, 2023
b11addb
Refactor directory iteration to use generator expression instead of l…
przadka May 15, 2023
24595c7
Update test readability
przadka May 15, 2023
f2d80a9
Removed redundant comment
przadka May 15, 2023
634cd70
Removed unused re import
przadka May 15, 2023
1e6098d
New tests added after PR feedback.
przadka May 16, 2023
34e8ab9
Refactored relative symlinks paths in tests
przadka May 19, 2023
29d667b
New test added
przadka May 20, 2023
86d1616
Added unused imports to test CI
przadka May 20, 2023
d2bfb40
Disable yapf only where necessary
ppolewicz May 23, 2023
09a8a3d
Polishing style in test/unit/scan/test_folder_traversal.py
ppolewicz May 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
* Circular symlinks no longer cause infinite loops when syncing a folder

## [1.21.0] - 2023-04-17

### Added
Expand Down
86 changes: 59 additions & 27 deletions b2sdk/scan/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@

import logging
import os
from pathlib import Path
import platform
import re
import sys

from abc import ABCMeta, abstractmethod
from typing import Iterator, Optional
from typing import Iterator, Optional, Set

from ..utils import fix_windows_path_limit, get_file_mtime, is_file_readable
from .exception import EmptyDirectory, EnvironmentEncodingError, NotADirectory, UnableToCreateDirectory, UnsupportedFilename
Expand Down Expand Up @@ -132,7 +133,8 @@ def all_files(self, reporter: Optional[ProgressReport],
:param reporter: a place to report errors
:param policies_manager: a policy manager object, default is DEFAULT_SCAN_MANAGER
"""
yield from self._walk_relative_paths(self.root, '', reporter, policies_manager)
root_path = Path(self.root)
yield from self._walk_relative_paths(root_path, Path(''), reporter, policies_manager)

def make_full_path(self, file_name):
"""
Expand Down Expand Up @@ -178,17 +180,23 @@ def ensure_non_empty(self):
raise EmptyDirectory(self.root)

def _walk_relative_paths(
self, local_dir: str, relative_dir_path: str, reporter,
policies_manager: ScanPoliciesManager
self,
local_dir: Path,
relative_dir_path: Path,
reporter: ProgressReport,
policies_manager: ScanPoliciesManager,
visited_symlinks: Optional[Set[int]] = None,
):
"""
Yield a File object for each of the files anywhere under this folder, in the
order they would appear in B2, unless the path is excluded by policies manager.

:param relative_dir_path: the path of this dir relative to the scan point, or '' if at scan point
:param local_dir: the path to the local directory that we are currently inspecting
:param relative_dir_path: the path of this dir relative to the scan point, or Path('') if at scan point
:param reporter: a reporter object to report errors and warnings
:param policies_manager: a policies manager object
:param visited_symlinks: a set of paths to symlinks that have already been visited. Using inode numbers to reduce memory usage
"""
if not isinstance(local_dir, str):
raise ValueError('folder path should be unicode: %s' % repr(local_dir))

# Collect the names. We do this before returning any results, because
# directories need to sort as if their names end in '/'.
Expand All @@ -204,39 +212,59 @@ def _walk_relative_paths(
#
# This is because in Unicode '.' comes before '/', which comes before '0'.
names = [] # list of (name, local_path, relative_file_path)
for name in os.listdir(local_dir):
# We expect listdir() to return unicode if dir_path is unicode.
# If the file name is not valid, based on the file system
# encoding, then listdir() will return un-decoded str/bytes.
if not isinstance(name, str):
name = self._handle_non_unicode_file_name(name)

visited_symlinks = visited_symlinks or set()

if local_dir.is_symlink():
real_path = local_dir.resolve()
inode_number = real_path.stat().st_ino
przadka marked this conversation as resolved.
Show resolved Hide resolved

visited_symlinks_count = len(visited_symlinks)

# Add symlink to visited_symlinks to prevent infinite symlink loops
visited_symlinks.add(inode_number)

# Check if set size has changed, if not, symlink has already been visited
if len(visited_symlinks) == visited_symlinks_count:
# Infinite symlink loop detected, report warning and skip symlink
if reporter is not None:
reporter.circular_symlink_skipped(str(local_dir))
return

visited_symlinks.add(inode_number)

for name in (x.name for x in local_dir.iterdir()):

if '/' in name:
raise UnsupportedFilename(
"scan does not support file names that include '/'",
"%s in dir %s" % (name, local_dir)
)

local_path = os.path.join(local_dir, name)
local_path = local_dir / name
relative_file_path = join_b2_path(
relative_dir_path, name
str(relative_dir_path), name
) # file path relative to the scan point

# Skip broken symlinks or other inaccessible files
if not is_file_readable(local_path, reporter):
if not is_file_readable(str(local_path), reporter):
continue

if policies_manager.exclude_all_symlinks and os.path.islink(local_path):
if policies_manager.exclude_all_symlinks and local_path.is_symlink():
if reporter is not None:
reporter.symlink_skipped(local_path)
reporter.symlink_skipped(str(local_path))
continue

if os.path.isdir(local_path):
if local_path.is_dir():
name += '/'
if policies_manager.should_exclude_local_directory(relative_file_path):
if policies_manager.should_exclude_local_directory(str(relative_file_path)):
continue

names.append((name, local_path, relative_file_path))
# remove the leading './' from the relative path to ensure backward compatibility
relative_file_path_str = str(relative_file_path)
if relative_file_path_str.startswith("./"):
relative_file_path_str = relative_file_path_str[2:]
names.append((name, local_path, relative_file_path_str))

# Yield all of the answers.
#
Expand All @@ -245,19 +273,23 @@ def _walk_relative_paths(
for (name, local_path, relative_file_path) in sorted(names):
if name.endswith('/'):
for subdir_file in self._walk_relative_paths(
local_path, relative_file_path, reporter, policies_manager
local_path,
relative_file_path,
reporter,
policies_manager,
visited_symlinks,
):
yield subdir_file
else:
# Check that the file still exists and is accessible, since it can take a long time
# to iterate through large folders
if is_file_readable(local_path, reporter):
file_mod_time = get_file_mtime(local_path)
file_size = os.path.getsize(local_path)
if is_file_readable(str(local_path), reporter):
file_mod_time = get_file_mtime(str(local_path))
file_size = local_path.stat().st_size

local_scan_path = LocalPath(
absolute_path=self.make_full_path(relative_file_path),
relative_path=relative_file_path,
absolute_path=self.make_full_path(str(relative_file_path)),
relative_path=str(relative_file_path),
mod_time=file_mod_time,
size=file_size,
)
Expand Down
2 changes: 1 addition & 1 deletion b2sdk/scan/folder_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ def _parse_bucket_and_folder(bucket_and_path, api, b2_folder_class):
(bucket_name, folder_name) = bucket_and_path.split('/', 1)
if folder_name.endswith('/'):
folder_name = folder_name[:-1]
return b2_folder_class(bucket_name, folder_name, api)
return b2_folder_class(bucket_name, folder_name, api)
2 changes: 1 addition & 1 deletion b2sdk/scan/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,4 @@ def __eq__(self, other):
self.relative_path == other.relative_path and
self.selected_version == other.selected_version and
self.all_versions == other.all_versions
)
)
35 changes: 19 additions & 16 deletions b2sdk/scan/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,29 +66,27 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()

def error(self, message):
def error(self, message: str) -> None:
"""
Print an error, gracefully interleaving it with a progress bar.

:param message: an error message
:type message: str
"""
self.print_completion(message)

def print_completion(self, message):
def print_completion(self, message: str) -> None:
"""
Remove the progress bar, prints a message, and puts the progress
bar back.

:param message: an error message
:type message: str
"""
with self.lock:
self._print_line(message, True)
self._last_update_time = 0
self._update_progress()

def update_count(self, delta: int):
def update_count(self, delta: int) -> None:
"""
Report that items have been processed.
"""
Expand Down Expand Up @@ -117,14 +115,12 @@ def _update_progress(self):

self._print_line(message, False)

def _print_line(self, line, newline):
def _print_line(self, line: str, newline: bool) -> None:
"""
Print a line to stdout.

:param line: a string without a \r or \n in it.
:type line: str
:param newline: True if the output should move to a new line after this one.
:type newline: bool
"""
if len(line) < len(self.current_line):
line += ' ' * (len(self.current_line) - len(line))
Expand All @@ -150,48 +146,55 @@ def _print_line(self, line, newline):
self.current_line = line
self.stdout.flush()

def update_total(self, delta):
def update_total(self, delta: int) -> None:
"""
Report that more files have been found for comparison.

:param delta: number of files found since the last check
:type delta: int
"""
with self.lock:
self.total_count += delta
self._update_progress()

def end_total(self):
def end_total(self) -> None:
"""
Total files count is done. Can proceed to step 2.
"""
with self.lock:
self.total_done = True
self._update_progress()

def local_access_error(self, path):
def local_access_error(self, path: str) -> None:
"""
Add a file access error message to the list of warnings.

:param path: file path
:type path: str
"""
self.warnings.append('WARNING: %s could not be accessed (broken symlink?)' % (path,))

def local_permission_error(self, path):
def local_permission_error(self, path: str) -> None:
"""
Add a permission error message to the list of warnings.

:param path: file path
:type path: str
"""
self.warnings.append(
'WARNING: %s could not be accessed (no permissions to read?)' % (path,)
)

def symlink_skipped(self, path):
def symlink_skipped(self, path: str) -> None:
pass

def circular_symlink_skipped(self, path: str) -> None:
"""
Add a circular symlink error message to the list of warnings.

:param path: file path
"""
self.warnings.append(
'WARNING: %s is a circular symlink, which was already visited. Skipping.' % (path,)
)


def sample_report_run():
"""
Expand Down
1 change: 1 addition & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
'pytest-lazy-fixture==0.6.3',
'pyfakefs==4.5.6',
'pytest-xdist==2.5.0',
'pytest-timeout==2.1.0',
przadka marked this conversation as resolved.
Show resolved Hide resolved
]
REQUIREMENTS_BUILD = ['setuptools>=20.2']

Expand Down
Loading
Loading