Skip to content

Commit

Permalink
Merge pull request #523 from tfeldmann/move_file-optimization
Browse files Browse the repository at this point in the history
Optimize `move.move_file` for moving files between different OSFS instances.
  • Loading branch information
althonos committed May 2, 2022
2 parents 4834232 + da33922 commit 62d2def
Show file tree
Hide file tree
Showing 9 changed files with 242 additions and 38 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
([#527](https://github.com/PyFilesystem/pyfilesystem2/pull/527)).
- Mark `fs.zipfs.ReadZipFS` as a case-sensitive filesystem
([#527](https://github.com/PyFilesystem/pyfilesystem2/pull/527)).
- Optimized moving files between filesystems with syspaths.
([#523](https://github.com/PyFilesystem/pyfilesystem2/pull/523)).
- Fixed `fs.move.move_file` to clean up the copy on the destination in case of errors.
- `fs.opener.manage_fs` with `writeable=True` will now raise a `ResourceReadOnly`
exception if the managed filesystem is not writeable.
- Marked filesystems wrapped with `fs.wrap.WrapReadOnly` as read-only.


## [2.4.15] - 2022-02-07
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Many thanks to the following developers for contributing to this project:
- [Silvan Spross](https://github.com/sspross)
- [@sqwishy](https://github.com/sqwishy)
- [Sven Schliesing](https://github.com/muffl0n)
- [Thomas Feldmann](https://github.com/tfeldmann)
- [Tim Gates](https://github.com/timgates42/)
- [@tkossak](https://github.com/tkossak)
- [Todd Levi](https://github.com/televi)
Expand Down
41 changes: 41 additions & 0 deletions fs/_pathcompat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# mypy: ignore-errors
try:
from os.path import commonpath
except ImportError:
# Return the longest common sub-path of the sequence of paths given as input.
# The paths are not normalized before comparing them (this is the
# responsibility of the caller). Any trailing separator is stripped from the
# returned path.

def commonpath(paths):
"""Given a sequence of path names, returns the longest common sub-path."""

if not paths:
raise ValueError("commonpath() arg is an empty sequence")

paths = tuple(paths)
if isinstance(paths[0], bytes):
sep = b"/"
curdir = b"."
else:
sep = "/"
curdir = "."

split_paths = [path.split(sep) for path in paths]

try:
(isabs,) = set(p[:1] == sep for p in paths)
except ValueError:
raise ValueError("Can't mix absolute and relative paths")

split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
s1 = min(split_paths)
s2 = max(split_paths)
common = s1
for i, c in enumerate(s1):
if c != s2[i]:
common = s1[:i]
break

prefix = sep if isabs else sep[:0]
return prefix + sep.join(common)
6 changes: 4 additions & 2 deletions fs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

import six

from . import copy, errors, fsencode, iotools, move, tools, walk, wildcard
from . import copy, errors, fsencode, iotools, tools, walk, wildcard
from .copy import copy_modified_time
from .glob import BoundGlobber
from .mode import validate_open_mode
Expand Down Expand Up @@ -1083,10 +1083,12 @@ def movedir(self, src_path, dst_path, create=False, preserve_time=False):
ancestors is not a directory.
"""
from .move import move_dir

with self._lock:
if not create and not self.exists(dst_path):
raise errors.ResourceNotFound(dst_path)
move.move_dir(self, src_path, self, dst_path, preserve_time=preserve_time)
move_dir(self, src_path, self, dst_path, preserve_time=preserve_time)

def makedirs(
self,
Expand Down
90 changes: 59 additions & 31 deletions fs/move.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

from .copy import copy_dir
from .copy import copy_file
from .errors import FSError
from .opener import manage_fs
from .osfs import OSFS
from .path import frombase
from ._pathcompat import commonpath

if typing.TYPE_CHECKING:
from .base import FS
Expand Down Expand Up @@ -42,6 +46,7 @@ def move_file(
dst_fs, # type: Union[Text, FS]
dst_path, # type: Text
preserve_time=False, # type: bool
cleanup_dst_on_error=True, # type: bool
):
# type: (...) -> None
"""Move a file from one filesystem to another.
Expand All @@ -53,26 +58,55 @@ def move_file(
dst_path (str): Path to a file on ``dst_fs``.
preserve_time (bool): If `True`, try to preserve mtime of the
resources (defaults to `False`).
cleanup_dst_on_error (bool): If `True`, tries to delete the file copied to
``dst_fs`` if deleting the file from ``src_fs`` fails (defaults to `True`).
"""
with manage_fs(src_fs) as _src_fs:
with manage_fs(dst_fs, create=True) as _dst_fs:
with manage_fs(src_fs, writeable=True) as _src_fs:
with manage_fs(dst_fs, writeable=True, create=True) as _dst_fs:
if _src_fs is _dst_fs:
# Same filesystem, may be optimized
_src_fs.move(
src_path, dst_path, overwrite=True, preserve_time=preserve_time
)
else:
# Standard copy and delete
with _src_fs.lock(), _dst_fs.lock():
copy_file(
_src_fs,
src_path,
_dst_fs,
dst_path,
preserve_time=preserve_time,
)
return

if _src_fs.hassyspath(src_path) and _dst_fs.hassyspath(dst_path):
# if both filesystems have a syspath we create a new OSFS from a
# common parent folder and use it to move the file.
try:
src_syspath = _src_fs.getsyspath(src_path)
dst_syspath = _dst_fs.getsyspath(dst_path)
common = commonpath([src_syspath, dst_syspath])
if common:
rel_src = frombase(common, src_syspath)
rel_dst = frombase(common, dst_syspath)
with _src_fs.lock(), _dst_fs.lock():
with OSFS(common) as base:
base.move(rel_src, rel_dst, preserve_time=preserve_time)
return # optimization worked, exit early
except ValueError:
# This is raised if we cannot find a common base folder.
# In this case just fall through to the standard method.
pass

# Standard copy and delete
with _src_fs.lock(), _dst_fs.lock():
copy_file(
_src_fs,
src_path,
_dst_fs,
dst_path,
preserve_time=preserve_time,
)
try:
_src_fs.remove(src_path)
except FSError as e:
# if the source cannot be removed we delete the copy on the
# destination
if cleanup_dst_on_error:
_dst_fs.remove(dst_path)
raise e


def move_dir(
Expand All @@ -97,22 +131,16 @@ def move_dir(
resources (defaults to `False`).
"""

def src():
return manage_fs(src_fs, writeable=False)

def dst():
return manage_fs(dst_fs, create=True)

with src() as _src_fs, dst() as _dst_fs:
with _src_fs.lock(), _dst_fs.lock():
_dst_fs.makedir(dst_path, recreate=True)
copy_dir(
src_fs,
src_path,
dst_fs,
dst_path,
workers=workers,
preserve_time=preserve_time,
)
_src_fs.removetree(src_path)
with manage_fs(src_fs, writeable=True) as _src_fs:
with manage_fs(dst_fs, writeable=True, create=True) as _dst_fs:
with _src_fs.lock(), _dst_fs.lock():
_dst_fs.makedir(dst_path, recreate=True)
copy_dir(
src_fs,
src_path,
dst_fs,
dst_path,
workers=workers,
preserve_time=preserve_time,
)
_src_fs.removetree(src_path)
9 changes: 9 additions & 0 deletions fs/opener/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from .base import Opener
from .errors import UnsupportedProtocol, EntryPointError
from ..errors import ResourceReadOnly
from .parse import parse_fs_url

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -282,10 +283,18 @@ def manage_fs(
"""
from ..base import FS

def assert_writeable(fs):
if fs.getmeta().get("read_only", True):
raise ResourceReadOnly(path="/")

if isinstance(fs_url, FS):
if writeable:
assert_writeable(fs_url)
yield fs_url
else:
_fs = self.open_fs(fs_url, create=create, writeable=writeable, cwd=cwd)
if writeable:
assert_writeable(_fs)
try:
yield _fs
finally:
Expand Down
8 changes: 8 additions & 0 deletions fs/wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
Dict,
Iterator,
IO,
Mapping,
Optional,
Text,
Tuple,
Expand Down Expand Up @@ -320,3 +321,10 @@ def touch(self, path):
# type: (Text) -> None
self.check()
raise ResourceReadOnly(path)

def getmeta(self, namespace="standard"):
# type: (Text) -> Mapping[Text, object]
self.check()
meta = dict(self.delegate_fs().getmeta(namespace=namespace))
meta.update(read_only=True, supports_rename=False)
return meta
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ per-file-ignores =
tests/*:E501
fs/opener/*:F811
fs/_fscompat.py:F401
fs/_pathcompat.py:C401
[isort]
default_section = THIRD_PARTY
Expand Down
Loading

0 comments on commit 62d2def

Please sign in to comment.