Skip to content

Commit

Permalink
optimized copying
Browse files Browse the repository at this point in the history
  • Loading branch information
willmcgugan committed Jan 28, 2018
1 parent d8c47d0 commit 3a06301
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 21 deletions.
40 changes: 32 additions & 8 deletions fs/copy.py
Expand Up @@ -103,14 +103,41 @@ def copy_file(src_fs, src_path, dst_fs, dst_path):
else:
# Standard copy
with src_fs.lock(), dst_fs.lock():
if src_fs.getmeta().get('network', False):
if dst_fs.hassyspath(dst_path):
with dst_fs.openbin(dst_path, 'w') as write_file:
src_fs.getfile(src_path, write_file)
else:
with src_fs.openbin(src_path) as read_file:
dst_fs.setbinfile(dst_path, read_file)


def copy_file_internal(src_fs, src_path, dst_fs, dst_path):
    """Copy a single file without resolving or locking the filesystems.

    This is the bare copy step intended for use inside loops, where the
    caller is expected to have taken care of any filesystem setup and
    locking itself. Outside of such loops, prefer `copy_file`.

    The strategy is picked from the arguments:

    * If both filesystems are the very same object, the copy is handed
      to ``src_fs.copy`` with ``overwrite=True``.
    * Otherwise, if ``dst_fs.hassyspath(dst_path)`` is true, the
      destination is opened for binary writing and the source
      filesystem writes into it via ``getfile``.
    * Otherwise the source is opened for binary reading and passed to
      ``dst_fs.setbinfile``.

    Arguments:
        src_fs (FS): Source filesystem. Methods are called on it
            directly, so it must be a filesystem object.
        src_path (str): Path to a file on the source filesystem.
        dst_fs (FS): Destination filesystem. Methods are called on it
            directly, so it must be a filesystem object.
        dst_path (str): Path to a file on the destination filesystem.

    """
    # Identity check (not equality): only the same object can use the
    # filesystem's own copy implementation.
    if src_fs is dst_fs:
        src_fs.copy(src_path, dst_path, overwrite=True)
        return

    if not dst_fs.hassyspath(dst_path):
        # No system path for the destination: stream from the source.
        with src_fs.openbin(src_path) as source_stream:
            dst_fs.setbinfile(dst_path, source_stream)
        return

    # Destination has a system path: let the source write into it.
    with dst_fs.openbin(dst_path, 'w') as target_stream:
        src_fs.getfile(src_path, target_stream)


def copy_file_if_newer(src_fs, src_path, dst_fs, dst_path):
"""Copy a file from one filesystem to another, checking times.
Expand Down Expand Up @@ -145,11 +172,8 @@ def copy_file_if_newer(src_fs, src_path, dst_fs, dst_path):
with src_fs.lock(), dst_fs.lock():
if _source_is_newer(src_fs, src_path,
dst_fs, dst_path):
with src_fs.openbin(src_path) as read_file:
# There may be an optimized copy available
# on dst_fs
dst_fs.setbinfile(dst_path, read_file)
return True
copy_file_internal(src_fs, src_path, dst_fs, dst_path)
return True
else:
return False

Expand Down Expand Up @@ -211,7 +235,7 @@ def copy_dir(src_fs, src_path, dst_fs, dst_path,
for info in files:
src_path = info.make_path(dir_path)
dst_path = info.make_path(copy_path)
copy_file(
copy_file_internal(
src_fs,
src_path,
dst_fs,
Expand Down Expand Up @@ -278,5 +302,5 @@ def copy_dir_if_newer(src_fs, src_path, dst_fs, dst_path,
src_modified > dst_state[dir_path].modified
)
if do_copy:
copy_file(src_fs, dir_path, dst_fs, copy_path)
copy_file_internal(src_fs, dir_path, dst_fs, copy_path)
on_copy(src_fs, dir_path, dst_fs, copy_path)
4 changes: 2 additions & 2 deletions fs/mirror.py
Expand Up @@ -20,7 +20,7 @@
from __future__ import unicode_literals


from .copy import copy_file
from .copy import copy_file_internal
from .errors import ResourceNotFound
from .walk import Walker
from .opener import manage_fs
Expand Down Expand Up @@ -92,7 +92,7 @@ def _mirror(src_fs, dst_fs, walker=None, copy_if_newer=True):
# Compare file info
if copy_if_newer and not _compare(_file, dst_file):
continue
copy_file(src_fs, _path, dst_fs, _path)
copy_file_internal(src_fs, _path, dst_fs, _path)

# Make directories
for _dir in dirs:
Expand Down
8 changes: 4 additions & 4 deletions fs/multifs.py
Expand Up @@ -291,13 +291,13 @@ def geturl(self, path, purpose='download'):

def hassyspath(self, path):
self.check()
fs = self._delegate_required(path)
return fs.hassyspath(path)
fs = self._delegate(path)
return fs is not None and fs.hassyspath(path)

def hasurl(self, path, purpose='download'):
self.check()
fs = self._delegate_required(path)
return fs.hasurl(path, purpose=purpose)
fs = self._delegate(path)
return fs is not None and fs.hasurl(path, purpose=purpose)

def isdir(self, path):
self.check()
Expand Down
4 changes: 4 additions & 0 deletions fs/test.py
Expand Up @@ -401,6 +401,8 @@ def test_getsyspath(self):
self.assertFalse(self.fs.hassyspath('foo'))
else:
self.assertTrue(self.fs.hassyspath('foo'))
# Should not throw an error
self.fs.hassyspath('a/b/c/foo/bar')

def test_geturl(self):
self.fs.create('foo')
Expand All @@ -410,6 +412,8 @@ def test_geturl(self):
self.assertFalse(self.fs.hasurl('foo'))
else:
self.assertTrue(self.fs.hasurl('foo'))
# Should not throw an error
self.fs.hasurl('a/b/c/foo/bar')

def test_geturl_purpose(self):
"""Check an unknown purpose raises a NoURL error.
Expand Down
12 changes: 5 additions & 7 deletions fs/walk.py
Expand Up @@ -144,14 +144,12 @@ def __repr__(self):
max_depth=(self.max_depth, None)
)

@property
def _iter_walk(self):
def _iter_walk(self, fs, path, namespaces=None):
"""Get the walk generator."""
return (
self._walk_breadth
if self.search == 'breadth' else
self._walk_depth
)
if self.search == 'breadth':
return self._walk_breadth(fs, path, namespaces=namespaces)
else:
return self._walk_depth(fs, path, namespaces=namespaces)

def _check_open_dir(self, fs, path, info):
"""Check if a directory should be considered in the walk.
Expand Down

0 comments on commit 3a06301

Please sign in to comment.