diff --git a/.travis.yml b/.travis.yml index 58b35f5..461fc5d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,21 +2,14 @@ language: python cache: pip python: - 2.7 - - 3.3 - 3.4 - 3.5 - 3.6 install: - travis_retry pip install --upgrade pip - - travis_retry pip install --upgrade setuptools wheel coveralls + - travis_retry pip install --upgrade setuptools wheel + - travis_retry pip install --upgrade coveralls tox-travis script: - - | - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]] || [[ $TRAVIS_PYTHON_VERSION == '3.3' ]] || [[ $TRAVIS_PYTHON_VERSION == '3.4' ]]; then - travis_retry pip install tox - tox -e $(echo py$TRAVIS_PYTHON_VERSION | tr -d .) - else - travis_retry pip install tox-travis - tox - fi + - tox after_success: - coveralls --rcfile=.coveragerc --verbose diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f6e189..41cc96f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,27 @@ ## [Unreleased] +## [1.0.0b1] - 2017-08-28 +### Added +- Cross-mode synchronous copy support +- Duplicate detection (different local source paths mapping to the same +destination) on upload + +### Changed +- Python 3.3 is no longer supported (due to `cryptography` dropping support +for 3.3). +- `--strip-components` now defaults to `0` +- `timeout_sec` YAML property is now named `timeout` and is a complex property +comprised of `connect` and `read` values expressed in seconds +- Test coverage improved +- Dependencies updated to latest + +### Fixed +- Properly merge CLI options with YAML config options. You can now override +most YAML config settings with CLI options at runtime. +- Issue with zero-byte uploads +- Check for max page blob size + ## [1.0.0a5] - 2017-06-09 ### Added - Synchronous copy support with the `synccopy` command. This command supports @@ -210,7 +231,8 @@ usage documentation carefully when upgrading from 0.12.1. `--no-skiponmatch`. - 0.8.2: performance regression fixes -[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0a5...HEAD +[Unreleased]: https://github.com/Azure/blobxfer/compare/1.0.0b1...HEAD +[1.0.0b1]: https://github.com/Azure/blobxfer/compare/1.0.0a5...1.0.0b1 [1.0.0a5]: https://github.com/Azure/blobxfer/compare/1.0.0a4...1.0.0a5 [1.0.0a4]: https://github.com/Azure/blobxfer/compare/0.12.1...1.0.0a4 [0.12.1]: https://github.com/Azure/blobxfer/compare/0.12.0...0.12.1 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..fa8fa7b --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,8 @@ +# Code of Conduct + +This project has adopted the +[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the +[Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +or contact [](mailto:opencode@microsoft.com) with any +additional questions or comments. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f80665d..b298193 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,4 @@ -Contributing Code ------------------ - -This project has adopted the -[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the -[Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) -or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any -additional questions or comments. +# Contributing If you would like to contribute to this project, please view the [Microsoft Contribution guidelines](https://azure.github.io/guidelines/). 
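The `timeout` entry in the changelog hunk above refers to the `Timeout` options class added to `blobxfer/models/options.py` later in this patch. A minimal sketch of how that object resolves the new `connect`/`read` values; the defaults shown come from `_DEFAULT_REQUESTS_TIMEOUT` in the same file, and the snippet is illustrative only, not part of the patch:

```python
from blobxfer.models.options import Timeout

# connect/read are expressed in seconds; None or non-positive values
# fall back to the _DEFAULT_REQUESTS_TIMEOUT defaults of (3.1, 12.1)
to = Timeout(connect=None, read=60)

print(to.connect)   # 3.1 (default connect timeout)
print(to.read)      # 60
print(to.timeout)   # (3.1, 60) -- the (connect, read) tuple in requests format
```

The `(connect, read)` tuple is what the patch then passes to each Azure Storage client as `socket_timeout`.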
diff --git a/README.md b/README.md index 14d866b..2df45e5 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ from Azure Blob and File Storage throughput limits * `replica` mode allows replication of a file across multiple destinations including to multiple storage accounts -* Synchronous copy with replication support (including block-level copies -for Block blobs) +* Synchronous copy with cross-mode replication support (including block-level +copies for Block blobs) * Client-side encryption support * Support all Azure Blob types and Azure Files for both upload and download * Advanced skip options for rsync-like operations @@ -39,6 +39,7 @@ for Block blobs) * Include and exclude filtering support * Rsync-like delete support * No clobber support in either direction +* Automatic content type tagging * File logging support ## Installation @@ -56,11 +57,6 @@ For recent changes, please refer to the [CHANGELOG.md](https://github.com/Azure/blobxfer/blob/master/CHANGELOG.md) file. ------------------------------------------------------------------------- - -This project has adopted the -[Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the -[Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) -or contact [](mailto:opencode@microsoft.com) with any -additional questions or comments. +* * * +Please see this project's [Code of Conduct](CODE_OF_CONDUCT.md) and +[Contributing](CONTRIBUTING.md) guidelines. diff --git a/blobxfer/__init__.py b/blobxfer/__init__.py index 0a8432f..e2c94ed 100644 --- a/blobxfer/__init__.py +++ b/blobxfer/__init__.py @@ -30,15 +30,12 @@ azure.storage._constants.USER_AGENT_STRING = 'blobxfer/{} {}'.format( __version__, azure.storage._constants.USER_AGENT_STRING) -# monkeypatch SOCKET_TIMEOUT value in Azure Storage SDK -azure.storage._constants.SOCKET_TIMEOUT = (5, 300) - # set stdin source -if sys.version_info >= (3, 0): +if sys.version_info >= (3, 0): # noqa STDIN = sys.stdin.buffer -else: +else: # noqa # set stdin to binary mode on Windows - if sys.platform == 'win32': # noqa + if sys.platform == 'win32': import msvcrt import os msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) diff --git a/blobxfer/models/download.py b/blobxfer/models/download.py index cc363ef..bd08c97 100644 --- a/blobxfer/models/download.py +++ b/blobxfer/models/download.py @@ -191,7 +191,7 @@ class Descriptor(object): _AES_BLOCKSIZE = blobxfer.models.crypto.AES256_BLOCKSIZE_BYTES def __init__(self, lpath, ase, options, resume_mgr): - # type: (Descriptior, pathlib.Path, + # type: (Descriptor, pathlib.Path, # blobxfer.models.azure.StorageEntity, # blobxfer.models.options.Download, # blobxfer.operations.resume.DownloadResumeManager) -> None @@ -321,10 +321,10 @@ def compute_allocated_size(size, is_encrypted): size // blobxfer.models.download.Descriptor._AES_BLOCKSIZE - 1 ) * blobxfer.models.download.Descriptor._AES_BLOCKSIZE + if allocatesize < 0: + raise RuntimeError('allocatesize is negative') else: allocatesize = size - if allocatesize < 0: - allocatesize = 0 else: allocatesize = 0 return allocatesize @@ -364,10 +364,9 @@ def convert_vectored_io_slice_to_final_path_name(local_path, ase): :rtype: pathlib.Path :return: converted final path """ - name = local_path.name name = blobxfer.models.metadata.\ remove_vectored_io_slice_suffix_from_name( - name, ase.vectored_io.slice_id) + local_path.name, ase.vectored_io.slice_id) _tmp = list(local_path.parts[:-1]) _tmp.append(name) return pathlib.Path(*_tmp) @@ -460,7 +459,7 @@ 
def _resume(self): return None self._allocate_disk_space() # check if final path exists - if not self.final_path.exists(): + if not self.final_path.exists(): # noqa logger.warning('download path {} does not exist'.format( self.final_path)) return None @@ -493,7 +492,7 @@ def _resume(self): if rr.md5hexdigest != hexdigest: logger.warning( 'MD5 mismatch resume={} computed={} for {}'.format( - rr.md5hexdigest, hexdigest, self.final_path)) + rr.md5hexdigest, hexdigest, self.final_path)) # reset hasher self.md5 = blobxfer.util.new_md5_hasher() return None @@ -768,12 +767,12 @@ def _restore_file_attributes(self): if self._ase.file_attributes is None: return # set file uid/gid and mode - if blobxfer.util.on_windows(): + if blobxfer.util.on_windows(): # noqa # TODO not implemented yet pass else: self.final_path.chmod(int(self._ase.file_attributes.mode, 8)) - if os.getuid() == 0: + if os.getuid() == 0: # noqa os.chown( str(self.final_path), self._ase.file_attributes.uid, diff --git a/blobxfer/models/metadata.py b/blobxfer/models/metadata.py index ead4b79..f47098c 100644 --- a/blobxfer/models/metadata.py +++ b/blobxfer/models/metadata.py @@ -164,18 +164,6 @@ def fileattr_from_metadata(md): return fileattr -def restore_fileattr(path, metadata): - # type: (pathlib.Path, dict) -> None - """Restore file attributes from metadata - :param pathlib.Path path: path to modify - :param dict metadata: existing metadata dict - """ - if blobxfer.util.on_windows(): - logger.warning( - 'file attributes store/restore on Windows is not supported yet') - raise NotImplementedError() - - def create_vectored_io_next_entry(ase): # type: (blobxfer.models.azure.StorageEntity) -> str """Create Vectored IO next entry id diff --git a/blobxfer/models/options.py b/blobxfer/models/options.py index 92b86f7..085a37d 100644 --- a/blobxfer/models/options.py +++ b/blobxfer/models/options.py @@ -43,6 +43,8 @@ # create logger logger = logging.getLogger(__name__) +# global defines +_DEFAULT_REQUESTS_TIMEOUT = (3.1, 12.1) # named tuples VectoredIo = collections.namedtuple( @@ -95,6 +97,7 @@ SyncCopy = collections.namedtuple( 'SyncCopy', [ 'delete_extraneous_destination', + 'dest_mode', 'mode', 'overwrite', 'recursive', @@ -102,6 +105,48 @@ ) +class Timeout(object): + """Timeout Options""" + def __init__(self, connect, read): + """Ctor for Timeout options + :param Timeout self: this + :param float connect: connect timeout + :param float read: read timeout + """ + if connect is None or connect <= 0: + self._connect = _DEFAULT_REQUESTS_TIMEOUT[0] + else: + self._connect = connect + if read is None or read <= 0: + self._read = _DEFAULT_REQUESTS_TIMEOUT[1] + else: + self._read = read + + @property + def connect(self): + """Connect timeout + :rtype: float + :return: connect timeout + """ + return self._connect + + @property + def read(self): + """Read timeout + :rtype: float + :return: read timeout + """ + return self._read + + @property + def timeout(self): + """Timeout property in requests format + :rtype: tuple + :return: (connect, read) timeout tuple + """ + return (self._connect, self._read) + + class Concurrency(object): """Concurrency Options""" def __init__( @@ -157,14 +202,14 @@ class General(object): """General Options""" def __init__( self, concurrency, log_file=None, progress_bar=True, - resume_file=None, timeout_sec=None, verbose=False): + resume_file=None, timeout=None, verbose=False): """Ctor for General Options :param General self: this :param Concurrency concurrency: concurrency options :param bool progress_bar: progress bar 
:param str log_file: log file :param str resume_file: resume file - :param int timeout_sec: timeout in seconds + :param Timeout timeout: timeout options :param bool verbose: verbose output """ if concurrency is None: @@ -176,5 +221,5 @@ def __init__( self.resume_file = pathlib.Path(resume_file) else: self.resume_file = None - self.timeout_sec = timeout_sec + self.timeout = timeout self.verbose = verbose diff --git a/blobxfer/models/resume.py b/blobxfer/models/resume.py index 69d1c2f..7779d7f 100644 --- a/blobxfer/models/resume.py +++ b/blobxfer/models/resume.py @@ -156,8 +156,7 @@ def __repr__(self): 'next_integrity_chunk={} completed={} md5={}>').format( self.final_path, self.length, self.chunk_size, self.next_integrity_chunk, self.completed, - self.md5hexdigest, - ) + self.md5hexdigest) class Upload(object): @@ -295,8 +294,7 @@ def __repr__(self): 'md5={}>').format( self.local_path, self.length, self.chunk_size, self.total_chunks, self.completed_chunks, self.completed, - self.md5hexdigest, - ) + self.md5hexdigest) class SyncCopy(object): @@ -428,5 +426,4 @@ def __repr__(self): return ('SyncCopy').format( self.length, self.chunk_size, self.total_chunks, - self.completed_chunks, self.completed, - ) + self.completed_chunks, self.completed) diff --git a/blobxfer/models/synccopy.py b/blobxfer/models/synccopy.py index 8ec1d88..15874ad 100644 --- a/blobxfer/models/synccopy.py +++ b/blobxfer/models/synccopy.py @@ -97,11 +97,16 @@ class Descriptor(object): """Synccopy Descriptor""" def __init__(self, src_ase, dst_ase, block_list, options, resume_mgr): # type: (Descriptior, blobxfer.models.azure.StorageEntity, + # blobxfer.models.azure.StorageEntity, list, # blobxfer.models.options.SyncCopy, # blobxfer.operations.resume.SyncCopyResumeManager) -> None """Ctor for Descriptor :param Descriptor self: this - :param blobxfer.models.azure.StorageEntity ase: Azure Storage Entity + :param blobxfer.models.azure.StorageEntity src_ase: + source Azure Storage Entity + :param blobxfer.models.azure.StorageEntity dst_ase: + destination Azure Storage Entity + :param list block_list: source blob block list :param blobxfer.models.options.SyncCopy options: synccopy options :param blobxfer.operations.resume.SyncCopyResumeManager resume_mgr: synccopy resume manager @@ -186,7 +191,7 @@ def remote_is_file(self): :rtype: bool :return: remote is an Azure File """ - return self.src_entity.mode == blobxfer.models.azure.StorageModes.File + return self.dst_entity.mode == blobxfer.models.azure.StorageModes.File @property def remote_is_page_blob(self): @@ -196,7 +201,7 @@ def remote_is_page_blob(self): :rtype: bool :return: remote is an Azure Page Blob """ - return self.src_entity.mode == blobxfer.models.azure.StorageModes.Page + return self.dst_entity.mode == blobxfer.models.azure.StorageModes.Page @property def remote_is_append_blob(self): @@ -207,7 +212,7 @@ def remote_is_append_blob(self): :return: remote is an Azure Append Blob """ return ( - self.src_entity.mode == blobxfer.models.azure.StorageModes.Append + self.dst_entity.mode == blobxfer.models.azure.StorageModes.Append ) @property @@ -219,7 +224,7 @@ def is_one_shot_block_blob(self): :return: if upload is a one-shot block blob """ return ( - self.src_entity.mode == + self.dst_entity.mode == blobxfer.models.azure.StorageModes.Block and self._total_chunks == 1 ) @@ -233,7 +238,7 @@ def requires_put_block_list(self): :return: if finalize requires a put block list """ return ( - self.src_entity.mode == + self.dst_entity.mode == blobxfer.models.azure.StorageModes.Block and 
self._total_chunks > 1 ) @@ -337,7 +342,7 @@ def _resume(self): with self._meta_lock: logger.debug('{} upload already completed'.format( self._dst_ase.path)) - self._offset = rr._offset + self._offset = rr.offset self._src_block_list = rr.src_block_list self._chunk_num = rr.total_chunks self._chunk_size = rr.chunk_size diff --git a/blobxfer/models/upload.py b/blobxfer/models/upload.py index 8b5134d..c214513 100644 --- a/blobxfer/models/upload.py +++ b/blobxfer/models/upload.py @@ -57,8 +57,9 @@ _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES = 104857600 _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES = 4194304 _MAX_NUM_CHUNKS = 50000 +_MAX_PAGE_BLOB_SIZE = 8796093022208 _DEFAULT_AUTO_CHUNKSIZE_BYTES = 16777216 -_MAX_MD5_CACHE_RESUME_ENTRIES = 25 +_MD5_CACHE_RESUME_ENTRIES_GC_THRESHOLD = 25 # named tuples @@ -111,7 +112,7 @@ def __init__(self, parent_path, relative_path, use_stdin=False, view=None): # populate properties if self.use_stdin: # create dummy stat object - self._stat = type('stat', (object,), {}) + self._stat = lambda: None self._stat.st_size = 0 self._stat.st_mtime = 0 self._stat.st_mode = 0 @@ -255,17 +256,17 @@ def files(self): relative_path=pathlib.Path(tmp.name), use_stdin=False, ) - continue - del tmp - for entry in blobxfer.util.scantree(_ppath): - _rpath = pathlib.Path(entry.path).relative_to(_ppath) - if not self._inclusion_check(_rpath): - continue - yield LocalPath( - parent_path=_expath, - relative_path=_rpath, - use_stdin=False, - ) + else: + del tmp + for entry in blobxfer.util.scantree(_ppath): + _rpath = pathlib.Path(entry.path).relative_to(_ppath) + if not self._inclusion_check(_rpath): + continue + yield LocalPath( + parent_path=_expath, + relative_path=_rpath, + use_stdin=False, + ) class Specification(object): @@ -514,7 +515,9 @@ def complete_offset_upload(self, chunk_num): # chunk are complete if blobxfer.util.is_not_empty(self._ase.replica_targets): if chunk_num not in self._replica_counters: - self._replica_counters[chunk_num] = 0 + # start counter at -1 since we need 1 "extra" for the + # primary in addition to the replica targets + self._replica_counters[chunk_num] = -1 self._replica_counters[chunk_num] += 1 if (self._replica_counters[chunk_num] != len(self._ase.replica_targets)): @@ -530,17 +533,17 @@ def complete_offset_upload(self, chunk_num): md5digest = self._md5_cache[last_consecutive] else: md5digest = None - if completed: - last_consecutive = None - self._md5_cache.clear() self._resume_mgr.add_or_update_record( self.local_path.absolute_path, self._ase, self._chunk_size, self._total_chunks, self._completed_chunks.int, completed, md5digest, ) # prune md5 cache - if (last_consecutive is not None and - len(self._md5_cache) > _MAX_MD5_CACHE_RESUME_ENTRIES): + if completed: + self._md5_cache.clear() + elif (last_consecutive is not None and + len(self._md5_cache) > + _MD5_CACHE_RESUME_ENTRIES_GC_THRESHOLD): mkeys = sorted(list(self._md5_cache.keys())) for key in mkeys: if key >= last_consecutive: @@ -585,8 +588,6 @@ def _compute_remote_size(self): self._AES_BLOCKSIZE else: allocatesize = size - if allocatesize < 0: - allocatesize = 0 else: allocatesize = 0 self._ase.size = allocatesize @@ -654,6 +655,11 @@ def _adjust_chunk_size(self, options): 'adjusting chunk size to {} for file from {}'.format( self._chunk_size, self.local_path.absolute_path)) elif self._ase.mode == blobxfer.models.azure.StorageModes.Page: + if self._ase.size > _MAX_PAGE_BLOB_SIZE: + raise RuntimeError( + '{} size {} exceeds maximum page blob size of {}'.format( + self.local_path.absolute_path, self._ase.size, + 
_MAX_PAGE_BLOB_SIZE)) if self._chunk_size > _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES: self._chunk_size = _MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES logger.debug( @@ -674,7 +680,8 @@ def _compute_total_chunks(self, chunk_size): chunks = 1 if self.local_path.use_stdin and chunks == 0: chunks = 1 - if chunks > 50000: + if (self._ase.mode != blobxfer.models.azure.StorageModes.Page and + chunks > 50000): max_vector = False if self._ase.mode == blobxfer.models.azure.StorageModes.Block: if self._chunk_size == _MAX_BLOCK_BLOB_CHUNKSIZE_BYTES: @@ -717,6 +724,12 @@ def _initialize_integrity_checkers(self, options): self.md5 = blobxfer.util.new_md5_hasher() def _resume(self): + # type: (Descriptor) -> int + """Resume upload + :param Descriptor self: this + :rtype: int + :return: resume bytes + """ if self._resume_mgr is None or self._offset > 0: return None # check if path exists in resume db @@ -786,7 +799,7 @@ def _resume(self): if rr.md5hexdigest != hexdigest: logger.warning( 'MD5 mismatch resume={} computed={} for {}'.format( - rr.md5hexdigest, hexdigest, self._ase.path)) + rr.md5hexdigest, hexdigest, self._ase.path)) # reset hasher self.md5 = blobxfer.util.new_md5_hasher() return None diff --git a/blobxfer/operations/azure/__init__.py b/blobxfer/operations/azure/__init__.py index 07b4059..af03303 100644 --- a/blobxfer/operations/azure/__init__.py +++ b/blobxfer/operations/azure/__init__.py @@ -34,7 +34,9 @@ import requests # local imports import blobxfer.models +import blobxfer.models.crypto import blobxfer.models.metadata +import blobxfer.models.options import blobxfer.operations.azure.blob.append import blobxfer.operations.azure.blob.block import blobxfer.operations.azure.blob.page @@ -65,7 +67,8 @@ def add_storage_account(self, name, key, endpoint): '{} already exists in storage accounts'.format(name)) self._storage_accounts[name] = StorageAccount( name, key, endpoint, - self._general_options.concurrency.transfer_threads + self._general_options.concurrency.transfer_threads, + self._general_options.timeout.timeout, ) def get_storage_account(self, name): @@ -81,14 +84,19 @@ def get_storage_account(self, name): class StorageAccount(object): """Azure Storage Account""" - def __init__(self, name, key, endpoint, transfer_threads): - # type: (StorageAccount, str, str, str, int) -> None + def __init__(self, name, key, endpoint, transfer_threads, timeout): + # type: (StorageAccount, str, str, str, int, tuple) -> None """Ctor for StorageAccount :param str name: name of storage account :param str key: storage key or sas :param str endpoint: endpoint :param int transfer_threads: number of transfer threads + :param tuple timeout: timeout tuple """ + if blobxfer.util.is_none_or_empty(key): + raise ValueError( + ('no authentication credential given for storage ' + 'account: {}').format(name)) self._append_blob_client = None self._block_blob_client = None self._file_client = None @@ -110,7 +118,7 @@ def __init__(self, name, key, endpoint, transfer_threads): pool_maxsize=transfer_threads << 1, ) ) - self._create_clients() + self._create_clients(timeout) @staticmethod def _key_is_sas(key): @@ -155,18 +163,20 @@ def _container_creation_allowed(self): return True return False - def _create_clients(self): - # type: (StorageAccount) -> None + def _create_clients(self, timeout): + # type: (StorageAccount, tuple) -> None """Create Azure Storage clients :param StorageAccount self: this + :param tuple timeout: timeout tuple """ self._append_blob_client = \ - blobxfer.operations.azure.blob.append.create_client(self) + 
blobxfer.operations.azure.blob.append.create_client(self) +
blobxfer.operations.azure.blob.append.create_client(self, timeout) self._block_blob_client = \ - blobxfer.operations.azure.blob.block.create_client(self) - self._file_client = blobxfer.operations.azure.file.create_client(self) + blobxfer.operations.azure.blob.block.create_client(self, timeout) + self._file_client = blobxfer.operations.azure.file.create_client( + self, timeout) self._page_blob_client = \ - blobxfer.operations.azure.blob.page.create_client(self) + blobxfer.operations.azure.blob.page.create_client(self, timeout) @property def append_blob_client(self): @@ -243,25 +253,21 @@ def lookup_storage_account(self, remote_path): """ return self._path_map[blobxfer.util.normalize_azure_path(remote_path)] - def files(self, creds, options, general_options): + def files(self, creds, options): # type: (SourcePath, StorageCredentials, - # blobxfer.models.options.Download, - # blobxfer.models.options.General) -> StorageEntity + # blobxfer.models.options.Download) -> StorageEntity """Generator of Azure remote files or blobs :param SourcePath self: this :param StorageCredentials creds: storage creds :param blobxfer.models.options.Download options: download options - :param blobxfer.models.options.General general_options: general options :rtype: StorageEntity :return: Azure storage entity object """ if options.mode == blobxfer.models.azure.StorageModes.File: - for file in self._populate_from_list_files( - creds, options, general_options): + for file in self._populate_from_list_files(creds, options): yield file else: - for blob in self._populate_from_list_blobs( - creds, options, general_options): + for blob in self._populate_from_list_blobs(creds, options): yield blob def _convert_to_storage_entity_with_encryption_metadata( @@ -303,16 +309,14 @@ def _convert_to_storage_entity_with_encryption_metadata( return ase def _handle_vectored_io_stripe( - self, creds, options, general_options, store_raw_metadata, - sa, entity, is_file, container, dir=None): - # type: (SourcePath, StorageCredentials, any, - # blobxfer.models.options.General, bool, StorageAccount, any, - # bool, str, str) -> StorageEntity + self, creds, options, store_raw_metadata, sa, entity, is_file, + container, dir=None): + # type: (SourcePath, StorageCredentials, any, bool, StorageAccount, + # any, bool, str, str) -> StorageEntity """Handle Vectored IO stripe entries :param SourcePath self: this :param StorageCredentials creds: storage creds :param object options: download or synccopy options - :param blobxfer.models.options.General general_options: general options :param bool store_raw_metadata: store raw metadata :param StorageAccount sa: storage account :param object entity: Storage File or Blob object @@ -351,13 +355,12 @@ def _handle_vectored_io_stripe( sa = creds.get_storage_account(vio.next.storage_account_name) if is_file: entity = blobxfer.operations.azure.file.get_file_properties( - sa.file_client, vio.next.container, vio.next.name, - timeout=general_options.timeout_sec) + sa.file_client, vio.next.container, vio.next.name) _, dir = blobxfer.util.explode_azure_path(vio.next.name) else: entity = blobxfer.operations.azure.blob.get_blob_properties( sa.block_blob_client, vio.next.container, vio.next.name, - ase.mode, timeout=general_options.timeout_sec) + ase.mode) vio = blobxfer.models.metadata.vectored_io_from_metadata( entity.metadata) # yield next @@ -366,14 +369,12 @@ def _handle_vectored_io_stripe( container, dir) yield ase - def _populate_from_list_files(self, creds, options, general_options): - # type: (SourcePath, 
StorageCredentials, any, - # blobxfer.models.options.General) -> StorageEntity + def _populate_from_list_files(self, creds, options): + # type: (SourcePath, StorageCredentials, any) -> StorageEntity """Internal generator for Azure remote files :param SourcePath self: this :param StorageCredentials creds: storage creds :param object options: download or synccopy options - :param blobxfer.models.options.General general_options: general options :rtype: StorageEntity :return: Azure storage entity object """ @@ -384,28 +385,25 @@ def _populate_from_list_files(self, creds, options, general_options): cont, dir = blobxfer.util.explode_azure_path(rpath) sa = creds.get_storage_account(self.lookup_storage_account(rpath)) for file in blobxfer.operations.azure.file.list_files( - sa.file_client, cont, dir, options.recursive, - general_options.timeout_sec): + sa.file_client, cont, dir, options.recursive): if not self._inclusion_check(file.name): continue if dir is not None: dir, _ = blobxfer.operations.azure.file.parse_file_path( dir) for ase in self._handle_vectored_io_stripe( - creds, options, general_options, store_raw_metadata, - sa, file, True, cont, dir): + creds, options, store_raw_metadata, sa, file, True, + cont, dir): if ase is None: continue yield ase - def _populate_from_list_blobs(self, creds, options, general_options): - # type: (SourcePath, StorageCredentials, any, - # blobxfer.models.options.General) -> StorageEntity + def _populate_from_list_blobs(self, creds, options): + # type: (SourcePath, StorageCredentials, any) -> StorageEntity """Internal generator for Azure remote blobs :param SourcePath self: this :param StorageCredentials creds: storage creds :param object options: download or synccopy options - :param blobxfer.models.options.General general_options: general options :rtype: StorageEntity :return: Azure storage entity object """ @@ -417,12 +415,12 @@ def _populate_from_list_blobs(self, creds, options, general_options): sa = creds.get_storage_account(self.lookup_storage_account(rpath)) for blob in blobxfer.operations.azure.blob.list_blobs( sa.block_blob_client, cont, dir, options.mode, - options.recursive, general_options.timeout_sec): + options.recursive): if not self._inclusion_check(blob.name): continue for ase in self._handle_vectored_io_stripe( - creds, options, general_options, store_raw_metadata, - sa, blob, False, cont): + creds, options, store_raw_metadata, sa, blob, + False, cont): if ase is None: continue yield ase diff --git a/blobxfer/operations/azure/blob/__init__.py b/blobxfer/operations/azure/blob/__init__.py index 45933c2..dc25fad 100644 --- a/blobxfer/operations/azure/blob/__init__.py +++ b/blobxfer/operations/azure/blob/__init__.py @@ -75,27 +75,23 @@ def get_blob_properties(client, container, prefix, mode, timeout=None): :return: blob """ if mode == blobxfer.models.azure.StorageModes.File: - raise RuntimeError('cannot list Azure Files from blob client') + raise RuntimeError( + 'cannot list Azure Blobs with incompatible mode: {}'.format( + mode)) try: blob = client.get_blob_properties( container_name=container, blob_name=prefix, timeout=timeout) except azure.common.AzureMissingResourceHttpError: return None - if (mode == blobxfer.models.azure.StorageModes.Append and - blob.properties.blob_type != - azure.storage.blob.models._BlobTypes.AppendBlob): - raise RuntimeError( - 'existing blob type {} mismatch with mode {}'.format( - blob.properties.blob_type, mode)) - elif (mode == blobxfer.models.azure.StorageModes.Block and - blob.properties.blob_type != - 
azure.storage.blob.models._BlobTypes.BlockBlob): - raise RuntimeError( - 'existing blob type {} mismatch with mode {}'.format( - blob.properties.blob_type, mode)) - elif (mode == blobxfer.models.azure.StorageModes.Page and - blob.properties.blob_type != - azure.storage.blob.models._BlobTypes.PageBlob): + if ((mode == blobxfer.models.azure.StorageModes.Append and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.AppendBlob) or + (mode == blobxfer.models.azure.StorageModes.Block and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.BlockBlob) or + (mode == blobxfer.models.azure.StorageModes.Page and + blob.properties.blob_type != + azure.storage.blob.models._BlobTypes.PageBlob)): raise RuntimeError( 'existing blob type {} mismatch with mode {}'.format( blob.properties.blob_type, mode)) @@ -183,7 +179,7 @@ def delete_blob(client, container, name, timeout=None): blob_name=name, delete_snapshots=azure.storage.blob.models.DeleteSnapshot.Include, timeout=timeout, - ) + ) # noqa def get_blob_range(ase, offsets, timeout=None): @@ -208,10 +204,10 @@ def get_blob_range(ase, offsets, timeout=None): def create_container(ase, containers_created, timeout=None): - # type: (blobxfer.models.azure.StorageEntity, dict, int) -> None + # type: (blobxfer.models.azure.StorageEntity, set, int) -> None """Create blob container :param blobxfer.models.azure.StorageEntity ase: Azure StorageEntity - :param dict containers_created: containers already created map + :param set containers_created: containers already created map :param int timeout: timeout """ # check if auth allows create container @@ -247,7 +243,7 @@ def set_blob_md5(ase, md5, timeout=None): content_type=blobxfer.util.get_mime_type(ase.name), content_md5=md5, ), - timeout=timeout) + timeout=timeout) # noqa def set_blob_metadata(ase, metadata, timeout=None): @@ -261,4 +257,4 @@ def set_blob_metadata(ase, metadata, timeout=None): container_name=ase.container, blob_name=ase.name, metadata=metadata, - timeout=timeout) + timeout=timeout) # noqa diff --git a/blobxfer/operations/azure/blob/append.py b/blobxfer/operations/azure/blob/append.py index abc276a..bd589a6 100644 --- a/blobxfer/operations/azure/blob/append.py +++ b/blobxfer/operations/azure/blob/append.py @@ -39,11 +39,13 @@ logger = logging.getLogger(__name__) -def create_client(storage_account): - # type: (blobxfer.operations.azure.StorageAccount) -> AppendBlobService +def create_client(storage_account, timeout): + # type: (blobxfer.operations.azure.StorageAccount, + # tuple) -> AppendBlobService """Create Append blob client :param blobxfer.operations.azure.StorageAccount storage_account: storage account + :param tuple timeout: timeout tuple :rtype: AppendBlobService :return: append blob service client """ @@ -52,13 +54,15 @@ def create_client(storage_account): account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) else: client = azure.storage.blob.AppendBlobService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client @@ -76,7 +80,7 @@ def create_blob(ase, timeout=None): content_settings=azure.storage.blob.models.ContentSettings( 
content_type=blobxfer.util.get_mime_type(ase.name) ), - timeout=timeout) + timeout=timeout) # noqa def append_block(ase, data, timeout=None): @@ -91,4 +95,4 @@ def append_block(ase, data, timeout=None): blob_name=ase.name, block=data, validate_content=False, # integrity is enforced with HTTPS - timeout=timeout) + timeout=timeout) # noqa diff --git a/blobxfer/operations/azure/blob/block.py b/blobxfer/operations/azure/blob/block.py index e50c209..dc9cd05 100644 --- a/blobxfer/operations/azure/blob/block.py +++ b/blobxfer/operations/azure/blob/block.py @@ -39,11 +39,13 @@ logger = logging.getLogger(__name__) -def create_client(storage_account): - # type: (blobxfer.operations.azure.StorageAccount) -> BlockBlobService +def create_client(storage_account, timeout): + # type: (blobxfer.operations.azure.StorageAccount, + # tuple) -> BlockBlobService """Create block blob client :param blobxfer.operations.azure.StorageAccount storage_account: storage account + :param tuple timeout: timeout tuple :rtype: azure.storage.blob.BlockBlobService :return: block blob service client """ @@ -52,13 +54,15 @@ def create_client(storage_account): account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) else: client = azure.storage.blob.BlockBlobService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client @@ -84,7 +88,7 @@ def create_blob(ase, data, md5, metadata, timeout=None): ), metadata=metadata, validate_content=False, # integrity is enforced with HTTPS - timeout=timeout) + timeout=timeout) # noqa def _format_block_id(chunk_num): @@ -112,7 +116,7 @@ def put_block(ase, offsets, data, timeout=None): block=data, block_id=_format_block_id(offsets.chunk_num), validate_content=False, # integrity is enforced with HTTPS - timeout=timeout) + timeout=timeout) # noqa def put_block_list(ase, last_block_num, md5, metadata, timeout=None): diff --git a/blobxfer/operations/azure/blob/page.py b/blobxfer/operations/azure/blob/page.py index aa92b14..c99f226 100644 --- a/blobxfer/operations/azure/blob/page.py +++ b/blobxfer/operations/azure/blob/page.py @@ -39,11 +39,13 @@ logger = logging.getLogger(__name__) -def create_client(storage_account): - # type: (blobxfer.operations.azure.StorageAccount) -> PageBlobService +def create_client(storage_account, timeout): + # type: (blobxfer.operations.azure.StorageAccount, + # tuple) -> PageBlobService """Create block blob client :param blobxfer.operations.azure.StorageAccount storage_account: storage account + :param tuple timeout: timeout tuple :rtype: PageBlobService :return: block blob service client """ @@ -52,13 +54,15 @@ def create_client(storage_account): account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) else: client = azure.storage.blob.PageBlobService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) # set retry policy 
client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client @@ -77,7 +81,7 @@ def create_blob(ase, timeout=None): content_settings=azure.storage.blob.models.ContentSettings( content_type=blobxfer.util.get_mime_type(ase.name) ), - timeout=timeout) + timeout=timeout) # noqa def put_page(ase, page_start, page_end, data, timeout=None): @@ -97,4 +101,4 @@ def put_page(ase, page_start, page_end, data, timeout=None): start_range=page_start, end_range=page_end, validate_content=False, # integrity is enforced with HTTPS - timeout=timeout) + timeout=timeout) # noqa diff --git a/blobxfer/operations/azure/file.py b/blobxfer/operations/azure/file.py index 3a905bf..cb54aa9 100644 --- a/blobxfer/operations/azure/file.py +++ b/blobxfer/operations/azure/file.py @@ -45,11 +45,12 @@ logger = logging.getLogger(__name__) -def create_client(storage_account): - # type: (blobxfer.operations.azure.StorageAccount) -> FileService +def create_client(storage_account, timeout): + # type: (blobxfer.operations.azure.StorageAccount, tuple) -> FileService """Create file client :param blobxfer.operations.azure.StorageAccount storage_account: storage account + :param tuple timeout: timeout tuple :rtype: FileService :return: file service client """ @@ -58,13 +59,15 @@ def create_client(storage_account): account_name=storage_account.name, sas_token=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) else: client = azure.storage.file.FileService( account_name=storage_account.name, account_key=storage_account.key, endpoint_suffix=storage_account.endpoint, - request_session=storage_account.session) + request_session=storage_account.session, + socket_timeout=timeout) # set retry policy client.retry = blobxfer.retry.ExponentialRetryWithMaxWait().retry return client diff --git a/blobxfer/operations/download.py b/blobxfer/operations/download.py index c9bb068..ca11a90 100644 --- a/blobxfer/operations/download.py +++ b/blobxfer/operations/download.py @@ -540,10 +540,10 @@ def _process_download_descriptor(self, dd): # issue get range if dd.entity.mode == blobxfer.models.azure.StorageModes.File: data = blobxfer.operations.azure.file.get_file_range( - dd.entity, offsets, self._general_options.timeout_sec) + dd.entity, offsets) else: data = blobxfer.operations.azure.blob.get_blob_range( - dd.entity, offsets, self._general_options.timeout_sec) + dd.entity, offsets) # enqueue data for processing with self._disk_operation_lock: self._disk_set.add( @@ -696,8 +696,7 @@ def _run(self): skipped_size = 0 # iterate through source paths to download for src in self._spec.sources: - for rfile in src.files( - self._creds, self._spec.options, self._general_options): + for rfile in src.files(self._creds, self._spec.options): # form local path for remote file if (not self._spec.destination.is_dir and self._spec.options.rename): diff --git a/blobxfer/operations/md5.py b/blobxfer/operations/md5.py index f14431f..e14fa62 100644 --- a/blobxfer/operations/md5.py +++ b/blobxfer/operations/md5.py @@ -49,10 +49,12 @@ def compute_md5_for_file_asbase64( filename, pagealign=False, start=None, end=None, blocksize=65536): - # type: (str, bool, int) -> str + # type: (str, bool, int, int, int) -> str """Compute MD5 hash for file and encode as Base64 :param str filename: file to compute MD5 for :param bool pagealign: page align data + :param int start: file start offset + :param int end: file end offset :param int blocksize: block 
size :rtype: str :return: MD5 for file encoded as Base64 diff --git a/blobxfer/operations/progress.py b/blobxfer/operations/progress.py index 522da78..40633ff 100644 --- a/blobxfer/operations/progress.py +++ b/blobxfer/operations/progress.py @@ -158,8 +158,8 @@ def output_parameters(general_options, spec): general_options.log_file)) log.append(' resume file: {}'.format( general_options.resume_file)) - log.append(' timeout: {}'.format( - general_options.timeout_sec)) + log.append(' timeout: connect={} read={}'.format( + general_options.timeout.connect, general_options.timeout.read)) log.append(' mode: {}'.format( spec.options.mode)) log.append( @@ -194,6 +194,8 @@ def output_parameters(general_options, spec): spec.options.chunk_size_bytes)) log.append(' one shot bytes: {}'.format( spec.options.one_shot_bytes)) + log.append(' strip components: {}'.format( + spec.options.strip_components)) log.append(' store properties: attr={} md5={}'.format( spec.options.store_file_properties.attributes, spec.options.store_file_properties.md5)) diff --git a/blobxfer/operations/resume.py b/blobxfer/operations/resume.py index a7fa573..51b8650 100644 --- a/blobxfer/operations/resume.py +++ b/blobxfer/operations/resume.py @@ -220,10 +220,10 @@ def add_or_update_record( else: if ul.completed or completed_chunks == ul.completed_chunks: return + ul.completed_chunks = completed_chunks if completed: ul.completed = completed else: - ul.completed_chunks = completed_chunks ul.md5hexdigest = md5 self._data[key] = ul self._data.sync() @@ -242,13 +242,14 @@ def __init__(self, resume_file): def add_or_update_record( self, dst_ase, src_block_list, offset, chunk_size, total_chunks, completed_chunks, completed): - # type: (SyncCopyResumeManager, pathlib.Path, - # blobxfer.models.azure.StorageEntity, int, int, int, - # bool) -> None + # type: (SyncCopyResumeManager, + # blobxfer.models.azure.StorageEntity, list, int, int, int, + # int, bool) -> None """Add or update a resume record :param SyncCopyResumeManager self: this - :param pathlib.Path local_path: local path - :param blobxfer.models.azure.StorageEntity ase: Storage Entity + :param blobxfer.models.azure.StorageEntity dst_ase: Storage Entity + :param list src_block_list: source block list + :param int offset: offset :param int chunk_size: chunk size in bytes :param int total_chunks: total chunks :param int completed_chunks: completed chunks bitarray @@ -271,10 +272,9 @@ def add_or_update_record( else: if sc.completed or completed_chunks == sc.completed_chunks: return + sc.offset = offset + sc.completed_chunks = completed_chunks if completed: sc.completed = completed - else: - sc.offset = offset - sc.completed_chunks = completed_chunks self._data[key] = sc self._data.sync() diff --git a/blobxfer/operations/synccopy.py b/blobxfer/operations/synccopy.py index 117724d..f30c51b 100644 --- a/blobxfer/operations/synccopy.py +++ b/blobxfer/operations/synccopy.py @@ -46,6 +46,7 @@ import blobxfer.models.metadata import blobxfer.operations.azure.blob import blobxfer.operations.azure.file +import blobxfer.operations.md5 import blobxfer.operations.progress import blobxfer.operations.resume import blobxfer.util @@ -166,31 +167,27 @@ def _delete_extraneous_files(self): logger.debug( 'attempting to delete extraneous blobs/files from: {}'.format( key)) - if (self._spec.options.mode == + if (self._spec.options.dest_mode == blobxfer.models.azure.StorageModes.File): files = blobxfer.operations.azure.file.list_all_files( - sa.file_client, container, - 
timeout=self._general_options.timeout_sec) + sa.file_client, container) for file in files: id = blobxfer.operations.synccopy.SyncCopy.\ create_deletion_id(sa.file_client, container, file) if id not in self._delete_exclude: blobxfer.operations.azure.file.delete_file( - sa.file_client, container, file, - timeout=self._general_options.timeout_sec) + sa.file_client, container, file) deleted += 1 else: blobs = blobxfer.operations.azure.blob.list_all_blobs( - sa.block_blob_client, container, - timeout=self._general_options.timeout_sec) + sa.block_blob_client, container) for blob in blobs: id = blobxfer.operations.synccopy.SyncCopy.\ create_deletion_id( sa.block_blob_client, container, blob.name) if id not in self._delete_exclude: blobxfer.operations.azure.blob.delete_blob( - sa.block_blob_client, container, blob.name, - timeout=self._general_options.timeout_sec) + sa.block_blob_client, container, blob.name) deleted += 1 checked.add(key) logger.info('deleted {} extraneous blobs/files'.format(deleted)) @@ -205,9 +202,10 @@ def _add_to_transfer_queue(self, src_ase, dst_ase): """ # prepare remote file for download # if remote file is a block blob, need to retrieve block list - if src_ase.mode == blobxfer.models.azure.StorageModes.Block: + if (src_ase.mode == dst_ase.mode == + blobxfer.models.azure.StorageModes.Block): bl = blobxfer.operations.azure.blob.block.get_committed_block_list( - src_ase, timeout=self._general_options.timeout_sec) + src_ase) else: bl = None # TODO future optimization for page blob synccopies: query @@ -274,8 +272,7 @@ def _put_data(self, sd, ase, offsets, data): if ase.mode == blobxfer.models.azure.StorageModes.Append: # append block if data is not None: - blobxfer.operations.azure.blob.append.append_block( - ase, data, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.append.append_block(ase, data) elif ase.mode == blobxfer.models.azure.StorageModes.Block: # handle one-shot uploads if sd.is_one_shot_block_blob: @@ -284,20 +281,17 @@ def _put_data(self, sd, ase, offsets, data): else: digest = None blobxfer.operations.azure.blob.block.create_blob( - ase, data, digest, sd.src_entity.raw_metadata, - timeout=self._general_options.timeout_sec) + ase, data, digest, sd.src_entity.raw_metadata) return # upload block if data is not None: blobxfer.operations.azure.blob.block.put_block( - ase, offsets, data, - timeout=self._general_options.timeout_sec) + ase, offsets, data) elif ase.mode == blobxfer.models.azure.StorageModes.File: # upload range if data is not None: blobxfer.operations.azure.file.put_file_range( - ase, offsets, data, - timeout=self._general_options.timeout_sec) + ase, offsets, data) elif ase.mode == blobxfer.models.azure.StorageModes.Page: if data is not None: # no need to align page as page should already be aligned @@ -305,8 +299,7 @@ def _put_data(self, sd, ase, offsets, data): return # upload page blobxfer.operations.azure.blob.page.put_page( - ase, offsets.range_start, offsets.range_end, - data, timeout=self._general_options.timeout_sec) + ase, offsets.range_start, offsets.range_end, data) def _process_data(self, sd, ase, offsets, data): # type: (SyncCopy, blobxfer.models.download.Descriptor, @@ -337,38 +330,30 @@ def _prepare_upload(self, ase): if ase.append_create: # create container if necessary blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) # create remote blob - blobxfer.operations.azure.blob.append.create_blob( - ase, 
timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.append.create_blob(ase) elif ase.mode == blobxfer.models.azure.StorageModes.Block: # create container if necessary blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) elif ase.mode == blobxfer.models.azure.StorageModes.File: # create share directory structure with self._fileshare_dir_lock: # create container if necessary blobxfer.operations.azure.file.create_share( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) # create parent directories blobxfer.operations.azure.file.create_all_parent_directories( - ase, self._dirs_created, - timeout=self._general_options.timeout_sec) + ase, self._dirs_created) # create remote file - blobxfer.operations.azure.file.create_file( - ase, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.file.create_file(ase) elif ase.mode == blobxfer.models.azure.StorageModes.Page: # create container if necessary blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) # create remote blob - blobxfer.operations.azure.blob.page.create_blob( - ase, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.page.create_blob(ase) def _process_synccopy_descriptor(self, sd): # type: (SyncCopy, blobxfer.models.download.Descriptor) -> None @@ -419,10 +404,10 @@ def _process_synccopy_descriptor(self, sd): # issue get range if sd.src_entity.mode == blobxfer.models.azure.StorageModes.File: data = blobxfer.operations.azure.file.get_file_range( - sd.src_entity, offsets, self._general_options.timeout_sec) + sd.src_entity, offsets) else: data = blobxfer.operations.azure.blob.get_blob_range( - sd.src_entity, offsets, self._general_options.timeout_sec) + sd.src_entity, offsets) # process data for upload self._process_data(sd, sd.dst_entity, offsets, data) # iterate replicas @@ -443,13 +428,11 @@ def _finalize_block_blob(self, sd, metadata, digest): :param str digest: md5 digest """ blobxfer.operations.azure.blob.block.put_block_list( - sd.dst_entity, sd.last_block_num, digest, metadata, - timeout=self._general_options.timeout_sec) + sd.dst_entity, sd.last_block_num, digest, metadata) if blobxfer.util.is_not_empty(sd.dst_entity.replica_targets): for ase in sd.dst_entity.replica_targets: blobxfer.operations.azure.blob.block.put_block_list( - ase, sd.last_block_num, digest, metadata, - timeout=self._general_options.timeout_sec) + ase, sd.last_block_num, digest, metadata) def _set_blob_md5(self, sd, digest): # type: (SyncCopy, blobxfer.models.synccopy.Descriptor, str) -> None @@ -458,12 +441,10 @@ def _set_blob_md5(self, sd, digest): :param blobxfer.models.synccopy.Descriptor sd: synccopy descriptor :param str digest: md5 digest """ - blobxfer.operations.azure.blob.set_blob_md5( - sd.dst_entity, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.set_blob_md5(sd.dst_entity, digest) if blobxfer.util.is_not_empty(sd.dst_entity.replica_targets): for ase in sd.dst_entity.replica_targets: - blobxfer.operations.azure.blob.set_blob_md5( - ase, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.set_blob_md5(ase, digest) def _set_blob_metadata(self, sd, metadata): # type: (SyncCopy, blobxfer.models.synccopy.Descriptor, dict) -> None @@ -474,11 +455,10 @@ def 
_set_blob_metadata(self, sd, metadata): :param dict metadata: metadata dict """ blobxfer.operations.azure.blob.set_blob_metadata( - sd.dst_entity, metadata, timeout=self._general_options.timeout_sec) + sd.dst_entity, metadata) if blobxfer.util.is_not_empty(sd.dst_entity.replica_targets): for ase in sd.dst_entity.replica_targets: - blobxfer.operations.azure.blob.set_blob_metadata( - ase, metadata, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.set_blob_metadata(ase, metadata) def _finalize_nonblock_blob(self, sd, metadata, digest): # type: (SyncCopy, blobxfer.models.synccopy.Descriptor, dict, @@ -507,23 +487,18 @@ def _finalize_azure_file(self, sd, metadata, digest): """ # set md5 file property if required if blobxfer.util.is_not_empty(digest): - blobxfer.operations.azure.file.set_file_md5( - sd.dst_entity, digest, - timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.file.set_file_md5(sd.dst_entity, digest) if blobxfer.util.is_not_empty(sd.dst_entity.replica_targets): for ase in sd.dst_entity.replica_targets: - blobxfer.operations.azure.file.set_file_md5( - ase, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.file.set_file_md5(ase, digest) # set file metadata if needed if blobxfer.util.is_not_empty(metadata): blobxfer.operations.azure.file.set_file_metadata( - sd.dst_entity, metadata, - timeout=self._general_options.timeout_sec) + sd.dst_entity, metadata) if blobxfer.util.is_not_empty(sd.dst_entity.replica_targets): for ase in sd.dst_entity.replica_targets: blobxfer.operations.azure.file.set_file_metadata( - ase, metadata, - timeout=self._general_options.timeout_sec) + ase, metadata) def _finalize_upload(self, sd): # type: (SyncCopy, blobxfer.models.synccopy.Descriptor) -> None @@ -547,7 +522,7 @@ def _finalize_upload(self, sd): self._finalize_azure_file(sd, metadata, digest) def _check_copy_conditions(self, src, dst): - # type: (SyncCopy, blobxfer.models.upload.LocalPath, + # type: (SyncCopy, blobxfer.models.azure.StorageEntity, # blobxfer.models.azure.StorageEntity) -> UploadAction """Check for synccopy conditions :param SyncCopy self: this @@ -608,14 +583,13 @@ def _check_for_existing_remote(self, sa, cont, name): :rtype: blobxfer.models.azure.StorageEntity :return: remote storage entity """ - if self._spec.options.mode == blobxfer.models.azure.StorageModes.File: + if (self._spec.options.dest_mode == + blobxfer.models.azure.StorageModes.File): fp = blobxfer.operations.azure.file.get_file_properties( - sa.file_client, cont, name, - timeout=self._general_options.timeout_sec) + sa.file_client, cont, name) else: fp = blobxfer.operations.azure.blob.get_blob_properties( - sa.block_blob_client, cont, name, self._spec.options.mode, - timeout=self._general_options.timeout_sec) + sa.block_blob_client, cont, name, self._spec.options.dest_mode) if fp is not None: if blobxfer.models.crypto.EncryptionMetadata.\ encryption_metadata_exists(fp.metadata): @@ -624,7 +598,7 @@ def _check_for_existing_remote(self, sa, cont, name): else: ed = None ase = blobxfer.models.azure.StorageEntity(cont, ed) - if (self._spec.options.mode == + if (self._spec.options.dest_mode == blobxfer.models.azure.StorageModes.File): dir, _ = blobxfer.operations.azure.file.parse_file_path(name) ase.populate_from_file(sa, fp, dir) @@ -666,7 +640,8 @@ def _generate_destination_for_source(self, src_ase): if dst_ase is None: dst_ase = blobxfer.models.azure.StorageEntity(cont, ed=None) dst_ase.populate_from_local( - sa, cont, name, self._spec.options.mode) + 
sa, cont, name, self._spec.options.mode) +
sa, cont, name, self._spec.options.dest_mode) + dst_ase.size = src_ase.size # check condition for dst action = self._check_copy_conditions(src_ase, dst_ase) if action == SynccopyAction.Copy: @@ -683,8 +658,7 @@ def _bind_sources_to_destination(self): """ # iterate through source paths to download for src in self._spec.sources: - for src_ase in src.files( - self._creds, self._spec.options, self._general_options): + for src_ase in src.files(self._creds, self._spec.options): # generate copy destinations for source dest = [ dst_ase for dst_ase in diff --git a/blobxfer/operations/upload.py b/blobxfer/operations/upload.py index eee58cb..974ac21 100644 --- a/blobxfer/operations/upload.py +++ b/blobxfer/operations/upload.py @@ -173,14 +173,14 @@ def create_unique_transfer_id(local_path, ase, offsets): ) @staticmethod - def create_deletion_id(client, container, name): + def create_destination_id(client, container, name): # type: (azure.storage.StorageClient, str, str) -> str - """Create a unique deletion id + """Create a unique destination id :param azure.storage.StorageClient client: storage client :param str container: container name :param str name: entity name :rtype: str - :return: unique id for deletion + :return: unique id for the destination """ return ';'.join((client.primary_endpoint, container, name)) @@ -297,7 +297,7 @@ def _initialize_disk_threads(self): :param Uploader self: this """ logger.debug('spawning {} disk threads'.format( - self._general_options.concurrency.transfer_threads)) + self._general_options.concurrency.disk_threads)) for _ in range(self._general_options.concurrency.disk_threads): thr = threading.Thread(target=self._worker_thread_upload) self._disk_threads.append(thr) @@ -398,8 +398,7 @@ def _put_data(self, ud, ase, offsets, data): if ase.mode == blobxfer.models.azure.StorageModes.Append: # append block if data is not None: - blobxfer.operations.azure.blob.append.append_block( - ase, data, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.append.append_block(ase, data) elif ase.mode == blobxfer.models.azure.StorageModes.Block: # handle one-shot uploads if ud.is_one_shot_block_blob: @@ -410,20 +409,17 @@ def _put_data(self, ud, ase, offsets, data): else: digest = None blobxfer.operations.azure.blob.block.create_blob( - ase, data, digest, metadata, - timeout=self._general_options.timeout_sec) + ase, data, digest, metadata) return # upload block if data is not None: blobxfer.operations.azure.blob.block.put_block( - ase, offsets, data, - timeout=self._general_options.timeout_sec) + ase, offsets, data) elif ase.mode == blobxfer.models.azure.StorageModes.File: # upload range if data is not None: blobxfer.operations.azure.file.put_file_range( - ase, offsets, data, - timeout=self._general_options.timeout_sec) + ase, offsets, data) elif ase.mode == blobxfer.models.azure.StorageModes.Page: if data is None: return @@ -438,7 +434,7 @@ def _put_data(self, ud, ase, offsets, data): # upload page blobxfer.operations.azure.blob.page.put_page( ase, offsets.range_start, offsets.range_start + aligned - 1, - data, timeout=self._general_options.timeout_sec) + data) def _worker_thread_upload(self): # type: (Uploader) -> None @@ -471,38 +467,30 @@ def _prepare_upload(self, ase): if ase.append_create: # create container if necessary blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) # create remote blob - blobxfer.operations.azure.blob.append.create_blob( - 
ase, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.append.create_blob(ase) elif ase.mode == blobxfer.models.azure.StorageModes.Block: # create container if necessary blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) elif ase.mode == blobxfer.models.azure.StorageModes.File: # create share directory structure with self._fileshare_dir_lock: # create container if necessary blobxfer.operations.azure.file.create_share( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) # create parent directories blobxfer.operations.azure.file.create_all_parent_directories( - ase, self._dirs_created, - timeout=self._general_options.timeout_sec) + ase, self._dirs_created) # create remote file - blobxfer.operations.azure.file.create_file( - ase, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.file.create_file(ase) elif ase.mode == blobxfer.models.azure.StorageModes.Page: # create container if necessary blobxfer.operations.azure.blob.create_container( - ase, self._containers_created, - timeout=self._general_options.timeout_sec) + ase, self._containers_created) # create remote blob - blobxfer.operations.azure.blob.page.create_blob( - ase, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.page.create_blob(ase) def _process_upload_descriptor(self, ud): # type: (Uploader, blobxfer.models.upload.Descriptor) -> None @@ -580,7 +568,7 @@ def _process_upload_descriptor(self, ud): if ud.entity.mode != blobxfer.models.azure.StorageModes.Append: self._upload_queue.put(ud) # no data can be returned on stdin uploads - if not data: + if ud.local_path.use_stdin and not data: return # add data to transfer queue with self._transfer_lock: @@ -610,13 +598,11 @@ def _finalize_block_blob(self, ud, metadata): else: digest = None blobxfer.operations.azure.blob.block.put_block_list( - ud.entity, ud.last_block_num, digest, metadata, - timeout=self._general_options.timeout_sec) + ud.entity, ud.last_block_num, digest, metadata) if blobxfer.util.is_not_empty(ud.entity.replica_targets): for ase in ud.entity.replica_targets: blobxfer.operations.azure.blob.block.put_block_list( - ase, ud.last_block_num, digest, metadata, - timeout=self._general_options.timeout_sec) + ase, ud.last_block_num, digest, metadata) def _set_blob_md5(self, ud): # type: (Uploader, blobxfer.models.upload.Descriptor) -> None @@ -625,12 +611,10 @@ def _set_blob_md5(self, ud): :param blobxfer.models.upload.Descriptor ud: upload descriptor """ digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - blobxfer.operations.azure.blob.set_blob_md5( - ud.entity, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.set_blob_md5(ud.entity, digest) if blobxfer.util.is_not_empty(ud.entity.replica_targets): for ase in ud.entity.replica_targets: - blobxfer.operations.azure.blob.set_blob_md5( - ase, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.set_blob_md5(ase, digest) def _set_blob_metadata(self, ud, metadata): # type: (Uploader, blobxfer.models.upload.Descriptor, dict) -> None @@ -639,12 +623,10 @@ def _set_blob_metadata(self, ud, metadata): :param blobxfer.models.upload.Descriptor ud: upload descriptor :param dict metadata: metadata dict """ - blobxfer.operations.azure.blob.set_blob_metadata( - ud.entity, metadata, timeout=self._general_options.timeout_sec) + 
blobxfer.operations.azure.blob.set_blob_metadata(ud.entity, metadata) if blobxfer.util.is_not_empty(ud.entity.replica_targets): for ase in ud.entity.replica_targets: - blobxfer.operations.azure.blob.set_blob_metadata( - ase, metadata, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.blob.set_blob_metadata(ase, metadata) def _finalize_nonblock_blob(self, ud, metadata): # type: (Uploader, blobxfer.models.upload.Descriptor, dict) -> None @@ -670,21 +652,18 @@ def _finalize_azure_file(self, ud, metadata): # set md5 file property if required if ud.requires_non_encrypted_md5_put: digest = blobxfer.util.base64_encode_as_string(ud.md5.digest()) - blobxfer.operations.azure.file.set_file_md5( - ud.entity, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.file.set_file_md5(ud.entity, digest) if blobxfer.util.is_not_empty(ud.entity.replica_targets): for ase in ud.entity.replica_targets: - blobxfer.operations.azure.file.set_file_md5( - ase, digest, timeout=self._general_options.timeout_sec) + blobxfer.operations.azure.file.set_file_md5(ase, digest) # set file metadata if needed if blobxfer.util.is_not_empty(metadata): blobxfer.operations.azure.file.set_file_metadata( - ud.entity, metadata, timeout=self._general_options.timeout_sec) + ud.entity, metadata) if blobxfer.util.is_not_empty(ud.entity.replica_targets): for ase in ud.entity.replica_targets: blobxfer.operations.azure.file.set_file_metadata( - ase, metadata, - timeout=self._general_options.timeout_sec) + ase, metadata) def _finalize_upload(self, ud): # type: (Uploader, blobxfer.models.upload.Descriptor) -> None @@ -739,28 +718,24 @@ def _delete_extraneous_files(self): if (self._spec.options.mode == blobxfer.models.azure.StorageModes.File): files = blobxfer.operations.azure.file.list_all_files( - sa.file_client, container, - timeout=self._general_options.timeout_sec) + sa.file_client, container) for file in files: id = blobxfer.operations.upload.Uploader.\ - create_deletion_id(sa.file_client, container, file) + create_destination_id(sa.file_client, container, file) if id not in self._delete_exclude: blobxfer.operations.azure.file.delete_file( - sa.file_client, container, file, - timeout=self._general_options.timeout_sec) + sa.file_client, container, file) deleted += 1 else: blobs = blobxfer.operations.azure.blob.list_all_blobs( - sa.block_blob_client, container, - timeout=self._general_options.timeout_sec) + sa.block_blob_client, container) for blob in blobs: id = blobxfer.operations.upload.Uploader.\ - create_deletion_id( + create_destination_id( sa.block_blob_client, container, blob.name) if id not in self._delete_exclude: blobxfer.operations.azure.blob.delete_blob( - sa.block_blob_client, container, blob.name, - timeout=self._general_options.timeout_sec) + sa.block_blob_client, container, blob.name) deleted += 1 checked.add(key) logger.info('deleted {} extraneous blobs/files'.format(deleted)) @@ -836,12 +811,10 @@ def _check_for_existing_remote(self, sa, cont, name): """ if self._spec.options.mode == blobxfer.models.azure.StorageModes.File: fp = blobxfer.operations.azure.file.get_file_properties( - sa.file_client, cont, name, - timeout=self._general_options.timeout_sec) + sa.file_client, cont, name) else: fp = blobxfer.operations.azure.blob.get_blob_properties( - sa.block_blob_client, cont, name, self._spec.options.mode, - timeout=self._general_options.timeout_sec) + sa.block_blob_client, cont, name, self._spec.options.mode) if fp is not None: if blobxfer.models.crypto.EncryptionMetadata.\ 
encryption_metadata_exists(fp.metadata): @@ -961,7 +934,7 @@ def _vectorize_and_bind(self, local_path, dest): ase = slice_map[i] if i < slices - 1: next_entry = blobxfer.models.metadata.\ - create_vectored_io_next_entry(slice_map[i+1]) + create_vectored_io_next_entry(slice_map[i + 1]) else: next_entry = None lp_slice = blobxfer.models.upload.LocalPath( @@ -1007,7 +980,7 @@ def _vectorize_and_bind(self, local_path, dest): for rt in primary_ase.replica_targets: self._delete_exclude.add( blobxfer.operations.upload.Uploader. - create_deletion_id( + create_destination_id( rt._client, rt.container, rt.name) ) yield action, local_path, primary_ase @@ -1024,6 +997,10 @@ def _run(self): # mark start self._start_time = blobxfer.util.datetime_now() logger.info('blobxfer start time: {0}'.format(self._start_time)) + # check renames + if not self._spec.sources.can_rename() and self._spec.options.rename: + raise RuntimeError( + 'cannot rename to specified destination with multiple sources') # initialize resume db if specified if self._general_options.resume_file is not None: self._resume = blobxfer.operations.resume.UploadResumeManager( @@ -1052,10 +1029,8 @@ def _run(self): skipped_files = 0 skipped_size = 0 approx_total_bytes = 0 - if not self._spec.sources.can_rename() and self._spec.options.rename: - raise RuntimeError( - 'cannot rename to specified destination with multiple sources') # iterate through source paths to upload + dupes = set() for src in self._spec.sources.files(): # create a destination array for the source dest = [ @@ -1063,11 +1038,15 @@ def _run(self): self._generate_destination_for_source(src) ] for action, lp, ase in self._vectorize_and_bind(src, dest): + dest_id = blobxfer.operations.upload.Uploader.\ + create_destination_id(ase._client, ase.container, ase.name) + if dest_id in dupes: + raise RuntimeError( + 'duplicate destination entity detected: {}/{}'.format( + ase._client.primary_endpoint, ase.path)) + dupes.add(dest_id) if self._spec.options.delete_extraneous_destination: - self._delete_exclude.add( - blobxfer.operations.upload.Uploader.create_deletion_id( - ase._client, ase.container, ase.name) - ) + self._delete_exclude.add(dest_id) if action == UploadAction.Skip: skipped_files += 1 skipped_size += ase.size if ase.size is not None else 0 @@ -1085,6 +1064,7 @@ def _run(self): self._pre_md5_skip_on_check(lp, ase) elif action == UploadAction.Upload: self._add_to_upload_queue(lp, ase, uid) + del dupes # set remote files processed with self._md5_meta_lock: self._all_files_processed = True diff --git a/blobxfer/retry.py b/blobxfer/retry.py index daee22a..513fa48 100644 --- a/blobxfer/retry.py +++ b/blobxfer/retry.py @@ -36,7 +36,7 @@ class ExponentialRetryWithMaxWait(azure.storage.retry._Retry): - """Exponential Retry with Max Wait (infinite retries)""" + """Exponential Retry with Max Wait Reset""" def __init__( self, initial_backoff=0.1, max_backoff=1, max_retries=None, reset_at_max=True): diff --git a/blobxfer/util.py b/blobxfer/util.py index a17b8a5..6c7a37f 100644 --- a/blobxfer/util.py +++ b/blobxfer/util.py @@ -42,9 +42,8 @@ from scandir import scandir as scandir import platform import re -import sys # non-stdlib imports -import dateutil +import dateutil.parser import dateutil.tz import future.utils # local imports @@ -170,9 +169,9 @@ def datetime_now(): def datetime_from_timestamp(ts, tz=None): - # type: (int, dateutil.tz) -> datetime.datetime + # type: (float, dateutil.tz) -> datetime.datetime """Convert a timestamp into datetime with offset - :param int ts: timestamp + 
:param float ts: timestamp :param dateutil.tz tz: time zone or local tz if not specified :rtype: datetime.datetime :return: converted timestamp to datetime @@ -198,20 +197,6 @@ def scantree(path): yield entry -def replace_file(src, dst): - # type: (pathlib.Path, pathlib.Path) -> None - """Replace a file, using atomic replace if available - :param pathlib.Path src: source path - :param pathlib.Path dst: destination path - """ - if sys.version_info < (3, 3): - if dst.exists(): - dst.unlink() - src.rename(dst) - else: - src.replace(dst) - - def get_mime_type(filename): # type: (str) -> str """Guess the type of a file based on its filename diff --git a/blobxfer/version.py b/blobxfer/version.py index 94461e5..0d93064 100644 --- a/blobxfer/version.py +++ b/blobxfer/version.py @@ -22,4 +22,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. -__version__ = '1.0.0a5' +__version__ = '1.0.0b1' diff --git a/cli/cli.py b/cli/cli.py index e226900..9dd7c85 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -108,8 +108,6 @@ def _init_config(self): if self.config['options'].get('verbose', False): blobxfer.util.set_verbose_logger_handlers() logger.debug('config: \n' + json.dumps(self.config, indent=4)) - # free mem - del self.cli_options # create a pass decorator for shared context between commands @@ -124,6 +122,7 @@ def callback(ctx, param, value): return click.option( '--config', expose_value=False, + default=None, help='YAML configuration file', envvar='BLOBXFER_CONFIG_FILE', callback=callback)(f) @@ -138,7 +137,7 @@ def callback(ctx, param, value): '--crypto-processes', expose_value=False, type=int, - default=0, + default=None, help='Concurrent crypto processes (download only)', callback=callback)(f) @@ -152,7 +151,7 @@ def callback(ctx, param, value): '--disk-threads', expose_value=False, type=int, - default=0, + default=None, help='Concurrent disk threads', callback=callback)(f) @@ -180,7 +179,7 @@ def callback(ctx, param, value): '--md5-processes', expose_value=False, type=int, - default=0, + default=None, help='Concurrent MD5 processes', callback=callback)(f) @@ -193,7 +192,7 @@ def callback(ctx, param, value): return click.option( '--progress-bar/--no-progress-bar', expose_value=False, - default=True, + default=None, help='Display progress bar instead of console logs; log file must ' 'be specified [True]', callback=callback)(f) @@ -220,8 +219,10 @@ def callback(ctx, param, value): return click.option( '--timeout', expose_value=False, - type=int, - help='Individual chunk transfer timeout', + type=float, + default=None, + help='Timeout, in seconds, applied to both connect and read ' + 'operations', callback=callback)(f) @@ -234,7 +235,7 @@ def callback(ctx, param, value): '--transfer-threads', expose_value=False, type=int, - default=0, + default=None, help='Concurrent transfer threads', callback=callback)(f) @@ -248,6 +249,7 @@ def callback(ctx, param, value): '-v', '--verbose', expose_value=False, is_flag=True, + default=None, help='Verbose output', callback=callback)(f) @@ -260,6 +262,7 @@ def callback(ctx, param, value): return click.option( '--local-path', expose_value=False, + default=None, help='Local path; use - for stdin', callback=callback)(f) @@ -272,6 +275,7 @@ def callback(ctx, param, value): return click.option( '--storage-account', expose_value=False, + default=None, help='Storage account name', envvar='BLOBXFER_STORAGE_ACCOUNT', callback=callback)(f) @@ -285,6 +289,7 @@ def callback(ctx, param, value): return click.option( '--remote-path', 
expose_value=False, + default=None, help='Remote path on Azure Storage', callback=callback)(f) @@ -318,6 +323,7 @@ def callback(ctx, param, value): return click.option( '--storage-account-key', expose_value=False, + default=None, help='Storage account access key', envvar='BLOBXFER_STORAGE_ACCOUNT_KEY', callback=callback)(f) @@ -332,7 +338,7 @@ def callback(ctx, param, value): '--chunk-size-bytes', expose_value=False, type=int, - default=0, + default=None, help='Block or chunk size in bytes; set to 0 for auto-select ' 'on upload [0]', callback=callback)(f) @@ -347,6 +353,7 @@ def callback(ctx, param, value): '--delete', expose_value=False, is_flag=True, + default=None, help='Delete extraneous files on target [False]', callback=callback)(f) @@ -354,12 +361,12 @@ def callback(ctx, param, value): def _distribution_mode(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) - clictx.cli_options['distribution_mode'] = value.lower() + clictx.cli_options['distribution_mode'] = value return value return click.option( '--distribution-mode', expose_value=False, - default='disabled', + default=None, help='Vectored IO distribution mode: disabled, replica, ' 'stripe [disabled]', callback=callback)(f) @@ -373,7 +380,7 @@ def callback(ctx, param, value): return click.option( '--endpoint', expose_value=False, - default='core.windows.net', + default=None, help='Azure Storage endpoint [core.windows.net]', callback=callback)(f) @@ -386,8 +393,8 @@ def callback(ctx, param, value): return click.option( '--exclude', expose_value=False, - default=None, multiple=True, + default=None, help='Exclude pattern', callback=callback)(f) @@ -400,7 +407,7 @@ def callback(ctx, param, value): return click.option( '--file-attributes/--no-file-attributes', expose_value=False, - default=False, + default=None, help='Store or restore file attributes [False]', callback=callback)(f) @@ -413,7 +420,7 @@ def callback(ctx, param, value): return click.option( '--file-md5/--no-file-md5', expose_value=False, - default=False, + default=None, help='Compute file MD5 [False]', callback=callback)(f) @@ -426,8 +433,8 @@ def callback(ctx, param, value): return click.option( '--include', expose_value=False, - default=None, multiple=True, + default=None, help='Include pattern', callback=callback)(f) @@ -440,7 +447,7 @@ def callback(ctx, param, value): return click.option( '--mode', expose_value=False, - default='auto', + default=None, help='Transfer mode: auto, append, block, file, page [auto]', callback=callback)(f) @@ -454,7 +461,7 @@ def callback(ctx, param, value): '--one-shot-bytes', expose_value=False, type=int, - default=0, + default=None, help='File sizes less than or equal to the specified byte threshold ' 'will be uploaded as one-shot for block blobs; the valid range that ' 'can be specified is 0 to 256MiB [0]', @@ -469,7 +476,7 @@ def callback(ctx, param, value): return click.option( '--overwrite/--no-overwrite', expose_value=False, - default=True, + default=None, help='Overwrite destination if exists. For append blobs, ' '--no-overwrite will append to any existing blob. 
[True]', callback=callback)(f) @@ -483,7 +490,7 @@ def callback(ctx, param, value): return click.option( '--recursive/--no-recursive', expose_value=False, - default=True, + default=None, help='Recursive [True]', callback=callback)(f) @@ -497,7 +504,7 @@ def callback(ctx, param, value): '--rename', expose_value=False, is_flag=True, - default=False, + default=None, help='Rename a single file upload or download [False]', callback=callback)(f) @@ -552,6 +559,7 @@ def callback(ctx, param, value): return click.option( '--sas', expose_value=False, + default=None, help='Shared access signature', envvar='BLOBXFER_SAS', callback=callback)(f) @@ -566,6 +574,7 @@ def callback(ctx, param, value): '--skip-on-filesize-match', expose_value=False, is_flag=True, + default=None, help='Skip on equivalent file size [False]', callback=callback)(f) @@ -579,6 +588,7 @@ def callback(ctx, param, value): '--skip-on-lmt-ge', expose_value=False, is_flag=True, + default=None, help='Skip on last modified time greater than or equal to [False]', callback=callback)(f) @@ -592,6 +602,7 @@ def callback(ctx, param, value): '--skip-on-md5-match', expose_value=False, is_flag=True, + default=None, help='Skip on MD5 match [False]', callback=callback)(f) @@ -605,8 +616,8 @@ def callback(ctx, param, value): '--strip-components', expose_value=False, type=int, - default=1, - help='Strip leading file path components on upload [1]', + default=None, + help='Strip leading file path components on upload [0]', callback=callback)(f) @@ -619,7 +630,7 @@ def callback(ctx, param, value): '--stripe-chunk-size-bytes', expose_value=False, type=int, - default=1073741824, + default=None, help='Vectored IO stripe width in bytes [1073741824]', callback=callback)(f) @@ -632,21 +643,22 @@ def callback(ctx, param, value): return click.option( '--sync-copy-dest-storage-account-key', expose_value=False, + default=None, help='Storage account access key for synccopy destination', envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT_KEY', callback=callback)(f) -def _sync_copy_dest_storage_account_option(f): +def _sync_copy_dest_mode_option(f): def callback(ctx, param, value): clictx = ctx.ensure_object(CliContext) - clictx.cli_options['sync_copy_dest_storage_account'] = value + clictx.cli_options['sync_copy_dest_mode'] = value return value return click.option( - '--sync-copy-dest-storage-account', + '--sync-copy-dest-mode', expose_value=False, - help='Storage account name for synccopy destination', - envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT', + default=None, + help='Mode for synccopy destination', callback=callback)(f) @@ -658,6 +670,7 @@ def callback(ctx, param, value): return click.option( '--sync-copy-dest-remote-path', expose_value=False, + default=None, help='Remote path on Azure Storage for synccopy destination', callback=callback)(f) @@ -670,11 +683,26 @@ def callback(ctx, param, value): return click.option( '--sync-copy-dest-sas', expose_value=False, + default=None, help='Shared access signature for synccopy destination', envvar='BLOBXFER_SYNC_COPY_DEST_SAS', callback=callback)(f) +def _sync_copy_dest_storage_account_option(f): + def callback(ctx, param, value): + clictx = ctx.ensure_object(CliContext) + clictx.cli_options['sync_copy_dest_storage_account'] = value + return value + return click.option( + '--sync-copy-dest-storage-account', + expose_value=False, + default=None, + help='Storage account name for synccopy destination', + envvar='BLOBXFER_SYNC_COPY_DEST_STORAGE_ACCOUNT', + callback=callback)(f) + + def upload_options(f): f = 
_stripe_chunk_size_bytes_option(f) f = _strip_components_option(f) @@ -728,6 +756,7 @@ def sync_copy_options(f): f = _sync_copy_dest_storage_account_option(f) f = _sync_copy_dest_sas_option(f) f = _sync_copy_dest_remote_path_option(f) + f = _sync_copy_dest_mode_option(f) f = _sync_copy_dest_access_key_option(f) f = _storage_account_option(f) f = _skip_on_md5_match_option(f) @@ -762,7 +791,9 @@ def download(ctx): """Download blobs or files from Azure Storage""" settings.add_cli_options(ctx.cli_options, settings.TransferAction.Download) ctx.initialize(settings.TransferAction.Download) - specs = settings.create_download_specifications(ctx.config) + specs = settings.create_download_specifications( + ctx.cli_options, ctx.config) + del ctx.cli_options for spec in specs: blobxfer.api.Downloader( ctx.general_options, ctx.credentials, spec @@ -777,7 +808,9 @@ def synccopy(ctx): """Synchronously copy blobs between Azure Storage accounts""" settings.add_cli_options(ctx.cli_options, settings.TransferAction.Synccopy) ctx.initialize(settings.TransferAction.Synccopy) - specs = settings.create_synccopy_specifications(ctx.config) + specs = settings.create_synccopy_specifications( + ctx.cli_options, ctx.config) + del ctx.cli_options for spec in specs: blobxfer.api.SyncCopy( ctx.general_options, ctx.credentials, spec @@ -793,7 +826,9 @@ def upload(ctx): """Upload files to Azure Storage""" settings.add_cli_options(ctx.cli_options, settings.TransferAction.Upload) ctx.initialize(settings.TransferAction.Upload) - specs = settings.create_upload_specifications(ctx.config) + specs = settings.create_upload_specifications( + ctx.cli_options, ctx.config) + del ctx.cli_options for spec in specs: blobxfer.api.Uploader( ctx.general_options, ctx.credentials, spec diff --git a/cli/settings.py b/cli/settings.py index 39e5772..bdfe8ad 100644 --- a/cli/settings.py +++ b/cli/settings.py @@ -57,160 +57,154 @@ def add_cli_options(cli_options, action): :param TransferAction action: action """ cli_options['_action'] = action.name.lower() - storage_account = cli_options['storage_account'] + storage_account = cli_options.get('storage_account') + azstorage = { + 'endpoint': cli_options.get('endpoint') + } if blobxfer.util.is_not_empty(storage_account): - try: - local_resource = cli_options['local_resource'] - if blobxfer.util.is_none_or_empty(local_resource): - raise KeyError() - except KeyError: - raise ValueError('--local-path must be specified') - try: - remote_path = cli_options['remote_path'] - if blobxfer.util.is_none_or_empty(remote_path): - raise KeyError() - except KeyError: - raise ValueError('--remote-path must be specified') - # add credentials - try: - key = cli_options['access_key'] - if blobxfer.util.is_none_or_empty(key): - raise KeyError() - except KeyError: - try: - key = cli_options['sas'] - if blobxfer.util.is_none_or_empty(key): - raise KeyError() - except KeyError: - raise RuntimeError('access key or sas must be provided') - azstorage = { - 'endpoint': cli_options['endpoint'], - 'accounts': { - storage_account: key - } + azstorage['accounts'] = { + storage_account: ( + cli_options.get('access_key') or cli_options.get('sas') + ) } - del key - # construct "argument" from cli options - sa_rp = {storage_account: remote_path} - if action == TransferAction.Upload: - arg = { - 'source': [local_resource], - 'destination': [sa_rp], - 'include': cli_options['include'], - 'exclude': cli_options['exclude'], - 'options': { - 'chunk_size_bytes': cli_options['chunk_size_bytes'], - 'delete_extraneous_destination': 
cli_options['delete'], - 'mode': cli_options['mode'], - 'one_shot_bytes': cli_options['one_shot_bytes'], - 'overwrite': cli_options['overwrite'], - 'recursive': cli_options['recursive'], - 'rename': cli_options['rename'], - 'rsa_private_key': cli_options['rsa_private_key'], - 'rsa_private_key_passphrase': cli_options[ - 'rsa_private_key_passphrase'], - 'rsa_public_key': cli_options['rsa_public_key'], - 'skip_on': { - 'filesize_match': cli_options[ - 'skip_on_filesize_match'], - 'lmt_ge': cli_options['skip_on_lmt_ge'], - 'md5_match': cli_options['skip_on_md5_match'], - }, - 'store_file_properties': { - 'attributes': cli_options['file_attributes'], - 'md5': cli_options['file_md5'], - }, - 'strip_components': cli_options['strip_components'], - 'vectored_io': { - 'stripe_chunk_size_bytes': cli_options[ - 'stripe_chunk_size_bytes'], - 'distribution_mode': cli_options['distribution_mode'], - }, + sa_rp = { + storage_account: cli_options.get('remote_path') + } + local_resource = cli_options.get('local_resource') + # construct "argument" from cli options + if action == TransferAction.Upload: + arg = { + 'source': [local_resource] if local_resource is not None else None, + 'destination': [sa_rp] if sa_rp[storage_account] is not None else + None, + 'include': cli_options.get('include'), + 'exclude': cli_options.get('exclude'), + 'options': { + 'chunk_size_bytes': cli_options.get('chunk_size_bytes'), + 'delete_extraneous_destination': cli_options.get('delete'), + 'mode': cli_options.get('mode'), + 'one_shot_bytes': cli_options.get('one_shot_bytes'), + 'overwrite': cli_options.get('overwrite'), + 'recursive': cli_options.get('recursive'), + 'rename': cli_options.get('rename'), + 'rsa_private_key': cli_options.get('rsa_private_key'), + 'rsa_private_key_passphrase': cli_options.get( + 'rsa_private_key_passphrase'), + 'rsa_public_key': cli_options.get('rsa_public_key'), + 'skip_on': { + 'filesize_match': cli_options.get( + 'skip_on_filesize_match'), + 'lmt_ge': cli_options.get('skip_on_lmt_ge'), + 'md5_match': cli_options.get('skip_on_md5_match'), }, - } - elif action == TransferAction.Download: - arg = { - 'source': [sa_rp], - 'destination': local_resource, - 'include': cli_options['include'], - 'exclude': cli_options['exclude'], - 'options': { - 'check_file_md5': cli_options['file_md5'], - 'chunk_size_bytes': cli_options['chunk_size_bytes'], - 'delete_extraneous_destination': cli_options['delete'], - 'mode': cli_options['mode'], - 'overwrite': cli_options['overwrite'], - 'recursive': cli_options['recursive'], - 'rename': cli_options['rename'], - 'rsa_private_key': cli_options['rsa_private_key'], - 'rsa_private_key_passphrase': cli_options[ - 'rsa_private_key_passphrase'], - 'restore_file_attributes': cli_options['file_attributes'], - 'skip_on': { - 'filesize_match': cli_options[ - 'skip_on_filesize_match'], - 'lmt_ge': cli_options['skip_on_lmt_ge'], - 'md5_match': cli_options['skip_on_md5_match'], - }, + 'store_file_properties': { + 'attributes': cli_options.get('file_attributes'), + 'md5': cli_options.get('file_md5'), }, - } - elif action == TransferAction.Synccopy: - try: - sync_copy_dest_storage_account = \ - cli_options['sync_copy_dest_storage_account'] - if blobxfer.util.is_none_or_empty( - sync_copy_dest_storage_account): - raise KeyError() - except KeyError: - raise ValueError( - '--sync-copy-dest-storage-account must be specified') - try: - sync_copy_dest_remote_path = \ - cli_options['sync_copy_dest_remote_path'] - if blobxfer.util.is_none_or_empty(sync_copy_dest_remote_path): - raise 
KeyError() - except KeyError: - raise ValueError( - '--sync-copy-dest-remote-path must be specified') - arg = { - 'source': sa_rp, - 'destination': [ - { - sync_copy_dest_storage_account: - sync_copy_dest_remote_path - } - ], - 'include': cli_options['include'], - 'exclude': cli_options['exclude'], - 'options': { - 'chunk_size_bytes': cli_options['chunk_size_bytes'], - 'mode': cli_options['mode'], - 'overwrite': cli_options['overwrite'], - 'skip_on': { - 'filesize_match': cli_options[ - 'skip_on_filesize_match'], - 'lmt_ge': cli_options['skip_on_lmt_ge'], - 'md5_match': cli_options['skip_on_md5_match'], - }, + 'strip_components': cli_options.get('strip_components'), + 'vectored_io': { + 'stripe_chunk_size_bytes': cli_options.get( + 'stripe_chunk_size_bytes'), + 'distribution_mode': cli_options.get('distribution_mode'), }, - } - try: - destkey = cli_options['sync_copy_dest_access_key'] - if blobxfer.util.is_none_or_empty(destkey): - raise KeyError() - except KeyError: - try: - destkey = cli_options['sync_copy_dest_sas'] - if blobxfer.util.is_none_or_empty(destkey): - raise KeyError() - except KeyError: - raise RuntimeError( - 'destination access key or sas must be provided') - azstorage['accounts'][ - cli_options['sync_copy_dest_storage_account']] = destkey - del destkey - cli_options[action.name.lower()] = arg + }, + } + elif action == TransferAction.Download: + arg = { + 'source': [sa_rp] if sa_rp[storage_account] is not None else None, + 'destination': local_resource if local_resource is not None else + None, + 'include': cli_options.get('include'), + 'exclude': cli_options.get('exclude'), + 'options': { + 'check_file_md5': cli_options.get('file_md5'), + 'chunk_size_bytes': cli_options.get('chunk_size_bytes'), + 'delete_extraneous_destination': cli_options.get('delete'), + 'mode': cli_options.get('mode'), + 'overwrite': cli_options.get('overwrite'), + 'recursive': cli_options.get('recursive'), + 'rename': cli_options.get('rename'), + 'rsa_private_key': cli_options.get('rsa_private_key'), + 'rsa_private_key_passphrase': cli_options.get( + 'rsa_private_key_passphrase'), + 'restore_file_attributes': cli_options.get( + 'file_attributes'), + 'skip_on': { + 'filesize_match': cli_options.get( + 'skip_on_filesize_match'), + 'lmt_ge': cli_options.get('skip_on_lmt_ge'), + 'md5_match': cli_options.get('skip_on_md5_match'), + }, + }, + } + elif action == TransferAction.Synccopy: + sync_copy_dest_storage_account = cli_options.get( + 'sync_copy_dest_storage_account') + sync_copy_dest_remote_path = cli_options.get( + 'sync_copy_dest_remote_path') + if (sync_copy_dest_storage_account is not None and + sync_copy_dest_remote_path is not None): + sync_copy_dest = [ + { + sync_copy_dest_storage_account: + sync_copy_dest_remote_path + } + ] + azstorage['accounts'][sync_copy_dest_storage_account] = ( + cli_options.get('sync_copy_dest_access_key') or + cli_options.get('sync_copy_dest_sas') + ) + else: + sync_copy_dest = None + arg = { + 'source': sa_rp if sa_rp[storage_account] is not None else None, + 'destination': sync_copy_dest, + 'include': cli_options.get('include'), + 'exclude': cli_options.get('exclude'), + 'options': { + 'chunk_size_bytes': cli_options.get('chunk_size_bytes'), + 'dest_mode': cli_options.get('sync_copy_dest_mode'), + 'mode': cli_options.get('mode'), + 'overwrite': cli_options.get('overwrite'), + 'skip_on': { + 'filesize_match': cli_options.get( + 'skip_on_filesize_match'), + 'lmt_ge': cli_options.get('skip_on_lmt_ge'), + 'md5_match': cli_options.get('skip_on_md5_match'), + }, + }, + } 
+ count = 0 + if arg['source'] is None: + arg.pop('source') + count += 1 + if arg['destination'] is None: + arg.pop('destination') + count += 1 + if count == 1: + raise ValueError( + '--local-path and --remote-path must be specified together ' + 'through the commandline') + if 'accounts' in azstorage: cli_options['azure_storage'] = azstorage + cli_options[action.name.lower()] = arg + + +def _merge_setting(cli_options, conf, name, name_cli=None, default=None): + # type: (dict, dict, str, str, Any) -> Any + """Merge a setting, preferring the CLI option if set + :param dict cli_options: cli options + :param dict conf: configuration sub-block + :param str name: key name + :param str name_cli: override key name from cli_options + :param Any default: default value to set if missing + :rtype: Any + :return: merged setting value + """ + val = cli_options.get(name_cli or name) + if val is None: + val = conf.get(name, default) + return val def merge_settings(config, cli_options): @@ -224,37 +218,67 @@ def merge_settings(config, cli_options): action != TransferAction.Download.name.lower() and action != TransferAction.Synccopy.name.lower()): raise ValueError('invalid action: {}'.format(action)) - # create action options - if action not in config: - config[action] = [] - # merge any argument options - if action in cli_options: - config[action].append(cli_options[action]) # merge credentials if 'azure_storage' in cli_options: if 'azure_storage' not in config: config['azure_storage'] = {} config['azure_storage'] = blobxfer.util.merge_dict( config['azure_storage'], cli_options['azure_storage']) - # merge general options + if blobxfer.util.is_none_or_empty(config['azure_storage']): + raise ValueError('azure storage settings not specified') + # create action options + if action not in config: + config[action] = [] + # append full specs, if they exist + if action in cli_options: + if 'source' in cli_options[action]: + srcdst = { + 'source': cli_options[action]['source'], + 'destination': cli_options[action]['destination'], + } + cli_options[action].pop('source') + cli_options[action].pop('destination') + config[action].append(srcdst) + # merge general and concurrency options if 'options' not in config: config['options'] = {} - config['options']['log_file'] = cli_options['log_file'] - config['options']['progress_bar'] = cli_options['progress_bar'] - config['options']['resume_file'] = cli_options['resume_file'] - config['options']['timeout_sec'] = cli_options['timeout'] - config['options']['verbose'] = cli_options['verbose'] - # merge concurrency options if 'concurrency' not in config['options']: config['options']['concurrency'] = {} - config['options']['concurrency']['crypto_processes'] = \ - cli_options['crypto_processes'] - config['options']['concurrency']['disk_threads'] = \ - cli_options['disk_threads'] - config['options']['concurrency']['md5_processes'] = \ - cli_options['md5_processes'] - config['options']['concurrency']['transfer_threads'] = \ - cli_options['transfer_threads'] + if 'timeout' not in config['options']: + config['options']['timeout'] = {} + options = { + 'log_file': _merge_setting(cli_options, config['options'], 'log_file'), + 'progress_bar': _merge_setting( + cli_options, config['options'], 'progress_bar', default=True), + 'resume_file': _merge_setting( + cli_options, config['options'], 'resume_file'), + 'timeout': { + 'connect': _merge_setting( + cli_options, config['options']['timeout'], 'connect', + name_cli='timeout'), + 'read': _merge_setting( + cli_options, 
config['options']['timeout'], 'read', + name_cli='timeout'), + }, + 'verbose': _merge_setting( + cli_options, config['options'], 'verbose', default=False), + 'concurrency': { + 'crypto_processes': _merge_setting( + cli_options, config['options']['concurrency'], + 'crypto_processes', default=0), + 'disk_threads': _merge_setting( + cli_options, config['options']['concurrency'], + 'disk_threads', default=0), + 'md5_processes': _merge_setting( + cli_options, config['options']['concurrency'], + 'md5_processes', default=0), + 'transfer_threads': _merge_setting( + cli_options, config['options']['concurrency'], + 'transfer_threads', default=0), + } + } + config['options'] = options + cli_options = cli_options[action] def create_azure_storage_credentials(config, general_options): @@ -267,7 +291,7 @@ def create_azure_storage_credentials(config, general_options): :return: credentials object """ creds = blobxfer.operations.azure.StorageCredentials(general_options) - endpoint = config['azure_storage']['endpoint'] + endpoint = config['azure_storage'].get('endpoint', 'core.windows.net') for name in config['azure_storage']['accounts']: key = config['azure_storage']['accounts'][name] creds.add_storage_account(name, key, endpoint) @@ -282,50 +306,60 @@ def create_general_options(config, action): :rtype: blobxfer.models.options.General :return: general options object """ - conc = config['options'].get('concurrency', {}) + conc = config['options']['concurrency'] return blobxfer.models.options.General( concurrency=blobxfer.models.options.Concurrency( - crypto_processes=conc.get('crypto_processes', 0), - disk_threads=conc.get('disk_threads', 0), - md5_processes=conc.get('md5_processes', 0), - transfer_threads=conc.get('transfer_threads', 0), + crypto_processes=conc['crypto_processes'], + disk_threads=conc['disk_threads'], + md5_processes=conc['md5_processes'], + transfer_threads=conc['transfer_threads'], action=action.value[0], ), - log_file=config['options'].get('log_file', None), - progress_bar=config['options'].get('progress_bar', True), - resume_file=config['options'].get('resume_file', None), - timeout_sec=config['options'].get('timeout_sec', None), - verbose=config['options'].get('verbose', False), + log_file=config['options']['log_file'], + progress_bar=config['options']['progress_bar'], + resume_file=config['options']['resume_file'], + timeout=blobxfer.models.options.Timeout( + connect=config['options']['timeout']['connect'], + read=config['options']['timeout']['read'], + ), + verbose=config['options']['verbose'], ) -def create_download_specifications(config): - # type: (dict) -> List[blobxfer.models.download.Specification] +def create_download_specifications(cli_options, config): + # type: (dict, dict) -> List[blobxfer.models.download.Specification] """Create a list of Download Specification objects from configuration + :param dict cli_options: cli options :param dict config: config dict :rtype: list :return: list of Download Specification objects """ specs = [] for conf in config['download']: + if 'options' not in conf: + conf['options'] = {} # create download options - confmode = conf['options'].get('mode', 'auto').lower() - if confmode == 'auto': + mode = _merge_setting( + cli_options, conf['options'], 'mode', default='auto').lower() + if mode == 'auto': mode = blobxfer.models.azure.StorageModes.Auto - elif confmode == 'append': + elif mode == 'append': mode = blobxfer.models.azure.StorageModes.Append - elif confmode == 'block': + elif mode == 'block': mode = blobxfer.models.azure.StorageModes.Block 
- elif confmode == 'file': + elif mode == 'file': mode = blobxfer.models.azure.StorageModes.File - elif confmode == 'page': + elif mode == 'page': mode = blobxfer.models.azure.StorageModes.Page else: - raise ValueError('unknown mode: {}'.format(confmode)) + raise ValueError('unknown mode: {}'.format(mode)) # load RSA private key PEM file if specified - rpk = conf['options'].get('rsa_private_key', None) + rpk = _merge_setting( + cli_options, conf['options'], 'rsa_private_key', default=None) if blobxfer.util.is_not_empty(rpk): - rpkp = conf['options'].get('rsa_private_key_passphrase', None) + rpkp = _merge_setting( + cli_options, conf['options'], 'rsa_private_key_passphrase', + default=None) rpk = blobxfer.operations.crypto.load_rsa_private_key_file( rpk, rpkp) else: @@ -334,22 +368,37 @@ def create_download_specifications(config): sod = conf['options'].get('skip_on', {}) ds = blobxfer.models.download.Specification( download_options=blobxfer.models.options.Download( - check_file_md5=conf['options'].get('check_file_md5', False), - chunk_size_bytes=conf['options'].get('chunk_size_bytes', 0), - delete_extraneous_destination=conf['options'].get( - 'delete_extraneous_destination', False), + check_file_md5=_merge_setting( + cli_options, conf['options'], 'check_file_md5', + default=False), + chunk_size_bytes=_merge_setting( + cli_options, conf['options'], 'chunk_size_bytes', + default=0), + delete_extraneous_destination=_merge_setting( + cli_options, conf['options'], + 'delete_extraneous_destination', default=False), mode=mode, - overwrite=conf['options'].get('overwrite', True), - recursive=conf['options'].get('recursive', True), - rename=conf['options'].get('rename', False), - restore_file_attributes=conf[ - 'options'].get('restore_file_attributes', False), + overwrite=_merge_setting( + cli_options, conf['options'], 'overwrite', default=True), + recursive=_merge_setting( + cli_options, conf['options'], 'recursive', default=True), + rename=_merge_setting( + cli_options, conf['options'], 'rename', default=False), + restore_file_attributes=_merge_setting( + cli_options, conf['options'], 'restore_file_attributes', + default=False), rsa_private_key=rpk, ), skip_on_options=blobxfer.models.options.SkipOn( - filesize_match=sod.get('filesize_match', False), - lmt_ge=sod.get('lmt_ge', False), - md5_match=sod.get('md5_match', False), + filesize_match=_merge_setting( + cli_options, sod, 'filesize_match', + name_cli='skip_on_filesize_match', default=False), + lmt_ge=_merge_setting( + cli_options, sod, 'lmt_ge', name_cli='skip_on_lmt_ge', + default=False), + md5_match=_merge_setting( + cli_options, sod, 'md5_match', + name_cli='skip_on_md5_match', default=False), ), local_destination_path=blobxfer.models.download. 
LocalDestinationPath( @@ -364,10 +413,10 @@ def create_download_specifications(config): sa = next(iter(src)) asp = blobxfer.operations.azure.SourcePath() asp.add_path_with_storage_account(src[sa], sa) - incl = conf.get('include', None) + incl = _merge_setting(cli_options, conf, 'include', default=None) if blobxfer.util.is_not_empty(incl): asp.add_includes(incl) - excl = conf.get('exclude', None) + excl = _merge_setting(cli_options, conf, 'exclude', default=None) if blobxfer.util.is_not_empty(excl): asp.add_excludes(excl) ds.add_azure_source_path(asp) @@ -376,43 +425,77 @@ def create_download_specifications(config): return specs -def create_synccopy_specifications(config): - # type: (dict) -> List[blobxfer.models.synccopy.Specification] +def create_synccopy_specifications(cli_options, config): + # type: (dict, dict) -> List[blobxfer.models.synccopy.Specification] """Create a list of SyncCopy Specification objects from configuration + :param dict cli_options: cli options :param dict config: config dict :rtype: list :return: list of SyncCopy Specification objects """ specs = [] for conf in config['synccopy']: - # create download options - confmode = conf['options'].get('mode', 'auto').lower() - if confmode == 'auto': + if 'options' not in conf: + conf['options'] = {} + # get source mode + mode = _merge_setting( + cli_options, conf['options'], 'mode', default='auto').lower() + if mode == 'auto': mode = blobxfer.models.azure.StorageModes.Auto - elif confmode == 'append': + elif mode == 'append': mode = blobxfer.models.azure.StorageModes.Append - elif confmode == 'block': + elif mode == 'block': mode = blobxfer.models.azure.StorageModes.Block - elif confmode == 'file': + elif mode == 'file': mode = blobxfer.models.azure.StorageModes.File - elif confmode == 'page': + elif mode == 'page': mode = blobxfer.models.azure.StorageModes.Page else: - raise ValueError('unknown mode: {}'.format(confmode)) + raise ValueError('unknown source mode: {}'.format(mode)) + # get destination mode + destmode = _merge_setting( + cli_options, conf['options'], 'dest_mode', + name_cli='sync_copy_dest_mode') + if blobxfer.util.is_none_or_empty(destmode): + destmode = mode + else: + destmode = destmode.lower() + if destmode == 'auto': + destmode = blobxfer.models.azure.StorageModes.Auto + elif destmode == 'append': + destmode = blobxfer.models.azure.StorageModes.Append + elif destmode == 'block': + destmode = blobxfer.models.azure.StorageModes.Block + elif destmode == 'file': + destmode = blobxfer.models.azure.StorageModes.File + elif destmode == 'page': + destmode = blobxfer.models.azure.StorageModes.Page + else: + raise ValueError('unknown dest mode: {}'.format(destmode)) # create specification sod = conf['options'].get('skip_on', {}) scs = blobxfer.models.synccopy.Specification( synccopy_options=blobxfer.models.options.SyncCopy( - delete_extraneous_destination=conf['options'].get( - 'delete_extraneous_destination', False), + delete_extraneous_destination=_merge_setting( + cli_options, conf['options'], + 'delete_extraneous_destination', default=False), + dest_mode=destmode, mode=mode, - overwrite=conf['options'].get('overwrite', True), - recursive=conf['options'].get('recursive', True), + overwrite=_merge_setting( + cli_options, conf['options'], 'overwrite', default=True), + recursive=_merge_setting( + cli_options, conf['options'], 'recursive', default=True), ), skip_on_options=blobxfer.models.options.SkipOn( - filesize_match=sod.get('filesize_match', False), - lmt_ge=sod.get('lmt_ge', False), - 
md5_match=sod.get('md5_match', False), + filesize_match=_merge_setting( + cli_options, sod, 'filesize_match', + name_cli='skip_on_filesize_match', default=False), + lmt_ge=_merge_setting( + cli_options, sod, 'lmt_ge', name_cli='skip_on_lmt_ge', + default=False), + md5_match=_merge_setting( + cli_options, sod, 'md5_match', + name_cli='skip_on_md5_match', default=False), ), ) # create remote source paths @@ -420,10 +503,10 @@ def create_synccopy_specifications(config): sa = next(iter(src)) asp = blobxfer.operations.azure.SourcePath() asp.add_path_with_storage_account(src[sa], sa) - incl = conf.get('include', None) + incl = _merge_setting(cli_options, conf, 'include', default=None) if blobxfer.util.is_not_empty(incl): asp.add_includes(incl) - excl = conf.get('exclude', None) + excl = _merge_setting(cli_options, conf, 'exclude', default=None) if blobxfer.util.is_not_empty(excl): asp.add_excludes(excl) scs.add_azure_source_path(asp) @@ -441,38 +524,44 @@ def create_synccopy_specifications(config): return specs -def create_upload_specifications(config): - # type: (dict) -> List[blobxfer.models.upload.Specification] +def create_upload_specifications(cli_options, config): + # type: (dict, dict) -> List[blobxfer.models.upload.Specification] """Create a list of Upload Specification objects from configuration + :param dict cli_options: cli options :param dict config: config dict :rtype: list :return: list of Upload Specification objects """ specs = [] for conf in config['upload']: + if 'options' not in conf: + conf['options'] = {} # create upload options - confmode = conf['options'].get('mode', 'auto').lower() - if confmode == 'auto': + mode = _merge_setting( + cli_options, conf['options'], 'mode', default='auto').lower() + if mode == 'auto': mode = blobxfer.models.azure.StorageModes.Auto - elif confmode == 'append': + elif mode == 'append': mode = blobxfer.models.azure.StorageModes.Append - elif confmode == 'block': + elif mode == 'block': mode = blobxfer.models.azure.StorageModes.Block - elif confmode == 'file': + elif mode == 'file': mode = blobxfer.models.azure.StorageModes.File - elif confmode == 'page': + elif mode == 'page': mode = blobxfer.models.azure.StorageModes.Page else: - raise ValueError('unknown mode: {}'.format(confmode)) + raise ValueError('unknown mode: {}'.format(mode)) # load RSA public key PEM if specified - rpk = conf['options'].get('rsa_public_key', None) + rpk = _merge_setting(cli_options, conf['options'], 'rsa_public_key') if blobxfer.util.is_not_empty(rpk): rpk = blobxfer.operations.crypto.load_rsa_public_key_file(rpk) if rpk is None: # load RSA private key PEM file if specified - rpk = conf['options'].get('rsa_private_key', None) + rpk = _merge_setting( + cli_options, conf['options'], 'rsa_private_key') if blobxfer.util.is_not_empty(rpk): - rpkp = conf['options'].get('rsa_private_key_passphrase', None) + rpkp = _merge_setting( + cli_options, conf['options'], 'rsa_private_key_passphrase') rpk = blobxfer.operations.crypto.load_rsa_private_key_file( rpk, rpkp) rpk = rpk.public_key() @@ -481,10 +570,10 @@ def create_upload_specifications(config): # create local source paths lsp = blobxfer.models.upload.LocalSourcePath() lsp.add_paths(conf['source']) - incl = conf.get('include', None) + incl = _merge_setting(cli_options, conf, 'include', default=None) if blobxfer.util.is_not_empty(incl): lsp.add_includes(incl) - excl = conf.get('exclude', None) + excl = _merge_setting(cli_options, conf, 'exclude', default=None) if blobxfer.util.is_not_empty(excl): lsp.add_excludes(excl) # 
create specification @@ -493,32 +582,54 @@ def create_upload_specifications(config): sod = conf['options'].get('skip_on', {}) us = blobxfer.models.upload.Specification( upload_options=blobxfer.models.options.Upload( - chunk_size_bytes=conf['options'].get('chunk_size_bytes', 0), - delete_extraneous_destination=conf['options'].get( - 'delete_extraneous_destination', False), + chunk_size_bytes=_merge_setting( + cli_options, conf['options'], 'chunk_size_bytes', + default=0), + delete_extraneous_destination=_merge_setting( + cli_options, conf['options'], + 'delete_extraneous_destination', default=False), mode=mode, - one_shot_bytes=conf['options'].get('one_shot_bytes', 0), - overwrite=conf['options'].get('overwrite', True), - recursive=conf['options'].get('recursive', True), - rename=conf['options'].get('rename', False), + one_shot_bytes=_merge_setting( + cli_options, conf['options'], 'one_shot_bytes', default=0), + overwrite=_merge_setting( + cli_options, conf['options'], 'overwrite', default=True), + recursive=_merge_setting( + cli_options, conf['options'], 'recursive', default=True), + rename=_merge_setting( + cli_options, conf['options'], 'rename', default=False), rsa_public_key=rpk, store_file_properties=blobxfer.models.options.FileProperties( - attributes=sfp.get('attributes', False), - md5=sfp.get('md5', False), + attributes=_merge_setting( + cli_options, sfp, 'attributes', + name_cli='file_attributes', default=False), + md5=_merge_setting( + cli_options, sfp, 'md5', name_cli='file_md5', + default=False), ), - strip_components=conf['options'].get('strip_components', 1), + strip_components=_merge_setting( + cli_options, conf['options'], 'strip_components', + default=0), vectored_io=blobxfer.models.options.VectoredIo( - stripe_chunk_size_bytes=vio.get( - 'stripe_chunk_size_bytes', 1073741824), + stripe_chunk_size_bytes=_merge_setting( + cli_options, vio, 'stripe_chunk_size_bytes', + default=1073741824), distribution_mode=blobxfer. models.upload.VectoredIoDistributionMode( - vio.get('distribution_mode', 'disabled').lower()), + _merge_setting( + cli_options, vio, 'distribution_mode', + default='disabled').lower()), ), ), skip_on_options=blobxfer.models.options.SkipOn( - filesize_match=sod.get('filesize_match', False), - lmt_ge=sod.get('lmt_ge', False), - md5_match=sod.get('md5_match', False), + filesize_match=_merge_setting( + cli_options, sod, 'filesize_match', + name_cli='skip_on_filesize_match', default=False), + lmt_ge=_merge_setting( + cli_options, sod, 'lmt_ge', name_cli='skip_on_lmt_ge', + default=False), + md5_match=_merge_setting( + cli_options, sod, 'md5_match', + name_cli='skip_on_md5_match', default=False), ), local_source_path=lsp, ) diff --git a/docs/10-cli-usage.md b/docs/10-cli-usage.md index cabf60c..3867101 100644 --- a/docs/10-cli-usage.md +++ b/docs/10-cli-usage.md @@ -41,8 +41,9 @@ command requires at the minimum, the following options if invoked without a YAML configuration file: * `--storage-account` for the source remote Azure path * `--remote-path` for the source remote Azure path -* `--sync-copy-dest-storage-account` for the destination remote Azure path +* `--sync-copy-dest-mode` for the destination mode * `--sync-copy-dest-remote-path` for the destination remote Azure path +* `--sync-copy-dest-storage-account` for the destination remote Azure path Additionally, an authentication option for both storage accounts is required. Please see the `Authentication` and `Connection` sub-section below under the @@ -84,7 +85,9 @@ recursively uploaded or downloaded. 
Blob container or File share at the begining, e.g., `mycontainer/vdir` * `--resume-file` specifies the resume database to write to or read from. Resume files should be specific for a session. -* `--timeout` is the integral timeout value in seconds to use. +* `--timeout` is the timeout value, in seconds, applied to both connect +and read operations. To apply separate values for these operations +individually, use YAML configuration instead. * `-h` or `--help` can be passed at every command level to receive context sensitive help. * `-v` will output verbose messages including the configuration used @@ -190,7 +193,7 @@ behavior. * `--rename` renames a single file upload or download to the target destination or source path, respectively. * `--strip-components N` will strip the leading `N` components from the -local file path. The default is `1`. +local file path on upload. The default is `0`. ## Example Invocations ### `download` Examples diff --git a/docs/20-yaml-configuration.md b/docs/20-yaml-configuration.md index 95f3ae0..b8273b3 100644 --- a/docs/20-yaml-configuration.md +++ b/docs/20-yaml-configuration.md @@ -46,7 +46,9 @@ options: resume_file: /path/to/resumefile.db progress_bar: true verbose: true - timeout_sec: null + timeout: + connect: null + read: null concurrency: md5_processes: 2 crypto_processes: 2 @@ -58,7 +60,9 @@ options: * `resume_file` is the location of the resume database to create * `progress_bar` controls display of a progress bar output to the console * `verbose` controls if verbose logging is enabled -* `timeout_sec` is the timeout to apply to requests/responses +* `timeout` is a dictionary of timeout values in seconds + * `connect` is the connect timeout to apply to the request + * `read` is the read timeout to apply to the request * `concurrency` is a dictionary of concurrency limits * `md5_processes` is the number of MD5 offload processes to create for MD5 comparison checking @@ -232,6 +236,7 @@ synccopy: - "*.tmp" options: mode: auto + dest_mode: auto delete_extraneous_destination: true overwrite: true recursive: true @@ -247,7 +252,8 @@ are copied to each destination specified. * `include` is a list of include patterns * `exclude` is a list of exclude patterns * `options` are synccopy-specific options - * `mode` is the operating mode + * `mode` is the source mode + * `dest_mode` is the destination mode * `delete_extraneous_destination` will cleanup any files in remote destinations that are not found in the remote sources. Note that this interacts with include and exclude filters. diff --git a/docs/30-vectored-io.md b/docs/30-vectored-io.md index b22fad9..8aee0d2 100644 --- a/docs/30-vectored-io.md +++ b/docs/30-vectored-io.md @@ -50,16 +50,16 @@ configuration file to define multiple destinations. ### Stripe `stripe` mode will splice a file into multiple chunks and scatter these -chunks across destinations specified. These destinations can be different -a single or multiple containers within the same storage account or even -containers distributed across multiple storage accounts if single storage -account bandwidth limits are insufficient. +chunks across destinations specified. These destinations can be single or +multiple containers within the same storage account or even containers +distributed across multiple storage accounts if single storage account +bandwidth limits are insufficient. `blobxfer` will slice the source file into multiple chunks where the `stripe_chunk_size_bytes` is the stripe width of each chunk. 
This parameter will allow you to effectively control how many blobs/files are created on Azure. `blobxfer` will then round-robin through all of the destinations -specified to store the slices. Information required to reconstruct the +specified to scatter the slices. Information required to reconstruct the original file is stored on the blob or file metadata. It is important to keep this metadata in-tact or reconstruction will fail. diff --git a/docs/98-performance-considerations.md b/docs/98-performance-considerations.md index 2bf63de..3a923bf 100644 --- a/docs/98-performance-considerations.md +++ b/docs/98-performance-considerations.md @@ -75,7 +75,7 @@ instead. ## MD5 Hashing MD5 hashing will impose some performance penalties to check if the file should be uploaded or downloaded. For instance, if uploading and the local -file is determined to be different than it's remote counterpart, then the +file is determined to be different than its remote counterpart, then the time spent performing the MD5 comparison is effectively "lost." ## Client-side Encryption diff --git a/docs/99-current-limitations.md b/docs/99-current-limitations.md index dd9ef4b..f1ef51d 100644 --- a/docs/99-current-limitations.md +++ b/docs/99-current-limitations.md @@ -8,20 +8,24 @@ Azure Files. * `stdin` sources cannot be encrypted. * Azure KeyVault key references are currently not supported. -### Platform-specific Issues -* File attribute store/restore is not supported on Windows. +### Platform-specific +* File attribute store/restore is currently not supported on Windows. ### Resume Support * Encrypted uploads/downloads cannot currently be resumed as the Python SHA256 object cannot be pickled. * Append blobs currently cannot be resumed for upload. +### General Azure File Limitations +* Please see [this article](https://msdn.microsoft.com/en-us/library/azure/dn744326.aspx) +for more information. + ### Other Limitations * MD5 is not computed for append blobs. * Empty directories are not created locally when downloading from an Azure File share which has empty directories. * Empty directories are not deleted if `--delete` is specified and no files remain in the directory on the Azure File share. -* Directories with no characters, e.g. `/mycontainer//mydir` are not +* Directories with no characters, e.g. `mycontainer//mydir` are not supported. * `/dev/null` or `nul` destinations are not supported. 
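To illustrate the split timeout described in the CLI and YAML documentation changes above, the following is a minimal standalone Python sketch (not blobxfer's own code) of how separate connect and read values are typically expressed, assuming the familiar `(connect, read)` timeout tuple accepted by the `requests` library; the URL and values are placeholders.

```python
# Minimal sketch only: demonstrates independent connect and read timeouts,
# mirroring the new `timeout: {connect, read}` YAML option documented above.
import requests

def get_with_split_timeout(url, connect=5.0, read=300.0):
    """Issue a GET where connection setup and response reads time out
    independently; requests accepts this as a (connect, read) tuple."""
    return requests.get(url, timeout=(connect, read))

# A single CLI --timeout value would populate both fields, whereas the
# YAML configuration can set connect and read separately.
```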
diff --git a/setup.py b/setup.py index 4497849..792f8c5 100644 --- a/setup.py +++ b/setup.py @@ -38,16 +38,15 @@ ] install_requires = [ - 'azure-storage==0.34.2', + 'azure-storage==0.36.0', 'bitstring==3.1.5', 'click==6.7', - 'cryptography>=1.9', - 'enum34==1.1.6;python_version<"3.4"', + 'cryptography>=2.0.3', 'future==0.16.0', - 'pathlib2==2.2.1;python_version<"3.5"', - 'python-dateutil==2.6.0', - 'requests==2.14.2', - 'ruamel.yaml==0.14.12', + 'pathlib2==2.3.0;python_version<"3.5"', + 'python-dateutil==2.6.1', + 'requests==2.18.4', + 'ruamel.yaml==0.15.32', 'scandir==1.5;python_version<"3.5"', ] @@ -71,7 +70,7 @@ install_requires=install_requires, tests_require=['pytest'], classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: System Administrators', @@ -80,7 +79,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', diff --git a/test_requirements.txt b/test_requirements.txt index c576b44..97190ae 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,5 @@ -flake8>=3.3.0 +flake8>=3.4.1 mock>=2.0.0; python_version < '3.3' pypandoc>=1.4 -pytest>=3.1.1 +pytest>=3.2.0 pytest-cov>=2.5.1 diff --git a/tests/test_blobxfer_models_azure.py b/tests/test_blobxfer_models_azure.py index f075092..35115cc 100644 --- a/tests/test_blobxfer_models_azure.py +++ b/tests/test_blobxfer_models_azure.py @@ -10,6 +10,8 @@ import azure.storage import azure.storage.blob import azure.storage.file +# local imports +import blobxfer.models.crypto # module under test import blobxfer.models.azure as azmodels @@ -30,19 +32,27 @@ def test_azurestorageentity(): blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob ase.populate_from_blob(mock.MagicMock(), blob) + assert ase.create_containers is not None assert ase.client is not None assert ase.name == 'name' assert ase.lmt == 'lmt' assert ase.size == 123 assert ase.md5 == 'abc' + assert not ase.from_local + assert ase.append_create + assert ase.encryption_metadata is None + assert ase.raw_metadata is None assert ase.snapshot is None assert ase.mode == azmodels.StorageModes.Block blob.properties.blob_type = azure.storage.blob.models._BlobTypes.AppendBlob - ase.populate_from_blob(mock.MagicMock(), blob) + blob.metadata = '{}' + ase.populate_from_blob(mock.MagicMock(), blob, store_raw_metadata=True) assert ase.mode == azmodels.StorageModes.Append + assert ase.raw_metadata == blob.metadata blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob + blob.metadata = None blob.snapshot = 'abc' ase.populate_from_blob(mock.MagicMock(), blob) assert ase.mode == azmodels.StorageModes.Page @@ -52,3 +62,47 @@ def test_azurestorageentity(): ase.populate_from_file(mock.MagicMock(), blob, 'path') assert ase.mode == azmodels.StorageModes.File assert ase.snapshot is None + + blob.metadata = '{}' + ase.populate_from_file( + mock.MagicMock(), blob, None, store_raw_metadata=True) + assert ase.mode == azmodels.StorageModes.File + assert ase.raw_metadata == blob.metadata + assert ase.name == blob.name + + ase.populate_from_local( + mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.Append) + assert ase.from_local + assert ase.mode == azmodels.StorageModes.Append + + ase.populate_from_local( + 
mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.Block) + assert ase.from_local + assert ase.mode == azmodels.StorageModes.Block + + ase.populate_from_local( + mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.File) + assert ase.from_local + assert ase.mode == azmodels.StorageModes.File + + ase.populate_from_local( + mock.MagicMock(), 'cont', 'path', azmodels.StorageModes.Page) + assert ase.from_local + assert ase.mode == azmodels.StorageModes.Page + + ase.populate_from_local( + mock.MagicMock(), 'cont', 'path.vhdx', azmodels.StorageModes.Auto) + assert ase.from_local + assert ase.mode == azmodels.StorageModes.Page + + ase.populate_from_local( + mock.MagicMock(), 'cont', 'path.bin', azmodels.StorageModes.Auto) + assert ase.from_local + assert ase.mode == azmodels.StorageModes.Block + + ase.size = 456 + ase.append_create = False + ase.encryption_metadata = blobxfer.models.crypto.EncryptionMetadata() + assert ase.size == 456 + assert not ase.append_create + assert ase.encryption_metadata is not None diff --git a/tests/test_blobxfer_models_crypto.py b/tests/test_blobxfer_models_crypto.py index 8503a71..9b37a8e 100644 --- a/tests/test_blobxfer_models_crypto.py +++ b/tests/test_blobxfer_models_crypto.py @@ -86,6 +86,18 @@ def test_encryption_metadata_exists(): assert models.EncryptionMetadata.encryption_metadata_exists(md) +def test_create_new_metadata(): + em = models.EncryptionMetadata() + em.create_new_metadata('key') + + assert em._rsa_public_key == 'key' + assert em.symmetric_key is not None + assert em.signing_key is not None + assert em.content_encryption_iv is not None + assert em.encryption_agent is not None + assert em.encryption_mode is not None + + def test_convert_from_json(tmpdir): keyfile = tmpdir.join('keyfile') keyfile.write(_SAMPLE_RSA_KEY) @@ -206,3 +218,21 @@ def test_convert_from_json(tmpdir): assert em._symkey is not None assert em._signkey is None assert hmac is None + + +def test_convert_to_json_with_mac(tmpdir): + keyfile = tmpdir.join('keyfile') + keyfile.write(_SAMPLE_RSA_KEY) + rsaprivatekey = ops.load_rsa_private_key_file(str(keyfile), None) + rsapublickey = rsaprivatekey.public_key() + + em = models.EncryptionMetadata() + em.create_new_metadata(rsapublickey) + symkey = em._symkey + signkey = em._signkey + + encjson = em.convert_to_json_with_mac('md5digest', 'hmacdigest') + assert encjson is not None + em.convert_from_json(encjson, 'entityname', rsaprivatekey) + assert em._symkey == symkey + assert em._signkey == signkey diff --git a/tests/test_blobxfer_models_download.py b/tests/test_blobxfer_models_download.py index c1b568e..c7dadae 100644 --- a/tests/test_blobxfer_models_download.py +++ b/tests/test_blobxfer_models_download.py @@ -91,6 +91,48 @@ def test_downloadspecification(): assert p in ds.sources[0]._path_map assert ds.sources[0]._path_map[p] == 'sa' + with pytest.raises(ValueError): + ds = models.Specification( + download_options=options.Download( + check_file_md5=False, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + rename=False, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + + with pytest.raises(ValueError): + ds = models.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=-1, + delete_extraneous_destination=False, + 
mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + rename=False, + restore_file_attributes=True, + rsa_private_key=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_destination_path=models.LocalDestinationPath('dest'), + ) + def test_downloaddescriptor(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) @@ -139,6 +181,7 @@ def test_downloaddescriptor(tmpdir): assert d.final_path.stat().st_size == ase._size # pre-existing file check + opts.chunk_size_bytes = 0 ase._size = 0 d = models.Descriptor(lp, ase, opts, None) d._allocate_disk_space() @@ -147,7 +190,61 @@ def test_downloaddescriptor(tmpdir): assert d.final_path.stat().st_size == ase._size -@unittest.skipIf(util.on_python2(), 'fallocate does not exist') +def test_downloaddescriptor_compute_allocated_size(): + with pytest.raises(RuntimeError): + models.Descriptor.compute_allocated_size(1, True) + + assert models.Descriptor.compute_allocated_size(32, True) == 16 + assert models.Descriptor.compute_allocated_size(1, False) == 1 + + +def test_downloaddescriptor_generate_view(): + ase = azmodels.StorageEntity('cont') + ase._size = 1024 + view, total_size = models.Descriptor.generate_view(ase) + assert view.fd_start == 0 + assert view.fd_end == 1024 + assert total_size == ase._size + + ase._vio = mock.MagicMock() + ase._vio.offset_start = 2048 + ase._vio.total_size = 3072 + view, total_size = models.Descriptor.generate_view(ase) + assert view.fd_start == ase.vectored_io.offset_start + assert view.fd_end == ase.vectored_io.offset_start + ase._size + assert total_size == ase.vectored_io.total_size + + +def test_convert_vectored_io_slice_to_final_path_name(): + lp = pathlib.Path('/local/path/abc.bxslice-0') + ase = azmodels.StorageEntity('cont') + ase._vio = mock.MagicMock() + ase._vio.slice_id = 0 + + fp = models.Descriptor.convert_vectored_io_slice_to_final_path_name( + lp, ase) + assert str(fp) == '/local/path/abc' + + +def test_set_final_path_view(): + lp = pathlib.Path('/local/path/abc.bxslice-0') + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = azmodels.StorageEntity('cont') + ase._size = 1024 + ase._vio = mock.MagicMock() + ase._vio.slice_id = 0 + ase._vio.total_size = 1024 + d = models.Descriptor(lp, ase, opts, None) + + total_size = d._set_final_path_view() + assert total_size == ase._size + + +@unittest.skipIf( + util.on_python2() or util.on_windows(), 'fallocate does not exist') def test_downloaddescriptor_allocate_disk_space_via_seek(tmpdir): fp = pathlib.Path(str(tmpdir.join('fp'))) opts = mock.MagicMock() @@ -186,8 +283,10 @@ def test_downloaddescriptor_resume(tmpdir): # test length mismatch rmgr.add_or_update_record(str(fp), ase, 0, 0, False, None) + ase._size = 127 rb = d._resume() assert rb is None + ase._size = 128 # test nothing to resume rmgr.delete() @@ -472,6 +571,22 @@ def test_write_unchecked_hmac_data(tmpdir): assert not ucc['decrypted'] +def test_mark_unchecked_chunk_decrypted(): + opts = mock.MagicMock() + opts.check_file_md5 = False + opts.chunk_size_bytes = 32 + ase = azmodels.StorageEntity('cont') + ase._size = 32 + d = models.Descriptor(mock.MagicMock(), ase, opts, None) + + d._unchecked_chunks[0] = { + 'decrypted': False + } + + d.mark_unchecked_chunk_decrypted(0) + assert d._unchecked_chunks[0] + + def test_perform_chunked_integrity_check(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) @@ -596,6 +711,10 @@ def test_cleanup_all_temporary_files(tmpdir): assert not d.final_path.exists() 
assert not d._unchecked_chunks[0]['ucc'].file_path.exists() + # go through except path + d.cleanup_all_temporary_files() + assert not d.final_path.exists() + def test_write_data(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) @@ -726,6 +845,28 @@ def test_finalize_integrity_and_file(tmpdir): assert not d.final_path.exists() +def test_restore_file_attributes(tmpdir): + lp = pathlib.Path(str(tmpdir.join('a'))) + lp.touch(mode=0o666, exist_ok=False) + lp.exists() + + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = azmodels.StorageEntity('cont') + ase._size = 32 + ase._fileattr = mock.MagicMock() + ase._fileattr.mode = '0o100777' + ase._fileattr.uid = 1000 + ase._fileattr.gid = 1000 + + d = models.Descriptor(lp, ase, opts, None) + d._restore_file_attributes() + stat = lp.stat() + assert str(oct(stat.st_mode)).replace('o', '') == \ + ase._fileattr.mode.replace('o', '') + + def test_operations(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) opts = mock.MagicMock() diff --git a/tests/test_blobxfer_models_metadata.py b/tests/test_blobxfer_models_metadata.py new file mode 100644 index 0000000..64b8cfe --- /dev/null +++ b/tests/test_blobxfer_models_metadata.py @@ -0,0 +1,180 @@ +# coding=utf-8 +"""Tests for models metadata""" + +# stdlib imports +import json +try: + import unittest.mock as mock +except ImportError: # noqa + import mock +# non-stdlib imports +import pytest +# module under test +import blobxfer.models.metadata as md + + +def test_get_md5_from_metadata(): + ase = mock.MagicMock() + ase.is_encrypted = True + ase.encryption_metadata.blobxfer_extensions.pre_encrypted_content_md5 = \ + 'premd5' + assert md.get_md5_from_metadata(ase) == 'premd5' + + ase.is_encrypted = False + ase.md5 = 'md5' + assert md.get_md5_from_metadata(ase) == 'md5' + + +def test_generate_fileattr_metadata(): + with mock.patch('blobxfer.util.on_windows', return_value=True): + assert md.generate_fileattr_metadata(None, None) is None + + with mock.patch('blobxfer.util.on_windows', return_value=False): + lp = mock.MagicMock() + lp.mode = 'mode' + lp.uid = 0 + lp.gid = 0 + + ret = md.generate_fileattr_metadata(lp, {}) + assert len(ret) > 0 + assert md._JSON_KEY_FILE_ATTRIBUTES in ret + assert md._JSON_KEY_FILE_ATTRIBUTES_POSIX in ret[ + md._JSON_KEY_FILE_ATTRIBUTES] + assert ret[md._JSON_KEY_FILE_ATTRIBUTES][ + md._JSON_KEY_FILE_ATTRIBUTES_POSIX][ + md._JSON_KEY_FILE_ATTRIBUTES_MODE] == lp.mode + assert ret[md._JSON_KEY_FILE_ATTRIBUTES][ + md._JSON_KEY_FILE_ATTRIBUTES_POSIX][ + md._JSON_KEY_FILE_ATTRIBUTES_UID] == lp.uid + assert ret[md._JSON_KEY_FILE_ATTRIBUTES][ + md._JSON_KEY_FILE_ATTRIBUTES_POSIX][ + md._JSON_KEY_FILE_ATTRIBUTES_GID] == lp.gid + + +def test_fileattr_from_metadata(): + assert md.fileattr_from_metadata(None) is None + + with mock.patch('blobxfer.util.on_windows', return_value=True): + val = { + md.JSON_KEY_BLOBXFER_METADATA: json.dumps( + {md._JSON_KEY_FILE_ATTRIBUTES: {}}) + } + assert md.fileattr_from_metadata(val) is None + + with mock.patch('blobxfer.util.on_windows', return_value=False): + lp = mock.MagicMock() + lp.mode = 'mode' + lp.uid = 0 + lp.gid = 0 + + val = { + md.JSON_KEY_BLOBXFER_METADATA: json.dumps( + md.generate_fileattr_metadata(lp, {})) + } + assert md.fileattr_from_metadata(val) is not None + + val = { + md.JSON_KEY_BLOBXFER_METADATA: json.dumps( + {md._JSON_KEY_FILE_ATTRIBUTES: {}}) + } + assert md.fileattr_from_metadata(val) is None + + +def test_create_vecotred_io_next_entry(): + ase = mock.MagicMock() + ase.client.primary_endpoint = 
'ep' + ase.container = 'cont' + ase.name = 'name' + + assert md.create_vectored_io_next_entry(ase) == 'ep;cont;name' + + +def test_explode_vectored_io_next_entry(): + entry = 'sa.blob.core.windows.net;cont;name;' + + vne = md.explode_vectored_io_next_entry(entry) + assert vne.storage_account_name == 'sa' + assert vne.endpoint == 'core.windows.net' + assert vne.container == 'cont' + assert vne.name == 'name' + + +def test_remove_vectored_io_slice_suffix_from_name(): + name = 'abc.bxslice-100' + assert md.remove_vectored_io_slice_suffix_from_name(name, 100) == 'abc' + + name = 'abc.bob' + assert md.remove_vectored_io_slice_suffix_from_name(name, 0) == 'abc.bob' + + +def test_generate_vectored_io_stripe_metadata(): + lp = mock.MagicMock() + lp.total_size = 100 + lp.view.fd_start = 0 + lp.view.total_slices = 2 + lp.view.slice_num = 0 + lp.view.next = 'next' + + ret = md.generate_vectored_io_stripe_metadata(lp, {}) + assert len(ret) > 0 + assert md._JSON_KEY_VECTORED_IO in ret + assert md._JSON_KEY_VECTORED_IO_STRIPE == ret[md._JSON_KEY_VECTORED_IO][ + md._JSON_KEY_VECTORED_IO_MODE] + assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ + md._JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SIZE] == lp.total_size + assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ + md._JSON_KEY_VECTORED_IO_STRIPE_OFFSET_START] == lp.view.fd_start + assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ + md._JSON_KEY_VECTORED_IO_STRIPE_TOTAL_SLICES] == lp.view.total_slices + assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ + md._JSON_KEY_VECTORED_IO_STRIPE_SLICE_ID] == lp.view.slice_num + assert ret[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_STRIPE][ + md._JSON_KEY_VECTORED_IO_STRIPE_NEXT] == lp.view.next + + +def test_vectored_io_from_metadata(): + assert md.vectored_io_from_metadata(None) is None + + lp = mock.MagicMock() + lp.total_size = 100 + lp.view.fd_start = 0 + lp.view.total_slices = 2 + lp.view.slice_num = 0 + lp.view.next = 'sa.blob.core.windows.net;cont;name;' + + val = { + md.JSON_KEY_BLOBXFER_METADATA: json.dumps( + md.generate_vectored_io_stripe_metadata(lp, {})) + } + vio = md.vectored_io_from_metadata(val) + assert vio.total_size == lp.total_size + assert vio.offset_start == lp.view.fd_start + assert vio.total_slices == lp.view.total_slices + assert vio.slice_id == lp.view.slice_num + assert type(vio.next) == md.VectoredNextEntry + + lp = mock.MagicMock() + lp.total_size = 100 + lp.view.fd_start = 0 + lp.view.total_slices = 2 + lp.view.slice_num = 0 + lp.view.next = None + + val = { + md.JSON_KEY_BLOBXFER_METADATA: json.dumps( + md.generate_vectored_io_stripe_metadata(lp, {})) + } + vio = md.vectored_io_from_metadata(val) + assert vio.total_size == lp.total_size + assert vio.offset_start == lp.view.fd_start + assert vio.total_slices == lp.view.total_slices + assert vio.slice_id == lp.view.slice_num + assert vio.next is None + + tmp = md.generate_vectored_io_stripe_metadata(lp, {}) + tmp[md._JSON_KEY_VECTORED_IO][md._JSON_KEY_VECTORED_IO_MODE] = 'oops' + val = { + md.JSON_KEY_BLOBXFER_METADATA: json.dumps(tmp) + } + with pytest.raises(RuntimeError): + md.vectored_io_from_metadata(val) diff --git a/tests/test_blobxfer_models_options.py b/tests/test_blobxfer_models_options.py index 31edde7..21ca78b 100644 --- a/tests/test_blobxfer_models_options.py +++ b/tests/test_blobxfer_models_options.py @@ -16,6 +16,18 @@ import blobxfer.models.options as options +def test_timeout(): + a = options.Timeout(connect=None, read=1) + assert 
a.connect == options._DEFAULT_REQUESTS_TIMEOUT[0] + assert a.read == 1 + assert a.timeout == (options._DEFAULT_REQUESTS_TIMEOUT[0], 1) + + a = options.Timeout(connect=2, read=0) + assert a.connect == 2 + assert a.read == options._DEFAULT_REQUESTS_TIMEOUT[1] + assert a.timeout == (2, options._DEFAULT_REQUESTS_TIMEOUT[1]) + + @mock.patch('multiprocessing.cpu_count', return_value=1) def test_concurrency_options(patched_cc): a = options.Concurrency( @@ -55,6 +67,30 @@ def test_concurrency_options_max_disk_and_transfer_threads(patched_cc): assert a.disk_threads == 64 assert a.transfer_threads == 96 + a = options.Concurrency( + crypto_processes=1, + md5_processes=1, + disk_threads=None, + transfer_threads=None, + action=1, + ) + + assert a.disk_threads == 16 + assert a.transfer_threads == 32 + + a = options.Concurrency( + crypto_processes=1, + md5_processes=1, + disk_threads=None, + transfer_threads=None, + action=3, + ) + + assert a.md5_processes == 0 + assert a.crypto_processes == 0 + assert a.disk_threads == 0 + assert a.transfer_threads == 96 + def test_general_options(): a = options.General( @@ -67,7 +103,7 @@ def test_general_options(): log_file='abc.log', progress_bar=False, resume_file='abc', - timeout_sec=1, + timeout=options.Timeout(1, 2), verbose=True, ) @@ -78,7 +114,7 @@ def test_general_options(): assert a.log_file == 'abc.log' assert not a.progress_bar assert a.resume_file == pathlib.Path('abc') - assert a.timeout_sec == 1 + assert a.timeout.timeout == (1, 2) assert a.verbose a = options.General( @@ -90,7 +126,7 @@ def test_general_options(): ), progress_bar=False, resume_file=None, - timeout_sec=1, + timeout=options.Timeout(2, 1), verbose=True, ) @@ -101,7 +137,7 @@ def test_general_options(): assert a.log_file is None assert not a.progress_bar assert a.resume_file is None - assert a.timeout_sec == 1 + assert a.timeout.timeout == (2, 1) assert a.verbose with pytest.raises(ValueError): diff --git a/tests/test_blobxfer_models_resume.py b/tests/test_blobxfer_models_resume.py index 7fb12a3..64820e8 100644 --- a/tests/test_blobxfer_models_resume.py +++ b/tests/test_blobxfer_models_resume.py @@ -29,3 +29,50 @@ def test_download(): assert d.completed assert len(str(d)) > 0 + + +def test_upload(): + u = rmodels.Upload('lp', 1, 2, 2, 0, False, '') + assert u.local_path == 'lp' + assert u.length == 1 + assert u.chunk_size == 2 + assert u.total_chunks == 2 + assert u.completed_chunks == 0 + assert not u.completed + assert u.md5hexdigest == '' + + u.md5hexdigest = None + assert u.md5hexdigest == '' + + u.md5hexdigest = 'abc' + assert u.md5hexdigest == 'abc' + + u.completed_chunks = 1 + assert u.completed_chunks == 1 + + u.completed = True + assert u.completed + + assert len(str(u)) > 0 + + +def test_synccopy(): + s = rmodels.SyncCopy(1, [], 0, 2, 2, 0, False) + assert s.length == 1 + assert len(s.src_block_list) == 0 + assert s.offset == 0 + assert s.chunk_size == 2 + assert s.total_chunks == 2 + assert s.completed_chunks == 0 + assert not s.completed + + s.offset = 1 + assert s.offset == 1 + + s.completed_chunks = 1 + assert s.completed_chunks == 1 + + s.completed = True + assert s.completed + + assert len(str(s)) > 0 diff --git a/tests/test_blobxfer_models_synccopy.py b/tests/test_blobxfer_models_synccopy.py new file mode 100644 index 0000000..feb976c --- /dev/null +++ b/tests/test_blobxfer_models_synccopy.py @@ -0,0 +1,294 @@ +# coding=utf-8 +"""Tests for models synccopy""" + +# stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock +# non-stdlib 
imports +import bitstring +# local imports +import blobxfer.models.azure as azmodels +import blobxfer.models.options as options +# module under test +import blobxfer.models.synccopy as synccopy + + +def test_specification(): + spec = synccopy.Specification( + synccopy_options=options.SyncCopy( + delete_extraneous_destination=False, + dest_mode=azmodels.StorageModes.Auto, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ) + ) + + spec.add_azure_source_path(mock.MagicMock()) + assert len(spec.sources) == 1 + + spec.add_azure_destination_path(mock.MagicMock()) + assert len(spec.destinations) == 1 + + +def test_descriptor(): + opts = mock.MagicMock() + opts.dest_mode = azmodels.StorageModes.Auto + opts.mode = azmodels.StorageModes.Auto + + src_ase = azmodels.StorageEntity('cont') + src_ase._mode = azmodels.StorageModes.Block + src_ase._name = 'name' + src_ase._size = 32 + src_ase._encryption = None + + dst_ase = azmodels.StorageEntity('cont2') + dst_ase._mode = azmodels.StorageModes.Block + dst_ase._name = 'name' + dst_ase._size = 32 + dst_ase._encryption = None + dst_ase.replica_targets = [mock.MagicMock()] + + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, mock.MagicMock()) + assert d._offset == 0 + assert d._chunk_num == 0 + assert not d._finalized + assert d._src_block_list is None + + assert d.src_entity == src_ase + assert d.dst_entity == dst_ase + assert not d.all_operations_completed + assert d.is_resumable + assert d.last_block_num == -1 + assert not d.remote_is_file + assert not d.remote_is_page_blob + assert not d.remote_is_append_blob + assert d.is_one_shot_block_blob + assert not d.requires_put_block_list + + +def test_descriptor_complete_offset_upload(): + opts = mock.MagicMock() + opts.dest_mode = azmodels.StorageModes.Auto + opts.mode = azmodels.StorageModes.Auto + + src_ase = azmodels.StorageEntity('cont') + src_ase._mode = azmodels.StorageModes.Block + src_ase._name = 'name' + src_ase._size = 32 + src_ase._encryption = None + + dst_ase = azmodels.StorageEntity('cont2') + dst_ase._mode = azmodels.StorageModes.Block + dst_ase._name = 'name' + dst_ase._size = 32 + dst_ase._encryption = None + dst_ase.replica_targets = [mock.MagicMock()] + + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, mock.MagicMock()) + d.complete_offset_upload(0) + assert d._outstanding_ops == 1 + + d.complete_offset_upload(0) + assert d._outstanding_ops == 0 + assert 0 not in d._replica_counters + + +def test_descriptor_compute_chunk_size(): + opts = mock.MagicMock() + opts.dest_mode = azmodels.StorageModes.Auto + opts.mode = azmodels.StorageModes.Auto + + src_ase = azmodels.StorageEntity('cont') + src_ase._mode = azmodels.StorageModes.Block + src_ase._name = 'name' + src_ase._size = 32 + src_ase._encryption = None + + dst_ase = azmodels.StorageEntity('cont2') + dst_ase._mode = azmodels.StorageModes.Block + dst_ase._name = 'name' + dst_ase._size = 32 + dst_ase._encryption = None + dst_ase.replica_targets = [mock.MagicMock()] + + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, mock.MagicMock()) + assert d._compute_chunk_size() == \ + synccopy._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + + d = synccopy.Descriptor(src_ase, dst_ase, [], opts, mock.MagicMock()) + assert d._compute_chunk_size() == d.src_entity.size + + b = mock.MagicMock() + b.size = 1 + d = synccopy.Descriptor(src_ase, dst_ase, [b], opts, mock.MagicMock()) + assert d._compute_chunk_size() == 1 + + d = 
synccopy.Descriptor(src_ase, dst_ase, [b, b], opts, mock.MagicMock()) + assert d._compute_chunk_size() == -1 + + +def test_descriptor_compute_total_chunks(): + opts = mock.MagicMock() + opts.dest_mode = azmodels.StorageModes.Auto + opts.mode = azmodels.StorageModes.Auto + + src_ase = azmodels.StorageEntity('cont') + src_ase._mode = azmodels.StorageModes.Block + src_ase._name = 'name' + src_ase._size = 32 + src_ase._encryption = None + + dst_ase = azmodels.StorageEntity('cont2') + dst_ase._mode = azmodels.StorageModes.Block + dst_ase._name = 'name' + dst_ase._size = 32 + dst_ase._encryption = None + dst_ase.replica_targets = [mock.MagicMock()] + + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, mock.MagicMock()) + assert d._compute_total_chunks(0) == 1 + + +def test_resume(): + opts = mock.MagicMock() + opts.dest_mode = azmodels.StorageModes.Auto + opts.mode = azmodels.StorageModes.Auto + + src_ase = azmodels.StorageEntity('cont') + src_ase._mode = azmodels.StorageModes.Block + src_ase._name = 'name' + src_ase._size = 32 + src_ase._encryption = None + + dst_ase = azmodels.StorageEntity('cont2') + dst_ase._mode = azmodels.StorageModes.Block + dst_ase._name = 'name' + dst_ase._size = 32 + dst_ase._encryption = None + dst_ase.replica_targets = [mock.MagicMock()] + + # test no resume + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, None) + assert d._resume() is None + + # check if path exists in resume db + resume = mock.MagicMock() + resume.get_record.return_value = None + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, resume) + assert d._resume() is None + + # check same lengths + bad = mock.MagicMock() + bad.length = 0 + resume.get_record.return_value = bad + assert d._resume() is None + + # check completed resume + comp = mock.MagicMock() + comp.length = 32 + comp.completed = True + comp.total_chunks = 1 + comp.chunk_size = 32 + comp.completed_chunks = 1 + resume.get_record.return_value = comp + dst_ase.replica_targets = None + d._completed_chunks = mock.MagicMock() + assert d._resume() == 32 + + dst_ase.replica_targets = [dst_ase] + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, resume) + d._completed_chunks = mock.MagicMock() + assert d._resume() == 64 + + # check resume no md5 + nc = mock.MagicMock() + nc.offset = 16 + nc.length = 32 + nc.completed = False + nc.total_chunks = 2 + nc.chunk_size = 16 + cc = bitstring.BitArray(length=nc.total_chunks) + cc.set(True, 0) + nc.completed_chunks = cc.int + + resume.get_record.return_value = nc + dst_ase.replica_targets = None + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, resume) + assert d._resume() == 16 + + +def test_descriptor_next_offsets(): + opts = mock.MagicMock() + opts.dest_mode = azmodels.StorageModes.Auto + opts.mode = azmodels.StorageModes.Auto + + src_ase = azmodels.StorageEntity('cont') + src_ase._mode = azmodels.StorageModes.Block + src_ase._name = 'name' + src_ase._size = 32 + src_ase._encryption = None + + dst_ase = azmodels.StorageEntity('cont2') + dst_ase._mode = azmodels.StorageModes.Block + dst_ase._name = 'name' + dst_ase._size = 32 + dst_ase._encryption = None + dst_ase.replica_targets = [mock.MagicMock()] + + # test normal + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, None) + d._resume = mock.MagicMock() + d._resume.return_value = None + + offsets, rb = d.next_offsets() + assert rb is None + assert offsets.chunk_num == 0 + assert offsets.num_bytes == 32 + assert offsets.range_start == 0 + assert offsets.range_end == 31 + assert d._offset == 32 + assert d._chunk_num == 1 + + # 
test nothing left + offsets, rb = d.next_offsets() + assert rb is None + assert offsets is None + + # test neg chunk size with block list + b = mock.MagicMock() + b.size = 10 + d = synccopy.Descriptor(src_ase, dst_ase, [b], opts, None) + d._resume = mock.MagicMock() + d._resume.return_value = None + d._chunk_size = -1 + + offsets, rb = d.next_offsets() + assert rb is None + assert offsets.chunk_num == 0 + assert offsets.num_bytes == 10 + assert offsets.range_start == 0 + assert offsets.range_end == 9 + assert d._offset == 10 + assert d._chunk_num == 1 + + # test small chunk size + d = synccopy.Descriptor(src_ase, dst_ase, None, opts, None) + d._resume = mock.MagicMock() + d._resume.return_value = None + d._chunk_size = 32 + offsets, rb = d.next_offsets() + assert rb is None + assert offsets.chunk_num == 0 + assert offsets.num_bytes == 32 + assert offsets.range_start == 0 + assert offsets.range_end == 31 + assert d._offset == 32 + assert d._chunk_num == 1 diff --git a/tests/test_blobxfer_models_upload.py b/tests/test_blobxfer_models_upload.py index 7d9e057..277336e 100644 --- a/tests/test_blobxfer_models_upload.py +++ b/tests/test_blobxfer_models_upload.py @@ -2,15 +2,76 @@ """Tests for models upload""" # stdlib imports +import hashlib +try: + import unittest.mock as mock +except ImportError: # noqa + import mock try: import pathlib2 as pathlib except ImportError: # noqa import pathlib # non-stdlib imports +import bitstring +import pytest +# local imports +import blobxfer.models.azure as azmodels +import blobxfer.models.metadata as metadata +import blobxfer.models.options as options +import blobxfer.operations.azure as azops # module under test import blobxfer.models.upload as upload +def test_vectorediodistributionmode(): + a = upload.VectoredIoDistributionMode('stripe') + assert a == upload.VectoredIoDistributionMode.Stripe + assert str(a) == 'stripe' + + +def test_localpath(tmpdir): + tmpdir.join('a').write('zz') + pp = pathlib.Path(str(tmpdir)) + rp = pathlib.Path('a') + file = pp / rp + stat = file.stat() + + lp = upload.LocalPath(pp, rp, use_stdin=True, view=None) + assert lp.absolute_path == file + assert lp.size == 0 + assert lp.total_size == 0 + assert lp.lmt == 0 + assert lp.mode.replace('o', '') == '00' + assert lp.uid == 0 + assert lp.gid == 0 + + lp = upload.LocalPath(pp, rp, use_stdin=False, view=None) + assert lp.absolute_path == file + assert lp.size == stat.st_size + assert lp.total_size == stat.st_size + assert lp.lmt == stat.st_mtime + assert lp.mode.replace('o', '') == str(oct(stat.st_mode)).replace('o', '') + assert lp.uid == stat.st_uid + assert lp.gid == stat.st_gid + + lpview = upload.LocalPathView( + fd_start=1, + fd_end=2, + slice_num=1, + mode=upload.VectoredIoDistributionMode.Stripe, + total_slices=2, + next=None, + ) + lp = upload.LocalPath(pp, rp, use_stdin=False, view=lpview) + assert lp.absolute_path == file + assert lp.size == 1 + assert lp.total_size == stat.st_size + assert lp.lmt == stat.st_mtime + assert lp.mode.replace('o', '') == str(oct(stat.st_mode)).replace('o', '') + assert lp.uid == stat.st_uid + assert lp.gid == stat.st_gid + + def test_localsourcepaths_files(tmpdir): tmpdir.mkdir('abc') tmpdir.join('moo.cow').write('z') @@ -35,6 +96,7 @@ def test_localsourcepaths_files(tmpdir): sfile = str(file.parent_path / file.relative_path) a_set.add(sfile) + assert not a.can_rename() assert len(a.paths) == 1 assert str(abcpath.join('blah.x')) not in a_set assert str(defpath.join('world.txt')) in a_set @@ -49,3 +111,947 @@ def 
test_localsourcepaths_files(tmpdir): for file in a.files(): sfile = str(file.parent_path / file.relative_path) assert sfile in a_set + + assert upload.LocalSourcePath.is_stdin('-') + assert upload.LocalSourcePath.is_stdin('/dev/stdin') + assert not upload.LocalSourcePath.is_stdin('/') + + c = upload.LocalSourcePath() + c.add_path('-') + for file in c.files(): + assert file.use_stdin + + d = upload.LocalSourcePath() + d.add_path(str(tmpdir.join('moo.cow'))) + i = 0 + for file in d.files(): + assert str(file.parent_path.absolute()) == str(tmpdir) + assert str(file.relative_path) == 'moo.cow' + assert not file.use_stdin + i += 1 + assert i == 1 + + tmpdir.join('moo.cow2').ensure(file=True) + d.add_path(str(tmpdir.join('moo.cow2'))) + i = 0 + for file in d.files(): + i += 1 + assert i == 2 + + +def test_specification(tmpdir): + lsp = upload.LocalSourcePath() + lsp.add_paths(['-', '/dev/stdin']) + with pytest.raises(ValueError): + upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=0, + overwrite=True, + recursive=True, + rename=True, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + + lsp = upload.LocalSourcePath() + lsp.add_path(str(tmpdir)) + with pytest.raises(ValueError): + upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=0, + overwrite=True, + recursive=True, + rename=True, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + + lsp = upload.LocalSourcePath() + lsp.add_path(str(tmpdir)) + with pytest.raises(ValueError): + upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=-1, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=0, + overwrite=True, + recursive=True, + rename=False, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + + with pytest.raises(ValueError): + upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES + 1, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=0, + overwrite=True, + recursive=True, + rename=False, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + + with pytest.raises(ValueError): + upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=-1, + overwrite=True, + recursive=True, + rename=False, + rsa_public_key=None, + 
store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + + with pytest.raises(ValueError): + upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=upload._MAX_BLOCK_BLOB_ONESHOT_BYTES + 1, + overwrite=True, + recursive=True, + rename=False, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + + spec = upload.Specification( + upload_options=options.Upload( + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=0, + overwrite=True, + recursive=True, + rename=False, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=lsp, + ) + spec.add_azure_destination_path(azops.DestinationPath()) + assert len(spec.destinations) == 1 + + +def test_descriptor(tmpdir): + size = 32 + tmpdir.join('a').write('z' * size) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 8 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = False + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._size = size + ase._encryption = None + ase2 = azmodels.StorageEntity('cont') + ase2._mode = azmodels.StorageModes.Block + ase2._name = 'name2' + ase2._size = size + ase2._encryption = None + ase.replica_targets = [ase2] + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + + assert ud.hmac is None + assert ud.md5 is None + assert ud._outstanding_ops == 4 * 2 + assert ud._completed_chunks is not None + assert ud._md5_cache is not None + assert ud._replica_counters is not None + assert ud.entity == ase + assert not ud.must_compute_md5 + assert not ud.all_operations_completed + assert ud.last_block_num == -1 + assert ud.is_resumable + assert not ud.remote_is_file + assert not ud.remote_is_page_blob + assert not ud.remote_is_append_blob + assert not ud.is_one_shot_block_blob + assert ud.requires_put_block_list + assert not ud.requires_non_encrypted_md5_put + assert not ud.requires_set_file_properties_md5 + + # test sym key + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._size = size + ase._encryption = mock.MagicMock() + opts.rsa_public_key = None + with pytest.raises(RuntimeError): + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + + +def test_descriptor_complete_offset_upload(tmpdir): + tmpdir.join('a').write('z' * 32) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 16 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._size = 32 + 
ase._encryption = None + ase.replica_targets = [ase] + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + + ud._md5_cache[0] = 'md50' + ud._md5_cache[1] = 'md51' + + ud.complete_offset_upload(0) + assert ud._outstanding_ops == 3 + assert ud._replica_counters[0] == 0 + ud.complete_offset_upload(1) + assert ud._outstanding_ops == 2 + assert ud._replica_counters[1] == 0 + + # fill md5 cache with junk to trigger gc on next complete + for i in range(-30, -1): + ud._md5_cache[i] = '' + + ud.complete_offset_upload(0) + assert ud._outstanding_ops == 1 + assert 0 not in ud._replica_counters + assert len(ud._md5_cache) == 2 + + ud.complete_offset_upload(1) + assert ud._outstanding_ops == 0 + assert 1 not in ud._replica_counters + assert len(ud._md5_cache) == 0 + + +def test_descriptor_hmac_data(tmpdir): + tmpdir.join('a').write('z' * 32) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 16 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._size = 32 + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = 'abc' + ase.replica_targets = [ase] + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud.hmac is not None + ud.hmac_data(b'\0') + + +def test_descriptor_initialize_encryption(tmpdir): + tmpdir.join('a').write('z' * 32) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 16 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = 'abc' + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._size = 32 + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud.hmac is not None + assert ud.entity.is_encrypted + + +def test_descriptor_compute_remote_size(tmpdir): + tmpdir.join('a').write('z' * 32) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + # encrypted remote size with replica + opts = mock.MagicMock() + opts.chunk_size_bytes = 16 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = 'abc' + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = mock.MagicMock() + ase._encryption.symmetric_key = 'abc' + ase2 = azmodels.StorageEntity('cont') + ase2._mode = azmodels.StorageModes.Block + ase2._name = 'name2' + ase.replica_targets = [ase2] + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._compute_remote_size() + assert ud.entity.size == 48 + for rt in ase.replica_targets: + assert rt.size == ud.entity.size + + # remote size + opts = mock.MagicMock() + opts.chunk_size_bytes = 16 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._compute_remote_size() + assert ud.entity.size == 32 + + # remote size of zero + tmpdir.join('b').ensure(file=True) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('b')) + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = 
upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._compute_remote_size() + assert ud.entity.size == 0 + + +def test_descriptor_adjust_chunk_size(tmpdir): + tmpdir.join('a').ensure(file=True) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 0 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 0 + + with mock.patch('blobxfer.models.upload._DEFAULT_AUTO_CHUNKSIZE_BYTES', 1): + with mock.patch( + 'blobxfer.models.upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES', 3): + with mock.patch('blobxfer.models.upload._MAX_NUM_CHUNKS', 2): + tmpdir.join('a').write('z' * 4) + lp = upload.LocalPath( + pathlib.Path(str(tmpdir)), pathlib.Path('a')) + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 2 + + lp = upload.LocalPath( + pathlib.Path(str(tmpdir)), pathlib.Path('-'), use_stdin=True) + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + + tmpdir.join('a').write('z' * 32) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Page + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 32 + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Append + ase._name = 'name' + ase._encryption = None + + opts.chunk_size_bytes = upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + 1 + with mock.patch( + 'blobxfer.models.upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES', 4): + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 4 + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + opts.chunk_size_bytes = 32 + opts.one_shot_bytes = 32 + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 32 + + opts.one_shot_bytes = 31 + with mock.patch( + 'blobxfer.models.upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES', 4): + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 4 + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.File + ase._name = 'name' + ase._encryption = None + + opts.chunk_size_bytes = upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + 1 + with mock.patch( + 'blobxfer.models.upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES', 4): + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 4 + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Page + ase._name = 'name' + ase._encryption = None + + opts.chunk_size_bytes = upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + 1 + with mock.patch( + 'blobxfer.models.upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES', 4): + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + assert ud._chunk_size == 4 + + with mock.patch('blobxfer.models.upload._MAX_PAGE_BLOB_SIZE', 4): + with pytest.raises(RuntimeError): + upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + + +def test_compute_total_chunks(tmpdir): + tmpdir.join('a').ensure(file=True) + lp = 
upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 0 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud.entity.size = upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES + with pytest.raises(RuntimeError): + ud._compute_total_chunks(1) + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud.entity.size = upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES + ud._chunk_size = upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES + with pytest.raises(RuntimeError): + ud._compute_total_chunks(1) + + ase._mode = azmodels.StorageModes.Append + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud.entity.size = upload._MAX_BLOCK_BLOB_CHUNKSIZE_BYTES + ud._chunk_size = upload._MAX_NONBLOCK_BLOB_CHUNKSIZE_BYTES + with pytest.raises(RuntimeError): + ud._compute_total_chunks(1) + + +def test_resume(tmpdir): + tmpdir.join('a').write('zz') + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 0 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + # test no resume + ud = upload.Descriptor(lp, ase, 'uid', opts, None) + assert ud._resume() is None + + # check if path exists in resume db + resume = mock.MagicMock() + resume.get_record.return_value = None + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + assert ud._resume() is None + + # check same lengths + bad = mock.MagicMock() + bad.length = 0 + resume.get_record.return_value = bad + assert ud._resume() is None + + # check completed resume + comp = mock.MagicMock() + comp.length = 2 + comp.completed = True + comp.total_chunks = 1 + comp.chunk_size = 2 + comp.completed_chunks = 1 + resume.get_record.return_value = comp + ud._completed_chunks = mock.MagicMock() + ud._src_ase = ase + assert ud._resume() == 2 + + ase.replica_targets = [ase] + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + ud._completed_chunks = mock.MagicMock() + ud._src_ase = ase + assert ud._resume() == 4 + + # check no encryption + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + opts.rsa_public_key = 'abc' + + nc = mock.MagicMock() + nc.length = 16 + nc.completed = False + nc.total_chunks = 2 + nc.chunk_size = 1 + nc.completed_chunks = 1 + + resume.get_record.return_value = nc + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + assert ud._resume() is None + + # check rr path exists + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + + nc.length = 2 + nc.local_path = pathlib.Path('yyy') + opts.rsa_public_key = None + + resume.get_record.return_value = nc + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + assert ud._resume() is None + + # check resume no md5 + opts.store_file_properties.md5 = False + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + + nc = mock.MagicMock() + nc.length = 2 + nc.completed = False + nc.total_chunks = 2 + nc.chunk_size = 1 + cc = bitstring.BitArray(length=nc.total_chunks) + cc.set(True, 0) + nc.completed_chunks = cc.int + nc.local_path = 
lp.absolute_path + + resume.get_record.return_value = nc + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + assert ud._resume() == 1 + + # check resume with md5 mismatch + opts.store_file_properties.md5 = True + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + + nc = mock.MagicMock() + nc.length = 2 + nc.completed = False + nc.total_chunks = 2 + nc.chunk_size = 1 + cc = bitstring.BitArray(length=nc.total_chunks) + cc.set(True, 0) + nc.completed_chunks = cc.int + nc.local_path = lp.absolute_path + + resume.get_record.return_value = nc + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + assert ud._resume() is None + + # check resume with md5 match + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + + nc = mock.MagicMock() + nc.length = 2 + nc.completed = False + nc.total_chunks = 2 + nc.chunk_size = 1 + cc = bitstring.BitArray(length=nc.total_chunks) + cc.set(True, 0) + nc.completed_chunks = cc.int + nc.local_path = lp.absolute_path + md5 = hashlib.md5() + md5.update(b'z') + nc.md5hexdigest = md5.hexdigest() + + resume.get_record.return_value = nc + ud = upload.Descriptor(lp, ase, 'uid', opts, resume) + assert ud._resume() == 1 + + +def test_descriptor_next_offsets(tmpdir): + tmpdir.join('a').write('ab') + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + opts = mock.MagicMock() + opts.chunk_size_bytes = 1 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + # test normal + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._resume = mock.MagicMock() + ud._resume.return_value = None + + offsets, rb = ud.next_offsets() + assert rb is None + assert offsets.chunk_num == 0 + assert offsets.num_bytes == 1 + assert offsets.range_start == 0 + assert offsets.range_end == 0 + assert not offsets.pad + assert ud._offset == 1 + assert ud._chunk_num == 1 + + offsets, rb = ud.next_offsets() + assert rb is None + assert offsets.chunk_num == 1 + assert offsets.num_bytes == 1 + assert offsets.range_start == 1 + assert offsets.range_end == 1 + assert not offsets.pad + assert ud._offset == 2 + assert ud._chunk_num == 2 + + offsets, rb = ud.next_offsets() + assert rb is None + assert offsets is None + + # test chunk size exceeds size + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + opts.chunk_size_bytes = 3 + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._chunk_size = 3 + ud._resume = mock.MagicMock() + ud._resume.return_value = None + + offsets, rb = ud.next_offsets() + assert rb is None + assert offsets.chunk_num == 0 + assert offsets.num_bytes == 2 + assert offsets.range_start == 0 + assert offsets.range_end == 1 + assert not offsets.pad + assert ud._offset == 2 + assert ud._chunk_num == 1 + + # test encrypted + tmpdir.join('a').write('z' * 16) + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + opts.chunk_size_bytes = 16 + opts.rsa_public_key = 'abc' + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._resume = mock.MagicMock() + ud._resume.return_value = None + + offsets, rb = ud.next_offsets() + assert rb is None + assert offsets.chunk_num == 0 + assert offsets.num_bytes == 16 + assert offsets.range_start == 0 + assert offsets.range_end == 15 + assert not offsets.pad + assert ud._offset == 16 + 
assert ud._chunk_num == 1 + + offsets, rb = ud.next_offsets() + assert rb is None + assert offsets.chunk_num == 1 + assert offsets.num_bytes == 16 + assert offsets.range_start == 16 + assert offsets.range_end == 31 + assert offsets.pad + assert ud._offset == 32 + assert ud._chunk_num == 2 + + +def test_descriptor_read_data(tmpdir): + tmpdir.join('a').write('ab') + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + # test normal + opts = mock.MagicMock() + opts.chunk_size_bytes = 1 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud._resume = mock.MagicMock() + ud._resume.return_value = None + + # test no data to read + mockoffsets = mock.MagicMock() + mockoffsets.num_bytes = 0 + data, newoffset = ud.read_data(mockoffsets) + assert data is None + assert newoffset is None + + # test normal data to read + offsets, rb = ud.next_offsets() + assert rb is None + data, newoffset = ud.read_data(offsets) + assert data == b'a' + assert newoffset is None + + # test stdin + with mock.patch( + 'blobxfer.STDIN', new_callable=mock.PropertyMock) as patched_stdin: + patched_stdin.read = mock.MagicMock() + patched_stdin.read.return_value = b'z' + ud.local_path.use_stdin = True + data, newoffset = ud.read_data(offsets) + assert data == b'z' + assert newoffset.chunk_num == 0 + assert newoffset.num_bytes == 1 + assert newoffset.range_start == 1 + assert newoffset.range_end == 1 + assert not newoffset.pad + assert ud._total_chunks == 3 + assert ud._outstanding_ops == 3 + assert ud._offset == 2 + assert ud.entity.size == 3 + + with mock.patch( + 'blobxfer.STDIN', new_callable=mock.PropertyMock) as patched_stdin: + patched_stdin.read = mock.MagicMock() + patched_stdin.read.return_value = None + ud.local_path.use_stdin = True + data, newoffset = ud.read_data(offsets) + assert data is None + assert newoffset is None + assert ud._total_chunks == 2 + assert ud._outstanding_ops == 2 + assert ud._chunk_num == 0 + + +def test_descriptor_generate_metadata(tmpdir): + tmpdir.join('a').write('ab') + lp = upload.LocalPath(pathlib.Path(str(tmpdir)), pathlib.Path('a')) + + # test nothing + opts = mock.MagicMock() + opts.chunk_size_bytes = 1 + opts.one_shot_bytes = 0 + opts.store_file_properties.attributes = False + opts.store_file_properties.md5 = False + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + meta = ud.generate_metadata() + assert meta is None + + # test fileattr meta + opts = mock.MagicMock() + opts.chunk_size_bytes = 1 + opts.one_shot_bytes = 0 + opts.store_file_properties.attributes = True + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + meta = ud.generate_metadata() + assert metadata.JSON_KEY_BLOBXFER_METADATA in meta + assert metadata._JSON_KEY_FILE_ATTRIBUTES in meta[ + metadata.JSON_KEY_BLOBXFER_METADATA] + + # test enc meta + opts.store_file_properties.attributes = False + opts.store_file_properties.md5 = False + opts.rsa_public_key = 'abc' + ud = 
upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ase.encryption_metadata = mock.MagicMock() + ase.encryption_metadata.convert_to_json_with_mac.return_value = { + 'encmeta': 'encmeta' + } + meta = ud.generate_metadata() + assert 'encmeta' in meta + + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ud.hmac = None + ase.encryption_metadata = mock.MagicMock() + ase.encryption_metadata.convert_to_json_with_mac.return_value = { + 'encmeta': 'encmeta' + } + meta = ud.generate_metadata() + assert 'encmeta' in meta + + opts.store_file_properties.md5 = True + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + ase.encryption_metadata = mock.MagicMock() + ase.encryption_metadata.convert_to_json_with_mac.return_value = { + 'encmeta': 'encmeta' + } + meta = ud.generate_metadata() + assert 'encmeta' in meta + + # test vio meta + opts = mock.MagicMock() + opts.chunk_size_bytes = 1 + opts.one_shot_bytes = 0 + opts.store_file_properties.md5 = True + opts.rsa_public_key = None + + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.Block + ase._name = 'name' + ase._encryption = None + + lp.view = mock.MagicMock() + lp.view.mode = upload.VectoredIoDistributionMode.Stripe + ud = upload.Descriptor(lp, ase, 'uid', opts, mock.MagicMock()) + with mock.patch( + 'blobxfer.models.metadata.generate_vectored_io_stripe_metadata', + return_value={'viometa': 'viometa'}): + meta = ud.generate_metadata() + assert metadata.JSON_KEY_BLOBXFER_METADATA in meta + assert 'viometa' in meta[metadata.JSON_KEY_BLOBXFER_METADATA] diff --git a/tests/test_blobxfer_operations_azure.py b/tests/test_blobxfer_operations_azure.py index 0322aa4..1618ad2 100644 --- a/tests/test_blobxfer_operations_azure.py +++ b/tests/test_blobxfer_operations_azure.py @@ -11,6 +11,8 @@ import azure.storage.blob import azure.storage.file import pytest +# local imports +import blobxfer.models.metadata as md # module under test import blobxfer.models.azure as azmodels import blobxfer.operations.azure as azops @@ -18,6 +20,10 @@ def test_storage_credentials(): creds = azops.StorageCredentials(mock.MagicMock()) + + with pytest.raises(ValueError): + creds.add_storage_account('sa1', '', 'endpoint') + creds.add_storage_account('sa1', 'somekey1', 'endpoint') a = creds.get_storage_account('sa1') @@ -51,28 +57,159 @@ def test_storage_credentials(): def test_key_is_sas(): - a = azops.StorageAccount('name', 'abcdef', 'endpoint', 10) + a = azops.StorageAccount( + 'name', 'abcdef', 'endpoint', 10, mock.MagicMock()) assert not a.is_sas - a = azops.StorageAccount('name', 'abcdef&blah', 'endpoint', 10) + a = azops.StorageAccount( + 'name', 'abcdef&blah', 'endpoint', 10, mock.MagicMock()) assert not a.is_sas - a = azops.StorageAccount('name', '?abcdef', 'endpoint', 10) + a = azops.StorageAccount( + 'name', '?abcdef', 'endpoint', 10, mock.MagicMock()) assert a.is_sas a = azops.StorageAccount( - 'name', '?sv=0&sr=1&sig=2', 'endpoint', 10) + 'name', '?sv=0&sr=1&sig=2', 'endpoint', 10, mock.MagicMock()) assert a.is_sas a = azops.StorageAccount( - 'name', 'sv=0&sr=1&sig=2', 'endpoint', 10) + 'name', 'sv=0&sr=1&sig=2', 'endpoint', 10, mock.MagicMock()) assert a.is_sas a = azops.StorageAccount( - 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint', 10) + 'name', 'sig=0&sv=0&sr=1&se=2', 'endpoint', 10, mock.MagicMock()) assert a.is_sas +def test_container_creation_allowed(): + a = azops.StorageAccount( + 'name', 'abcdef', 'endpoint', 10, mock.MagicMock()) + assert a._container_creation_allowed() + + a = azops.StorageAccount( + 
'name', '?sv=0&sr=1&sig=2', 'endpoint', 10, mock.MagicMock()) + assert not a._container_creation_allowed() + + a = azops.StorageAccount( + 'name', '?sv=0&sr=1&srt=a&sig=2', 'endpoint', 10, mock.MagicMock()) + assert not a._container_creation_allowed() + + a = azops.StorageAccount( + 'name', '?sv=0&sr=1&srt=c&sig=2', 'endpoint', 10, mock.MagicMock()) + assert a._container_creation_allowed() + + +@mock.patch('blobxfer.operations.azure.file.get_file_properties') +def test_handle_vectored_io_stripe(patched_gfp): + creds = mock.MagicMock() + options = mock.MagicMock() + options.mode = azmodels.StorageModes.Block + store_raw_metadata = False + sa = mock.MagicMock() + is_file = False + container = 'cont' + entity = mock.MagicMock() + + p = '/cont/remote/path' + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + + # test not first slice + with mock.patch( + 'blobxfer.models.metadata.vectored_io_from_metadata', + return_value=md.VectoredStripe( + next='nextpr', + offset_start=0, + slice_id=1, + total_size=10, + total_slices=10, + )): + for part in asp._handle_vectored_io_stripe( + creds, options, store_raw_metadata, sa, entity, is_file, + container, dir=None): + assert part is None + + # blob test + with mock.patch( + 'blobxfer.models.metadata.' + 'vectored_io_from_metadata') as patched_vifm: + patched_vifm.side_effect = [ + md.VectoredStripe( + next=md.VectoredNextEntry( + storage_account_name='sa0', + endpoint='core.windows.net', + container='cont', + name='path-bxslice-0', + ), + offset_start=0, + slice_id=0, + total_size=2, + total_slices=2, + ), + md.VectoredStripe( + next=md.VectoredNextEntry( + storage_account_name='sa1', + endpoint='core.windows.net', + container='cont', + name='path-bxslice-1', + ), + offset_start=1, + slice_id=1, + total_size=2, + total_slices=2, + ), + ] + options.mode = azmodels.StorageModes.Block + i = 0 + for part in asp._handle_vectored_io_stripe( + creds, options, store_raw_metadata, sa, entity, is_file, + container, dir=None): + i += 1 + assert i == 2 + + # file test + with mock.patch( + 'blobxfer.models.metadata.' 
+ 'vectored_io_from_metadata') as patched_vifm: + patched_vifm.side_effect = [ + md.VectoredStripe( + next=md.VectoredNextEntry( + storage_account_name='sa0', + endpoint='core.windows.net', + container='cont', + name='path-bxslice-0', + ), + offset_start=0, + slice_id=0, + total_size=2, + total_slices=2, + ), + md.VectoredStripe( + next=md.VectoredNextEntry( + storage_account_name='sa1', + endpoint='core.windows.net', + container='cont', + name='path-bxslice-1', + ), + offset_start=1, + slice_id=1, + total_size=2, + total_slices=2, + ), + ] + options.mode = azmodels.StorageModes.File + is_file = True + f = azure.storage.file.models.File(name='path-bxslice-1') + patched_gfp.side_effect = [f] + i = 0 + for part in asp._handle_vectored_io_stripe( + creds, options, store_raw_metadata, sa, entity, is_file, + container, dir=None): + i += 1 + assert i == 2 + + def test_azuresourcepath(): p = '/cont/remote/path' asp = azops.SourcePath() @@ -104,12 +241,32 @@ def test_azuresourcepath_files(patched_lf, patched_em): patched_em.encryption_metadata_exists.return_value = False i = 0 - for file in asp.files(creds, options, mock.MagicMock()): + for file in asp.files(creds, options): i += 1 assert file.name == 'remote/name' assert file.encryption_metadata is None assert i == 1 + # test filter + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + asp.add_includes(['zzz']) + patched_lf.side_effect = [[f]] + assert len(list(asp.files(creds, options))) == 0 + + # test no vio return + with mock.patch( + 'blobxfer.operations.azure.SourcePath.' + '_handle_vectored_io_stripe') as patched_hvios: + patched_hvios.side_effect = [[None]] + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + patched_lf.side_effect = [[f]] + assert len(list(asp.files(creds, options))) == 0 + + # test encrypted + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') fe = azure.storage.file.models.File(name='name') fe.metadata = {'encryptiondata': {'a': 'b'}} patched_lf.side_effect = [[fe]] @@ -117,7 +274,7 @@ def test_azuresourcepath_files(patched_lf, patched_em): patched_em.convert_from_json = mock.MagicMock() i = 0 - for file in asp.files(creds, options, mock.MagicMock()): + for file in asp.files(creds, options): i += 1 assert file.name == 'remote/name' assert file.encryption_metadata is not None @@ -144,12 +301,29 @@ def test_azuresourcepath_blobs(patched_lb, patched_em): patched_em.encryption_metadata_exists.return_value = False i = 0 - for file in asp.files(creds, options, mock.MagicMock()): + for file in asp.files(creds, options): i += 1 assert file.name == 'name' assert file.encryption_metadata is None assert i == 1 + # test filter + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + asp.add_includes(['zzz']) + patched_lb.side_effect = [[b]] + assert len(list(asp.files(creds, options))) == 0 + + # test no vio return + with mock.patch( + 'blobxfer.operations.azure.SourcePath.' 
+ '_handle_vectored_io_stripe') as patched_hvios: + patched_hvios.side_effect = [[None]] + asp = azops.SourcePath() + asp.add_path_with_storage_account(p, 'sa') + patched_lb.side_effect = [[b]] + assert len(list(asp.files(creds, options))) == 0 + be = azure.storage.blob.models.Blob(name='name') be.metadata = {'encryptiondata': {'a': 'b'}} patched_lb.side_effect = [[be]] @@ -157,8 +331,22 @@ def test_azuresourcepath_blobs(patched_lb, patched_em): patched_em.convert_from_json = mock.MagicMock() i = 0 - for file in asp.files(creds, options, mock.MagicMock()): + for file in asp.files(creds, options): i += 1 assert file.name == 'name' assert file.encryption_metadata is not None assert i == 1 + + +def test_destinationpath(): + dp = azops.DestinationPath() + sa = mock.MagicMock() + dp.add_path_with_storage_account('/remote/path/', sa) + + assert len(dp._paths) == 1 + assert len(dp._path_map) == 1 + + with pytest.raises(RuntimeError): + dp.add_path_with_storage_account('/remote/path2/', sa) + + assert dp.lookup_storage_account('/remote/path/') is not None diff --git a/tests/test_blobxfer_operations_azure_blob.py b/tests/test_blobxfer_operations_azure_blob.py index 0ed626a..6ff521b 100644 --- a/tests/test_blobxfer_operations_azure_blob.py +++ b/tests/test_blobxfer_operations_azure_blob.py @@ -36,6 +36,43 @@ def test_check_if_single_blob(): assert not result +def test_get_blob_properties(): + with pytest.raises(RuntimeError): + ops.get_blob_properties( + None, 'cont', None, azmodels.StorageModes.File) + + client = mock.MagicMock() + blob = mock.MagicMock() + client.get_blob_properties.side_effect = \ + azure.common.AzureMissingResourceHttpError('msg', 'code') + + ret = ops.get_blob_properties( + client, 'cont', None, azmodels.StorageModes.Append) + assert ret is None + + blob = mock.MagicMock() + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.PageBlob + client = mock.MagicMock() + client.get_blob_properties.return_value = blob + + with pytest.raises(RuntimeError): + ops.get_blob_properties( + client, 'cont', None, azmodels.StorageModes.Append) + + with pytest.raises(RuntimeError): + ops.get_blob_properties( + client, 'cont', None, azmodels.StorageModes.Block) + + blob.properties.blob_type = azure.storage.blob.models._BlobTypes.BlockBlob + with pytest.raises(RuntimeError): + ops.get_blob_properties( + client, 'cont', None, azmodels.StorageModes.Page) + + ret = ops.get_blob_properties( + client, 'cont', None, azmodels.StorageModes.Block) + assert ret == blob + + def test_list_blobs(): with pytest.raises(RuntimeError): for blob in ops.list_blobs( @@ -100,6 +137,14 @@ def test_list_blobs(): assert i == 1 +def test_list_all_blobs(): + client = mock.MagicMock() + blob = mock.MagicMock() + client.list_blobs.return_value = [blob, blob] + + assert len(list(ops.list_all_blobs(client, 'cont'))) == 2 + + def test_get_blob_range(): ase = mock.MagicMock() ret = mock.MagicMock() @@ -113,3 +158,25 @@ def test_get_blob_range(): offsets.end_range = 1 assert ops.get_blob_range(ase, offsets) == ret.content + + +def test_create_container(): + ase = mock.MagicMock() + ase.create_containers = False + + ops.create_container(ase, None) + assert ase.client.create_container.call_count == 0 + + ase.create_containers = True + ase.client.account_name = 'sa' + ase.container = 'cont' + + cc = set() + ops.create_container(ase, cc) + assert len(cc) == 1 + + ase.client.create_container.side_effect = \ + azure.common.AzureConflictHttpError('msg', 'code') + ase.container = 'cont2' + ops.create_container(ase, cc) + 
assert len(cc) == 1 diff --git a/tests/test_blobxfer_operations_azure_blob_append.py b/tests/test_blobxfer_operations_azure_blob_append.py index f6e8c23..842ca4e 100644 --- a/tests/test_blobxfer_operations_azure_blob_append.py +++ b/tests/test_blobxfer_operations_azure_blob_append.py @@ -2,6 +2,10 @@ """Tests for operations: blob append""" # stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.storage # local imports @@ -11,16 +15,17 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount('name', 'key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.blob.AppendBlobService) assert isinstance( client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount( + 'name', '?key&sig=key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.blob.AppendBlobService) assert isinstance( diff --git a/tests/test_blobxfer_operations_azure_blob_block.py b/tests/test_blobxfer_operations_azure_blob_block.py index 2af2f6f..1f299d6 100644 --- a/tests/test_blobxfer_operations_azure_blob_block.py +++ b/tests/test_blobxfer_operations_azure_blob_block.py @@ -2,6 +2,10 @@ """Tests for operations: block blob""" # stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.storage # local imports @@ -11,18 +15,43 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount('name', 'key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.blob.BlockBlobService) assert isinstance( client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount( + 'name', '?key&sig=key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.blob.BlockBlobService) assert isinstance( client.authentication, azure.storage._auth._StorageSASAuthentication) + + +def test_format_block_id(): + assert '00000001' == ops._format_block_id(1) + + +def test_put_block_list(): + ase = mock.MagicMock() + ase.name = 'abc' + ops.put_block_list(ase, 1, None, None) + assert ase.client.put_block_list.call_count == 1 + + +def test_get_committed_block_list(): + ase = mock.MagicMock() + ase.name = 'abc' + gbl = mock.MagicMock() + gbl.committed_blocks = 1 + ase.client.get_block_list.return_value = gbl + assert ops.get_committed_block_list(ase) == 1 + + ase.name = 'abc?snapshot=123' + gbl.committed_blocks = 2 + assert ops.get_committed_block_list(ase) == 2 diff --git a/tests/test_blobxfer_operations_azure_blob_page.py b/tests/test_blobxfer_operations_azure_blob_page.py index f1b4d8c..27c0c36 100644 --- a/tests/test_blobxfer_operations_azure_blob_page.py +++ b/tests/test_blobxfer_operations_azure_blob_page.py @@ -2,6 +2,10 @@ """Tests for models""" # stdlib imports +try: + 
import unittest.mock as mock +except ImportError: # noqa + import mock # non-stdlib imports import azure.storage # local imports @@ -11,16 +15,17 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount('name', 'key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.blob.PageBlobService) assert isinstance( client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount( + 'name', '?key&sig=key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.blob.PageBlobService) assert isinstance( diff --git a/tests/test_blobxfer_operations_azure_file.py b/tests/test_blobxfer_operations_azure_file.py index 2a45428..af2d9bf 100644 --- a/tests/test_blobxfer_operations_azure_file.py +++ b/tests/test_blobxfer_operations_azure_file.py @@ -17,16 +17,17 @@ def test_create_client(): - sa = azops.StorageAccount('name', 'key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount('name', 'key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.file.FileService) assert isinstance( client.authentication, azure.storage._auth._StorageSharedKeyAuthentication) - sa = azops.StorageAccount('name', '?key&sig=key', 'endpoint', 10) - client = ops.create_client(sa) + sa = azops.StorageAccount( + 'name', '?key&sig=key', 'endpoint', 10, mock.MagicMock()) + client = ops.create_client(sa, mock.MagicMock()) assert client is not None assert isinstance(client, azure.storage.file.FileService) assert isinstance( @@ -96,6 +97,24 @@ def test_list_files_single_file(): assert i == 1 +def test_list_all_files(): + client = mock.MagicMock() + client.list_directories_and_files.side_effect = [ + [ + azure.storage.file.models.Directory(name='dir'), + ], + [ + azure.storage.file.models.File(name='a'), + ], + ] + + i = 0 + for f in ops.list_all_files(client, 'fshare'): + assert f == 'dir/a' + i += 1 + assert i == 1 + + @mock.patch( 'blobxfer.operations.azure.file.check_if_single_file', return_value=(False, None) @@ -126,6 +145,10 @@ def test_list_files_directory(patched_cisf): assert i == 1 +def test_delete_file(): + assert ops.delete_file(mock.MagicMock(), 'fshare', 'dir/name') is None + + def test_get_file_range(): ase = mock.MagicMock() ret = mock.MagicMock() @@ -138,3 +161,65 @@ def test_get_file_range(): offsets.end_range = 1 assert ops.get_file_range(ase, offsets) == ret.content + + +def test_create_share(): + ase = mock.MagicMock() + ase.create_containers = False + + ops.create_share(ase, None) + assert ase.client.create_share.call_count == 0 + + ase.create_containers = True + ase.client.account_name = 'sa' + ase.container = 'cont' + + cc = set() + ops.create_share(ase, cc) + assert len(cc) == 1 + + ase.client.create_share.side_effect = \ + azure.common.AzureConflictHttpError('msg', 'code') + ase.container = 'cont2' + ops.create_share(ase, cc) + assert len(cc) == 1 + + +def test_create_all_parent_directories(): + ase = mock.MagicMock() + ase.client.account_name = 'sa' + ase.container = 'cont' + ase.name = 'abc' + + dirs = {} + ops.create_all_parent_directories(ase, dirs) + assert 
len(dirs) == 0 + + ase.name = 'a/b/c.bin' + ops.create_all_parent_directories(ase, dirs) + assert len(dirs) == 1 + assert len(dirs['sa:cont']) == 2 + + +def test_create_file(): + ase = mock.MagicMock() + ase.name = 'a/b/c.bin' + assert ops.create_file(ase) is None + + +def test_put_file_range(): + ase = mock.MagicMock() + ase.name = 'a/b/c.bin' + assert ops.put_file_range(ase, mock.MagicMock(), b'\0') is None + + +def test_set_file_md5(): + ase = mock.MagicMock() + ase.name = 'a/b/c.bin' + assert ops.set_file_md5(ase, 'md5') is None + + +def test_set_file_metadata(): + ase = mock.MagicMock() + ase.name = 'a/b/c.bin' + assert ops.set_file_metadata(ase, 'md') is None diff --git a/tests/test_blobxfer_operations_download.py b/tests/test_blobxfer_operations_download.py index 626980c..1263d10 100644 --- a/tests/test_blobxfer_operations_download.py +++ b/tests/test_blobxfer_operations_download.py @@ -164,6 +164,10 @@ def test_check_download_conditions(tmpdir): d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), ds) result = d._check_download_conditions(nep, mock.MagicMock()) assert result == ops.DownloadAction.Download + rfile = mock.MagicMock() + rfile.vectored_io = None + result = d._check_download_conditions(nep, rfile) + assert result == ops.DownloadAction.Download result = d._check_download_conditions(ep, mock.MagicMock()) assert result == ops.DownloadAction.Skip @@ -292,14 +296,18 @@ def test_pre_md5_skip_on_check(): rfile._client = mock.MagicMock() rfile._client.primary_endpoint = 'ep' rfile._name = 'name' - rfile._vio = None + rfile._size = 32 + rfile._vio = mock.MagicMock() + rfile._vio.offset_start = 0 + rfile._vio.total_size = 32 - lpath = 'lpath' + lpath = pathlib.Path('lpath') key = ops.Downloader.create_unique_transfer_operation_id(rfile) d._pre_md5_skip_on_check(lpath, rfile) assert key in d._md5_map rfile._name = 'name2' + rfile._vio = None lpath = 'lpath2' rfile._encryption = None rfile._md5 = 'abc' @@ -330,7 +338,7 @@ def test_post_md5_skip_on_check(tmpdir): d._transfer_set.add(key) assert key in d._md5_map - d._post_md5_skip_on_check(key, lpath, rfile._size, True) + d._post_md5_skip_on_check(key, lpath, None, True) assert key not in d._md5_map d._add_to_download_queue = mock.MagicMock() @@ -444,8 +452,9 @@ def test_check_for_crypto_done(): d._crypto_offload.pop_done_queue.side_effect = [ None, (lpath, offsets), + None, ] - patched_tc.side_effect = [False, False, True] + patched_tc.side_effect = [False, False, False, True, True] d._complete_chunk_download = mock.MagicMock() d._check_for_crypto_done() assert dd.perform_chunked_integrity_check.call_count == 1 @@ -487,9 +496,10 @@ def test_add_to_download_queue(tmpdir): assert path in d._dd_map -def test_initialize_and_terminate_transfer_threads(): +def test_initialize_and_terminate_threads(): opts = mock.MagicMock() opts.concurrency.transfer_threads = 2 + opts.concurrency.disk_threads = 2 d = ops.Downloader(opts, mock.MagicMock(), mock.MagicMock()) d._worker_thread_transfer = mock.MagicMock() @@ -501,24 +511,89 @@ def test_initialize_and_terminate_transfer_threads(): for thr in d._transfer_threads: assert not thr.is_alive() + d._initialize_disk_threads() + assert len(d._disk_threads) == 2 + + d._wait_for_disk_threads(terminate=True) + assert d._download_terminate + for thr in d._disk_threads: + assert not thr.is_alive() + + +def test_process_download_descriptor_vio(tmpdir): + with mock.patch( + 'blobxfer.models.download.Descriptor.all_operations_completed', + new_callable=mock.PropertyMock) as patched_aoc: + d = ops.Downloader( 
+ mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.concurrency.transfer_threads = 1 + d._general_options.concurrency.disk_threads = 1 + opts = mock.MagicMock() + opts.check_file_md5 = True + opts.chunk_size_bytes = 16 + ase = azmodels.StorageEntity('cont') + ase._mode = azmodels.StorageModes.File + ase._size = 16 + ase._client = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase._name = 'name' + ase._vio = mock.MagicMock() + ase._vio.total_slices = 2 + + lp = pathlib.Path(str(tmpdir.join('b'))) + dd = models.Descriptor(lp, ase, opts, None) + dd.next_offsets = mock.MagicMock() + dd.next_offsets.return_value = (None, None) + patched_aoc.return_value = True + dd.finalize_file = mock.MagicMock() + key = ops.Downloader.create_unique_transfer_operation_id(ase) + d._transfer_set.add(key) + d._dd_map[str(lp)] = mock.MagicMock() + + d._process_download_descriptor(dd) + assert dd.finalize_file.call_count == 0 + + d._transfer_set.add(key) + d._dd_map[str(lp)] = mock.MagicMock() + d._process_download_descriptor(dd) + assert dd.finalize_file.call_count == 1 + @mock.patch('blobxfer.operations.crypto.aes_cbc_decrypt_data') @mock.patch('blobxfer.operations.azure.file.get_file_range') @mock.patch('blobxfer.operations.azure.blob.get_blob_range') def test_worker_thread_transfer( patched_gbr, patched_gfr, patched_acdd, tmpdir): + # test disk set > max set length + with mock.patch( + 'blobxfer.operations.download.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._process_download_descriptor = mock.MagicMock() + d._general_options.concurrency.disk_threads = 1 + d._disk_set.add(0) + d._disk_set.add(1) + d._disk_set.add(2) + d._disk_set.add(3) + d._disk_set.add(4) + + patched_tc.side_effect = [False, True] + d._worker_thread_transfer() + assert d._process_download_descriptor.call_count == 0 + d = ops.Downloader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) - d._complete_chunk_download = mock.MagicMock() + d._process_download_descriptor = mock.MagicMock() d._download_terminate = True d._general_options.concurrency.transfer_threads = 1 d._general_options.concurrency.disk_threads = 1 d._worker_thread_transfer() - assert d._complete_chunk_download.call_count == 0 + assert d._process_download_descriptor.call_count == 0 d._download_terminate = False d._all_remote_files_processed = True d._worker_thread_transfer() - assert d._complete_chunk_download.call_count == 0 + assert d._process_download_descriptor.call_count == 0 with mock.patch( 'blobxfer.operations.download.Downloader.termination_check', @@ -703,6 +778,43 @@ def test_worker_thread_transfer( assert dd.perform_chunked_integrity_check.call_count == 1 +def test_worker_thread_disk(): + with mock.patch( + 'blobxfer.operations.download.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + d._general_options.concurrency.disk_threads = 1 + + d._disk_queue = mock.MagicMock() + d._disk_queue.get.side_effect = [ + (mock.MagicMock(), mock.MagicMock(), mock.MagicMock()), + ] + d._process_data = mock.MagicMock() + patched_tc.side_effect = [False, True] + + d._worker_thread_disk() + assert d._process_data.call_count == 1 + + with mock.patch( + 'blobxfer.operations.download.Downloader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + d = ops.Downloader( + mock.MagicMock(), mock.MagicMock(), 
mock.MagicMock()) + d._general_options.concurrency.disk_threads = 1 + + d._disk_queue = mock.MagicMock() + d._disk_queue.get.side_effect = [ + (mock.MagicMock(), mock.MagicMock(), mock.MagicMock()), + ] + d._process_data = mock.MagicMock() + d._process_data.side_effect = Exception() + patched_tc.side_effect = [False, True] + + d._worker_thread_disk() + assert len(d._exceptions) == 1 + + def test_cleanup_temporary_files(tmpdir): lp = pathlib.Path(str(tmpdir.join('a'))) opts = mock.MagicMock() @@ -791,7 +903,7 @@ def _create_downloader_for_start(td): d._cleanup_temporary_files = mock.MagicMock() d._download_start = datetime.datetime.now(tz=dateutil.tz.tzlocal()) d._initialize_transfer_threads = mock.MagicMock() - d._general_options.concurrency.crypto_processes = 0 + d._general_options.concurrency.crypto_processes = 1 d._general_options.concurrency.md5_processes = 1 d._general_options.concurrency.disk_threads = 1 d._general_options.concurrency.transfer_threads = 1 @@ -840,9 +952,11 @@ def _create_downloader_for_start(td): 'blobxfer.operations.download.Downloader._wait_for_disk_threads', return_value=None ) +@mock.patch( + 'blobxfer.operations.crypto.CryptoOffload', return_value=mock.MagicMock()) def test_start( - patched_wdt, patched_wtt, patched_cutoi, patched_eld, patched_lb, - patched_lfmo, tmpdir): + patched_crypto, patched_wdt, patched_wtt, patched_cutoi, patched_eld, + patched_lb, patched_lfmo, tmpdir): patched_lfmo._check_thread = mock.MagicMock() b = azure.storage.blob.models.Blob(name='remote/path/name') diff --git a/tests/test_blobxfer_operations_md5.py b/tests/test_blobxfer_operations_md5.py index 02be647..0e17b56 100644 --- a/tests/test_blobxfer_operations_md5.py +++ b/tests/test_blobxfer_operations_md5.py @@ -8,6 +8,7 @@ import pytest # local imports import blobxfer.models.azure as azmodels +import blobxfer.models.upload as modelsul # module under test import blobxfer.operations.md5 as ops @@ -21,6 +22,16 @@ def test_compute_md5(tmpdir): md5_data = ops.compute_md5_for_data_asbase64(testdata.encode('utf8')) assert md5_file == md5_data + # test offset + md5_file = ops.compute_md5_for_file_asbase64(lpath, start=1) + md5_data = ops.compute_md5_for_data_asbase64(testdata[1:].encode('utf8')) + assert md5_file == md5_data + + md5_file = ops.compute_md5_for_file_asbase64(lpath, end=2) + md5_data = ops.compute_md5_for_data_asbase64(testdata[:2].encode('utf8')) + assert md5_file == md5_data + + # test mismatch md5_file_page = ops.compute_md5_for_file_asbase64(lpath, True) assert md5_file != md5_file_page @@ -29,6 +40,17 @@ def test_compute_md5(tmpdir): ops.compute_md5_for_file_asbase64(testdata) +def test_check_data_is_empty(): + data = b'\0' * ops._MAX_PAGE_SIZE_BYTES + assert ops.check_data_is_empty(data) + + data = b'\0' * 8 + assert ops.check_data_is_empty(data) + + data = str(uuid.uuid4()).encode('utf8') + assert not ops.check_data_is_empty(data) + + def test_done_cv(): a = None try: @@ -68,6 +90,9 @@ def test_from_add_to_done_non_pagealigned(tmpdir): result = a.pop_done_queue() assert result is None + with pytest.raises(ValueError): + a.add_localfile_for_md5_check(None, None, None, None, None, None) + a.add_localfile_for_md5_check( key, fpath, fpath, remote_md5, azmodels.StorageModes.Block, None) i = 33 @@ -91,6 +116,51 @@ def test_from_add_to_done_non_pagealigned(tmpdir): a.finalize_processes() +def test_from_add_to_done_lpview(tmpdir): + file = tmpdir.join('a') + file.write('abc') + fpath = str(file) + key = 'key' + + remote_md5 = ops.compute_md5_for_file_asbase64(str(file)) + + a 
= None + lpview = modelsul.LocalPathView( + fd_start=0, + fd_end=3, + mode=None, + next=None, + slice_num=None, + total_slices=1, + ) + try: + a = ops.LocalFileMd5Offload(num_workers=1) + result = a.pop_done_queue() + assert result is None + + a.add_localfile_for_md5_check( + key, fpath, fpath, remote_md5, azmodels.StorageModes.Block, lpview) + i = 33 + checked = False + while i > 0: + result = a.pop_done_queue() + if result is None: + time.sleep(0.3) + i -= 1 + continue + assert len(result) == 4 + assert result[0] == key + assert result[1] == str(file) + assert result[2] == 3 + assert result[3] + checked = True + break + assert checked + finally: + if a: + a.finalize_processes() + + def test_from_add_to_done_pagealigned(tmpdir): file = tmpdir.join('a') file.write('abc') diff --git a/tests/test_blobxfer_operations_progress.py b/tests/test_blobxfer_operations_progress.py index 721501e..fb67ece 100644 --- a/tests/test_blobxfer_operations_progress.py +++ b/tests/test_blobxfer_operations_progress.py @@ -8,6 +8,11 @@ import mock # non-stdlib imports # local imports +import blobxfer.models.azure as azmodels +import blobxfer.models.download as modelsdl +import blobxfer.models.options as options +import blobxfer.models.synccopy as modelssc +import blobxfer.models.upload as modelsul import blobxfer.util as util # module under test import blobxfer.operations.progress as ops @@ -15,11 +20,72 @@ def test_output_parameters(): go = mock.MagicMock() - spec = mock.MagicMock() go.log_file = 'abc' + spec = modelsdl.Specification( + download_options=options.Download( + check_file_md5=True, + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + rename=False, + restore_file_attributes=False, + rsa_private_key=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_destination_path=mock.MagicMock(), + ) ops.output_parameters(go, spec) + assert util.is_not_empty(go.log_file) + spec = modelsul.Specification( + upload_options=options.Upload( + chunk_size_bytes=4194304, + delete_extraneous_destination=False, + mode=azmodels.StorageModes.Auto, + one_shot_bytes=0, + overwrite=True, + recursive=True, + rename=False, + rsa_public_key=None, + store_file_properties=options.FileProperties( + attributes=True, + md5=True, + ), + strip_components=0, + vectored_io=None, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ), + local_source_path=mock.MagicMock() + ) + ops.output_parameters(go, spec) + assert util.is_not_empty(go.log_file) + + spec = modelssc.Specification( + synccopy_options=options.SyncCopy( + delete_extraneous_destination=False, + dest_mode=azmodels.StorageModes.Auto, + mode=azmodels.StorageModes.Auto, + overwrite=True, + recursive=True, + ), + skip_on_options=options.SkipOn( + filesize_match=True, + lmt_ge=False, + md5_match=True, + ) + ) + ops.output_parameters(go, spec) assert util.is_not_empty(go.log_file) @@ -33,6 +99,9 @@ def test_update_progress_bar(): ops.update_progress_bar( go, 'download', start, None, 1, None, 1) + ops.update_progress_bar( + go, 'upload', start, 1, 0, 256, 0, stdin_upload=True) + with mock.patch('blobxfer.util.datetime_now') as patched_dt: patched_dt.return_value = start ops.update_progress_bar( diff --git a/tests/test_blobxfer_operations_resume.py b/tests/test_blobxfer_operations_resume.py index 9894d3b..e3f769b 100644 --- a/tests/test_blobxfer_operations_resume.py +++ 
b/tests/test_blobxfer_operations_resume.py @@ -11,10 +11,23 @@ except ImportError: # noqa import pathlib # non-stdlib imports +# local imports # module under test import blobxfer.operations.resume as ops +def test_generate_record_key(): + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'abc' + + with mock.patch('blobxfer.util.on_python2', return_value=True): + assert ops._BaseResumeManager.generate_record_key(ase) == b'ep:abc' + + with mock.patch('blobxfer.util.on_python2', return_value=False): + assert ops._BaseResumeManager.generate_record_key(ase) == 'ep:abc' + + def test_download_resume_manager(tmpdir): tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) @@ -72,3 +85,151 @@ def test_download_resume_manager(tmpdir): drm.delete() assert drm._data is None assert not tmpdb.exists() + + +def test_upload_resume_manager(tmpdir): + tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) + + urm = ops.UploadResumeManager(tmpdb) + assert urm._data is not None + urm.close() + assert urm._data is None + assert tmpdb.exists() + urm.delete() + assert urm._data is None + assert not tmpdb.exists() + + ase = mock.MagicMock() + ase._name = 'name' + ase._client.primary_endpoint = 'ep' + ase._size = 16 + + local_path = 'fp' + urm = ops.UploadResumeManager(tmpdb) + urm.add_or_update_record(local_path, ase, 2, 8, 0, False, None) + u = urm.get_record(ase) + + assert u.local_path == local_path + assert u.length == ase._size + assert u.chunk_size == 2 + assert u.total_chunks == 8 + assert u.completed_chunks == 0 + assert not u.completed + + urm.add_or_update_record(local_path, ase, 2, 8, 1, False, 'abc') + u = urm.get_record(ase) + + assert u.local_path == local_path + assert u.length == ase._size + assert u.chunk_size == 2 + assert u.total_chunks == 8 + assert u.completed_chunks == 1 + assert not u.completed + assert u.md5hexdigest == 'abc' + + urm.add_or_update_record(local_path, ase, 2, 8, 8, True, None) + u = urm.get_record(ase) + + assert u.local_path == local_path + assert u.length == ase._size + assert u.chunk_size == 2 + assert u.total_chunks == 8 + assert u.completed_chunks == 8 + assert u.completed + assert u.md5hexdigest == 'abc' + + # idempotent check after completed + urm.add_or_update_record(local_path, ase, 2, 8, 8, True, None) + u = urm.get_record(ase) + + assert u.local_path == local_path + assert u.length == ase._size + assert u.chunk_size == 2 + assert u.total_chunks == 8 + assert u.completed_chunks == 8 + assert u.completed + assert u.md5hexdigest == 'abc' + + urm.close() + assert urm._data is None + assert tmpdb.exists() + + tmpdb.unlink() + urm.delete() + assert urm._data is None + assert not tmpdb.exists() + + +def test_synccopy_resume_manager(tmpdir): + tmpdb = pathlib.Path(str(tmpdir.join('tmp.db'))) + + srm = ops.SyncCopyResumeManager(tmpdb) + assert srm._data is not None + srm.close() + assert srm._data is None + assert tmpdb.exists() + srm.delete() + assert srm._data is None + assert not tmpdb.exists() + + ase = mock.MagicMock() + ase._name = 'name' + ase._client.primary_endpoint = 'ep' + ase._size = 16 + + src_block_list = 'srcbl' + + srm = ops.SyncCopyResumeManager(tmpdb) + srm.add_or_update_record(ase, src_block_list, 0, 2, 8, 0, False) + s = srm.get_record(ase) + + assert s.src_block_list == src_block_list + assert s.length == ase._size + assert s.offset == 0 + assert s.chunk_size == 2 + assert s.total_chunks == 8 + assert s.completed_chunks == 0 + assert not s.completed + + srm.add_or_update_record(ase, src_block_list, 1, 2, 8, 1, False) + s = 
srm.get_record(ase) + + assert s.src_block_list == src_block_list + assert s.length == ase._size + assert s.offset == 1 + assert s.chunk_size == 2 + assert s.total_chunks == 8 + assert s.completed_chunks == 1 + assert not s.completed + + srm.add_or_update_record(ase, src_block_list, 8, 2, 8, 8, True) + s = srm.get_record(ase) + + assert s.src_block_list == src_block_list + assert s.length == ase._size + assert s.offset == 8 + assert s.chunk_size == 2 + assert s.total_chunks == 8 + assert s.completed_chunks == 8 + assert s.completed + + # idempotent check after completed + srm.add_or_update_record(ase, src_block_list, 8, 2, 8, 8, True) + s = srm.get_record(ase) + + assert s.src_block_list == src_block_list + assert s.length == ase._size + assert s.offset == 8 + assert s.chunk_size == 2 + assert s.total_chunks == 8 + assert s.completed_chunks == 8 + assert s.completed + + srm.close() + assert srm._data is None + assert tmpdb.exists() + + tmpdb.unlink() + srm.delete() + assert srm._data is None + assert not tmpdb.exists() diff --git a/tests/test_blobxfer_operations_synccopy.py b/tests/test_blobxfer_operations_synccopy.py new file mode 100644 index 0000000..e16b86b --- /dev/null +++ b/tests/test_blobxfer_operations_synccopy.py @@ -0,0 +1,777 @@ +# coding=utf-8 +"""Tests for synccopy operations""" + +# stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock +try: + import pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +import pytest +# local imports +import blobxfer.models.azure as azmodels +# module under test +import blobxfer.operations.synccopy as ops + + +def test_termination_check(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + assert not s.termination_check + + +def test_create_unique_transfer_operation_id(): + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.path = 'srcasepath' + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.path = 'dstasepath' + + id = ops.SyncCopy.create_unique_transfer_operation_id(src_ase, dst_ase) + assert id == 'ep;srcasepath;ep2;dstasepath' + + +def test_create_deletion_id(): + client = mock.MagicMock() + client.primary_endpoint = 'ep' + + id = ops.SyncCopy.create_deletion_id(client, 'cont', 'name') + assert id == 'ep;cont;name' + + +def test_update_progress_bar(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + with mock.patch( + 'blobxfer.operations.progress.update_progress_bar') as patched_upb: + s._update_progress_bar() + assert patched_upb.call_count == 1 + + +@mock.patch('blobxfer.operations.azure.file.list_all_files') +@mock.patch('blobxfer.operations.azure.file.delete_file') +@mock.patch('blobxfer.operations.azure.blob.list_all_blobs') +@mock.patch('blobxfer.operations.azure.blob.delete_blob') +def test_delete_extraneous_files(db, lab, df, laf): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + # test no delete + s._spec.options.delete_extraneous_destination = False + s._get_destination_paths = mock.MagicMock() + + s._delete_extraneous_files() + assert s._get_destination_paths.call_count == 0 + + # test file delete + s._spec.options.delete_extraneous_destination = True + s._spec.options.dest_mode = azmodels.StorageModes.File + + sa1 = mock.MagicMock() + sa1.name = 'name' + sa1.endpoint = 'ep' + sa1.file_client.primary_endpoint = 'ep' + + s._get_destination_paths = mock.MagicMock() + s._get_destination_paths.return_value = [ + 
(sa1, 'cont', None, None), + (sa1, 'cont', None, None), + ] + + laf.return_value = ['filename'] + + s._delete_extraneous_files() + assert laf.call_count == 1 + assert df.call_count == 1 + + # test blob delete + s._spec.options.delete_extraneous_destination = True + s._spec.options.dest_mode = azmodels.StorageModes.Block + + sa1 = mock.MagicMock() + sa1.name = 'name' + sa1.endpoint = 'ep' + sa1.block_blob_client.primary_endpoint = 'ep' + + s._get_destination_paths = mock.MagicMock() + s._get_destination_paths.return_value = [ + (sa1, 'cont', None, None), + ] + + blob = mock.MagicMock() + blob.name = 'blobname' + lab.return_value = [blob] + + s._delete_extraneous_files() + assert lab.call_count == 1 + assert db.call_count == 1 + + +@mock.patch('blobxfer.operations.azure.blob.block.get_committed_block_list') +def test_add_to_transfer_queue(gcbl): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.path = 'srcasepath' + src_ase.mode = azmodels.StorageModes.Block + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.path = 'dstasepath' + dst_ase.mode = azmodels.StorageModes.Block + + gcbl.return_value = None + s._add_to_transfer_queue(src_ase, dst_ase) + assert gcbl.call_count == 1 + assert s._transfer_queue.qsize() == 1 + assert s._synccopy_start_time is not None + + src_ase.mode = azmodels.StorageModes.Page + s._add_to_transfer_queue(src_ase, dst_ase) + assert gcbl.call_count == 1 + assert s._transfer_queue.qsize() == 2 + assert s._synccopy_start_time is not None + + +def test_initialize_transfer_threads(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._general_options.concurrency.transfer_threads = 1 + + try: + s._initialize_transfer_threads() + assert len(s._transfer_threads) == 1 + finally: + s._wait_for_transfer_threads(True) + for thr in s._transfer_threads: + assert not thr.is_alive() + + +def test_worker_thread_transfer(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._transfer_queue.put(mock.MagicMock()) + s._transfer_queue.put(mock.MagicMock()) + s._process_synccopy_descriptor = mock.MagicMock() + s._process_synccopy_descriptor.side_effect = [None, Exception()] + + with mock.patch( + 'blobxfer.operations.synccopy.SyncCopy.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + patched_tc.side_effect = [False, False, True] + s._worker_thread_transfer() + assert s._process_synccopy_descriptor.call_count == 2 + assert len(s._exceptions) == 1 + + +@mock.patch('blobxfer.operations.azure.blob.append.append_block') +@mock.patch('blobxfer.operations.azure.blob.block.create_blob') +@mock.patch('blobxfer.operations.azure.blob.block.put_block') +@mock.patch('blobxfer.operations.azure.file.put_file_range') +@mock.patch('blobxfer.operations.azure.blob.page.put_page') +def test_put_data(pp, pfr, pb, cb, ab): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.path = 'srcasepath' + src_ase.mode = azmodels.StorageModes.Block + src_ase.size = 10 + src_ase.is_encrypted = False + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.path = 'dstasepath' + dst_ase.mode = azmodels.StorageModes.Append + dst_ase.size = 10 + dst_ase.is_encrypted = False + + sd = 
mock.MagicMock() + sd.src_entity = src_ase + sd.dst_entity = dst_ase + sd.complete_offset_upload = mock.MagicMock() + + s._put_data(sd, dst_ase, offsets, b'\0') + assert ab.call_count == 1 + + dst_ase.mode = azmodels.StorageModes.Block + sd.is_one_shot_block_blob = True + sd.must_compute_md5 = True + sd.src_entity.md5 = '' + s._put_data(sd, dst_ase, offsets, b'\0') + assert cb.call_count == 1 + + sd.src_entity.md5 = b'md5' + s._put_data(sd, dst_ase, offsets, b'\0') + assert cb.call_count == 2 + + sd.must_compute_md5 = False + s._put_data(sd, dst_ase, offsets, b'\0') + assert cb.call_count == 3 + + sd.is_one_shot_block_blob = False + s._put_data(sd, dst_ase, offsets, b'\0') + assert pb.call_count == 1 + + dst_ase.mode = azmodels.StorageModes.File + s._put_data(sd, dst_ase, offsets, b'\0') + assert pfr.call_count == 1 + + dst_ase.mode = azmodels.StorageModes.Page + s._put_data(sd, dst_ase, offsets, b'\0' * 512) + assert pp.call_count == 0 + + s._put_data(sd, dst_ase, offsets, b'1') + assert pp.call_count == 1 + + +def test_process_data(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + s._put_data = mock.MagicMock() + s._complete_offset_upload = mock.MagicMock() + offsets = mock.MagicMock() + offsets.num_bytes = 1 + offsets.chunk_num = 0 + sd = mock.MagicMock() + + s._process_data(sd, mock.MagicMock(), offsets, mock.MagicMock()) + assert s._put_data.call_count == 1 + assert s._synccopy_bytes_sofar == 1 + assert sd.complete_offset_upload.call_count == 1 + + +@mock.patch('blobxfer.operations.azure.blob.create_container') +@mock.patch('blobxfer.operations.azure.blob.append.create_blob') +@mock.patch('blobxfer.operations.azure.file.create_share') +@mock.patch('blobxfer.operations.azure.file.create_all_parent_directories') +@mock.patch('blobxfer.operations.azure.file.create_file') +@mock.patch('blobxfer.operations.azure.blob.page.create_blob') +def test_prepare_upload(page_cb, cf, capd, cs, append_cb, cc): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + + ase.mode = azmodels.StorageModes.Append + ase.append_create = True + s._prepare_upload(ase) + assert cc.call_count == 1 + assert append_cb.call_count == 1 + + ase.mode = azmodels.StorageModes.Block + ase.append_create = False + s._prepare_upload(ase) + assert cc.call_count == 2 + + ase.mode = azmodels.StorageModes.File + s._prepare_upload(ase) + assert cs.call_count == 1 + assert capd.call_count == 1 + assert cf.call_count == 1 + + ase.mode = azmodels.StorageModes.Page + s._prepare_upload(ase) + assert cc.call_count == 3 + assert page_cb.call_count == 1 + + +@mock.patch('blobxfer.operations.azure.file.get_file_range') +@mock.patch('blobxfer.operations.azure.blob.get_blob_range') +def test_process_synccopy_descriptor(gbr, gfr): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.path = 'srcasepath' + src_ase.mode = azmodels.StorageModes.Block + src_ase.size = 10 + src_ase.is_encrypted = False + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.path = 'dstasepath' + dst_ase.mode = azmodels.StorageModes.Block + dst_ase.size = 10 + dst_ase.is_encrypted = False + + sd = mock.MagicMock() + sd.src_entity = src_ase + sd.dst_entity = dst_ase + sd.complete_offset_upload = mock.MagicMock() + sd.next_offsets.return_value = (None, 1) + sd.is_one_shot_block_blob = 
False + sd.all_operations_completed = True + + s._finalize_upload = mock.MagicMock() + s._transfer_set.add( + ops.SyncCopy.create_unique_transfer_operation_id(src_ase, dst_ase)) + + # test resume and completed + s._process_synccopy_descriptor(sd) + assert s._synccopy_bytes_sofar == 1 + assert s._finalize_upload.call_count == 1 + assert len(s._transfer_set) == 0 + assert s._synccopy_sofar == 1 + + # test nothing + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + sd.all_operations_completed = False + sd.next_offsets.return_value = (None, None) + s._process_synccopy_descriptor(sd) + assert s._transfer_queue.qsize() == 1 + + # test normal block blob + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._prepare_upload = mock.MagicMock() + s._process_data = mock.MagicMock() + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + sd.next_offsets.return_value = (offsets, None) + + dst_ase.replica_targets = [dst_ase] + + s._process_synccopy_descriptor(sd) + assert gbr.call_count == 1 + assert s._transfer_queue.qsize() == 1 + assert len(s._transfer_set) == 0 + + # test normal append blob + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._prepare_upload = mock.MagicMock() + s._process_data = mock.MagicMock() + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + sd.next_offsets.return_value = (offsets, None) + src_ase.mode = azmodels.StorageModes.Append + + s._process_synccopy_descriptor(sd) + assert gbr.call_count == 2 + assert s._transfer_queue.qsize() == 1 + assert len(s._transfer_set) == 0 + + # test normal file + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._prepare_upload = mock.MagicMock() + s._process_data = mock.MagicMock() + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + sd.next_offsets.return_value = (offsets, None) + src_ase.mode = azmodels.StorageModes.File + + s._process_synccopy_descriptor(sd) + assert gfr.call_count == 1 + assert s._transfer_queue.qsize() == 1 + assert len(s._transfer_set) == 0 + + +@mock.patch('blobxfer.operations.azure.blob.block.put_block_list') +def test_finalize_block_blob(pbl): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + sd = mock.MagicMock() + sd.dst_entity = ase + sd.last_block_num = 1 + + s._finalize_block_blob(sd, mock.MagicMock(), mock.MagicMock()) + assert pbl.call_count == 2 + + +@mock.patch('blobxfer.operations.azure.blob.set_blob_md5') +def test_set_blob_md5(sbm): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + sd = mock.MagicMock() + sd.dst_entity = ase + + s._set_blob_md5(sd, mock.MagicMock()) + assert sbm.call_count == 2 + + +@mock.patch('blobxfer.operations.azure.blob.set_blob_metadata') +def test_set_blob_metadata(sbm): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block 
+ ase.is_encrypted = False + ase.replica_targets = [ase] + + sd = mock.MagicMock() + sd.dst_entity = ase + + s._set_blob_metadata(sd, mock.MagicMock()) + assert sbm.call_count == 2 + + +def test_finalize_nonblock_blob(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + s._set_blob_md5 = mock.MagicMock() + s._set_blob_metadata = mock.MagicMock() + + s._finalize_nonblock_blob(mock.MagicMock(), {'a': 0}, 'digest') + assert s._set_blob_md5.call_count == 1 + assert s._set_blob_metadata.call_count == 1 + + +@mock.patch('blobxfer.operations.azure.file.set_file_md5') +@mock.patch('blobxfer.operations.azure.file.set_file_metadata') +def test_finalize_azure_file(sfmeta, sfmd5): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.File + ase.replica_targets = [ase] + + sd = mock.MagicMock() + sd.dst_entity = ase + + s._finalize_azure_file(sd, {'a': 0}, 'md5') + assert sfmd5.call_count == 2 + assert sfmeta.call_count == 2 + + +def test_finalize_upload(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.replica_targets = [ase] + ase.md5 = 'md5' + + sd = mock.MagicMock() + sd.dst_entity = ase + sd.src_entity = ase + + s._finalize_block_blob = mock.MagicMock() + s._finalize_upload(sd) + assert s._finalize_block_blob.call_count == 1 + + ase.md5 = None + s._finalize_upload(sd) + assert s._finalize_block_blob.call_count == 2 + + sd.requires_put_block_list = False + sd.remote_is_page_blob = True + s._finalize_nonblock_blob = mock.MagicMock() + s._finalize_upload(sd) + assert s._finalize_nonblock_blob.call_count == 1 + + sd.remote_is_page_blob = False + sd.remote_is_append_blob = False + sd.remote_is_file = True + s._finalize_azure_file = mock.MagicMock() + s._finalize_upload(sd) + assert s._finalize_azure_file.call_count == 1 + + +@mock.patch('blobxfer.models.metadata.get_md5_from_metadata') +def test_check_copy_conditions(gmfm): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.path = 'srcasepath' + src_ase.mode = azmodels.StorageModes.Block + src_ase.size = 10 + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.path = 'dstasepath' + dst_ase.mode = azmodels.StorageModes.Block + dst_ase.size = 10 + dst_ase.from_local = False + + assert s._check_copy_conditions(src_ase, None) == ops.SynccopyAction.Copy + + s._spec.options.overwrite = False + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Skip + + s._spec.options.overwrite = True + s._spec.skip_on.md5_match = True + gmfm.return_value = 'md5' + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Skip + + gmfm.side_effect = ['md50', 'md51'] + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Copy + + gmfm.return_value = None + gmfm.side_effect = None + s._spec.skip_on.md5_match = False + s._spec.skip_on.filesize_match = False + s._spec.skip_on.lmt_ge = False + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Copy + + # size mismatch + s._spec.skip_on.filesize_match = True + src_ase.size = 1 + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Copy 
+ + # size match + s._spec.skip_on.filesize_match = True + src_ase.size = dst_ase.size + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Skip + + # lmt match + s._spec.skip_on.filesize_match = False + s._spec.skip_on.lmt_ge = True + src_ase.lmt = 0 + dst_ase.lmt = 0 + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Skip + + # lmt mismatch + s._spec.skip_on.lmt_ge = True + src_ase.lmt = 1 + assert s._check_copy_conditions( + src_ase, dst_ase) == ops.SynccopyAction.Copy + + +@mock.patch('blobxfer.operations.azure.file.get_file_properties') +@mock.patch('blobxfer.operations.azure.blob.get_blob_properties') +def test_check_for_existing_remote(gbp, gfp): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + sa = mock.MagicMock() + sa.name = 'name' + sa.endpoint = 'ep' + + s._spec.options.dest_mode = azmodels.StorageModes.File + gfp.return_value = None + assert s._check_for_existing_remote(sa, 'cont', 'name') is None + + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.' + 'encryption_metadata_exists', return_value=False): + gfp.return_value = mock.MagicMock() + assert s._check_for_existing_remote(sa, 'cont', 'name') is not None + + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.' + 'encryption_metadata_exists', return_value=True): + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.convert_from_json'): + gfp.return_value = mock.MagicMock() + assert s._check_for_existing_remote(sa, 'cont', 'name') is not None + + s._spec.options.dest_mode = azmodels.StorageModes.Block + gbp.return_value = None + assert s._check_for_existing_remote(sa, 'cont', 'name') is None + + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.' + 'encryption_metadata_exists', return_value=False): + gbp.return_value = mock.MagicMock() + assert s._check_for_existing_remote(sa, 'cont', 'name') is not None + + +def test_get_destination_paths(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + paths = mock.MagicMock() + paths.paths = [pathlib.Path('a/b')] + s._spec.destinations = [paths] + + sa, cont, dir, dpath = next(s._get_destination_paths()) + assert cont == 'a' + assert dir == 'b' + assert dpath == pathlib.Path('a/b') + + +def test_generate_destination_for_source(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._spec.options.dest_mode = azmodels.StorageModes.Block + s._check_for_existing_remote = mock.MagicMock() + s._check_for_existing_remote.return_value = None + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.path = 'srcasepath' + src_ase.mode = azmodels.StorageModes.Block + src_ase.size = 10 + src_ase.name = 'srcase' + + sa = mock.MagicMock() + sa.name = 'name' + sa.endpoint = 'ep' + + s._get_destination_paths = mock.MagicMock() + s._get_destination_paths.return_value = [ + (sa, 'cont', 'name', 'dpath'), + ] + + s._check_copy_conditions = mock.MagicMock() + s._check_copy_conditions.return_value = ops.SynccopyAction.Copy + + ase = next(s._generate_destination_for_source(src_ase)) + assert ase is not None + assert ase.size == src_ase.size + assert ase.mode == s._spec.options.dest_mode + + +def test_bind_sources_to_destination(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._spec.options.delete_extraneous_destination = True + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.container = 'srccont' + src_ase.path = 'srcasepath' + src_ase.mode = 
azmodels.StorageModes.Block + src_ase.size = 10 + src_ase.name = 'srcase' + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.container = 'dstcont' + dst_ase.path = 'dstasepath' + dst_ase.mode = azmodels.StorageModes.Block + dst_ase.size = 10 + dst_ase.name = 'dstase' + dst_ase.from_local = False + dst_ase.replica_targets = None + + dst2_ase = mock.MagicMock() + dst2_ase._client.primary_endpoint = 'ep2a' + dst2_ase.container = 'dstcont2' + dst2_ase.name = 'dstase2' + + src = mock.MagicMock() + src.files.return_value = [src_ase] + + s._spec.sources = [src] + + s._generate_destination_for_source = mock.MagicMock() + i = 0 + for a, b in s._bind_sources_to_destination(): + i += 1 + assert i == 0 + + s._generate_destination_for_source.return_value = [dst_ase, dst2_ase] + a, b = next(s._bind_sources_to_destination()) + assert a == src_ase + assert b == dst_ase + assert len(b.replica_targets) == 1 + assert b.replica_targets[0] == dst2_ase + + +@mock.patch('blobxfer.operations.azure.file.get_file_range') +@mock.patch('blobxfer.operations.azure.blob.get_blob_range') +@mock.patch('blobxfer.operations.resume.SyncCopyResumeManager') +def test_run(srm, gbr, gfr): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._general_options.concurrency.transfer_threads = 1 + s._general_options.resume_file = 'resume' + s._spec.options.chunk_size_bytes = 0 + + src_ase = mock.MagicMock() + src_ase._client.primary_endpoint = 'ep' + src_ase.container = 'srccont' + src_ase.path = 'srcasepath' + src_ase.mode = azmodels.StorageModes.Block + src_ase.size = 10 + src_ase.name = 'srcase' + + dst_ase = mock.MagicMock() + dst_ase._client.primary_endpoint = 'ep2' + dst_ase.container = 'dstcont' + dst_ase.path = 'dstasepath' + dst_ase.mode = azmodels.StorageModes.Block + dst_ase.size = 10 + dst_ase.name = 'dstase' + dst_ase.from_local = False + dst_ase.replica_targets = None + + s._bind_sources_to_destination = mock.MagicMock() + s._bind_sources_to_destination.return_value = [ + (src_ase, dst_ase) + ] + + s._prepare_upload = mock.MagicMock() + s._put_data = mock.MagicMock() + s._finalize_upload = mock.MagicMock() + + # normal execution + s._run() + assert s._prepare_upload.call_count == 1 + assert s._put_data.call_count == 1 + + # replica targets with mismatch + dst_ase.replica_targets = [dst_ase] + with pytest.raises(RuntimeError): + s._run() + + # exception during worker thread + dst_ase.replica_targets = None + with pytest.raises(RuntimeError): + s._process_synccopy_descriptor = mock.MagicMock() + s._process_synccopy_descriptor.side_effect = RuntimeError() + s._run() + + +def test_start(): + s = ops.SyncCopy(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + s._wait_for_transfer_threads = mock.MagicMock() + s._resume = mock.MagicMock() + s._run = mock.MagicMock() + + # test keyboard interrupt + s._run.side_effect = KeyboardInterrupt() + s.start() + + assert s._run.call_count == 1 + assert s._wait_for_transfer_threads.call_count == 1 + assert s._resume.close.call_count == 1 + + # test other exception + s._run.side_effect = RuntimeError() + with pytest.raises(RuntimeError): + s.start() diff --git a/tests/test_blobxfer_operations_upload.py b/tests/test_blobxfer_operations_upload.py new file mode 100644 index 0000000..c5d0514 --- /dev/null +++ b/tests/test_blobxfer_operations_upload.py @@ -0,0 +1,1201 @@ +# coding=utf-8 +"""Tests for upload operations""" + +# stdlib imports +try: + import unittest.mock as mock +except ImportError: # noqa + import mock +try: + import 
pathlib2 as pathlib +except ImportError: # noqa + import pathlib +# non-stdlib imports +import pytest +# local imports +import blobxfer.models.azure as azmodels +import blobxfer.models.upload as models +# module under test +import blobxfer.operations.upload as ops + + +def test_termination_check(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + assert not u.termination_check + assert not u.termination_check_md5 + + +def test_create_unique_id(): + src = mock.MagicMock() + src.absolute_path = 'abspath' + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + + id = ops.Uploader.create_unique_id(src, ase) + assert id == 'abspath;ep;asepath' + + +def test_create_unique_transfer_id(): + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + offsets = mock.MagicMock() + offsets.range_start = 10 + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + + id = ops.Uploader.create_unique_transfer_id(lp, ase, offsets) + assert id == 'lpabspath;ep;asepath;0;10' + + +def test_create_destination_id(): + client = mock.MagicMock() + client.primary_endpoint = 'ep' + + id = ops.Uploader.create_destination_id(client, 'cont', 'name') + assert id == 'ep;cont;name' + + +def test_append_slice_suffix_to_name(): + name = ops.Uploader.append_slice_suffix_to_name('name', 0) + assert name == 'name.bxslice-0' + + +def test_update_progress_bar(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + with mock.patch( + 'blobxfer.operations.progress.update_progress_bar') as patched_upb: + u._all_files_processed = False + u._update_progress_bar() + assert patched_upb.call_count == 0 + + u._all_files_processed = True + u._update_progress_bar() + assert patched_upb.call_count == 1 + + +def test_pre_md5_skip_on_check(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + src = mock.MagicMock() + src.absolute_path = 'abspath' + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + + u._md5_offload = mock.MagicMock() + + u._pre_md5_skip_on_check(src, ase) + assert len(u._md5_map) == 1 + assert u._md5_offload.add_localfile_for_md5_check.call_count == 1 + + +def test_post_md5_skip_on_check(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + src = mock.MagicMock() + src.absolute_path = 'abspath' + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + + id = ops.Uploader.create_unique_id(src, ase) + u._md5_map[id] = (src, ase) + u._upload_set.add(id) + u._upload_total += 1 + + u._post_md5_skip_on_check(id, True) + assert len(u._md5_map) == 0 + assert id not in u._upload_set + assert u._upload_total == 0 + + u._md5_map[id] = (src, ase) + u._upload_set.add(id) + u._upload_total += 1 + u._add_to_upload_queue = mock.MagicMock() + u._post_md5_skip_on_check(id, False) + assert len(u._md5_map) == 0 + assert id in u._upload_set + assert u._upload_total == 1 + assert u._add_to_upload_queue.call_count == 1 + + +def test_check_for_uploads_from_md5(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._md5_offload = mock.MagicMock() + u._post_md5_skip_on_check = mock.MagicMock() + + with mock.patch( + 'blobxfer.operations.upload.Uploader.termination_check_md5', + new_callable=mock.PropertyMock) as patched_tcm: + patched_tcm.side_effect = [False, False, False, True, True] + u._md5_offload.pop_done_queue.side_effect = [ + None, mock.MagicMock(), None + ] + + 
u._check_for_uploads_from_md5() + assert u._post_md5_skip_on_check.call_count == 1 + + +def test_add_to_upload_queue(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._spec.options.chunk_size_bytes = 32 + + src = mock.MagicMock() + src.absolute_path = 'abspath' + src.size = 32 + src.use_stdin = False + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.encryption_metadata.symmetric_key = 'abc' + id = ops.Uploader.create_unique_id(src, ase) + + u._add_to_upload_queue(src, ase, id) + assert len(u._ud_map) == 1 + assert u._upload_queue.qsize() == 1 + assert u._upload_start_time is not None + + +def test_initialize_disk_threads(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._general_options.concurrency.disk_threads = 1 + + try: + u._initialize_disk_threads() + assert len(u._disk_threads) == 1 + finally: + u._wait_for_disk_threads(True) + for thr in u._disk_threads: + assert not thr.is_alive() + + +def test_initialize_transfer_threads(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._general_options.concurrency.transfer_threads = 1 + + try: + u._initialize_transfer_threads() + assert len(u._transfer_threads) == 1 + finally: + u._wait_for_transfer_threads(True) + for thr in u._transfer_threads: + assert not thr.is_alive() + + +def test_worker_thread_transfer(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._transfer_queue.put( + (mock.MagicMock, mock.MagicMock, mock.MagicMock, mock.MagicMock) + ) + u._transfer_queue.put( + (mock.MagicMock, mock.MagicMock, mock.MagicMock, mock.MagicMock) + ) + u._process_transfer = mock.MagicMock() + u._process_transfer.side_effect = [None, Exception()] + + with mock.patch( + 'blobxfer.operations.upload.Uploader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + patched_tc.side_effect = [False, False, True] + u._worker_thread_transfer() + assert u._process_transfer.call_count == 2 + assert len(u._exceptions) == 1 + + +def test_process_transfer(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._put_data = mock.MagicMock() + u._update_progress_bar = mock.MagicMock() + + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = True + + ud = mock.MagicMock() + ud.entity.mode = azmodels.StorageModes.Append + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + + id = ops.Uploader.create_unique_transfer_id(lp, ase, offsets) + u._transfer_set.add(id) + + u._process_transfer(ud, ase, offsets, mock.MagicMock()) + assert u._upload_bytes_total == 1 + assert u._upload_bytes_sofar == 1 + assert len(u._transfer_set) == 0 + assert ud.complete_offset_upload.call_count == 1 + assert u._upload_queue.qsize() == 1 + assert u._update_progress_bar.call_count == 1 + + lp.use_stdin = False + u._transfer_set.add(id) + u._process_transfer(ud, ase, offsets, mock.MagicMock()) + assert u._upload_bytes_total == 11 + assert u._upload_bytes_sofar == 2 + assert len(u._transfer_set) == 0 + assert ud.complete_offset_upload.call_count == 2 + assert u._upload_queue.qsize() == 2 + assert u._update_progress_bar.call_count == 2 + + +@mock.patch('blobxfer.operations.azure.blob.append.append_block') 
+@mock.patch('blobxfer.operations.azure.blob.block.create_blob') +@mock.patch('blobxfer.operations.azure.blob.block.put_block') +@mock.patch('blobxfer.operations.azure.file.put_file_range') +@mock.patch('blobxfer.operations.azure.blob.page.put_page') +def test_put_data(pp, pfr, pb, cb, ab): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = True + + ud = mock.MagicMock() + ud.entity.mode = azmodels.StorageModes.Append + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + + ase.mode = azmodels.StorageModes.Append + u._put_data(ud, ase, offsets, b'\0') + assert ab.call_count == 1 + + ase.mode = azmodels.StorageModes.Block + ud.is_one_shot_block_blob = True + ud.entity.is_encrypted = False + ud.must_compute_md5 = True + ud.md5.digest.return_value = b'md5' + u._put_data(ud, ase, offsets, b'\0') + assert cb.call_count == 1 + + ud.must_compute_md5 = False + u._put_data(ud, ase, offsets, b'\0') + assert cb.call_count == 2 + + ud.is_one_shot_block_blob = False + u._put_data(ud, ase, offsets, b'\0') + assert pb.call_count == 1 + + ase.mode = azmodels.StorageModes.File + u._put_data(ud, ase, offsets, b'\0') + assert pfr.call_count == 1 + + ase.mode = azmodels.StorageModes.Page + u._put_data(ud, ase, offsets, None) + assert pp.call_count == 0 + + ase.mode = azmodels.StorageModes.Page + u._put_data(ud, ase, offsets, b'\0') + assert pp.call_count == 0 + + ase.mode = azmodels.StorageModes.Page + u._put_data(ud, ase, offsets, b'1') + assert pp.call_count == 1 + + +@mock.patch('time.sleep', return_value=None) +def test_worker_thread_upload(ts): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._general_options.concurrency.transfer_threads = 1 + + u._transfer_set = mock.MagicMock() + u._transfer_set.__len__.side_effect = [5, 0, 0, 0] + u._upload_queue.put(mock.MagicMock) + u._upload_queue.put(mock.MagicMock) + u._process_upload_descriptor = mock.MagicMock() + u._process_upload_descriptor.side_effect = [None, Exception()] + + with mock.patch( + 'blobxfer.operations.upload.Uploader.termination_check', + new_callable=mock.PropertyMock) as patched_tc: + patched_tc.side_effect = [False, False, False, False, True] + u._worker_thread_upload() + assert u._process_upload_descriptor.call_count == 2 + assert len(u._exceptions) == 1 + + +@mock.patch('blobxfer.operations.azure.blob.create_container') +@mock.patch('blobxfer.operations.azure.blob.append.create_blob') +@mock.patch('blobxfer.operations.azure.file.create_share') +@mock.patch('blobxfer.operations.azure.file.create_all_parent_directories') +@mock.patch('blobxfer.operations.azure.file.create_file') +@mock.patch('blobxfer.operations.azure.blob.page.create_blob') +def test_prepare_upload(page_cb, cf, capd, cs, append_cb, cc): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + + ase.mode = azmodels.StorageModes.Append + ase.append_create = True + u._prepare_upload(ase) + assert cc.call_count == 1 + assert append_cb.call_count == 1 + + ase.mode = azmodels.StorageModes.Block + ase.append_create = False + u._prepare_upload(ase) + assert cc.call_count == 2 + + ase.mode = 
azmodels.StorageModes.File + u._prepare_upload(ase) + assert cs.call_count == 1 + assert capd.call_count == 1 + assert cf.call_count == 1 + + ase.mode = azmodels.StorageModes.Page + u._prepare_upload(ase) + assert cc.call_count == 3 + assert page_cb.call_count == 1 + + +def test_process_upload_descriptor(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = True + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.next_offsets.return_value = (None, 1) + ud.all_operations_completed = True + ud.unique_id = 'uid' + + u._finalize_upload = mock.MagicMock() + u._ud_map['uid'] = 0 + u._upload_set.add('uid') + + # test resume and completed + u._process_upload_descriptor(ud) + assert u._upload_bytes_total == 10 + assert u._upload_bytes_sofar == 1 + assert u._finalize_upload.call_count == 1 + assert len(u._ud_map) == 0 + assert len(u._upload_set) == 0 + assert u._upload_sofar == 1 + + # test nothing + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + ud.all_operations_completed = False + ud.next_offsets.return_value = (None, None) + u._process_upload_descriptor(ud) + assert u._upload_queue.qsize() == 1 + + # test encrypted + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + offsets = mock.MagicMock() + offsets.chunk_num = 0 + offsets.num_bytes = 1 + offsets.range_start = 10 + ud.next_offsets.return_value = (offsets, None) + u._prepare_upload = mock.MagicMock() + ase2 = mock.MagicMock() + ase2._client.primary_endpoint = 'ep' + ase2.path = 'asepath2' + ase2.size = 10 + ase2.mode = azmodels.StorageModes.Block + ase.replica_targets = [ase2] + ase.is_encrypted = True + ud.read_data.return_value = (b'\0', None) + + with mock.patch( + 'blobxfer.operations.crypto.aes_cbc_encrypt_data', + return_value=b'\0' * 16): + u._process_upload_descriptor(ud) + assert u._upload_queue.qsize() == 1 + assert u._prepare_upload.call_count == 2 + assert ud.hmac_data.call_count == 2 + assert u._transfer_queue.qsize() == 2 + assert len(u._transfer_set) == 2 + + # test stdin + ase.is_encrypted = False + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = True + ud.local_path = lp + + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._prepare_upload = mock.MagicMock() + ud.read_data.return_value = (False, offsets) + u._process_upload_descriptor(ud) + assert u._upload_queue.qsize() == 1 + assert u._transfer_queue.qsize() == 0 + assert len(u._transfer_set) == 0 + + +@mock.patch('blobxfer.operations.azure.blob.block.put_block_list') +def test_finalize_block_blob(pbl): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.unique_id = 'uid' + ud.must_compute_md5 = True + ud.md5.digest.return_value = b'md5' + + u._finalize_block_blob(ud, 
mock.MagicMock()) + assert pbl.call_count == 2 + + ud.must_compute_md5 = False + ase.replica_targets = [] + u._finalize_block_blob(ud, mock.MagicMock()) + assert pbl.call_count == 3 + + +@mock.patch('blobxfer.operations.azure.blob.set_blob_md5') +def test_set_blob_md5(sbm): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.unique_id = 'uid' + ud.must_compute_md5 = True + ud.md5.digest.return_value = b'md5' + + u._set_blob_md5(ud) + assert sbm.call_count == 2 + + +@mock.patch('blobxfer.operations.azure.blob.set_blob_metadata') +def test_set_blob_metadata(sbm): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.unique_id = 'uid' + + u._set_blob_metadata(ud, mock.MagicMock()) + assert sbm.call_count == 2 + + +def test_finalize_nonblock_blob(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.unique_id = 'uid' + ud.requires_non_encrypted_md5_put = True + + u._set_blob_md5 = mock.MagicMock() + u._set_blob_metadata = mock.MagicMock() + + u._finalize_nonblock_blob(ud, {'a': 0}) + assert u._set_blob_md5.call_count == 1 + assert u._set_blob_metadata.call_count == 1 + + +@mock.patch('blobxfer.operations.azure.file.set_file_md5') +@mock.patch('blobxfer.operations.azure.file.set_file_metadata') +def test_finalize_azure_file(sfmeta, sfmd5): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.File + ase.is_encrypted = False + ase.replica_targets = [ase] + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.unique_id = 'uid' + ud.must_compute_md5 = True + ud.md5.digest.return_value = b'md5' + ud.requires_non_encrypted_md5_put = True + + u._finalize_azure_file(ud, {'a': 0}) + assert sfmd5.call_count == 2 + assert sfmeta.call_count == 2 + + +def test_finalize_upload(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = 
azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = [ase] + + lp = mock.MagicMock() + lp.absolute_path = 'lpabspath' + lp.view.fd_start = 0 + lp.use_stdin = False + + ud = mock.MagicMock() + ud.entity = ase + ud.complete_offset_upload = mock.MagicMock() + ud.local_path = lp + ud.unique_id = 'uid' + ud.requires_put_block_list = True + + u._finalize_block_blob = mock.MagicMock() + u._finalize_upload(ud) + assert u._finalize_block_blob.call_count == 1 + + ud.requires_put_block_list = False + ud.remote_is_page_blob = True + u._finalize_nonblock_blob = mock.MagicMock() + u._finalize_upload(ud) + assert u._finalize_nonblock_blob.call_count == 1 + + ud.remote_is_page_blob = False + ud.remote_is_append_blob = False + ud.remote_is_file = True + u._finalize_azure_file = mock.MagicMock() + u._finalize_upload(ud) + assert u._finalize_azure_file.call_count == 1 + + +def test_get_destination_paths(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + paths = mock.MagicMock() + paths.paths = [pathlib.Path('a/b')] + u._spec.destinations = [paths] + + sa, cont, dir, dpath = next(u._get_destination_paths()) + assert cont == 'a' + assert dir == 'b' + assert dpath == pathlib.Path('a/b') + + +@mock.patch('blobxfer.operations.azure.file.list_all_files') +@mock.patch('blobxfer.operations.azure.file.delete_file') +@mock.patch('blobxfer.operations.azure.blob.list_all_blobs') +@mock.patch('blobxfer.operations.azure.blob.delete_blob') +def test_delete_extraneous_files(db, lab, df, laf): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + # test no delete + u._spec.options.delete_extraneous_destination = False + u._get_destination_paths = mock.MagicMock() + + u._delete_extraneous_files() + assert u._get_destination_paths.call_count == 0 + + # test file delete + u._spec.options.delete_extraneous_destination = True + u._spec.options.mode = azmodels.StorageModes.File + + sa1 = mock.MagicMock() + sa1.name = 'name' + sa1.endpoint = 'ep' + sa1.file_client.primary_endpoint = 'ep' + + u._get_destination_paths = mock.MagicMock() + u._get_destination_paths.return_value = [ + (sa1, 'cont', None, None), + (sa1, 'cont', None, None), + ] + + laf.return_value = ['filename'] + + u._delete_extraneous_files() + assert laf.call_count == 1 + assert df.call_count == 1 + + # test blob delete + u._spec.options.delete_extraneous_destination = True + u._spec.options.mode = azmodels.StorageModes.Block + + sa1 = mock.MagicMock() + sa1.name = 'name' + sa1.endpoint = 'ep' + sa1.block_blob_client.primary_endpoint = 'ep' + + u._get_destination_paths = mock.MagicMock() + u._get_destination_paths.return_value = [ + (sa1, 'cont', None, None), + ] + + blob = mock.MagicMock() + blob.name = 'blobname' + lab.return_value = [blob] + + u._delete_extraneous_files() + assert lab.call_count == 1 + assert db.call_count == 1 + + +@mock.patch('blobxfer.models.metadata.get_md5_from_metadata') +def test_check_upload_conditions(gmfm): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + ase = mock.MagicMock() + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.size = 10 + ase.mode = azmodels.StorageModes.Block + ase.append_create = True + ase.is_encrypted = False + ase.from_local = False + + lp = mock.MagicMock() + lp.absolute_path = pathlib.Path('lpabspath') + lp.view.fd_start = 0 + lp.use_stdin = False + + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Skip + + lp.use_stdin = True + assert u._check_upload_conditions(lp, None) == 
ops.UploadAction.Upload + + u._spec.options.overwrite = False + ase.mode = azmodels.StorageModes.Append + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Upload + assert not ase.append_create + + ase.mode = azmodels.StorageModes.Block + ase.append_create = True + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Skip + assert ase.append_create + + u._spec.options.overwrite = True + u._spec.skip_on.md5_match = True + gmfm.return_value = 'md5' + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.CheckMd5 + + u._spec.skip_on.md5_match = False + u._spec.skip_on.filesize_match = False + u._spec.skip_on.lmt_ge = False + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Upload + + # size mismatch, page + u._spec.skip_on.filesize_match = True + ase.mode = azmodels.StorageModes.Page + lp.size = 1 + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Upload + + # size match + u._spec.skip_on.filesize_match = True + ase.mode = azmodels.StorageModes.Block + lp.size = ase.size + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Skip + + # lmt match + u._spec.skip_on.filesize_match = False + u._spec.skip_on.lmt_ge = True + ase.lmt = 0 + with mock.patch('blobxfer.util.datetime_from_timestamp') as patched_dft: + patched_dft.return_value = 0 + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Skip + + # lmt mismatch + u._spec.skip_on.lmt_ge = True + ase.lmt = 0 + with mock.patch('blobxfer.util.datetime_from_timestamp') as patched_dft: + patched_dft.return_value = 1 + assert u._check_upload_conditions(lp, ase) == ops.UploadAction.Upload + + +@mock.patch('blobxfer.operations.azure.file.get_file_properties') +@mock.patch('blobxfer.operations.azure.blob.get_blob_properties') +def test_check_for_existing_remote(gbp, gfp): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + + sa = mock.MagicMock() + sa.name = 'name' + sa.endpoint = 'ep' + + u._spec.options.mode = azmodels.StorageModes.File + gfp.return_value = None + assert u._check_for_existing_remote(sa, 'cont', 'name') is None + + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.' + 'encryption_metadata_exists', return_value=False): + gfp.return_value = mock.MagicMock() + assert u._check_for_existing_remote(sa, 'cont', 'name') is not None + + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.' + 'encryption_metadata_exists', return_value=True): + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.convert_from_json'): + gfp.return_value = mock.MagicMock() + assert u._check_for_existing_remote(sa, 'cont', 'name') is not None + + u._spec.options.mode = azmodels.StorageModes.Block + gbp.return_value = None + assert u._check_for_existing_remote(sa, 'cont', 'name') is None + + with mock.patch( + 'blobxfer.models.crypto.EncryptionMetadata.' 
+ 'encryption_metadata_exists', return_value=False): + gbp.return_value = mock.MagicMock() + assert u._check_for_existing_remote(sa, 'cont', 'name') is not None + + +def test_generate_destination_for_source(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._check_for_existing_remote = mock.MagicMock() + + lp = mock.MagicMock() + lp.relative_path = pathlib.Path('a/b/c/d') + lp.absolute_path = pathlib.Path('abs/rel/a/b/c/d') + lp.view.fd_start = 0 + lp.use_stdin = False + + sa = mock.MagicMock() + sa.name = 'name' + sa.endpoint = 'ep' + + u._spec.options.strip_components = 1 + u._spec.options.rename = True + u._get_destination_paths = mock.MagicMock() + u._get_destination_paths.return_value = [ + (sa, 'cont', '', 'dpath'), + ] + + with pytest.raises(ValueError): + next(u._generate_destination_for_source(lp)) + + lp.relative_path = pathlib.Path('rel/a') + lp.absolute_path = pathlib.Path('abs/rel/a') + + u._spec.options.strip_components = 0 + u._spec.options.rename = False + u._get_destination_paths.return_value = [ + (sa, 'cont', 'name', 'dpath'), + ] + u._spec.options.mode = azmodels.StorageModes.Block + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Stripe + a, b = next(u._generate_destination_for_source(lp)) + assert a == sa + assert b is not None + assert u._check_for_existing_remote.call_count == 0 + + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Disabled + a, b = next(u._generate_destination_for_source(lp)) + assert a == sa + assert b is not None + assert u._check_for_existing_remote.call_count == 1 + + +def test_vectorize_and_bind(): + ase = mock.MagicMock() + ase.client.primary_endpoint = 'ep' + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = None + ase.container = 'cont' + ase.name = 'name' + + ase2 = mock.MagicMock() + ase2.client.primary_endpoint = 'ep2' + ase2._client.primary_endpoint = 'ep2' + ase2.path = 'asepath2' + ase2.mode = azmodels.StorageModes.Block + ase2.is_encrypted = False + ase2.replica_targets = None + ase2.container = 'cont2' + ase2.name = 'name2' + + sa = mock.MagicMock() + sa.name = 'name' + sa.endpoint = 'ep' + sa.block_blob_client.primary_endpoint = 'pep' + + lp = mock.MagicMock() + lp.relative_path = pathlib.Path('rel/a') + lp.absolute_path = pathlib.Path('abs/rel/a') + lp.view.fd_start = 0 + lp.use_stdin = False + lp.total_size = 9 + + # no vectorization + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Disabled + u._check_upload_conditions = mock.MagicMock() + u._check_upload_conditions.return_value = ops.UploadAction.Upload + + dest = [(sa, ase)] + + a, b, c = next(u._vectorize_and_bind(lp, dest)) + assert a == ops.UploadAction.Upload + assert b == lp + assert c == ase + + # stripe vectorization 1 slice + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._check_upload_conditions = mock.MagicMock() + u._check_upload_conditions.return_value = ops.UploadAction.Upload + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Stripe + u._spec.options.vectored_io.stripe_chunk_size_bytes = 10 + + dest = [(sa, ase), (sa, ase2)] + + i = 0 + for a, b, c in u._vectorize_and_bind(lp, dest): + assert a == ops.UploadAction.Upload + assert b == lp + assert c == ase + i += 1 + assert i == 1 + + # 
stripe vectorization multi-slice + u._spec.options.mode = azmodels.StorageModes.Block + u._spec.options.vectored_io.stripe_chunk_size_bytes = 5 + u._check_for_existing_remote = mock.MagicMock() + u._check_for_existing_remote.return_value = None + + dest = [(sa, ase), (sa, ase2)] + + i = 0 + for a, b, c in u._vectorize_and_bind(lp, dest): + assert a == ops.UploadAction.Upload + assert b != lp + assert b.parent_path == lp.parent_path + assert b.relative_path == lp.relative_path + assert not b.use_stdin + if i == 0: + assert b.view.fd_start == 0 + assert b.view.fd_end == 5 + assert b.view.slice_num == 0 + else: + assert b.view.fd_start == 5 + assert b.view.fd_end == 9 + assert b.view.slice_num == 1 + assert b.view.mode == u._spec.options.vectored_io.distribution_mode + assert c != ase + assert c.from_local + i += 1 + assert i == 2 + + # replication single target + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Replica + u._check_upload_conditions = mock.MagicMock() + u._check_upload_conditions.return_value = ops.UploadAction.CheckMd5 + + dest = [(sa, ase)] + + a, b, c = next(u._vectorize_and_bind(lp, dest)) + assert a == ops.UploadAction.CheckMd5 + assert b == lp + assert c == ase + assert c.replica_targets is None + + # replication multi-target md5 + dest = [(sa, ase), (sa, ase2)] + + a, b, c = next(u._vectorize_and_bind(lp, dest)) + assert a == ops.UploadAction.CheckMd5 + assert b == lp + assert c == ase + assert c.replica_targets is None + + # replication multi-target upload + u._spec.options.delete_extraneous_destination = True + u._check_upload_conditions.return_value = ops.UploadAction.Upload + a, b, c = next(u._vectorize_and_bind(lp, dest)) + assert a == ops.UploadAction.Upload + assert b == lp + assert c == ase + assert len(c.replica_targets) == 1 + assert c.replica_targets[0] == ase2 + + +@mock.patch('blobxfer.operations.resume.UploadResumeManager') +@mock.patch('blobxfer.operations.md5.LocalFileMd5Offload') +def test_run(lfmo, urm, tmpdir): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._initialize_disk_threads = mock.MagicMock() + u._initialize_transfer_threads = mock.MagicMock() + u._general_options.concurrency.disk_threads = 1 + u._general_options.concurrency.transfer_threads = 1 + u._general_options.concurrency.md5_processes = 1 + u._general_options.concurrency.crypto_processes = 1 + u._general_options.resume_file = 'resume' + u._spec.options.store_file_properties.md5 = True + u._spec.skip_on.md5_match = True + u._spec.options.rsa_public_key = 'abc' + u._spec.options.chunk_size_bytes = 0 + u._spec.options.one_shot_bytes = 0 + + # check rename failure + u._spec.sources.can_rename.return_value = False + u._spec.options.rename = True + with pytest.raises(RuntimeError): + u._run() + assert urm.call_count == 0 + assert lfmo.call_count == 0 + assert lfmo.initialize_check_thread.call_count == 0 + assert u._initialize_disk_threads.call_count == 0 + assert u._initialize_transfer_threads.call_count == 0 + + # check dupe + u._spec.sources.can_rename.return_value = False + u._spec.options.rename = False + + ase = mock.MagicMock() + ase.client.primary_endpoint = 'ep' + ase._client.primary_endpoint = 'ep' + ase.path = 'asepath' + ase.mode = azmodels.StorageModes.Block + ase.is_encrypted = False + ase.replica_targets = None + ase.container = 'cont' + ase.name = 'name' + ase.size = 10 + + ase2 = mock.MagicMock() + ase2.client.primary_endpoint = 'ep2' + 
ase2._client.primary_endpoint = 'ep2' + ase2.path = 'asepath2' + ase2.mode = azmodels.StorageModes.Block + ase2.is_encrypted = False + ase2.replica_targets = None + ase2.container = 'cont2' + ase2.name = 'name2' + ase2.size = 10 + + sa = mock.MagicMock() + sa.name = 'name' + sa.endpoint = 'ep' + sa.block_blob_client.primary_endpoint = 'pep' + + tmpdir.join('a').write('z' * 10) + lp = mock.MagicMock() + lp.relative_path = pathlib.Path('a') + lp.absolute_path = pathlib.Path(str(tmpdir.join('a'))) + lp.view.fd_start = 0 + lp.view.fd_end = 10 + lp.use_stdin = False + lp.size = 10 + lp.total_size = 10 + + u._generate_destination_for_source = mock.MagicMock() + with pytest.raises(RuntimeError): + u._generate_destination_for_source.return_value = [ + (sa, ase), (sa, ase) + ] + u._spec.sources.files.return_value = [lp] + + u._run() + assert urm.call_count == 1 + assert lfmo.call_count == 1 + assert u._md5_offload.initialize_check_thread.call_count == 1 + assert u._initialize_disk_threads.call_count == 1 + assert u._initialize_transfer_threads.call_count == 1 + + # mismatch exception raise + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Disabled + u._check_upload_conditions = mock.MagicMock() + u._check_upload_conditions.return_value = ops.UploadAction.Skip + u._generate_destination_for_source.return_value = [ + (sa, ase) + ] + u._spec.sources.files.return_value = [lp] + + with pytest.raises(RuntimeError): + u._run() + + u._check_upload_conditions.return_value = ops.UploadAction.CheckMd5 + with pytest.raises(RuntimeError): + u._run() + + # regular execution + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._general_options.concurrency.disk_threads = 1 + u._general_options.concurrency.transfer_threads = 1 + u._general_options.concurrency.md5_processes = 1 + u._general_options.concurrency.crypto_processes = 0 + u._general_options.resume_file = 'resume' + u._spec.options.store_file_properties.md5 = True + u._spec.skip_on.md5_match = True + u._spec.options.rsa_public_key = None + u._spec.options.chunk_size_bytes = 0 + u._spec.options.one_shot_bytes = 0 + u._spec.sources.can_rename.return_value = False + u._spec.options.rename = False + + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Replica + u._check_upload_conditions = mock.MagicMock() + u._check_upload_conditions.return_value = ops.UploadAction.Upload + u._generate_destination_for_source = mock.MagicMock() + u._generate_destination_for_source.return_value = [ + (sa, ase), (sa, ase2) + ] + u._spec.sources.files.return_value = [lp] + u._put_data = mock.MagicMock() + u._finalize_upload = mock.MagicMock() + u._run() + + # exception raise + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._general_options.concurrency.disk_threads = 1 + u._general_options.concurrency.transfer_threads = 1 + u._general_options.concurrency.md5_processes = 1 + u._general_options.concurrency.crypto_processes = 0 + u._general_options.resume_file = 'resume' + u._spec.options.store_file_properties.md5 = True + u._spec.skip_on.md5_match = True + u._spec.options.rsa_public_key = None + u._spec.options.chunk_size_bytes = 0 + u._spec.options.one_shot_bytes = 0 + u._spec.sources.can_rename.return_value = False + u._spec.options.rename = False + + u._spec.options.vectored_io.distribution_mode = \ + models.VectoredIoDistributionMode.Disabled + u._check_upload_conditions = mock.MagicMock() + u._check_upload_conditions.return_value = ops.UploadAction.Upload 
+ u._generate_destination_for_source = mock.MagicMock() + u._generate_destination_for_source.return_value = [ + (sa, ase) + ] + u._spec.sources.files.return_value = [lp] + + with pytest.raises(RuntimeError): + u._process_upload_descriptor = mock.MagicMock() + u._process_upload_descriptor.side_effect = RuntimeError() + u._run() + + +def test_start(): + u = ops.Uploader(mock.MagicMock(), mock.MagicMock(), mock.MagicMock()) + u._wait_for_transfer_threads = mock.MagicMock() + u._wait_for_disk_threads = mock.MagicMock() + u._md5_offload = mock.MagicMock() + u._md5_offload.finalize_processes = mock.MagicMock() + u._crypto_offload = mock.MagicMock() + u._crypto_offload.finalize_processes = mock.MagicMock() + u._resume = mock.MagicMock() + u._run = mock.MagicMock() + + # test keyboard interrupt + u._run.side_effect = KeyboardInterrupt() + u.start() + + assert u._run.call_count == 1 + assert u._wait_for_transfer_threads.call_count == 1 + assert u._wait_for_disk_threads.call_count == 1 + assert u._md5_offload.finalize_processes.call_count == 1 + assert u._crypto_offload.finalize_processes.call_count == 1 + assert u._resume.close.call_count == 1 + + # test other exception + u._run.side_effect = RuntimeError() + with pytest.raises(RuntimeError): + u.start() diff --git a/tests/test_blobxfer_util.py b/tests/test_blobxfer_util.py index e294a0e..92fdacf 100644 --- a/tests/test_blobxfer_util.py +++ b/tests/test_blobxfer_util.py @@ -2,15 +2,13 @@ """Tests for util""" # stdlib imports +import datetime try: import unittest.mock as mock except ImportError: # noqa import mock -try: - import pathlib2 as pathlib -except ImportError: # noqa - import pathlib import sys +import time # non-stdlib imports import pytest # module under test @@ -60,6 +58,21 @@ def test_is_not_empty(): assert blobxfer.util.is_not_empty(a) +def test_join_thread(): + with mock.patch('blobxfer.util.on_python2', return_value=True): + thr = mock.MagicMock() + thr.isAlive.side_effect = [True, False] + blobxfer.util.join_thread(thr) + assert thr.isAlive.call_count == 2 + + with mock.patch('blobxfer.util.on_python2', return_value=False): + thr = mock.MagicMock() + thr.isAlive.side_effect = [True, False] + blobxfer.util.join_thread(thr) + thr.join.assert_called_once_with() + thr.isAlive.assert_not_called() + + def test_merge_dict(): with pytest.raises(ValueError): blobxfer.util.merge_dict(1, 2) @@ -82,6 +95,17 @@ def test_merge_dict(): assert b['a_and_b'] == 46 +def test_datetime_now(): + a = blobxfer.util.datetime_now() + assert type(a) == datetime.datetime + + +def test_datetime_from_timestamp(): + ts = time.time() + a = blobxfer.util.datetime_from_timestamp(ts) + assert type(a) == datetime.datetime + + def test_scantree(tmpdir): tmpdir.mkdir('abc') abcpath = tmpdir.join('abc') @@ -98,38 +122,6 @@ def test_scantree(tmpdir): assert len(found) == 2 -def test_replace_file(tmpdir): - src = pathlib.Path(str(tmpdir.join('src'))) - dst = pathlib.Path(str(tmpdir.join('dst'))) - src.touch() - dst.touch() - - replace_avail = sys.version_info >= (3, 3) - - with mock.patch( - 'sys.version_info', - new_callable=mock.PropertyMock(return_value=(3, 2, 0))): - blobxfer.util.replace_file(src, dst) - assert not src.exists() - assert dst.exists() - - dst.unlink() - src.touch() - dst.touch() - - with mock.patch( - 'sys.version_info', - new_callable=mock.PropertyMock(return_value=(3, 3, 0))): - if replace_avail: - blobxfer.util.replace_file(src, dst) - assert not src.exists() - assert dst.exists() - else: - src = mock.MagicMock() - blobxfer.util.replace_file(src, 
dst) - assert src.replace.call_count == 1 - - def test_get_mime_type(): a = 'b.txt' mt = blobxfer.util.get_mime_type(a) @@ -150,6 +142,10 @@ def test_base64_encode_as_string(): assert a == dec +def test_new_md5_hasher(): + assert blobxfer.util.new_md5_hasher() is not None + + def test_page_align_content_length(): assert 0 == blobxfer.util.page_align_content_length(0) assert 512 == blobxfer.util.page_align_content_length(1)