Skip to content
Browse files

Decompress backup data files when restoring to speed up the restore process

  • Loading branch information...
1 parent 2282ad2 commit 0ce67ed957a6faef529216c0e608097aa8a3ef31 @KonishchevDmitry committed Dec 28, 2012
Showing with 121 additions and 41 deletions.
  1. +9 −4 pyvsb/backup.py
  2. +30 −9 pyvsb/main.py
  3. +79 −26 pyvsb/utils.py
  4. +3 −2 tests/test.py
View
13 pyvsb/backup.py
@@ -322,7 +322,7 @@ def __write_file_metadata(self, path, file_hash, fingerprint, extern):
class Restore:
"""Controls backup restoring."""
- def __init__(self, backup_path, restore_path = None):
+ def __init__(self, backup_path, restore_path = None, in_place = False):
# Backup name
self.__name = None
@@ -335,6 +335,9 @@ def __init__(self, backup_path, restore_path = None):
# Restore path
self.__restore_path = restore_path
+ # Don't decompress backup files, to avoid using extra disc space
+ self.__in_place = in_place
+
# Current object state
self.__state = _STATE_OPENED
@@ -361,7 +364,8 @@ def __init__(self, backup_path, restore_path = None):
try:
self.__data = utils.CompressedTarFile(
- os.path.join(backup_path, _DATA_FILE_NAME))
+ os.path.join(backup_path, _DATA_FILE_NAME),
+ decompress = not self.__in_place)
except Exception as e:
raise Error("Unable to open data of '{}' backup: {}.",
backup_path, psys.e(e))
@@ -546,8 +550,9 @@ def __load_backup_data(self, name, hashes, paths):
if name == self.__name:
data = self.__data
else:
- data = utils.CompressedTarFile(os.path.join(
- backup_path, _DATA_FILE_NAME))
+ data = utils.CompressedTarFile(
+ os.path.join(backup_path, _DATA_FILE_NAME),
+ decompress = not self.__in_place)
for tar_info in data:
hash = paths.get("/" + tar_info.name)
View
39 pyvsb/main.py
@@ -40,27 +40,48 @@ def emit(self, record):
def main():
"""The script's main function."""
- parser = argparse.ArgumentParser(
- description = "pyvsb - A very simple in configuring but powerful backup tool")
+ parser = argparse.ArgumentParser(add_help = False,
+ description = "A very simple in configuring but powerful backup tool")
- parser.add_argument("-c", "--config", metavar = "CONFIG_PATH", type = str,
+
+ group = parser.add_argument_group("Backup")
+
+ group.add_argument("-c", "--config", metavar = "CONFIG_PATH", type = str,
default = os.path.expanduser("~/.pyvsb.conf"),
help = "configuration file path (default is ~/.pyvsb.conf)")
- parser.add_argument("-r", "--restore", metavar = "BACKUP_PATH",
+
+ group = parser.add_argument_group("Restore")
+
+ group.add_argument("-r", "--restore", metavar = "BACKUP_PATH",
default = None, help = "restore the specified backup")
- parser.add_argument("paths_to_restore", nargs = "*",
+ group.add_argument("-i", "--in-place", action = "store_true",
+ help = "don't use extra disc space by decompressing backup files "
+ "(this option significantly slows down restore process)")
+
+ group.add_argument("paths_to_restore", nargs = "*",
metavar = "PATH_TO_RESTORE", help = "Path to restore (default is /)")
- parser.add_argument("-d", "--debug", action = "store_true",
- help = "turn on debug messages")
- parser.add_argument("--cron", action = "store_true",
+ group = parser.add_argument_group("Optional arguments")
+
+ group.add_argument("--cron", action = "store_true",
help = "show only warning and error messages (intended to be used from cron)")
+ group.add_argument("-d", "--debug", action = "store_true",
+ help = "turn on debug messages")
+
+ group.add_argument("-h", "--help", action = "store_true",
+ help = "show this help message and exit")
+
+
args = parser.parse_args()
+ if args.help:
+ parser.print_help()
+ sys.exit(os.EX_OK)
+
if args.restore is None and args.paths_to_restore:
parser.print_help()
sys.exit(os.EX_USAGE)
@@ -86,7 +107,7 @@ def main():
try:
paths_to_restore = [ os.path.abspath(path) for path in args.paths_to_restore ]
- with Restore(os.path.abspath(args.restore)) as restorer:
+ with Restore(os.path.abspath(args.restore), in_place = args.in_place) as restorer:
success = restorer.restore(paths_to_restore or None)
except Exception as e:
raise Error("Restore failed: {}", e)
View
105 pyvsb/utils.py
@@ -4,11 +4,19 @@
import errno
import grp
import gzip
+import logging
import pwd
+import shutil
import tarfile
+import tempfile
from hashlib import sha1
+import psys
+
+LOG = logging.getLogger(__name__)
+
+
_DB_ENTRIES_CACHE = {}
"""A DB entries cache."""
@@ -18,14 +26,14 @@ class CompressedTarFile:
__formats = {
"bz2": {
- "extension": ".bz2",
- "mode": ":bz2",
- "class": bz2.BZ2File,
+ "extension": ".bz2",
+ "mode": ":bz2",
+ "decompressor": bz2.BZ2File,
},
"gz": {
- "extension": ".gz",
- "mode": ":gz",
- "class": gzip.GzipFile,
+ "extension": ".gz",
+ "mode": ":gz",
+ "decompressor": gzip.GzipFile,
},
"none": {
"extension": "",
@@ -35,29 +43,41 @@ class CompressedTarFile:
"""Available file formats."""
+ __file = None
+ """Opened tar file."""
+
+ __temp_file = None
+ """A temporary file."""
+
+
def __init__(self, path, write = None, decompress = True):
- if write is None:
- for file_format in self.__formats.values():
- try:
-# if decompress and "class" in file_format:
-# with file_format["class"](path + file_format["extension"]):
-# # TODO FIXME
-# self.__file = tarfile.open()
-# else:
- self.__file = tarfile.open(
- path + file_format["extension"], "r" + file_format["mode"])
- except EnvironmentError as error:
- if error.errno != errno.ENOENT:
- raise
+ try:
+ if write is None:
+ for file_format in self.__formats.values():
+ cur_path = path + file_format["extension"]
+
+ try:
+ if decompress and "decompressor" in file_format:
+ with file_format["decompressor"](cur_path) as compressed_file:
+ self.__decompress(cur_path, compressed_file)
+
+ if self.__file is None:
+ self.__file = tarfile.open(cur_path, "r" + file_format["mode"])
+ except EnvironmentError as error:
+ if error.errno != errno.ENOENT:
+ raise
+ else:
+ break
else:
- break
+ raise error
else:
- raise error
- else:
- file_format = self.__formats[write]
+ file_format = self.__formats[write]
- self.__file = tarfile.open(
- path + file_format["extension"], "w" + file_format["mode"])
+ self.__file = tarfile.open(
+ path + file_format["extension"], "w" + file_format["mode"])
+ except:
+ self.close()
+ raise
def __getattr__(self, attr):
@@ -71,7 +91,40 @@ def __iter__(self):
def close(self):
"""Closes the file."""
- self.__file.close()
+ try:
+ if self.__file is not None:
+ self.__file.close()
+ finally:
+ if self.__temp_file is not None:
+ self.__temp_file.close()
+
+
+ def __decompress(self, path, compressed_file):
+ """Decompresses a compressed tar archive."""
+
+ LOG.debug("Decompressing '%s'...", path)
+
+ try:
+ self.__temp_file = tempfile.NamedTemporaryFile(dir = "/var/tmp")
+ shutil.copyfileobj(compressed_file, self.__temp_file)
+ self.__temp_file.flush()
+ except BaseException as e:
+ if self.__temp_file is not None:
+ try:
+ self.__temp_file.close()
+ except Exception as e:
+ LOG.error("Failed to delete a temporary file '%s': %s.",
+ self.__temp_file.name, psys.e(e))
+ finally:
+ self.__temp_file = None
+
+ if not isinstance(e, Exception):
+ raise
+
+ LOG.error("Failed to decompress '%s': %s.", path, psys.e(e))
+ else:
+ LOG.debug("Decompressing finished.")
+ self.__file = tarfile.open(self.__temp_file.name)
View
5 tests/test.py
@@ -316,7 +316,8 @@ def test_complex(env, config):
shutil.rmtree(env["restore_path"])
-def test_compression(env):
+@pytest.mark.parametrize("in_place", ( True, False ))
+def test_compression(env, in_place):
source_trees = []
formats = ( "bz2", "gz", "none" )
@@ -344,7 +345,7 @@ def test_compression(env):
assert len(backups) == len(formats)
for backup, source_tree in zip(backups, source_trees):
- with Restore(backup, env["restore_path"]) as restorer:
+ with Restore(backup, env["restore_path"], in_place = in_place) as restorer:
assert restorer.restore()
assert source_tree == _hash_tree(env["restore_path"] + env["data_path"])

0 comments on commit 0ce67ed

Please sign in to comment.
Something went wrong with that request. Please try again.