Skip to content

Commit

Permalink
A prototype of storing backup data to a tar archive
Browse files Browse the repository at this point in the history
  • Loading branch information
KonishchevDmitry committed Oct 27, 2012
1 parent ab32a4f commit 785ec01
Show file tree
Hide file tree
Showing 6 changed files with 367 additions and 142 deletions.
6 changes: 4 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import print_function

import sys
import logging

from pyvsb.backup import Backuper
from pyvsb.backuper import Backuper

class OutputHandler(logging.Handler):
"""
Expand All @@ -18,7 +20,7 @@ def emit(self, record):

try:
stream = sys.stdout if record.levelno <= logging.INFO else sys.stderr
print >> stream, self.format(record).encode("utf-8")
print(self.format(record), file = stream)
except:
self.handleError(record)
finally:
Expand Down
248 changes: 168 additions & 80 deletions pyvsb/backup.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,195 @@
import bz2
import errno
import grp
import logging
import mmap
import os
import pwd
import shutil
import stat
import tarfile

from hashlib import sha1

import psys
from psys import eintr_retry

from .core import Error
from .storage import Storage
from .core import Error, LogicalError

LOG = logging.getLogger(__name__)

# TODO
class FileTypeChangedError(Exception):
pass

# TODO
class Backuper:
def __init__(self):
self.__items = [ "tests/test_root/etc", "tests/test_root/home" ]
self.__open_flags = os.O_RDONLY | os.O_NOFOLLOW | os.O_NOATIME
self.__storage = Storage()
class Backup:
"""Represents a single backup."""

def backup(self):
for item in self.__items:
self.__backup(item, toplevel = True)
MODE_READ = "read"
"""Reading mode."""

MODE_WRITE = "write"
"""Writing mode."""

def __backup(self, path, toplevel = False):
"""Backups the specified path."""

LOG.debug("Backing up '%s'...", path)
STATE_OPENED = "opened"
"""Opened backup object state."""

try:
stat_info = os.lstat(path)
STATE_COMMITTED = "committed"
"""Committed backup state."""

if stat.S_ISREG(stat_info.st_mode):
self.__backup_file(path)
else:
if stat.S_ISLNK(stat_info.st_mode):
try:
link_target = os.readlink(path)
except EnvironmentError as e:
if e.errno == errno.EINVAL:
raise FileTypeChangedError()
else:
raise
else:
link_target = ""

self.__storage.add(path, stat_info, link_target = link_target)

if stat.S_ISDIR(stat_info.st_mode):
for filename in os.listdir(path):
self.__backup(os.path.join(path, filename))
except FileTypeChangedError as e:
LOG.error("Failed to backup %s: it has suddenly changed its type during the backup.", path)
except Exception as e:
if (
isinstance(e, EnvironmentError) and
e.errno in ( errno.ENOENT, errno.ENOTDIR ) and not toplevel
):
LOG.warning("Failed to backup %s: it has suddenly vanished.", path)
else:
LOG.error("Failed to backup %s: %s.", path, psys.e(e))
STATE_CLOSED = "closed"
"""Closed backup object state."""


def __backup_file(self, path):
"""Backups the specified file."""
def __init__(self, domain_path, name, mode):
# Backup name
self.__name = name

# Backup domain path
self.__domain_path = domain_path

# Current backup path
self.__path = os.path.join(domain_path, "." + name)

# Maps file hashes to their paths
self.__files = {}

# Current object state
self.__state = self.STATE_OPENED


LOG.debug("Opening backup %s/%s in %s mode...",
domain_path, name, mode)

if mode == self.MODE_READ:
raise Exception("TODO")
elif mode == self.MODE_WRITE:
#try:
# domains = sorted(
# domain for domain in os.listdir(self.__config["backup_root"])
# if not domain.startswith("."))
#except EnvironmentError as e:
# raise Error("Error while reading backup directory '{}': {}.",
# self.__config["backup_root"], psys.e(e))

try:
try:
fd = eintr_retry(os.open)(path, self.__open_flags)
except EnvironmentError as e:
# If O_NOATIME flag was specified, but the effective user ID
# of the caller did not match the owner of the file and the
# caller was not privileged (CAP_FOWNER), the EPERM will be
# returned.
if e.errno == errno.EPERM and self.__open_flags & os.O_NOATIME:
# Just disable this flag on a first EPERM error
LOG.error("Got EPERM error. Disabling O_NOATIME for file opening operations...") # TODO: debug
self.__open_flags &= ~os.O_NOATIME
fd = eintr_retry(os.open)(path, self.__open_flags)
else:
raise
except EnvironmentError as e:
# When O_NOFOLLOW is specified, indicates that this is a
# symbolic link.
if e.errno == errno.ELOOP:
raise FileTypeChangedError()
else:
os.mkdir(self.__path)
except Exception as e:
raise Error("Unable to create a backup directory '{}': {}.",
self.__path, psys.e(e))

try:
data_path = os.path.join(self.__path, "data")

try:
self.__data = tarfile.open(data_path, "w")
except Exception as e:
raise Error("Unable to create a backup storage tar archive '{}': {}.",
data_path, psys.e(e))
except:
self.close()
raise
else:
raise LogicalError()

try:
file_obj = os.fdopen(fd, "rb")
except:

def add_file(self, path, stat_info, link_target, file_obj):
    """Adds a file to the storage.

    path -- path of the file being added.
    stat_info -- stat result describing the file.
    link_target -- link target to record for the file.
    file_obj -- file object with the file contents, or None when there
        are no contents to store.

    When file_obj is given, its contents are hashed with SHA-1 and the
    object is rewound to the beginning. If a file with the same hash has
    already been added to this backup, the hash replaces link_target;
    otherwise the path is remembered under its hash.

    Raises Error if the path contains '\\r' or '\\n'.
    """

    # Limitation due to using text files for metadata
    if "\n" in path or "\r" in path:
        raise Error(r"File names with '\r' and '\n' aren't supported")

    if file_obj is not None:
        hasher = sha1()

        # Feed the whole file to the hasher chunk by chunk
        chunk = file_obj.read(psys.BUFSIZE)
        while chunk:
            hasher.update(chunk)
            chunk = file_obj.read(psys.BUFSIZE)

        digest = hasher.hexdigest()

        # Rewind so the archive writer reads the data from the start
        file_obj.seek(0)

        known_path = self.__files.get(digest)
        if known_path is None:
            # First file with such contents in this backup
            self.__files[digest] = path
        else:
            LOG.debug("Found a copy of '%s' in this backup: '%s'.", path, known_path)
            link_target = digest

    self.__data.addfile(
        _get_tar_info(path, stat_info, link_target), fileobj = file_obj)


# TODO
def close(self):
if self.__state != self.STATE_CLOSED:
try:
eintr_retry(os.close)(fd)
except Exception as e:
LOG.error("Unable to close a file: %s.", psys.e(e))
if self.__state != self.STATE_COMMITTED:
shutil.rmtree(self.__path)
finally:
self.__state = self.STATE_CLOSED


raise
def commit(self):
"""Commits the changes."""

with file_obj:
stat_info = os.fstat(file_obj.fileno())
self.__storage.add(path, stat_info, file_obj = file_obj)
if self.__state != self.STATE_OPENED:
raise Error("Invalid backup state.")

try:
self.__state = self.STATE_COMMITTED
finally:
self.close()



def _get_tar_info(path, stat_info, link_target):
    """Returns a TarInfo object for the specified file.

    path -- path of the file; leading slashes are stripped to form the
        archive member name.
    stat_info -- stat result of the file. st_mode, st_uid, st_gid,
        st_mtime and st_size are read, plus st_rdev for character and
        block devices.
    link_target -- link target of the member. A non-empty value for a
        regular file turns the member into a hard link. None and an empty
        string both mean "no link target".

    Raises Exception if the file type is not one of: regular file,
    directory, symbolic link, FIFO, character device, block device.
    """

    # TarInfo.linkname has to be a string for the header to be built, so
    # a missing target is stored as an empty string rather than None.
    link_target = link_target or ""

    tar_info = tarfile.TarInfo()
    stat_mode = stat_info.st_mode

    if stat.S_ISREG(stat_mode):
        tar_info.type = tarfile.LNKTYPE if link_target else tarfile.REGTYPE
    elif stat.S_ISDIR(stat_mode):
        tar_info.type = tarfile.DIRTYPE
    elif stat.S_ISLNK(stat_mode):
        tar_info.type = tarfile.SYMTYPE
    elif stat.S_ISFIFO(stat_mode):
        tar_info.type = tarfile.FIFOTYPE
    elif stat.S_ISCHR(stat_mode):
        tar_info.type = tarfile.CHRTYPE
    elif stat.S_ISBLK(stat_mode):
        tar_info.type = tarfile.BLKTYPE
    else:
        raise Exception("File type is not supported")

    tar_info.name = path.lstrip("/")
    tar_info.mode = stat_mode
    tar_info.uid = stat_info.st_uid
    tar_info.gid = stat_info.st_gid
    tar_info.mtime = stat_info.st_mtime
    tar_info.linkname = link_target

    # Only plain regular files carry data in the archive
    if tar_info.type == tarfile.REGTYPE:
        tar_info.size = stat_info.st_size
    else:
        tar_info.size = 0

    # Symbolic user and group names are optional: the ID may be unknown
    # to the local user/group databases.
    try:
        tar_info.uname = pwd.getpwuid(stat_info.st_uid)[0]
    except KeyError:
        pass

    try:
        tar_info.gname = grp.getgrgid(stat_info.st_gid)[0]
    except KeyError:
        pass

    if tar_info.type in ( tarfile.CHRTYPE, tarfile.BLKTYPE ):
        tar_info.devmajor = os.major(stat_info.st_rdev)
        tar_info.devminor = os.minor(stat_info.st_rdev)

    return tar_info
Loading

0 comments on commit 785ec01

Please sign in to comment.