Skip to content

Commit

Permalink
cleanup msgpack related str/bytes mess, see borgbackup#968
Browse files Browse the repository at this point in the history
see ticket and borg.helpers.msgpack docstring.
  • Loading branch information
ThomasWaldmann committed May 5, 2022
1 parent 69d548a commit 47304cd
Show file tree
Hide file tree
Showing 9 changed files with 239 additions and 131 deletions.
12 changes: 6 additions & 6 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,7 +1550,7 @@ class RobustUnpacker:
"""
def __init__(self, validator, item_keys):
super().__init__()
self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
self.item_keys = [msgpack.packb(name) for name in item_keys]
self.validator = validator
self._buffered_data = []
self._resync = False
Expand Down Expand Up @@ -1769,7 +1769,7 @@ def valid_archive(obj):
# lost manifest on a older borg version than the most recent one that was ever used
# within this repository (assuming that newer borg versions support more item keys).
manifest = Manifest(self.key, self.repository)
archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS]
pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01,
msgid='check.rebuild_manifest')
for chunk_id, _ in self.chunks.iteritems():
Expand Down Expand Up @@ -1916,9 +1916,9 @@ def robust_iterator(archive):
Missing item chunks will be skipped and the msgpack stream will be restarted
"""
item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and b'path' in item,
item_keys = self.manifest.item_keys
required_item_keys = REQUIRED_ITEM_KEYS
unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and 'path' in item,
self.manifest.item_keys)
_state = 0

Expand All @@ -1943,7 +1943,7 @@ def valid_item(obj):
# A bug in Attic up to and including release 0.13 added a (meaningless) b'acl' key to every item.
# We ignore it here, should it exist. See test_attic013_acl_bug for details.
obj.pop(b'acl', None)
keys = set(obj)
keys = set(k.decode('utf-8', errors='replace') for k in obj)
if not required_item_keys.issubset(keys):
return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
if not keys.issubset(item_keys):
Expand Down
10 changes: 5 additions & 5 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1938,12 +1938,12 @@ def do_upgrade(self, args, repository, manifest=None, key=None):
print('This repository is not encrypted, cannot enable TAM.')
return EXIT_ERROR

if not manifest.tam_verified or not manifest.config.get(b'tam_required', False):
if not manifest.tam_verified or not manifest.config.get('tam_required', False):
# The standard archive listing doesn't include the archive ID like in borg 1.1.x
print('Manifest contents:')
for archive_info in manifest.archives.list(sort_by=['ts']):
print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
manifest.config[b'tam_required'] = True
manifest.config['tam_required'] = True
manifest.write()
repository.commit(compact=False)
if not key.tam_required:
Expand All @@ -1966,7 +1966,7 @@ def do_upgrade(self, args, repository, manifest=None, key=None):
print('Key updated')
if hasattr(key, 'find_key'):
print('Key location:', key.find_key())
manifest.config[b'tam_required'] = False
manifest.config['tam_required'] = False
manifest.write()
repository.commit(compact=False)
else:
Expand Down Expand Up @@ -2299,7 +2299,7 @@ def do_debug_dump_archive(self, args, repository, manifest, key):
"""dump decoded archive metadata (not: data)"""

try:
archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
archive_meta_orig = manifest.archives.get_raw_dict()[args.location.archive]
except KeyError:
raise Archive.DoesNotExist(args.location.archive)

Expand All @@ -2316,7 +2316,7 @@ def output(fd):
fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
fd.write(',\n')

data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
data = key.decrypt(archive_meta_orig['id'], repository.get(archive_meta_orig['id']))
archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)

fd.write(' "_meta":\n')
Expand Down
36 changes: 20 additions & 16 deletions src/borg/cache_sync/unpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,19 +384,11 @@ static inline int unpack_callback_map_end(unpack_user* u)

static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length)
{
/* raw = what Borg uses for binary stuff and strings as well */
/* raw = what Borg uses for text stuff */
/* Note: p points to an internal buffer which contains l bytes. */
(void)b;

switch(u->expect) {
case expect_key:
if(length != 32) {
SET_LAST_ERROR("Incorrect key length");
return -1;
}
memcpy(u->current.key, p, 32);
u->expect = expect_size;
break;
case expect_map_key:
if(length == 6 && !memcmp("chunks", p, 6)) {
u->expect = expect_chunks_begin;
Expand All @@ -409,19 +401,31 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
u->expect = expect_map_item_end;
}
break;
default:
if(u->inside_chunks) {
SET_LAST_ERROR("Unexpected bytes in chunks structure");
return -1;
}
}
return 0;
}

static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length)
{
(void)u; (void)b; (void)p; (void)length;
UNEXPECTED("bin");
/* bin = what Borg uses for binary stuff */
/* Note: p points to an internal buffer which contains l bytes. */
(void)b;

switch(u->expect) {
case expect_key:
if(length != 32) {
SET_LAST_ERROR("Incorrect key length");
return -1;
}
memcpy(u->current.key, p, 32);
u->expect = expect_size;
break;
default:
if(u->inside_chunks) {
SET_LAST_ERROR("Unexpected bytes in chunks structure");
return -1;
}
}
return 0;
}

Expand Down
46 changes: 21 additions & 25 deletions src/borg/helpers/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
logger = create_logger()

from .datastruct import StableDict
from .parseformat import bin_to_hex, safe_encode, safe_decode
from .parseformat import bin_to_hex
from .time import parse_timestamp
from .. import shellpattern
from ..constants import * # NOQA
Expand All @@ -39,39 +39,35 @@ class Archives(abc.MutableMapping):
str timestamps or datetime timestamps.
"""
def __init__(self):
# key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
# key: str archive name, value: dict('id': bytes_id, 'time': str_iso_ts)
self._archives = {}

def __len__(self):
return len(self._archives)

def __iter__(self):
return iter(safe_decode(name) for name in self._archives)
return iter(self._archives)

def __getitem__(self, name):
assert isinstance(name, str)
_name = safe_encode(name)
values = self._archives.get(_name)
values = self._archives.get(name)
if values is None:
raise KeyError
ts = parse_timestamp(values[b'time'].decode())
return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
ts = parse_timestamp(values['time'])
return ArchiveInfo(name=name, id=values['id'], ts=ts)

def __setitem__(self, name, info):
assert isinstance(name, str)
name = safe_encode(name)
assert isinstance(info, tuple)
id, ts = info
assert isinstance(id, bytes)
if isinstance(ts, datetime):
ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT)
assert isinstance(ts, str)
ts = ts.encode()
self._archives[name] = {b'id': id, b'time': ts}
self._archives[name] = {'id': id, 'time': ts}

def __delitem__(self, name):
assert isinstance(name, str)
name = safe_encode(name)
del self._archives[name]

def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False):
Expand Down Expand Up @@ -116,8 +112,8 @@ def list_considering(self, args):
def set_raw_dict(self, d):
"""set the dict we get from the msgpack unpacker"""
for k, v in d.items():
assert isinstance(k, bytes)
assert isinstance(v, dict) and b'id' in v and b'time' in v
assert isinstance(k, str)
assert isinstance(v, dict) and 'id' in v and 'time' in v
self._archives[k] = v

def get_raw_dict(self):
Expand Down Expand Up @@ -196,10 +192,10 @@ def load(cls, repository, operations, key=None, force_tam_not_required=False):
manifest.timestamp = m.get('timestamp')
manifest.config = m.config
# valid item keys are whatever is known in the repo or every key we know
manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', []))
manifest.item_keys = ITEM_KEYS | frozenset(m.get('item_keys', []))

if manifest.tam_verified:
manifest_required = manifest.config.get(b'tam_required', False)
manifest_required = manifest.config.get('tam_required', False)
security_required = tam_required(repository)
if manifest_required and not security_required:
logger.debug('Manifest is TAM verified and says TAM is required, updating security database...')
Expand All @@ -214,32 +210,32 @@ def load(cls, repository, operations, key=None, force_tam_not_required=False):
def check_repository_compatibility(self, operations):
for operation in operations:
assert isinstance(operation, self.Operation)
feature_flags = self.config.get(b'feature_flags', None)
feature_flags = self.config.get('feature_flags', None)
if feature_flags is None:
return
if operation.value.encode() not in feature_flags:
if operation.value not in feature_flags:
continue
requirements = feature_flags[operation.value.encode()]
if b'mandatory' in requirements:
unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES
requirements = feature_flags[operation.value]
if 'mandatory' in requirements:
unsupported = set(requirements['mandatory']) - self.SUPPORTED_REPO_FEATURES
if unsupported:
raise MandatoryFeatureUnsupported([f.decode() for f in unsupported])
raise MandatoryFeatureUnsupported(list(unsupported))

def get_all_mandatory_features(self):
result = {}
feature_flags = self.config.get(b'feature_flags', None)
feature_flags = self.config.get('feature_flags', None)
if feature_flags is None:
return result

for operation, requirements in feature_flags.items():
if b'mandatory' in requirements:
result[operation.decode()] = {feature.decode() for feature in requirements[b'mandatory']}
if 'mandatory' in requirements:
result[operation] = set(requirements['mandatory'])
return result

def write(self):
from ..item import ManifestItem
if self.key.tam_required:
self.config[b'tam_required'] = True
self.config['tam_required'] = True
# self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly
if self.timestamp is None:
self.timestamp = datetime.utcnow().strftime(ISO_FORMAT)
Expand Down

0 comments on commit 47304cd

Please sign in to comment.