Skip to content

Commit

Permalink
Merge pull request #29 from mschmidt87/enh/subdir_keys_for_blackwhite…
Browse files Browse the repository at this point in the history
…list

Implement option to specify keys of subdirectories in black/whitelist
  • Loading branch information
jakobj committed Apr 17, 2018
2 parents 6dd8232 + 7906a20 commit c762438
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 28 deletions.
103 changes: 75 additions & 28 deletions dicthash/dicthash.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _save_convert_float_to_int(x):
return int(x * FLOAT_FACTOR)


def _unpack_value(value, prefix=''):
def _unpack_value(value, prefix='', whitelist=None, blacklist=None):
"""
Unpack values from a data structure and convert to string. Call
the corresponding functions for dict or iterables or use simple
Expand All @@ -63,28 +63,25 @@ def _unpack_value(value, prefix=''):
Prefix to prepend to resulting string. Defaults to empty
string.
"""

try:
return _generate_string_from_dict(value,
blacklist=None,
whitelist=None,
prefix=prefix)
blacklist=blacklist,
whitelist=whitelist,
prefix=prefix + 'd')
except AttributeError:
# not a dict
try:
return _generate_string_from_dict(value, blacklist=None, whitelist=None, prefix=prefix + 'd')
except AttributeError:
# not a dict
try:
return prefix + _generate_string_from_iterable(value, prefix='i')
except TypeError:
# not an iterable
if isinstance(value, float):
return prefix + str(_save_convert_float_to_int(value))
else:
return prefix + str(value)


def _generate_string_from_iterable(l):
return prefix + _generate_string_from_iterable(value, prefix='i')
except TypeError:
# not an iterable
if isinstance(value, float):
return prefix + str(_save_convert_float_to_int(value))
else:
return prefix + str(value)


def _generate_string_from_iterable(l, prefix=''):
"""
Convert an iterable to a string, by extracting every value. Takes
care of proper handling of floats to avoid rounding errors.
Expand Down Expand Up @@ -121,13 +118,15 @@ def _generate_string_from_dict(d, blacklist, whitelist, prefix=''):
List of keys to include in conversion.
"""
if whitelist is None:
whitelist = d.keys()
whitelist = list(d.keys())
if blacklist is not None:
whitelist = (key for key in whitelist if key not in blacklist)

whitelist = set(whitelist).difference(blacklist)
# Sort whitelist according to the keys converted to str
return ''.join(_unpack_value(d[key], prefix=prefix + str(key)) for
key in sorted(whitelist, key=str))
return ''.join(_unpack_value(d[key],
whitelist=filter_blackwhitelist(whitelist, key),
blacklist=filter_blackwhitelist(blacklist, key),
prefix=prefix + str(key)) for
key in sorted(filter_blackwhitelist(whitelist, None), key=str))


def generate_hash_from_dict(d, blacklist=None, whitelist=None,
Expand All @@ -152,10 +151,14 @@ def generate_hash_from_dict(d, blacklist=None, whitelist=None,
Dictionary to compute the hash from.
blacklist : list, optional
List of keys which *are not* used for generating the hash.
Blacklist overrules whitelist, i.e., keys appearing in the
blacklist will definitely not be used.
Keys of subdirectories can be provided by specifying
the full path of keys in a tuple.
whitelist : list, optional
List of keys which *are* used for generating the hash.
Keys of subdirectories can be provided by specifying
the full path of keys in a tuple.
Blacklist overrules whitelist, i.e., keys appearing in the
blacklist will definitely not be used.
raw : bool, optional
if True, return the unhashed string.
Expand Down Expand Up @@ -200,6 +203,50 @@ def validate_blackwhitelist(d, l):
Blacklist or whitelist to validate.
"""
for key in l:
if key not in d:
raise KeyError('Key "{key}" not found in dictionary.'
' Invalid black/whitelist.'.format(key=key))
if isinstance(key, tuple):
k = key[0]
else:
k = key
if k not in d:
raise KeyError('Key "{key}" not found in dictionary. '
'Invalid black/whitelist.'.format(key=key))
if isinstance(key, tuple) and len(key) > 1:
validate_blackwhitelist(d[key[0]], [key[1:]])


def filter_blackwhitelist(l, key):
    """
    Filter a black/whitelist for the entries that are relevant to one
    level of a nested dictionary structure.

    Entries of the list are either scalar keys or tuples that spell
    out a path of keys into nested dictionaries ("subdirectories").

    Three different cases:
    - if l is None, then return None
    - if key is None, then we are at the top-level dictionary, thus
      include all scalar keys and the first element of tuples.
    - if key is not None, then consider only the tuples whose first
      element matches the given key and return the remainder of
      their path: the bare key if exactly one element remains, a
      shorter tuple if several remain (so that paths deeper than
      two levels keep being narrowed on each level).

    Parameters
    ----------
    l : list or None
        Black- or whitelist to filter
    key : scalar variable or None
        Key to filter for. See above for the behavior if key is None

    Returns
    -------
    list or None
        The filtered list, or None if l is None or no entry is
        relevant for the requested level.
    """
    if l is None:
        return None

    filtered = []
    for entry in l:
        if not isinstance(entry, tuple):
            # Scalar entries only ever address the current top level.
            if key is None:
                filtered.append(entry)
        elif key is None:
            # At the top level a path is represented by its first key.
            filtered.append(entry[0])
        elif entry[0] == key:
            remainder = entry[1:]
            if len(remainder) == 1:
                filtered.append(remainder[0])
            elif remainder:
                # Keep the rest of the path as a tuple instead of
                # truncating it to its first element.
                filtered.append(remainder)
            # An empty remainder (one-element tuple) addresses the
            # key itself, so nothing is forwarded to the next level.

    # An empty result is reported as None, like a list that was
    # never given.
    return filtered or None
25 changes: 25 additions & 0 deletions dicthash/test/test_dicthash.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,31 @@ def test_store_and_rehash_h5py():
assert(hash0 == hash1)


def test_subdir_keys_for_whitelist_blacklist():
    """
    Check black/whitelist entries given as tuples of nested keys.

    The two dictionaries differ only in the nested value under
    ('a', 'c'); the hashes must agree exactly when that value is
    excluded from the hash.
    """
    reference = {'a': {'b': 1,
                       'c': 2},
                 'd': 1}
    modified = {'a': {'b': 1,
                      'c': 3},
                'd': 1}

    def hash_both(**lists):
        # Hash both dictionaries with identical black/whitelist settings.
        first = dicthash.generate_hash_from_dict(reference, **lists)
        second = dicthash.generate_hash_from_dict(modified, **lists)
        return first, second

    # Blacklisting the differing nested key hides the difference.
    first, second = hash_both(blacklist=[('a', 'c')])
    assert first == second

    # Blacklisting the other nested key leaves the difference visible.
    first, second = hash_both(blacklist=[('a', 'b')])
    assert first != second

    # Whitelisting the differing nested key exposes the difference.
    first, second = hash_both(whitelist=[('a', 'c')])
    assert first != second

    # Whitelisting only the identical nested key hides the difference.
    first, second = hash_both(whitelist=[('a', 'b')])
    assert first == second


def test_dict_list_lead_to_different_hash():
d0 = {
'a': ['b', 5],
Expand Down

0 comments on commit c762438

Please sign in to comment.