In [None]:
import collections
import shelve
import sys
import tempfile


# itertools is imported unconditionally: code further down this file
# (fdict._count_iter_items) calls itertools.count() on every Python version,
# so importing it only in the Python 2 branch caused a NameError on Python 3.
import itertools

# True when running under Python 3 or later.
PY3 = sys.version_info >= (3, 0)

# Lazy zip on both major versions: the builtin on Python 3, izip on Python 2.
_zip = zip if PY3 else itertools.izip


__version__ = '0.4.2'

try:
    # The Mapping ABC lives in collections.abc since Python 3.3 and the
    # collections.Mapping alias was removed in Python 3.10.
    from collections.abc import Mapping as _Mapping
except ImportError:  # Python 2
    from collections import Mapping as _Mapping


def flatkeys(d, sep="/"):
    """
    Flatten a dictionary: build a new dictionary from a given one where all
    non-mapping values are left untouched but nested mappings are recursively
    merged in the new one with their keys prefixed by their parent key.

    Top-level keys are kept as-is (including their type); keys of nested
    mappings are converted with ``str()`` and joined to their parents with
    ``sep``. Nested mappings that contain no leaf (eg, empty dicts) produce no
    entry at all.

    :param d: mapping to flatten (anything exposing ``.items()``).
    :param sep: string inserted between a parent key and a child key.
    :return: a new plain ``dict``; ``d`` itself is not modified.

    >>> flatkeys({1: 42, 'foo': 12}) == {1: 42, 'foo': 12}
    True
    >>> flatkeys({1: 42, 'foo': 12, 'bar': {'qux': True}}) == {1: 42, 'foo': 12, 'bar/qux': True}
    True
    >>> flatkeys({1: {2: {3: 4}}})
    {'1/2/3': 4}
    >>> flatkeys({1: {2: {3: 4}, 5: 6}}) == {'1/2/3': 4, '1/5': 6}
    True
    >>> flatkeys({'a': {'b': 1}}, sep='.')
    {'a.b': 1}

    v0.1.0 by bfontaine, MIT license
    """
    flat = {}
    # Explicit stack of (key prefix, mapping) pairs instead of recursion
    pending = [("", d)]

    while pending:
        prefix, current = pending.pop()
        for k, v in current.items():
            k_s = str(k)
            if isinstance(v, _Mapping):
                # Nested mapping: visit it later, with its own key appended to the prefix
                pending.append(("%s%s%s" % (prefix, k_s, sep), v))
            else:
                # Leaf: top-level keys keep their original object, nested ones are prefixed strings
                flat[prefix + k_s if prefix else k] = v
    return flat


class fdict(dict):
    '''Flattened nested dict, all items are settable and gettable through ['item1']['item2'] standard form or ['item1/item2'] internal form.
    This allows to replace the internal dict with any on-disk storage system like a shelve's shelf (great for huge nested dicts that cannot fit into memory).
    Main limitation: an entry can be both a singleton and a nested fdict, and there is no way to tell what is what, no error will be shown, the singleton will always be returned.

    All the data lives in ``self.d`` (a flat mapping of full path -> value); the ``dict`` base class is only there so that ``isinstance(x, dict)`` holds.
    A "sub-dict" such as ``d['a']`` is another fdict sharing the same ``self.d`` but with ``rootpath='a'``, which restricts and rebases every operation.

    Parameters:
        d: initial content. Without rootpath it is flattened with flatkeys(); with a rootpath (internal calls) it is used as the flat store itself.
        rootpath: full path of the node this instance is restricted to ('' for the whole store).
        delimiter: string separating the levels inside a flat key.
        fastview: if True, every leaf assignment also maintains, for each ancestor, an entry 'ancestor/' (note the trailing delimiter) holding the set of its direct children paths, so that listing a sub-dict does not require scanning the whole store.
        kwargs: extra arguments memorized and passed along when sub-dicts are created (used by subclasses).
    '''

    # Evaluated once at class creation, used to pick the right accessors of the internal store
    _PY3 = sys.version_info >= (3, 0)

    def __init__(self, d=None, rootpath='', delimiter='/', fastview=False, **kwargs):
        if d is None:
            self.d = dict()
        elif rootpath:
            # Internal call, we get a subitem, we just create a new fdict with the same store but with a restricted rootpath
            if isinstance(d, dict) or hasattr(d, 'keys'):
                # dict or dict-like store (eg, a shelf): share it, do not copy it in memory
                self.d = d
            else:
                # sometimes (particularly extract(fullpath=True)) we get an iterable of (key, value) tuples instead of a dict
                self.d = dict(d)
        else:
            # Not an internal call: the user supplied a dict (or something convertible to a dict) to initialize the fdict, we have to flatten its keys
            self.d = flatkeys(d if isinstance(d, dict) else dict(d), sep=delimiter)
        self.rootpath = rootpath
        self.delimiter = delimiter
        self.fastview = fastview
        self.kwargs = kwargs
        self._py3compat()

    def _py3compat(self):
        '''Bind self._viewkeys/_viewvalues/_viewitems to the lazy accessors of the internal store. Must be called again whenever self.d is replaced.'''
        store = self.d
        if self._PY3:
            # Py3
            self._viewkeys = store.keys
            self._viewvalues = store.values
            self._viewitems = store.items
        elif getattr(store, 'viewvalues', None):
            # Py2.7
            self._viewkeys = store.viewkeys
            self._viewvalues = store.viewvalues
            self._viewitems = store.viewitems
        else:
            # Py2.6
            self._viewkeys = store.iterkeys
            self._viewvalues = store.itervalues
            self._viewitems = store.iteritems

    def _buildpath(self, key='', prepend=None):
        '''Join prepend, the rootpath and key with the delimiter, skipping the empty parts.'''
        return (self.delimiter).join(filter(None, [prepend, self.rootpath, key]))

    @staticmethod
    def _get_all_parent_nodes(path, delimiter='/'):
        '''Yield every ancestor node of path, deepest first, each ending with the delimiter (eg, 'a/b/c' -> 'a/b/', 'a/').'''
        step = len(delimiter)  # do not assume a single character delimiter
        pos = path.rfind(delimiter)
        while pos != -1:
            yield path[:pos + step]
            pos = path.rfind(delimiter, 0, pos)

    def _set_leaf(self, fullkey, value):
        '''Store value at the full path fullkey and, in fastview mode, register the leaf in all its ancestors.'''
        self.d.__setitem__(fullkey, value)
        if self.fastview:
            # First parent stores the direct path to the leaf,
            # then each super parent stores the path of the nested parent.
            # Plain strings are stored: nodes are recognized by their trailing delimiter.
            child = fullkey
            for parent in self._get_all_parent_nodes(fullkey, self.delimiter):
                if parent in self.d:
                    children = self.d.__getitem__(parent)
                    children.add(child)
                else:
                    children = set([child])
                # Always store back, so that stores returning copies (eg, a shelf without writeback) are updated too
                self.d.__setitem__(parent, children)
                child = parent

    def _del_leaf(self, fullkey):
        '''Delete the entry at the full path fullkey and, in fastview mode, unregister it from its ancestors (removing the ancestors that become empty).'''
        self.d.__delitem__(fullkey)
        if self.fastview:
            child = fullkey
            for parent in self._get_all_parent_nodes(fullkey, self.delimiter):
                if parent not in self.d:
                    break
                children = self.d.__getitem__(parent)
                children.discard(child)
                if children:
                    # The parent still has other children: store it back and stop climbing
                    self.d.__setitem__(parent, children)
                    break
                self.d.__delitem__(parent)
                child = parent

    def _fastview_leaves(self, pattern):
        '''Fastview mode: yield the full path of every leaf below the node entry pattern, by walking recursively the sets of children.'''
        if pattern not in self.d:
            return
        pending = set(self.d.__getitem__(pattern))
        while pending:
            child = pending.pop()
            if child.endswith(self.delimiter):
                # Node, append all the subchildren to the stack
                pending.update(self.d.__getitem__(child))
            else:
                # Leaf
                yield child

    def __getitem__(self, key):
        '''Return the value if key (relative to the rootpath) is a leaf, else a sub fdict restricted to this key. Never raises KeyError: an unknown key gives an empty sub fdict.'''
        fullkey = self._buildpath(key)
        # Node or leaf?
        if fullkey in self.d:
            # Leaf: return the value (we test in self.d and not in self because we want a direct match only)
            return self.d.__getitem__(fullkey)
        # Node: return a new fdict sharing the same store but with a longer rootpath (this is the magic that allows the syntax d['item1']['item2'])
        return self.__class__(d=self.d, rootpath=fullkey, delimiter=self.delimiter, fastview=self.fastview, **self.kwargs)

    def __setitem__(self, key, value):
        '''Store value under key (relative to the rootpath). A dict value is flattened and each of its leaves is stored; an empty dict stores nothing since nested dicts exist implicitly.'''
        fullkey = self._buildpath(key)
        if isinstance(value, dict):
            if not value:
                # User supplied an empty dict, the user wants to create a subdict, but it is not necessary here since nested dicts are supported by default
                return
            # Flatten below the full key (works for plain dicts and fdicts alike, since flatkeys only needs .items())
            for leafkey, leafvalue in flatkeys({fullkey: value}, sep=self.delimiter).items():
                self._set_leaf(leafkey, leafvalue)
        else:
            # if the value is not a dict, we just store it at the flattened key
            self._set_leaf(fullkey, value)
        # TODO: if leaf replaced by node (or node by leaf), the old entries are not removed, see the limitation in the class docstring

    def __delitem__(self, key):
        '''Delete an item in the internal dict, O(1) for any leaf, O(n) for a nested dict. Deleting a key that does not exist is silently ignored.'''
        fullkey = self._buildpath(key)
        if fullkey in self.d:
            # Key is a leaf, we can directly delete it
            self._del_leaf(fullkey)
            return
        # Else there is no direct match, but might be a nested dict, we have to walk through all the store
        dirkey = fullkey + self.delimiter
        doomed = [k for k in self._viewkeys()
                  if k.startswith(dirkey) and not (self.fastview and k.endswith(self.delimiter))]
        for k in doomed:
            # fastview node entries are not listed above: they get removed by _del_leaf when they become empty
            self._del_leaf(k)

    def __contains__(self, key):
        '''Check existence of a key (or subkey) in the dictionary. O(1) for any leaf, O(n) at worst for nested dicts (eg, 'a' in d with d['a/b'] defined)'''
        fullkey = self._buildpath(key)
        if self.d.__contains__(fullkey):
            # Key is a singleton, there is a direct match
            return True
        dirkey = fullkey + self.delimiter
        if self.fastview and self.d.__contains__(dirkey):
            # Key is a nested dict and there is a fastview entry to confirm it
            return True
        # No direct entry, we have to check all the (full) keys of the store
        return any(k.startswith(dirkey) for k in self._viewkeys())

    def get(self, key, default=None):
        '''Return self[key] if key exists (as a leaf or as a nested dict), else default.'''
        if key in self:
            return self[key]
        return default

    def viewkeys(self, fullpath=False):
        '''Generate the keys of the leaves below the rootpath, relative to the rootpath unless fullpath is True.'''
        delim = self.delimiter
        if not self.rootpath:
            for k in self._viewkeys():
                # Fastview mode: filter out nodes (ie, keys ending with delimiter) to keep only leaves
                if not (self.fastview and k.endswith(delim)):
                    yield k
        else:
            pattern = self.rootpath + delim
            skip = 0 if fullpath else len(pattern)  # return the shortened path or fullpath?
            if self.fastview:
                for k in self._fastview_leaves(pattern):
                    yield k[skip:]
            else:
                for k in self._viewkeys():
                    if k.startswith(pattern):
                        yield k[skip:]

    def viewitems(self, fullpath=False):
        '''Generate the (key, value) pairs of the leaves below the rootpath, keys being relative to the rootpath unless fullpath is True.'''
        delim = self.delimiter
        if not self.rootpath:
            for k, v in self._viewitems():
                # Fastview mode: filter out nodes (ie, keys ending with delimiter) to keep only leaves
                if not (self.fastview and k.endswith(delim)):
                    yield k, v
        else:
            # With a rootpath, keep only the items below the rootpath level
            pattern = self.rootpath + delim
            skip = 0 if fullpath else len(pattern)  # return the shortened path or fullpath?
            if self.fastview:
                # Fastview mode: walk the children recursively down to the leaves instead of scanning the store
                for k in self._fastview_leaves(pattern):
                    yield k[skip:], self.d.__getitem__(k)
            else:
                for k, v in self._viewitems():
                    if k.startswith(pattern):
                        yield k[skip:], v

    def viewvalues(self):
        '''Generate the values of the leaves below the rootpath.'''
        delim = self.delimiter
        if not self.rootpath:
            if not self.fastview:
                for v in self._viewvalues():
                    yield v
            else:
                for k, v in self._viewitems():
                    if not k.endswith(delim):
                        yield v
        else:
            pattern = self.rootpath + delim
            if self.fastview:
                for k in self._fastview_leaves(pattern):
                    yield self.d.__getitem__(k)
            else:
                for k, v in self._viewitems():
                    if k.startswith(pattern):
                        yield v

    def __iter__(self):
        '''Iterate over the keys, like a standard dict (the dict base class is empty, so it cannot be used for that).'''
        return self.viewkeys()

    iterkeys = viewkeys
    itervalues = viewvalues
    iteritems = viewitems
    if _PY3:
        keys = viewkeys
        values = viewvalues
        items = viewitems
    else:
        def keys(self, *args, **kwargs):
            return list(self.viewkeys(*args, **kwargs))
        def values(self, *args, **kwargs):
            return list(self.viewvalues(*args, **kwargs))
        def items(self, *args, **kwargs):
            return list(self.viewitems(*args, **kwargs))

    def update(self, d2):
        '''Merge d2 (a fdict, or a possibly nested dict) below the rootpath. Existing leaves not present in d2 are kept (recursive update).'''
        if isinstance(d2, self.__class__):
            # fdict supplied: its keys relative to its own rootpath get rebased on ours
            pairs = d2.viewitems(fullpath=False)
            if d2.d is self.d:
                # Same store: read everything before writing, a dict cannot change size while being iterated
                pairs = list(pairs)
        else:
            # normal dict supplied: first, flatten the dict keys
            pairs = flatkeys(d2, sep=self.delimiter).items()
        if self.rootpath:
            # There is a rootpath, so user is selecting a sub dict (eg, d['item1']), so we need to reconstruct the full key paths before merging
            pairs = ((self._buildpath(k), v) for k, v in pairs)
        if self.fastview:
            # Go through _set_leaf so that the parents entries are maintained
            for fullkey, v in pairs:
                self._set_leaf(fullkey, v)
        else:
            self.d.update(pairs)

    def copy(self):
        '''Return a new instance of the same class working on a shallow copy of the internal store.'''
        return self.__class__(d=self.d.copy(), rootpath=self.rootpath, delimiter=self.delimiter, fastview=self.fastview, **self.kwargs)

    @staticmethod
    def _count_iter_items(iterable):
        '''Consume an iterable without reading it into memory; return the number of items.'''
        return sum(1 for _ in iterable)

    def __len__(self):
        '''Number of leaves below the rootpath.'''
        if not self.rootpath and not self.fastview:
            return self.d.__len__()
        # With a rootpath we have to limit the length to the subelements, and in fastview mode the node entries must not be counted
        return self._count_iter_items(self.viewkeys())

    def __eq__(self, d2):
        '''Compare the leaves below the rootpath with another fdict or with a (possibly nested) dict, which gets flattened first. Other types are not comparable (NotImplemented).'''
        if isinstance(d2, self.__class__):
            if not (self.rootpath or d2.rootpath or self.fastview or d2.fastview):
                # Two whole plain stores, we can directly compare them
                return (self.d == d2.d)
            other_items = d2.viewitems(fullpath=False)
            other_len = len(d2)
        elif isinstance(d2, dict):
            # normal dict, need to flatten it first
            flat = flatkeys(d2, sep=self.delimiter)
            other_items = flat.items()
            other_len = len(flat)
        else:
            return NotImplemented

        if len(self) != other_len:
            # If size is different then the dicts are different
            return False
        # Else size is the same, check that each item of d2 exists here (below our rootpath) with an equal value
        for k, v in other_items:
            fullkey = self._buildpath(k) if self.rootpath else k
            if fullkey not in self.d or self.d.__getitem__(fullkey) != v:
                return False
        return True

    def __ne__(self, d2):
        '''Negation of __eq__ (Python 2 does not derive it automatically).'''
        result = self.__eq__(d2)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        if self.rootpath:
            # Show only the items below the rootpath, with their shortened keys
            return repr(dict(self.items()))
        try:
            return self.d.__repr__()
        except AttributeError:
            return repr(dict(self.items()))

    def __str__(self):
        if self.rootpath:
            return str(dict(self.items()))
        try:
            return self.d.__str__()
        except AttributeError:
            return str(dict(self.items()))

    def to_dict(self):
        '''Return a plain flat dict of the leaves below the rootpath (keys relative to the rootpath).'''
        return dict(self.items())

    def extract(self, fullpath=True):
        '''Return a new fdict containing only the currently subselected items.
        With fullpath=True the full keys and the rootpath are kept; with fullpath=False the keys are shortened and the result is like a brand new fdict without rootpath.'''
        # Return a new fdict shortened to only the currently subselected items, but instead of fdict, should also support sfdict or any child class
        # It was chosen to return a fdict still containing the full keys and not the shortened ones because else it becomes very difficult to merge fdicts
        # And also for subdicts (like sfdict) which might store in a file, so we don't want to start mixing up different paths in the same file, but we would like to extract to a fdict with same parameters as the original, so keeping full path is the only way to do so coherently.
        if fullpath:
            return self.__class__(d=self.items(fullpath=True), rootpath=self.rootpath, delimiter=self.delimiter, fastview=self.fastview, **self.kwargs)
        else:
            return self.__class__(d=self.items(fullpath=False), rootpath='', delimiter=self.delimiter, fastview=self.fastview) # , **self.kwargs)  # if not fullpath for keys, then we do not propagate kwargs because it might implicate propagating filename saving and mixing up keys. For fdict, this does not make a difference, but it might for subclassed dicts. Override this function if you want to ensure that an extract has all same parameters as original when fullpath=False in your subclassed dict.

    #def to_dict_nested(self):
    #    d2 = {}
    #    for k, v in self.viewitems():


class sfdict(fdict):
    '''A nested dict with flattened internal representation, combined with shelve to allow for efficient storage and memory allocation of huge nested dictionaries.
    If you change leaf items (eg, list.append), do not forget to sync() to commit changes to disk and empty memory cache because else this class has no way to know if leaf items were changed!

    Keyword arguments (in addition to the ones of fdict):
        filename: path of the shelf to open or create. If missing or empty, a new path inside a fresh temporary directory is used.
        autosync: if True (default), sync() is called after every item assignment.

    Sub-dicts (eg, g['b']) share the shelf already opened by their parent instead of opening the file again, so close() on a sub-dict also closes it for the parent.
    '''
    def __init__(self, *args, **kwargs):
        if kwargs.get('filename'):
            self.filename = kwargs['filename']
        else:
            # Only pick a free path and let shelve create the database itself:
            # an already existing empty file (as left by NamedTemporaryFile) is rejected by dbm ("db type could not be determined")
            import os
            self.filename = os.path.join(tempfile.mkdtemp(), 'sfdict_shelf')
        # Memorize the filename in kwargs (do not del) for auto management of internal sub calls to sfdict: sub-dicts must point to the same file
        kwargs['filename'] = self.filename

        self.autosync = kwargs.get('autosync', True)

        # Internal sub calls hand over the already opened shelf as d: reuse it as-is, without copying it nor reopening the file
        positional = list(args)
        store = positional[0] if positional else kwargs.get('d')
        shared = isinstance(store, shelve.Shelf)
        if shared:
            if positional:
                positional[0] = None
            else:
                kwargs['d'] = None

        fdict.__init__(self, *positional, **kwargs)
        if shared:
            self.d = store
        else:
            initial = self.d  # initial content as prepared (flattened) by fdict.__init__, if any
            self.d = shelve.open(filename=self.filename, flag='c', writeback=True)
            if initial:
                # Do not silently drop the data supplied at creation
                self.d.update(initial)
                if self.autosync:
                    self.d.sync()
        # self.d was replaced, the accessors must be bound again
        self._py3compat()

    def __setitem__(self, key, value):
        '''Same as fdict.__setitem__, then commit to disk if autosync is enabled.'''
        fdict.__setitem__(self, key, value)
        if self.autosync:
            self.sync()

    def get_filename(self):
        '''Return the path of the shelf file.'''
        if self.filename:
            return self.filename
        else:
            # NOTE(review): relies on a private attribute of the underlying dbm object, which not every backend provides
            return self.d.dict._datfile

    def sync(self):
        '''Write back the cached entries of the shelf to disk.'''
        self.d.sync()

    def close(self):
        '''Synchronize and close the shelf; this object (and the ones sharing the shelf) cannot be used afterwards.'''
        self.d.close()


In [None]:
# Test fastview mode
# NB: all comparisons are order-independent (dict/set/sorted) and wrap keys()/items()/values()
# in a container, so that they hold whatever the dict ordering and on both Python 2 (lists) and Python 3 (lazy iterables)
a = fdict(fastview=True)
a['a/b/c'] = 1
a['a']['b']['d'] = 2
a['a']['e']['f'] = 3
a['a']['e']['g']['h'] = 4
a['a']['e']['g']['i'] = 5

# Internal store: the leaves + one entry per node (ending with the delimiter) listing its direct children
assert dict(a.d) == {'a/e/g/': set(['a/e/g/i', 'a/e/g/h']), 'a/e/f': 3, 'a/e/': set(['a/e/g/', 'a/e/f']), 'a/': set(['a/e/', 'a/b/']), 'a/b/c': 1, 'a/b/d': 2, 'a/b/': set(['a/b/c', 'a/b/d']), 'a/e/g/i': 5, 'a/e/g/h': 4}
assert dict(a.items()) == {'a/e/f': 3, 'a/b/c': 1, 'a/b/d': 2, 'a/e/g/i': 5, 'a/e/g/h': 4}  # items() on a fastview fdict should hide the nodes (eg, 'a/b/') and only show leafs, so that behavior is comparable to a non-fastview fdict
assert dict(a['a']['e'].items()) == {'g/i': 5, 'g/h': 4, 'f': 3}
assert dict(a['a']['e'].items(fullpath=True)) == {'a/e/g/i': 5, 'a/e/g/h': 4, 'a/e/f': 3}  # test recursive fastview items()

assert sorted(a['a']['e'].items(fullpath=True)) == [('a/e/f', 3), ('a/e/g/h', 4), ('a/e/g/i', 5)]
assert sorted(a['a']['e'].keys(fullpath=True)) == ['a/e/f', 'a/e/g/h', 'a/e/g/i']
assert set(a['a']['e'].values()) == set([5, 4, 3])
assert set(a['a'].values()) == set([1, 2, 3, 4, 5])  # use set() when we do not care about order in a list
assert list(a['j'].items()) == []  # empty nested dict
assert list(a['j'].keys()) == []
assert list(a['j'].values()) == []

### TODO
- [x] pb of not creating sub fdict with all required arguments, like delimiter or filename for sfdict
- [x] Nested contains: 'a' in d should return true if d['a/b'] exists
- [x] Nested del eg d['a'] should del d[a/c/b] etc
- [x] Update using d2.viewkeys and a loop to set self.d[k] = self._buildpath(d2[k])
- [ ] unit test `__setitem__` fully
- [ ] If nested, always set d['a/'] = None, and on setitem check for a conflict: if key in d and key + '/' in d, then there is a conflict. PROBLEM: it would make deleting and contains faster and also allow checking for conflicts, BUT it would slow down setting, as we would have to check all recursion levels if items are set directly like d['a/b/c'] instead of d['a']['b']['c']
- [x] Fastview: store the list of just this level of subitem paths, then recursively we can get the others if a path ends with /. We will have similar performance to standard items at a small additional storage price. PROBLEM: same problem as above, we will have to check for each level (we could use a set to ensure no duplicates, but still...)
- [ ] Finish fastview:
  * [ ] del
  * [ ] contains
  * [ ] setitem of nested dicts
  * [ ] setitem replacing existing leaf/node
- [ ] benchmarks
- [ ] setup.py
- [ ] travis py3
- [ ] docstring init with arguments, like tqdm
- [ ] readme
- [ ] pypi (use pymake?)
- [ ] codacy, coverage, badge version, etc

In [None]:
# Unit testing
# NB: keys()/items()/values() are always wrapped in list()/dict()/set()/sorted() before being compared,
# so that the assertions do not depend on the dict ordering nor on these methods returning lists (Python 2) or lazy iterables (Python 3)

# Test creation of just a nested dict, without anything else
a = fdict()
a['c']['b'] = set([1, 2])
assert a == {'c/b': set([1, 2])}

# Basic test
a = fdict()
a['a'] = {}
a['c']['b'] = set([1, 2])

assert list(a.keys()) == ['c/b']
assert list(a.items()) == [('c/b', set([1, 2]))]

# Copy test
acopy = a.copy()
assert dict(acopy.items()) == dict(a.items())
assert acopy is not a

# Referencing into another variable of a nested item + check update of nested items
b = acopy['c']
assert list(b.items()) == [('b', set([1, 2]))]
acopy['c'].update({'d': 3})
assert acopy == {'c/b': set([1, 2]), 'c/d': 3}
assert b == {'b': set([1, 2]), 'd': 3}

# Other tests
d = fdict()
d['b'] = {'a': 1} # test subitem assignment of a dict
d['c/b'] = set([2, 3, 5])
assert d.to_dict() == {'c/b': set([2, 3, 5]), 'b/a': 1}

a.update(d)
assert a.to_dict() == {'c/b': set([2, 3, 5]), 'b/a': 1}
assert a['c'].to_dict() == {'b': set([2, 3, 5])}

# Sfdict test (NB: this creates/reuses a shelf named 'testshelf' in the current directory)
g = sfdict(filename='testshelf')
g['a'] = 3
g['b/c'] = set([1, 3, 4])
g['d'] = {}
assert g == {'a': 3, 'b/c': set([1, 3, 4])}
assert g == {'a': 3, 'b/c': set([1, 3, 4]), 'd': {}} # empty dicts are stripped out before comparison
assert g['b'].filename == g.filename # check that subdicts also share the same filename (parameters propagation)

# Sfdict reloading test
h = sfdict(filename='testshelf')
assert h == g
g.close()
h.close()

# Flattening test
m = {}
m['a'] = 1
m['b'] = {'c': 3, 'd': {'e': 5}}
m['f'] = set([1, 2, 5])
m2 = fdict(m)
assert dict(m2.items()) == flatkeys(m)

# Update and extract test
n = {}
n['b'] = {'d': {'f': 6}}
n['g'] = 7
m2.update(n)
assert m2 == {'a': 1, 'g': 7, 'b/c': 3, 'b/d/e': 5, 'b/d/f': 6, 'f': set([1, 2, 5])}

assert m2['b'].d == m2.d
assert m2['b'].extract().d == {'b/c': 3, 'b/d/e': 5, 'b/d/f': 6}

# len() test
assert len(m2) == 6
assert len(m2['b']) == 3
assert len(m2['b']['d']) == len(m2['b/d']) == 2
assert not hasattr(m2['g'], '__len__') and isinstance(m2['g'], int)

# Extract extended test
a10 = fdict()
a10['c/b/d'] = set([1, 2])
assert a10['c'].extract(fullpath=True).d == {'c/b/d': {1, 2}}
assert a10['c'].extract(fullpath=True) == {'b/d': {1, 2}}
assert a10['c'].extract(fullpath=False).d == {'b/d': {1, 2}}

# Contains test
p=fdict()
p['a/b/c'] = set([1, 2])
p['a/c'] = 3
p['a/d'] = {'e': {'f': 4}, 'g': 5}
p['h'] = 6
assert 'h' in p # check existence of a leaf (O(1))
assert 'a/b/c' in p # check existence of a nested leaf (O(1))
assert 'a/b' in p # check existence of a nested dict (O(n))

# Del test
p=fdict()
p['a/b/c'] = set([1, 2])
p['a/c'] = 3
p['a/d'] = {'e': {'f': 4}, 'g': 5}
p['h'] = 6
assert 'h' in p # check existence of a leaf (O(1))
assert 'a/b/c' in p # check existence of a nested leaf (O(1))
assert 'a/b' in p # check existence of a nested dict (O(n))
del p['a/b/c']
assert p == {'h': 6, 'a/d/e/f': 4, 'a/c': 3, 'a/d/g': 5}
del p['h']
assert p == {'a/d/e/f': 4, 'a/c': 3, 'a/d/g': 5}
del p['a/d']
assert p == {'a/c': 3}

# Update test and equality test
a1 = {'a': set([1, 2]), 'b': {'c': 3, 'c2': 4}, 'd': 4}
b1 = {'a': set([1, 2, 3]), 'b': {'c': 4, 'c3': 3}, 'e': 5}
a2 = fdict(a1)
b2 = fdict(b1)
# NB: dict.copy() is shallow, so a11..a15 all share the very same nested a1['b'] dict
a11 = a1.copy()
a12 = a1.copy()
a13 = a1.copy()
a14 = a1.copy()
a15 = a1.copy()
a21 = a2.copy()
a22 = a2.copy()
a23 = a2.copy()
a24 = a2.copy()
a25 = a2.copy()

# no rootpath (ie, use whole dicts)
a11.update(b1)
a21.update(b2)
assert a11 == {'a': set([1, 2, 3]), 'b': {'c3': 3, 'c': 4}, 'e': 5, 'd': 4}
assert a21 == {'a': set([1, 2, 3]), 'b/c': 4, 'b/c2': 4, 'b/c3': 3, 'e': 5, 'd': 4} # by default, fdict supports recursive update (eg, c2 is kept here)

# update a subdict with a subdict
a12['b'].update(b1['b'])
a22['b'].update(b2['b'])
assert a12 == {'a': set([1, 2]), 'b': {'c3': 3, 'c2': 4, 'c': 4}, 'd': 4}
assert a22 == {'a': set([1, 2]), 'b/c': 4, 'b/c2': 4, 'b/c3': 3, 'd': 4}
assert a22 == a12
assert len(a22) == 5 # len() test

# update of a subdict with a whole dict (extracted subdict)
a13['b'].update(b1['b'])
b2sub = b2['b'].extract()
a23['b'].update(b2sub)
assert b2sub == {'c': 4, 'c3': 3}
assert a23 == a22
assert b2['b'].rootpath == b2sub.rootpath # rootpath is kept after extract
assert b2['b'].d == b2.d # dict of a sub fdict is the same as the root fdict's dict
assert dict(b2['b'].items()) == dict(b2sub.items())
assert dict(b2['b'].items(fullpath=True)) == dict(b2sub.items(fullpath=True)) == b2sub.d # but the items (filtered by rootpath) will be different

# update of a subdict with a whole dict (REALLY extracted subdict, rootpath is lost, so it is just like a new fdict)
a14['b'].update(b1['b'])
b2sub_orig = b2['b'].extract(fullpath=False)
for b2sub in [b2sub_orig.to_dict(), b2sub_orig]:
    # This test should pass with both a dict and a fdict
    a24c = a24.copy()
    a24c['b'].update(b2sub)
    assert b2sub == {'c': 4, 'c3': 3}
    assert a24c == a22
    assert b2['b'].rootpath == 'b'
    assert not b2sub_orig.rootpath # rootpath is lost after extract(fullpath=False) (so it is like creating a new fdict)
    assert b2['b'].d == b2.d # dict of a sub fdict is the same as the root fdict's dict
    assert dict(b2['b'].items()) == dict(b2sub.items())
    assert dict(b2['b'].items(fullpath=False)) == dict(b2sub_orig.items(fullpath=True)) == b2sub_orig.d # but the items (filtered by rootpath) will be different

# update of whole dict (extracted subdict) with subdict
a15sub = a15['b']
a15sub.update(b1['b'])
a25sub = a25['b'].extract(fullpath=False)
a25subc = a25sub.copy()
a25sub.update(b2['b'])
a25subc.update(b2['b'].extract(fullpath=False))
assert a15sub == a25sub == a25subc
assert dict(a15sub.items()) == dict(a25sub.items()) == dict(a25subc.items())

# Test fastview mode
a = fdict(fastview=True)
a['a/b/c'] = 1
a['a']['b']['d'] = 2
a['a']['e']['f'] = 3
a['a']['e']['g']['h'] = 4
a['a']['e']['g']['i'] = 5

# Internal store: the leaves + one entry per node (ending with the delimiter) listing its direct children
assert dict(a.d) == {'a/e/g/': set(['a/e/g/i', 'a/e/g/h']), 'a/e/f': 3, 'a/e/': set(['a/e/g/', 'a/e/f']), 'a/': set(['a/e/', 'a/b/']), 'a/b/c': 1, 'a/b/d': 2, 'a/b/': set(['a/b/c', 'a/b/d']), 'a/e/g/i': 5, 'a/e/g/h': 4}
assert dict(a.items()) == {'a/e/f': 3, 'a/b/c': 1, 'a/b/d': 2, 'a/e/g/i': 5, 'a/e/g/h': 4}  # items() on a fastview fdict should hide the nodes (eg, 'a/b/') and only show leafs, so that behavior is comparable to a non-fastview fdict
assert dict(a['a']['e'].items()) == {'g/i': 5, 'g/h': 4, 'f': 3}
assert dict(a['a']['e'].items(fullpath=True)) == {'a/e/g/i': 5, 'a/e/g/h': 4, 'a/e/f': 3}  # test recursive fastview items()

assert sorted(a['a']['e'].items(fullpath=True)) == [('a/e/f', 3), ('a/e/g/h', 4), ('a/e/g/i', 5)]
assert sorted(a['a']['e'].keys(fullpath=True)) == ['a/e/f', 'a/e/g/h', 'a/e/g/i']
assert set(a['a']['e'].values()) == set([5, 4, 3])
assert set(a['a'].values()) == set([1, 2, 3, 4, 5])  # use set() when we do not care about order in a list
assert list(a['j'].items()) == []  # empty nested dict
assert list(a['j'].keys()) == []
assert list(a['j'].values()) == []

print('All unit tests passed!')

In [None]:
### BENCHMARKS
# Lazy integer range on both Python 2 (xrange) and Python 3 (range)
try:
    _range = xrange
except NameError:
    _range = range


def benchmark_set(dclass, breadth=5, depth=1000, args=None, kwargs=None):
    """Build a container of type ``dclass`` and assign ``breadth`` items to it, ``depth`` times.

    Every pass writes the same keys ``'0'`` .. ``str(breadth - 1)`` directly on
    the root container (no descent into sub-levels is performed), then reads
    the last key back once.

    :param dclass: callable returning the container to benchmark (eg, ``dict`` or ``fdict``).
    :param breadth: number of keys written per pass.
    :param depth: number of passes.
    :param args: positional arguments for ``dclass`` (``None`` means none).
    :param kwargs: keyword arguments for ``dclass`` (``None`` means none).
    :return: the filled container.
    """
    # None defaults avoid shared mutable defaults; unpacking None itself would raise a TypeError
    d = dclass(*(args or ()), **(kwargs or {}))
    for _ in _range(depth):
        for j in _range(breadth):
            d[str(j)] = j
        if breadth > 0:
            # One read of the last written key per pass; skipped when nothing was written
            d[str(breadth - 1)]
    return d


def benchmark_get(dclass, breadth=5, depth=1000, args=None, kwargs=None):
    """Fill a container with benchmark_set(), then read its ``breadth`` items, ``depth`` times.

    Parameters are the same as for benchmark_set().

    :return: the last value read, or ``None`` if nothing was read.
    """
    d = benchmark_set(dclass, breadth=breadth, depth=depth, args=args, kwargs=kwargs)
    x = None
    for _ in _range(depth):
        for j in _range(breadth):
            x = d[str(j)]
        if breadth > 0:
            # One extra read of the last key per pass, mirroring benchmark_set()
            d[str(breadth - 1)]
    return x
