# In [None]:
import collections
import shelve
import sys
import tempfile

# True when running under Python 3 or later; used below to choose between the
# Python 2 and Python 3 dict iteration APIs.
PY3 = (sys.version_info >= (3,0))

# Version string of this module.
__version__ = '0.2.1'

def flatkeys(d, sep="/"):
    """
    Flatten a dictionary: build a new dictionary from a given one where all
    non-mapping values are left untouched but nested mappings are recursively
    merged in the new one with their keys prefixed by their parent key.

    Keys of nested entries are converted with ``str`` and joined with ``sep``.
    Top-level keys whose value is not a mapping are kept as-is (they are not
    converted to strings). An empty nested mapping contributes no entry.

    :param d: the mapping to flatten (anything providing ``items()``).
    :param sep: the string inserted between a parent key and a child key.
    :return: a new flat ``dict``; the input is not modified.

    >>> flatkeys({1: 42, 'foo': 12}) == {1: 42, 'foo': 12}
    True
    >>> flatkeys({1: 42, 'foo': 12, 'bar': {'qux': True}}) == {1: 42, 'foo': 12, 'bar/qux': True}
    True
    >>> flatkeys({1: {2: {3: 4}}})
    {'1/2/3': 4}
    >>> flatkeys({1: {2: {3: 4}, 5: 6}}) == {'1/2/3': 4, '1/5': 6}
    True
    >>> flatkeys({'a': {'b': 1}}, sep='.')
    {'a.b': 1}

    v0.1.0 by bfontaine, MIT license
    """
    # ``collections.Mapping`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc`` on Python 3 and directly in ``collections`` on Python 2.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    flat = {}
    # Explicit stack of (key prefix, mapping) pairs instead of recursion.
    pending = [("", d)]

    while pending:
        prefix, current = pending.pop()
        for k, v in current.items():
            k_s = str(k)
            if isinstance(v, Mapping):
                pending.append(("%s%s%s" % (prefix, k_s, sep), v))
            else:
                # Only prefixed (nested) keys are stringified; a top-level key
                # keeps its original type.
                flat[prefix + k_s if prefix else k] = v
    return flat

class fdict(dict):
    '''Flattened nested dict: all items are settable and gettable through the ['item1']['item2'] standard form or the ['item1/item2'] internal form.

    Every entry lives in one flat mapping, ``self.d``, whose keys are paths joined with ``delimiter``.
    Accessing a key that is not a leaf returns a new instance sharing the same ``self.d`` but with a
    longer ``rootpath``; that sub-view only exposes the entries located below its rootpath.
    This allows replacing the internal dict with any on-disk storage system like a shelve's shelf
    (great for huge nested dicts that cannot fit into memory).

    Main limitation: an entry can be both a singleton and a nested fdict, and there is no way to tell
    what is what; no error will be shown, the singleton will always be returned.

    Note: the inherited ``dict`` storage itself is never populated; only ``self.d`` holds data.
    '''
    def __init__(self, d=None, rootpath='', delimiter='/', *args):
        '''Create a root fdict from ``d`` (flattened first), or a sub-view when ``rootpath`` is given.

        :param d: initial mapping (or anything ``dict()`` accepts); with a ``rootpath`` it is the
                  shared flat store of the parent and is used as-is.
        :param rootpath: path prefix restricting this view; empty for a root fdict.
        :param delimiter: separator between path components in flat keys.
        '''
        if rootpath and d is not None:
            # Internal call for a sub-item: share the parent's store. The test is
            # ``is not None`` (not truthiness) so that a still-empty store is shared too,
            # otherwise writes through the sub-view would be lost.
            self.d = d
        elif d:
            # User-supplied data: convert to a dict if needed and flatten its keys
            self.d = flatkeys(d if isinstance(d, dict) else dict(d), sep=delimiter)
        else:
            self.d = {}
        self.rootpath = rootpath
        self.delimiter = delimiter
        self._py3compat()

    def _py3compat(self):
        '''Bind ``_viewkeys``/``_viewvalues``/``_viewitems`` to the best iteration API of ``self.d``.'''
        if PY3:
            # Py3
            self._viewkeys = self.d.keys
            self._viewvalues = self.d.values
            self._viewitems = self.d.items
        else:
            # Py2
            if getattr(self.d, "viewvalues", None):
                # Py2.7
                self._viewkeys = self.d.viewkeys
                self._viewvalues = self.d.viewvalues
                self._viewitems = self.d.viewitems
            else:
                # Py2.6
                self._viewkeys = self.d.iterkeys
                self._viewvalues = self.d.itervalues
                self._viewitems = self.d.iteritems

    def _buildpath(self, key):
        '''Return the flat key in ``self.d`` corresponding to ``key`` seen from this view.'''
        return self.rootpath+self.delimiter+key if self.rootpath else key

    def _prefix(self):
        '''Return the flat-key prefix shared by every entry below this view's rootpath.'''
        return self.rootpath + self.delimiter

    def __getitem__(self, key):
        '''Return the leaf stored at ``key``, or a sub-view rooted at ``key`` if there is no such leaf.'''
        fullkey = self._buildpath(key)
        if fullkey in self.d: # Leaf: return the value
            return self.d[fullkey]
        # Node: return a new view over the same store with a longer rootpath
        return self.__class__(d=self.d, rootpath=fullkey, delimiter=self.delimiter)

    def __setitem__(self, key, value):
        '''Store ``value`` as a leaf at ``key``; the value is stored as-is (a dict value is not flattened).'''
        self.d[self._buildpath(key)] = value

    def __delitem__(self, key):
        '''Delete the leaf stored at ``key``; raises whatever the store raises for a missing key.'''
        del self.d[self._buildpath(key)]

    def __contains__(self, key):
        '''Return True if a leaf is stored at ``key`` (intermediate nodes are not reported).'''
        return self._buildpath(key) in self.d

    def __len__(self):
        '''Return the number of leaves visible from this view.'''
        if not self.rootpath:
            return len(self.d)
        return sum(1 for _ in self.viewkeys())

    def __iter__(self):
        '''Iterate over the keys visible from this view (relative to the rootpath).'''
        return iter(self.viewkeys())

    def get(self, key, default=None):
        '''Return the leaf stored at ``key``, or ``default`` if there is none (no sub-view is created).'''
        return self.d.get(self._buildpath(key), default)

    def viewkeys(self):
        '''Return the keys below the rootpath, with the rootpath prefix stripped.'''
        if not self.rootpath:
            return self._viewkeys()
        pattern = self._prefix()
        lpattern = len(pattern)
        return (k[lpattern:] for k in self._viewkeys() if k.startswith(pattern))

    def viewitems(self):
        '''Return the (key, value) pairs below the rootpath, keys relative to the rootpath.'''
        if not self.rootpath:
            return self._viewitems()
        pattern = self._prefix()
        lpattern = len(pattern)
        return ((k[lpattern:], v) for k, v in self._viewitems() if k.startswith(pattern))

    def viewvalues(self):
        '''Return the values of the entries below the rootpath.'''
        if not self.rootpath:
            return self._viewvalues()
        pattern = self._prefix()
        return (v for k, v in self._viewitems() if k.startswith(pattern))

    iterkeys = viewkeys
    itervalues = viewvalues
    iteritems = viewitems
    if PY3:
        keys = viewkeys
        values = viewvalues
        items = viewitems
    else:
        def keys(self):
            return list(self.viewkeys())
        def values(self):
            return list(self.viewvalues())
        def items(self):
            return list(self.viewitems())

    def update(self, d2):
        '''Merge ``d2`` into this view.

        An instance of the same class is merged entry by entry (values untouched); any other
        mapping is flattened first. On a sub-view, the new entries are placed below the rootpath.
        '''
        if isinstance(d2, self.__class__):
            entries = d2.items()
        else:
            entries = flatkeys(d2, sep=self.delimiter).items()
        if self.rootpath:
            prefix = self._prefix()
            self.d.update(dict(("%s%s" % (prefix, k), v) for k, v in entries))
        else:
            self.d.update(dict(entries))

    def __eq__(self, d2):
        '''Compare the entries visible from this view with ``d2`` (another fdict or a mapping).'''
        if isinstance(d2, fdict):
            d2 = d2.to_dict()
        return self.to_dict() == d2

    def __ne__(self, d2):
        # Python 2 does not derive ``!=`` from ``__eq__``
        return not self.__eq__(d2)

    def __repr__(self):
        # Always render the visible entries; this avoids instantiating a new store and
        # shows the contents even when ``self.d`` has no dict-like repr of its own.
        return repr(self.to_dict())

    def __str__(self):
        return str(self.to_dict())

    def to_dict(self):
        '''Return a plain (flat) dict of the entries visible from this view.'''
        return dict(self.items())

    def extract(self):
        '''Return a new, independent instance of the same class holding only the entries visible from this view.'''
        return self.__class__(self.to_dict(), delimiter=self.delimiter)

class sfdict(fdict):
    '''A nested dict with flattened internal representation, combined with shelve to allow for efficient storage and memory allocation of huge nested dictionaries.

    The flat store ``self.d`` is a shelf opened with ``writeback=True``.
    If you change leaf items in place (eg, list.append), do not forget to sync() to commit changes to disk and empty the memory cache, because this class has no way to know that leaf items were changed!
    '''
    def __init__(self, *args, **kwargs):
        '''Open (or create) the shelf backing this dict.

        Accepts the same arguments as ``fdict`` plus the keyword ``filename``: path of the
        shelf to open. Without it a fresh temporary path is used. Initial data given as ``d``
        is written into the shelf.
        '''
        filename = kwargs.pop('filename', None)
        # Mirror fdict.__init__'s parameter order (d, rootpath, ...) to recognise the
        # internal call made by __getitem__ when it builds a sub-view.
        store = args[0] if args else kwargs.get('d')
        rootpath = args[1] if len(args) > 1 else kwargs.get('rootpath', '')
        fdict.__init__(self, *args, **kwargs)
        if rootpath and store is not None:
            # Sub-view: share the parent's already-open shelf instead of opening a new one,
            # otherwise nested reads and writes would go to an unrelated empty shelf.
            self.filename = filename
            self.d = store
        else:
            if not filename:
                # Only reserve a unique name: the temporary file is removed on exit of the
                # ``with`` block, because dbm cannot open a pre-existing empty file.
                with tempfile.NamedTemporaryFile() as tmp:
                    filename = tmp.name
            self.filename = filename
            initial = self.d  # data flattened by fdict.__init__, if any
            self.d = shelve.open(filename=self.filename, flag='c', writeback=True)
            if initial:
                self.d.update(initial)
                self.d.sync()
        # Rebind the iteration helpers to the shelf
        self._py3compat()

    def __setitem__(self, key, value):
        '''Store ``value`` at ``key`` and immediately commit it to disk.'''
        fdict.__setitem__(self, key, value)
        self.sync()

    def get_filename(self):
        '''Return the shelf's filename if known, else the backend's ``_datfile`` attribute when it has one, else None.'''
        if self.filename:
            return self.filename
        return getattr(getattr(self.d, 'dict', None), '_datfile', None)

    def sync(self):
        '''Write cached entries back to disk (delegates to the shelf's ``sync``).'''
        self.d.sync()

    def close(self):
        '''Close the underlying shelf; sub-views sharing it become unusable too.'''
        self.d.close()

# Unit testing
a = fdict()
a['a'] = {}
a['c']['b'] = set([1, 2])
b = a['a']

#print(a)
assert a.keys() == ['a', 'c/b']
assert b.items() == []
assert a.items() == [('a', {}), ('c/b', set([1, 2]))]

c = {}
c['b'] = set([1, 2])
assert c.items() == [('b', set([1, 2]))]

assert a.values() == [{}, set([1, 2])]
assert b.values() == []
assert c.values() == [set([1, 2])]

d = fdict()
d['b'] = {'a': 1}
d['c/b'] = set([2, 3, 5])
assert d.to_dict() == {'c/b': set([2, 3, 5]), 'b': {'a': 1}}

a.update(d)
assert a.to_dict() == {'a': {}, 'c/b': set([2, 3, 5]), 'b': {'a': 1}}
assert a['c'].to_dict() == {'b': set([2, 3, 5])}

g = sfdict(filename='testshelf')
g['a'] = 3
g['b/c'] = set([1, 3, 4])
g['d'] = {}
assert g == {'a': 3, 'b/c': set([1, 3, 4]), 'd': {}}

h = sfdict(filename='testshelf')
assert h == g

m = {}
m['a'] = 1
m['b'] = {'c': 3, 'd': {'e': 5}}
m['f'] = set([1, 2, 5])
m2 = fdict(m)
assert dict(m2.items()) == flatkeys(m)

n = {}
n['b'] = {'d': {'f': 6}}
n['g'] = 7
m2.update(n)
assert m2 == {'a': 1, 'g': 7, 'b/d/f': 6, 'b/d/e': 5, 'f': set([1, 2, 5]), 'b/c': 3}

assert m2['b'].d == m2.d
assert m2['b'].extract().d == {'c': 3, 'd/e': 5, 'd/f': 6}

# In [None]:
# Assign a nested dict through item assignment: fdict.__setitem__ stores the
# value as-is under the key 'h' (it is not flattened, unlike update()).
m2['h'] = {'i': {'j': 1}, 'k': set([5, 6, 7])}
# Bare expression: echoes m2 as the notebook cell output.
m2

# In [None]:
# Print the values of d; d is a root fdict (no rootpath), so this is the
# values view/iterator of its underlying flat store.
print(d.viewvalues())

# In [None]:
# Write directly with the flat key form. 'a' already holds a leaf ({}), so 'a'
# is now both a singleton and a parent of 'a/b' (the limitation noted in the
# fdict docstring).
a['a/b'] = 3
# 'c' is not a leaf, so this builds a sub-view rooted at 'c' (result discarded here).
a['c']
# Write through the sub-view: stored under the flat key 'c/e'.
a['c']['e'] = 1
# Bare expression: echoes a as the notebook cell output.
a

# In [None]:
# Open (or create) the shelf file 'testshelf' and write three entries; each
# assignment is synced to disk by sfdict.__setitem__.
g = sfdict(filename='testshelf')
g['a'] = 3
g['b/c'] = set([1, 3, 4])
g['d'] = {}
# Bare expression: echoes g as the notebook cell output.
g

# In [None]:
# Open the same shelf file through a second sfdict and echo it, to inspect the
# entries written via g.
# NOTE(review): g is presumably still open at this point; whether a second
# open of the same file is allowed depends on the dbm backend -- confirm.
h = sfdict(filename='testshelf')
h