# Identity Map

The purpose of the identity map is to keep track of in-memory data (e.g. Python dicts) and how that data maps to database objects (e.g. a MongoDB document).

In [1]:
import pymongo
# Client for the MongoDB server addressed by the `mongo` hostname.
cli = pymongo.MongoClient('mongodb://mongo')
# Database handle used by the cells below.
db = cli['barin2']

In [2]:
# Empty the collection first so that re-running this cell always leaves
# exactly one user document behind.
db.users.delete_many({})
user = {'name': 'Rick', 'classes': ['Python', 'ML']}
insert_result = db.users.insert_one(user)

In [3]:
insert_result

<pymongo.results.InsertOneResult at 0x7f7c4e6497d0>

In [4]:
insert_result.inserted_id

ObjectId('615e1e5736a6109a05ba8e39')

In [5]:
user_doc = db.users.find_one()

In [6]:
user_doc

{'_id': ObjectId('615e1e5736a6109a05ba8e39'),
 'name': 'Rick',
 'classes': ['Python', 'ML']}

In [7]:
# Attribute name under which an instrumented object stores its
# InstrumentedState (written by Instrumented.__init__, read by get_state).
STATE_MAGIC = '__b2_state__'

class InstrumentedState:
    """Change-tracking record attached to one instrumented object.

    Attributes:
        container: the instrumented object holding the owner of this state,
            or None when no container has been assigned.
        dirty: flag recording that the owner has been modified; see the
            setter for how it propagates.
    """

    def __init__(self):
        self.container = None
        self._dirty = False

    def __repr__(self):
        return f'<istate dirty={self._dirty} container={self.container}>'

    @property
    def dirty(self):
        """Current value of the dirty flag."""
        return self._dirty

    @dirty.setter
    def dirty(self, value):
        """Store the flag and, when it is truthy, mark the container dirty too.

        Propagation continues upward through each container's own state.
        Setting a falsy value affects this state only.
        """
        self._dirty = value
        # Compare against None instead of relying on truthiness: a container
        # that happens to be empty (e.g. a dict that was just cleared) is
        # falsy, yet it still has to learn that one of its children changed.
        if value and self.container is not None:
            get_state(self.container).dirty = True

class Instrumented:
    """Mixin that equips every new instance with its own InstrumentedState.

    The state is stored under the STATE_MAGIC attribute before the remaining
    constructor arguments are forwarded to the next class in the MRO.
    """

    def __init__(self, *args, **kwargs):
        tracking_state = InstrumentedState()
        setattr(self, STATE_MAGIC, tracking_state)
        super().__init__(*args, **kwargs)
        
def get_state(obj):
    """Return the state object stored on `obj` under STATE_MAGIC.

    Raises:
        AttributeError: if `obj` has no such attribute.
    """
    state = getattr(obj, STATE_MAGIC)
    return state

def instrument_method(cls, method_name):
    """Build a replacement for `cls.<method_name>` that records a change.

    Args:
        cls: class whose method is being wrapped (e.g. ``dict`` or ``list``).
        method_name: name of a method defined on ``cls``.

    Returns:
        A function usable as a method: it marks the receiver's state dirty,
        then delegates to the original implementation and returns its result.

    Raises:
        AttributeError: if ``cls`` has no attribute ``method_name``.
    """
    import functools  # local import keeps this definition self-contained

    super_method = getattr(cls, method_name)

    # wraps() carries over __name__/__doc__, so the instrumented method still
    # introspects as e.g. "append" rather than as an anonymous "f".
    @functools.wraps(super_method)
    def f(self, *args, **kwargs):
        # Flag first: if the underlying call raises, the object is still
        # (conservatively) treated as changed.
        get_state(self).dirty = True
        return super_method(self, *args, **kwargs)
    return f

def instrument_class(cls, *dirty_methods):
    """Create a subclass of `cls` whose listed methods are instrumented.

    Each name in `dirty_methods` is overridden with the result of
    ``instrument_method(cls, name)``. The new class is named
    ``'I' + cls.__qualname__`` and inherits from ``Instrumented`` first and
    ``cls`` second.
    """
    overrides = {}
    for name in dirty_methods:
        overrides[name] = instrument_method(cls, name)
    bases = (Instrumented, cls)
    return type('I' + cls.__qualname__, bases, overrides)


In [8]:
# Every method that changes a container in place has to be wrapped; a mutator
# missing from these lists (such as list.sort or `+=`) would modify the
# document without ever marking it dirty.
_DICT_MUTATORS = (
    '__setitem__ __delitem__ __ior__ clear pop popitem setdefault update'
).split()
_LIST_MUTATORS = (
    '__setitem__ __delitem__ __iadd__ __imul__ '
    'append clear extend insert pop remove reverse sort'
).split()

# dict.__ior__ (`d |= other`) only exists on Python 3.9+, so names the running
# interpreter does not define are skipped.
IDict = instrument_class(dict, *(name for name in _DICT_MUTATORS if hasattr(dict, name)))
IList = instrument_class(list, *_LIST_MUTATORS)

In [9]:
from functools import singledispatch

@singledispatch
def instrument_object(obj, container=None):
    """Return a change-tracking version of `obj`.

    Dispatches on the type of `obj`: dicts and lists are rebuilt recursively
    as IDict / IList, any other value is handed back untouched.

    Args:
        obj: value to instrument.
        container: object recorded as the holder of the result (None for a
            top-level value).
    """
    return obj


@instrument_object.register
def _instrument_object_dict(obj: dict, container=None):
    """dict case: instrument every value, then set up the container links."""
    tracked = IDict({
        key: instrument_object(value)
        for key, value in obj.items()
    })
    instrument_object_container(tracked, container)
    return tracked


@instrument_object.register
def _instrument_object_list(obj: list, container=None):
    """list case: instrument every element, then set up the container links."""
    tracked = IList([instrument_object(item) for item in obj])
    instrument_object_container(tracked, container)
    return tracked
    
@singledispatch
def instrument_object_container(obj, container):
    """Record `container` as the holder of `obj`, recursing into children.

    This default implementation handles every type without a registered
    case and does nothing.
    """
    return None


def _attach_to_container(obj, container, children):
    """Store `container` on `obj`'s state, then make `obj` the container of each child."""
    get_state(obj).container = container
    for child in children:
        instrument_object_container(child, obj)


@instrument_object_container.register
def instrument_object_container_dict(obj: dict, container):
    """dict case: the children are the dict's values."""
    _attach_to_container(obj, container, obj.values())


@instrument_object_container.register
def instrument_object_container_list(obj: list, container):
    """list case: the children are the list's elements."""
    _attach_to_container(obj, container, obj)

In [10]:
class IdentityMap:
    """Registry of instrumented documents keyed by (collection, _id)."""

    def __init__(self):
        self._idmap = {}

    def process(self, collection, doc):
        """Instrument a raw MongoDB `doc` and store it in the map.

        Any entry already stored under the same (collection, doc['_id']) key
        is replaced. Returns the instrumented document.
        """
        key = (collection, doc['_id'])
        instrumented = instrument_object(doc)
        self._idmap[key] = instrumented
        return instrumented

    def get(self, collection, _id, default=None):
        """Return the document stored for (collection, _id), or `default`."""
        lookup_key = (collection, _id)
        return self._idmap.get(lookup_key, default)

In [11]:
idmap = IdentityMap()

In [12]:
user_mdoc = idmap.process(db.users, user_doc)

In [13]:
get_state(user_mdoc).dirty

False

Test that modifying the mdoc directly marks it dirty.

In [14]:
user_mdoc['business'] = 'Arborian'

In [15]:
user_mdoc.update(business='Arborian')

In [16]:
user_mdoc

{'_id': ObjectId('615e1e5736a6109a05ba8e39'),
 'name': 'Rick',
 'classes': ['Python', 'ML'],
 'business': 'Arborian'}

In [17]:
get_state(user_mdoc).dirty

True

Get a fresh mdoc, then test that popping from an embedded list marks the whole document dirty.

In [18]:
user_mdoc = idmap.process(db.users, user_doc)
get_state(user_mdoc).dirty

False

In [19]:
user_mdoc['classes'].pop()

'ML'

In [20]:
get_state(user_mdoc).dirty

True

In [21]:
get_state(user_mdoc['classes']).dirty

True

In [22]:
get_state(user_mdoc['classes']).container

{'_id': ObjectId('615e1e5736a6109a05ba8e39'),
 'name': 'Rick',
 'classes': ['Python']}

# Test it in the barin2 package

In [23]:
from barin2.identity_map import IdentityMap
from barin2.instrumentation import get_state, cleanse_object

In [24]:
# Bind the package's IdentityMap to `idmap`, the name the following cells call
# `process` on; otherwise they keep exercising the notebook-local instance
# created earlier. `imap` stays bound to the same object.
idmap = imap = IdentityMap()

In [25]:
user_mdoc = idmap.process(db.users, user_doc)
get_state(user_mdoc)

<istate dirty=False container=None>

Test that modifying the mdoc directly marks it dirty.

In [26]:
user_mdoc['business'] = 'Arborian'
get_state(user_mdoc)

<istate dirty=True container=None>

In [27]:
user_mdoc = idmap.process(db.users, user_doc)
get_state(user_mdoc)

<istate dirty=False container=None>

In [28]:
user_mdoc.update(business='Arborian')
get_state(user_mdoc)

<istate dirty=True container=None>

In [29]:
user_mdoc = idmap.process(db.users, user_doc)
get_state(user_mdoc)

<istate dirty=False container=None>

In [30]:
user_mdoc['classes'].pop()
get_state(user_mdoc)

<istate dirty=True container=None>

In [31]:
get_state(user_mdoc['classes'])

<istate dirty=True container={'_id': ObjectId('615e1e5736a6109a05ba8e39'), 'name': 'Rick', 'classes': ['Python']}>

In [32]:
get_state(user_mdoc['classes']).container

{'_id': ObjectId('615e1e5736a6109a05ba8e39'),
 'name': 'Rick',
 'classes': ['Python']}

In [33]:
cleanse_object(user_mdoc)
get_state(user_mdoc)

<istate dirty=False container=None>

In [34]:
get_state(user_mdoc['classes'])

<istate dirty=False container={'_id': ObjectId('615e1e5736a6109a05ba8e39'), 'name': 'Rick', 'classes': ['Python']}>