In [1]:
# stdlib
from datetime import datetime
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

# syft absolute
from syft import UID
from syft.types.base import SyftBaseModel

In [2]:
class MyBaseModel(SyftBaseModel):
    """Base model whose instances always carry an ``id``.

    When no ``id`` is supplied (or ``None`` is passed), a fresh ``UID()`` is
    generated before the arguments are forwarded to ``SyftBaseModel``.
    """

    id: UID

    def __init__(self, id=None, *args, **kwargs):
        """Forward all arguments to the parent, filling in ``id`` if missing."""
        if id is None:
            # No identifier given: mint a new one.
            id = UID()
        super().__init__(*args, id=id, **kwargs)

In [3]:
class Event(MyBaseModel):
    """Base class for the entries stored in a node's event log.

    Attributes:
        creator: UID of the event's creator (``Node`` passes its own id).
        creation_date: moment the event object was instantiated; ``sync``
            sorts merged branches by this value.
    """

    creator: UID
    creation_date: datetime

    def __init__(self, *args, **kwargs):
        """Create the event, stamping ``creation_date`` if the caller gave none.

        A class-level default of ``datetime.now()`` would be evaluated only
        once, when the class body runs, so every event would share the same
        timestamp. Computing it here gives each event its own creation time.
        """
        kwargs.setdefault("creation_date", datetime.now())
        super().__init__(*args, **kwargs)

    def handler(self, node):
        """Return the bound method of ``node`` registered for this event class.

        Raises:
            KeyError: if no handler was registered under this class's name.
        """
        method_name = event_handler_registry[self.__class__.__name__]
        return getattr(node, method_name)

In [4]:
class EventLog(MyBaseModel):
    """Ordered list of the events recorded by a node."""

    # Events in the order they were appended; empty by default.
    log: List[Event] = []

In [5]:
class LinkedObject(MyBaseModel):
    """Reference to an object by node id and object id."""

    # Id of the node the reference points at.
    node_id: UID
    # Id of the referenced object.
    obj_id: UID

In [6]:
class Dataset(MyBaseModel):
    """Dataset record kept in a node's ``store``."""

    # Link to the real data; the dataset itself only holds the reference.
    real: LinkedObject
    # Mock value stored directly on the dataset.
    mock: str
    # Free-text description.
    description: str

In [7]:
def register_event_handler(event_type):
    """Decorator factory that registers a method as the handler for ``event_type``.

    The mapping ``event_type.__name__ -> method.__name__`` is written into the
    module-level ``event_handler_registry``; the decorated method is returned
    unchanged.
    """

    def decorator(func):
        # Names (not objects) are stored, so the handler is later looked up
        # on whichever node the event is executed against.
        event_handler_registry[event_type.__name__] = func.__name__
        return func

    return decorator

In [8]:
# Maps an event class name to the name of the method that handles it;
# filled in by the `register_event_handler` decorator.
event_handler_registry: Dict[str, str] = {}

# Events

In [9]:
class CreateDatasetEvent(Event):
    """Event describing the creation of a dataset.

    Carries everything the registered handler needs to rebuild the dataset:
    its id, the mock value, the link to the real data and the description.
    """

    dataset_id: UID
    mock: Any
    real: LinkedObject
    description: str
    creator: UID

    def execute(self, node):
        """Apply this event to ``node`` by calling its registered handler."""
        create = self.handler(node)
        payload = {
            "dataset_id": self.dataset_id,
            "mock": self.mock,
            "real": self.real,
            "description": self.description,
        }
        create(**payload)

In [12]:
class UpdateDatasetEvent(Event):
    """Event describing an update of some attributes of an existing dataset.

    Attributes:
        dataset_id: id of the dataset to update.
        updates: mapping of attribute name to new value.
    """

    dataset_id: UID
    updates: Dict[str, Any]

    def execute(self, node):
        """Apply this event to ``node`` by calling its registered handler."""
        handler = self.handler(node)
        # Pass the event's own payload (`self.updates`); a bare `updates` is
        # not defined in this scope. The arguments are positional because the
        # handler on Node names its first parameter `id`, not `dataset_id`.
        handler(self.dataset_id, self.updates)

# Node

In [13]:
class Node(MyBaseModel):
    """A node with a dataset store, a private store and an event log.

    Every public mutation (``create_dataset``, ``update_dataset``) records an
    event in ``event_log`` and then applies it through the matching private
    handler. The handlers are registered with ``register_event_handler`` so
    that events can be replayed against a node via ``event.execute(node)``.

    Attributes:
        event_log: log of all events recorded on this node.
        store: datasets keyed by dataset id.
        private_store: real data keyed by its own id; events never write here.
    """

    event_log: EventLog
    store: Dict[UID, Any]
    private_store: Dict[UID, Any]

    def __init__(self, *args, **kwargs):
        """Create a node with an empty event log and empty stores."""
        super().__init__(
            *args,
            private_store={},
            store={},
            event_log=EventLog(),
            **kwargs,
        )

    def create_dataset(self, mock: str, real: Optional[str], description: str):
        """Create a dataset on this node and record a ``CreateDatasetEvent``.

        Args:
            mock: mock value stored on the dataset itself.
            real: real data; kept only in ``private_store``.
            description: free-text description of the dataset.
        """
        dataset_id = UID()
        real_id = UID()
        # The link must reference the private-store entry holding the real
        # data (keyed by real_id), not the dataset that contains the link.
        real_obj = LinkedObject(node_id=self.id, obj_id=real_id)

        self.private_store[real_id] = real

        event = CreateDatasetEvent(
            dataset_id=dataset_id,
            mock=mock,
            real=real_obj,
            description=description,
            creator=self.id,
        )

        self.event_log.log.append(event)
        self._create_dataset(dataset_id, mock, real_obj, description)

    @register_event_handler(CreateDatasetEvent)
    def _create_dataset(self, dataset_id, mock, real, description):
        """Handler for ``CreateDatasetEvent``: build the dataset and store it."""
        dataset = Dataset(id=dataset_id, mock=mock, real=real, description=description)
        self.store[dataset.id] = dataset

    def update_dataset(self, id, updates):
        """Update attributes of a stored dataset and record the event.

        Args:
            id: id of the dataset to update.
            updates: mapping of attribute name to new value.

        Raises:
            KeyError: if no dataset with ``id`` is in ``store``.
        """
        event = UpdateDatasetEvent(
            dataset_id=id,
            updates=updates,
            # `creator` is a required field on Event.
            creator=self.id,
        )
        # The list lives on `event_log.log`; EventLog itself has no `append`.
        self.event_log.log.append(event)
        self._update_dataset(id, updates)

    @register_event_handler(UpdateDatasetEvent)
    def _update_dataset(self, id, updates):
        """Handler for ``UpdateDatasetEvent``: set each updated attribute."""
        dataset = self.store[id]

        # Iterate over (name, value) pairs; iterating the dict directly would
        # yield only the keys.
        for k, v in updates.items():
            setattr(dataset, k, v)

        self.store[id] = dataset

We want to check for 'mutations' of the same object, where a mutation is defined as:

- CUD (from CRUD) of objects with the same unique keys
  - create changes all attributes
  - delete changes all attributes
  - update only changes the attributes that were updated


In the case of an update, if only non-overlapping sets of properties were updated, it's not a merge conflict, as long as those are not code-approval mutations.

In [14]:
def sync(node_high, node_low):
    """Merge the event logs of two nodes and rebuild both stores from the result.

    The logs are assumed to share a common prefix. The fork is the first
    position at which the event ids differ; everything after it on either side
    is merged by ``creation_date``. Each node then receives its own copy of
    the merged log, has its ``store`` cleared and all events replayed.
    ``private_store`` is left untouched.

    Conflict detection between the two branches is not implemented yet.

    Args:
        node_high: first node; its log supplies the common prefix.
        node_low: second node.

    Raises:
        AssertionError: if the two logs differ after the merge.
    """
    log1 = node_high.event_log.log
    log2 = node_low.event_log.log

    # Find the index of the fork. When no ids differ, one log is a prefix of
    # the other and the fork is where the shorter one ends; using the longer
    # length would silently drop events that exist only on the low side.
    fork_idx = min(len(log1), len(log2))
    for i, (e1, e2) in enumerate(zip(log1, log2)):
        if e1.id != e2.id:
            fork_idx = i
            break

    branch1 = log1[fork_idx:]
    branch2 = log2[fork_idx:]

    # TODO: detect and fix conflicts between the two branches.
    proposed_merge = sorted(branch1 + branch2, key=lambda e: e.creation_date)
    new_events = log1[:fork_idx] + proposed_merge

    # Rebuild both nodes from scratch. Each node gets its own EventLog so a
    # later append on one side does not leak into the other side's log.
    for node in (node_low, node_high):
        node.event_log = EventLog(log=list(new_events))
        node.store = {}
        for event in node.event_log.log:
            event.execute(node)

    low_events = node_low.event_log.log
    high_events = node_high.event_log.log
    assert len(low_events) == len(high_events) and all(
        x == y for x, y in zip(low_events, high_events)
    )

# Sync 1: create dataset

In [15]:
# Two independent nodes, each starting with an empty event log and stores.
node_high = Node()
node_low = Node()

In [16]:
# Create a dataset on the high side only, so the two event logs differ.
node_high.create_dataset(real="abc", mock="def", description="blabla")

In [17]:
# Event log of the high side before syncing.
node_high.event_log.log

In [18]:
# Merge the two event logs and replay the merged events.
sync(node_high, node_low)

In [19]:
# Event log of the low side after syncing.
node_low.event_log.log

In [20]:
# Dataset ids present in the low side's store after the replay.
print(node_low.store.keys())

dict_keys([<UID: 903b391a7258441eb213087dd4fa1389>])


In [21]:
# Both stores should now hold the same dataset ids.
assert node_high.store.keys() == node_low.store.keys()

# Sync 2:

In [30]:
# node_high.store

In [25]:
# node_high.update_dataset()

SCENARIOS
- create a dataset and sync
  - should create the dataset object on both sides
- both update the same property (conflict)
- both update a different property (no conflict)
- code approval should have same state
- code execution should be approved