Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLOC-4421] Diffing of objects #2806

Merged
merged 15 commits into from
Jun 2, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 243 additions & 0 deletions flocker/control/_diffing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
# Copyright ClusterHQ Inc. See LICENSE file for details.
# -*- test-case-name: flocker.control.test.test_diffing -*-

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a module docstring.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"""
Code to calculate the difference between objects. This is particularly useful
for computing the difference between deeply pyrsisistent objects such as the
flocker configuration or the flocker state.
"""

from pyrsistent import (
PClass,
PMap,
PSet,
field,
pvector,
pvector_field,
)

from zope.interface import Interface, implementer


class _IDiffChange(Interface):
"""
Interface for a diff change.

This is simply something that can be applied to an object to create a new
object.

This interface is created as documentation rather than for any of the
actual zope.interface mechanisms.
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo "more to simply documentation"
And maybe drop the word "simply" from the line above while you're here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


def apply(obj):
"""
Apply this diff change to the passed in object and return a new object
that is obj with the ``self`` diff applied.

:param object obj: The object to apply the diff to.

:returns: A new object that is the passed in object with the diff
applied.
"""


@implementer(_IDiffChange)
class _Remove(PClass):
"""
A ``_IDiffChange`` that removes an object from a ``PSet`` or a key from a
``PMap`` inside a nested object tree.

:ivar path: The path in the nested object tree of the object to be removed
from the import set.

:ivar item: The item to be removed from the set or the key to be removed
from the mapping.
"""
path = pvector_field(object)
item = field()

def apply(self, obj):
return obj.transform(self.path, lambda o: o.remove(self.item))


@implementer(_IDiffChange)
class _Set(PClass):
"""
A ``_IDiffChange`` that sets a field in a ``PClass`` or sets a key in a
``PMap``.

:ivar path: The path in the nested object to the field/key to be set to a
new value.

:ivar value: The value to set the field/key to.
"""
path = pvector_field(object)
value = field()

def apply(self, obj):
return obj.transform(self.path, self.value)


@implementer(_IDiffChange)
class _Add(PClass):
"""
A ``_IDiffChange`` that adds an item to a ``PSet``.

:ivar path: The path to the set to which the item will be added.

:ivar item: The item to be added to the set.
"""
path = pvector_field(object)
item = field()

def apply(self, obj):
return obj.transform(self.path, lambda x: x.add(self.item))


@implementer(_IDiffChange)
class _Diff(PClass):
"""
A ``_IDiffChange`` that is simply the serial application of other diff
changes.

This is the object that external modules get and use to apply diffs to
objects.

:ivar changes: A vector of ``_IDiffChange`` s that represent a diff between
two objects.
"""

changes = pvector_field(object)

def apply(self, obj):
for c in self.changes:
obj = c.apply(obj)
return obj
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ This looks like a situation where it'd be faster to operate on obj.evolver() rather than the immutable object.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noted.

Turns out Evolvers don't actually have a .transform method, so this change is a bit non-trivial.

Something similar would be to aggregate all of the transforms, and then apply them all in a single call to obj.transform.

But... That seems like the sort of change that can be done in a subsequent review, and can be done on-demand if required by performance analysis.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sarum90 's comment here is related to the issue described in tobgu/pyrsistent#89 and offers a possible alternative implementation than #2839



def _create_diffs_for_sets(current_path, set_a, set_b):
"""
Computes a series of ``_IDiffChange`` s to turn ``set_a`` into ``set_b``
assuming that these sets are at ``current_path`` inside a nested pyrsistent
object.

:param current_path: An iterable of pyrsistent object describing the path
inside the root pyrsistent object where the other arguments are
located. See ``PMap.transform`` for the format of this sort of path.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo "And iterable of pyrsistent object"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


:param set_a: The desired input set.

:param set_b: The desired output set.

:returns: An iterable of ``_IDiffChange`` s that will turn ``set_a`` into
``set_b``.
"""
resulting_diffs = pvector([]).evolver()
for item in set_a.difference(set_b):
resulting_diffs.append(
_Remove(path=current_path, item=item)
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ Might be neat if _Remove and _Add had the same signature.
_Remove(path=current_path, item=item) should work I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea that's much cleaner. Nice catch! Not sure what I was thinking.

for item in set_b.difference(set_a):
resulting_diffs.append(
_Add(path=current_path, item=item)
)
return resulting_diffs.persistent()


def _create_diffs_for_mappings(current_path, mapping_a, mapping_b):
"""
Computes a series of ``_IDiffChange`` s to turn ``mapping_a`` into
``mapping_b`` assuming that these mappings are at ``current_path`` inside a
nested pyrsistent object.

:param current_path: An iterable of pyrsistent object describing the path
inside the root pyrsistent object where the other arguments are
located. See ``PMap.transform`` for the format of this sort of path.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same typo here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


:param mapping_a: The desired input mapping.

:param mapping_b: The desired output mapping.

:returns: An iterable of ``_IDiffChange`` s that will turn ``mapping_a``
into ``mapping_b``.
"""
resulting_diffs = pvector([]).evolver()
a_keys = frozenset(mapping_a.keys())
b_keys = frozenset(mapping_b.keys())
for key in a_keys.intersection(b_keys):
if mapping_a[key] != mapping_b[key]:
resulting_diffs.extend(
_create_diffs_for(
current_path.append(key),
mapping_a[key],
mapping_b[key]
)
)
for key in b_keys.difference(a_keys):
resulting_diffs.append(
_Set(path=current_path.append(key), value=mapping_b[key])
)
for key in a_keys.difference(b_keys):
resulting_diffs.append(
_Remove(path=current_path, item=key)
)
return resulting_diffs.persistent()


def _create_diffs_for(current_path, subobj_a, subobj_b):
"""
Computes a series of ``_IDiffChange`` s to turn ``subobj_a`` into
``subobj_b`` assuming that these subobjs are at ``current_path`` inside a
nested pyrsistent object.

:param current_path: An iterable of pyrsistent object describing the path
inside the root pyrsistent object where the other arguments are
located. See ``PMap.transform`` for the format of this sort of path.

:param subobj_a: The desired input sub object.

:param subobj_b: The desired output sub object.

:returns: An iterable of ``_IDiffChange`` s that will turn ``subobj_a``
into ``subobj_b``.
"""
if subobj_a == subobj_b:
return pvector([])
elif type(subobj_a) != type(subobj_b):
return pvector([_Set(path=current_path, value=subobj_b)])
elif isinstance(subobj_a, PClass) and isinstance(subobj_b, PClass):
a_dict = subobj_a._to_dict()
b_dict = subobj_b._to_dict()
return _create_diffs_for_mappings(current_path, a_dict, b_dict)
elif isinstance(subobj_a, PMap) and isinstance(subobj_b, PMap):
return _create_diffs_for_mappings(
current_path, subobj_a, subobj_b)
elif isinstance(subobj_a, PSet) and isinstance(subobj_b, PSet):
return _create_diffs_for_sets(
current_path, subobj_a, subobj_b)
# If the objects are not equal, and there is no intelligent way to recurse
# inside the objects to make a smaller diff, simply set the current path
# to the object in b.
return pvector([_Set(path=current_path, value=subobj_b)])


def create_diff(object_a, object_b):
"""
Constructs a diff from ``object_a`` to ``object_b``

:param object_a: The desired input object.

:param object_b: The desired output object.

:returns: A ``_Diff`` that will convert ``object_a`` into ``object_b``
when applied.
"""
changes = _create_diffs_for(pvector([]), object_a, object_b)
return _Diff(changes=changes)


# Ensure that the representation of a ``_Diff`` is entirely serializable:
DIFF_SERIALIZABLE_CLASSES = [
_Set, _Remove, _Add, _Diff
]
6 changes: 4 additions & 2 deletions flocker/control/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

from zope.interface import Interface, implementer

from ._diffing import DIFF_SERIALIZABLE_CLASSES


def _sequence_field(checked_class, suffix, item_type, optional, initial):
"""
Expand Down Expand Up @@ -1235,5 +1237,5 @@ def get_information_wipe(self):
Deployment, Node, DockerImage, Port, Link, RestartNever, RestartAlways,
RestartOnFailure, Application, Dataset, Manifestation, AttachedVolume,
NodeState, DeploymentState, NonManifestDatasets, Configuration,
Lease, Leases, PersistentState,
]
Lease, Leases, PersistentState
] + DIFF_SERIALIZABLE_CLASSES
Loading