Skip to content

Commit

Permalink
Updated Diff engine
Browse files Browse the repository at this point in the history
We've moved to a new diff engine (codename Omnom) which is so much better
than the original diff algorithm as to make any comparison laughable.

The biggest reason for the move was to allow us to support array diffs
in an intelligent way while still allowing single queries to function
correctly.

To achieve this, there are a few important implications which you need
to take into account - namely, that using the built-in algorithm for combinations
of pushes and pulls on arrays is a TERRIBLE IDEA. This is because of the way
arrays are handled to support single queries: MongoDB doesn't allow
combinations of $push/$pull/$set on the same array within a single query,
for consistency reasons.

USE IT FOR:
 - Changing the values of array elements (and their child values) without
   changing their position in the array
 - Adding array elements and making changes to existing ones (at the same time if you wish)

BIG DON'TS:
 - Removing array elements while adding/modifying others
 - Insertion at the front of an array (Forces the array to be replaced)
  • Loading branch information
notheotherben committed Jan 14, 2014
1 parent d8134a9 commit ab9eb07
Show file tree
Hide file tree
Showing 6 changed files with 527 additions and 206 deletions.
38 changes: 3 additions & 35 deletions lib/Instance.js
Expand Up @@ -7,7 +7,8 @@ var ObjectID = require('mongodb').ObjectID,
EventEmitter = require('events').EventEmitter,
debug = require('debug')('iridium:Instance'),

validate = require('./utils/validation');
validate = require('./utils/validation'),
diff = require('./utils/diff');


(require.modules || {}).Instance = module.exports = Instance;
Expand Down Expand Up @@ -301,37 +302,4 @@ Instance.forModel = function(model) {
return ModelInstance;
};

var diffPatch = Instance.diff = function (oldDoc, newDoc, path) {
    /// <signature>
    /// <summary>Creates a differential update query for use by MongoDB</summary>
    /// <param name="oldDoc" type="Object">The original document prior to any changes</param>
    /// <param name="newDoc" type="Object">The document containing the changes made to the original document</param>
    /// <param name="path" type="String" optional="true">Dot-separated prefix for the generated keys (supplied when recursing into nested objects)</param>
    /// <returns type="Object">An object whose $set member (if any) maps changed paths to their new values</returns>
    /// </signature>

    "use strict";

    var changes = {};
    var prefix = path ? (path + '.') : '';

    for (var k in newDoc) {
        if (Array.isArray(newDoc[k]) && Array.isArray(oldDoc[k])) {
            // Compare the lengths of the arrays themselves. The parent documents have no
            // `length`, so comparing them always reported "same length" and a truncated
            // array with an unchanged prefix was never detected as a change.
            var different = newDoc[k].length !== oldDoc[k].length;
            for (var i = 0; i < newDoc[k].length && !different; i++) {
                if (oldDoc[k][i] !== newDoc[k][i]) different = true;
            }
            if (!different) continue;

            // Arrays are always replaced wholesale
            changes.$set = changes.$set || {};
            changes.$set[prefix + k] = newDoc[k];
        }
        else if (_.isPlainObject(newDoc[k]) && _.isPlainObject(oldDoc[k])) {
            // Make recursive diff update
            _.merge(changes, diffPatch(oldDoc[k], newDoc[k], prefix + k));
        }
        else {
            if (oldDoc[k] === newDoc[k]) continue;
            changes.$set = changes.$set || {};
            changes.$set[prefix + k] = newDoc[k];
        }
    }

    return changes;
};
Instance.diff = diff;
199 changes: 199 additions & 0 deletions lib/utils/diff.js
@@ -0,0 +1,199 @@
var _ = require('lodash');

module.exports = diff;

/**
 * Computes the update operations that transform `original` into `modified`.
 *
 * @param {*} original The document before the changes.
 * @param {*} modified The document after the changes.
 * @returns {Object} The `changes` object accumulated by a fresh Omnom instance.
 */
function diff(original, modified) {
    var engineOptions = { cautious: true, orderedArrays: true };
    var engine = new Omnom(engineOptions);

    engine.diff(original, modified);
    return engine.changes;
}

/**
 * Diff engine instance; accumulates update operations in `changes`.
 *
 * @param {Object} options Engine options, stored as-is on the instance.
 */
function Omnom(options) {
    var engine = this;

    engine.options = options;
    // Starts empty; the operation helpers add members such as $set as they are called
    engine.changes = {};
}

/**
 * Entry point: compares two documents from the root (no change path) and
 * records the resulting operations on `this.changes`.
 *
 * @param {*} original The document before the changes.
 * @param {*} modified The document after the changes.
 */
Omnom.prototype.diff = function(original, modified) {
    var engine = this;
    engine.onObject(original, modified);
};

/**
 * Compares two values and records the operations needed to turn `original`
 * into `modified` at `changePath`.
 *
 *  - missing/null original       => $set (when the values differ)
 *  - two different numbers       => $inc by the difference
 *  - two arrays                  => delegated to onArray
 *  - anything else not both plain objects => $set (when the values differ)
 *  - two plain objects           => each property is compared recursively and
 *                                   properties that are gone (undefined/null) are $unset
 *
 * @param {*} original The value before the changes.
 * @param {*} modified The value after the changes.
 * @param {String} [changePath] Dot-separated path of the value being compared.
 */
Omnom.prototype.onObject = function(original, modified, changePath) {
    if(original === undefined || original === null)
        return (original !== modified) && this.set(changePath, modified);

    if(typeof original == 'number' && typeof modified == 'number' && original !== modified)
        return this.inc(changePath, modified - original);

    if(Array.isArray(original) && Array.isArray(modified))
        return this.onArray(original, modified, changePath);

    // NOTE(review): this comparison is deliberately left loose (as before), so values
    // which only differ by type (e.g. 1 and '1') are not reported as changes.
    if(!_.isPlainObject(original) || !_.isPlainObject(modified))
        return (original != modified) && this.set(changePath, modified);

    var hasOwn = Object.prototype.hasOwnProperty;

    _.each(modified, function(value, key) {
        // A property which existed before and is now undefined/null is removed by the
        // $unset pass below. Recording a $set for it as well would put the same path
        // under two operators, which MongoDB rejects as a conflicting update.
        // (Plain `return` on purpose: returning false would abort the iteration.)
        if((value === undefined || value === null) && hasOwn.call(original, key)) return;

        // Handle array diffs in their own special way
        if(Array.isArray(value) && Array.isArray(original[key])) this.onArray(original[key], value, resolve(changePath, key));

        // Otherwise, just keep going
        else this.onObject(original[key], value, resolve(changePath, key));
    }, this);

    // Unset removed properties
    _.each(original, function(value, key) {
        if(modified[key] === undefined || modified[key] === null) this.unset(resolve(changePath, key));
    }, this);
};

/**
 * Records the operations needed to turn the array `original` into `modified`.
 *
 * Strategy, in order of preference:
 *  1. shorter target  => $pull/$pullAll when the target is the original minus some
 *                        elements, otherwise $set of the whole array
 *  2. longer target   => $push of the new tail when the existing elements are untouched
 *  3. otherwise       => per-index $set / recursive diffs, or $set of the whole array
 *                        when more than half of the elements need replacing
 *
 * @param {Array} original The array before the changes.
 * @param {Array} modified The array after the changes.
 * @param {String} changePath Dot-separated path of the array.
 */
Omnom.prototype.onArray = function(original, modified, changePath) {
    var i, j, k;

    // almostEqual yields a similarity score; only a score of exactly 1 means the two
    // values are identical. Partial matches (and NaN) must never be treated as equal
    // when deciding whether an element may be kept, pulled or left alone.
    var identical = function(a, b) {
        return almostEqual(a, b) === 1;
    };

    // Check if we can get from original => modified using just pulls
    if(original.length > modified.length) {
        var pulls = [];
        for(i = 0, j = 0; i < original.length && j < modified.length; i++) {
            if(identical(original[i], modified[j])) j++;
            else pulls.push(original[i]);
        }

        for(; i < original.length; i++)
            pulls.push(original[i]);

        var canPull = j === modified.length;

        // $pull and $pullAll remove EVERY matching element, so a value may only be
        // pulled if no identical element is supposed to remain in the array.
        for(i = 0; canPull && i < pulls.length; i++)
            for(k = 0; k < modified.length; k++)
                if(identical(pulls[i], modified[k])) {
                    canPull = false;
                    break;
                }

        if(canPull) {
            if(pulls.length === 1) return this.pull(changePath, pulls[0]);
            // We can complete using just pulls
            return this.pullAll(changePath, pulls);
        }

        // If we have a smaller target array than our source, we will need to re-create it
        // regardless (if we want to do so in a single operation anyway)
        return this.set(changePath, modified);
    }

    // Check if we can get from original => modified using just pushes
    if(original.length < modified.length) {
        var canPush = true;
        for(i = 0; i < original.length; i++)
            if(!identical(original[i], modified[i])) {
                canPush = false;
                break;
            }

        if(canPush) {
            for(i = original.length; i < modified.length; i++)
                this.push(changePath, modified[i]);
            return;
        }
    }

    // Otherwise, we need to use $set to generate the new array

    // Check how many manipulations would need to be performed, if it's more than half the array size
    // then rather re-create the array

    var sets = [];
    var partials = [];
    for(i = 0; i < modified.length; i++) {
        var equality = almostEqual(original[i], modified[i]);
        if(equality === 1) continue;

        // A score of 0 means the element is replaced outright; any other score
        // (fractional, negative or NaN) is resolved by a recursive diff so that
        // no difference is silently dropped.
        if(equality === 0) sets.push(i);
        else partials.push(i);
    }

    if(sets.length > modified.length / 2)
        return this.set(changePath, modified);

    for(i = 0; i < sets.length; i++)
        this.set(resolve(changePath, sets[i].toString()), modified[sets[i]]);

    for(i = 0; i < partials.length; i++)
        this.onObject(original[partials[i]], modified[partials[i]], resolve(changePath, partials[i].toString()));
};

/**
 * Records a `$set` of `value` at `path`, creating the `$set` container on first use.
 *
 * @param {String} path Dot-separated path of the property.
 * @param {*} value The new value.
 */
Omnom.prototype.set = function(path, value) {
    var assignments = this.changes.$set || (this.changes.$set = {});
    assignments[path] = value;
};

/**
 * Records a `$unset` of `path`, creating the `$unset` container on first use.
 *
 * @param {String} path Dot-separated path of the property to remove.
 * @param {*} [value] Ignored; the recorded operand is always 1.
 */
Omnom.prototype.unset = function(path, value) {
    var removals = this.changes.$unset || (this.changes.$unset = {});
    removals[path] = 1;
};

/**
 * Records a `$inc` of `path` by `value`, creating the `$inc` container on first use.
 *
 * @param {String} path Dot-separated path of the numeric property.
 * @param {Number} value The amount to add (may be negative).
 */
Omnom.prototype.inc = function(path, value) {
    var increments = this.changes.$inc || (this.changes.$inc = {});
    increments[path] = value;
};

/**
 * Records a `$push` of `value` onto the array at `path`.
 *
 * The first value for a path is stored directly; further values for the same
 * path are collected into a `{ $each: [...] }` operand in the order they were pushed.
 *
 * @param {String} path Dot-separated path of the array.
 * @param {*} value The element to append.
 */
Omnom.prototype.push = function(path, value) {
    if(!this.changes.$push)
        this.changes.$push = {};

    var pushes = this.changes.$push;

    // Test for the presence of the key rather than the truthiness of its value:
    // a previously pushed 0, '', false or null must not be overwritten by the next push.
    if(!Object.prototype.hasOwnProperty.call(pushes, path)) {
        pushes[path] = value;
        return;
    }

    var pending = pushes[path];
    if(pending !== null && typeof pending === 'object' && Array.isArray(pending.$each))
        pending.$each.push(value);
    else
        pushes[path] = { $each: [pending, value] };
};

/**
 * Records the removal of `value` from the array at `path`.
 *
 * A single value per path is recorded as `$pull`. When a second value is pulled
 * from the same path, both are moved into a `$pullAll` list for that path.
 *
 * @param {String} path Dot-separated path of the array.
 * @param {*} value The element to remove.
 * @returns {Number|undefined} The new length of the `$pullAll` list when the value
 *   was appended to an existing list, otherwise undefined.
 */
Omnom.prototype.pull = function(path, value) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var changes = this.changes;

    // A $pullAll list already exists for this path - just extend it
    if(changes.$pullAll && hasOwn.call(changes.$pullAll, path)) {
        return changes.$pullAll[path].push(value);
    }

    // Key presence (not truthiness) decides whether a pull is pending, so that a
    // previously pulled 0, '' or false is not lost.
    if(changes.$pull && hasOwn.call(changes.$pull, path)) {
        var previous = changes.$pull[path];

        delete changes.$pull[path];
        // Do not leave an empty $pull operator behind; an update operator with an
        // empty operand is rejected by MongoDB versions prior to 5.0.
        if(Object.keys(changes.$pull).length === 0) delete changes.$pull;

        this.pullAll(path, [previous, value]);
        return;
    }

    // Create the container lazily so it only exists when it has content
    if(!changes.$pull)
        changes.$pull = {};

    changes.$pull[path] = value;
};

/**
 * Records a `$pullAll` of `values` for the array at `path`, creating the
 * `$pullAll` container on first use. Any list already recorded for the path is replaced.
 *
 * @param {String} path Dot-separated path of the array.
 * @param {Array} values The elements to remove.
 */
Omnom.prototype.pullAll = function(path, values) {
    var removals = this.changes.$pullAll || (this.changes.$pullAll = {});
    removals[path] = values;
};

/**
 * Joins the given path segments with '.', skipping every falsy segment
 * (undefined, null, '', 0, ...).
 *
 * @returns {String} The dot-separated path, or '' when no segment is truthy.
 */
function resolve() {
    return Array.prototype.slice.call(arguments)
        .filter(Boolean)
        .join('.');
}

/**
 * Scores how similar two values are.
 *
 * Values which are not both plain objects score 1 when loosely equal and 0 otherwise.
 * Two plain objects score
 *   1 - (keys present in only one object / all keys) - (common keys whose values differ / common keys)
 * so 1 means identical, and lower scores (which may be negative) mean less similar.
 *
 * @param {*} o1 The first value.
 * @param {*} o2 The second value.
 * @returns {Number} The similarity score; never NaN.
 */
var almostEqual = function (o1, o2) {
    if(!_.isPlainObject(o1) || !_.isPlainObject(o2)) return o1 == o2 ? 1 : 0;

    var o1k = Object.keys(o1);
    var o2k = Object.keys(o2);

    var commonKeys = o1k.filter(function(key) {
        return o2k.indexOf(key) !== -1;
    });

    var totalKeys = o1k.length + o2k.length - commonKeys.length;

    // Two empty objects are identical (avoids 0 / 0 => NaN)
    if(totalKeys === 0) return 1;

    // Objects which share no keys have nothing in common (avoids 0 / 0 => NaN)
    if(commonKeys.length === 0) return 0;

    var keysDifference = totalKeys - commonKeys.length;

    var requiredChanges = 0;
    for(var i = 0; i < commonKeys.length; i++)
        if(almostEqual(o1[commonKeys[i]], o2[commonKeys[i]]) < 1) requiredChanges++;

    return 1 - (keysDifference / totalKeys) - (requiredChanges / commonKeys.length);
};
133 changes: 133 additions & 0 deletions test/diff.js
@@ -0,0 +1,133 @@
var diff = require('../lib/utils/diff');

// Behavioural specification for the diff engine: each case feeds a "before" and an
// "after" document to diff() and checks the exact update query it produces.
describe('diff', function() {
    // Flat documents: numbers become $inc, other changes $set, removed keys $unset
    it('should correctly diff basic objects', function() {
        var before = { a: 1, b: 'test', c: 2, d: 'constant', e: 'old' };
        var after = { a: 3, b: 'tested', c: 2, d: 'constant', f: 'new' };

        diff(before, after).should.eql({
            $inc: { a: 2 },
            $set: { b: 'tested', f: 'new' },
            $unset: { e: 1 }
        });
    });

    // Nested documents: operations are keyed by dot-separated paths
    it('should correctly diff complex objects', function() {
        var before = {
            a: { value: 1 },
            b: { value1: 1, value2: 1 },
            c: { value: 2 },
            d: { value: {} },
            e: { value: true }
        };
        var after = {
            a: { value: 3 },
            b: { value1: 'tested', value2: 2 },
            c: { value: 2 },
            d: { value: {} },
            e: { value2: false }
        };

        diff(before, after).should.eql({
            $inc: { 'a.value': 2, 'b.value2': 1 },
            $set: { 'b.value1': 'tested', 'e.value2': false },
            $unset: { 'e.value': 1 }
        });
    });

    describe('arrays', function() {
        // One removed element => $pull, several => $pullAll
        it('should correctly handle arrays which can be pulled', function() {
            var before = { a: [1,2,3,4], b: [1,2,3,4] };
            var after = { a: [1,3,4], b: [1,3] };

            diff(before, after).should.eql({
                $pull: { a: 2 },
                $pullAll: { b: [2,4] }
            });
        });

        // One appended element => plain $push, several => $push with $each
        it('should correctly handle arrays which can be pushed', function() {
            var before = { a: [1,2,3,4], b: [1,2,3,4] };
            var after = { a: [1,2,3,4,5], b: [1,2,3,4,5,6] };

            diff(before, after).should.eql({
                $push: { a: 5, b: { $each: [5,6] }}
            });
        });

        // Mostly different contents => the whole array is $set
        it('should correctly handle arrays which should be replaced', function() {
            var before = { a: [1,2], b: [1,2,3] };
            var after = { a: [5,4,3], b: [5,4,3,2] };

            diff(before, after).should.eql({
                $set: {
                    a: [5,4,3],
                    b: [5,4,3,2]
                }
            });
        });

        // A few differing positions => individual indexes are $set
        it("should correctly handle arrays which can be partially modified", function() {
            var before = { a: [1,2,3,4], b: [1,2,3,4] };
            var after = { a: [1,2,5,4,5], b: [1,2,5,4,5,6] };

            diff(before, after).should.eql({
                $set: {
                    'a.2': 5,
                    'a.4': 5,
                    'b.2': 5,
                    'b.4': 5,
                    'b.5': 6
                }
            });
        });

        // Object elements are diffed property by property
        it("should correctly diff array elements as objects", function() {
            var postedAt = new Date();
            var editedAt = new Date(postedAt.getTime() + 50);

            var before = { comments: [
                { id: 1, title: 'Title 1', text: 'test text 1', posted: postedAt },
                { id: 2, title: 'Title 2', text: 'test text 2', posted: postedAt },
                { id: 3, title: 'Title 3', text: 'test text 3', posted: postedAt }
            ]};
            var after = { comments: [
                { id: 1, title: 'Title 1', text: 'tested text 1', posted: postedAt },
                { id: 2, title: 'Title 2', text: 'tested text 2', posted: postedAt },
                { id: 3, title: 'Title 3', text: 'test text 3', posted: editedAt }
            ]};

            diff(before, after).should.eql({
                $set: {
                    'comments.0.text': 'tested text 1',
                    'comments.1.text': 'tested text 2',
                    'comments.2.posted': editedAt
                }
            });
        });
    });
});

0 comments on commit ab9eb07

Please sign in to comment.