Skip to content

Commit

Permalink
Implement improvement for diffing indents for JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Apr 8, 2018
1 parent 0edb212 commit 7301f03
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 28 deletions.
32 changes: 24 additions & 8 deletions lib/diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,28 @@ class Diff {
/**
* Creates diffs for given `expected` and `actual` strings.
*
* Supported options:
* - boolean `ignoreWhitespace`: ignores whitespace changes in lines. Default: `false`
* - boolean `newlineIsToken`: if `true`, each symbol `\n` or sequence `\r\n` is considered
* a separate token; otherwise it is included as part of the preceding token. Default: `false`
* - boolean `ignoreCase`: if `true`, tokens are compared case-insensitive. Default: `false`
* - function(String, String) -> boolean `comparator`: function used to compare tokens. Default: none
* - Object|String `hint`: A hint telling the diff algorithm what type of objects it works with.
* Can be either a string with the type name or an object `{ type: String, ... }` with the type
* and additional options that depend on the type. Default: none
* For now only one type is known, with one additional option:
* - `{ type: 'json', indent: Number }`. Parameter `indent` specifies the indentation size used
* for JSON stringification. The diff algorithm uses this hint when computing
* differences in indents
*
* @api public
* @param {String} expected Base string which will be shown as the removed part of the diff
* @param {String} actual New string which will be shown as the added part of the diff
* @param {Object?} options The object containing options for a diff algorithm.
* Supported options:
* - boolean `ignoreWhitespace`: ignores whitespace changes in lines. Default: `false`
* - boolean `newlineIsToken`: if `true`, each symbol `\n` or sequence `\r\n` is considered
* a separate token; otherwise it is included as part of the preceding token. Default: `false`
* - boolean `ignoreCase`: if `true`, tokens are compared case-insensitive. Default: `false`
* - function(String, String) -> boolean `comparator`: function used to compare tokens. Default: none
*/
constructor(expected, actual, options) {
this._rawDiff = textDiff.diffLines(expected, actual, options);
this.options = options || {};
}

/**
Expand All @@ -41,7 +50,7 @@ class Diff {
unified(context) {
// istanbul ignore else Caching not tested
if (!this._unified) {
this._unified = unified(this._rawDiff);
this._unified = unified(this._rawDiff, this.indent);
}
return context === undefined ? this._unified : hunks(this._unified, context);
}
Expand All @@ -63,10 +72,17 @@ class Diff {
inline(context) {
// istanbul ignore else Caching not tested
if (!this._inline) {
this._inline = inline(this._rawDiff);
this._inline = inline(this._rawDiff, this.indent);
}
return context === undefined ? this._inline : hunks(this._inline, context);
}

get indent() {
if (this.options.hint && this.options.hint.type === 'json') {
return this.options.hint.indent;
}
return undefined;
}
}

module.exports = Diff;
16 changes: 13 additions & 3 deletions lib/generate.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,29 @@ function stringify(obj) {

/**
* Creates diffs for given error by comparing `expected` and `actual` converted to strings.
* Stringification is performed only if `expected`, `actual`, or both are not strings.
* It is stable (always produces the same results for the same objects) and can handle
* recursive structures.
*
* Currently stringification is performed with the [`safe-stable-stringify`][1] library.
*
* [1]: https://www.npmjs.com/package/safe-stable-stringify
*
* @api public
* @param {Object} expected Base object which will be shown as the removed part of the diff
* @param {Object} actual New object which will be shown as the added part of the diff
* @return {Object} Generated structured diff
* @param {Object?} options The object containing options for a diff algorithm.
* See documentation for `Diff` class for supported options
* @return {Object[]} Generated structured diff
*/
function generateDiff(expected, actual) {
function generateDiff(expected, actual, options) {
if (!isString(expected) || !isString(actual)) {
expected = stringify(expected);
actual = stringify(actual);
options = Object.assign({}, options, { hint: { type: 'json', indent: 2 } });
}

return new Diff(expected, actual);
return new Diff(expected, actual, options);
}

module.exports = generateDiff;
12 changes: 7 additions & 5 deletions lib/inline.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ function split(value) {
return tokens;
}

function fillInlineChanges(result, del, ins) {
function fillInlineChanges(result, del, ins, indent) {
let inlineDiff = wordDiff.diff(
del.join('\n'),
ins.join('\n')
ins.join('\n'),
{ indent }
);

// Changes in each line
Expand Down Expand Up @@ -53,9 +54,10 @@ function fillInlineChanges(result, del, ins) {
*
* @api private
* @param {Object[]} lines Array with changes
* @param {number?} indent If specified, runs of whitespace are split into tokens no longer than this length
* @return {Object[]} Array with inline changes of each line
*/
function inline(lines) {
function inline(lines, indent) {
let result = [];
let del = [];// texts of deleted lines
let ins = [];// texts of inserted lines
Expand All @@ -71,7 +73,7 @@ function inline(lines) {
push.apply(ins, text);
} else {
if (kind !== lastKind) {
fillInlineChanges(result, del, ins);
fillInlineChanges(result, del, ins, indent);
del = [];
ins = [];
}
Expand All @@ -82,7 +84,7 @@ function inline(lines) {
lastKind = kind;
}
if (del.length > 0 || ins.length > 0) {
fillInlineChanges(result, del, ins);
fillInlineChanges(result, del, ins, indent);
}

return result;
Expand Down
11 changes: 6 additions & 5 deletions lib/unified.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ function append(arr, i, inlineChange) {
arr[arr.length - 1].push(inlineChange);
}

function fillInlineChanges(result, del, ins) {
function fillInlineChanges(result, del, ins, indent) {
let inlineDiff = wordDiff.diff(
del.join('\n'),
ins.join('\n')
ins.join('\n'),
{ indent }
);

// Changes in each line
Expand Down Expand Up @@ -76,7 +77,7 @@ function fillInlineChanges(result, del, ins) {
fill(result, '+', insChanges);
}

function unified(lines) {
function unified(lines, indent) {
let result = [];
let del = [];// texts of deleted lines
let ins = [];// texts of inserted lines
Expand All @@ -92,7 +93,7 @@ function unified(lines) {
push.apply(ins, text);
} else {
if (kind !== lastKind) {
fillInlineChanges(result, del, ins);
fillInlineChanges(result, del, ins, indent);
del = [];
ins = [];
}
Expand All @@ -103,7 +104,7 @@ function unified(lines) {
lastKind = kind;
}
if (del.length > 0 || ins.length > 0) {
fillInlineChanges(result, del, ins);
fillInlineChanges(result, del, ins, indent);
}
return result;
}
Expand Down
18 changes: 11 additions & 7 deletions lib/word-diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,30 @@ let diff = require('diff');
// Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
//
// Ranges and exceptions:
// Latin-1 Supplement, 0080–00FF
// Latin-1 Supplement, 0080–00FF
// - U+00D7 × Multiplication sign
// - U+00F7 ÷ Division sign
// Latin Extended-A, 0100–017F
// Latin Extended-B, 0180–024F
// IPA Extensions, 0250–02AF
// Spacing Modifier Letters, 02B0–02FF
// Latin Extended-A, 0100–017F
// Latin Extended-B, 0180–024F
// IPA Extensions, 0250–02AF
// Spacing Modifier Letters, 02B0–02FF
// - U+02C7 ? ˇ Caron
// - U+02D8 ? ˘ Breve
// - U+02D9 ? ˙ Dot Above
// - U+02DA ? ˚ Ring Above
// - U+02DB ? ˛ Ogonek
// - U+02DC ? ˜ Small Tilde
// - U+02DD ? ˝ Double Acute Accent
// Latin Extended Additional, 1E00–1EFF
// Latin Extended Additional, 1E00–1EFF
// Matches tokens made only of basic or extended Latin letters. The Latin-1 range is split
// so that U+00D7 (multiplication sign) and U+00F7 (division sign) are excluded, and the
// Spacing Modifier Letters range skips U+02C7 and U+02D8–U+02DD (standalone diacritics).
const RE_WORDS_TO_MERGE = /^[A-Za-z\xC0-\xD6\xD8-\xF6\xF8-\u02C6\u02C8-\u02D7\u02DE-\u02FF\u1E00-\u1EFF]+$/;

let wordDiff = new diff.Diff();
wordDiff.tokenize = function(value) {
let tokens = value.split(/([^\S\n\r]+|[\r\n()[\]{}"';,]|\b)/);
let repeat = this.options.indent
? '{1,' + this.options.indent + '}'
: '+';
let re = new RegExp('([^\\S\\r\\n]' + repeat + '|[\\r\\n()[\\]{}"\';,]|\\b)');
let tokens = value.split(re);
// Join the boundary splits that we do not consider to be boundaries. This is primarily the extended Latin character set.
for (let i = 0; i < tokens.length - 1; ++i) {
// If we have an empty string in the next field and we have only word chars before and after, merge
Expand Down

0 comments on commit 7301f03

Please sign in to comment.