Implement improvement for diffing indents for JSON

Mingun · Apr 8, 2018 · 7301f03 · 7301f03
1 parent 0edb212
commit 7301f03
Show file tree

Hide file tree

Showing 5 changed files with 61 additions and 28 deletions.
diff --git a/lib/diff.js b/lib/diff.js
@@ -9,19 +9,28 @@ class Diff {
   /**
    * Creates diffs for given `expected` and `actual` strings.
    *
+   * Supported options:
+   * - boolean `ignoreWhitespace`: ignores whitespace changes in lines. Default: `false`
+   * - boolean `newlineIsToken`: if `true`, each symbol `\n` or sequence `\r\n` is considered
+   *   a separate token, otherwise it is included as part of the preceding token. Default: `false`
+   * - boolean `ignoreCase`: if `true`, tokens are compared case-insensitive. Default: `false`
+   * - function(String, String) -> boolean `comparator`: function used to compare tokens. Default: none
+   * - Object|String `hint`: A hint telling the diff algorithm what type of objects it works with.
+   *   Can be either a string with the type name or an object `{ type: String, ... }` with the type
+   *   and additional type-dependent options. Default: none
+   *   For now only one type is known, with one additional option:
+   *   - `{ type: 'json', indent: Number }`. Parameter `indent` specifies the indentation size used
+   *     for JSON stringification. This hint is used by the diff algorithm when computing
+   *     differences in indents
+   *
    * @api public
    * @param {String} expected Base string which will be show as removed part of diff
    * @param {String} actual New string which will be showed as added part of diff
    * @param {Object?} options The object containing options for a diff algorithm.
-   *        Supported options:
-   *        - boolean `ignoreWhitespace`: ignores whitespace changes in lines. Default: `false`
-   *        - boolean `newlineIsToken`: if `true`, each symbol `\n` or sequence `\r\n` considered
-   *          as separate token, otherwise it includes as part in preceding token. Default: `false`
-   *        - boolean `ignoreCase`: if `true`, tokens are compared case-insensitive. Default: `false`
-   *        - function(String, String) -> boolean `comparator`: function for compare tokens. Default: none
    */
   constructor(expected, actual, options) {
     this._rawDiff = textDiff.diffLines(expected, actual, options);
+    this.options = options || {};
   }
 
   /**
@@ -41,7 +50,7 @@ class Diff {
   unified(context) {
     // istanbul ignore else Caching not tested
     if (!this._unified) {
-      this._unified = unified(this._rawDiff);
+      this._unified = unified(this._rawDiff, this.indent);
     }
     return context === undefined ? this._unified : hunks(this._unified, context);
   }
@@ -63,10 +72,17 @@ class Diff {
   inline(context) {
     // istanbul ignore else Caching not tested
     if (!this._inline) {
-      this._inline = inline(this._rawDiff);
+      this._inline = inline(this._rawDiff, this.indent);
     }
     return context === undefined ? this._inline : hunks(this._inline, context);
   }
+
+  get indent() {
+    if (this.options.hint && this.options.hint.type === 'json') {
+      return this.options.hint.indent;
+    }
+    return undefined;
+  }
 }
 
 module.exports = Diff;
diff --git a/lib/generate.js b/lib/generate.js
@@ -15,19 +15,29 @@ function stringify(obj) {
 
 /**
  * Creates diffs for given error by comparing `expected` and `actual` converted to strings.
+ * Stringification is performed only if `expected`, `actual`, or both are not strings.
+ * It is stable (always produces the same results for the same objects) and can handle
+ * recursive structures.
+ *
+ * Currently stringification is performed with the [`safe-stable-stringify`][1] library.
+ *
+ * [1]: https://www.npmjs.com/package/safe-stable-stringify
  *
  * @api public
  * @param {Object} expected Base object which will be show as removed part of diff
  * @param {Object} actual New object which will be showed as added part of diff
- * @return {Object} Generated structured diff
+ * @param {Object?} options The object containing options for a diff algorithm.
+ *        See documentation for `Diff` class for supported options
+ * @return {Object[]} Generated structured diff
  */
-function generateDiff(expected, actual) {
+function generateDiff(expected, actual, options) {
   if (!isString(expected) || !isString(actual)) {
     expected = stringify(expected);
     actual   = stringify(actual);
+    options  = Object.assign({}, options, { hint: { type: 'json', indent: 2 } });
   }
 
-  return new Diff(expected, actual);
+  return new Diff(expected, actual, options);
 }
 
 module.exports = generateDiff;
diff --git a/lib/inline.js b/lib/inline.js
@@ -13,10 +13,11 @@ function split(value) {
   return tokens;
 }
 
-function fillInlineChanges(result, del, ins) {
+function fillInlineChanges(result, del, ins, indent) {
   let inlineDiff = wordDiff.diff(
     del.join('\n'),
-    ins.join('\n')
+    ins.join('\n'),
+    { indent }
   );
 
   // Changes in each line
@@ -53,9 +54,10 @@ function fillInlineChanges(result, del, ins) {
  *
  * @api private
  * @param {Object[]} lines Array with changes
+ * @param {number?} indent If specified, runs of whitespace are split into tokens of at most this length
  * @return {Object[]} Array with inline changes of each line
  */
-function inline(lines) {
+function inline(lines, indent) {
   let result = [];
   let del = [];// texts of deleted lines
   let ins = [];// texts of inserted lines
@@ -71,7 +73,7 @@ function inline(lines) {
       push.apply(ins, text);
     } else {
       if (kind !== lastKind) {
-        fillInlineChanges(result, del, ins);
+        fillInlineChanges(result, del, ins, indent);
         del = [];
         ins = [];
       }
@@ -82,7 +84,7 @@ function inline(lines) {
     lastKind = kind;
   }
   if (del.length > 0 || ins.length > 0) {
-    fillInlineChanges(result, del, ins);
+    fillInlineChanges(result, del, ins, indent);
   }
 
   return result;

diff --git a/lib/unified.js b/lib/unified.js
@@ -44,10 +44,11 @@ function append(arr, i, inlineChange) {
   arr[arr.length - 1].push(inlineChange);
 }
 
-function fillInlineChanges(result, del, ins) {
+function fillInlineChanges(result, del, ins, indent) {
   let inlineDiff = wordDiff.diff(
     del.join('\n'),
-    ins.join('\n')
+    ins.join('\n'),
+    { indent }
   );
 
   // Changes in each line
@@ -76,7 +77,7 @@ function fillInlineChanges(result, del, ins) {
   fill(result, '+', insChanges);
 }
 
-function unified(lines) {
+function unified(lines, indent) {
   let result = [];
   let del = [];// texts of deleted lines
   let ins = [];// texts of inserted lines
@@ -92,7 +93,7 @@ function unified(lines) {
       push.apply(ins, text);
     } else {
       if (kind !== lastKind) {
-        fillInlineChanges(result, del, ins);
+        fillInlineChanges(result, del, ins, indent);
         del = [];
         ins = [];
       }
@@ -103,7 +104,7 @@ function unified(lines) {
     lastKind = kind;
   }
   if (del.length > 0 || ins.length > 0) {
-    fillInlineChanges(result, del, ins);
+    fillInlineChanges(result, del, ins, indent);
   }
   return result;
 }

diff --git a/lib/word-diff.js b/lib/word-diff.js
@@ -4,26 +4,30 @@ let diff = require('diff');
 // Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
 //
 // Ranges and exceptions:
-// Latin-1 Supplement, 0080–00FF
+// Latin-1 Supplement, 0080–00FF
 //  - U+00D7  × Multiplication sign
 //  - U+00F7  ÷ Division sign
-// Latin Extended-A, 0100–017F
-// Latin Extended-B, 0180–024F
-// IPA Extensions, 0250–02AF
-// Spacing Modifier Letters, 02B0–02FF
+// Latin Extended-A, 0100–017F
+// Latin Extended-B, 0180–024F
+// IPA Extensions, 0250–02AF
+// Spacing Modifier Letters, 02B0–02FF
 //  - U+02C7  ˇ &#711;  Caron
 //  - U+02D8  ˘ &#728;  Breve
 //  - U+02D9  ˙ &#729;  Dot Above
 //  - U+02DA  ˚ &#730;  Ring Above
 //  - U+02DB  ˛ &#731;  Ogonek
 //  - U+02DC  ˜ &#732;  Small Tilde
 //  - U+02DD  ˝ &#733;  Double Acute Accent
-// Latin Extended Additional, 1E00–1EFF
+// Latin Extended Additional, 1E00–1EFF
 const RE_WORDS_TO_MERGE = /^[A-Za-z\xC0-\u02C6\u02C8-\u02D7\u02DE-\u02FF\u1E00-\u1EFF]+$/;
 
 let wordDiff = new diff.Diff();
 wordDiff.tokenize = function(value) {
-  let tokens = value.split(/([^\S\n\r]+|[\r\n()[\]{}"';,]|\b)/);
+  let repeat = this.options.indent
+    ? '{1,' + this.options.indent + '}'
+    : '+';
+  let re = new RegExp('([^\\S\\r\\n]' + repeat + '|[\\r\\n()[\\]{}"\';,]|\\b)');
+  let tokens = value.split(re);
   // Join the boundary splits that we do not consider to be boundaries. This is primarily the extended Latin character set.
   for (let i = 0; i < tokens.length - 1; ++i) {
     // If we have an empty string in the next field and we have only word chars before and after, merge