From 6c4c7052c7793e63cc5bdd70d84930be883c6657 Mon Sep 17 00:00:00 2001 From: Tomas Slusny Date: Wed, 19 Nov 2025 04:30:05 +0100 Subject: [PATCH] refactor(diff): remove diff-match-patch dependency Remove the bundled diff-match-patch Lua port and replace its usage in the diff utility with a simpler, line-based matching and replacement algorithm. This reduces code complexity and removes a large vendored file. The new approach uses context-aware line matching to apply hunks, which should be sufficient for the plugin's needs. Closes #1490 Signed-off-by: Tomas Slusny --- README.md | 8 - lua/CopilotChat/utils/diff.lua | 127 +- lua/CopilotChat/vendor/diff_match_patch.lua | 2085 ------------------- tests/diff_spec.lua | 75 + 4 files changed, 156 insertions(+), 2139 deletions(-) delete mode 100644 lua/CopilotChat/vendor/diff_match_patch.lua diff --git a/README.md b/README.md index ffdbb60b..89607cbc 100644 --- a/README.md +++ b/README.md @@ -523,14 +523,6 @@ make test See [CONTRIBUTING.md](/CONTRIBUTING.md) for detailed guidelines. -# Acknowledgments - -## diff-match-patch - -CopilotChat.nvim includes [diff-match-patch (Lua port)](https://github.com/google/diff-match-patch) for diffing and patching functionality. -Copyright 2018 The diff-match-patch Authors. -Licensed under the Apache License 2.0. - # Contributors Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): diff --git a/lua/CopilotChat/utils/diff.lua b/lua/CopilotChat/utils/diff.lua index 17eb3091..51cf1887 100644 --- a/lua/CopilotChat/utils/diff.lua +++ b/lua/CopilotChat/utils/diff.lua @@ -40,55 +40,81 @@ local function parse_hunks(diff_text) return hunks end ---- Apply a single hunk to content, with fallback/context logic +--- Try to match old_snippet in lines starting at approximate start_line +---@param lines table +---@param old_snippet table +---@param approx_start number +---@param search_range number +---@return number? matched_start +local function find_best_match(lines, old_snippet, approx_start, search_range) + local best_idx, best_score = nil, -1 + local old_len = #old_snippet + + if old_len == 0 then + return approx_start + end + + local min_start = math.max(1, approx_start - search_range) + local max_start = math.min(#lines - old_len + 1, approx_start + search_range) + + for start_idx = min_start, max_start do + local score = 0 + for i = 1, old_len do + if vim.trim(lines[start_idx + i - 1] or '') == vim.trim(old_snippet[i] or '') then + score = score + 1 + end + end + + if score > best_score then + best_score = score + best_idx = start_idx + end + + if score == old_len then + return best_idx + end + end + + if best_score >= math.ceil(old_len * 0.8) then + return best_idx + end + + return nil +end + +--- Apply a single hunk to content ---@param hunk table ---@param content string ---@return string patched_content, boolean applied_cleanly local function apply_hunk(hunk, content) - local dmp = require('CopilotChat.vendor.diff_match_patch') - local patch = dmp.patch_make(table.concat(hunk.old_snippet, '\n'), table.concat(hunk.new_snippet, '\n')) - - -- First try: direct application - local patched, results = dmp.patch_apply(patch, content) - if not vim.tbl_contains(results, false) then - return patched, true - end - - -- Fallback: direct replacement local lines = vim.split(content, '\n') - local insert_idx = hunk.start_old or 1 - if not hunk.start_old then - -- No starting point, try to find best match - local match_idx, best_score = nil, -1 - local context_lines = vim.tbl_filter(function(line) - return line and line ~= '' - end, hunk.old_snippet) - local context_len = #context_lines - if context_len > 0 then - for i = 1, #lines - context_len + 1 do - local score = 0 - for j = 1, context_len do - if vim.trim(lines[i + j - 1] or '') == vim.trim(context_lines[j] or '') then - score = score + 1 - end - end - if score > best_score then - best_score = score - match_idx = i - end - end + local start_idx = hunk.start_old + + -- If we have a start line hint, try to find best match within +/- 2 lines + if start_idx and start_idx > 0 and start_idx <= #lines then + local match_idx = find_best_match(lines, hunk.old_snippet, start_idx, 2) + if match_idx then + start_idx = match_idx end - if best_score > 0 and match_idx then - insert_idx = match_idx + else + -- No valid start line, search for best match in whole content + local match_idx = find_best_match(lines, hunk.old_snippet, 1, #lines) + if match_idx then + start_idx = match_idx + else + start_idx = 1 end end - local start_idx = insert_idx - local end_idx = insert_idx + #hunk.old_snippet + -- Replace old lines with new lines + local end_idx = start_idx + #hunk.old_snippet - 1 local new_lines = vim.list_slice(lines, 1, start_idx - 1) vim.list_extend(new_lines, hunk.new_snippet) vim.list_extend(new_lines, lines, end_idx + 1, #lines) - return table.concat(new_lines, '\n'), false + + -- Check if we matched exactly at the hinted position + local applied_cleanly = find_best_match(lines, hunk.old_snippet, hunk.start_old or start_idx, 0) == start_idx + return table.concat(new_lines, '\n'), applied_cleanly end --- Apply unified diff to a table of lines and return new lines @@ -104,16 +130,25 @@ function M.apply_unified_diff(diff_text, original_content) new_content = patched applied = applied or ok end - local original_lines = vim.split(original_content, '\n', { trimempty = true }) + local new_lines = vim.split(new_content, '\n', { trimempty = true }) + local hunks = vim.diff( + original_content, + new_content, + { algorithm = 'myers', ctxlen = 10, interhunkctxlen = 10, ignore_whitespace_change = true, result_type = 'indices' } + ) + if not hunks or #hunks == 0 then + return new_lines, applied, nil, nil + end local first, last - local max_len = math.max(#original_lines, #new_lines) - for i = 1, max_len do - if original_lines[i] ~= new_lines[i] then - if not first then - first = i - end - last = i + for _, hunk in ipairs(hunks) do + local hunk_start = hunk[1] + local hunk_end = hunk[1] + hunk[2] - 1 + if not first or hunk_start < first then + first = hunk_start + end + if not last or hunk_end > last then + last = hunk_end end end return new_lines, applied, first, last @@ -129,7 +164,7 @@ function M.get_diff(block, lines) return block.content, content end - local patched_lines = vim.split(block.content, '\n') + local patched_lines = vim.split(block.content, '\n', { trimempty = true }) local start_idx = block.header.start_line local end_idx = block.header.end_line local original_lines = lines diff --git a/lua/CopilotChat/vendor/diff_match_patch.lua b/lua/CopilotChat/vendor/diff_match_patch.lua deleted file mode 100644 index b2c397d0..00000000 --- a/lua/CopilotChat/vendor/diff_match_patch.lua +++ /dev/null @@ -1,2085 +0,0 @@ ---[[ -* Diff Match and Patch -* Copyright 2018 The diff-match-patch Authors. -* https://github.com/google/diff-match-patch -* -* Based on the JavaScript implementation by Neil Fraser. -* Ported to Lua by Duncan Cross. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. ---]] - -local bit = require('bit') -local band, bor, lshift = bit.band, bit.bor, bit.lshift -local type, setmetatable, ipairs, select = type, setmetatable, ipairs, select -local unpack, tonumber, error = unpack, tonumber, error -local strsub, strbyte, strchar, gmatch, gsub = string.sub, string.byte, string.char, string.gmatch, string.gsub -local strmatch, strfind, strformat = string.match, string.find, string.format -local tinsert, tremove, tconcat = table.insert, table.remove, table.concat -local max, min, floor, ceil, abs = math.max, math.min, math.floor, math.ceil, math.abs -local clock = os.clock - --- Utility functions. - -local percentEncode_pattern = "[^A-Za-z0-9%-=;',./~!@#$%&*%(%)_%+ %?]" -local function percentEncode_replace(v) - return strformat('%%%02X', strbyte(v)) -end - -local function indexOf(a, b, start) - if #b == 0 then - return nil - end - return strfind(a, b, start, true) -end - -local htmlEncode_pattern = '[&<>\n]' -local htmlEncode_replace = { - ['&'] = '&', - ['<'] = '<', - ['>'] = '>', - ['\n'] = '¶
', -} - --- Public API Functions --- (Exported at the end of the script) - -local diff_main, diff_cleanupSemantic, diff_cleanupEfficiency, diff_levenshtein, diff_prettyHtml - -local match_main - -local patch_make, patch_toText, patch_fromText, patch_apply - ---[[ -* The data structure representing a diff is an array of tuples: -* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}} -* which means: delete 'Hello', add 'Goodbye' and keep ' world.' ---]] -local DIFF_DELETE = -1 -local DIFF_INSERT = 1 -local DIFF_EQUAL = 0 - --- Number of seconds to map a diff before giving up (0 for infinity). -local Diff_Timeout = 1.0 --- Cost of an empty edit operation in terms of edit characters. -local Diff_EditCost = 4 --- At what point is no match declared (0.0 = perfection, 1.0 = very loose). -local Match_Threshold = 0.5 --- How far to search for a match (0 = exact location, 1000+ = broad match). --- A match this many characters away from the expected location will add --- 1.0 to the score (0.0 is a perfect match). -local Match_Distance = 1000 --- When deleting a large block of text (over ~64 characters), how close do --- the contents have to be to match the expected contents. (0.0 = perfection, --- 1.0 = very loose). Note that Match_Threshold controls how closely the --- end points of a delete need to match. -local Patch_DeleteThreshold = 0.5 --- Chunk size for context length. -local Patch_Margin = 4 --- The number of bits in an int. -local Match_MaxBits = 32 - -function settings(new) - if new then - Diff_Timeout = new.Diff_Timeout or Diff_Timeout - Diff_EditCost = new.Diff_EditCost or Diff_EditCost - Match_Threshold = new.Match_Threshold or Match_Threshold - Match_Distance = new.Match_Distance or Match_Distance - Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold - Patch_Margin = new.Patch_Margin or Patch_Margin - Match_MaxBits = new.Match_MaxBits or Match_MaxBits - else - return { - Diff_Timeout = Diff_Timeout, - Diff_EditCost = Diff_EditCost, - Match_Threshold = Match_Threshold, - Match_Distance = Match_Distance, - Patch_DeleteThreshold = Patch_DeleteThreshold, - Patch_Margin = Patch_Margin, - Match_MaxBits = Match_MaxBits, - } - end -end - --- --------------------------------------------------------------------------- --- DIFF API --- --------------------------------------------------------------------------- - --- The private diff functions -local _diff_compute, _diff_bisect, _diff_halfMatchI, _diff_halfMatch, _diff_cleanupSemanticScore, _diff_cleanupSemanticLossless, _diff_cleanupMerge, _diff_commonPrefix, _diff_commonSuffix, _diff_commonOverlap, _diff_xIndex, _diff_text1, _diff_text2, _diff_toDelta, _diff_fromDelta - ---[[ -* Find the differences between two texts. Simplifies the problem by stripping -* any common prefix or suffix off the texts before diffing. -* @param {string} text1 Old string to be diffed. -* @param {string} text2 New string to be diffed. -* @param {boolean} opt_checklines Has no effect in Lua. -* @param {number} opt_deadline Optional time when the diff should be complete -* by. Used internally for recursive calls. Users should set DiffTimeout -* instead. -* @return {Array.>} Array of diff tuples. ---]] -function diff_main(text1, text2, opt_checklines, opt_deadline) - -- Set a deadline by which time the diff must be complete. - if opt_deadline == nil then - if Diff_Timeout <= 0 then - opt_deadline = 2 ^ 31 - else - opt_deadline = clock() + Diff_Timeout - end - end - local deadline = opt_deadline - - -- Check for null inputs. - if text1 == nil or text1 == nil then - error('Null inputs. (diff_main)') - end - - -- Check for equality (speedup). - if text1 == text2 then - if #text1 > 0 then - return { { DIFF_EQUAL, text1 } } - end - return {} - end - - -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of - -- implementing the line-mode speedup. - local checklines = false - - -- Trim off common prefix (speedup). - local commonlength = _diff_commonPrefix(text1, text2) - local commonprefix - if commonlength > 0 then - commonprefix = strsub(text1, 1, commonlength) - text1 = strsub(text1, commonlength + 1) - text2 = strsub(text2, commonlength + 1) - end - - -- Trim off common suffix (speedup). - commonlength = _diff_commonSuffix(text1, text2) - local commonsuffix - if commonlength > 0 then - commonsuffix = strsub(text1, -commonlength) - text1 = strsub(text1, 1, -commonlength - 1) - text2 = strsub(text2, 1, -commonlength - 1) - end - - -- Compute the diff on the middle block. - local diffs = _diff_compute(text1, text2, checklines, deadline) - - -- Restore the prefix and suffix. - if commonprefix then - tinsert(diffs, 1, { DIFF_EQUAL, commonprefix }) - end - if commonsuffix then - diffs[#diffs + 1] = { DIFF_EQUAL, commonsuffix } - end - - _diff_cleanupMerge(diffs) - return diffs -end - ---[[ -* Reduce the number of edits by eliminating semantically trivial equalities. -* @param {Array.>} diffs Array of diff tuples. ---]] -function diff_cleanupSemantic(diffs) - local changes = false - local equalities = {} -- Stack of indices where equalities are found. - local equalitiesLength = 0 -- Keeping our own length var is faster. - local lastEquality = nil - -- Always equal to diffs[equalities[equalitiesLength]][2] - local pointer = 1 -- Index of current position. - -- Number of characters that changed prior to the equality. - local length_insertions1 = 0 - local length_deletions1 = 0 - -- Number of characters that changed after the equality. - local length_insertions2 = 0 - local length_deletions2 = 0 - - while diffs[pointer] do - if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. - equalitiesLength = equalitiesLength + 1 - equalities[equalitiesLength] = pointer - length_insertions1 = length_insertions2 - length_deletions1 = length_deletions2 - length_insertions2 = 0 - length_deletions2 = 0 - lastEquality = diffs[pointer][2] - else -- An insertion or deletion. - if diffs[pointer][1] == DIFF_INSERT then - length_insertions2 = length_insertions2 + #diffs[pointer][2] - else - length_deletions2 = length_deletions2 + #diffs[pointer][2] - end - -- Eliminate an equality that is smaller or equal to the edits on both - -- sides of it. - if - lastEquality - and (#lastEquality <= max(length_insertions1, length_deletions1)) - and (#lastEquality <= max(length_insertions2, length_deletions2)) - then - -- Duplicate record. - tinsert(diffs, equalities[equalitiesLength], { DIFF_DELETE, lastEquality }) - -- Change second copy to insert. - diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT - -- Throw away the equality we just deleted. - equalitiesLength = equalitiesLength - 1 - -- Throw away the previous equality (it needs to be reevaluated). - equalitiesLength = equalitiesLength - 1 - pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 - length_insertions1, length_deletions1 = 0, 0 -- Reset the counters. - length_insertions2, length_deletions2 = 0, 0 - lastEquality = nil - changes = true - end - end - pointer = pointer + 1 - end - - -- Normalize the diff. - if changes then - _diff_cleanupMerge(diffs) - end - _diff_cleanupSemanticLossless(diffs) - - -- Find any overlaps between deletions and insertions. - -- e.g: abcxxxxxxdef - -- -> abcxxxdef - -- e.g: xxxabcdefxxx - -- -> defxxxabc - -- Only extract an overlap if it is as big as the edit ahead or behind it. - pointer = 2 - while diffs[pointer] do - if diffs[pointer - 1][1] == DIFF_DELETE and diffs[pointer][1] == DIFF_INSERT then - local deletion = diffs[pointer - 1][2] - local insertion = diffs[pointer][2] - local overlap_length1 = _diff_commonOverlap(deletion, insertion) - local overlap_length2 = _diff_commonOverlap(insertion, deletion) - if overlap_length1 >= overlap_length2 then - if overlap_length1 >= #deletion / 2 or overlap_length1 >= #insertion / 2 then - -- Overlap found. Insert an equality and trim the surrounding edits. - tinsert(diffs, pointer, { DIFF_EQUAL, strsub(insertion, 1, overlap_length1) }) - diffs[pointer - 1][2] = strsub(deletion, 1, #deletion - overlap_length1) - diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1) - pointer = pointer + 1 - end - else - if overlap_length2 >= #deletion / 2 or overlap_length2 >= #insertion / 2 then - -- Reverse overlap found. - -- Insert an equality and swap and trim the surrounding edits. - tinsert(diffs, pointer, { DIFF_EQUAL, strsub(deletion, 1, overlap_length2) }) - diffs[pointer - 1] = { DIFF_INSERT, strsub(insertion, 1, #insertion - overlap_length2) } - diffs[pointer + 1] = { DIFF_DELETE, strsub(deletion, overlap_length2 + 1) } - pointer = pointer + 1 - end - end - pointer = pointer + 1 - end - pointer = pointer + 1 - end -end - ---[[ -* Reduce the number of edits by eliminating operationally trivial equalities. -* @param {Array.>} diffs Array of diff tuples. ---]] -function diff_cleanupEfficiency(diffs) - local changes = false - -- Stack of indices where equalities are found. - local equalities = {} - -- Keeping our own length var is faster. - local equalitiesLength = 0 - -- Always equal to diffs[equalities[equalitiesLength]][2] - local lastEquality = nil - -- Index of current position. - local pointer = 1 - - -- The following four are really booleans but are stored as numbers because - -- they are used at one point like this: - -- - -- (pre_ins + pre_del + post_ins + post_del) == 3 - -- - -- ...i.e. checking that 3 of them are true and 1 of them is false. - - -- Is there an insertion operation before the last equality. - local pre_ins = 0 - -- Is there a deletion operation before the last equality. - local pre_del = 0 - -- Is there an insertion operation after the last equality. - local post_ins = 0 - -- Is there a deletion operation after the last equality. - local post_del = 0 - - while diffs[pointer] do - if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. - local diffText = diffs[pointer][2] - if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then - -- Candidate found. - equalitiesLength = equalitiesLength + 1 - equalities[equalitiesLength] = pointer - pre_ins, pre_del = post_ins, post_del - lastEquality = diffText - else - -- Not a candidate, and can never become one. - equalitiesLength = 0 - lastEquality = nil - end - post_ins, post_del = 0, 0 - else -- An insertion or deletion. - if diffs[pointer][1] == DIFF_DELETE then - post_del = 1 - else - post_ins = 1 - end - --[[ - * Five types to be split: - * ABXYCD - * AXCD - * ABXC - * AXCD - * ABXC - --]] - if - lastEquality - and ( - (pre_ins + pre_del + post_ins + post_del == 4) - or ((#lastEquality < Diff_EditCost / 2) and (pre_ins + pre_del + post_ins + post_del == 3)) - ) - then - -- Duplicate record. - tinsert(diffs, equalities[equalitiesLength], { DIFF_DELETE, lastEquality }) - -- Change second copy to insert. - diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT - -- Throw away the equality we just deleted. - equalitiesLength = equalitiesLength - 1 - lastEquality = nil - if (pre_ins == 1) and (pre_del == 1) then - -- No changes made which could affect previous entry, keep going. - post_ins, post_del = 1, 1 - equalitiesLength = 0 - else - -- Throw away the previous equality. - equalitiesLength = equalitiesLength - 1 - pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 - post_ins, post_del = 0, 0 - end - changes = true - end - end - pointer = pointer + 1 - end - - if changes then - _diff_cleanupMerge(diffs) - end -end - ---[[ -* Compute the Levenshtein distance; the number of inserted, deleted or -* substituted characters. -* @param {Array.>} diffs Array of diff tuples. -* @return {number} Number of changes. ---]] -function diff_levenshtein(diffs) - local levenshtein = 0 - local insertions, deletions = 0, 0 - for x, diff in ipairs(diffs) do - local op, data = diff[1], diff[2] - if op == DIFF_INSERT then - insertions = insertions + #data - elseif op == DIFF_DELETE then - deletions = deletions + #data - elseif op == DIFF_EQUAL then - -- A deletion and an insertion is one substitution. - levenshtein = levenshtein + max(insertions, deletions) - insertions = 0 - deletions = 0 - end - end - levenshtein = levenshtein + max(insertions, deletions) - return levenshtein -end - ---[[ -* Convert a diff array into a pretty HTML report. -* @param {Array.>} diffs Array of diff tuples. -* @return {string} HTML representation. ---]] -function diff_prettyHtml(diffs) - local html = {} - for x, diff in ipairs(diffs) do - local op = diff[1] -- Operation (insert, delete, equal) - local data = diff[2] -- Text of change. - local text = gsub(data, htmlEncode_pattern, htmlEncode_replace) - if op == DIFF_INSERT then - html[x] = '' .. text .. '' - elseif op == DIFF_DELETE then - html[x] = '' .. text .. '' - elseif op == DIFF_EQUAL then - html[x] = '' .. text .. '' - end - end - return tconcat(html) -end - --- --------------------------------------------------------------------------- --- UNOFFICIAL/PRIVATE DIFF FUNCTIONS --- --------------------------------------------------------------------------- - ---[[ -* Find the differences between two texts. Assumes that the texts do not -* have any common prefix or suffix. -* @param {string} text1 Old string to be diffed. -* @param {string} text2 New string to be diffed. -* @param {boolean} checklines Has no effect in Lua. -* @param {number} deadline Time when the diff should be complete by. -* @return {Array.>} Array of diff tuples. -* @private ---]] -function _diff_compute(text1, text2, checklines, deadline) - if #text1 == 0 then - -- Just add some text (speedup). - return { { DIFF_INSERT, text2 } } - end - - if #text2 == 0 then - -- Just delete some text (speedup). - return { { DIFF_DELETE, text1 } } - end - - local diffs - - local longtext = (#text1 > #text2) and text1 or text2 - local shorttext = (#text1 > #text2) and text2 or text1 - local i = indexOf(longtext, shorttext) - - if i ~= nil then - -- Shorter text is inside the longer text (speedup). - diffs = { - { DIFF_INSERT, strsub(longtext, 1, i - 1) }, - { DIFF_EQUAL, shorttext }, - { DIFF_INSERT, strsub(longtext, i + #shorttext) }, - } - -- Swap insertions for deletions if diff is reversed. - if #text1 > #text2 then - diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE - end - return diffs - end - - if #shorttext == 1 then - -- Single character string. - -- After the previous speedup, the character can't be an equality. - return { { DIFF_DELETE, text1 }, { DIFF_INSERT, text2 } } - end - - -- Check to see if the problem can be split in two. - do - local text1_a, text1_b, text2_a, text2_b, mid_common = _diff_halfMatch(text1, text2) - - if text1_a then - -- A half-match was found, sort out the return data. - -- Send both pairs off for separate processing. - local diffs_a = diff_main(text1_a, text2_a, checklines, deadline) - local diffs_b = diff_main(text1_b, text2_b, checklines, deadline) - -- Merge the results. - local diffs_a_len = #diffs_a - diffs = diffs_a - diffs[diffs_a_len + 1] = { DIFF_EQUAL, mid_common } - for i, b_diff in ipairs(diffs_b) do - diffs[diffs_a_len + 1 + i] = b_diff - end - return diffs - end - end - - return _diff_bisect(text1, text2, deadline) -end - ---[[ -* Find the 'middle snake' of a diff, split the problem in two -* and return the recursively constructed diff. -* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. -* @param {string} text1 Old string to be diffed. -* @param {string} text2 New string to be diffed. -* @param {number} deadline Time at which to bail if not yet complete. -* @return {Array.>} Array of diff tuples. -* @private ---]] -function _diff_bisect(text1, text2, deadline) - -- Cache the text lengths to prevent multiple calls. - local text1_length = #text1 - local text2_length = #text2 - local _sub, _element - local max_d = ceil((text1_length + text2_length) / 2) - local v_offset = max_d - local v_length = 2 * max_d - local v1 = {} - local v2 = {} - -- Setting all elements to -1 is faster in Lua than mixing integers and nil. - for x = 0, v_length - 1 do - v1[x] = -1 - v2[x] = -1 - end - v1[v_offset + 1] = 0 - v2[v_offset + 1] = 0 - local delta = text1_length - text2_length - -- If the total number of characters is odd, then - -- the front path will collide with the reverse path. - local front = (delta % 2 ~= 0) - -- Offsets for start and end of k loop. - -- Prevents mapping of space beyond the grid. - local k1start = 0 - local k1end = 0 - local k2start = 0 - local k2end = 0 - for d = 0, max_d - 1 do - -- Bail out if deadline is reached. - if clock() > deadline then - break - end - - -- Walk the front path one step. - for k1 = -d + k1start, d - k1end, 2 do - local k1_offset = v_offset + k1 - local x1 - if (k1 == -d) or ((k1 ~= d) and (v1[k1_offset - 1] < v1[k1_offset + 1])) then - x1 = v1[k1_offset + 1] - else - x1 = v1[k1_offset - 1] + 1 - end - local y1 = x1 - k1 - while (x1 <= text1_length) and (y1 <= text2_length) and (strsub(text1, x1, x1) == strsub(text2, y1, y1)) do - x1 = x1 + 1 - y1 = y1 + 1 - end - v1[k1_offset] = x1 - if x1 > text1_length + 1 then - -- Ran off the right of the graph. - k1end = k1end + 2 - elseif y1 > text2_length + 1 then - -- Ran off the bottom of the graph. - k1start = k1start + 2 - elseif front then - local k2_offset = v_offset + delta - k1 - if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then - -- Mirror x2 onto top-left coordinate system. - local x2 = text1_length - v2[k2_offset] + 1 - if x1 > x2 then - -- Overlap detected. - return _diff_bisectSplit(text1, text2, x1, y1, deadline) - end - end - end - end - - -- Walk the reverse path one step. - for k2 = -d + k2start, d - k2end, 2 do - local k2_offset = v_offset + k2 - local x2 - if (k2 == -d) or ((k2 ~= d) and (v2[k2_offset - 1] < v2[k2_offset + 1])) then - x2 = v2[k2_offset + 1] - else - x2 = v2[k2_offset - 1] + 1 - end - local y2 = x2 - k2 - while (x2 <= text1_length) and (y2 <= text2_length) and (strsub(text1, -x2, -x2) == strsub(text2, -y2, -y2)) do - x2 = x2 + 1 - y2 = y2 + 1 - end - v2[k2_offset] = x2 - if x2 > text1_length + 1 then - -- Ran off the left of the graph. - k2end = k2end + 2 - elseif y2 > text2_length + 1 then - -- Ran off the top of the graph. - k2start = k2start + 2 - elseif not front then - local k1_offset = v_offset + delta - k2 - if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then - local x1 = v1[k1_offset] - local y1 = v_offset + x1 - k1_offset - -- Mirror x2 onto top-left coordinate system. - x2 = text1_length - x2 + 1 - if x1 > x2 then - -- Overlap detected. - return _diff_bisectSplit(text1, text2, x1, y1, deadline) - end - end - end - end - end - -- Diff took too long and hit the deadline or - -- number of diffs equals number of characters, no commonality at all. - return { { DIFF_DELETE, text1 }, { DIFF_INSERT, text2 } } -end - ---[[ - * Given the location of the 'middle snake', split the diff in two parts - * and recurse. - * @param {string} text1 Old string to be diffed. - * @param {string} text2 New string to be diffed. - * @param {number} x Index of split point in text1. - * @param {number} y Index of split point in text2. - * @param {number} deadline Time at which to bail if not yet complete. - * @return {Array.>} Array of diff tuples. - * @private ---]] -function _diff_bisectSplit(text1, text2, x, y, deadline) - local text1a = strsub(text1, 1, x - 1) - local text2a = strsub(text2, 1, y - 1) - local text1b = strsub(text1, x) - local text2b = strsub(text2, y) - - -- Compute both diffs serially. - local diffs = diff_main(text1a, text2a, false, deadline) - local diffsb = diff_main(text1b, text2b, false, deadline) - - local diffs_len = #diffs - for i, v in ipairs(diffsb) do - diffs[diffs_len + i] = v - end - return diffs -end - ---[[ -* Determine the common prefix of two strings. -* @param {string} text1 First string. -* @param {string} text2 Second string. -* @return {number} The number of characters common to the start of each -* string. ---]] -function _diff_commonPrefix(text1, text2) - -- Quick check for common null cases. - if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1)) then - return 0 - end - -- Binary search. - -- Performance analysis: https://neil.fraser.name/news/2007/10/09/ - local pointermin = 1 - local pointermax = min(#text1, #text2) - local pointermid = pointermax - local pointerstart = 1 - while pointermin < pointermid do - if strsub(text1, pointerstart, pointermid) == strsub(text2, pointerstart, pointermid) then - pointermin = pointermid - pointerstart = pointermin - else - pointermax = pointermid - end - pointermid = floor(pointermin + (pointermax - pointermin) / 2) - end - return pointermid -end - ---[[ -* Determine the common suffix of two strings. -* @param {string} text1 First string. -* @param {string} text2 Second string. -* @return {number} The number of characters common to the end of each string. ---]] -function _diff_commonSuffix(text1, text2) - -- Quick check for common null cases. - if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, -1) ~= strbyte(text2, -1)) then - return 0 - end - -- Binary search. - -- Performance analysis: https://neil.fraser.name/news/2007/10/09/ - local pointermin = 1 - local pointermax = min(#text1, #text2) - local pointermid = pointermax - local pointerend = 1 - while pointermin < pointermid do - if strsub(text1, -pointermid, -pointerend) == strsub(text2, -pointermid, -pointerend) then - pointermin = pointermid - pointerend = pointermin - else - pointermax = pointermid - end - pointermid = floor(pointermin + (pointermax - pointermin) / 2) - end - return pointermid -end - ---[[ -* Determine if the suffix of one string is the prefix of another. -* @param {string} text1 First string. -* @param {string} text2 Second string. -* @return {number} The number of characters common to the end of the first -* string and the start of the second string. -* @private ---]] -function _diff_commonOverlap(text1, text2) - -- Cache the text lengths to prevent multiple calls. - local text1_length = #text1 - local text2_length = #text2 - -- Eliminate the null case. - if text1_length == 0 or text2_length == 0 then - return 0 - end - -- Truncate the longer string. - if text1_length > text2_length then - text1 = strsub(text1, text1_length - text2_length + 1) - elseif text1_length < text2_length then - text2 = strsub(text2, 1, text1_length) - end - local text_length = min(text1_length, text2_length) - -- Quick check for the worst case. - if text1 == text2 then - return text_length - end - - -- Start by looking for a single character match - -- and increase length until no match is found. - -- Performance analysis: https://neil.fraser.name/news/2010/11/04/ - local best = 0 - local length = 1 - while true do - local pattern = strsub(text1, text_length - length + 1) - local found = strfind(text2, pattern, 1, true) - if found == nil then - return best - end - length = length + found - 1 - if found == 1 or strsub(text1, text_length - length + 1) == strsub(text2, 1, length) then - best = length - length = length + 1 - end - end -end - ---[[ -* Does a substring of shorttext exist within longtext such that the substring -* is at least half the length of longtext? -* This speedup can produce non-minimal diffs. -* Closure, but does not reference any external variables. -* @param {string} longtext Longer string. -* @param {string} shorttext Shorter string. -* @param {number} i Start index of quarter length substring within longtext. -* @return {?Array.} Five element Array, containing the prefix of -* longtext, the suffix of longtext, the prefix of shorttext, the suffix -* of shorttext and the common middle. Or nil if there was no match. -* @private ---]] -function _diff_halfMatchI(longtext, shorttext, i) - -- Start with a 1/4 length substring at position i as a seed. - local seed = strsub(longtext, i, i + floor(#longtext / 4)) - local j = 0 -- LUANOTE: do not change to 1, was originally -1 - local best_common = '' - local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b - while true do - j = indexOf(shorttext, seed, j + 1) - if j == nil then - break - end - local prefixLength = _diff_commonPrefix(strsub(longtext, i), strsub(shorttext, j)) - local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1), strsub(shorttext, 1, j - 1)) - if #best_common < suffixLength + prefixLength then - best_common = strsub(shorttext, j - suffixLength, j - 1) .. strsub(shorttext, j, j + prefixLength - 1) - best_longtext_a = strsub(longtext, 1, i - suffixLength - 1) - best_longtext_b = strsub(longtext, i + prefixLength) - best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1) - best_shorttext_b = strsub(shorttext, j + prefixLength) - end - end - if #best_common * 2 >= #longtext then - return { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common } - else - return nil - end -end - ---[[ -* Do the two texts share a substring which is at least half the length of the -* longer text? -* @param {string} text1 First string. -* @param {string} text2 Second string. -* @return {?Array.} Five element Array, containing the prefix of -* text1, the suffix of text1, the prefix of text2, the suffix of -* text2 and the common middle. Or nil if there was no match. -* @private ---]] -function _diff_halfMatch(text1, text2) - if Diff_Timeout <= 0 then - -- Don't risk returning a non-optimal diff if we have unlimited time. - return nil - end - local longtext = (#text1 > #text2) and text1 or text2 - local shorttext = (#text1 > #text2) and text2 or text1 - if (#longtext < 4) or (#shorttext * 2 < #longtext) then - return nil -- Pointless. - end - - -- First check if the second quarter is the seed for a half-match. - local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4)) - -- Check again based on the third quarter. - local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2)) - local hm - if not hm1 and not hm2 then - return nil - elseif not hm2 then - hm = hm1 - elseif not hm1 then - hm = hm2 - else - -- Both matched. Select the longest. - hm = (#hm1[5] > #hm2[5]) and hm1 or hm2 - end - - -- A half-match was found, sort out the return data. - local text1_a, text1_b, text2_a, text2_b - if #text1 > #text2 then - text1_a, text1_b = hm[1], hm[2] - text2_a, text2_b = hm[3], hm[4] - else - text2_a, text2_b = hm[1], hm[2] - text1_a, text1_b = hm[3], hm[4] - end - local mid_common = hm[5] - return text1_a, text1_b, text2_a, text2_b, mid_common -end - ---[[ -* Given two strings, compute a score representing whether the internal -* boundary falls on logical boundaries. -* Scores range from 6 (best) to 0 (worst). -* @param {string} one First string. -* @param {string} two Second string. -* @return {number} The score. -* @private ---]] -function _diff_cleanupSemanticScore(one, two) - if (#one == 0) or (#two == 0) then - -- Edges are the best. - return 6 - end - - -- Each port of this function behaves slightly differently due to - -- subtle differences in each language's definition of things like - -- 'whitespace'. Since this function's purpose is largely cosmetic, - -- the choice has been made to use each language's native features - -- rather than force total conformity. - local char1 = strsub(one, -1) - local char2 = strsub(two, 1, 1) - local nonAlphaNumeric1 = strmatch(char1, '%W') - local nonAlphaNumeric2 = strmatch(char2, '%W') - local whitespace1 = nonAlphaNumeric1 and strmatch(char1, '%s') - local whitespace2 = nonAlphaNumeric2 and strmatch(char2, '%s') - local lineBreak1 = whitespace1 and strmatch(char1, '%c') - local lineBreak2 = whitespace2 and strmatch(char2, '%c') - local blankLine1 = lineBreak1 and strmatch(one, '\n\r?\n$') - local blankLine2 = lineBreak2 and strmatch(two, '^\r?\n\r?\n') - - if blankLine1 or blankLine2 then - -- Five points for blank lines. - return 5 - elseif lineBreak1 or lineBreak2 then - -- Four points for line breaks. - return 4 - elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then - -- Three points for end of sentences. - return 3 - elseif whitespace1 or whitespace2 then - -- Two points for whitespace. - return 2 - elseif nonAlphaNumeric1 or nonAlphaNumeric2 then - -- One point for non-alphanumeric. - return 1 - end - return 0 -end - ---[[ -* Look for single edits surrounded on both sides by equalities -* which can be shifted sideways to align the edit to a word boundary. -* e.g: The cat came. -> The cat came. -* @param {Array.>} diffs Array of diff tuples. ---]] -function _diff_cleanupSemanticLossless(diffs) - local pointer = 2 - -- Intentionally ignore the first and last element (don't need checking). - while diffs[pointer + 1] do - local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] - if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then - -- This is a single edit surrounded by equalities. - local diff = diffs[pointer] - - local equality1 = prevDiff[2] - local edit = diff[2] - local equality2 = nextDiff[2] - - -- First, shift the edit as far left as possible. - local commonOffset = _diff_commonSuffix(equality1, edit) - if commonOffset > 0 then - local commonString = strsub(edit, -commonOffset) - equality1 = strsub(equality1, 1, -commonOffset - 1) - edit = commonString .. strsub(edit, 1, -commonOffset - 1) - equality2 = commonString .. equality2 - end - - -- Second, step character by character right, looking for the best fit. - local bestEquality1 = equality1 - local bestEdit = edit - local bestEquality2 = equality2 - local bestScore = _diff_cleanupSemanticScore(equality1, edit) + _diff_cleanupSemanticScore(edit, equality2) - - while strbyte(edit, 1) == strbyte(equality2, 1) do - equality1 = equality1 .. strsub(edit, 1, 1) - edit = strsub(edit, 2) .. strsub(equality2, 1, 1) - equality2 = strsub(equality2, 2) - local score = _diff_cleanupSemanticScore(equality1, edit) + _diff_cleanupSemanticScore(edit, equality2) - -- The >= encourages trailing rather than leading whitespace on edits. - if score >= bestScore then - bestScore = score - bestEquality1 = equality1 - bestEdit = edit - bestEquality2 = equality2 - end - end - if prevDiff[2] ~= bestEquality1 then - -- We have an improvement, save it back to the diff. - if #bestEquality1 > 0 then - diffs[pointer - 1][2] = bestEquality1 - else - tremove(diffs, pointer - 1) - pointer = pointer - 1 - end - diffs[pointer][2] = bestEdit - if #bestEquality2 > 0 then - diffs[pointer + 1][2] = bestEquality2 - else - tremove(diffs, pointer + 1, 1) - pointer = pointer - 1 - end - end - end - pointer = pointer + 1 - end -end - ---[[ -* Reorder and merge like edit sections. Merge equalities. -* Any edit section can move as long as it doesn't cross an equality. -* @param {Array.>} diffs Array of diff tuples. ---]] -function _diff_cleanupMerge(diffs) - diffs[#diffs + 1] = { DIFF_EQUAL, '' } -- Add a dummy entry at the end. - local pointer = 1 - local count_delete, count_insert = 0, 0 - local text_delete, text_insert = '', '' - local commonlength - while diffs[pointer] do - local diff_type = diffs[pointer][1] - if diff_type == DIFF_INSERT then - count_insert = count_insert + 1 - text_insert = text_insert .. diffs[pointer][2] - pointer = pointer + 1 - elseif diff_type == DIFF_DELETE then - count_delete = count_delete + 1 - text_delete = text_delete .. diffs[pointer][2] - pointer = pointer + 1 - elseif diff_type == DIFF_EQUAL then - -- Upon reaching an equality, check for prior redundancies. - if count_delete + count_insert > 1 then - if (count_delete > 0) and (count_insert > 0) then - -- Factor out any common prefixies. - commonlength = _diff_commonPrefix(text_insert, text_delete) - if commonlength > 0 then - local back_pointer = pointer - count_delete - count_insert - if (back_pointer > 1) and (diffs[back_pointer - 1][1] == DIFF_EQUAL) then - diffs[back_pointer - 1][2] = diffs[back_pointer - 1][2] .. strsub(text_insert, 1, commonlength) - else - tinsert(diffs, 1, { DIFF_EQUAL, strsub(text_insert, 1, commonlength) }) - pointer = pointer + 1 - end - text_insert = strsub(text_insert, commonlength + 1) - text_delete = strsub(text_delete, commonlength + 1) - end - -- Factor out any common suffixies. - commonlength = _diff_commonSuffix(text_insert, text_delete) - if commonlength ~= 0 then - diffs[pointer][2] = strsub(text_insert, -commonlength) .. diffs[pointer][2] - text_insert = strsub(text_insert, 1, -commonlength - 1) - text_delete = strsub(text_delete, 1, -commonlength - 1) - end - end - -- Delete the offending records and add the merged ones. - pointer = pointer - count_delete - count_insert - for i = 1, count_delete + count_insert do - tremove(diffs, pointer) - end - if #text_delete > 0 then - tinsert(diffs, pointer, { DIFF_DELETE, text_delete }) - pointer = pointer + 1 - end - if #text_insert > 0 then - tinsert(diffs, pointer, { DIFF_INSERT, text_insert }) - pointer = pointer + 1 - end - pointer = pointer + 1 - elseif (pointer > 1) and (diffs[pointer - 1][1] == DIFF_EQUAL) then - -- Merge this equality with the previous one. - diffs[pointer - 1][2] = diffs[pointer - 1][2] .. diffs[pointer][2] - tremove(diffs, pointer) - else - pointer = pointer + 1 - end - count_insert, count_delete = 0, 0 - text_delete, text_insert = '', '' - end - end - if diffs[#diffs][2] == '' then - diffs[#diffs] = nil -- Remove the dummy entry at the end. - end - - -- Second pass: look for single edits surrounded on both sides by equalities - -- which can be shifted sideways to eliminate an equality. - -- e.g: ABAC -> ABAC - local changes = false - pointer = 2 - -- Intentionally ignore the first and last element (don't need checking). - while pointer < #diffs do - local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] - if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then - -- This is a single edit surrounded by equalities. - local diff = diffs[pointer] - local currentText = diff[2] - local prevText = prevDiff[2] - local nextText = nextDiff[2] - if #prevText == 0 then - tremove(diffs, pointer - 1) - changes = true - elseif strsub(currentText, -#prevText) == prevText then - -- Shift the edit over the previous equality. - diff[2] = prevText .. strsub(currentText, 1, -#prevText - 1) - nextDiff[2] = prevText .. nextDiff[2] - tremove(diffs, pointer - 1) - changes = true - elseif strsub(currentText, 1, #nextText) == nextText then - -- Shift the edit over the next equality. - prevDiff[2] = prevText .. nextText - diff[2] = strsub(currentText, #nextText + 1) .. nextText - tremove(diffs, pointer + 1) - changes = true - end - end - pointer = pointer + 1 - end - -- If shifts were made, the diff needs reordering and another shift sweep. - if changes then - -- LUANOTE: no return value, but necessary to use 'return' to get - -- tail calls. - return _diff_cleanupMerge(diffs) - end -end - ---[[ -* loc is a location in text1, compute and return the equivalent location in -* text2. -* e.g. 'The cat' vs 'The big cat', 1->1, 5->8 -* @param {Array.>} diffs Array of diff tuples. -* @param {number} loc Location within text1. -* @return {number} Location within text2. ---]] -function _diff_xIndex(diffs, loc) - local chars1 = 1 - local chars2 = 1 - local last_chars1 = 1 - local last_chars2 = 1 - local x - for _x, diff in ipairs(diffs) do - x = _x - if diff[1] ~= DIFF_INSERT then -- Equality or deletion. - chars1 = chars1 + #diff[2] - end - if diff[1] ~= DIFF_DELETE then -- Equality or insertion. - chars2 = chars2 + #diff[2] - end - if chars1 > loc then -- Overshot the location. - break - end - last_chars1 = chars1 - last_chars2 = chars2 - end - -- Was the location deleted? - if diffs[x + 1] and (diffs[x][1] == DIFF_DELETE) then - return last_chars2 - end - -- Add the remaining character length. - return last_chars2 + (loc - last_chars1) -end - ---[[ -* Compute and return the source text (all equalities and deletions). -* @param {Array.>} diffs Array of diff tuples. -* @return {string} Source text. ---]] -function _diff_text1(diffs) - local text = {} - for x, diff in ipairs(diffs) do - if diff[1] ~= DIFF_INSERT then - text[#text + 1] = diff[2] - end - end - return tconcat(text) -end - ---[[ -* Compute and return the destination text (all equalities and insertions). -* @param {Array.>} diffs Array of diff tuples. -* @return {string} Destination text. ---]] -function _diff_text2(diffs) - local text = {} - for x, diff in ipairs(diffs) do - if diff[1] ~= DIFF_DELETE then - text[#text + 1] = diff[2] - end - end - return tconcat(text) -end - ---[[ -* Crush the diff into an encoded string which describes the operations -* required to transform text1 into text2. -* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. -* Operations are tab-separated. Inserted text is escaped using %xx notation. -* @param {Array.>} diffs Array of diff tuples. -* @return {string} Delta text. ---]] -function _diff_toDelta(diffs) - local text = {} - for x, diff in ipairs(diffs) do - local op, data = diff[1], diff[2] - if op == DIFF_INSERT then - text[x] = '+' .. gsub(data, percentEncode_pattern, percentEncode_replace) - elseif op == DIFF_DELETE then - text[x] = '-' .. #data - elseif op == DIFF_EQUAL then - text[x] = '=' .. #data - end - end - return tconcat(text, '\t') -end - ---[[ -* Given the original text1, and an encoded string which describes the -* operations required to transform text1 into text2, compute the full diff. -* @param {string} text1 Source string for the diff. -* @param {string} delta Delta text. -* @return {Array.>} Array of diff tuples. -* @throws {Errorend If invalid input. ---]] -function _diff_fromDelta(text1, delta) - local diffs = {} - local diffsLength = 0 -- Keeping our own length var is faster - local pointer = 1 -- Cursor in text1 - for token in gmatch(delta, '[^\t]+') do - -- Each token begins with a one character parameter which specifies the - -- operation of this token (delete, insert, equality). - local tokenchar, param = strsub(token, 1, 1), strsub(token, 2) - if tokenchar == '+' then - local invalidDecode = false - local decoded = gsub(param, '%%(.?.?)', function(c) - local n = tonumber(c, 16) - if (#c ~= 2) or (n == nil) then - invalidDecode = true - return '' - end - return strchar(n) - end) - if invalidDecode then - -- Malformed URI sequence. - error('Illegal escape in _diff_fromDelta: ' .. param) - end - diffsLength = diffsLength + 1 - diffs[diffsLength] = { DIFF_INSERT, decoded } - elseif (tokenchar == '-') or (tokenchar == '=') then - local n = tonumber(param) - if (n == nil) or (n < 0) then - error('Invalid number in _diff_fromDelta: ' .. param) - end - local text = strsub(text1, pointer, pointer + n - 1) - pointer = pointer + n - if tokenchar == '=' then - diffsLength = diffsLength + 1 - diffs[diffsLength] = { DIFF_EQUAL, text } - else - diffsLength = diffsLength + 1 - diffs[diffsLength] = { DIFF_DELETE, text } - end - else - error('Invalid diff operation in _diff_fromDelta: ' .. token) - end - end - if pointer ~= #text1 + 1 then - error('Delta length (' .. (pointer - 1) .. ') does not equal source text length (' .. #text1 .. ').') - end - return diffs -end - --- --------------------------------------------------------------------------- --- MATCH API --- --------------------------------------------------------------------------- - -local _match_bitap, _match_alphabet - ---[[ -* Locate the best instance of 'pattern' in 'text' near 'loc'. -* @param {string} text The text to search. -* @param {string} pattern The pattern to search for. -* @param {number} loc The location to search around. -* @return {number} Best match index or -1. ---]] -function match_main(text, pattern, loc) - -- Check for null inputs. - if text == nil or pattern == nil or loc == nil then - error('Null inputs. (match_main)') - end - - if text == pattern then - -- Shortcut (potentially not guaranteed by the algorithm) - return 1 - elseif #text == 0 then - -- Nothing to match. - return -1 - end - loc = max(1, min(loc, #text)) - if strsub(text, loc, loc + #pattern - 1) == pattern then - -- Perfect match at the perfect spot! (Includes case of null pattern) - return loc - else - -- Do a fuzzy compare. - return _match_bitap(text, pattern, loc) - end -end - --- --------------------------------------------------------------------------- --- UNOFFICIAL/PRIVATE MATCH FUNCTIONS --- --------------------------------------------------------------------------- - ---[[ -* Initialise the alphabet for the Bitap algorithm. -* @param {string} pattern The text to encode. -* @return {Object} Hash of character locations. -* @private ---]] -function _match_alphabet(pattern) - local s = {} - local i = 0 - for c in gmatch(pattern, '.') do - s[c] = bor(s[c] or 0, lshift(1, #pattern - i - 1)) - i = i + 1 - end - return s -end - ---[[ -* Locate the best instance of 'pattern' in 'text' near 'loc' using the -* Bitap algorithm. -* @param {string} text The text to search. -* @param {string} pattern The pattern to search for. -* @param {number} loc The location to search around. -* @return {number} Best match index or -1. -* @private ---]] -function _match_bitap(text, pattern, loc) - if #pattern > Match_MaxBits then - error('Pattern too long.') - end - - -- Initialise the alphabet. - local s = _match_alphabet(pattern) - - --[[ - * Compute and return the score for a match with e errors and x location. - * Accesses loc and pattern through being a closure. - * @param {number} e Number of errors in match. - * @param {number} x Location of match. - * @return {number} Overall score for match (0.0 = good, 1.0 = bad). - * @private - --]] - local function _match_bitapScore(e, x) - local accuracy = e / #pattern - local proximity = abs(loc - x) - if Match_Distance == 0 then - -- Dodge divide by zero error. - return (proximity == 0) and 1 or accuracy - end - return accuracy + (proximity / Match_Distance) - end - - -- Highest score beyond which we give up. - local score_threshold = Match_Threshold - -- Is there a nearby exact match? (speedup) - local best_loc = indexOf(text, pattern, loc) - if best_loc then - score_threshold = min(_match_bitapScore(0, best_loc), score_threshold) - -- LUANOTE: Ideally we'd also check from the other direction, but Lua - -- doesn't have an efficent lastIndexOf function. - end - - -- Initialise the bit arrays. - local matchmask = lshift(1, #pattern - 1) - best_loc = -1 - - local bin_min, bin_mid - local bin_max = #pattern + #text - local last_rd - for d = 0, #pattern - 1, 1 do - -- Scan for the best match; each iteration allows for one more error. - -- Run a binary search to determine how far from 'loc' we can stray at this - -- error level. - bin_min = 0 - bin_mid = bin_max - while bin_min < bin_mid do - if _match_bitapScore(d, loc + bin_mid) <= score_threshold then - bin_min = bin_mid - else - bin_max = bin_mid - end - bin_mid = floor(bin_min + (bin_max - bin_min) / 2) - end - -- Use the result from this iteration as the maximum for the next. - bin_max = bin_mid - local start = max(1, loc - bin_mid + 1) - local finish = min(loc + bin_mid, #text) + #pattern - - local rd = {} - for j = start, finish do - rd[j] = 0 - end - rd[finish + 1] = lshift(1, d) - 1 - for j = finish, start, -1 do - local charMatch = s[strsub(text, j - 1, j - 1)] or 0 - if d == 0 then -- First pass: exact match. - rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch) - else - -- Subsequent passes: fuzzy match. - -- Functions instead of operators make this hella messy. - rd[j] = bor( - band(bor(lshift(rd[j + 1], 1), 1), charMatch), - bor(bor(lshift(bor(last_rd[j + 1], last_rd[j]), 1), 1), last_rd[j + 1]) - ) - end - if band(rd[j], matchmask) ~= 0 then - local score = _match_bitapScore(d, j - 1) - -- This match will almost certainly be better than any existing match. - -- But check anyway. - if score <= score_threshold then - -- Told you so. - score_threshold = score - best_loc = j - 1 - if best_loc > loc then - -- When passing loc, don't exceed our current distance from loc. - start = max(1, loc * 2 - best_loc) - else - -- Already passed loc, downhill from here on in. - break - end - end - end - end - -- No hope for a (better) match at greater error levels. - if _match_bitapScore(d + 1, loc) > score_threshold then - break - end - last_rd = rd - end - return best_loc -end - --- ----------------------------------------------------------------------------- --- PATCH API --- ----------------------------------------------------------------------------- - -local _patch_addContext, _patch_deepCopy, _patch_addPadding, _patch_splitMax, _patch_appendText, _new_patch_obj - ---[[ -* Compute a list of patches to turn text1 into text2. -* Use diffs if provided, otherwise compute it ourselves. -* There are four ways to call this function, depending on what data is -* available to the caller: -* Method 1: -* a = text1, b = text2 -* Method 2: -* a = diffs -* Method 3 (optimal): -* a = text1, b = diffs -* Method 4 (deprecated, use method 3): -* a = text1, b = text2, c = diffs -* -* @param {string|Array.>} a text1 (methods 1,3,4) or -* Array of diff tuples for text1 to text2 (method 2). -* @param {string|Array.>} opt_b text2 (methods 1,4) or -* Array of diff tuples for text1 to text2 (method 3) or undefined (method 2). -* @param {string|Array.>} opt_c Array of diff tuples for -* text1 to text2 (method 4) or undefined (methods 1,2,3). -* @return {Array.<_new_patch_obj>} Array of patch objects. ---]] -function patch_make(a, opt_b, opt_c) - local text1, diffs - local type_a, type_b, type_c = type(a), type(opt_b), type(opt_c) - if (type_a == 'string') and (type_b == 'string') and (type_c == 'nil') then - -- Method 1: text1, text2 - -- Compute diffs from text1 and text2. - text1 = a - diffs = diff_main(text1, opt_b, true) - if #diffs > 2 then - diff_cleanupSemantic(diffs) - diff_cleanupEfficiency(diffs) - end - elseif (type_a == 'table') and (type_b == 'nil') and (type_c == 'nil') then - -- Method 2: diffs - -- Compute text1 from diffs. - diffs = a - text1 = _diff_text1(diffs) - elseif (type_a == 'string') and (type_b == 'table') and (type_c == 'nil') then - -- Method 3: text1, diffs - text1 = a - diffs = opt_b - elseif (type_a == 'string') and (type_b == 'string') and (type_c == 'table') then - -- Method 4: text1, text2, diffs - -- text2 is not used. - text1 = a - diffs = opt_c - else - error('Unknown call format to patch_make.') - end - - if diffs[1] == nil then - return {} -- Get rid of the null case. - end - - local patches = {} - local patch = _new_patch_obj() - local patchDiffLength = 0 -- Keeping our own length var is faster. - local char_count1 = 0 -- Number of characters into the text1 string. - local char_count2 = 0 -- Number of characters into the text2 string. - -- Start with text1 (prepatch_text) and apply the diffs until we arrive at - -- text2 (postpatch_text). We recreate the patches one by one to determine - -- context info. - local prepatch_text, postpatch_text = text1, text1 - for x, diff in ipairs(diffs) do - local diff_type, diff_text = diff[1], diff[2] - - if (patchDiffLength == 0) and (diff_type ~= DIFF_EQUAL) then - -- A new patch starts here. - patch.start1 = char_count1 + 1 - patch.start2 = char_count2 + 1 - end - - if diff_type == DIFF_INSERT then - patchDiffLength = patchDiffLength + 1 - patch.diffs[patchDiffLength] = diff - patch.length2 = patch.length2 + #diff_text - postpatch_text = strsub(postpatch_text, 1, char_count2) .. diff_text .. strsub(postpatch_text, char_count2 + 1) - elseif diff_type == DIFF_DELETE then - patch.length1 = patch.length1 + #diff_text - patchDiffLength = patchDiffLength + 1 - patch.diffs[patchDiffLength] = diff - postpatch_text = strsub(postpatch_text, 1, char_count2) .. strsub(postpatch_text, char_count2 + #diff_text + 1) - elseif diff_type == DIFF_EQUAL then - if (#diff_text <= Patch_Margin * 2) and (patchDiffLength ~= 0) and (#diffs ~= x) then - -- Small equality inside a patch. - patchDiffLength = patchDiffLength + 1 - patch.diffs[patchDiffLength] = diff - patch.length1 = patch.length1 + #diff_text - patch.length2 = patch.length2 + #diff_text - elseif #diff_text >= Patch_Margin * 2 then - -- Time for a new patch. - if patchDiffLength ~= 0 then - _patch_addContext(patch, prepatch_text) - patches[#patches + 1] = patch - patch = _new_patch_obj() - patchDiffLength = 0 - -- Unlike Unidiff, our patch lists have a rolling context. - -- https://github.com/google/diff-match-patch/wiki/Unidiff - -- Update prepatch text & pos to reflect the application of the - -- just completed patch. - prepatch_text = postpatch_text - char_count1 = char_count2 - end - end - end - - -- Update the current character count. - if diff_type ~= DIFF_INSERT then - char_count1 = char_count1 + #diff_text - end - if diff_type ~= DIFF_DELETE then - char_count2 = char_count2 + #diff_text - end - end - - -- Pick up the leftover patch if not empty. - if patchDiffLength > 0 then - _patch_addContext(patch, prepatch_text) - patches[#patches + 1] = patch - end - - return patches -end - ---[[ -* Merge a set of patches onto the text. Return a patched text, as well -* as a list of true/false values indicating which patches were applied. -* @param {Array.<_new_patch_obj>} patches Array of patch objects. -* @param {string} text Old text. -* @return {Array.>} Two return values, the -* new text and an array of boolean values. ---]] -function patch_apply(patches, text) - if patches[1] == nil then - return text, {} - end - - -- Deep copy the patches so that no changes are made to originals. - patches = _patch_deepCopy(patches) - - local nullPadding = _patch_addPadding(patches) - text = nullPadding .. text .. nullPadding - - _patch_splitMax(patches) - -- delta keeps track of the offset between the expected and actual location - -- of the previous patch. If there are patches expected at positions 10 and - -- 20, but the first patch was found at 12, delta is 2 and the second patch - -- has an effective expected position of 22. - local delta = 0 - local results = {} - for x, patch in ipairs(patches) do - local expected_loc = patch.start2 + delta - local text1 = _diff_text1(patch.diffs) - local start_loc - local end_loc = -1 - if #text1 > Match_MaxBits then - -- _patch_splitMax will only provide an oversized pattern in - -- the case of a monster delete. - start_loc = match_main(text, strsub(text1, 1, Match_MaxBits), expected_loc) - if start_loc ~= -1 then - end_loc = match_main(text, strsub(text1, -Match_MaxBits), expected_loc + #text1 - Match_MaxBits) - if end_loc == -1 or start_loc >= end_loc then - -- Can't find valid trailing context. Drop this patch. - start_loc = -1 - end - end - else - start_loc = match_main(text, text1, expected_loc) - end - if start_loc == -1 then - -- No match found. :( - results[x] = false - -- Subtract the delta for this failed patch from subsequent patches. - delta = delta - patch.length2 - patch.length1 - else - -- Found a match. :) - results[x] = true - delta = start_loc - expected_loc - local text2 - if end_loc == -1 then - text2 = strsub(text, start_loc, start_loc + #text1 - 1) - else - text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1) - end - if text1 == text2 then - -- Perfect match, just shove the replacement text in. - text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs) .. strsub(text, start_loc + #text1) - else - -- Imperfect match. Run a diff to get a framework of equivalent - -- indices. - local diffs = diff_main(text1, text2, false) - if (#text1 > Match_MaxBits) and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then - -- The end points match, but the content is unacceptably bad. - results[x] = false - else - _diff_cleanupSemanticLossless(diffs) - local index1 = 1 - local index2 - for y, mod in ipairs(patch.diffs) do - if mod[1] ~= DIFF_EQUAL then - index2 = _diff_xIndex(diffs, index1) - end - if mod[1] == DIFF_INSERT then - text = strsub(text, 1, start_loc + index2 - 2) .. mod[2] .. strsub(text, start_loc + index2 - 1) - elseif mod[1] == DIFF_DELETE then - text = strsub(text, 1, start_loc + index2 - 2) - .. strsub(text, start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1)) - end - if mod[1] ~= DIFF_DELETE then - index1 = index1 + #mod[2] - end - end - end - end - end - end - -- Strip the padding off. - text = strsub(text, #nullPadding + 1, -#nullPadding - 1) - return text, results -end - ---[[ -* Take a list of patches and return a textual representation. -* @param {Array.<_new_patch_obj>} patches Array of patch objects. -* @return {string} Text representation of patches. ---]] -function patch_toText(patches) - local text = {} - for x, patch in ipairs(patches) do - _patch_appendText(patch, text) - end - return tconcat(text) -end - ---[[ -* Parse a textual representation of patches and return a list of patch objects. -* @param {string} textline Text representation of patches. -* @return {Array.<_new_patch_obj>} Array of patch objects. -* @throws {Error} If invalid input. ---]] -function patch_fromText(textline) - local patches = {} - if #textline == 0 then - return patches - end - local text = {} - for line in gmatch(textline, '([^\n]*)') do - text[#text + 1] = line - end - local textPointer = 1 - while textPointer <= #text do - local start1, length1, start2, length2 = strmatch(text[textPointer], '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$') - if start1 == nil then - error('Invalid patch string: "' .. text[textPointer] .. '"') - end - local patch = _new_patch_obj() - patches[#patches + 1] = patch - - start1 = tonumber(start1) - length1 = tonumber(length1) or 1 - if length1 == 0 then - start1 = start1 + 1 - end - patch.start1 = start1 - patch.length1 = length1 - - start2 = tonumber(start2) - length2 = tonumber(length2) or 1 - if length2 == 0 then - start2 = start2 + 1 - end - patch.start2 = start2 - patch.length2 = length2 - - textPointer = textPointer + 1 - - while true do - local line = text[textPointer] - if line == nil then - break - end - local sign - sign, line = strsub(line, 1, 1), strsub(line, 2) - - local invalidDecode = false - local decoded = gsub(line, '%%(.?.?)', function(c) - local n = tonumber(c, 16) - if (#c ~= 2) or (n == nil) then - invalidDecode = true - return '' - end - return strchar(n) - end) - if invalidDecode then - -- Malformed URI sequence. - error('Illegal escape in patch_fromText: ' .. line) - end - - line = decoded - - if sign == '-' then - -- Deletion. - patch.diffs[#patch.diffs + 1] = { DIFF_DELETE, line } - elseif sign == '+' then - -- Insertion. - patch.diffs[#patch.diffs + 1] = { DIFF_INSERT, line } - elseif sign == ' ' then - -- Minor equality. - patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, line } - elseif sign == '@' then - -- Start of next patch. - break - elseif sign == '' then - -- Blank line? Whatever. - else - -- WTF? - error('Invalid patch mode "' .. sign .. '" in: ' .. line) - end - textPointer = textPointer + 1 - end - end - return patches -end - --- --------------------------------------------------------------------------- --- UNOFFICIAL/PRIVATE PATCH FUNCTIONS --- --------------------------------------------------------------------------- - -local patch_meta = { - __tostring = function(patch) - local buf = {} - _patch_appendText(patch, buf) - return tconcat(buf) - end, -} - ---[[ -* Class representing one patch operation. -* @constructor ---]] -function _new_patch_obj() - return setmetatable({ - --[[ @type {Array.>} ]] - diffs = {}, - --[[ @type {?number} ]] - start1 = 1, -- nil; - --[[ @type {?number} ]] - start2 = 1, -- nil; - --[[ @type {number} ]] - length1 = 0, - --[[ @type {number} ]] - length2 = 0, - }, patch_meta) -end - ---[[ -* Increase the context until it is unique, -* but don't let the pattern expand beyond Match_MaxBits. -* @param {_new_patch_obj} patch The patch to grow. -* @param {string} text Source text. -* @private ---]] -function _patch_addContext(patch, text) - if #text == 0 then - return - end - local pattern = strsub(text, patch.start2, patch.start2 + patch.length1 - 1) - local padding = 0 - - -- LUANOTE: Lua's lack of a lastIndexOf function results in slightly - -- different logic here than in other language ports. - -- Look for the first two matches of pattern in text. If two are found, - -- increase the pattern length. - local firstMatch = indexOf(text, pattern) - local secondMatch = nil - if firstMatch ~= nil then - secondMatch = indexOf(text, pattern, firstMatch + 1) - end - while (#pattern == 0 or secondMatch ~= nil) and (#pattern < Match_MaxBits - Patch_Margin - Patch_Margin) do - padding = padding + Patch_Margin - pattern = strsub(text, max(1, patch.start2 - padding), patch.start2 + patch.length1 - 1 + padding) - firstMatch = indexOf(text, pattern) - if firstMatch ~= nil then - secondMatch = indexOf(text, pattern, firstMatch + 1) - else - secondMatch = nil - end - end - -- Add one chunk for good luck. - padding = padding + Patch_Margin - - -- Add the prefix. - local prefix = strsub(text, max(1, patch.start2 - padding), patch.start2 - 1) - if #prefix > 0 then - tinsert(patch.diffs, 1, { DIFF_EQUAL, prefix }) - end - -- Add the suffix. - local suffix = strsub(text, patch.start2 + patch.length1, patch.start2 + patch.length1 - 1 + padding) - if #suffix > 0 then - patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, suffix } - end - - -- Roll back the start points. - patch.start1 = patch.start1 - #prefix - patch.start2 = patch.start2 - #prefix - -- Extend the lengths. - patch.length1 = patch.length1 + #prefix + #suffix - patch.length2 = patch.length2 + #prefix + #suffix -end - ---[[ -* Given an array of patches, return another array that is identical. -* @param {Array.<_new_patch_obj>} patches Array of patch objects. -* @return {Array.<_new_patch_obj>} Array of patch objects. ---]] -function _patch_deepCopy(patches) - local patchesCopy = {} - for x, patch in ipairs(patches) do - local patchCopy = _new_patch_obj() - local diffsCopy = {} - for i, diff in ipairs(patch.diffs) do - diffsCopy[i] = { diff[1], diff[2] } - end - patchCopy.diffs = diffsCopy - patchCopy.start1 = patch.start1 - patchCopy.start2 = patch.start2 - patchCopy.length1 = patch.length1 - patchCopy.length2 = patch.length2 - patchesCopy[x] = patchCopy - end - return patchesCopy -end - ---[[ -* Add some padding on text start and end so that edges can match something. -* Intended to be called only from within patch_apply. -* @param {Array.<_new_patch_obj>} patches Array of patch objects. -* @return {string} The padding string added to each side. ---]] -function _patch_addPadding(patches) - local paddingLength = Patch_Margin - local nullPadding = '' - for x = 1, paddingLength do - nullPadding = nullPadding .. strchar(x) - end - - -- Bump all the patches forward. - for x, patch in ipairs(patches) do - patch.start1 = patch.start1 + paddingLength - patch.start2 = patch.start2 + paddingLength - end - - -- Add some padding on start of first diff. - local patch = patches[1] - local diffs = patch.diffs - local firstDiff = diffs[1] - if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then - -- Add nullPadding equality. - tinsert(diffs, 1, { DIFF_EQUAL, nullPadding }) - patch.start1 = patch.start1 - paddingLength -- Should be 0. - patch.start2 = patch.start2 - paddingLength -- Should be 0. - patch.length1 = patch.length1 + paddingLength - patch.length2 = patch.length2 + paddingLength - elseif paddingLength > #firstDiff[2] then - -- Grow first equality. - local extraLength = paddingLength - #firstDiff[2] - firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2] - patch.start1 = patch.start1 - extraLength - patch.start2 = patch.start2 - extraLength - patch.length1 = patch.length1 + extraLength - patch.length2 = patch.length2 + extraLength - end - - -- Add some padding on end of last diff. - patch = patches[#patches] - diffs = patch.diffs - local lastDiff = diffs[#diffs] - if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then - -- Add nullPadding equality. - diffs[#diffs + 1] = { DIFF_EQUAL, nullPadding } - patch.length1 = patch.length1 + paddingLength - patch.length2 = patch.length2 + paddingLength - elseif paddingLength > #lastDiff[2] then - -- Grow last equality. - local extraLength = paddingLength - #lastDiff[2] - lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength) - patch.length1 = patch.length1 + extraLength - patch.length2 = patch.length2 + extraLength - end - - return nullPadding -end - ---[[ -* Look through the patches and break up any which are longer than the maximum -* limit of the match algorithm. -* Intended to be called only from within patch_apply. -* @param {Array.<_new_patch_obj>} patches Array of patch objects. ---]] -function _patch_splitMax(patches) - local patch_size = Match_MaxBits - local x = 1 - while true do - local patch = patches[x] - if patch == nil then - return - end - if patch.length1 > patch_size then - local bigpatch = patch - -- Remove the big old patch. - tremove(patches, x) - x = x - 1 - local start1 = bigpatch.start1 - local start2 = bigpatch.start2 - local precontext = '' - while bigpatch.diffs[1] do - -- Create one of several smaller patches. - local patch = _new_patch_obj() - local empty = true - patch.start1 = start1 - #precontext - patch.start2 = start2 - #precontext - if precontext ~= '' then - patch.length1, patch.length2 = #precontext, #precontext - patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, precontext } - end - while bigpatch.diffs[1] and (patch.length1 < patch_size - Patch_Margin) do - local diff_type = bigpatch.diffs[1][1] - local diff_text = bigpatch.diffs[1][2] - if diff_type == DIFF_INSERT then - -- Insertions are harmless. - patch.length2 = patch.length2 + #diff_text - start2 = start2 + #diff_text - patch.diffs[#patch.diffs + 1] = bigpatch.diffs[1] - tremove(bigpatch.diffs, 1) - empty = false - elseif - (diff_type == DIFF_DELETE) - and (#patch.diffs == 1) - and (patch.diffs[1][1] == DIFF_EQUAL) - and (#diff_text > 2 * patch_size) - then - -- This is a large deletion. Let it pass in one chunk. - patch.length1 = patch.length1 + #diff_text - start1 = start1 + #diff_text - empty = false - patch.diffs[#patch.diffs + 1] = { diff_type, diff_text } - tremove(bigpatch.diffs, 1) - else - -- Deletion or equality. - -- Only take as much as we can stomach. - diff_text = strsub(diff_text, 1, patch_size - patch.length1 - Patch_Margin) - patch.length1 = patch.length1 + #diff_text - start1 = start1 + #diff_text - if diff_type == DIFF_EQUAL then - patch.length2 = patch.length2 + #diff_text - start2 = start2 + #diff_text - else - empty = false - end - patch.diffs[#patch.diffs + 1] = { diff_type, diff_text } - if diff_text == bigpatch.diffs[1][2] then - tremove(bigpatch.diffs, 1) - else - bigpatch.diffs[1][2] = strsub(bigpatch.diffs[1][2], #diff_text + 1) - end - end - end - -- Compute the head context for the next patch. - precontext = _diff_text2(patch.diffs) - precontext = strsub(precontext, -Patch_Margin) - -- Append the end context for this patch. - local postcontext = strsub(_diff_text1(bigpatch.diffs), 1, Patch_Margin) - if postcontext ~= '' then - patch.length1 = patch.length1 + #postcontext - patch.length2 = patch.length2 + #postcontext - if patch.diffs[1] and (patch.diffs[#patch.diffs][1] == DIFF_EQUAL) then - patch.diffs[#patch.diffs][2] = patch.diffs[#patch.diffs][2] .. postcontext - else - patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, postcontext } - end - end - if not empty then - x = x + 1 - tinsert(patches, x, patch) - end - end - end - x = x + 1 - end -end - ---[[ -* Emulate GNU diff's format. -* Header: @@ -382,8 +481,9 @@ -* @return {string} The GNU diff string. ---]] -function _patch_appendText(patch, text) - local coords1, coords2 - local length1, length2 = patch.length1, patch.length2 - local start1, start2 = patch.start1, patch.start2 - local diffs = patch.diffs - - if length1 == 1 then - coords1 = start1 - else - coords1 = ((length1 == 0) and (start1 - 1) or start1) .. ',' .. length1 - end - - if length2 == 1 then - coords2 = start2 - else - coords2 = ((length2 == 0) and (start2 - 1) or start2) .. ',' .. length2 - end - text[#text + 1] = '@@ -' .. coords1 .. ' +' .. coords2 .. ' @@\n' - - local op - -- Escape the body of the patch with %xx notation. - for x, diff in ipairs(patch.diffs) do - local diff_type = diff[1] - if diff_type == DIFF_INSERT then - op = '+' - elseif diff_type == DIFF_DELETE then - op = '-' - elseif diff_type == DIFF_EQUAL then - op = ' ' - end - text[#text + 1] = op .. gsub(diffs[x][2], percentEncode_pattern, percentEncode_replace) .. '\n' - end - - return text -end - --- Expose the API -local _M = {} - -_M.DIFF_DELETE = DIFF_DELETE -_M.DIFF_INSERT = DIFF_INSERT -_M.DIFF_EQUAL = DIFF_EQUAL - -_M.diff_main = diff_main -_M.diff_cleanupSemantic = diff_cleanupSemantic -_M.diff_cleanupEfficiency = diff_cleanupEfficiency -_M.diff_levenshtein = diff_levenshtein -_M.diff_prettyHtml = diff_prettyHtml - -_M.match_main = match_main - -_M.patch_make = patch_make -_M.patch_toText = patch_toText -_M.patch_fromText = patch_fromText -_M.patch_apply = patch_apply - --- Expose some non-API functions as well, for testing purposes etc. -_M.diff_commonPrefix = _diff_commonPrefix -_M.diff_commonSuffix = _diff_commonSuffix -_M.diff_commonOverlap = _diff_commonOverlap -_M.diff_halfMatch = _diff_halfMatch -_M.diff_bisect = _diff_bisect -_M.diff_cleanupMerge = _diff_cleanupMerge -_M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless -_M.diff_text1 = _diff_text1 -_M.diff_text2 = _diff_text2 -_M.diff_toDelta = _diff_toDelta -_M.diff_fromDelta = _diff_fromDelta -_M.diff_xIndex = _diff_xIndex -_M.match_alphabet = _match_alphabet -_M.match_bitap = _match_bitap -_M.new_patch_obj = _new_patch_obj -_M.patch_addContext = _patch_addContext -_M.patch_splitMax = _patch_splitMax -_M.patch_addPadding = _patch_addPadding -_M.settings = settings - -return _M diff --git a/tests/diff_spec.lua b/tests/diff_spec.lua index 58e2f4c9..41ed262c 100644 --- a/tests/diff_spec.lua +++ b/tests/diff_spec.lua @@ -229,4 +229,79 @@ describe('CopilotChat.utils.diff', function() assert.is_true(applied) assert.are.same({ 'newstart', 'context' }, result) end) + + it('may confuse similar variable names', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ -1,2 +1,2 @@ +-local x = 1 ++local x = 10 +]] + local original = { + 'local x = 1', + 'local y = 2', + 'local x = 3', + 'local z = 4', + } + local result, applied = diff.apply_unified_diff(diff_text, table.concat(original, '\n')) + assert.is_true(applied) + assert.are.same({ + 'local x = 10', + 'local y = 2', + 'local x = 3', + 'local z = 4', + }, result) + end) + + it('may match wrong substring with partial matches', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ -1,2 +1,2 @@ +-old_value ++new_value +]] + local original = { + 'value', + 'old_value', + 'very_old_value', + } + local result, applied = diff.apply_unified_diff(diff_text, table.concat(original, '\n')) + assert.is_false(applied) -- not applied cleanly, but adjusted + assert.are.same({ + 'value', + 'new_value', + 'very_old_value', + }, result) + end) + + it('may apply to wrong instance of identical boilerplate code', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ -1,3 +1,3 @@ + return { +- status = "old" ++ status = "new" +]] + local original = { + 'return {', + ' status = "old"', + '}', + 'return {', + ' status = "old"', + '}', + } + local result, applied = diff.apply_unified_diff(diff_text, table.concat(original, '\n')) + assert.is_true(applied) + assert.are.same({ + 'return {', + ' status = "new"', + '}', + 'return {', + ' status = "old"', + '}', + }, result) + end) end)