diff --git a/README.md b/README.md
index ffdbb60b..89607cbc 100644
--- a/README.md
+++ b/README.md
@@ -523,14 +523,6 @@ make test
See [CONTRIBUTING.md](/CONTRIBUTING.md) for detailed guidelines.
-# Acknowledgments
-
-## diff-match-patch
-
-CopilotChat.nvim includes [diff-match-patch (Lua port)](https://github.com/google/diff-match-patch) for diffing and patching functionality.
-Copyright 2018 The diff-match-patch Authors.
-Licensed under the Apache License 2.0.
-
# Contributors
Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)):
diff --git a/lua/CopilotChat/utils/diff.lua b/lua/CopilotChat/utils/diff.lua
index 17eb3091..51cf1887 100644
--- a/lua/CopilotChat/utils/diff.lua
+++ b/lua/CopilotChat/utils/diff.lua
@@ -40,55 +40,81 @@ local function parse_hunks(diff_text)
return hunks
end
---- Apply a single hunk to content, with fallback/context logic
+--- Try to match old_snippet in lines starting at approximate start_line
+---@param lines table
+---@param old_snippet table
+---@param approx_start number
+---@param search_range number
+---@return number? matched_start
+local function find_best_match(lines, old_snippet, approx_start, search_range)
+ local best_idx, best_score = nil, -1
+ local old_len = #old_snippet
+
+ if old_len == 0 then
+ return approx_start
+ end
+
+ local min_start = math.max(1, approx_start - search_range)
+ local max_start = math.min(#lines - old_len + 1, approx_start + search_range)
+
+ for start_idx = min_start, max_start do
+ local score = 0
+ for i = 1, old_len do
+ if vim.trim(lines[start_idx + i - 1] or '') == vim.trim(old_snippet[i] or '') then
+ score = score + 1
+ end
+ end
+
+ if score > best_score then
+ best_score = score
+ best_idx = start_idx
+ end
+
+ if score == old_len then
+ return best_idx
+ end
+ end
+
+ if best_score >= math.ceil(old_len * 0.8) then
+ return best_idx
+ end
+
+ return nil
+end
+
+--- Apply a single hunk to content
---@param hunk table
---@param content string
---@return string patched_content, boolean applied_cleanly
local function apply_hunk(hunk, content)
- local dmp = require('CopilotChat.vendor.diff_match_patch')
- local patch = dmp.patch_make(table.concat(hunk.old_snippet, '\n'), table.concat(hunk.new_snippet, '\n'))
-
- -- First try: direct application
- local patched, results = dmp.patch_apply(patch, content)
- if not vim.tbl_contains(results, false) then
- return patched, true
- end
-
- -- Fallback: direct replacement
local lines = vim.split(content, '\n')
- local insert_idx = hunk.start_old or 1
- if not hunk.start_old then
- -- No starting point, try to find best match
- local match_idx, best_score = nil, -1
- local context_lines = vim.tbl_filter(function(line)
- return line and line ~= ''
- end, hunk.old_snippet)
- local context_len = #context_lines
- if context_len > 0 then
- for i = 1, #lines - context_len + 1 do
- local score = 0
- for j = 1, context_len do
- if vim.trim(lines[i + j - 1] or '') == vim.trim(context_lines[j] or '') then
- score = score + 1
- end
- end
- if score > best_score then
- best_score = score
- match_idx = i
- end
- end
+ local start_idx = hunk.start_old
+
+ -- If we have a start line hint, try to find best match within +/- 2 lines
+ if start_idx and start_idx > 0 and start_idx <= #lines then
+ local match_idx = find_best_match(lines, hunk.old_snippet, start_idx, 2)
+ if match_idx then
+ start_idx = match_idx
end
- if best_score > 0 and match_idx then
- insert_idx = match_idx
+ else
+ -- No valid start line, search for best match in whole content
+ local match_idx = find_best_match(lines, hunk.old_snippet, 1, #lines)
+ if match_idx then
+ start_idx = match_idx
+ else
+ start_idx = 1
end
end
- local start_idx = insert_idx
- local end_idx = insert_idx + #hunk.old_snippet
+ -- Replace old lines with new lines
+ local end_idx = start_idx + #hunk.old_snippet - 1
local new_lines = vim.list_slice(lines, 1, start_idx - 1)
vim.list_extend(new_lines, hunk.new_snippet)
vim.list_extend(new_lines, lines, end_idx + 1, #lines)
- return table.concat(new_lines, '\n'), false
+
+ -- Check if we matched exactly at the hinted position
+ local applied_cleanly = find_best_match(lines, hunk.old_snippet, hunk.start_old or start_idx, 0) == start_idx
+ return table.concat(new_lines, '\n'), applied_cleanly
end
--- Apply unified diff to a table of lines and return new lines
@@ -104,16 +130,25 @@ function M.apply_unified_diff(diff_text, original_content)
new_content = patched
applied = applied or ok
end
- local original_lines = vim.split(original_content, '\n', { trimempty = true })
+
local new_lines = vim.split(new_content, '\n', { trimempty = true })
+ local hunks = vim.diff(
+ original_content,
+ new_content,
+ { algorithm = 'myers', ctxlen = 10, interhunkctxlen = 10, ignore_whitespace_change = true, result_type = 'indices' }
+ )
+ if not hunks or #hunks == 0 then
+ return new_lines, applied, nil, nil
+ end
local first, last
- local max_len = math.max(#original_lines, #new_lines)
- for i = 1, max_len do
- if original_lines[i] ~= new_lines[i] then
- if not first then
- first = i
- end
- last = i
+ for _, hunk in ipairs(hunks) do
+ local hunk_start = hunk[1]
+ local hunk_end = hunk[1] + hunk[2] - 1
+ if not first or hunk_start < first then
+ first = hunk_start
+ end
+ if not last or hunk_end > last then
+ last = hunk_end
end
end
return new_lines, applied, first, last
@@ -129,7 +164,7 @@ function M.get_diff(block, lines)
return block.content, content
end
- local patched_lines = vim.split(block.content, '\n')
+ local patched_lines = vim.split(block.content, '\n', { trimempty = true })
local start_idx = block.header.start_line
local end_idx = block.header.end_line
local original_lines = lines
diff --git a/lua/CopilotChat/vendor/diff_match_patch.lua b/lua/CopilotChat/vendor/diff_match_patch.lua
deleted file mode 100644
index b2c397d0..00000000
--- a/lua/CopilotChat/vendor/diff_match_patch.lua
+++ /dev/null
@@ -1,2085 +0,0 @@
---[[
-* Diff Match and Patch
-* Copyright 2018 The diff-match-patch Authors.
-* https://github.com/google/diff-match-patch
-*
-* Based on the JavaScript implementation by Neil Fraser.
-* Ported to Lua by Duncan Cross.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
---]]
-
-local bit = require('bit')
-local band, bor, lshift = bit.band, bit.bor, bit.lshift
-local type, setmetatable, ipairs, select = type, setmetatable, ipairs, select
-local unpack, tonumber, error = unpack, tonumber, error
-local strsub, strbyte, strchar, gmatch, gsub = string.sub, string.byte, string.char, string.gmatch, string.gsub
-local strmatch, strfind, strformat = string.match, string.find, string.format
-local tinsert, tremove, tconcat = table.insert, table.remove, table.concat
-local max, min, floor, ceil, abs = math.max, math.min, math.floor, math.ceil, math.abs
-local clock = os.clock
-
--- Utility functions.
-
-local percentEncode_pattern = "[^A-Za-z0-9%-=;',./~!@#$%&*%(%)_%+ %?]"
-local function percentEncode_replace(v)
- return strformat('%%%02X', strbyte(v))
-end
-
-local function indexOf(a, b, start)
- if #b == 0 then
- return nil
- end
- return strfind(a, b, start, true)
-end
-
-local htmlEncode_pattern = '[&<>\n]'
-local htmlEncode_replace = {
- ['&'] = '&',
- ['<'] = '<',
- ['>'] = '>',
- ['\n'] = '¶
',
-}
-
--- Public API Functions
--- (Exported at the end of the script)
-
-local diff_main, diff_cleanupSemantic, diff_cleanupEfficiency, diff_levenshtein, diff_prettyHtml
-
-local match_main
-
-local patch_make, patch_toText, patch_fromText, patch_apply
-
---[[
-* The data structure representing a diff is an array of tuples:
-* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}}
-* which means: delete 'Hello', add 'Goodbye' and keep ' world.'
---]]
-local DIFF_DELETE = -1
-local DIFF_INSERT = 1
-local DIFF_EQUAL = 0
-
--- Number of seconds to map a diff before giving up (0 for infinity).
-local Diff_Timeout = 1.0
--- Cost of an empty edit operation in terms of edit characters.
-local Diff_EditCost = 4
--- At what point is no match declared (0.0 = perfection, 1.0 = very loose).
-local Match_Threshold = 0.5
--- How far to search for a match (0 = exact location, 1000+ = broad match).
--- A match this many characters away from the expected location will add
--- 1.0 to the score (0.0 is a perfect match).
-local Match_Distance = 1000
--- When deleting a large block of text (over ~64 characters), how close do
--- the contents have to be to match the expected contents. (0.0 = perfection,
--- 1.0 = very loose). Note that Match_Threshold controls how closely the
--- end points of a delete need to match.
-local Patch_DeleteThreshold = 0.5
--- Chunk size for context length.
-local Patch_Margin = 4
--- The number of bits in an int.
-local Match_MaxBits = 32
-
-function settings(new)
- if new then
- Diff_Timeout = new.Diff_Timeout or Diff_Timeout
- Diff_EditCost = new.Diff_EditCost or Diff_EditCost
- Match_Threshold = new.Match_Threshold or Match_Threshold
- Match_Distance = new.Match_Distance or Match_Distance
- Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold
- Patch_Margin = new.Patch_Margin or Patch_Margin
- Match_MaxBits = new.Match_MaxBits or Match_MaxBits
- else
- return {
- Diff_Timeout = Diff_Timeout,
- Diff_EditCost = Diff_EditCost,
- Match_Threshold = Match_Threshold,
- Match_Distance = Match_Distance,
- Patch_DeleteThreshold = Patch_DeleteThreshold,
- Patch_Margin = Patch_Margin,
- Match_MaxBits = Match_MaxBits,
- }
- end
-end
-
--- ---------------------------------------------------------------------------
--- DIFF API
--- ---------------------------------------------------------------------------
-
--- The private diff functions
-local _diff_compute, _diff_bisect, _diff_halfMatchI, _diff_halfMatch, _diff_cleanupSemanticScore, _diff_cleanupSemanticLossless, _diff_cleanupMerge, _diff_commonPrefix, _diff_commonSuffix, _diff_commonOverlap, _diff_xIndex, _diff_text1, _diff_text2, _diff_toDelta, _diff_fromDelta
-
---[[
-* Find the differences between two texts. Simplifies the problem by stripping
-* any common prefix or suffix off the texts before diffing.
-* @param {string} text1 Old string to be diffed.
-* @param {string} text2 New string to be diffed.
-* @param {boolean} opt_checklines Has no effect in Lua.
-* @param {number} opt_deadline Optional time when the diff should be complete
-* by. Used internally for recursive calls. Users should set DiffTimeout
-* instead.
-* @return {Array.>} Array of diff tuples.
---]]
-function diff_main(text1, text2, opt_checklines, opt_deadline)
- -- Set a deadline by which time the diff must be complete.
- if opt_deadline == nil then
- if Diff_Timeout <= 0 then
- opt_deadline = 2 ^ 31
- else
- opt_deadline = clock() + Diff_Timeout
- end
- end
- local deadline = opt_deadline
-
- -- Check for null inputs.
- if text1 == nil or text1 == nil then
- error('Null inputs. (diff_main)')
- end
-
- -- Check for equality (speedup).
- if text1 == text2 then
- if #text1 > 0 then
- return { { DIFF_EQUAL, text1 } }
- end
- return {}
- end
-
- -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of
- -- implementing the line-mode speedup.
- local checklines = false
-
- -- Trim off common prefix (speedup).
- local commonlength = _diff_commonPrefix(text1, text2)
- local commonprefix
- if commonlength > 0 then
- commonprefix = strsub(text1, 1, commonlength)
- text1 = strsub(text1, commonlength + 1)
- text2 = strsub(text2, commonlength + 1)
- end
-
- -- Trim off common suffix (speedup).
- commonlength = _diff_commonSuffix(text1, text2)
- local commonsuffix
- if commonlength > 0 then
- commonsuffix = strsub(text1, -commonlength)
- text1 = strsub(text1, 1, -commonlength - 1)
- text2 = strsub(text2, 1, -commonlength - 1)
- end
-
- -- Compute the diff on the middle block.
- local diffs = _diff_compute(text1, text2, checklines, deadline)
-
- -- Restore the prefix and suffix.
- if commonprefix then
- tinsert(diffs, 1, { DIFF_EQUAL, commonprefix })
- end
- if commonsuffix then
- diffs[#diffs + 1] = { DIFF_EQUAL, commonsuffix }
- end
-
- _diff_cleanupMerge(diffs)
- return diffs
-end
-
---[[
-* Reduce the number of edits by eliminating semantically trivial equalities.
-* @param {Array.>} diffs Array of diff tuples.
---]]
-function diff_cleanupSemantic(diffs)
- local changes = false
- local equalities = {} -- Stack of indices where equalities are found.
- local equalitiesLength = 0 -- Keeping our own length var is faster.
- local lastEquality = nil
- -- Always equal to diffs[equalities[equalitiesLength]][2]
- local pointer = 1 -- Index of current position.
- -- Number of characters that changed prior to the equality.
- local length_insertions1 = 0
- local length_deletions1 = 0
- -- Number of characters that changed after the equality.
- local length_insertions2 = 0
- local length_deletions2 = 0
-
- while diffs[pointer] do
- if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
- equalitiesLength = equalitiesLength + 1
- equalities[equalitiesLength] = pointer
- length_insertions1 = length_insertions2
- length_deletions1 = length_deletions2
- length_insertions2 = 0
- length_deletions2 = 0
- lastEquality = diffs[pointer][2]
- else -- An insertion or deletion.
- if diffs[pointer][1] == DIFF_INSERT then
- length_insertions2 = length_insertions2 + #diffs[pointer][2]
- else
- length_deletions2 = length_deletions2 + #diffs[pointer][2]
- end
- -- Eliminate an equality that is smaller or equal to the edits on both
- -- sides of it.
- if
- lastEquality
- and (#lastEquality <= max(length_insertions1, length_deletions1))
- and (#lastEquality <= max(length_insertions2, length_deletions2))
- then
- -- Duplicate record.
- tinsert(diffs, equalities[equalitiesLength], { DIFF_DELETE, lastEquality })
- -- Change second copy to insert.
- diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
- -- Throw away the equality we just deleted.
- equalitiesLength = equalitiesLength - 1
- -- Throw away the previous equality (it needs to be reevaluated).
- equalitiesLength = equalitiesLength - 1
- pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
- length_insertions1, length_deletions1 = 0, 0 -- Reset the counters.
- length_insertions2, length_deletions2 = 0, 0
- lastEquality = nil
- changes = true
- end
- end
- pointer = pointer + 1
- end
-
- -- Normalize the diff.
- if changes then
- _diff_cleanupMerge(diffs)
- end
- _diff_cleanupSemanticLossless(diffs)
-
- -- Find any overlaps between deletions and insertions.
- -- e.g: abcxxxxxxdef
- -- -> abcxxxdef
- -- e.g: xxxabcdefxxx
- -- -> defxxxabc
- -- Only extract an overlap if it is as big as the edit ahead or behind it.
- pointer = 2
- while diffs[pointer] do
- if diffs[pointer - 1][1] == DIFF_DELETE and diffs[pointer][1] == DIFF_INSERT then
- local deletion = diffs[pointer - 1][2]
- local insertion = diffs[pointer][2]
- local overlap_length1 = _diff_commonOverlap(deletion, insertion)
- local overlap_length2 = _diff_commonOverlap(insertion, deletion)
- if overlap_length1 >= overlap_length2 then
- if overlap_length1 >= #deletion / 2 or overlap_length1 >= #insertion / 2 then
- -- Overlap found. Insert an equality and trim the surrounding edits.
- tinsert(diffs, pointer, { DIFF_EQUAL, strsub(insertion, 1, overlap_length1) })
- diffs[pointer - 1][2] = strsub(deletion, 1, #deletion - overlap_length1)
- diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1)
- pointer = pointer + 1
- end
- else
- if overlap_length2 >= #deletion / 2 or overlap_length2 >= #insertion / 2 then
- -- Reverse overlap found.
- -- Insert an equality and swap and trim the surrounding edits.
- tinsert(diffs, pointer, { DIFF_EQUAL, strsub(deletion, 1, overlap_length2) })
- diffs[pointer - 1] = { DIFF_INSERT, strsub(insertion, 1, #insertion - overlap_length2) }
- diffs[pointer + 1] = { DIFF_DELETE, strsub(deletion, overlap_length2 + 1) }
- pointer = pointer + 1
- end
- end
- pointer = pointer + 1
- end
- pointer = pointer + 1
- end
-end
-
---[[
-* Reduce the number of edits by eliminating operationally trivial equalities.
-* @param {Array.>} diffs Array of diff tuples.
---]]
-function diff_cleanupEfficiency(diffs)
- local changes = false
- -- Stack of indices where equalities are found.
- local equalities = {}
- -- Keeping our own length var is faster.
- local equalitiesLength = 0
- -- Always equal to diffs[equalities[equalitiesLength]][2]
- local lastEquality = nil
- -- Index of current position.
- local pointer = 1
-
- -- The following four are really booleans but are stored as numbers because
- -- they are used at one point like this:
- --
- -- (pre_ins + pre_del + post_ins + post_del) == 3
- --
- -- ...i.e. checking that 3 of them are true and 1 of them is false.
-
- -- Is there an insertion operation before the last equality.
- local pre_ins = 0
- -- Is there a deletion operation before the last equality.
- local pre_del = 0
- -- Is there an insertion operation after the last equality.
- local post_ins = 0
- -- Is there a deletion operation after the last equality.
- local post_del = 0
-
- while diffs[pointer] do
- if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
- local diffText = diffs[pointer][2]
- if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then
- -- Candidate found.
- equalitiesLength = equalitiesLength + 1
- equalities[equalitiesLength] = pointer
- pre_ins, pre_del = post_ins, post_del
- lastEquality = diffText
- else
- -- Not a candidate, and can never become one.
- equalitiesLength = 0
- lastEquality = nil
- end
- post_ins, post_del = 0, 0
- else -- An insertion or deletion.
- if diffs[pointer][1] == DIFF_DELETE then
- post_del = 1
- else
- post_ins = 1
- end
- --[[
- * Five types to be split:
- * ABXYCD
- * AXCD
- * ABXC
- * AXCD
- * ABXC
- --]]
- if
- lastEquality
- and (
- (pre_ins + pre_del + post_ins + post_del == 4)
- or ((#lastEquality < Diff_EditCost / 2) and (pre_ins + pre_del + post_ins + post_del == 3))
- )
- then
- -- Duplicate record.
- tinsert(diffs, equalities[equalitiesLength], { DIFF_DELETE, lastEquality })
- -- Change second copy to insert.
- diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
- -- Throw away the equality we just deleted.
- equalitiesLength = equalitiesLength - 1
- lastEquality = nil
- if (pre_ins == 1) and (pre_del == 1) then
- -- No changes made which could affect previous entry, keep going.
- post_ins, post_del = 1, 1
- equalitiesLength = 0
- else
- -- Throw away the previous equality.
- equalitiesLength = equalitiesLength - 1
- pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
- post_ins, post_del = 0, 0
- end
- changes = true
- end
- end
- pointer = pointer + 1
- end
-
- if changes then
- _diff_cleanupMerge(diffs)
- end
-end
-
---[[
-* Compute the Levenshtein distance; the number of inserted, deleted or
-* substituted characters.
-* @param {Array.>} diffs Array of diff tuples.
-* @return {number} Number of changes.
---]]
-function diff_levenshtein(diffs)
- local levenshtein = 0
- local insertions, deletions = 0, 0
- for x, diff in ipairs(diffs) do
- local op, data = diff[1], diff[2]
- if op == DIFF_INSERT then
- insertions = insertions + #data
- elseif op == DIFF_DELETE then
- deletions = deletions + #data
- elseif op == DIFF_EQUAL then
- -- A deletion and an insertion is one substitution.
- levenshtein = levenshtein + max(insertions, deletions)
- insertions = 0
- deletions = 0
- end
- end
- levenshtein = levenshtein + max(insertions, deletions)
- return levenshtein
-end
-
---[[
-* Convert a diff array into a pretty HTML report.
-* @param {Array.>} diffs Array of diff tuples.
-* @return {string} HTML representation.
---]]
-function diff_prettyHtml(diffs)
- local html = {}
- for x, diff in ipairs(diffs) do
- local op = diff[1] -- Operation (insert, delete, equal)
- local data = diff[2] -- Text of change.
- local text = gsub(data, htmlEncode_pattern, htmlEncode_replace)
- if op == DIFF_INSERT then
- html[x] = '' .. text .. ''
- elseif op == DIFF_DELETE then
- html[x] = '' .. text .. ''
- elseif op == DIFF_EQUAL then
- html[x] = '' .. text .. ''
- end
- end
- return tconcat(html)
-end
-
--- ---------------------------------------------------------------------------
--- UNOFFICIAL/PRIVATE DIFF FUNCTIONS
--- ---------------------------------------------------------------------------
-
---[[
-* Find the differences between two texts. Assumes that the texts do not
-* have any common prefix or suffix.
-* @param {string} text1 Old string to be diffed.
-* @param {string} text2 New string to be diffed.
-* @param {boolean} checklines Has no effect in Lua.
-* @param {number} deadline Time when the diff should be complete by.
-* @return {Array.>} Array of diff tuples.
-* @private
---]]
-function _diff_compute(text1, text2, checklines, deadline)
- if #text1 == 0 then
- -- Just add some text (speedup).
- return { { DIFF_INSERT, text2 } }
- end
-
- if #text2 == 0 then
- -- Just delete some text (speedup).
- return { { DIFF_DELETE, text1 } }
- end
-
- local diffs
-
- local longtext = (#text1 > #text2) and text1 or text2
- local shorttext = (#text1 > #text2) and text2 or text1
- local i = indexOf(longtext, shorttext)
-
- if i ~= nil then
- -- Shorter text is inside the longer text (speedup).
- diffs = {
- { DIFF_INSERT, strsub(longtext, 1, i - 1) },
- { DIFF_EQUAL, shorttext },
- { DIFF_INSERT, strsub(longtext, i + #shorttext) },
- }
- -- Swap insertions for deletions if diff is reversed.
- if #text1 > #text2 then
- diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE
- end
- return diffs
- end
-
- if #shorttext == 1 then
- -- Single character string.
- -- After the previous speedup, the character can't be an equality.
- return { { DIFF_DELETE, text1 }, { DIFF_INSERT, text2 } }
- end
-
- -- Check to see if the problem can be split in two.
- do
- local text1_a, text1_b, text2_a, text2_b, mid_common = _diff_halfMatch(text1, text2)
-
- if text1_a then
- -- A half-match was found, sort out the return data.
- -- Send both pairs off for separate processing.
- local diffs_a = diff_main(text1_a, text2_a, checklines, deadline)
- local diffs_b = diff_main(text1_b, text2_b, checklines, deadline)
- -- Merge the results.
- local diffs_a_len = #diffs_a
- diffs = diffs_a
- diffs[diffs_a_len + 1] = { DIFF_EQUAL, mid_common }
- for i, b_diff in ipairs(diffs_b) do
- diffs[diffs_a_len + 1 + i] = b_diff
- end
- return diffs
- end
- end
-
- return _diff_bisect(text1, text2, deadline)
-end
-
---[[
-* Find the 'middle snake' of a diff, split the problem in two
-* and return the recursively constructed diff.
-* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
-* @param {string} text1 Old string to be diffed.
-* @param {string} text2 New string to be diffed.
-* @param {number} deadline Time at which to bail if not yet complete.
-* @return {Array.>} Array of diff tuples.
-* @private
---]]
-function _diff_bisect(text1, text2, deadline)
- -- Cache the text lengths to prevent multiple calls.
- local text1_length = #text1
- local text2_length = #text2
- local _sub, _element
- local max_d = ceil((text1_length + text2_length) / 2)
- local v_offset = max_d
- local v_length = 2 * max_d
- local v1 = {}
- local v2 = {}
- -- Setting all elements to -1 is faster in Lua than mixing integers and nil.
- for x = 0, v_length - 1 do
- v1[x] = -1
- v2[x] = -1
- end
- v1[v_offset + 1] = 0
- v2[v_offset + 1] = 0
- local delta = text1_length - text2_length
- -- If the total number of characters is odd, then
- -- the front path will collide with the reverse path.
- local front = (delta % 2 ~= 0)
- -- Offsets for start and end of k loop.
- -- Prevents mapping of space beyond the grid.
- local k1start = 0
- local k1end = 0
- local k2start = 0
- local k2end = 0
- for d = 0, max_d - 1 do
- -- Bail out if deadline is reached.
- if clock() > deadline then
- break
- end
-
- -- Walk the front path one step.
- for k1 = -d + k1start, d - k1end, 2 do
- local k1_offset = v_offset + k1
- local x1
- if (k1 == -d) or ((k1 ~= d) and (v1[k1_offset - 1] < v1[k1_offset + 1])) then
- x1 = v1[k1_offset + 1]
- else
- x1 = v1[k1_offset - 1] + 1
- end
- local y1 = x1 - k1
- while (x1 <= text1_length) and (y1 <= text2_length) and (strsub(text1, x1, x1) == strsub(text2, y1, y1)) do
- x1 = x1 + 1
- y1 = y1 + 1
- end
- v1[k1_offset] = x1
- if x1 > text1_length + 1 then
- -- Ran off the right of the graph.
- k1end = k1end + 2
- elseif y1 > text2_length + 1 then
- -- Ran off the bottom of the graph.
- k1start = k1start + 2
- elseif front then
- local k2_offset = v_offset + delta - k1
- if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then
- -- Mirror x2 onto top-left coordinate system.
- local x2 = text1_length - v2[k2_offset] + 1
- if x1 > x2 then
- -- Overlap detected.
- return _diff_bisectSplit(text1, text2, x1, y1, deadline)
- end
- end
- end
- end
-
- -- Walk the reverse path one step.
- for k2 = -d + k2start, d - k2end, 2 do
- local k2_offset = v_offset + k2
- local x2
- if (k2 == -d) or ((k2 ~= d) and (v2[k2_offset - 1] < v2[k2_offset + 1])) then
- x2 = v2[k2_offset + 1]
- else
- x2 = v2[k2_offset - 1] + 1
- end
- local y2 = x2 - k2
- while (x2 <= text1_length) and (y2 <= text2_length) and (strsub(text1, -x2, -x2) == strsub(text2, -y2, -y2)) do
- x2 = x2 + 1
- y2 = y2 + 1
- end
- v2[k2_offset] = x2
- if x2 > text1_length + 1 then
- -- Ran off the left of the graph.
- k2end = k2end + 2
- elseif y2 > text2_length + 1 then
- -- Ran off the top of the graph.
- k2start = k2start + 2
- elseif not front then
- local k1_offset = v_offset + delta - k2
- if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then
- local x1 = v1[k1_offset]
- local y1 = v_offset + x1 - k1_offset
- -- Mirror x2 onto top-left coordinate system.
- x2 = text1_length - x2 + 1
- if x1 > x2 then
- -- Overlap detected.
- return _diff_bisectSplit(text1, text2, x1, y1, deadline)
- end
- end
- end
- end
- end
- -- Diff took too long and hit the deadline or
- -- number of diffs equals number of characters, no commonality at all.
- return { { DIFF_DELETE, text1 }, { DIFF_INSERT, text2 } }
-end
-
---[[
- * Given the location of the 'middle snake', split the diff in two parts
- * and recurse.
- * @param {string} text1 Old string to be diffed.
- * @param {string} text2 New string to be diffed.
- * @param {number} x Index of split point in text1.
- * @param {number} y Index of split point in text2.
- * @param {number} deadline Time at which to bail if not yet complete.
- * @return {Array.>} Array of diff tuples.
- * @private
---]]
-function _diff_bisectSplit(text1, text2, x, y, deadline)
- local text1a = strsub(text1, 1, x - 1)
- local text2a = strsub(text2, 1, y - 1)
- local text1b = strsub(text1, x)
- local text2b = strsub(text2, y)
-
- -- Compute both diffs serially.
- local diffs = diff_main(text1a, text2a, false, deadline)
- local diffsb = diff_main(text1b, text2b, false, deadline)
-
- local diffs_len = #diffs
- for i, v in ipairs(diffsb) do
- diffs[diffs_len + i] = v
- end
- return diffs
-end
-
---[[
-* Determine the common prefix of two strings.
-* @param {string} text1 First string.
-* @param {string} text2 Second string.
-* @return {number} The number of characters common to the start of each
-* string.
---]]
-function _diff_commonPrefix(text1, text2)
- -- Quick check for common null cases.
- if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1)) then
- return 0
- end
- -- Binary search.
- -- Performance analysis: https://neil.fraser.name/news/2007/10/09/
- local pointermin = 1
- local pointermax = min(#text1, #text2)
- local pointermid = pointermax
- local pointerstart = 1
- while pointermin < pointermid do
- if strsub(text1, pointerstart, pointermid) == strsub(text2, pointerstart, pointermid) then
- pointermin = pointermid
- pointerstart = pointermin
- else
- pointermax = pointermid
- end
- pointermid = floor(pointermin + (pointermax - pointermin) / 2)
- end
- return pointermid
-end
-
---[[
-* Determine the common suffix of two strings.
-* @param {string} text1 First string.
-* @param {string} text2 Second string.
-* @return {number} The number of characters common to the end of each string.
---]]
-function _diff_commonSuffix(text1, text2)
- -- Quick check for common null cases.
- if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, -1) ~= strbyte(text2, -1)) then
- return 0
- end
- -- Binary search.
- -- Performance analysis: https://neil.fraser.name/news/2007/10/09/
- local pointermin = 1
- local pointermax = min(#text1, #text2)
- local pointermid = pointermax
- local pointerend = 1
- while pointermin < pointermid do
- if strsub(text1, -pointermid, -pointerend) == strsub(text2, -pointermid, -pointerend) then
- pointermin = pointermid
- pointerend = pointermin
- else
- pointermax = pointermid
- end
- pointermid = floor(pointermin + (pointermax - pointermin) / 2)
- end
- return pointermid
-end
-
---[[
-* Determine if the suffix of one string is the prefix of another.
-* @param {string} text1 First string.
-* @param {string} text2 Second string.
-* @return {number} The number of characters common to the end of the first
-* string and the start of the second string.
-* @private
---]]
-function _diff_commonOverlap(text1, text2)
- -- Cache the text lengths to prevent multiple calls.
- local text1_length = #text1
- local text2_length = #text2
- -- Eliminate the null case.
- if text1_length == 0 or text2_length == 0 then
- return 0
- end
- -- Truncate the longer string.
- if text1_length > text2_length then
- text1 = strsub(text1, text1_length - text2_length + 1)
- elseif text1_length < text2_length then
- text2 = strsub(text2, 1, text1_length)
- end
- local text_length = min(text1_length, text2_length)
- -- Quick check for the worst case.
- if text1 == text2 then
- return text_length
- end
-
- -- Start by looking for a single character match
- -- and increase length until no match is found.
- -- Performance analysis: https://neil.fraser.name/news/2010/11/04/
- local best = 0
- local length = 1
- while true do
- local pattern = strsub(text1, text_length - length + 1)
- local found = strfind(text2, pattern, 1, true)
- if found == nil then
- return best
- end
- length = length + found - 1
- if found == 1 or strsub(text1, text_length - length + 1) == strsub(text2, 1, length) then
- best = length
- length = length + 1
- end
- end
-end
-
---[[
-* Does a substring of shorttext exist within longtext such that the substring
-* is at least half the length of longtext?
-* This speedup can produce non-minimal diffs.
-* Closure, but does not reference any external variables.
-* @param {string} longtext Longer string.
-* @param {string} shorttext Shorter string.
-* @param {number} i Start index of quarter length substring within longtext.
-* @return {?Array.} Five element Array, containing the prefix of
-* longtext, the suffix of longtext, the prefix of shorttext, the suffix
-* of shorttext and the common middle. Or nil if there was no match.
-* @private
---]]
-function _diff_halfMatchI(longtext, shorttext, i)
- -- Start with a 1/4 length substring at position i as a seed.
- local seed = strsub(longtext, i, i + floor(#longtext / 4))
- local j = 0 -- LUANOTE: do not change to 1, was originally -1
- local best_common = ''
- local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b
- while true do
- j = indexOf(shorttext, seed, j + 1)
- if j == nil then
- break
- end
- local prefixLength = _diff_commonPrefix(strsub(longtext, i), strsub(shorttext, j))
- local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1), strsub(shorttext, 1, j - 1))
- if #best_common < suffixLength + prefixLength then
- best_common = strsub(shorttext, j - suffixLength, j - 1) .. strsub(shorttext, j, j + prefixLength - 1)
- best_longtext_a = strsub(longtext, 1, i - suffixLength - 1)
- best_longtext_b = strsub(longtext, i + prefixLength)
- best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1)
- best_shorttext_b = strsub(shorttext, j + prefixLength)
- end
- end
- if #best_common * 2 >= #longtext then
- return { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common }
- else
- return nil
- end
-end
-
---[[
-* Do the two texts share a substring which is at least half the length of the
-* longer text?
-* @param {string} text1 First string.
-* @param {string} text2 Second string.
-* @return {?Array.} Five element Array, containing the prefix of
-* text1, the suffix of text1, the prefix of text2, the suffix of
-* text2 and the common middle. Or nil if there was no match.
-* @private
---]]
-function _diff_halfMatch(text1, text2)
- if Diff_Timeout <= 0 then
- -- Don't risk returning a non-optimal diff if we have unlimited time.
- return nil
- end
- local longtext = (#text1 > #text2) and text1 or text2
- local shorttext = (#text1 > #text2) and text2 or text1
- if (#longtext < 4) or (#shorttext * 2 < #longtext) then
- return nil -- Pointless.
- end
-
- -- First check if the second quarter is the seed for a half-match.
- local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4))
- -- Check again based on the third quarter.
- local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2))
- local hm
- if not hm1 and not hm2 then
- return nil
- elseif not hm2 then
- hm = hm1
- elseif not hm1 then
- hm = hm2
- else
- -- Both matched. Select the longest.
- hm = (#hm1[5] > #hm2[5]) and hm1 or hm2
- end
-
- -- A half-match was found, sort out the return data.
- local text1_a, text1_b, text2_a, text2_b
- if #text1 > #text2 then
- text1_a, text1_b = hm[1], hm[2]
- text2_a, text2_b = hm[3], hm[4]
- else
- text2_a, text2_b = hm[1], hm[2]
- text1_a, text1_b = hm[3], hm[4]
- end
- local mid_common = hm[5]
- return text1_a, text1_b, text2_a, text2_b, mid_common
-end
-
---[[
-* Given two strings, compute a score representing whether the internal
-* boundary falls on logical boundaries.
-* Scores range from 6 (best) to 0 (worst).
-* @param {string} one First string.
-* @param {string} two Second string.
-* @return {number} The score.
-* @private
---]]
-function _diff_cleanupSemanticScore(one, two)
- if (#one == 0) or (#two == 0) then
- -- Edges are the best.
- return 6
- end
-
- -- Each port of this function behaves slightly differently due to
- -- subtle differences in each language's definition of things like
- -- 'whitespace'. Since this function's purpose is largely cosmetic,
- -- the choice has been made to use each language's native features
- -- rather than force total conformity.
- local char1 = strsub(one, -1)
- local char2 = strsub(two, 1, 1)
- local nonAlphaNumeric1 = strmatch(char1, '%W')
- local nonAlphaNumeric2 = strmatch(char2, '%W')
- local whitespace1 = nonAlphaNumeric1 and strmatch(char1, '%s')
- local whitespace2 = nonAlphaNumeric2 and strmatch(char2, '%s')
- local lineBreak1 = whitespace1 and strmatch(char1, '%c')
- local lineBreak2 = whitespace2 and strmatch(char2, '%c')
- local blankLine1 = lineBreak1 and strmatch(one, '\n\r?\n$')
- local blankLine2 = lineBreak2 and strmatch(two, '^\r?\n\r?\n')
-
- if blankLine1 or blankLine2 then
- -- Five points for blank lines.
- return 5
- elseif lineBreak1 or lineBreak2 then
- -- Four points for line breaks.
- return 4
- elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then
- -- Three points for end of sentences.
- return 3
- elseif whitespace1 or whitespace2 then
- -- Two points for whitespace.
- return 2
- elseif nonAlphaNumeric1 or nonAlphaNumeric2 then
- -- One point for non-alphanumeric.
- return 1
- end
- return 0
-end
-
---[[
-* Look for single edits surrounded on both sides by equalities
-* which can be shifted sideways to align the edit to a word boundary.
-* e.g: The cat came. -> The cat came.
-* @param {Array.>} diffs Array of diff tuples.
---]]
-function _diff_cleanupSemanticLossless(diffs)
- local pointer = 2
- -- Intentionally ignore the first and last element (don't need checking).
- while diffs[pointer + 1] do
- local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
- if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then
- -- This is a single edit surrounded by equalities.
- local diff = diffs[pointer]
-
- local equality1 = prevDiff[2]
- local edit = diff[2]
- local equality2 = nextDiff[2]
-
- -- First, shift the edit as far left as possible.
- local commonOffset = _diff_commonSuffix(equality1, edit)
- if commonOffset > 0 then
- local commonString = strsub(edit, -commonOffset)
- equality1 = strsub(equality1, 1, -commonOffset - 1)
- edit = commonString .. strsub(edit, 1, -commonOffset - 1)
- equality2 = commonString .. equality2
- end
-
- -- Second, step character by character right, looking for the best fit.
- local bestEquality1 = equality1
- local bestEdit = edit
- local bestEquality2 = equality2
- local bestScore = _diff_cleanupSemanticScore(equality1, edit) + _diff_cleanupSemanticScore(edit, equality2)
-
- while strbyte(edit, 1) == strbyte(equality2, 1) do
- equality1 = equality1 .. strsub(edit, 1, 1)
- edit = strsub(edit, 2) .. strsub(equality2, 1, 1)
- equality2 = strsub(equality2, 2)
- local score = _diff_cleanupSemanticScore(equality1, edit) + _diff_cleanupSemanticScore(edit, equality2)
- -- The >= encourages trailing rather than leading whitespace on edits.
- if score >= bestScore then
- bestScore = score
- bestEquality1 = equality1
- bestEdit = edit
- bestEquality2 = equality2
- end
- end
- if prevDiff[2] ~= bestEquality1 then
- -- We have an improvement, save it back to the diff.
- if #bestEquality1 > 0 then
- diffs[pointer - 1][2] = bestEquality1
- else
- tremove(diffs, pointer - 1)
- pointer = pointer - 1
- end
- diffs[pointer][2] = bestEdit
- if #bestEquality2 > 0 then
- diffs[pointer + 1][2] = bestEquality2
- else
- tremove(diffs, pointer + 1, 1)
- pointer = pointer - 1
- end
- end
- end
- pointer = pointer + 1
- end
-end
-
---[[
-* Reorder and merge like edit sections. Merge equalities.
-* Any edit section can move as long as it doesn't cross an equality.
-* @param {Array.>} diffs Array of diff tuples.
---]]
-function _diff_cleanupMerge(diffs)
- diffs[#diffs + 1] = { DIFF_EQUAL, '' } -- Add a dummy entry at the end.
- local pointer = 1
- local count_delete, count_insert = 0, 0
- local text_delete, text_insert = '', ''
- local commonlength
- while diffs[pointer] do
- local diff_type = diffs[pointer][1]
- if diff_type == DIFF_INSERT then
- count_insert = count_insert + 1
- text_insert = text_insert .. diffs[pointer][2]
- pointer = pointer + 1
- elseif diff_type == DIFF_DELETE then
- count_delete = count_delete + 1
- text_delete = text_delete .. diffs[pointer][2]
- pointer = pointer + 1
- elseif diff_type == DIFF_EQUAL then
- -- Upon reaching an equality, check for prior redundancies.
- if count_delete + count_insert > 1 then
- if (count_delete > 0) and (count_insert > 0) then
- -- Factor out any common prefixies.
- commonlength = _diff_commonPrefix(text_insert, text_delete)
- if commonlength > 0 then
- local back_pointer = pointer - count_delete - count_insert
- if (back_pointer > 1) and (diffs[back_pointer - 1][1] == DIFF_EQUAL) then
- diffs[back_pointer - 1][2] = diffs[back_pointer - 1][2] .. strsub(text_insert, 1, commonlength)
- else
- tinsert(diffs, 1, { DIFF_EQUAL, strsub(text_insert, 1, commonlength) })
- pointer = pointer + 1
- end
- text_insert = strsub(text_insert, commonlength + 1)
- text_delete = strsub(text_delete, commonlength + 1)
- end
- -- Factor out any common suffixies.
- commonlength = _diff_commonSuffix(text_insert, text_delete)
- if commonlength ~= 0 then
- diffs[pointer][2] = strsub(text_insert, -commonlength) .. diffs[pointer][2]
- text_insert = strsub(text_insert, 1, -commonlength - 1)
- text_delete = strsub(text_delete, 1, -commonlength - 1)
- end
- end
- -- Delete the offending records and add the merged ones.
- pointer = pointer - count_delete - count_insert
- for i = 1, count_delete + count_insert do
- tremove(diffs, pointer)
- end
- if #text_delete > 0 then
- tinsert(diffs, pointer, { DIFF_DELETE, text_delete })
- pointer = pointer + 1
- end
- if #text_insert > 0 then
- tinsert(diffs, pointer, { DIFF_INSERT, text_insert })
- pointer = pointer + 1
- end
- pointer = pointer + 1
- elseif (pointer > 1) and (diffs[pointer - 1][1] == DIFF_EQUAL) then
- -- Merge this equality with the previous one.
- diffs[pointer - 1][2] = diffs[pointer - 1][2] .. diffs[pointer][2]
- tremove(diffs, pointer)
- else
- pointer = pointer + 1
- end
- count_insert, count_delete = 0, 0
- text_delete, text_insert = '', ''
- end
- end
- if diffs[#diffs][2] == '' then
- diffs[#diffs] = nil -- Remove the dummy entry at the end.
- end
-
- -- Second pass: look for single edits surrounded on both sides by equalities
- -- which can be shifted sideways to eliminate an equality.
- -- e.g: ABAC -> ABAC
- local changes = false
- pointer = 2
- -- Intentionally ignore the first and last element (don't need checking).
- while pointer < #diffs do
- local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
- if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then
- -- This is a single edit surrounded by equalities.
- local diff = diffs[pointer]
- local currentText = diff[2]
- local prevText = prevDiff[2]
- local nextText = nextDiff[2]
- if #prevText == 0 then
- tremove(diffs, pointer - 1)
- changes = true
- elseif strsub(currentText, -#prevText) == prevText then
- -- Shift the edit over the previous equality.
- diff[2] = prevText .. strsub(currentText, 1, -#prevText - 1)
- nextDiff[2] = prevText .. nextDiff[2]
- tremove(diffs, pointer - 1)
- changes = true
- elseif strsub(currentText, 1, #nextText) == nextText then
- -- Shift the edit over the next equality.
- prevDiff[2] = prevText .. nextText
- diff[2] = strsub(currentText, #nextText + 1) .. nextText
- tremove(diffs, pointer + 1)
- changes = true
- end
- end
- pointer = pointer + 1
- end
- -- If shifts were made, the diff needs reordering and another shift sweep.
- if changes then
- -- LUANOTE: no return value, but necessary to use 'return' to get
- -- tail calls.
- return _diff_cleanupMerge(diffs)
- end
-end
-
---[[
-* loc is a location in text1, compute and return the equivalent location in
-* text2.
-* e.g. 'The cat' vs 'The big cat', 1->1, 5->8
-* @param {Array.>} diffs Array of diff tuples.
-* @param {number} loc Location within text1.
-* @return {number} Location within text2.
---]]
-function _diff_xIndex(diffs, loc)
- local chars1 = 1
- local chars2 = 1
- local last_chars1 = 1
- local last_chars2 = 1
- local x
- for _x, diff in ipairs(diffs) do
- x = _x
- if diff[1] ~= DIFF_INSERT then -- Equality or deletion.
- chars1 = chars1 + #diff[2]
- end
- if diff[1] ~= DIFF_DELETE then -- Equality or insertion.
- chars2 = chars2 + #diff[2]
- end
- if chars1 > loc then -- Overshot the location.
- break
- end
- last_chars1 = chars1
- last_chars2 = chars2
- end
- -- Was the location deleted?
- if diffs[x + 1] and (diffs[x][1] == DIFF_DELETE) then
- return last_chars2
- end
- -- Add the remaining character length.
- return last_chars2 + (loc - last_chars1)
-end
-
---[[
-* Compute and return the source text (all equalities and deletions).
-* @param {Array.>} diffs Array of diff tuples.
-* @return {string} Source text.
---]]
-function _diff_text1(diffs)
- local text = {}
- for x, diff in ipairs(diffs) do
- if diff[1] ~= DIFF_INSERT then
- text[#text + 1] = diff[2]
- end
- end
- return tconcat(text)
-end
-
---[[
-* Compute and return the destination text (all equalities and insertions).
-* @param {Array.>} diffs Array of diff tuples.
-* @return {string} Destination text.
---]]
-function _diff_text2(diffs)
- local text = {}
- for x, diff in ipairs(diffs) do
- if diff[1] ~= DIFF_DELETE then
- text[#text + 1] = diff[2]
- end
- end
- return tconcat(text)
-end
-
---[[
-* Crush the diff into an encoded string which describes the operations
-* required to transform text1 into text2.
-* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
-* Operations are tab-separated. Inserted text is escaped using %xx notation.
-* @param {Array.>} diffs Array of diff tuples.
-* @return {string} Delta text.
---]]
-function _diff_toDelta(diffs)
- local text = {}
- for x, diff in ipairs(diffs) do
- local op, data = diff[1], diff[2]
- if op == DIFF_INSERT then
- text[x] = '+' .. gsub(data, percentEncode_pattern, percentEncode_replace)
- elseif op == DIFF_DELETE then
- text[x] = '-' .. #data
- elseif op == DIFF_EQUAL then
- text[x] = '=' .. #data
- end
- end
- return tconcat(text, '\t')
-end
-
---[[
-* Given the original text1, and an encoded string which describes the
-* operations required to transform text1 into text2, compute the full diff.
-* @param {string} text1 Source string for the diff.
-* @param {string} delta Delta text.
-* @return {Array.>} Array of diff tuples.
-* @throws {Errorend If invalid input.
---]]
-function _diff_fromDelta(text1, delta)
- local diffs = {}
- local diffsLength = 0 -- Keeping our own length var is faster
- local pointer = 1 -- Cursor in text1
- for token in gmatch(delta, '[^\t]+') do
- -- Each token begins with a one character parameter which specifies the
- -- operation of this token (delete, insert, equality).
- local tokenchar, param = strsub(token, 1, 1), strsub(token, 2)
- if tokenchar == '+' then
- local invalidDecode = false
- local decoded = gsub(param, '%%(.?.?)', function(c)
- local n = tonumber(c, 16)
- if (#c ~= 2) or (n == nil) then
- invalidDecode = true
- return ''
- end
- return strchar(n)
- end)
- if invalidDecode then
- -- Malformed URI sequence.
- error('Illegal escape in _diff_fromDelta: ' .. param)
- end
- diffsLength = diffsLength + 1
- diffs[diffsLength] = { DIFF_INSERT, decoded }
- elseif (tokenchar == '-') or (tokenchar == '=') then
- local n = tonumber(param)
- if (n == nil) or (n < 0) then
- error('Invalid number in _diff_fromDelta: ' .. param)
- end
- local text = strsub(text1, pointer, pointer + n - 1)
- pointer = pointer + n
- if tokenchar == '=' then
- diffsLength = diffsLength + 1
- diffs[diffsLength] = { DIFF_EQUAL, text }
- else
- diffsLength = diffsLength + 1
- diffs[diffsLength] = { DIFF_DELETE, text }
- end
- else
- error('Invalid diff operation in _diff_fromDelta: ' .. token)
- end
- end
- if pointer ~= #text1 + 1 then
- error('Delta length (' .. (pointer - 1) .. ') does not equal source text length (' .. #text1 .. ').')
- end
- return diffs
-end
-
--- ---------------------------------------------------------------------------
--- MATCH API
--- ---------------------------------------------------------------------------
-
-local _match_bitap, _match_alphabet
-
---[[
-* Locate the best instance of 'pattern' in 'text' near 'loc'.
-* @param {string} text The text to search.
-* @param {string} pattern The pattern to search for.
-* @param {number} loc The location to search around.
-* @return {number} Best match index or -1.
---]]
-function match_main(text, pattern, loc)
- -- Check for null inputs.
- if text == nil or pattern == nil or loc == nil then
- error('Null inputs. (match_main)')
- end
-
- if text == pattern then
- -- Shortcut (potentially not guaranteed by the algorithm)
- return 1
- elseif #text == 0 then
- -- Nothing to match.
- return -1
- end
- loc = max(1, min(loc, #text))
- if strsub(text, loc, loc + #pattern - 1) == pattern then
- -- Perfect match at the perfect spot! (Includes case of null pattern)
- return loc
- else
- -- Do a fuzzy compare.
- return _match_bitap(text, pattern, loc)
- end
-end
-
--- ---------------------------------------------------------------------------
--- UNOFFICIAL/PRIVATE MATCH FUNCTIONS
--- ---------------------------------------------------------------------------
-
---[[
-* Initialise the alphabet for the Bitap algorithm.
-* @param {string} pattern The text to encode.
-* @return {Object} Hash of character locations.
-* @private
---]]
-function _match_alphabet(pattern)
- local s = {}
- local i = 0
- for c in gmatch(pattern, '.') do
- s[c] = bor(s[c] or 0, lshift(1, #pattern - i - 1))
- i = i + 1
- end
- return s
-end
-
---[[
-* Locate the best instance of 'pattern' in 'text' near 'loc' using the
-* Bitap algorithm.
-* @param {string} text The text to search.
-* @param {string} pattern The pattern to search for.
-* @param {number} loc The location to search around.
-* @return {number} Best match index or -1.
-* @private
---]]
-function _match_bitap(text, pattern, loc)
- if #pattern > Match_MaxBits then
- error('Pattern too long.')
- end
-
- -- Initialise the alphabet.
- local s = _match_alphabet(pattern)
-
- --[[
- * Compute and return the score for a match with e errors and x location.
- * Accesses loc and pattern through being a closure.
- * @param {number} e Number of errors in match.
- * @param {number} x Location of match.
- * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
- * @private
- --]]
- local function _match_bitapScore(e, x)
- local accuracy = e / #pattern
- local proximity = abs(loc - x)
- if Match_Distance == 0 then
- -- Dodge divide by zero error.
- return (proximity == 0) and 1 or accuracy
- end
- return accuracy + (proximity / Match_Distance)
- end
-
- -- Highest score beyond which we give up.
- local score_threshold = Match_Threshold
- -- Is there a nearby exact match? (speedup)
- local best_loc = indexOf(text, pattern, loc)
- if best_loc then
- score_threshold = min(_match_bitapScore(0, best_loc), score_threshold)
- -- LUANOTE: Ideally we'd also check from the other direction, but Lua
- -- doesn't have an efficent lastIndexOf function.
- end
-
- -- Initialise the bit arrays.
- local matchmask = lshift(1, #pattern - 1)
- best_loc = -1
-
- local bin_min, bin_mid
- local bin_max = #pattern + #text
- local last_rd
- for d = 0, #pattern - 1, 1 do
- -- Scan for the best match; each iteration allows for one more error.
- -- Run a binary search to determine how far from 'loc' we can stray at this
- -- error level.
- bin_min = 0
- bin_mid = bin_max
- while bin_min < bin_mid do
- if _match_bitapScore(d, loc + bin_mid) <= score_threshold then
- bin_min = bin_mid
- else
- bin_max = bin_mid
- end
- bin_mid = floor(bin_min + (bin_max - bin_min) / 2)
- end
- -- Use the result from this iteration as the maximum for the next.
- bin_max = bin_mid
- local start = max(1, loc - bin_mid + 1)
- local finish = min(loc + bin_mid, #text) + #pattern
-
- local rd = {}
- for j = start, finish do
- rd[j] = 0
- end
- rd[finish + 1] = lshift(1, d) - 1
- for j = finish, start, -1 do
- local charMatch = s[strsub(text, j - 1, j - 1)] or 0
- if d == 0 then -- First pass: exact match.
- rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch)
- else
- -- Subsequent passes: fuzzy match.
- -- Functions instead of operators make this hella messy.
- rd[j] = bor(
- band(bor(lshift(rd[j + 1], 1), 1), charMatch),
- bor(bor(lshift(bor(last_rd[j + 1], last_rd[j]), 1), 1), last_rd[j + 1])
- )
- end
- if band(rd[j], matchmask) ~= 0 then
- local score = _match_bitapScore(d, j - 1)
- -- This match will almost certainly be better than any existing match.
- -- But check anyway.
- if score <= score_threshold then
- -- Told you so.
- score_threshold = score
- best_loc = j - 1
- if best_loc > loc then
- -- When passing loc, don't exceed our current distance from loc.
- start = max(1, loc * 2 - best_loc)
- else
- -- Already passed loc, downhill from here on in.
- break
- end
- end
- end
- end
- -- No hope for a (better) match at greater error levels.
- if _match_bitapScore(d + 1, loc) > score_threshold then
- break
- end
- last_rd = rd
- end
- return best_loc
-end
-
--- -----------------------------------------------------------------------------
--- PATCH API
--- -----------------------------------------------------------------------------
-
-local _patch_addContext, _patch_deepCopy, _patch_addPadding, _patch_splitMax, _patch_appendText, _new_patch_obj
-
---[[
-* Compute a list of patches to turn text1 into text2.
-* Use diffs if provided, otherwise compute it ourselves.
-* There are four ways to call this function, depending on what data is
-* available to the caller:
-* Method 1:
-* a = text1, b = text2
-* Method 2:
-* a = diffs
-* Method 3 (optimal):
-* a = text1, b = diffs
-* Method 4 (deprecated, use method 3):
-* a = text1, b = text2, c = diffs
-*
-* @param {string|Array.>} a text1 (methods 1,3,4) or
-* Array of diff tuples for text1 to text2 (method 2).
-* @param {string|Array.>} opt_b text2 (methods 1,4) or
-* Array of diff tuples for text1 to text2 (method 3) or undefined (method 2).
-* @param {string|Array.>} opt_c Array of diff tuples for
-* text1 to text2 (method 4) or undefined (methods 1,2,3).
-* @return {Array.<_new_patch_obj>} Array of patch objects.
---]]
-function patch_make(a, opt_b, opt_c)
- local text1, diffs
- local type_a, type_b, type_c = type(a), type(opt_b), type(opt_c)
- if (type_a == 'string') and (type_b == 'string') and (type_c == 'nil') then
- -- Method 1: text1, text2
- -- Compute diffs from text1 and text2.
- text1 = a
- diffs = diff_main(text1, opt_b, true)
- if #diffs > 2 then
- diff_cleanupSemantic(diffs)
- diff_cleanupEfficiency(diffs)
- end
- elseif (type_a == 'table') and (type_b == 'nil') and (type_c == 'nil') then
- -- Method 2: diffs
- -- Compute text1 from diffs.
- diffs = a
- text1 = _diff_text1(diffs)
- elseif (type_a == 'string') and (type_b == 'table') and (type_c == 'nil') then
- -- Method 3: text1, diffs
- text1 = a
- diffs = opt_b
- elseif (type_a == 'string') and (type_b == 'string') and (type_c == 'table') then
- -- Method 4: text1, text2, diffs
- -- text2 is not used.
- text1 = a
- diffs = opt_c
- else
- error('Unknown call format to patch_make.')
- end
-
- if diffs[1] == nil then
- return {} -- Get rid of the null case.
- end
-
- local patches = {}
- local patch = _new_patch_obj()
- local patchDiffLength = 0 -- Keeping our own length var is faster.
- local char_count1 = 0 -- Number of characters into the text1 string.
- local char_count2 = 0 -- Number of characters into the text2 string.
- -- Start with text1 (prepatch_text) and apply the diffs until we arrive at
- -- text2 (postpatch_text). We recreate the patches one by one to determine
- -- context info.
- local prepatch_text, postpatch_text = text1, text1
- for x, diff in ipairs(diffs) do
- local diff_type, diff_text = diff[1], diff[2]
-
- if (patchDiffLength == 0) and (diff_type ~= DIFF_EQUAL) then
- -- A new patch starts here.
- patch.start1 = char_count1 + 1
- patch.start2 = char_count2 + 1
- end
-
- if diff_type == DIFF_INSERT then
- patchDiffLength = patchDiffLength + 1
- patch.diffs[patchDiffLength] = diff
- patch.length2 = patch.length2 + #diff_text
- postpatch_text = strsub(postpatch_text, 1, char_count2) .. diff_text .. strsub(postpatch_text, char_count2 + 1)
- elseif diff_type == DIFF_DELETE then
- patch.length1 = patch.length1 + #diff_text
- patchDiffLength = patchDiffLength + 1
- patch.diffs[patchDiffLength] = diff
- postpatch_text = strsub(postpatch_text, 1, char_count2) .. strsub(postpatch_text, char_count2 + #diff_text + 1)
- elseif diff_type == DIFF_EQUAL then
- if (#diff_text <= Patch_Margin * 2) and (patchDiffLength ~= 0) and (#diffs ~= x) then
- -- Small equality inside a patch.
- patchDiffLength = patchDiffLength + 1
- patch.diffs[patchDiffLength] = diff
- patch.length1 = patch.length1 + #diff_text
- patch.length2 = patch.length2 + #diff_text
- elseif #diff_text >= Patch_Margin * 2 then
- -- Time for a new patch.
- if patchDiffLength ~= 0 then
- _patch_addContext(patch, prepatch_text)
- patches[#patches + 1] = patch
- patch = _new_patch_obj()
- patchDiffLength = 0
- -- Unlike Unidiff, our patch lists have a rolling context.
- -- https://github.com/google/diff-match-patch/wiki/Unidiff
- -- Update prepatch text & pos to reflect the application of the
- -- just completed patch.
- prepatch_text = postpatch_text
- char_count1 = char_count2
- end
- end
- end
-
- -- Update the current character count.
- if diff_type ~= DIFF_INSERT then
- char_count1 = char_count1 + #diff_text
- end
- if diff_type ~= DIFF_DELETE then
- char_count2 = char_count2 + #diff_text
- end
- end
-
- -- Pick up the leftover patch if not empty.
- if patchDiffLength > 0 then
- _patch_addContext(patch, prepatch_text)
- patches[#patches + 1] = patch
- end
-
- return patches
-end
-
---[[
-* Merge a set of patches onto the text. Return a patched text, as well
-* as a list of true/false values indicating which patches were applied.
-* @param {Array.<_new_patch_obj>} patches Array of patch objects.
-* @param {string} text Old text.
-* @return {Array.>} Two return values, the
-* new text and an array of boolean values.
---]]
-function patch_apply(patches, text)
- if patches[1] == nil then
- return text, {}
- end
-
- -- Deep copy the patches so that no changes are made to originals.
- patches = _patch_deepCopy(patches)
-
- local nullPadding = _patch_addPadding(patches)
- text = nullPadding .. text .. nullPadding
-
- _patch_splitMax(patches)
- -- delta keeps track of the offset between the expected and actual location
- -- of the previous patch. If there are patches expected at positions 10 and
- -- 20, but the first patch was found at 12, delta is 2 and the second patch
- -- has an effective expected position of 22.
- local delta = 0
- local results = {}
- for x, patch in ipairs(patches) do
- local expected_loc = patch.start2 + delta
- local text1 = _diff_text1(patch.diffs)
- local start_loc
- local end_loc = -1
- if #text1 > Match_MaxBits then
- -- _patch_splitMax will only provide an oversized pattern in
- -- the case of a monster delete.
- start_loc = match_main(text, strsub(text1, 1, Match_MaxBits), expected_loc)
- if start_loc ~= -1 then
- end_loc = match_main(text, strsub(text1, -Match_MaxBits), expected_loc + #text1 - Match_MaxBits)
- if end_loc == -1 or start_loc >= end_loc then
- -- Can't find valid trailing context. Drop this patch.
- start_loc = -1
- end
- end
- else
- start_loc = match_main(text, text1, expected_loc)
- end
- if start_loc == -1 then
- -- No match found. :(
- results[x] = false
- -- Subtract the delta for this failed patch from subsequent patches.
- delta = delta - patch.length2 - patch.length1
- else
- -- Found a match. :)
- results[x] = true
- delta = start_loc - expected_loc
- local text2
- if end_loc == -1 then
- text2 = strsub(text, start_loc, start_loc + #text1 - 1)
- else
- text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1)
- end
- if text1 == text2 then
- -- Perfect match, just shove the replacement text in.
- text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs) .. strsub(text, start_loc + #text1)
- else
- -- Imperfect match. Run a diff to get a framework of equivalent
- -- indices.
- local diffs = diff_main(text1, text2, false)
- if (#text1 > Match_MaxBits) and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then
- -- The end points match, but the content is unacceptably bad.
- results[x] = false
- else
- _diff_cleanupSemanticLossless(diffs)
- local index1 = 1
- local index2
- for y, mod in ipairs(patch.diffs) do
- if mod[1] ~= DIFF_EQUAL then
- index2 = _diff_xIndex(diffs, index1)
- end
- if mod[1] == DIFF_INSERT then
- text = strsub(text, 1, start_loc + index2 - 2) .. mod[2] .. strsub(text, start_loc + index2 - 1)
- elseif mod[1] == DIFF_DELETE then
- text = strsub(text, 1, start_loc + index2 - 2)
- .. strsub(text, start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1))
- end
- if mod[1] ~= DIFF_DELETE then
- index1 = index1 + #mod[2]
- end
- end
- end
- end
- end
- end
- -- Strip the padding off.
- text = strsub(text, #nullPadding + 1, -#nullPadding - 1)
- return text, results
-end
-
---[[
-* Take a list of patches and return a textual representation.
-* @param {Array.<_new_patch_obj>} patches Array of patch objects.
-* @return {string} Text representation of patches.
---]]
-function patch_toText(patches)
- local text = {}
- for x, patch in ipairs(patches) do
- _patch_appendText(patch, text)
- end
- return tconcat(text)
-end
-
---[[
-* Parse a textual representation of patches and return a list of patch objects.
-* @param {string} textline Text representation of patches.
-* @return {Array.<_new_patch_obj>} Array of patch objects.
-* @throws {Error} If invalid input.
---]]
-function patch_fromText(textline)
- local patches = {}
- if #textline == 0 then
- return patches
- end
- local text = {}
- for line in gmatch(textline, '([^\n]*)') do
- text[#text + 1] = line
- end
- local textPointer = 1
- while textPointer <= #text do
- local start1, length1, start2, length2 = strmatch(text[textPointer], '^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@$')
- if start1 == nil then
- error('Invalid patch string: "' .. text[textPointer] .. '"')
- end
- local patch = _new_patch_obj()
- patches[#patches + 1] = patch
-
- start1 = tonumber(start1)
- length1 = tonumber(length1) or 1
- if length1 == 0 then
- start1 = start1 + 1
- end
- patch.start1 = start1
- patch.length1 = length1
-
- start2 = tonumber(start2)
- length2 = tonumber(length2) or 1
- if length2 == 0 then
- start2 = start2 + 1
- end
- patch.start2 = start2
- patch.length2 = length2
-
- textPointer = textPointer + 1
-
- while true do
- local line = text[textPointer]
- if line == nil then
- break
- end
- local sign
- sign, line = strsub(line, 1, 1), strsub(line, 2)
-
- local invalidDecode = false
- local decoded = gsub(line, '%%(.?.?)', function(c)
- local n = tonumber(c, 16)
- if (#c ~= 2) or (n == nil) then
- invalidDecode = true
- return ''
- end
- return strchar(n)
- end)
- if invalidDecode then
- -- Malformed URI sequence.
- error('Illegal escape in patch_fromText: ' .. line)
- end
-
- line = decoded
-
- if sign == '-' then
- -- Deletion.
- patch.diffs[#patch.diffs + 1] = { DIFF_DELETE, line }
- elseif sign == '+' then
- -- Insertion.
- patch.diffs[#patch.diffs + 1] = { DIFF_INSERT, line }
- elseif sign == ' ' then
- -- Minor equality.
- patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, line }
- elseif sign == '@' then
- -- Start of next patch.
- break
- elseif sign == '' then
- -- Blank line? Whatever.
- else
- -- WTF?
- error('Invalid patch mode "' .. sign .. '" in: ' .. line)
- end
- textPointer = textPointer + 1
- end
- end
- return patches
-end
-
--- ---------------------------------------------------------------------------
--- UNOFFICIAL/PRIVATE PATCH FUNCTIONS
--- ---------------------------------------------------------------------------
-
-local patch_meta = {
- __tostring = function(patch)
- local buf = {}
- _patch_appendText(patch, buf)
- return tconcat(buf)
- end,
-}
-
---[[
-* Class representing one patch operation.
-* @constructor
---]]
-function _new_patch_obj()
- return setmetatable({
- --[[ @type {Array.>} ]]
- diffs = {},
- --[[ @type {?number} ]]
- start1 = 1, -- nil;
- --[[ @type {?number} ]]
- start2 = 1, -- nil;
- --[[ @type {number} ]]
- length1 = 0,
- --[[ @type {number} ]]
- length2 = 0,
- }, patch_meta)
-end
-
---[[
-* Increase the context until it is unique,
-* but don't let the pattern expand beyond Match_MaxBits.
-* @param {_new_patch_obj} patch The patch to grow.
-* @param {string} text Source text.
-* @private
---]]
-function _patch_addContext(patch, text)
- if #text == 0 then
- return
- end
- local pattern = strsub(text, patch.start2, patch.start2 + patch.length1 - 1)
- local padding = 0
-
- -- LUANOTE: Lua's lack of a lastIndexOf function results in slightly
- -- different logic here than in other language ports.
- -- Look for the first two matches of pattern in text. If two are found,
- -- increase the pattern length.
- local firstMatch = indexOf(text, pattern)
- local secondMatch = nil
- if firstMatch ~= nil then
- secondMatch = indexOf(text, pattern, firstMatch + 1)
- end
- while (#pattern == 0 or secondMatch ~= nil) and (#pattern < Match_MaxBits - Patch_Margin - Patch_Margin) do
- padding = padding + Patch_Margin
- pattern = strsub(text, max(1, patch.start2 - padding), patch.start2 + patch.length1 - 1 + padding)
- firstMatch = indexOf(text, pattern)
- if firstMatch ~= nil then
- secondMatch = indexOf(text, pattern, firstMatch + 1)
- else
- secondMatch = nil
- end
- end
- -- Add one chunk for good luck.
- padding = padding + Patch_Margin
-
- -- Add the prefix.
- local prefix = strsub(text, max(1, patch.start2 - padding), patch.start2 - 1)
- if #prefix > 0 then
- tinsert(patch.diffs, 1, { DIFF_EQUAL, prefix })
- end
- -- Add the suffix.
- local suffix = strsub(text, patch.start2 + patch.length1, patch.start2 + patch.length1 - 1 + padding)
- if #suffix > 0 then
- patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, suffix }
- end
-
- -- Roll back the start points.
- patch.start1 = patch.start1 - #prefix
- patch.start2 = patch.start2 - #prefix
- -- Extend the lengths.
- patch.length1 = patch.length1 + #prefix + #suffix
- patch.length2 = patch.length2 + #prefix + #suffix
-end
-
---[[
-* Given an array of patches, return another array that is identical.
-* @param {Array.<_new_patch_obj>} patches Array of patch objects.
-* @return {Array.<_new_patch_obj>} Array of patch objects.
---]]
-function _patch_deepCopy(patches)
- local patchesCopy = {}
- for x, patch in ipairs(patches) do
- local patchCopy = _new_patch_obj()
- local diffsCopy = {}
- for i, diff in ipairs(patch.diffs) do
- diffsCopy[i] = { diff[1], diff[2] }
- end
- patchCopy.diffs = diffsCopy
- patchCopy.start1 = patch.start1
- patchCopy.start2 = patch.start2
- patchCopy.length1 = patch.length1
- patchCopy.length2 = patch.length2
- patchesCopy[x] = patchCopy
- end
- return patchesCopy
-end
-
---[[
-* Add some padding on text start and end so that edges can match something.
-* Intended to be called only from within patch_apply.
-* @param {Array.<_new_patch_obj>} patches Array of patch objects.
-* @return {string} The padding string added to each side.
---]]
-function _patch_addPadding(patches)
- local paddingLength = Patch_Margin
- local nullPadding = ''
- for x = 1, paddingLength do
- nullPadding = nullPadding .. strchar(x)
- end
-
- -- Bump all the patches forward.
- for x, patch in ipairs(patches) do
- patch.start1 = patch.start1 + paddingLength
- patch.start2 = patch.start2 + paddingLength
- end
-
- -- Add some padding on start of first diff.
- local patch = patches[1]
- local diffs = patch.diffs
- local firstDiff = diffs[1]
- if (firstDiff == nil) or (firstDiff[1] ~= DIFF_EQUAL) then
- -- Add nullPadding equality.
- tinsert(diffs, 1, { DIFF_EQUAL, nullPadding })
- patch.start1 = patch.start1 - paddingLength -- Should be 0.
- patch.start2 = patch.start2 - paddingLength -- Should be 0.
- patch.length1 = patch.length1 + paddingLength
- patch.length2 = patch.length2 + paddingLength
- elseif paddingLength > #firstDiff[2] then
- -- Grow first equality.
- local extraLength = paddingLength - #firstDiff[2]
- firstDiff[2] = strsub(nullPadding, #firstDiff[2] + 1) .. firstDiff[2]
- patch.start1 = patch.start1 - extraLength
- patch.start2 = patch.start2 - extraLength
- patch.length1 = patch.length1 + extraLength
- patch.length2 = patch.length2 + extraLength
- end
-
- -- Add some padding on end of last diff.
- patch = patches[#patches]
- diffs = patch.diffs
- local lastDiff = diffs[#diffs]
- if (lastDiff == nil) or (lastDiff[1] ~= DIFF_EQUAL) then
- -- Add nullPadding equality.
- diffs[#diffs + 1] = { DIFF_EQUAL, nullPadding }
- patch.length1 = patch.length1 + paddingLength
- patch.length2 = patch.length2 + paddingLength
- elseif paddingLength > #lastDiff[2] then
- -- Grow last equality.
- local extraLength = paddingLength - #lastDiff[2]
- lastDiff[2] = lastDiff[2] .. strsub(nullPadding, 1, extraLength)
- patch.length1 = patch.length1 + extraLength
- patch.length2 = patch.length2 + extraLength
- end
-
- return nullPadding
-end
-
---[[
-* Look through the patches and break up any which are longer than the maximum
-* limit of the match algorithm.
-* Intended to be called only from within patch_apply.
-* @param {Array.<_new_patch_obj>} patches Array of patch objects.
---]]
-function _patch_splitMax(patches)
- local patch_size = Match_MaxBits
- local x = 1
- while true do
- local patch = patches[x]
- if patch == nil then
- return
- end
- if patch.length1 > patch_size then
- local bigpatch = patch
- -- Remove the big old patch.
- tremove(patches, x)
- x = x - 1
- local start1 = bigpatch.start1
- local start2 = bigpatch.start2
- local precontext = ''
- while bigpatch.diffs[1] do
- -- Create one of several smaller patches.
- local patch = _new_patch_obj()
- local empty = true
- patch.start1 = start1 - #precontext
- patch.start2 = start2 - #precontext
- if precontext ~= '' then
- patch.length1, patch.length2 = #precontext, #precontext
- patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, precontext }
- end
- while bigpatch.diffs[1] and (patch.length1 < patch_size - Patch_Margin) do
- local diff_type = bigpatch.diffs[1][1]
- local diff_text = bigpatch.diffs[1][2]
- if diff_type == DIFF_INSERT then
- -- Insertions are harmless.
- patch.length2 = patch.length2 + #diff_text
- start2 = start2 + #diff_text
- patch.diffs[#patch.diffs + 1] = bigpatch.diffs[1]
- tremove(bigpatch.diffs, 1)
- empty = false
- elseif
- (diff_type == DIFF_DELETE)
- and (#patch.diffs == 1)
- and (patch.diffs[1][1] == DIFF_EQUAL)
- and (#diff_text > 2 * patch_size)
- then
- -- This is a large deletion. Let it pass in one chunk.
- patch.length1 = patch.length1 + #diff_text
- start1 = start1 + #diff_text
- empty = false
- patch.diffs[#patch.diffs + 1] = { diff_type, diff_text }
- tremove(bigpatch.diffs, 1)
- else
- -- Deletion or equality.
- -- Only take as much as we can stomach.
- diff_text = strsub(diff_text, 1, patch_size - patch.length1 - Patch_Margin)
- patch.length1 = patch.length1 + #diff_text
- start1 = start1 + #diff_text
- if diff_type == DIFF_EQUAL then
- patch.length2 = patch.length2 + #diff_text
- start2 = start2 + #diff_text
- else
- empty = false
- end
- patch.diffs[#patch.diffs + 1] = { diff_type, diff_text }
- if diff_text == bigpatch.diffs[1][2] then
- tremove(bigpatch.diffs, 1)
- else
- bigpatch.diffs[1][2] = strsub(bigpatch.diffs[1][2], #diff_text + 1)
- end
- end
- end
- -- Compute the head context for the next patch.
- precontext = _diff_text2(patch.diffs)
- precontext = strsub(precontext, -Patch_Margin)
- -- Append the end context for this patch.
- local postcontext = strsub(_diff_text1(bigpatch.diffs), 1, Patch_Margin)
- if postcontext ~= '' then
- patch.length1 = patch.length1 + #postcontext
- patch.length2 = patch.length2 + #postcontext
- if patch.diffs[1] and (patch.diffs[#patch.diffs][1] == DIFF_EQUAL) then
- patch.diffs[#patch.diffs][2] = patch.diffs[#patch.diffs][2] .. postcontext
- else
- patch.diffs[#patch.diffs + 1] = { DIFF_EQUAL, postcontext }
- end
- end
- if not empty then
- x = x + 1
- tinsert(patches, x, patch)
- end
- end
- end
- x = x + 1
- end
-end
-
---[[
-* Emulate GNU diff's format.
-* Header: @@ -382,8 +481,9 @@
-* @return {string} The GNU diff string.
---]]
-function _patch_appendText(patch, text)
- local coords1, coords2
- local length1, length2 = patch.length1, patch.length2
- local start1, start2 = patch.start1, patch.start2
- local diffs = patch.diffs
-
- if length1 == 1 then
- coords1 = start1
- else
- coords1 = ((length1 == 0) and (start1 - 1) or start1) .. ',' .. length1
- end
-
- if length2 == 1 then
- coords2 = start2
- else
- coords2 = ((length2 == 0) and (start2 - 1) or start2) .. ',' .. length2
- end
- text[#text + 1] = '@@ -' .. coords1 .. ' +' .. coords2 .. ' @@\n'
-
- local op
- -- Escape the body of the patch with %xx notation.
- for x, diff in ipairs(patch.diffs) do
- local diff_type = diff[1]
- if diff_type == DIFF_INSERT then
- op = '+'
- elseif diff_type == DIFF_DELETE then
- op = '-'
- elseif diff_type == DIFF_EQUAL then
- op = ' '
- end
- text[#text + 1] = op .. gsub(diffs[x][2], percentEncode_pattern, percentEncode_replace) .. '\n'
- end
-
- return text
-end
-
--- Expose the API
-local _M = {}
-
-_M.DIFF_DELETE = DIFF_DELETE
-_M.DIFF_INSERT = DIFF_INSERT
-_M.DIFF_EQUAL = DIFF_EQUAL
-
-_M.diff_main = diff_main
-_M.diff_cleanupSemantic = diff_cleanupSemantic
-_M.diff_cleanupEfficiency = diff_cleanupEfficiency
-_M.diff_levenshtein = diff_levenshtein
-_M.diff_prettyHtml = diff_prettyHtml
-
-_M.match_main = match_main
-
-_M.patch_make = patch_make
-_M.patch_toText = patch_toText
-_M.patch_fromText = patch_fromText
-_M.patch_apply = patch_apply
-
--- Expose some non-API functions as well, for testing purposes etc.
-_M.diff_commonPrefix = _diff_commonPrefix
-_M.diff_commonSuffix = _diff_commonSuffix
-_M.diff_commonOverlap = _diff_commonOverlap
-_M.diff_halfMatch = _diff_halfMatch
-_M.diff_bisect = _diff_bisect
-_M.diff_cleanupMerge = _diff_cleanupMerge
-_M.diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless
-_M.diff_text1 = _diff_text1
-_M.diff_text2 = _diff_text2
-_M.diff_toDelta = _diff_toDelta
-_M.diff_fromDelta = _diff_fromDelta
-_M.diff_xIndex = _diff_xIndex
-_M.match_alphabet = _match_alphabet
-_M.match_bitap = _match_bitap
-_M.new_patch_obj = _new_patch_obj
-_M.patch_addContext = _patch_addContext
-_M.patch_splitMax = _patch_splitMax
-_M.patch_addPadding = _patch_addPadding
-_M.settings = settings
-
-return _M
diff --git a/tests/diff_spec.lua b/tests/diff_spec.lua
index 58e2f4c9..41ed262c 100644
--- a/tests/diff_spec.lua
+++ b/tests/diff_spec.lua
@@ -229,4 +229,79 @@ describe('CopilotChat.utils.diff', function()
assert.is_true(applied)
assert.are.same({ 'newstart', 'context' }, result)
end)
+
+ it('may confuse similar variable names', function()
+ local diff_text = [[
+--- a/foo.txt
++++ b/foo.txt
+@@ -1,2 +1,2 @@
+-local x = 1
++local x = 10
+]]
+ local original = {
+ 'local x = 1',
+ 'local y = 2',
+ 'local x = 3',
+ 'local z = 4',
+ }
+ local result, applied = diff.apply_unified_diff(diff_text, table.concat(original, '\n'))
+ assert.is_true(applied)
+ assert.are.same({
+ 'local x = 10',
+ 'local y = 2',
+ 'local x = 3',
+ 'local z = 4',
+ }, result)
+ end)
+
+ it('may match wrong substring with partial matches', function()
+ local diff_text = [[
+--- a/foo.txt
++++ b/foo.txt
+@@ -1,2 +1,2 @@
+-old_value
++new_value
+]]
+ local original = {
+ 'value',
+ 'old_value',
+ 'very_old_value',
+ }
+ local result, applied = diff.apply_unified_diff(diff_text, table.concat(original, '\n'))
+ assert.is_false(applied) -- not applied cleanly, but adjusted
+ assert.are.same({
+ 'value',
+ 'new_value',
+ 'very_old_value',
+ }, result)
+ end)
+
+ it('may apply to wrong instance of identical boilerplate code', function()
+ local diff_text = [[
+--- a/foo.txt
++++ b/foo.txt
+@@ -1,3 +1,3 @@
+ return {
+- status = "old"
++ status = "new"
+]]
+ local original = {
+ 'return {',
+ ' status = "old"',
+ '}',
+ 'return {',
+ ' status = "old"',
+ '}',
+ }
+ local result, applied = diff.apply_unified_diff(diff_text, table.concat(original, '\n'))
+ assert.is_true(applied)
+ assert.are.same({
+ 'return {',
+ ' status = "new"',
+ '}',
+ 'return {',
+ ' status = "old"',
+ '}',
+ }, result)
+ end)
end)