From 0ebc5b66e693e637e68ae3990d86770eeba4ba73 Mon Sep 17 00:00:00 2001 From: Tom Fleet Date: Sun, 12 Apr 2026 08:40:50 +0100 Subject: [PATCH] Use my diff library --- go.mod | 4 +- go.sum | 6 +- internal/diff/diff.go | 296 ---------------------------- internal/diff/diff_test.go | 60 ------ internal/diff/testdata/allnew.txtar | 13 -- internal/diff/testdata/allold.txtar | 13 -- internal/diff/testdata/basic.txtar | 35 ---- internal/diff/testdata/dups.txtar | 40 ---- internal/diff/testdata/end.txtar | 38 ---- internal/diff/testdata/eof.txtar | 9 - internal/diff/testdata/eof1.txtar | 18 -- internal/diff/testdata/eof2.txtar | 18 -- internal/diff/testdata/long.txtar | 62 ------ internal/diff/testdata/same.txtar | 5 - internal/diff/testdata/start.txtar | 34 ---- internal/diff/testdata/triv.txtar | 40 ---- snapshot.go | 38 +--- 17 files changed, 10 insertions(+), 719 deletions(-) delete mode 100644 internal/diff/diff.go delete mode 100644 internal/diff/diff_test.go delete mode 100644 internal/diff/testdata/allnew.txtar delete mode 100644 internal/diff/testdata/allold.txtar delete mode 100644 internal/diff/testdata/basic.txtar delete mode 100644 internal/diff/testdata/dups.txtar delete mode 100644 internal/diff/testdata/end.txtar delete mode 100644 internal/diff/testdata/eof.txtar delete mode 100644 internal/diff/testdata/eof1.txtar delete mode 100644 internal/diff/testdata/eof2.txtar delete mode 100644 internal/diff/testdata/long.txtar delete mode 100644 internal/diff/testdata/same.txtar delete mode 100644 internal/diff/testdata/start.txtar delete mode 100644 internal/diff/testdata/triv.txtar diff --git a/go.mod b/go.mod index 224acd0..07ff513 100644 --- a/go.mod +++ b/go.mod @@ -3,13 +3,13 @@ module go.followtheprocess.codes/snapshot go 1.26 require ( + go.followtheprocess.codes/diff v0.1.1 go.followtheprocess.codes/hue v1.1.0 go.followtheprocess.codes/test v1.1.0 go.yaml.in/yaml/v4 v4.0.0-rc.4 - golang.org/x/tools v0.44.0 ) require ( golang.org/x/sys v0.43.0 // indirect - golang.org/x/term v0.41.0 // indirect + golang.org/x/term v0.42.0 // indirect ) diff --git a/go.sum b/go.sum index 63b3ea8..4a201f4 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +go.followtheprocess.codes/diff v0.1.1 h1:pQpnow+Uj39S4xxNC6FLLfsJnolVM5+a90EIo5QZN6s= +go.followtheprocess.codes/diff v0.1.1/go.mod h1:bDSZPC9CvkRr8HlOwjE1bl/8qFAmiA3LVtkThRnniis= go.followtheprocess.codes/hue v1.1.0 h1:bPq21YLdWxQ0ki4lIvXCYtgutaGaDUYaSIENDdrrlNQ= go.followtheprocess.codes/hue v1.1.0/go.mod h1:VnCeVmYESGmX7fZJSFs59u8G+5zseCwGdFiJGHCFg4o= go.followtheprocess.codes/test v1.1.0 h1:1v2JyMd3STr7dIXzcHEhct2qvS8/mTtGYbSyhpReciI= @@ -6,7 +8,7 @@ go.yaml.in/yaml/v4 v4.0.0-rc.4 h1:UP4+v6fFrBIb1l934bDl//mmnoIZEDK0idg1+AIvX5U= go.yaml.in/yaml/v4 v4.0.0-rc.4/go.mod h1:aZqd9kCMsGL7AuUv/m/PvWLdg5sjJsZ4oHDEnfPPfY0= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= -golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY= +golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY= golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c= golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI= diff --git a/internal/diff/diff.go b/internal/diff/diff.go deleted file mode 100644 index 02c6419..0000000 --- a/internal/diff/diff.go +++ /dev/null @@ -1,296 +0,0 @@ -// Taken from Go's internal/diff with only very minor tweaks; those being: -// - Adding an extra space between the diff character (+/-) and the line so we can easily colour it -// - Lint ignores -// - Renaming diff test package to diff_test -// -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package diff - -import ( - "bytes" - "fmt" - "sort" - "strings" -) - -// A pair is a pair of values tracked for both the x and y side of a diff. -// It is typically a pair of line indexes. -type pair struct{ x, y int } - -// Diff returns an anchored diff of the two texts old and new -// in the “unified diff” format. If old and new are identical, -// Diff returns a nil slice (no output). -// -// Unix diff implementations typically look for a diff with -// the smallest number of lines inserted and removed, -// which can in the worst case take time quadratic in the -// number of lines in the texts. As a result, many implementations -// either can be made to run for a long time or cut off the search -// after a predetermined amount of work. -// -// In contrast, this implementation looks for a diff with the -// smallest number of “unique” lines inserted and removed, -// where unique means a line that appears just once in both old and new. -// We call this an “anchored diff” because the unique lines anchor -// the chosen matching regions. An anchored diff is usually clearer -// than a standard diff, because the algorithm does not try to -// reuse unrelated blank lines or closing braces. -// The algorithm also guarantees to run in O(n log n) time -// instead of the standard O(n²) time. -// -// Some systems call this approach a “patience diff,” named for -// the “patience sorting” algorithm, itself named for a solitaire card game. -// We avoid that name for two reasons. First, the name has been used -// for a few different variants of the algorithm, so it is imprecise. -// Second, the name is frequently interpreted as meaning that you have -// to wait longer (to be patient) for the diff, meaning that it is a slower algorithm, -// when in fact the algorithm is faster than the standard one. -func Diff( //nolint: cyclop - oldName string, - old []byte, - newName string, - new []byte, //nolint: predeclared -) []byte { - if bytes.Equal(old, new) { - return nil - } - - x := lines(old) - y := lines(new) - - // Print diff header. - var out bytes.Buffer - - fmt.Fprintf(&out, "diff %s %s\n", oldName, newName) - fmt.Fprintf(&out, "--- %s\n", oldName) - fmt.Fprintf(&out, "+++ %s\n", newName) - - // Loop over matches to consider, - // expanding each match to include surrounding lines, - // and then printing diff chunks. - // To avoid setup/teardown cases outside the loop, - // tgs returns a leading {0,0} and trailing {len(x), len(y)} pair - // in the sequence of matches. - var ( - done pair // printed up to x[:done.x] and y[:done.y] - chunk pair // start lines of current chunk - count pair // number of lines from each side in current chunk - ctext []string // lines for current chunk - ) - - for _, m := range tgs(x, y) { - if m.x < done.x { - // Already handled scanning forward from earlier match. - continue - } - - // Expand matching lines as far possible, - // establishing that x[start.x:end.x] == y[start.y:end.y]. - // Note that on the first (or last) iteration we may (or definitey do) - // have an empty match: start.x==end.x and start.y==end.y. - start := m - for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] { - start.x-- - start.y-- - } - - end := m - for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] { - end.x++ - end.y++ - } - - // Emit the mismatched lines before start into this chunk. - // (No effect on first sentinel iteration, when start = {0,0}.) - for _, s := range x[done.x:start.x] { - ctext = append(ctext, "- "+s) - count.x++ - } - - for _, s := range y[done.y:start.y] { - ctext = append(ctext, "+ "+s) - count.y++ - } - - // If we're not at EOF and have too few common lines, - // the chunk includes all the common lines and continues. - const C = 3 // number of context lines - if (end.x < len(x) || end.y < len(y)) && - (end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) { - for _, s := range x[start.x:end.x] { - ctext = append(ctext, " "+s) - count.x++ - count.y++ - } - - done = end - - continue - } - - // End chunk with common lines for context. - if len(ctext) > 0 { - n := end.x - start.x - if n > C { - n = C - } - - for _, s := range x[start.x : start.x+n] { - ctext = append(ctext, " "+s) - count.x++ - count.y++ - } - - done = pair{start.x + n, start.y + n} - - // Format and emit chunk. - // Convert line numbers to 1-indexed. - // Special case: empty file shows up as 0,0 not 1,0. - if count.x > 0 { - chunk.x++ - } - - if count.y > 0 { - chunk.y++ - } - - fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y) - - for _, s := range ctext { - out.WriteString(s) - } - - count.x = 0 - count.y = 0 - ctext = ctext[:0] - } - - // If we reached EOF, we're done. - if end.x >= len(x) && end.y >= len(y) { - break - } - - // Otherwise start a new chunk. - chunk = pair{end.x - C, end.y - C} - for _, s := range x[chunk.x:end.x] { - ctext = append(ctext, " "+s) - count.x++ - count.y++ - } - - done = end - } - - return out.Bytes() -} - -// lines returns the lines in the file x, including newlines. -// If the file does not end in a newline, one is supplied -// along with a warning about the missing newline. -func lines(x []byte) []string { - l := strings.SplitAfter(string(x), "\n") - if l[len(l)-1] == "" { - l = l[:len(l)-1] - } else { - // Treat last line as having a message about the missing newline attached, - // using the same text as BSD/GNU diff (including the leading backslash). - l[len(l)-1] += "\n\\ No newline at end of file\n" - } - - return l -} - -// tgs returns the pairs of indexes of the longest common subsequence -// of unique lines in x and y, where a unique line is one that appears -// once in x and once in y. -// -// The longest common subsequence algorithm is as described in -// Thomas G. Szymanski, “A Special Case of the Maximal Common -// Subsequence Problem,” Princeton TR #170 (January 1975), -// available at https://research.swtch.com/tgs170.pdf. -func tgs(x, y []string) []pair { - // Count the number of times each string appears in a and b. - // We only care about 0, 1, many, counted as 0, -1, -2 - // for the x side and 0, -4, -8 for the y side. - // Using negative numbers now lets us distinguish positive line numbers later. - m := make(map[string]int) - for _, s := range x { - if c := m[s]; c > -2 { - m[s] = c - 1 - } - } - - for _, s := range y { - if c := m[s]; c > -8 { - m[s] = c - 4 //nolint: mnd - } - } - - // Now unique strings can be identified by m[s] = -1+-4. - // - // Gather the indexes of those strings in x and y, building: - // xi[i] = increasing indexes of unique strings in x. - // yi[i] = increasing indexes of unique strings in y. - // inv[i] = index j such that x[xi[i]] = y[yi[j]]. - var xi, yi, inv []int - - for i, s := range y { - if m[s] == -1+-4 { - m[s] = len(yi) - yi = append(yi, i) - } - } - - for i, s := range x { - if j, ok := m[s]; ok && j >= 0 { - xi = append(xi, i) - inv = append(inv, j) - } - } - - // Apply Algorithm A from Szymanski's paper. - // In those terms, A = J = inv and B = [0, n). - // We add sentinel pairs {0,0}, and {len(x),len(y)} - // to the returned sequence, to help the processing loop. - J := inv - n := len(xi) - T := make([]int, n) - L := make([]int, n) - - for i := range T { - T[i] = n + 1 - } - - for i := range n { - k := sort.Search(n, func(k int) bool { - return T[k] >= J[i] - }) - T[k] = J[i] - L[i] = k + 1 - } - - k := 0 - for _, v := range L { - if k < v { - k = v - } - } - - seq := make([]pair, 2+k) //nolint:mnd - seq[1+k] = pair{len(x), len(y)} // sentinel at end - - lastj := n - for i := n - 1; i >= 0; i-- { - if L[i] == k && J[i] < lastj { - seq[k] = pair{xi[i], yi[J[i]]} - k-- - } - } - - seq[0] = pair{0, 0} // sentinel at start - - return seq -} diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go deleted file mode 100644 index 4b07ae1..0000000 --- a/internal/diff/diff_test.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package diff_test - -import ( - "bytes" - "os" - "path/filepath" - "testing" - - "go.followtheprocess.codes/snapshot/internal/diff" - "golang.org/x/tools/txtar" -) - -func clean(text []byte) []byte { - text = bytes.ReplaceAll(text, []byte("$\n"), []byte("\n")) - text = bytes.TrimSuffix(text, []byte("^D\n")) - - return text -} - -func Test(t *testing.T) { - files, err := filepath.Glob(filepath.Join("testdata", "*.txtar")) - if err != nil { - t.Fatalf("could not glob txtar files: %v", err) - } - - if len(files) == 0 { - t.Fatalf("no testdata") - } - - for _, file := range files { - t.Run(filepath.Base(file), func(t *testing.T) { - contents, err := os.ReadFile(file) - if err != nil { - t.Fatalf("could not read %s: %v", file, err) - } - - archive := txtar.Parse(contents) - if len(archive.Files) != 3 || archive.Files[2].Name != "diff" { - t.Fatalf("%s: want three files, third named \"diff\", got: %v", file, archive.Files) - } - - diffs := diff.Diff( - archive.Files[0].Name, - clean(archive.Files[0].Data), - archive.Files[1].Name, - clean(archive.Files[1].Data), - ) - want := clean(archive.Files[2].Data) - - if !bytes.Equal(diffs, want) { - t.Fatalf("%s: have:\n%s\nwant:\n%s\n%s", file, - diffs, want, diff.Diff("have", diffs, "want", want)) - } - }) - } -} diff --git a/internal/diff/testdata/allnew.txtar b/internal/diff/testdata/allnew.txtar deleted file mode 100644 index 0828b55..0000000 --- a/internal/diff/testdata/allnew.txtar +++ /dev/null @@ -1,13 +0,0 @@ --- old -- --- new -- -a -b -c --- diff -- -diff old new ---- old -+++ new -@@ -0,0 +1,3 @@ -+ a -+ b -+ c diff --git a/internal/diff/testdata/allold.txtar b/internal/diff/testdata/allold.txtar deleted file mode 100644 index 020cedf..0000000 --- a/internal/diff/testdata/allold.txtar +++ /dev/null @@ -1,13 +0,0 @@ --- old -- -a -b -c --- new -- --- diff -- -diff old new ---- old -+++ new -@@ -1,3 +0,0 @@ -- a -- b -- c diff --git a/internal/diff/testdata/basic.txtar b/internal/diff/testdata/basic.txtar deleted file mode 100644 index f12c77d..0000000 --- a/internal/diff/testdata/basic.txtar +++ /dev/null @@ -1,35 +0,0 @@ -# Example from Hunt and McIlroy, “An Algorithm for Differential File Comparison.” -# https://www.cs.dartmouth.edu/~doug/diff.pdf - --- old -- -a -b -c -d -e -f -g --- new -- -w -a -b -x -y -z -e --- diff -- -diff old new ---- old -+++ new -@@ -1,7 +1,7 @@ -+ w - a - b -- c -- d -+ x -+ y -+ z - e -- f -- g diff --git a/internal/diff/testdata/dups.txtar b/internal/diff/testdata/dups.txtar deleted file mode 100644 index f69f6ac..0000000 --- a/internal/diff/testdata/dups.txtar +++ /dev/null @@ -1,40 +0,0 @@ --- old -- -a - -b - -c - -d - -e - -f --- new -- -a - -B - -C - -d - -e - -f --- diff -- -diff old new ---- old -+++ new -@@ -1,8 +1,8 @@ - a - $ -- b -- -- c -+ B -+ -+ C - $ - d - $ diff --git a/internal/diff/testdata/end.txtar b/internal/diff/testdata/end.txtar deleted file mode 100644 index 1c1ef5f..0000000 --- a/internal/diff/testdata/end.txtar +++ /dev/null @@ -1,38 +0,0 @@ --- old -- -1 -2 -3 -4 -5 -6 -7 -eight -nine -ten -eleven --- new -- -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 --- diff -- -diff old new ---- old -+++ new -@@ -5,7 +5,6 @@ - 5 - 6 - 7 -- eight -- nine -- ten -- eleven -+ 8 -+ 9 -+ 10 diff --git a/internal/diff/testdata/eof.txtar b/internal/diff/testdata/eof.txtar deleted file mode 100644 index 5dc145c..0000000 --- a/internal/diff/testdata/eof.txtar +++ /dev/null @@ -1,9 +0,0 @@ --- old -- -a -b -c^D --- new -- -a -b -c^D --- diff -- diff --git a/internal/diff/testdata/eof1.txtar b/internal/diff/testdata/eof1.txtar deleted file mode 100644 index fa9e11f..0000000 --- a/internal/diff/testdata/eof1.txtar +++ /dev/null @@ -1,18 +0,0 @@ --- old -- -a -b -c --- new -- -a -b -c^D --- diff -- -diff old new ---- old -+++ new -@@ -1,3 +1,3 @@ - a - b -- c -+ c -\ No newline at end of file diff --git a/internal/diff/testdata/eof2.txtar b/internal/diff/testdata/eof2.txtar deleted file mode 100644 index 2a3e2d6..0000000 --- a/internal/diff/testdata/eof2.txtar +++ /dev/null @@ -1,18 +0,0 @@ --- old -- -a -b -c^D --- new -- -a -b -c --- diff -- -diff old new ---- old -+++ new -@@ -1,3 +1,3 @@ - a - b -- c -\ No newline at end of file -+ c diff --git a/internal/diff/testdata/long.txtar b/internal/diff/testdata/long.txtar deleted file mode 100644 index 1ab33fb..0000000 --- a/internal/diff/testdata/long.txtar +++ /dev/null @@ -1,62 +0,0 @@ --- old -- -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -14½ -15 -16 -17 -18 -19 -20 --- new -- -1 -2 -3 -4 -5 -6 -8 -9 -10 -11 -12 -13 -14 -17 -18 -19 -20 --- diff -- -diff old new ---- old -+++ new -@@ -4,7 +4,6 @@ - 4 - 5 - 6 -- 7 - 8 - 9 - 10 -@@ -12,9 +11,6 @@ - 12 - 13 - 14 -- 14½ -- 15 -- 16 - 17 - 18 - 19 diff --git a/internal/diff/testdata/same.txtar b/internal/diff/testdata/same.txtar deleted file mode 100644 index 86b1100..0000000 --- a/internal/diff/testdata/same.txtar +++ /dev/null @@ -1,5 +0,0 @@ --- old -- -hello world --- new -- -hello world --- diff -- diff --git a/internal/diff/testdata/start.txtar b/internal/diff/testdata/start.txtar deleted file mode 100644 index 3842583..0000000 --- a/internal/diff/testdata/start.txtar +++ /dev/null @@ -1,34 +0,0 @@ --- old -- -e -pi -4 -5 -6 -7 -8 -9 -10 --- new -- -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 --- diff -- -diff old new ---- old -+++ new -@@ -1,5 +1,6 @@ -- e -- pi -+ 1 -+ 2 -+ 3 - 4 - 5 - 6 diff --git a/internal/diff/testdata/triv.txtar b/internal/diff/testdata/triv.txtar deleted file mode 100644 index a1fea60..0000000 --- a/internal/diff/testdata/triv.txtar +++ /dev/null @@ -1,40 +0,0 @@ -# Another example from Hunt and McIlroy, -# “An Algorithm for Differential File Comparison.” -# https://www.cs.dartmouth.edu/~doug/diff.pdf - -# Anchored diff gives up on finding anything, -# since there are no unique lines. - --- old -- -a -b -c -a -b -b -a --- new -- -c -a -b -a -b -c --- diff -- -diff old new ---- old -+++ new -@@ -1,7 +1,6 @@ -- a -- b -- c -- a -- b -- b -- a -+ c -+ a -+ b -+ a -+ b -+ c diff --git a/snapshot.go b/snapshot.go index 6747f0d..a20acd2 100644 --- a/snapshot.go +++ b/snapshot.go @@ -9,11 +9,10 @@ import ( "os" "path/filepath" "regexp" - "strings" "testing" - "go.followtheprocess.codes/hue" - "go.followtheprocess.codes/snapshot/internal/diff" + "go.followtheprocess.codes/diff" + "go.followtheprocess.codes/diff/render" ) const ( @@ -24,12 +23,6 @@ const ( defaultDirPermissions = 0o755 ) -const ( - red = hue.Red - header = hue.Cyan | hue.Bold - green = hue.Green -) - // Runner is the snapshot testing runner. // // It holds configuration and state for the snapshot test in question. @@ -144,8 +137,8 @@ func (r Runner) Snap(value any) { // Normalise CRLF to LF everywhere old = bytes.ReplaceAll(old, []byte("\r\n"), []byte("\n")) - if diff := diff.Diff("old", old, "new", content); diff != nil { - r.tb.Fatalf("\nMismatch\n--------\n%s\n", prettyDiff(string(diff))) + if lines := diff.Lines("old", old, "new", content); lines != nil { + r.tb.Fatalf("\nMismatch\n--------\n%s\n", render.Render(lines)) } } @@ -182,29 +175,6 @@ func fileExists(path string) (bool, error) { return true, nil } -// prettyDiff takes a string diff in unified diff format and colourises it for easier viewing. -// -// if noColor is true, the original diff is returned unchanged. -func prettyDiff(diff string) string { - lines := strings.Split(diff, "\n") - for i := range lines { - trimmed := strings.TrimSpace(lines[i]) - if strings.HasPrefix(trimmed, "---") || strings.HasPrefix(trimmed, "- ") { - lines[i] = red.Sprint(lines[i]) - } - - if strings.HasPrefix(trimmed, "@@") { - lines[i] = header.Sprint(lines[i]) - } - - if strings.HasPrefix(trimmed, "+++") || strings.HasPrefix(trimmed, "+ ") { - lines[i] = green.Sprint(lines[i]) - } - } - - return strings.Join(lines, "\n") -} - // A filter is a mechanism for normalising non-deterministic snapshot contents such // as windows/unix filepaths, uuids, timestamps etc. //