From 5d458c8a4ce27c49883e4455fb03f3036302a58e Mon Sep 17 00:00:00 2001
From: wjiayis
Date: Mon, 20 Apr 2026 01:06:06 +0800
Subject: [PATCH 1/2] fix: "/%" early stop

---
 internal/libs/tex/latexpand.go      | 10 ++++++++--
 internal/libs/tex/latexpand_test.go | 12 ++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/internal/libs/tex/latexpand.go b/internal/libs/tex/latexpand.go
index e5472bf6..967183e1 100644
--- a/internal/libs/tex/latexpand.go
+++ b/internal/libs/tex/latexpand.go
@@ -8,14 +8,20 @@ import (
 	"paperdebugger/internal/libs/shared"
 )
 
+// commentRegex matches a LaTeX comment: an unescaped % and everything after it
+// until end of line. The leading group captures either start-of-line or a
+// non-backslash character so that \% (an escaped percent) is preserved. Pairs
+// of backslashes (\\) before % are treated as a line-break followed by a real
+// comment, matching LaTeX semantics.
+var commentRegex = regexp.MustCompile(`(^|[^\\])((?:\\\\)*)%.*$`)
+
 func removeComments(text string) string {
 	// Split into lines, trim each line and filter empty ones
 	lines := strings.Split(text, "\n")
 	var result []string
 	for _, line := range lines {
 		trimmed := strings.TrimSpace(line)
-		commentRegex := regexp.MustCompile(`%.*$`)
-		cleaned := commentRegex.ReplaceAllString(trimmed, "")
+		cleaned := commentRegex.ReplaceAllString(trimmed, "$1$2")
 		cleaned = strings.TrimSpace(cleaned)
 		if len(cleaned) == 0 {
 			continue
diff --git a/internal/libs/tex/latexpand_test.go b/internal/libs/tex/latexpand_test.go
index 2b7ac40a..0bcb79d1 100644
--- a/internal/libs/tex/latexpand_test.go
+++ b/internal/libs/tex/latexpand_test.go
@@ -20,6 +20,18 @@ Hello, world!
 \end{document}`, removeComments(input))
 }
 
+func TestRemoveCommentsPreservesEscapedPercent(t *testing.T) {
+	const input = `accuracy improved by 12\% over baseline % TODO: recheck`
+	assert.Equal(t, `accuracy improved by 12\% over baseline`, removeComments(input))
+}
+
+func TestRemoveCommentsDoubleBackslashBeforePercent(t *testing.T) {
+	const input = `line one \\% this is a real comment after a line break
+next line`
+	assert.Equal(t, `line one \\
+next line`, removeComments(input))
+}
+
 func TestLatexpand(t *testing.T) {
 	input := map[string]string{
 		"main.tex": `

From 918316e261202a07b4108e5bf71b16cffbc947b9 Mon Sep 17 00:00:00 2001
From: wjiayis
Date: Thu, 23 Apr 2026 23:44:51 +0800
Subject: [PATCH 2/2] feat: improve test cases

---
 internal/libs/tex/latexpand.go      |  9 ++++++---
 internal/libs/tex/latexpand_test.go | 29 +++++++++++++++++++----------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/internal/libs/tex/latexpand.go b/internal/libs/tex/latexpand.go
index 967183e1..c6bf5ed3 100644
--- a/internal/libs/tex/latexpand.go
+++ b/internal/libs/tex/latexpand.go
@@ -10,9 +10,12 @@ import (
 
 // commentRegex matches a LaTeX comment: an unescaped % and everything after it
 // until end of line. The leading group captures either start-of-line or a
-// non-backslash character so that \% (an escaped percent) is preserved. Pairs
-// of backslashes (\\) before % are treated as a line-break followed by a real
-// comment, matching LaTeX semantics.
+// non-backslash character, then consumes pairs of backslashes (\\) before %.
+// This generalizes to any run of N backslashes preceding %: if N is even
+// (including 0), every backslash pairs up as a literal-backslash escape and
+// the % is unescaped, so the comment is stripped; if N is odd, the final
+// backslash escapes the % itself, so the % (and the surrounding text) is
+// preserved.
 var commentRegex = regexp.MustCompile(`(^|[^\\])((?:\\\\)*)%.*$`)
 
 func removeComments(text string) string {
diff --git a/internal/libs/tex/latexpand_test.go b/internal/libs/tex/latexpand_test.go
index 0bcb79d1..77140a44 100644
--- a/internal/libs/tex/latexpand_test.go
+++ b/internal/libs/tex/latexpand_test.go
@@ -20,16 +20,25 @@ Hello, world!
 \end{document}`, removeComments(input))
 }
 
-func TestRemoveCommentsPreservesEscapedPercent(t *testing.T) {
-	const input = `accuracy improved by 12\% over baseline % TODO: recheck`
-	assert.Equal(t, `accuracy improved by 12\% over baseline`, removeComments(input))
-}
-
-func TestRemoveCommentsDoubleBackslashBeforePercent(t *testing.T) {
-	const input = `line one \\% this is a real comment after a line break
-next line`
-	assert.Equal(t, `line one \\
-next line`, removeComments(input))
+func TestRemoveCommentsBackslashRunsBeforePercent(t *testing.T) {
+	cases := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{"1 backslash (odd) preserves %", `a\% keep`, `a\% keep`},
+		{"2 backslashes (even) strips comment", `a\\% drop`, `a\\`},
+		{"3 backslashes (odd) preserves %", `a\\\% keep`, `a\\\% keep`},
+		{"4 backslashes (even) strips comment", `a\\\\% drop`, `a\\\\`},
+		{"5 backslashes (odd) preserves %", `a\\\\\% keep`, `a\\\\\% keep`},
+		{"3 backslashes at line start preserves %", `\\\% keep`, `\\\% keep`},
+		{"4 backslashes at line start strips comment", `\\\\% drop`, `\\\\`},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, removeComments(tc.input))
+		})
+	}
 }
 
 func TestLatexpand(t *testing.T) {