From 2d8d4e6c7002d3dd02021c1540596c58daf5db12 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 10:24:50 +0200 Subject: [PATCH 01/27] Add `comments` filtering. --- embedding/commentfilter/filter.go | 160 ++++++++++++++++++++++++++++++ embedding/commentfilter/mode.go | 50 ++++++++++ embedding/commentfilter/syntax.go | 134 +++++++++++++++++++++++++ embedding/parsing/instruction.go | 20 +++- embedding/parsing/xml_parse.go | 1 + 5 files changed, 361 insertions(+), 4 deletions(-) create mode 100644 embedding/commentfilter/filter.go create mode 100644 embedding/commentfilter/mode.go create mode 100644 embedding/commentfilter/syntax.go diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go new file mode 100644 index 0000000..fc54fd1 --- /dev/null +++ b/embedding/commentfilter/filter.go @@ -0,0 +1,160 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +package commentfilter + +import "strings" + +// Filter returns source lines with comments retained according to the requested mode. +func Filter(lines []string, filePath string, mode Mode) []string { + if mode == RetainAll { + return lines + } + syntax := SyntaxFor(filePath) + if len(syntax.Line) == 0 && len(syntax.Block) == 0 { + return lines + } + + return filterLines(lines, syntax, mode) +} + +type blockState struct { + active bool + syntax BlockSyntax + keep bool +} + +// filterLines removes or preserves recognized comments across all lines. +func filterLines(lines []string, syntax Syntax, mode Mode) []string { + var filtered []string + state := blockState{} + for _, line := range lines { + filteredLine, hadComment := filterLine(line, syntax, mode, &state) + if hadComment && strings.TrimSpace(filteredLine) == "" { + continue + } + filtered = append(filtered, filteredLine) + } + + return filtered +} + +// filterLine removes or preserves recognized comments from a single source line. 
+func filterLine(line string, syntax Syntax, mode Mode, state *blockState) (string, bool) { + var result strings.Builder + position := 0 + hadComment := false + + for position < len(line) { + if state.active { + hadComment = true + end := strings.Index(line[position:], state.syntax.End) + if end < 0 { + if state.keep { + result.WriteString(line[position:]) + } + return result.String(), hadComment + } + endPosition := position + end + len(state.syntax.End) + if state.keep { + result.WriteString(line[position:endPosition]) + } + position = endPosition + state.active = false + continue + } + + if quoteEnd := quotedSegmentEnd(line, position, syntax.QuoteChars); quoteEnd > position { + result.WriteString(line[position:quoteEnd]) + position = quoteEnd + continue + } + if lineSyntax, found := lineCommentAt(line, position, syntax); found { + hadComment = true + if keepLineComment(lineSyntax, mode) { + result.WriteString(line[position:]) + } + break + } + if blockSyntax, found := blockCommentAt(line, position, syntax); found { + hadComment = true + state.active = true + state.syntax = blockSyntax + state.keep = keepBlockComment(blockSyntax, mode) + continue + } + + result.WriteByte(line[position]) + position++ + } + + return result.String(), hadComment +} + +// quotedSegmentEnd returns the end offset of a quoted string starting at position. +func quotedSegmentEnd(line string, position int, quoteChars string) int { + if position >= len(line) || !strings.ContainsRune(quoteChars, rune(line[position])) { + return position + } + quote := line[position] + cursor := position + 1 + for cursor < len(line) { + if line[cursor] == '\\' { + cursor += 2 + continue + } + if line[cursor] == quote { + return cursor + 1 + } + cursor++ + } + + return len(line) +} + +// lineCommentAt reports whether a line comment starts at the given position. 
+func lineCommentAt(line string, position int, syntax Syntax) (LineSyntax, bool) { + for _, lineSyntax := range syntax.Line { + if strings.HasPrefix(line[position:], lineSyntax.Prefix) { + return lineSyntax, true + } + } + + return LineSyntax{}, false +} + +// blockCommentAt reports whether a block comment starts at the given position. +func blockCommentAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { + for _, blockSyntax := range syntax.Block { + if strings.HasPrefix(line[position:], blockSyntax.Start) { + return blockSyntax, true + } + } + + return BlockSyntax{}, false +} + +// keepLineComment reports whether the mode retains the given line comment kind. +func keepLineComment(lineSyntax LineSyntax, mode Mode) bool { + return mode == RetainEndOfLine || mode == RetainDocumentation && lineSyntax.Documentation +} + +// keepBlockComment reports whether the mode retains the given block comment kind. +func keepBlockComment(blockSyntax BlockSyntax, mode Mode) bool { + return mode == RetainBlock || mode == RetainDocumentation && blockSyntax.Documentation +} diff --git a/embedding/commentfilter/mode.go b/embedding/commentfilter/mode.go new file mode 100644 index 0000000..6f932ec --- /dev/null +++ b/embedding/commentfilter/mode.go @@ -0,0 +1,50 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import "fmt" + +// Mode controls which source comments are retained in embedded snippets. +type Mode string + +const ( + // RetainAll keeps all comments in the embedded source. + RetainAll Mode = "all" + // RetainNone removes all comments recognized for the source language. + RetainNone Mode = "none" + // RetainDocumentation keeps only API documentation comments. + RetainDocumentation Mode = "documentation" + // RetainEndOfLine keeps only line comments such as `//` and `#`. + RetainEndOfLine Mode = "end-of-line" + // RetainBlock keeps only block comments such as `/* */`. + RetainBlock Mode = "block" +) + +// ParseMode converts an embed-code `comments` attribute value into a Mode. +func ParseMode(value string) (Mode, error) { + switch Mode(value) { + case "": + return RetainAll, nil + case RetainAll, RetainNone, RetainDocumentation, RetainEndOfLine, RetainBlock: + return Mode(value), nil + default: + return "", fmt.Errorf("unsupported comments value `%s`; expected one of "+ + "`all`, `none`, `documentation`, `end-of-line`, or `block`", value) + } +} diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go new file mode 100644 index 0000000..be9ed59 --- /dev/null +++ b/embedding/commentfilter/syntax.go @@ -0,0 +1,134 @@ +// Copyright 2026, TeamDev. All rights reserved. 
+// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import ( + "path/filepath" + "strings" +) + +// LineSyntax describes a single-line comment marker. +type LineSyntax struct { + Prefix string + Documentation bool +} + +// BlockSyntax describes a block comment marker pair. +type BlockSyntax struct { + Start string + End string + Documentation bool +} + +// Syntax describes comment markers and string delimiters for a language family. +type Syntax struct { + Line []LineSyntax + Block []BlockSyntax + QuoteChars string +} + +// SyntaxFor returns the comment syntax registered for the given file path. +func SyntaxFor(filePath string) Syntax { + extension := normalizeExtension(filepath.Ext(filePath)) + if syntax, found := syntaxesByExtension[extension]; found { + return syntax + } + + return Syntax{} +} + +// RegisterSyntax registers comment syntax for a source file extension. 
+func RegisterSyntax(extension string, syntax Syntax) { + syntaxesByExtension[normalizeExtension(extension)] = syntax +} + +// normalizeExtension returns a lowercase file extension with a leading dot. +func normalizeExtension(extension string) string { + normalized := strings.ToLower(extension) + if normalized == "" || strings.HasPrefix(normalized, ".") { + return normalized + } + + return "." + normalized +} + +var cLikeSyntax = Syntax{ + Line: []LineSyntax{ + {Prefix: "///", Documentation: true}, + {Prefix: "//!", Documentation: true}, + {Prefix: "//", Documentation: false}, + }, + Block: []BlockSyntax{ + {Start: "/**", End: "*/", Documentation: true}, + {Start: "/*!", End: "*/", Documentation: true}, + {Start: "/*", End: "*/", Documentation: false}, + }, + QuoteChars: "\"'`", +} + +var hashLineSyntax = Syntax{ + Line: []LineSyntax{ + {Prefix: "#", Documentation: false}, + }, + QuoteChars: "\"'", +} + +var xmlSyntax = Syntax{ + Block: []BlockSyntax{ + {Start: "<!--", End: "-->", Documentation: false}, + }, + QuoteChars: "\"'", +} + +var basicSyntax = Syntax{ + Line: []LineSyntax{ + {Prefix: "'", Documentation: false}, + }, + QuoteChars: "\"", +} + +var syntaxesByExtension = map[string]Syntax{ + ".java": cLikeSyntax, + ".groovy": cLikeSyntax, + ".kt": cLikeSyntax, + ".kts": cLikeSyntax, + ".c": cLikeSyntax, + ".cc": cLikeSyntax, + ".cpp": cLikeSyntax, + ".cxx": cLikeSyntax, + ".h": cLikeSyntax, + ".hh": cLikeSyntax, + ".hpp": cLikeSyntax, + ".cs": cLikeSyntax, + ".js": cLikeSyntax, + ".jsx": cLikeSyntax, + ".ts": cLikeSyntax, + ".tsx": cLikeSyntax, + ".go": cLikeSyntax, + ".yml": hashLineSyntax, + ".yaml": hashLineSyntax, + ".xml": xmlSyntax, + ".html": xmlSyntax, + ".htm": xmlSyntax, + ".vb": basicSyntax, + ".bas": basicSyntax, + ".vbs": basicSyntax, + ".vbscript": basicSyntax, + ".properties": hashLineSyntax, +} diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 76cc87f..98c779b 100644 --- a/embedding/parsing/instruction.go +++ 
b/embedding/parsing/instruction.go @@ -22,6 +22,7 @@ import ( "fmt" "embed-code/embed-code-go/configuration" + "embed-code/embed-code-go/embedding/commentfilter" "embed-code/embed-code-go/fragmentation" "embed-code/embed-code-go/indent" ) @@ -42,12 +43,15 @@ import ( // EndPattern — an optional glob-like pattern. If specified, lines after the matching one // are excluded. // +// CommentMode — specifies which comments are retained in the embedded code. +// // Configuration — a Configuration with all embed-code settings. type Instruction struct { CodeFile string Fragment string StartPattern *Pattern EndPattern *Pattern + CommentMode commentfilter.Mode Configuration configuration.Configuration } @@ -60,6 +64,7 @@ type Instruction struct { // - start — an optional glob-like pattern. If specified, lines before the matching one // are excluded; // - end — an optional glob-like pattern. If specified, lines after the matching one are excluded. +// - comments — an optional comment filtering mode. If omitted, all comments are retained. // // config — a Configuration with all embed-code settings. // @@ -70,6 +75,10 @@ func NewInstruction( fragment := attributes["fragment"] startValue := attributes["start"] endValue := attributes["end"] + commentMode, err := commentfilter.ParseMode(attributes["comments"]) + if err != nil { + return Instruction{}, err + } if fragment != "" && (startValue != "" || endValue != "") { return Instruction{}, @@ -92,6 +101,7 @@ func NewInstruction( Fragment: fragment, StartPattern: start, EndPattern: end, + CommentMode: commentMode, Configuration: config, }, nil } @@ -105,16 +115,18 @@ func (e Instruction) Content() ([]string, error) { return nil, err } if e.StartPattern != nil || e.EndPattern != nil { - return e.matchingLines(fileContent), nil + fileContent = e.matchingLines(fileContent) } - return fileContent, nil + return commentfilter.Filter(fileContent, e.CodeFile, e.CommentMode), nil } // Returns string representation of Instruction. 
func (e Instruction) String() string { - return fmt.Sprintf("EmbeddingInstruction[file=`%s`, fragment=`%s`, start=`%s`, end=`%s`]", - e.CodeFile, e.Fragment, e.StartPattern, e.EndPattern) + return fmt.Sprintf( + "EmbeddingInstruction[file=`%s`, fragment=`%s`, start=`%s`, end=`%s`, comments=`%s`]", + e.CodeFile, e.Fragment, e.StartPattern, e.EndPattern, e.CommentMode, + ) } // Filters and returns a subset of input lines based on start and end patterns. diff --git a/embedding/parsing/xml_parse.go b/embedding/parsing/xml_parse.go index 4fe51b1..330d87e 100644 --- a/embedding/parsing/xml_parse.go +++ b/embedding/parsing/xml_parse.go @@ -47,6 +47,7 @@ type Item struct { // - start — an optional glob-like pattern. If specified, lines before the matching one // are excluded; // - end — an optional glob-like pattern. If specified, lines after the matching one are excluded. +// - comments — an optional comment filtering mode. If omitted, all comments are retained. // // config — a Configuration with all embed-code settings. // From 04bfc609219d1fdb53d3d7e06efaa26519407547 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 10:25:23 +0200 Subject: [PATCH 02/27] Update readme. --- EMBEDDING.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/EMBEDDING.md b/EMBEDDING.md index 458528a..51eeda1 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -70,6 +70,7 @@ To embed a named fragment, add the following to your Markdown file: - **`file`**: The path to the source file relative to the `code-path` defined in your configuration. - **`fragment`**: The name of the fragment to embed. If omitted, the entire file will be embedded. +- **`comments`**: Optional comment filtering mode. If omitted, all comments are retained. Fragment names can be any string, but avoid using double quotes (`"`) or characters reserved by XML. @@ -103,6 +104,32 @@ Use `^` and `$` to disable this behavior and match the exact line start or end. 
If you need to match a literal `^` at the start of a line, use `^^`. Similarly, use `$$` to match a literal `$` at the end of a line. +## Comment filtering + +Use the optional `comments` attribute to reduce comment noise in the embedded snippet: + +````markdown +<embed-code file="org/example/Hello.java" comments="documentation"></embed-code> +```java +``` +```` + +Supported values: + +- `all` — retain all comments. This is the default. +- `none` — strip all recognized comments. +- `documentation` — retain documentation comments such as Javadoc, KDoc, and `///`. +- `end-of-line` — retain line comments such as `//` and `#`. +- `block` — retain block comments such as `/* */`. + +Comment filtering is selected by source file extension. 
The built-in registry covers Java, -Groovy, Kotlin, KTS, C/C++, C#, JavaScript, TypeScript, YAML, XML, HTML, Go, and Basic-family -files. Unknown extensions are embedded unchanged. +Groovy, Kotlin, KTS, C#, JavaScript, TypeScript, YAML, XML, HTML, and Basic-family files. +Unknown extensions are embedded unchanged. ## Advanced use cases diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index fc54fd1..356b6e7 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -151,7 +151,7 @@ func blockCommentAt(line string, position int, syntax Syntax) (BlockSyntax, bool // keepLineComment reports whether the mode retains the given line comment kind. func keepLineComment(lineSyntax LineSyntax, mode Mode) bool { - return mode == RetainEndOfLine || mode == RetainDocumentation && lineSyntax.Documentation + return mode == RetainInline || mode == RetainDocumentation && lineSyntax.Documentation } // keepBlockComment reports whether the mode retains the given block comment kind. diff --git a/embedding/commentfilter/mode.go b/embedding/commentfilter/mode.go index 6f932ec..0ab62ed 100644 --- a/embedding/commentfilter/mode.go +++ b/embedding/commentfilter/mode.go @@ -30,8 +30,8 @@ const ( RetainNone Mode = "none" // RetainDocumentation keeps only API documentation comments. RetainDocumentation Mode = "documentation" - // RetainEndOfLine keeps only line comments such as `//` and `#`. - RetainEndOfLine Mode = "end-of-line" + // RetainInline keeps only inline comments such as `//` and `#`. + RetainInline Mode = "inline" // RetainBlock keeps only block comments such as `/* */`. 
RetainBlock Mode = "block" ) @@ -41,10 +41,10 @@ func ParseMode(value string) (Mode, error) { switch Mode(value) { case "": return RetainAll, nil - case RetainAll, RetainNone, RetainDocumentation, RetainEndOfLine, RetainBlock: + case RetainAll, RetainNone, RetainDocumentation, RetainInline, RetainBlock: return Mode(value), nil default: return "", fmt.Errorf("unsupported comments value `%s`; expected one of "+ - "`all`, `none`, `documentation`, `end-of-line`, or `block`", value) + "`all`, `none`, `documentation`, `inline`, or `block`", value) } } diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index be9ed59..de6eb2b 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -68,15 +68,24 @@ func normalizeExtension(extension string) string { return "." + normalized } -var cLikeSyntax = Syntax{ +var javaStyleSyntax = Syntax{ + Line: []LineSyntax{ + {Prefix: "//", Documentation: false}, + }, + Block: []BlockSyntax{ + {Start: "/**", End: "*/", Documentation: true}, + {Start: "/*", End: "*/", Documentation: false}, + }, + QuoteChars: "\"'`", +} + +var csharpSyntax = Syntax{ Line: []LineSyntax{ {Prefix: "///", Documentation: true}, - {Prefix: "//!", Documentation: true}, {Prefix: "//", Documentation: false}, }, Block: []BlockSyntax{ {Start: "/**", End: "*/", Documentation: true}, - {Start: "/*!", End: "*/", Documentation: true}, {Start: "/*", End: "*/", Documentation: false}, }, QuoteChars: "\"'`", @@ -104,23 +113,15 @@ var basicSyntax = Syntax{ } var syntaxesByExtension = map[string]Syntax{ - ".java": cLikeSyntax, - ".groovy": cLikeSyntax, - ".kt": cLikeSyntax, - ".kts": cLikeSyntax, - ".c": cLikeSyntax, - ".cc": cLikeSyntax, - ".cpp": cLikeSyntax, - ".cxx": cLikeSyntax, - ".h": cLikeSyntax, - ".hh": cLikeSyntax, - ".hpp": cLikeSyntax, - ".cs": cLikeSyntax, - ".js": cLikeSyntax, - ".jsx": cLikeSyntax, - ".ts": cLikeSyntax, - ".tsx": cLikeSyntax, - ".go": cLikeSyntax, + ".java": javaStyleSyntax, + 
".groovy": javaStyleSyntax, + ".kt": javaStyleSyntax, + ".kts": javaStyleSyntax, + ".cs": csharpSyntax, + ".js": javaStyleSyntax, + ".jsx": javaStyleSyntax, + ".ts": javaStyleSyntax, + ".tsx": javaStyleSyntax, ".yml": hashLineSyntax, ".yaml": hashLineSyntax, ".xml": xmlSyntax, From 92cec40cf1e315b138213cf289e4c258ae64bf35 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 17:54:42 +0200 Subject: [PATCH 04/27] Support VB comments. --- EMBEDDING.md | 8 +- embedding/commentfilter/filter.go | 99 +++++++++++------- embedding/commentfilter/mode.go | 6 +- embedding/commentfilter/syntax.go | 132 ++++++++++++------------ embedding/commentfilter/visual_basic.go | 109 +++++++++++++++++++ 5 files changed, 248 insertions(+), 106 deletions(-) create mode 100644 embedding/commentfilter/visual_basic.go diff --git a/EMBEDDING.md b/EMBEDDING.md index cf38302..2d94465 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -122,9 +122,11 @@ Supported values: - `all` — retain all comments. This is the default. - `none` — strip all recognized comments. -- `documentation` — retain documentation comments such as Javadoc, KDoc, JSDoc, and C# XML docs. -- `inline` — retain line comments such as `//` and `#`. -- `block` — retain block comments such as `/* */`. +- `documentation` — retain documentation comments such as Javadoc, KDoc, JSDoc, + C# XML docs, and VB XML docs. +- `regular` — retain non-documentation line and block comments. +- `inline` — retain non-documentation line comments such as `//` and `#`. +- `block` — retain non-documentation block comments such as `/* */`. Comment filtering is selected by source file extension. The built-in registry covers Java, Groovy, Kotlin, KTS, C#, JavaScript, TypeScript, YAML, XML, HTML, and Basic-family files. 
diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 356b6e7..2440146 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -25,26 +25,31 @@ func Filter(lines []string, filePath string, mode Mode) []string { if mode == RetainAll { return lines } - syntax := SyntaxFor(filePath) - if len(syntax.Line) == 0 && len(syntax.Block) == 0 { + filter, found := filterFor(filePath) + if !found { return lines } - return filterLines(lines, syntax, mode) + return filter.Filter(lines, mode) +} + +// MarkerCommentFilter removes comments using lexical markers declared in Syntax. +type MarkerCommentFilter struct { + Syntax Syntax } type blockState struct { active bool - syntax BlockSyntax + block BlockSyntax keep bool } -// filterLines removes or preserves recognized comments across all lines. -func filterLines(lines []string, syntax Syntax, mode Mode) []string { +// Filter removes or preserves recognized comments across all lines. +func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { var filtered []string state := blockState{} for _, line := range lines { - filteredLine, hadComment := filterLine(line, syntax, mode, &state) + filteredLine, hadComment := f.filterLine(line, mode, &state) if hadComment && strings.TrimSpace(filteredLine) == "" { continue } @@ -55,7 +60,7 @@ func filterLines(lines []string, syntax Syntax, mode Mode) []string { } // filterLine removes or preserves recognized comments from a single source line. 
-func filterLine(line string, syntax Syntax, mode Mode, state *blockState) (string, bool) { +func (f MarkerCommentFilter) filterLine(line string, mode Mode, state *blockState) (string, bool) { var result strings.Builder position := 0 hadComment := false @@ -63,14 +68,14 @@ func filterLine(line string, syntax Syntax, mode Mode, state *blockState) (strin for position < len(line) { if state.active { hadComment = true - end := strings.Index(line[position:], state.syntax.End) + end := strings.Index(line[position:], state.block.End) if end < 0 { if state.keep { result.WriteString(line[position:]) } return result.String(), hadComment } - endPosition := position + end + len(state.syntax.End) + endPosition := position + end + len(state.block.End) if state.keep { result.WriteString(line[position:endPosition]) } @@ -79,23 +84,37 @@ func filterLine(line string, syntax Syntax, mode Mode, state *blockState) (strin continue } - if quoteEnd := quotedSegmentEnd(line, position, syntax.QuoteChars); quoteEnd > position { + if quoteEnd := quotedSegmentEnd(line, position, f.Syntax.QuoteChars); quoteEnd > position { result.WriteString(line[position:quoteEnd]) position = quoteEnd continue } - if lineSyntax, found := lineCommentAt(line, position, syntax); found { + if _, found := documentationInlineAt(line, position, f.Syntax); found { + hadComment = true + if mode == RetainDocumentation { + result.WriteString(line[position:]) + } + break + } + if block, found := documentationBlockAt(line, position, f.Syntax); found { + hadComment = true + state.active = true + state.block = block + state.keep = mode == RetainDocumentation + continue + } + if _, found := inlineCommentAt(line, position, f.Syntax); found { hadComment = true - if keepLineComment(lineSyntax, mode) { + if mode == RetainInline || mode == RetainRegular { result.WriteString(line[position:]) } break } - if blockSyntax, found := blockCommentAt(line, position, syntax); found { + if block, found := blockCommentAt(line, position, 
f.Syntax); found { hadComment = true state.active = true - state.syntax = blockSyntax - state.keep = keepBlockComment(blockSyntax, mode) + state.block = block + state.keep = mode == RetainBlock || mode == RetainRegular continue } @@ -127,34 +146,44 @@ func quotedSegmentEnd(line string, position int, quoteChars string) int { return len(line) } -// lineCommentAt reports whether a line comment starts at the given position. -func lineCommentAt(line string, position int, syntax Syntax) (LineSyntax, bool) { - for _, lineSyntax := range syntax.Line { - if strings.HasPrefix(line[position:], lineSyntax.Prefix) { - return lineSyntax, true - } - } +// documentationInlineAt reports whether a documentation line comment starts at the position. +func documentationInlineAt(line string, position int, syntax Syntax) (string, bool) { + return prefixAt(line, position, syntax.Documentation.Inline) +} + +// documentationBlockAt reports whether a documentation block comment starts at the position. +func documentationBlockAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { + return blockAt(line, position, syntax.Documentation.Block) +} - return LineSyntax{}, false +// inlineCommentAt reports whether an inline comment starts at the given position. +func inlineCommentAt(line string, position int, syntax Syntax) (string, bool) { + return prefixAt(line, position, syntax.Inline) } // blockCommentAt reports whether a block comment starts at the given position. func blockCommentAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { - for _, blockSyntax := range syntax.Block { - if strings.HasPrefix(line[position:], blockSyntax.Start) { - return blockSyntax, true + return blockAt(line, position, syntax.Block) +} + +// prefixAt reports whether one of the given prefixes starts at the position. 
+func prefixAt(line string, position int, prefixes []string) (string, bool) { + for _, prefix := range prefixes { + if strings.HasPrefix(line[position:], prefix) { + return prefix, true } } - return BlockSyntax{}, false + return "", false } -// keepLineComment reports whether the mode retains the given line comment kind. -func keepLineComment(lineSyntax LineSyntax, mode Mode) bool { - return mode == RetainInline || mode == RetainDocumentation && lineSyntax.Documentation -} +// blockAt reports whether one of the given block markers starts at the position. +func blockAt(line string, position int, blocks []BlockSyntax) (BlockSyntax, bool) { + for _, block := range blocks { + if strings.HasPrefix(line[position:], block.Start) { + return block, true + } + } -// keepBlockComment reports whether the mode retains the given block comment kind. -func keepBlockComment(blockSyntax BlockSyntax, mode Mode) bool { - return mode == RetainBlock || mode == RetainDocumentation && blockSyntax.Documentation + return BlockSyntax{}, false } diff --git a/embedding/commentfilter/mode.go b/embedding/commentfilter/mode.go index 0ab62ed..456f261 100644 --- a/embedding/commentfilter/mode.go +++ b/embedding/commentfilter/mode.go @@ -30,6 +30,8 @@ const ( RetainNone Mode = "none" // RetainDocumentation keeps only API documentation comments. RetainDocumentation Mode = "documentation" + // RetainRegular keeps inline and block comments that are not documentation comments. + RetainRegular Mode = "regular" // RetainInline keeps only inline comments such as `//` and `#`. RetainInline Mode = "inline" // RetainBlock keeps only block comments such as `/* */`. 
@@ -41,10 +43,10 @@ func ParseMode(value string) (Mode, error) { switch Mode(value) { case "": return RetainAll, nil - case RetainAll, RetainNone, RetainDocumentation, RetainInline, RetainBlock: + case RetainAll, RetainNone, RetainDocumentation, RetainRegular, RetainInline, RetainBlock: return Mode(value), nil default: return "", fmt.Errorf("unsupported comments value `%s`; expected one of "+ - "`all`, `none`, `documentation`, `inline`, or `block`", value) + "`all`, `none`, `documentation`, `regular`, `inline`, or `block`", value) } } diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index de6eb2b..e086aff 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -23,39 +23,36 @@ import ( "strings" ) -// LineSyntax describes a single-line comment marker. -type LineSyntax struct { - Prefix string - Documentation bool -} - // BlockSyntax describes a block comment marker pair. type BlockSyntax struct { - Start string - End string - Documentation bool + Start string + End string } -// Syntax describes comment markers and string delimiters for a language family. -type Syntax struct { - Line []LineSyntax - Block []BlockSyntax - QuoteChars string +// DocumentationSyntax describes API documentation comment markers. +type DocumentationSyntax struct { + Inline []string + Block []BlockSyntax } -// SyntaxFor returns the comment syntax registered for the given file path. -func SyntaxFor(filePath string) Syntax { - extension := normalizeExtension(filepath.Ext(filePath)) - if syntax, found := syntaxesByExtension[extension]; found { - return syntax - } +// Syntax describes lexical comment markers and string delimiters for a language family. +type Syntax struct { + Inline []string + Block []BlockSyntax + Documentation DocumentationSyntax + QuoteChars string +} - return Syntax{} +// Filterer removes or preserves source comments according to the requested mode. 
+type Filterer interface { + Filter(lines []string, mode Mode) []string } -// RegisterSyntax registers comment syntax for a source file extension. -func RegisterSyntax(extension string, syntax Syntax) { - syntaxesByExtension[normalizeExtension(extension)] = syntax +// filterFor returns the comment filter registered for the given file path. +func filterFor(filePath string) (Filterer, bool) { + extension := normalizeExtension(filepath.Ext(filePath)) + filter, found := filtersByExtension[extension] + return filter, found } // normalizeExtension returns a lowercase file extension with a leading dot. @@ -69,67 +66,70 @@ func normalizeExtension(extension string) string { } var javaStyleSyntax = Syntax{ - Line: []LineSyntax{ - {Prefix: "//", Documentation: false}, - }, + Inline: []string{"//"}, Block: []BlockSyntax{ - {Start: "/**", End: "*/", Documentation: true}, - {Start: "/*", End: "*/", Documentation: false}, + {Start: "/*", End: "*/"}, + }, + Documentation: DocumentationSyntax{ + Block: []BlockSyntax{{Start: "/**", End: "*/"}}, }, QuoteChars: "\"'`", } var csharpSyntax = Syntax{ - Line: []LineSyntax{ - {Prefix: "///", Documentation: true}, - {Prefix: "//", Documentation: false}, - }, + Inline: []string{"//"}, Block: []BlockSyntax{ - {Start: "/**", End: "*/", Documentation: true}, - {Start: "/*", End: "*/", Documentation: false}, + {Start: "/*", End: "*/"}, + }, + Documentation: DocumentationSyntax{ + Inline: []string{"///"}, + Block: []BlockSyntax{{Start: "/**", End: "*/"}}, }, QuoteChars: "\"'`", } var hashLineSyntax = Syntax{ - Line: []LineSyntax{ - {Prefix: "#", Documentation: false}, - }, + Inline: []string{"#"}, QuoteChars: "\"'", } var xmlSyntax = Syntax{ Block: []BlockSyntax{ - {Start: "", Documentation: false}, + {Start: ""}, }, QuoteChars: "\"'", } -var basicSyntax = Syntax{ - Line: []LineSyntax{ - {Prefix: "'", Documentation: false}, - }, - QuoteChars: "\"", -} - -var syntaxesByExtension = map[string]Syntax{ - ".java": javaStyleSyntax, - ".groovy": 
javaStyleSyntax, - ".kt": javaStyleSyntax, - ".kts": javaStyleSyntax, - ".cs": csharpSyntax, - ".js": javaStyleSyntax, - ".jsx": javaStyleSyntax, - ".ts": javaStyleSyntax, - ".tsx": javaStyleSyntax, - ".yml": hashLineSyntax, - ".yaml": hashLineSyntax, - ".xml": xmlSyntax, - ".html": xmlSyntax, - ".htm": xmlSyntax, - ".vb": basicSyntax, - ".bas": basicSyntax, - ".vbs": basicSyntax, - ".vbscript": basicSyntax, - ".properties": hashLineSyntax, +var filtersByExtension = map[string]Filterer{ + // Java/Kotlin + ".java": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".kt": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".kts": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".groovy": MarkerCommentFilter{Syntax: javaStyleSyntax}, + + // C# + ".cs": MarkerCommentFilter{Syntax: csharpSyntax}, + + // JavaScript + ".js": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".jsx": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".ts": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".tsx": MarkerCommentFilter{Syntax: javaStyleSyntax}, + + // YAML + ".yml": MarkerCommentFilter{Syntax: hashLineSyntax}, + ".yaml": MarkerCommentFilter{Syntax: hashLineSyntax}, + + // XML + ".xml": MarkerCommentFilter{Syntax: xmlSyntax}, + + // HTML + ".html": MarkerCommentFilter{Syntax: xmlSyntax}, + ".htm": MarkerCommentFilter{Syntax: xmlSyntax}, + + // Visual Basic + ".vb": VisualBasicFilter{}, + ".bas": VisualBasicFilter{}, + ".vbs": VisualBasicFilter{}, + ".vbscript": VisualBasicFilter{}, } diff --git a/embedding/commentfilter/visual_basic.go b/embedding/commentfilter/visual_basic.go new file mode 100644 index 0000000..8195330 --- /dev/null +++ b/embedding/commentfilter/visual_basic.go @@ -0,0 +1,109 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import ( + "strings" + "unicode" +) + +const ( + commentPrefix = '\'' + docPrefix = "'''" + rem = "rem" +) + +// VisualBasicFilter filters the Visual Basic comment forms: +// - documentation comments starting with `”'`; +// - apostrophe comments starting with `'`; +// - REM comments starting with `REM`. +type VisualBasicFilter struct{} + +// Filter removes or preserves Visual Basic comments according to mode. +func (VisualBasicFilter) Filter(lines []string, mode Mode) []string { + var filtered []string + for _, line := range lines { + filteredLine, hadComment := filterVisualBasicLine(line, mode) + if hadComment && strings.TrimSpace(filteredLine) == "" { + continue + } + filtered = append(filtered, filteredLine) + } + + return filtered +} + +// filterVisualBasicLine removes or preserves one Visual Basic comment. 
+func filterVisualBasicLine(line string, mode Mode) (string, bool) { + var result strings.Builder + position := 0 + for position < len(line) { + if quoteEnd := quotedSegmentEnd(line, position, "\""); quoteEnd > position { + result.WriteString(line[position:quoteEnd]) + position = quoteEnd + continue + } + if strings.HasPrefix(line[position:], docPrefix) { + if mode == RetainDocumentation { + result.WriteString(line[position:]) + } + return result.String(), true + } + if line[position] == commentPrefix || remCommentAt(line, position) { + if mode == RetainInline || mode == RetainRegular { + result.WriteString(line[position:]) + } + return result.String(), true + } + result.WriteByte(line[position]) + position++ + } + + return result.String(), false +} + +// remCommentAt reports whether a Visual Basic REM comment starts at position. +func remCommentAt(line string, position int) bool { + if len(line[position:]) < len(rem) || + !strings.EqualFold( + line[position:position+len(rem)], + rem, + ) { + return false + } + return remPrefixBoundary(line, position) && + remSuffixBoundary(line, position+len(rem)) +} + +// remPrefixBoundary reports whether REM appears where a statement can start. +func remPrefixBoundary(line string, position int) bool { + for cursor := position - 1; cursor >= 0; cursor-- { + if unicode.IsSpace(rune(line[cursor])) { + continue + } + return line[cursor] == ':' + } + + return true +} + +// remSuffixBoundary reports whether REM is followed by whitespace or the end of line. +func remSuffixBoundary(line string, position int) bool { + return position >= len(line) || unicode.IsSpace(rune(line[position])) +} From 1570335898a4fc74fc9388951976d24bbedaa3a3 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 18:07:39 +0200 Subject: [PATCH 05/27] Improve readability. 
--- EMBEDDING.md | 5 ++--- embedding/commentfilter/syntax.go | 8 ++++---- embedding/commentfilter/visual_basic.go | 6 +++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 2d94465..fd6a7b1 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -122,10 +122,9 @@ Supported values: - `all` — retain all comments. This is the default. - `none` — strip all recognized comments. -- `documentation` — retain documentation comments such as Javadoc, KDoc, JSDoc, - C# XML docs, and VB XML docs. +- `documentation` — retain documentation comments such as Javadoc. - `regular` — retain non-documentation line and block comments. -- `inline` — retain non-documentation line comments such as `//` and `#`. +- `inline` — retain non-documentation line comments such as `//`. - `block` — retain non-documentation block comments such as `/* */`. Comment filtering is selected by source file extension. The built-in registry covers Java, diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index e086aff..1f48d15 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -128,8 +128,8 @@ var filtersByExtension = map[string]Filterer{ ".htm": MarkerCommentFilter{Syntax: xmlSyntax}, // Visual Basic - ".vb": VisualBasicFilter{}, - ".bas": VisualBasicFilter{}, - ".vbs": VisualBasicFilter{}, - ".vbscript": VisualBasicFilter{}, + ".vb": VisualBasicCommentFilter{}, + ".bas": VisualBasicCommentFilter{}, + ".vbs": VisualBasicCommentFilter{}, + ".vbscript": VisualBasicCommentFilter{}, } diff --git a/embedding/commentfilter/visual_basic.go b/embedding/commentfilter/visual_basic.go index 8195330..645d61c 100644 --- a/embedding/commentfilter/visual_basic.go +++ b/embedding/commentfilter/visual_basic.go @@ -29,14 +29,14 @@ const ( rem = "rem" ) -// VisualBasicFilter filters the Visual Basic comment forms: +// VisualBasicCommentFilter filters the Visual Basic comment forms: // - documentation comments starting 
with `”'`; // - apostrophe comments starting with `'`; // - REM comments starting with `REM`. -type VisualBasicFilter struct{} +type VisualBasicCommentFilter struct{} // Filter removes or preserves Visual Basic comments according to mode. -func (VisualBasicFilter) Filter(lines []string, mode Mode) []string { +func (VisualBasicCommentFilter) Filter(lines []string, mode Mode) []string { var filtered []string for _, line := range lines { filteredLine, hadComment := filterVisualBasicLine(line, mode) From 8bdaa847c060e806914b83020b099532270a413e Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 18:39:56 +0200 Subject: [PATCH 06/27] Add tests. --- embedding/commentfilter/filter_test.go | 215 ++++++++++++++++++ embedding/parsing/instruction_test.go | 90 ++++++++ .../code/java/org/example/Comments.java | 14 ++ 3 files changed, 319 insertions(+) create mode 100644 embedding/commentfilter/filter_test.go create mode 100644 test/resources/code/java/org/example/Comments.java diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go new file mode 100644 index 0000000..47f9240 --- /dev/null +++ b/embedding/commentfilter/filter_test.go @@ -0,0 +1,215 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import ( + "reflect" + "testing" +) + +// TestFilterYaml verifies YAML line comment filtering. +func TestFilterYaml(t *testing.T) { + lines := []string{ + "name: test # inline", + "# standalone", + "value: \"# literal\"", + } + + expected := []string{ + "name: test ", + "value: \"# literal\"", + } + + assertFiltered(t, "config.yml", RetainNone, lines, expected) +} + +// TestFilterXml verifies XML block comment filtering. +func TestFilterXml(t *testing.T) { + lines := []string{ + "", + " ", + " \"/>", + "", + } + + expected := []string{ + "", + " \"/>", + "", + } + + assertFiltered(t, "layout.xml", RetainNone, lines, expected) +} + +// TestFilterJavaStyle verifies Java-family marker-based filtering. +func TestFilterJavaStyle(t *testing.T) { + t.Run("documentation", func(t *testing.T) { + lines := []string{ + "/** API docs. */", + "// implementation note", + "fun call() = \"// literal\"", + } + + expected := []string{ + "/** API docs. */", + "fun call() = \"// literal\"", + } + + assertFiltered(t, "api.kt", RetainDocumentation, lines, expected) + }) + + t.Run("block", func(t *testing.T) { + lines := []string{ + "/** API docs. 
*/", + "/* implementation note */", + "String create();", + } + + expected := []string{ + "/* implementation note */", + "String create();", + } + + assertFiltered(t, "Api.java", RetainBlock, lines, expected) + }) + + t.Run("regular", func(t *testing.T) { + lines := []string{ + "/** API docs. */", + "/* implementation note */", + "String create(); // inline note", + } + + expected := []string{ + "/* implementation note */", + "String create(); // inline note", + } + + assertFiltered(t, "Api.java", RetainRegular, lines, expected) + }) +} + +// TestFilterCSharp verifies C# XML documentation comment filtering. +func TestFilterCSharp(t *testing.T) { + t.Run("documentation", func(t *testing.T) { + lines := []string{ + "/// Creates a value.", + "// implementation note", + "public string Create() => \"// literal\";", + } + + expected := []string{ + "/// Creates a value.", + "public string Create() => \"// literal\";", + } + + assertFiltered(t, "Api.cs", RetainDocumentation, lines, expected) + }) + + t.Run("inline", func(t *testing.T) { + lines := []string{ + "/// Creates a value.", + "// implementation note", + "public string Create() => \"// literal\";", + } + + expected := []string{ + "// implementation note", + "public string Create() => \"// literal\";", + } + + assertFiltered(t, "Api.cs", RetainInline, lines, expected) + }) +} + +// TestFilterVisualBasic verifies Visual Basic comment filtering. 
+func TestFilterVisualBasic(t *testing.T) { + t.Run("none", func(t *testing.T) { + lines := []string{ + "' file comment", + "REM module comment", + "Dim text = \"REM not a comment\"", + "Dim value = 1 ' inline", + "Dim ready = True : Rem after statement separator", + "Dim reminder = 1", + } + + expected := []string{ + "Dim text = \"REM not a comment\"", + "Dim value = 1 ", + "Dim ready = True : ", + "Dim reminder = 1", + } + + assertFiltered(t, "Module.vb", RetainNone, lines, expected) + }) + + t.Run("regular", func(t *testing.T) { + lines := []string{ + "''' Creates a value.", + "' file comment", + "REM module comment", + "Dim value = 1 ' inline", + } + + expected := []string{ + "' file comment", + "REM module comment", + "Dim value = 1 ' inline", + } + + assertFiltered(t, "Module.vb", RetainRegular, lines, expected) + }) + + t.Run("documentation", func(t *testing.T) { + lines := []string{ + "''' Creates a value.", + "' implementation note", + "REM module comment", + "Public Function Create() As String", + } + + expected := []string{ + "''' Creates a value.", + "Public Function Create() As String", + } + + assertFiltered(t, "Module.vb", RetainDocumentation, lines, expected) + }) +} + +// TestFilterUnsupportedExtension verifies unsupported files are returned unchanged. +func TestFilterUnsupportedExtension(t *testing.T) { + lines := []string{ + "# docs", + "sub call { } # inline", + } + + assertFiltered(t, "service.pl", RetainNone, lines, lines) +} + +// assertFiltered verifies filtering output for one file path and mode. 
+func assertFiltered(t *testing.T, filePath string, mode Mode, lines []string, expected []string) { + t.Helper() + + got := Filter(lines, filePath, mode) + if !reflect.DeepEqual(got, expected) { + t.Fatalf("Filter() = %#v, expected %#v", got, expected) + } +} diff --git a/embedding/parsing/instruction_test.go b/embedding/parsing/instruction_test.go index c4183f6..9676f44 100644 --- a/embedding/parsing/instruction_test.go +++ b/embedding/parsing/instruction_test.go @@ -36,6 +36,7 @@ type TestInstructionParams struct { fragment string startGlob string endGlob string + comments string closeTag bool } @@ -82,6 +83,15 @@ var _ = Describe("Instruction", func() { Expect(parsing.FromXML(xmlString, config)).Error().ShouldNot(HaveOccurred()) }) + It("should have an error for unsupported comments mode", func() { + instructionParams := TestInstructionParams{ + comments: "summary", + } + xmlString := buildInstruction("org/example/Comments.java", instructionParams) + + Expect(parsing.FromXML(xmlString, config)).Error().Should(HaveOccurred()) + }) + It("should successfully read source content", func() { instructionParams := TestInstructionParams{ closeTag: true, @@ -98,6 +108,82 @@ var _ = Describe("Instruction", func() { Expect(actualLines[checkedLine]).Should(Equal(expectedLine)) }) + It("should strip all recognized comments", func() { + instructionParams := TestInstructionParams{ + comments: "none", + } + + actualLines := getXMLExtractionContent( + "org/example/Comments.java", instructionParams, config) + + Expect(actualLines).Should(Equal([]string{ + "package org.example;", + "", + "public interface Comments {", + " String marker = \"http://example.org/*not-comment*/\";", + "", + " String create(String name); ", + "}", + })) + }) + + It("should keep documentation comments only", func() { + instructionParams := TestInstructionParams{ + comments: "documentation", + } + + actualLines := getXMLExtractionContent( + "org/example/Comments.java", instructionParams, config) + + 
Expect(actualLines).Should(ContainElement("/**")) + Expect(actualLines).Should(ContainElement(" * Documents the public API.")) + Expect(actualLines).ShouldNot(ContainElement(" * The block comment.")) + Expect(actualLines).ShouldNot(ContainElement(" // Full-line inline comment.")) + }) + + It("should keep inline comments only", func() { + instructionParams := TestInstructionParams{ + comments: "inline", + } + + actualLines := getXMLExtractionContent( + "org/example/Comments.java", instructionParams, config) + + Expect(actualLines).Should(ContainElement(" // Full-line inline comment.")) + Expect(actualLines).Should(ContainElement(" String create(String name); // end-of-line inline comment.")) + Expect(actualLines).ShouldNot(ContainElement("/**")) + Expect(actualLines).ShouldNot(ContainElement(" * The block comment.")) + }) + + It("should keep block comments only", func() { + instructionParams := TestInstructionParams{ + comments: "block", + } + + actualLines := getXMLExtractionContent( + "org/example/Comments.java", instructionParams, config) + + Expect(actualLines).ShouldNot(ContainElement("/**")) + Expect(actualLines).ShouldNot(ContainElement(" * Documents the public API.")) + Expect(actualLines).Should(ContainElement(" * The block comment.")) + Expect(actualLines).ShouldNot(ContainElement(" // Full-line inline comment.")) + }) + + It("should keep regular comments only", func() { + instructionParams := TestInstructionParams{ + comments: "regular", + } + + actualLines := getXMLExtractionContent( + "org/example/Comments.java", instructionParams, config) + + Expect(actualLines).ShouldNot(ContainElement("/**")) + Expect(actualLines).ShouldNot(ContainElement(" * Documents the public API.")) + Expect(actualLines).Should(ContainElement(" * The block comment.")) + Expect(actualLines).Should(ContainElement(" // Full-line inline comment.")) + Expect(actualLines).Should(ContainElement(" String create(String name); // end-of-line inline comment.")) + }) + It("should have an 
error when parsing fragment with start glob", func() { instructionParams := TestInstructionParams{ fragment: "fragment", @@ -318,6 +404,10 @@ func buildInstruction(fileName string, params TestInstructionParams) string { endAttr := xmlAttribute("end", params.endGlob) instructionLine += " " + endAttr } + if len(params.comments) > 0 { + commentsAttr := xmlAttribute("comments", params.comments) + instructionLine += " " + commentsAttr + } if params.closeTag { instructionLine += ">" } else { diff --git a/test/resources/code/java/org/example/Comments.java b/test/resources/code/java/org/example/Comments.java new file mode 100644 index 0000000..11934c8 --- /dev/null +++ b/test/resources/code/java/org/example/Comments.java @@ -0,0 +1,14 @@ +package org.example; + +/** + * Documents the public API. + */ +public interface Comments { + /* + * The block comment. + */ + String marker = "http://example.org/*not-comment*/"; + + // Full-line inline comment. + String create(String name); // end-of-line inline comment. +} From 5fa1802d32db7710d54fc1b717aa1c6e54b4305c Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 18:55:18 +0200 Subject: [PATCH 07/27] Update readme. --- EMBEDDING.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index fd6a7b1..d50fb14 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -127,10 +127,21 @@ Supported values: - `inline` — retain non-documentation line comments such as `//`. - `block` — retain non-documentation block comments such as `/* */`. -Comment filtering is selected by source file extension. The built-in registry covers Java, -Groovy, Kotlin, KTS, C#, JavaScript, TypeScript, YAML, XML, HTML, and Basic-family files. Unknown extensions are embedded unchanged. +Not all languages distinguish between documentation/regular or inline/block comments.
+ +The table below lists the supported languages and useful `comments` modes for them: + +| Language | Extensions | Useful `comments` modes | +|------------------------|-----------------------------------------|--------------------------------------------------------------| +| Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| YAML | `.yml`, `.yaml` | `all`, `none` | +| XML, HTML | `.xml`, `.html`, `.htm` | `all`, `none` | +| Visual Basic | `.vb`, `.bas`, `.vbs`, `.vbscript` | `all`, `none`, `documentation`, `regular` | + ## Advanced use cases ### Joining several parts of code into one fragment From 43aaa2737789df59f5bf2ab56692d5d2393f6ae6 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 19:22:36 +0200 Subject: [PATCH 08/27] Add unsupported comments mode warning. --- embedding/commentfilter/filter.go | 4 +- embedding/commentfilter/filter_test.go | 49 +++++++- embedding/commentfilter/syntax.go | 167 +++++++++++++++++++++---- embedding/parsing/instruction.go | 17 +-- embedding/parsing/instruction_token.go | 1 + 5 files changed, 204 insertions(+), 34 deletions(-) diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 2440146..76e100f 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -21,11 +21,11 @@ package commentfilter import "strings" // Filter returns source lines with comments retained according to the requested mode. 
-func Filter(lines []string, filePath string, mode Mode) []string { +func Filter(lines []string, filePath string, mode Mode, embeddingDocPath string) []string { if mode == RetainAll { return lines } - filter, found := filterFor(filePath) + filter, found := filterFor(filePath, mode, embeddingDocPath) if !found { return lines } diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 47f9240..a687edc 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -19,7 +19,10 @@ package commentfilter import ( + "bytes" + "log/slog" "reflect" + "strings" "testing" ) @@ -201,15 +204,57 @@ func TestFilterUnsupportedExtension(t *testing.T) { "sub call { } # inline", } - assertFiltered(t, "service.pl", RetainNone, lines, lines) + assertFiltered(t, "service.pl", RetainAll, lines, lines) +} + +// TestFilterWarnsAboutUselessMode verifies warnings for modes without language-specific meaning. +func TestFilterWarnsAboutUselessMode(t *testing.T) { + output := captureWarnings(func() { + Filter([]string{""}, "layout.xml", RetainDocumentation, "docs/guide.md") + }) + + if !strings.Contains(output, "documentation") || + !strings.Contains(output, "layout.xml") || + !strings.Contains(output, "file://") || + !strings.Contains(output, "docs/guide.md") || + !strings.Contains(output, "does not have a distinct meaning") { + t.Fatalf("warning output = %q", output) + } +} + +// TestFilterWarnsAboutUnsupportedExtension verifies warnings for unsupported file extensions. 
+func TestFilterWarnsAboutUnsupportedExtension(t *testing.T) { + output := captureWarnings(func() { + Filter([]string{"# comment"}, "service.pl", RetainNone, "docs/guide.md") + }) + + if !strings.Contains(output, "comment filtering is not supported for this file extension") || + !strings.Contains(output, "file://") || + !strings.Contains(output, "docs/guide.md") { + t.Fatalf("warning output = %q", output) + } } // assertFiltered verifies filtering output for one file path and mode. func assertFiltered(t *testing.T, filePath string, mode Mode, lines []string, expected []string) { t.Helper() - got := Filter(lines, filePath, mode) + got := Filter(lines, filePath, mode, "docs/guide.md") if !reflect.DeepEqual(got, expected) { t.Fatalf("Filter() = %#v, expected %#v", got, expected) } } + +// captureWarnings runs action and returns slog warning output. +func captureWarnings(action func()) string { + var output bytes.Buffer + previous := slog.Default() + slog.SetDefault(slog.New(slog.NewTextHandler(&output, &slog.HandlerOptions{ + Level: slog.LevelWarn, + }))) + defer slog.SetDefault(previous) + + action() + + return output.String() +} diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index 1f48d15..a72bc52 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -19,6 +19,8 @@ package commentfilter import ( + "fmt" + "log/slog" "path/filepath" "strings" ) @@ -48,11 +50,23 @@ type Filterer interface { Filter(lines []string, mode Mode) []string } -// filterFor returns the comment filter registered for the given file path. -func filterFor(filePath string) (Filterer, bool) { +// filterEntry stores a comment filter and the modes that make sense for its language. +type filterEntry struct { + filter Filterer + usefulModes map[Mode]struct{} +} + +// filterFor returns the comment filter registered for the given file path and warns on odd modes. 
+func filterFor(filePath string, mode Mode, embeddingDocPath string) (Filterer, bool) { extension := normalizeExtension(filepath.Ext(filePath)) - filter, found := filtersByExtension[extension] - return filter, found + entry, found := filtersByExtension[extension] + if !found { + warnUnsupportedCommentsMode(filePath, mode, embeddingDocPath) + return nil, false + } + warnUselessCommentsMode(filePath, mode, embeddingDocPath, entry.usefulModes) + + return entry.filter, true } // normalizeExtension returns a lowercase file extension with a leading dot. @@ -100,36 +114,143 @@ var xmlSyntax = Syntax{ QuoteChars: "\"'", } -var filtersByExtension = map[string]Filterer{ +var allCommentModes = usefulModes( + RetainAll, + RetainNone, + RetainDocumentation, + RetainRegular, + RetainInline, + RetainBlock, +) + +var allOrNoneCommentModes = usefulModes(RetainAll, RetainNone) + +var regularAndDocCommentModes = usefulModes( + RetainAll, + RetainNone, + RetainDocumentation, + RetainRegular, +) + +var filtersByExtension = map[string]filterEntry{ // Java/Kotlin - ".java": MarkerCommentFilter{Syntax: javaStyleSyntax}, - ".kt": MarkerCommentFilter{Syntax: javaStyleSyntax}, - ".kts": MarkerCommentFilter{Syntax: javaStyleSyntax}, - ".groovy": MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".java": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".kt": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".kts": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".groovy": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), // C# - ".cs": MarkerCommentFilter{Syntax: csharpSyntax}, + ".cs": newFilterEntry(MarkerCommentFilter{Syntax: csharpSyntax}, allCommentModes), // JavaScript - ".js": MarkerCommentFilter{Syntax: javaStyleSyntax}, - ".jsx": MarkerCommentFilter{Syntax: javaStyleSyntax}, - ".ts": MarkerCommentFilter{Syntax: javaStyleSyntax}, - ".tsx": 
MarkerCommentFilter{Syntax: javaStyleSyntax}, + ".js": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".jsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".ts": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".tsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), // YAML - ".yml": MarkerCommentFilter{Syntax: hashLineSyntax}, - ".yaml": MarkerCommentFilter{Syntax: hashLineSyntax}, + ".yml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), + ".yaml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), // XML - ".xml": MarkerCommentFilter{Syntax: xmlSyntax}, + ".xml": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allOrNoneCommentModes), // HTML - ".html": MarkerCommentFilter{Syntax: xmlSyntax}, - ".htm": MarkerCommentFilter{Syntax: xmlSyntax}, + ".html": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allOrNoneCommentModes), + ".htm": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allOrNoneCommentModes), // Visual Basic - ".vb": VisualBasicCommentFilter{}, - ".bas": VisualBasicCommentFilter{}, - ".vbs": VisualBasicCommentFilter{}, - ".vbscript": VisualBasicCommentFilter{}, + ".vb": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), + ".bas": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), + ".vbs": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), + ".vbscript": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), +} + +// usefulModes creates a lookup set for comment modes that make sense for a language. +func usefulModes(modes ...Mode) map[Mode]struct{} { + result := make(map[Mode]struct{}, len(modes)) + for _, mode := range modes { + result[mode] = struct{}{} + } + + return result +} + +// newFilterEntry creates a filter registry entry. 
+func newFilterEntry(filter Filterer, usefulModes map[Mode]struct{}) filterEntry { + return filterEntry{ + filter: filter, + usefulModes: usefulModes, + } +} + +// warnUnsupportedCommentsMode logs when comments filtering is requested for an unsupported file. +func warnUnsupportedCommentsMode(filePath string, mode Mode, embeddingDocPath string) { + if mode == RetainAll { + return + } + slog.Warn( + fmt.Sprintf( + "`comments=\"%s\"` was requested in `%s` for `%s`, "+ + "but comment filtering is not supported for this file extension.", + mode, + fileURL(embeddingDocPath), + filePath, + ), + ) +} + +// warnUselessCommentsMode logs when the selected mode has no distinct meaning for a file. +func warnUselessCommentsMode( + filePath string, + mode Mode, + embeddingDocPath string, + usefulModes map[Mode]struct{}, +) { + if _, found := usefulModes[mode]; found { + return + } + slog.Warn( + fmt.Sprintf( + "`comments=\"%s\"` was requested in `%s` for `%s`, but this mode does not have "+ + "a distinct meaning for this file type. Useful modes are: %s.", + mode, + fileURL(embeddingDocPath), + filePath, + formatModes(usefulModes), + ), + ) +} + +// fileURL returns an absolute file URL for a local path. +func fileURL(path string) string { + if path == "" { + return "file://" + } + absolutePath, err := filepath.Abs(path) + if err != nil { + return "file://" + path + } + + return "file://" + absolutePath +} + +// formatModes formats modes for a warning message. 
+func formatModes(modes map[Mode]struct{}) string { + order := []Mode{ + RetainAll, + RetainNone, + RetainDocumentation, + RetainRegular, + RetainInline, + RetainBlock, + } + var result []string + for _, mode := range order { + if _, found := modes[mode]; found { + result = append(result, fmt.Sprintf("`%s`", mode)) + } + } + + return strings.Join(result, ", ") } diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 98c779b..d265f18 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -45,14 +45,17 @@ import ( // // CommentMode — specifies which comments are retained in the embedded code. // +// DocumentationFile — a documentation file containing the instruction. +// // Configuration — a Configuration with all embed-code settings. type Instruction struct { - CodeFile string - Fragment string - StartPattern *Pattern - EndPattern *Pattern - CommentMode commentfilter.Mode - Configuration configuration.Configuration + CodeFile string + Fragment string + StartPattern *Pattern + EndPattern *Pattern + CommentMode commentfilter.Mode + DocumentationFile string + Configuration configuration.Configuration } // NewInstruction creates an Instruction based on provided attributes and configuration. @@ -118,7 +121,7 @@ func (e Instruction) Content() ([]string, error) { fileContent = e.matchingLines(fileContent) } - return commentfilter.Filter(fileContent, e.CodeFile, e.CommentMode), nil + return commentfilter.Filter(fileContent, e.CodeFile, e.CommentMode, e.DocumentationFile), nil } // Returns string representation of Instruction. 
diff --git a/embedding/parsing/instruction_token.go b/embedding/parsing/instruction_token.go index b3234de..dcf3705 100644 --- a/embedding/parsing/instruction_token.go +++ b/embedding/parsing/instruction_token.go @@ -78,6 +78,7 @@ func (e EmbedInstructionTokenState) Accept(context *Context, instruction, err := FromXML(strings.Join(instructionBody, " "), config) if err == nil { + instruction.DocumentationFile = context.MarkdownFilePath context.SetEmbedding(&instruction) } else { parseErr = err From c96d0f906d46a9721744b7c007b3c3e699e7f288 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 19:35:25 +0200 Subject: [PATCH 09/27] Add embedding instruction line to the warning. --- embedding/commentfilter/filter.go | 10 +++++++-- embedding/commentfilter/filter_test.go | 10 ++++----- embedding/commentfilter/syntax.go | 29 ++++++++++++++++++-------- embedding/parsing/instruction.go | 11 +++++++++- embedding/parsing/instruction_token.go | 1 + 5 files changed, 44 insertions(+), 17 deletions(-) diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 76e100f..ac2ce5e 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -21,11 +21,17 @@ package commentfilter import "strings" // Filter returns source lines with comments retained according to the requested mode. 
-func Filter(lines []string, filePath string, mode Mode, embeddingDocPath string) []string { +func Filter( + lines []string, + filePath string, + mode Mode, + embeddingDocPath string, + embeddingLine int, +) []string { if mode == RetainAll { return lines } - filter, found := filterFor(filePath, mode, embeddingDocPath) + filter, found := filterFor(filePath, mode, embeddingDocPath, embeddingLine) if !found { return lines } diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index a687edc..029f835 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -210,13 +210,13 @@ func TestFilterUnsupportedExtension(t *testing.T) { // TestFilterWarnsAboutUselessMode verifies warnings for modes without language-specific meaning. func TestFilterWarnsAboutUselessMode(t *testing.T) { output := captureWarnings(func() { - Filter([]string{""}, "layout.xml", RetainDocumentation, "docs/guide.md") + Filter([]string{""}, "layout.xml", RetainDocumentation, "docs/guide.md", 12) }) if !strings.Contains(output, "documentation") || !strings.Contains(output, "layout.xml") || !strings.Contains(output, "file://") || - !strings.Contains(output, "docs/guide.md") || + !strings.Contains(output, "docs/guide.md:12") || !strings.Contains(output, "does not have a distinct meaning") { t.Fatalf("warning output = %q", output) } @@ -225,12 +225,12 @@ func TestFilterWarnsAboutUselessMode(t *testing.T) { // TestFilterWarnsAboutUnsupportedExtension verifies warnings for unsupported file extensions. 
func TestFilterWarnsAboutUnsupportedExtension(t *testing.T) { output := captureWarnings(func() { - Filter([]string{"# comment"}, "service.pl", RetainNone, "docs/guide.md") + Filter([]string{"# comment"}, "service.pl", RetainNone, "docs/guide.md", 12) }) if !strings.Contains(output, "comment filtering is not supported for this file extension") || !strings.Contains(output, "file://") || - !strings.Contains(output, "docs/guide.md") { + !strings.Contains(output, "docs/guide.md:12") { t.Fatalf("warning output = %q", output) } } @@ -239,7 +239,7 @@ func TestFilterWarnsAboutUnsupportedExtension(t *testing.T) { func assertFiltered(t *testing.T, filePath string, mode Mode, lines []string, expected []string) { t.Helper() - got := Filter(lines, filePath, mode, "docs/guide.md") + got := Filter(lines, filePath, mode, "docs/guide.md", 12) if !reflect.DeepEqual(got, expected) { t.Fatalf("Filter() = %#v, expected %#v", got, expected) } diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index a72bc52..663e2d3 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -57,14 +57,19 @@ type filterEntry struct { } // filterFor returns the comment filter registered for the given file path and warns on odd modes. 
-func filterFor(filePath string, mode Mode, embeddingDocPath string) (Filterer, bool) { +func filterFor( + filePath string, + mode Mode, + embeddingDocPath string, + embeddingLine int, +) (Filterer, bool) { extension := normalizeExtension(filepath.Ext(filePath)) entry, found := filtersByExtension[extension] if !found { - warnUnsupportedCommentsMode(filePath, mode, embeddingDocPath) + warnUnsupportedCommentsMode(filePath, mode, embeddingDocPath, embeddingLine) return nil, false } - warnUselessCommentsMode(filePath, mode, embeddingDocPath, entry.usefulModes) + warnUselessCommentsMode(filePath, mode, embeddingDocPath, embeddingLine, entry.usefulModes) return entry.filter, true } @@ -185,7 +190,7 @@ func newFilterEntry(filter Filterer, usefulModes map[Mode]struct{}) filterEntry } // warnUnsupportedCommentsMode logs when comments filtering is requested for an unsupported file. -func warnUnsupportedCommentsMode(filePath string, mode Mode, embeddingDocPath string) { +func warnUnsupportedCommentsMode(filePath string, mode Mode, embeddingDocPath string, embeddingLine int) { if mode == RetainAll { return } @@ -194,7 +199,7 @@ func warnUnsupportedCommentsMode(filePath string, mode Mode, embeddingDocPath st "`comments=\"%s\"` was requested in `%s` for `%s`, "+ "but comment filtering is not supported for this file extension.", mode, - fileURL(embeddingDocPath), + fileURL(embeddingDocPath, embeddingLine), filePath, ), ) @@ -205,6 +210,7 @@ func warnUselessCommentsMode( filePath string, mode Mode, embeddingDocPath string, + embeddingLine int, usefulModes map[Mode]struct{}, ) { if _, found := usefulModes[mode]; found { @@ -215,15 +221,15 @@ func warnUselessCommentsMode( "`comments=\"%s\"` was requested in `%s` for `%s`, but this mode does not have "+ "a distinct meaning for this file type. 
Useful modes are: %s.", mode, - fileURL(embeddingDocPath), + fileURL(embeddingDocPath, embeddingLine), filePath, formatModes(usefulModes), ), ) } -// fileURL returns an absolute file URL for a local path. -func fileURL(path string) string { +// fileURL returns an absolute file URL for a local path and line. +func fileURL(path string, line int) string { if path == "" { return "file://" } @@ -232,7 +238,12 @@ func fileURL(path string) string { return "file://" + path } - return "file://" + absolutePath + url := "file://" + absolutePath + if line > 0 { + url = fmt.Sprintf("%s:%d", url, line) + } + + return url } // formatModes formats modes for a warning message. diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index d265f18..13e6089 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -47,6 +47,8 @@ import ( // // DocumentationFile — a documentation file containing the instruction. // +// DocumentationLine — a line containing the start of the instruction. +// // Configuration — a Configuration with all embed-code settings. type Instruction struct { CodeFile string @@ -55,6 +57,7 @@ type Instruction struct { EndPattern *Pattern CommentMode commentfilter.Mode DocumentationFile string + DocumentationLine int Configuration configuration.Configuration } @@ -121,7 +124,13 @@ func (e Instruction) Content() ([]string, error) { fileContent = e.matchingLines(fileContent) } - return commentfilter.Filter(fileContent, e.CodeFile, e.CommentMode, e.DocumentationFile), nil + return commentfilter.Filter( + fileContent, + e.CodeFile, + e.CommentMode, + e.DocumentationFile, + e.DocumentationLine, + ), nil } // Returns string representation of Instruction. 
diff --git a/embedding/parsing/instruction_token.go b/embedding/parsing/instruction_token.go index dcf3705..d98fa6d 100644 --- a/embedding/parsing/instruction_token.go +++ b/embedding/parsing/instruction_token.go @@ -79,6 +79,7 @@ func (e EmbedInstructionTokenState) Accept(context *Context, instruction, err := FromXML(strings.Join(instructionBody, " "), config) if err == nil { instruction.DocumentationFile = context.MarkdownFilePath + instruction.DocumentationLine = startLine context.SetEmbedding(&instruction) } else { parseErr = err From 7396c3e6b36716d3d510222fc853dac3b0c10f98 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 19:46:24 +0200 Subject: [PATCH 10/27] Extract `marker_comment_filter`. --- embedding/commentfilter/filter.go | 157 ------------------ .../commentfilter/marker_comment_filter.go | 155 +++++++++++++++++ embedding/commentfilter/scanner.go | 42 +++++ 3 files changed, 197 insertions(+), 157 deletions(-) create mode 100644 embedding/commentfilter/marker_comment_filter.go create mode 100644 embedding/commentfilter/scanner.go diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index ac2ce5e..ad4fcbf 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -18,8 +18,6 @@ package commentfilter -import "strings" - // Filter returns source lines with comments retained according to the requested mode. func Filter( lines []string, @@ -38,158 +36,3 @@ func Filter( return filter.Filter(lines, mode) } - -// MarkerCommentFilter removes comments using lexical markers declared in Syntax. -type MarkerCommentFilter struct { - Syntax Syntax -} - -type blockState struct { - active bool - block BlockSyntax - keep bool -} - -// Filter removes or preserves recognized comments across all lines. 
-func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { - var filtered []string - state := blockState{} - for _, line := range lines { - filteredLine, hadComment := f.filterLine(line, mode, &state) - if hadComment && strings.TrimSpace(filteredLine) == "" { - continue - } - filtered = append(filtered, filteredLine) - } - - return filtered -} - -// filterLine removes or preserves recognized comments from a single source line. -func (f MarkerCommentFilter) filterLine(line string, mode Mode, state *blockState) (string, bool) { - var result strings.Builder - position := 0 - hadComment := false - - for position < len(line) { - if state.active { - hadComment = true - end := strings.Index(line[position:], state.block.End) - if end < 0 { - if state.keep { - result.WriteString(line[position:]) - } - return result.String(), hadComment - } - endPosition := position + end + len(state.block.End) - if state.keep { - result.WriteString(line[position:endPosition]) - } - position = endPosition - state.active = false - continue - } - - if quoteEnd := quotedSegmentEnd(line, position, f.Syntax.QuoteChars); quoteEnd > position { - result.WriteString(line[position:quoteEnd]) - position = quoteEnd - continue - } - if _, found := documentationInlineAt(line, position, f.Syntax); found { - hadComment = true - if mode == RetainDocumentation { - result.WriteString(line[position:]) - } - break - } - if block, found := documentationBlockAt(line, position, f.Syntax); found { - hadComment = true - state.active = true - state.block = block - state.keep = mode == RetainDocumentation - continue - } - if _, found := inlineCommentAt(line, position, f.Syntax); found { - hadComment = true - if mode == RetainInline || mode == RetainRegular { - result.WriteString(line[position:]) - } - break - } - if block, found := blockCommentAt(line, position, f.Syntax); found { - hadComment = true - state.active = true - state.block = block - state.keep = mode == RetainBlock || mode == RetainRegular 
- continue - } - - result.WriteByte(line[position]) - position++ - } - - return result.String(), hadComment -} - -// quotedSegmentEnd returns the end offset of a quoted string starting at position. -func quotedSegmentEnd(line string, position int, quoteChars string) int { - if position >= len(line) || !strings.ContainsRune(quoteChars, rune(line[position])) { - return position - } - quote := line[position] - cursor := position + 1 - for cursor < len(line) { - if line[cursor] == '\\' { - cursor += 2 - continue - } - if line[cursor] == quote { - return cursor + 1 - } - cursor++ - } - - return len(line) -} - -// documentationInlineAt reports whether a documentation line comment starts at the position. -func documentationInlineAt(line string, position int, syntax Syntax) (string, bool) { - return prefixAt(line, position, syntax.Documentation.Inline) -} - -// documentationBlockAt reports whether a documentation block comment starts at the position. -func documentationBlockAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { - return blockAt(line, position, syntax.Documentation.Block) -} - -// inlineCommentAt reports whether an inline comment starts at the given position. -func inlineCommentAt(line string, position int, syntax Syntax) (string, bool) { - return prefixAt(line, position, syntax.Inline) -} - -// blockCommentAt reports whether a block comment starts at the given position. -func blockCommentAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { - return blockAt(line, position, syntax.Block) -} - -// prefixAt reports whether one of the given prefixes starts at the position. -func prefixAt(line string, position int, prefixes []string) (string, bool) { - for _, prefix := range prefixes { - if strings.HasPrefix(line[position:], prefix) { - return prefix, true - } - } - - return "", false -} - -// blockAt reports whether one of the given block markers starts at the position. 
-func blockAt(line string, position int, blocks []BlockSyntax) (BlockSyntax, bool) { - for _, block := range blocks { - if strings.HasPrefix(line[position:], block.Start) { - return block, true - } - } - - return BlockSyntax{}, false -} diff --git a/embedding/commentfilter/marker_comment_filter.go b/embedding/commentfilter/marker_comment_filter.go new file mode 100644 index 0000000..f15fa77 --- /dev/null +++ b/embedding/commentfilter/marker_comment_filter.go @@ -0,0 +1,155 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import "strings" + +// MarkerCommentFilter removes comments using lexical markers declared in Syntax. +type MarkerCommentFilter struct { + Syntax Syntax +} + +type blockState struct { + active bool + block BlockSyntax + keep bool +} + +// Filter removes or preserves recognized comments across all lines. 
+func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { + var filtered []string + state := blockState{} + for _, line := range lines { + filteredLine, hadComment := f.filterLine(line, mode, &state) + if hadComment && strings.TrimSpace(filteredLine) == "" { + continue + } + filtered = append(filtered, filteredLine) + } + + return filtered +} + +// filterLine removes or preserves recognized comments from a single source line. +func (f MarkerCommentFilter) filterLine(line string, mode Mode, state *blockState) (string, bool) { + var result strings.Builder + position := 0 + hadComment := false + + for position < len(line) { + if state.active { + hadComment = true + end := strings.Index(line[position:], state.block.End) + if end < 0 { + if state.keep { + result.WriteString(line[position:]) + } + return result.String(), hadComment + } + endPosition := position + end + len(state.block.End) + if state.keep { + result.WriteString(line[position:endPosition]) + } + position = endPosition + state.active = false + continue + } + + if quoteEnd := quotedSegmentEnd(line, position, f.Syntax.QuoteChars); quoteEnd > position { + result.WriteString(line[position:quoteEnd]) + position = quoteEnd + continue + } + if _, found := documentationInlineAt(line, position, f.Syntax); found { + hadComment = true + if mode == RetainDocumentation { + result.WriteString(line[position:]) + } + break + } + if block, found := documentationBlockAt(line, position, f.Syntax); found { + hadComment = true + state.active = true + state.block = block + state.keep = mode == RetainDocumentation + continue + } + if _, found := inlineCommentAt(line, position, f.Syntax); found { + hadComment = true + if mode == RetainInline || mode == RetainRegular { + result.WriteString(line[position:]) + } + break + } + if block, found := blockCommentAt(line, position, f.Syntax); found { + hadComment = true + state.active = true + state.block = block + state.keep = mode == RetainBlock || mode == RetainRegular 
+ continue + } + + result.WriteByte(line[position]) + position++ + } + + return result.String(), hadComment +} + +// documentationInlineAt reports whether a documentation line comment starts at the position. +func documentationInlineAt(line string, position int, syntax Syntax) (string, bool) { + return prefixAt(line, position, syntax.Documentation.Inline) +} + +// documentationBlockAt reports whether a documentation block comment starts at the position. +func documentationBlockAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { + return blockAt(line, position, syntax.Documentation.Block) +} + +// inlineCommentAt reports whether an inline comment starts at the given position. +func inlineCommentAt(line string, position int, syntax Syntax) (string, bool) { + return prefixAt(line, position, syntax.Inline) +} + +// blockCommentAt reports whether a block comment starts at the given position. +func blockCommentAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { + return blockAt(line, position, syntax.Block) +} + +// prefixAt reports whether one of the given prefixes starts at the position. +func prefixAt(line string, position int, prefixes []string) (string, bool) { + for _, prefix := range prefixes { + if strings.HasPrefix(line[position:], prefix) { + return prefix, true + } + } + + return "", false +} + +// blockAt reports whether one of the given block markers starts at the position. +func blockAt(line string, position int, blocks []BlockSyntax) (BlockSyntax, bool) { + for _, block := range blocks { + if strings.HasPrefix(line[position:], block.Start) { + return block, true + } + } + + return BlockSyntax{}, false +} diff --git a/embedding/commentfilter/scanner.go b/embedding/commentfilter/scanner.go new file mode 100644 index 0000000..d349d9a --- /dev/null +++ b/embedding/commentfilter/scanner.go @@ -0,0 +1,42 @@ +// Copyright 2026, TeamDev. All rights reserved. 
+// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import "strings" + +// quotedSegmentEnd returns the end offset of a quoted string starting at position. +func quotedSegmentEnd(line string, position int, quoteChars string) int { + if position >= len(line) || !strings.ContainsRune(quoteChars, rune(line[position])) { + return position + } + quote := line[position] + cursor := position + 1 + for cursor < len(line) { + if line[cursor] == '\\' { + cursor += 2 + continue + } + if line[cursor] == quote { + return cursor + 1 + } + cursor++ + } + + return len(line) +} From d7d1d6d57251c47fd205e3dd0210788726eb1b0c Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 19:54:41 +0200 Subject: [PATCH 11/27] Improve readability. 
--- .../commentfilter/marker_comment_filter.go | 160 ++++++++++++------ 1 file changed, 109 insertions(+), 51 deletions(-) diff --git a/embedding/commentfilter/marker_comment_filter.go b/embedding/commentfilter/marker_comment_filter.go index f15fa77..f8625a2 100644 --- a/embedding/commentfilter/marker_comment_filter.go +++ b/embedding/commentfilter/marker_comment_filter.go @@ -31,6 +31,16 @@ type blockState struct { keep bool } +type markerLineFilter struct { + filter MarkerCommentFilter + line string + mode Mode + state *blockState + result strings.Builder + position int + hadComment bool +} + // Filter removes or preserves recognized comments across all lines. func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { var filtered []string @@ -48,68 +58,116 @@ func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { // filterLine removes or preserves recognized comments from a single source line. func (f MarkerCommentFilter) filterLine(line string, mode Mode, state *blockState) (string, bool) { - var result strings.Builder - position := 0 - hadComment := false - - for position < len(line) { - if state.active { - hadComment = true - end := strings.Index(line[position:], state.block.End) - if end < 0 { - if state.keep { - result.WriteString(line[position:]) - } - return result.String(), hadComment - } - endPosition := position + end + len(state.block.End) - if state.keep { - result.WriteString(line[position:endPosition]) - } - position = endPosition - state.active = false - continue - } + filter := markerLineFilter{ + filter: f, + line: line, + mode: mode, + state: state, + } + + return filter.filterLine() +} - if quoteEnd := quotedSegmentEnd(line, position, f.Syntax.QuoteChars); quoteEnd > position { - result.WriteString(line[position:quoteEnd]) - position = quoteEnd +// filterLine walks the current line until it reaches its end or a line comment. 
+func (f *markerLineFilter) filterLine() (string, bool) { + for f.position < len(f.line) { + if f.consumeActiveBlock() { continue } - if _, found := documentationInlineAt(line, position, f.Syntax); found { - hadComment = true - if mode == RetainDocumentation { - result.WriteString(line[position:]) - } - break - } - if block, found := documentationBlockAt(line, position, f.Syntax); found { - hadComment = true - state.active = true - state.block = block - state.keep = mode == RetainDocumentation + if f.consumeQuotedSegment() { continue } - if _, found := inlineCommentAt(line, position, f.Syntax); found { - hadComment = true - if mode == RetainInline || mode == RetainRegular { - result.WriteString(line[position:]) + if consumed, stop := f.consumeComment(); consumed { + if stop { + break } - break - } - if block, found := blockCommentAt(line, position, f.Syntax); found { - hadComment = true - state.active = true - state.block = block - state.keep = mode == RetainBlock || mode == RetainRegular continue } + f.consumeCodeByte() + } + + return f.result.String(), f.hadComment +} + +// consumeActiveBlock consumes text while the scanner is inside a block comment. +func (f *markerLineFilter) consumeActiveBlock() bool { + if !f.state.active { + return false + } + f.hadComment = true + end := strings.Index(f.line[f.position:], f.state.block.End) + if end < 0 { + if f.state.keep { + f.result.WriteString(f.line[f.position:]) + } + f.position = len(f.line) + return true + } + endPosition := f.position + end + len(f.state.block.End) + if f.state.keep { + f.result.WriteString(f.line[f.position:endPosition]) + } + f.position = endPosition + f.state.active = false + + return true +} + +// consumeQuotedSegment copies a quoted segment without scanning comment markers inside it. 
+func (f *markerLineFilter) consumeQuotedSegment() bool { + quoteEnd := quotedSegmentEnd(f.line, f.position, f.filter.Syntax.QuoteChars) + if quoteEnd <= f.position { + return false + } + f.result.WriteString(f.line[f.position:quoteEnd]) + f.position = quoteEnd + + return true +} + +// consumeComment consumes a comment and reports whether it consumed input and ended the line. +func (f *markerLineFilter) consumeComment() (bool, bool) { + if _, found := documentationInlineAt(f.line, f.position, f.filter.Syntax); found { + f.consumeInlineComment(f.mode == RetainDocumentation) + return true, true + } + if block, found := documentationBlockAt(f.line, f.position, f.filter.Syntax); found { + f.startBlockComment(block, f.mode == RetainDocumentation) + return true, false + } + if _, found := inlineCommentAt(f.line, f.position, f.filter.Syntax); found { + f.consumeInlineComment(f.mode == RetainInline || f.mode == RetainRegular) + return true, true + } + if block, found := blockCommentAt(f.line, f.position, f.filter.Syntax); found { + f.startBlockComment(block, f.mode == RetainBlock || f.mode == RetainRegular) + return true, false + } - result.WriteByte(line[position]) - position++ + return false, false +} + +// consumeInlineComment consumes the rest of the line as a line comment. +func (f *markerLineFilter) consumeInlineComment(keep bool) { + f.hadComment = true + if keep { + f.result.WriteString(f.line[f.position:]) } + f.position = len(f.line) +} + +// startBlockComment records the active block comment markers and whether to keep them. +func (f *markerLineFilter) startBlockComment(block BlockSyntax, keep bool) { + f.hadComment = true + f.state.active = true + f.state.block = block + f.state.keep = keep +} - return result.String(), hadComment +// consumeCodeByte copies one source byte that does not belong to a recognized comment. 
+func (f *markerLineFilter) consumeCodeByte() { + f.result.WriteByte(f.line[f.position]) + f.position++ } // documentationInlineAt reports whether a documentation line comment starts at the position. From 414d5e1969b11f4cd71167e898991c23d6466c2d Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Mon, 18 May 2026 20:08:35 +0200 Subject: [PATCH 12/27] Improve `check` mode output view. --- main.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 98c7615..f742bbd 100644 --- a/main.go +++ b/main.go @@ -134,7 +134,7 @@ func logError(message string, err error) { slog.Error(fmt.Sprintf("%s: %v", message, err)) } -// checkByConfigs runs check for all configs and logs outdated documentation files. +// checkByConfigs runs check for all configs and panics if documentation files are outdated. func checkByConfigs(configs []configuration.Configuration) { var totalOutdatedFiles []string for _, config := range configs { @@ -146,7 +146,8 @@ func checkByConfigs(configs []configuration.Configuration) { return } - printFiles("File outdated:", "Files outdated:", totalOutdatedFiles) + printFiles("File to update:", "Files to update:", totalOutdatedFiles) + panic("the documentation files are not up-to-date with code files") } // embedByConfig runs the embedByConfig for all configs and logs the results. From b70830213bae4a72dff2736e0650f232cfc6d10c Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 08:53:14 +0200 Subject: [PATCH 13/27] Improve error logging. 
--- logging/logger.go | 26 +++++++++++++++++++++++++- logging/logger_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 logging/logger_test.go diff --git a/logging/logger.go b/logging/logger.go index a8086b6..a05e9a1 100644 --- a/logging/logger.go +++ b/logging/logger.go @@ -94,10 +94,34 @@ func (h *Handler) WithGroup(name string) slog.Handler { // defer HandlePanic(withStacktrace) func HandlePanic(withStacktrace bool) { if r := recover(); r != nil { - fmt.Printf("Panic: %v\n", r) + fmt.Println(formatPanicMessage(r)) if withStacktrace { debug.PrintStack() } os.Exit(1) } } + +// formatPanicMessage formats panic values for console output. +func formatPanicMessage(recovered any) string { + err, ok := recovered.(error) + if !ok { + return fmt.Sprintf("panic: %v", recovered) + } + + joined, ok := err.(interface { + Unwrap() []error + }) + if !ok || len(joined.Unwrap()) <= 1 { + return fmt.Sprintf("panic: %v", err) + } + + var builder strings.Builder + builder.WriteString("panic:") + for _, wrappedErr := range joined.Unwrap() { + builder.WriteString("\n- ") + builder.WriteString(wrappedErr.Error()) + } + + return builder.String() +} diff --git a/logging/logger_test.go b/logging/logger_test.go new file mode 100644 index 0000000..e711ff7 --- /dev/null +++ b/logging/logger_test.go @@ -0,0 +1,28 @@ +package logging + +import ( + "errors" + "testing" +) + +// TestFormatPanicMessage verifies formatting for ordinary and joined panic errors. 
+func TestFormatPanicMessage(t *testing.T) { + t.Run("formats single panic value", func(t *testing.T) { + actual := formatPanicMessage("failed") + expected := "panic: failed" + if actual != expected { + t.Fatalf("expected %q, got %q", expected, actual) + } + }) + + t.Run("formats joined panic errors as a list", func(t *testing.T) { + actual := formatPanicMessage(errors.Join( + errors.New("error1 text"), + errors.New("error2 text"), + )) + expected := "panic:\n- error1 text\n- error2 text" + if actual != expected { + t.Fatalf("expected %q, got %q", expected, actual) + } + }) +} From 5619782535795b41fb7f46a2837936e411b4bbfc Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 09:04:41 +0200 Subject: [PATCH 14/27] Fix tests under Windows. --- embedding/commentfilter/filter_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 029f835..7387329 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -216,7 +216,7 @@ func TestFilterWarnsAboutUselessMode(t *testing.T) { if !strings.Contains(output, "documentation") || !strings.Contains(output, "layout.xml") || !strings.Contains(output, "file://") || - !strings.Contains(output, "docs/guide.md:12") || + !strings.Contains(output, "guide.md:12") || !strings.Contains(output, "does not have a distinct meaning") { t.Fatalf("warning output = %q", output) } @@ -230,7 +230,7 @@ func TestFilterWarnsAboutUnsupportedExtension(t *testing.T) { if !strings.Contains(output, "comment filtering is not supported for this file extension") || !strings.Contains(output, "file://") || - !strings.Contains(output, "docs/guide.md:12") { + !strings.Contains(output, "guide.md:12") { t.Fatalf("warning output = %q", output) } } From 4c48d377d9254dc98cff88cea17ee8cb22786a20 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 09:10:46 +0200 Subject: [PATCH 15/27] 
Add python support. --- EMBEDDING.md | 1 + embedding/commentfilter/filter_test.go | 18 ++++++++++++++++++ embedding/commentfilter/syntax.go | 5 +++++ 3 files changed, 24 insertions(+) diff --git a/EMBEDDING.md b/EMBEDDING.md index d50fb14..7481905 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -138,6 +138,7 @@ The table below lists the supported languages and useful `comments` modes for th | Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| Python | `.py`, `.pyi`, `.pyw` | `all`, `none` | | YAML | `.yml`, `.yaml` | `all`, `none` | | XML, HTML | `.xml`, `.html`, `.htm` | `all`, `none` | | Visual Basic | `.vb`, `.bas`, `.vbs`, `.vbscript` | `all`, `none`, `documentation`, `regular` | diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 7387329..dd41ba4 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -141,6 +141,24 @@ func TestFilterCSharp(t *testing.T) { }) } +// TestFilterPython verifies Python line comment filtering. +func TestFilterPython(t *testing.T) { + t.Run("none", func(t *testing.T) { + lines := []string{ + "# module comment", + "name = 'hash # literal'", + "value = 1 # inline comment", + } + + expected := []string{ + "name = 'hash # literal'", + "value = 1 ", + } + + assertFiltered(t, "module.py", RetainNone, lines, expected) + }) +} + // TestFilterVisualBasic verifies Visual Basic comment filtering. 
func TestFilterVisualBasic(t *testing.T) { t.Run("none", func(t *testing.T) { diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index 663e2d3..e74c234 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -153,6 +153,11 @@ var filtersByExtension = map[string]filterEntry{ ".ts": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), ".tsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + // Python + ".py": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), + ".pyi": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), + ".pyw": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), + // YAML ".yml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), ".yaml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), From bdef9d460248b2fe4f06a1f9d7194dc044ef0ccb Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 09:33:12 +0200 Subject: [PATCH 16/27] Support Go without documentation. 
--- EMBEDDING.md | 1 + embedding/commentfilter/filter.go | 2 +- embedding/commentfilter/filter_test.go | 69 +++++++++++- .../commentfilter/marker_comment_filter.go | 10 +- embedding/commentfilter/mode.go | 24 ++--- embedding/commentfilter/syntax.go | 100 +++++++++++------- embedding/commentfilter/visual_basic.go | 4 +- embedding/parsing/instruction.go | 2 +- 8 files changed, 154 insertions(+), 58 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 7481905..9165197 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -138,6 +138,7 @@ The table below lists the supported languages and useful `comments` modes for th | Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| Go | `.go` | `all`, `none`, `inline`, `block` | | Python | `.py`, `.pyi`, `.pyw` | `all`, `none` | | YAML | `.yml`, `.yaml` | `all`, `none` | | XML, HTML | `.xml`, `.html`, `.htm` | `all`, `none` | diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index ad4fcbf..1d09d11 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -22,7 +22,7 @@ package commentfilter func Filter( lines []string, filePath string, - mode Mode, + mode CommentFilterMode, embeddingDocPath string, embeddingLine int, ) []string { diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index dd41ba4..5684dee 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -141,6 +141,67 @@ func TestFilterCSharp(t *testing.T) { }) } +// TestFilterGo verifies Go comment filtering without documentation support. 
+func TestFilterGo(t *testing.T) { + t.Run("none", func(t *testing.T) { + lines := []string{ + "// package comment", + "package sample", + "", + "/* block comment */", + "const slash = '/'", + "const url = \"http://example.org\"", + "const raw = `/* not a comment */`", + "func create() {} // inline comment", + } + + expected := []string{ + "package sample", + "", + "const slash = '/'", + "const url = \"http://example.org\"", + "const raw = `/* not a comment */`", + "func create() {} ", + } + + assertFiltered(t, "sample.go", RetainNone, lines, expected) + }) + + t.Run("inline", func(t *testing.T) { + lines := []string{ + "// package comment", + "package sample", + "/* block comment */", + "func create() {} // inline comment", + } + + expected := []string{ + "// package comment", + "package sample", + "func create() {} // inline comment", + } + + assertFiltered(t, "sample.go", RetainInline, lines, expected) + }) + + t.Run("block", func(t *testing.T) { + lines := []string{ + "// package comment", + "package sample", + "/* block comment */", + "func create() {} // inline comment", + } + + expected := []string{ + "package sample", + "/* block comment */", + "func create() {} ", + } + + assertFiltered(t, "sample.go", RetainBlock, lines, expected) + }) +} + // TestFilterPython verifies Python line comment filtering. func TestFilterPython(t *testing.T) { t.Run("none", func(t *testing.T) { @@ -254,7 +315,13 @@ func TestFilterWarnsAboutUnsupportedExtension(t *testing.T) { } // assertFiltered verifies filtering output for one file path and mode. 
-func assertFiltered(t *testing.T, filePath string, mode Mode, lines []string, expected []string) { +func assertFiltered( + t *testing.T, + filePath string, + mode CommentFilterMode, + lines []string, + expected []string, +) { t.Helper() got := Filter(lines, filePath, mode, "docs/guide.md", 12) diff --git a/embedding/commentfilter/marker_comment_filter.go b/embedding/commentfilter/marker_comment_filter.go index f8625a2..f21b15d 100644 --- a/embedding/commentfilter/marker_comment_filter.go +++ b/embedding/commentfilter/marker_comment_filter.go @@ -34,7 +34,7 @@ type blockState struct { type markerLineFilter struct { filter MarkerCommentFilter line string - mode Mode + mode CommentFilterMode state *blockState result strings.Builder position int @@ -42,7 +42,7 @@ type markerLineFilter struct { } // Filter removes or preserves recognized comments across all lines. -func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { +func (f MarkerCommentFilter) Filter(lines []string, mode CommentFilterMode) []string { var filtered []string state := blockState{} for _, line := range lines { @@ -57,7 +57,11 @@ func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { } // filterLine removes or preserves recognized comments from a single source line. -func (f MarkerCommentFilter) filterLine(line string, mode Mode, state *blockState) (string, bool) { +func (f MarkerCommentFilter) filterLine( + line string, + mode CommentFilterMode, + state *blockState, +) (string, bool) { filter := markerLineFilter{ filter: f, line: line, diff --git a/embedding/commentfilter/mode.go b/embedding/commentfilter/mode.go index 456f261..0245915 100644 --- a/embedding/commentfilter/mode.go +++ b/embedding/commentfilter/mode.go @@ -20,31 +20,31 @@ package commentfilter import "fmt" -// Mode controls which source comments are retained in embedded snippets. -type Mode string +// CommentFilterMode controls which source comments are retained in embedded snippets. 
+type CommentFilterMode string const ( // RetainAll keeps all comments in the embedded source. - RetainAll Mode = "all" + RetainAll CommentFilterMode = "all" // RetainNone removes all comments recognized for the source language. - RetainNone Mode = "none" + RetainNone CommentFilterMode = "none" // RetainDocumentation keeps only API documentation comments. - RetainDocumentation Mode = "documentation" + RetainDocumentation CommentFilterMode = "documentation" // RetainRegular keeps inline and block comments that are not documentation comments. - RetainRegular Mode = "regular" + RetainRegular CommentFilterMode = "regular" // RetainInline keeps only inline comments such as `//` and `#`. - RetainInline Mode = "inline" + RetainInline CommentFilterMode = "inline" // RetainBlock keeps only block comments such as `/* */`. - RetainBlock Mode = "block" + RetainBlock CommentFilterMode = "block" ) -// ParseMode converts an embed-code `comments` attribute value into a Mode. -func ParseMode(value string) (Mode, error) { - switch Mode(value) { +// ParseMode converts an embed-code `comments` attribute value into a CommentFilterMode. +func ParseMode(value string) (CommentFilterMode, error) { + switch CommentFilterMode(value) { case "": return RetainAll, nil case RetainAll, RetainNone, RetainDocumentation, RetainRegular, RetainInline, RetainBlock: - return Mode(value), nil + return CommentFilterMode(value), nil default: return "", fmt.Errorf("unsupported comments value `%s`; expected one of "+ "`all`, `none`, `documentation`, `regular`, `inline`, or `block`", value) diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index e74c234..e2256c0 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -47,19 +47,19 @@ type Syntax struct { // Filterer removes or preserves source comments according to the requested mode. 
type Filterer interface { - Filter(lines []string, mode Mode) []string + Filter(lines []string, mode CommentFilterMode) []string } // filterEntry stores a comment filter and the modes that make sense for its language. type filterEntry struct { filter Filterer - usefulModes map[Mode]struct{} + usefulModes []CommentFilterMode } // filterFor returns the comment filter registered for the given file path and warns on odd modes. func filterFor( filePath string, - mode Mode, + mode CommentFilterMode, embeddingDocPath string, embeddingLine int, ) (Filterer, bool) { @@ -107,6 +107,14 @@ var csharpSyntax = Syntax{ QuoteChars: "\"'`", } +var goSyntax = Syntax{ + Inline: []string{"//"}, + Block: []BlockSyntax{ + {Start: "/*", End: "*/"}, + }, + QuoteChars: "\"'`", +} + var hashLineSyntax = Syntax{ Inline: []string{"#"}, QuoteChars: "\"'", @@ -119,23 +127,30 @@ var xmlSyntax = Syntax{ QuoteChars: "\"'", } -var allCommentModes = usefulModes( +var allCommentModes = []CommentFilterMode{ RetainAll, RetainNone, RetainDocumentation, RetainRegular, RetainInline, RetainBlock, -) +} + +var allNoneCommentModes = []CommentFilterMode{RetainAll, RetainNone} -var allOrNoneCommentModes = usefulModes(RetainAll, RetainNone) +var inlineBlockCommentModes = []CommentFilterMode{ + RetainAll, + RetainNone, + RetainInline, + RetainBlock, +} -var regularAndDocCommentModes = usefulModes( +var regularDocCommentModes = []CommentFilterMode{ RetainAll, RetainNone, RetainDocumentation, RetainRegular, -) +} var filtersByExtension = map[string]filterEntry{ // Java/Kotlin @@ -153,41 +168,34 @@ var filtersByExtension = map[string]filterEntry{ ".ts": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), ".tsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + // Go + ".go": newFilterEntry(MarkerCommentFilter{Syntax: goSyntax}, inlineBlockCommentModes), + // Python - ".py": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), 
- ".pyi": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), - ".pyw": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), + ".py": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), + ".pyi": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), + ".pyw": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), // YAML - ".yml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), - ".yaml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allOrNoneCommentModes), + ".yml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), + ".yaml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), // XML - ".xml": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allOrNoneCommentModes), + ".xml": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allNoneCommentModes), // HTML - ".html": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allOrNoneCommentModes), - ".htm": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allOrNoneCommentModes), + ".html": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allNoneCommentModes), + ".htm": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allNoneCommentModes), // Visual Basic - ".vb": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), - ".bas": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), - ".vbs": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), - ".vbscript": newFilterEntry(VisualBasicCommentFilter{}, regularAndDocCommentModes), -} - -// usefulModes creates a lookup set for comment modes that make sense for a language. 
-func usefulModes(modes ...Mode) map[Mode]struct{} { - result := make(map[Mode]struct{}, len(modes)) - for _, mode := range modes { - result[mode] = struct{}{} - } - - return result + ".vb": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), + ".bas": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), + ".vbs": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), + ".vbscript": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), } // newFilterEntry creates a filter registry entry. -func newFilterEntry(filter Filterer, usefulModes map[Mode]struct{}) filterEntry { +func newFilterEntry(filter Filterer, usefulModes []CommentFilterMode) filterEntry { return filterEntry{ filter: filter, usefulModes: usefulModes, @@ -195,7 +203,12 @@ func newFilterEntry(filter Filterer, usefulModes map[Mode]struct{}) filterEntry } // warnUnsupportedCommentsMode logs when comments filtering is requested for an unsupported file. -func warnUnsupportedCommentsMode(filePath string, mode Mode, embeddingDocPath string, embeddingLine int) { +func warnUnsupportedCommentsMode( + filePath string, + mode CommentFilterMode, + embeddingDocPath string, + embeddingLine int, +) { if mode == RetainAll { return } @@ -213,12 +226,12 @@ func warnUnsupportedCommentsMode(filePath string, mode Mode, embeddingDocPath st // warnUselessCommentsMode logs when the selected mode has no distinct meaning for a file. func warnUselessCommentsMode( filePath string, - mode Mode, + mode CommentFilterMode, embeddingDocPath string, embeddingLine int, - usefulModes map[Mode]struct{}, + usefulModes []CommentFilterMode, ) { - if _, found := usefulModes[mode]; found { + if containsMode(usefulModes, mode) { return } slog.Warn( @@ -252,8 +265,8 @@ func fileURL(path string, line int) string { } // formatModes formats modes for a warning message. 
-func formatModes(modes map[Mode]struct{}) string { - order := []Mode{ +func formatModes(modes []CommentFilterMode) string { + order := []CommentFilterMode{ RetainAll, RetainNone, RetainDocumentation, @@ -263,10 +276,21 @@ func formatModes(modes map[Mode]struct{}) string { } var result []string for _, mode := range order { - if _, found := modes[mode]; found { + if containsMode(modes, mode) { result = append(result, fmt.Sprintf("`%s`", mode)) } } return strings.Join(result, ", ") } + +// containsMode reports whether the list includes the given mode. +func containsMode(modes []CommentFilterMode, mode CommentFilterMode) bool { + for _, usefulMode := range modes { + if usefulMode == mode { + return true + } + } + + return false +} diff --git a/embedding/commentfilter/visual_basic.go b/embedding/commentfilter/visual_basic.go index 645d61c..888184e 100644 --- a/embedding/commentfilter/visual_basic.go +++ b/embedding/commentfilter/visual_basic.go @@ -36,7 +36,7 @@ const ( type VisualBasicCommentFilter struct{} // Filter removes or preserves Visual Basic comments according to mode. -func (VisualBasicCommentFilter) Filter(lines []string, mode Mode) []string { +func (VisualBasicCommentFilter) Filter(lines []string, mode CommentFilterMode) []string { var filtered []string for _, line := range lines { filteredLine, hadComment := filterVisualBasicLine(line, mode) @@ -50,7 +50,7 @@ func (VisualBasicCommentFilter) Filter(lines []string, mode Mode) []string { } // filterVisualBasicLine removes or preserves one Visual Basic comment. 
-func filterVisualBasicLine(line string, mode Mode) (string, bool) { +func filterVisualBasicLine(line string, mode CommentFilterMode) (string, bool) { var result strings.Builder position := 0 for position < len(line) { diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 13e6089..9c6ea87 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -55,7 +55,7 @@ type Instruction struct { Fragment string StartPattern *Pattern EndPattern *Pattern - CommentMode commentfilter.Mode + CommentMode commentfilter.CommentFilterMode DocumentationFile string DocumentationLine int Configuration configuration.Configuration From 617941a6a4d04c4c1bdcbb2b18d34ec4d1ca4832 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 09:53:51 +0200 Subject: [PATCH 17/27] Improve tests style. --- embedding/commentfilter/filter.go | 2 +- embedding/commentfilter/filter_test.go | 553 +++++++++++++------------ embedding/commentfilter/syntax.go | 5 +- logging/logger_test.go | 28 -- 4 files changed, 279 insertions(+), 309 deletions(-) delete mode 100644 logging/logger_test.go diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 1d09d11..649d631 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -18,7 +18,7 @@ package commentfilter -// Filter returns source lines with comments retained according to the requested mode. +// Filter returns source lines with comments stripped according to the requested mode. func Filter( lines []string, filePath string, diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 5684dee..30c5b38 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -21,313 +21,314 @@ package commentfilter import ( "bytes" "log/slog" - "reflect" - "strings" "testing" -) - -// TestFilterYaml verifies YAML line comment filtering. 
-func TestFilterYaml(t *testing.T) { - lines := []string{ - "name: test # inline", - "# standalone", - "value: \"# literal\"", - } - - expected := []string{ - "name: test ", - "value: \"# literal\"", - } - - assertFiltered(t, "config.yml", RetainNone, lines, expected) -} - -// TestFilterXml verifies XML block comment filtering. -func TestFilterXml(t *testing.T) { - lines := []string{ - "", - " ", - " \"/>", - "", - } - - expected := []string{ - "", - " \"/>", - "", - } - - assertFiltered(t, "layout.xml", RetainNone, lines, expected) -} - -// TestFilterJavaStyle verifies Java-family marker-based filtering. -func TestFilterJavaStyle(t *testing.T) { - t.Run("documentation", func(t *testing.T) { - lines := []string{ - "/** API docs. */", - "// implementation note", - "fun call() = \"// literal\"", - } - - expected := []string{ - "/** API docs. */", - "fun call() = \"// literal\"", - } - - assertFiltered(t, "api.kt", RetainDocumentation, lines, expected) - }) - - t.Run("block", func(t *testing.T) { - lines := []string{ - "/** API docs. */", - "/* implementation note */", - "String create();", - } - expected := []string{ - "/* implementation note */", - "String create();", - } - - assertFiltered(t, "Api.java", RetainBlock, lines, expected) - }) - - t.Run("regular", func(t *testing.T) { - lines := []string{ - "/** API docs. */", - "/* implementation note */", - "String create(); // inline note", - } - - expected := []string{ - "/* implementation note */", - "String create(); // inline note", - } - - assertFiltered(t, "Api.java", RetainRegular, lines, expected) - }) -} - -// TestFilterCSharp verifies C# XML documentation comment filtering. 
-func TestFilterCSharp(t *testing.T) { - t.Run("documentation", func(t *testing.T) { - lines := []string{ - "/// Creates a value.", - "// implementation note", - "public string Create() => \"// literal\";", - } - - expected := []string{ - "/// Creates a value.", - "public string Create() => \"// literal\";", - } - - assertFiltered(t, "Api.cs", RetainDocumentation, lines, expected) - }) - - t.Run("inline", func(t *testing.T) { - lines := []string{ - "/// Creates a value.", - "// implementation note", - "public string Create() => \"// literal\";", - } - - expected := []string{ - "// implementation note", - "public string Create() => \"// literal\";", - } + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) - assertFiltered(t, "Api.cs", RetainInline, lines, expected) - }) +// TestCommentFilter runs the comment filter test suite. +func TestCommentFilter(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Comment Filter Suite") } -// TestFilterGo verifies Go comment filtering without documentation support. 
-func TestFilterGo(t *testing.T) { - t.Run("none", func(t *testing.T) { - lines := []string{ - "// package comment", - "package sample", - "", - "/* block comment */", - "const slash = '/'", - "const url = \"http://example.org\"", - "const raw = `/* not a comment */`", - "func create() {} // inline comment", - } - - expected := []string{ - "package sample", - "", - "const slash = '/'", - "const url = \"http://example.org\"", - "const raw = `/* not a comment */`", - "func create() {} ", - } - - assertFiltered(t, "sample.go", RetainNone, lines, expected) +var _ = Describe("Comment filter", func() { + Describe("YAML", func() { + It("should strip all comments", func() { + lines := []string{ + "name: test # inline", + "# standalone", + "value: \"# literal\"", + } + + expected := []string{ + "name: test ", + "value: \"# literal\"", + } + + assertFiltered("config.yml", RetainNone, lines, expected) + }) }) - t.Run("inline", func(t *testing.T) { - lines := []string{ - "// package comment", - "package sample", - "/* block comment */", - "func create() {} // inline comment", - } - - expected := []string{ - "// package comment", - "package sample", - "func create() {} // inline comment", - } - - assertFiltered(t, "sample.go", RetainInline, lines, expected) + Describe("XML", func() { + It("should strip all comments", func() { + lines := []string{ + "", + " ", + " \"/>", + "", + } + + expected := []string{ + "", + " \"/>", + "", + } + + assertFiltered("layout.xml", RetainNone, lines, expected) + }) }) - t.Run("block", func(t *testing.T) { - lines := []string{ - "// package comment", - "package sample", - "/* block comment */", - "func create() {} // inline comment", - } - - expected := []string{ - "package sample", - "/* block comment */", - "func create() {} ", - } - - assertFiltered(t, "sample.go", RetainBlock, lines, expected) + Describe("Java-style languages", func() { + It("should keep documentation comments", func() { + lines := []string{ + "/** API docs. 
*/", + "// implementation note", + "fun call() = \"// literal\"", + } + + expected := []string{ + "/** API docs. */", + "fun call() = \"// literal\"", + } + + assertFiltered("api.kt", RetainDocumentation, lines, expected) + }) + + It("should keep block comments", func() { + lines := []string{ + "/** API docs. */", + "/* implementation note */", + "String create();", + } + + expected := []string{ + "/* implementation note */", + "String create();", + } + + assertFiltered("Api.java", RetainBlock, lines, expected) + }) + + It("should keep regular comments", func() { + lines := []string{ + "/** API docs. */", + "/* implementation note */", + "String create(); // inline note", + } + + expected := []string{ + "/* implementation note */", + "String create(); // inline note", + } + + assertFiltered("Api.java", RetainRegular, lines, expected) + }) }) -} -// TestFilterPython verifies Python line comment filtering. -func TestFilterPython(t *testing.T) { - t.Run("none", func(t *testing.T) { - lines := []string{ - "# module comment", - "name = 'hash # literal'", - "value = 1 # inline comment", - } - - expected := []string{ - "name = 'hash # literal'", - "value = 1 ", - } - - assertFiltered(t, "module.py", RetainNone, lines, expected) + Describe("C#", func() { + It("should keep XML documentation comments", func() { + lines := []string{ + "/// Creates a value.", + "// implementation note", + "public string Create() => \"// literal\";", + } + + expected := []string{ + "/// Creates a value.", + "public string Create() => \"// literal\";", + } + + assertFiltered("Api.cs", RetainDocumentation, lines, expected) + }) + + It("should keep inline comments", func() { + lines := []string{ + "/// Creates a value.", + "// implementation note", + "public string Create() => \"// literal\";", + } + + expected := []string{ + "// implementation note", + "public string Create() => \"// literal\";", + } + + assertFiltered("Api.cs", RetainInline, lines, expected) + }) }) -} -// TestFilterVisualBasic 
verifies Visual Basic comment filtering. -func TestFilterVisualBasic(t *testing.T) { - t.Run("none", func(t *testing.T) { - lines := []string{ - "' file comment", - "REM module comment", - "Dim text = \"REM not a comment\"", - "Dim value = 1 ' inline", - "Dim ready = True : Rem after statement separator", - "Dim reminder = 1", - } - - expected := []string{ - "Dim text = \"REM not a comment\"", - "Dim value = 1 ", - "Dim ready = True : ", - "Dim reminder = 1", - } - - assertFiltered(t, "Module.vb", RetainNone, lines, expected) + Describe("Go", func() { + It("should strip all comments without treating literals as comments", func() { + lines := []string{ + "// package comment", + "package sample", + "", + "/* block comment */", + "const slash = '/'", + "const url = \"http://example.org\"", + "const raw = `/* not a comment */`", + "func create() {} // inline comment", + } + + expected := []string{ + "package sample", + "", + "const slash = '/'", + "const url = \"http://example.org\"", + "const raw = `/* not a comment */`", + "func create() {} ", + } + + assertFiltered("sample.go", RetainNone, lines, expected) + }) + + It("should keep inline comments", func() { + lines := []string{ + "// package comment", + "package sample", + "/* block comment */", + "func create() {} // inline comment", + } + + expected := []string{ + "// package comment", + "package sample", + "func create() {} // inline comment", + } + + assertFiltered("sample.go", RetainInline, lines, expected) + }) + + It("should keep block comments", func() { + lines := []string{ + "// package comment", + "package sample", + "/* block comment */", + "func create() {} // inline comment", + } + + expected := []string{ + "package sample", + "/* block comment */", + "func create() {} ", + } + + assertFiltered("sample.go", RetainBlock, lines, expected) + }) }) - t.Run("regular", func(t *testing.T) { - lines := []string{ - "''' Creates a value.", - "' file comment", - "REM module comment", - "Dim value = 1 ' inline", - 
} - - expected := []string{ - "' file comment", - "REM module comment", - "Dim value = 1 ' inline", - } - - assertFiltered(t, "Module.vb", RetainRegular, lines, expected) + Describe("Python", func() { + It("should strip all comments", func() { + lines := []string{ + "# module comment", + "name = 'hash # literal'", + "value = 1 # inline comment", + } + + expected := []string{ + "name = 'hash # literal'", + "value = 1 ", + } + + assertFiltered("module.py", RetainNone, lines, expected) + }) }) - t.Run("documentation", func(t *testing.T) { - lines := []string{ - "''' Creates a value.", - "' implementation note", - "REM module comment", - "Public Function Create() As String", - } - - expected := []string{ - "''' Creates a value.", - "Public Function Create() As String", - } - - assertFiltered(t, "Module.vb", RetainDocumentation, lines, expected) + Describe("Visual Basic", func() { + It("should strip all comments", func() { + lines := []string{ + "' file comment", + "REM module comment", + "Dim text = \"REM not a comment\"", + "Dim value = 1 ' inline", + "Dim ready = True : Rem after statement separator", + "Dim reminder = 1", + } + + expected := []string{ + "Dim text = \"REM not a comment\"", + "Dim value = 1 ", + "Dim ready = True : ", + "Dim reminder = 1", + } + + assertFiltered("Module.vb", RetainNone, lines, expected) + }) + + It("should keep regular comments", func() { + lines := []string{ + "''' Creates a value.", + "' file comment", + "REM module comment", + "Dim value = 1 ' inline", + } + + expected := []string{ + "' file comment", + "REM module comment", + "Dim value = 1 ' inline", + } + + assertFiltered("Module.vb", RetainRegular, lines, expected) + }) + + It("should keep documentation comments", func() { + lines := []string{ + "''' Creates a value.", + "' implementation note", + "REM module comment", + "Public Function Create() As String", + } + + expected := []string{ + "''' Creates a value.", + "Public Function Create() As String", + } + + 
assertFiltered("Module.vb", RetainDocumentation, lines, expected) + }) }) -} -// TestFilterUnsupportedExtension verifies unsupported files are returned unchanged. -func TestFilterUnsupportedExtension(t *testing.T) { - lines := []string{ - "# docs", - "sub call { } # inline", - } - - assertFiltered(t, "service.pl", RetainAll, lines, lines) -} - -// TestFilterWarnsAboutUselessMode verifies warnings for modes without language-specific meaning. -func TestFilterWarnsAboutUselessMode(t *testing.T) { - output := captureWarnings(func() { - Filter([]string{""}, "layout.xml", RetainDocumentation, "docs/guide.md", 12) + Describe("unsupported extensions", func() { + It("should return unsupported files unchanged", func() { + lines := []string{ + "# docs", + "sub call { } # inline", + } + + assertFiltered("service.pl", RetainAll, lines, lines) + }) + + It("should warn about unsupported comment modes", func() { + output := captureWarnings(func() { + Filter([]string{"# comment"}, "service.pl", RetainNone, "docs/guide.md", 12) + }) + + Expect(output).Should(ContainSubstring( + "comment filtering is not supported for this file extension", + )) + Expect(output).Should(ContainSubstring("file://")) + Expect(output).Should(ContainSubstring("guide.md:12")) + }) }) - if !strings.Contains(output, "documentation") || - !strings.Contains(output, "layout.xml") || - !strings.Contains(output, "file://") || - !strings.Contains(output, "guide.md:12") || - !strings.Contains(output, "does not have a distinct meaning") { - t.Fatalf("warning output = %q", output) - } -} - -// TestFilterWarnsAboutUnsupportedExtension verifies warnings for unsupported file extensions. 
-func TestFilterWarnsAboutUnsupportedExtension(t *testing.T) { - output := captureWarnings(func() { - Filter([]string{"# comment"}, "service.pl", RetainNone, "docs/guide.md", 12) + Describe("warnings", func() { + It("should warn about modes without language-specific meaning", func() { + output := captureWarnings(func() { + Filter([]string{""}, "layout.xml", RetainDocumentation, "docs/guide.md", 12) + }) + + Expect(output).Should(ContainSubstring("documentation")) + Expect(output).Should(ContainSubstring("layout.xml")) + Expect(output).Should(ContainSubstring("file://")) + Expect(output).Should(ContainSubstring("guide.md:12")) + Expect(output).Should(ContainSubstring("does not have a distinct meaning")) + }) }) - - if !strings.Contains(output, "comment filtering is not supported for this file extension") || - !strings.Contains(output, "file://") || - !strings.Contains(output, "guide.md:12") { - t.Fatalf("warning output = %q", output) - } -} +}) // assertFiltered verifies filtering output for one file path and mode. func assertFiltered( - t *testing.T, filePath string, mode CommentFilterMode, lines []string, expected []string, ) { - t.Helper() - got := Filter(lines, filePath, mode, "docs/guide.md", 12) - if !reflect.DeepEqual(got, expected) { - t.Fatalf("Filter() = %#v, expected %#v", got, expected) - } + + Expect(got).Should(Equal(expected)) } // captureWarnings runs action and returns slog warning output. diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index e2256c0..a7fc17d 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -237,7 +237,7 @@ func warnUselessCommentsMode( slog.Warn( fmt.Sprintf( "`comments=\"%s\"` was requested in `%s` for `%s`, but this mode does not have "+ - "a distinct meaning for this file type. Useful modes are: %s.", + "a distinct meaning for this file type. 
Supported modes are: %s.", mode, fileURL(embeddingDocPath, embeddingLine), filePath, @@ -248,9 +248,6 @@ func warnUselessCommentsMode( // fileURL returns an absolute file URL for a local path and line. func fileURL(path string, line int) string { - if path == "" { - return "file://" - } absolutePath, err := filepath.Abs(path) if err != nil { return "file://" + path diff --git a/logging/logger_test.go b/logging/logger_test.go deleted file mode 100644 index e711ff7..0000000 --- a/logging/logger_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package logging - -import ( - "errors" - "testing" -) - -// TestFormatPanicMessage verifies formatting for ordinary and joined panic errors. -func TestFormatPanicMessage(t *testing.T) { - t.Run("formats single panic value", func(t *testing.T) { - actual := formatPanicMessage("failed") - expected := "panic: failed" - if actual != expected { - t.Fatalf("expected %q, got %q", expected, actual) - } - }) - - t.Run("formats joined panic errors as a list", func(t *testing.T) { - actual := formatPanicMessage(errors.Join( - errors.New("error1 text"), - errors.New("error2 text"), - )) - expected := "panic:\n- error1 text\n- error2 text" - if actual != expected { - t.Fatalf("expected %q, got %q", expected, actual) - } - }) -} From 9c5561465f68cf68639e0d5fed5497e35d82d984 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 10:36:18 +0200 Subject: [PATCH 18/27] Add C/C++ support. 
--- EMBEDDING.md | 2 + embedding/commentfilter/filter_test.go | 58 ++++++++++++++++++++++++++ embedding/commentfilter/syntax.go | 18 ++++++++ 3 files changed, 78 insertions(+) diff --git a/EMBEDDING.md b/EMBEDDING.md index 9165197..16aa08f 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -137,6 +137,8 @@ The table below lists the supported languages and useful `comments` modes for th |------------------------|-----------------------------------------|--------------------------------------------------------------| | Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| C, C++ | `.c`, `.h`, `.cc`, `.cpp`, `.cxx`, | `all`, `none`, `inline`, `block` | +| | `.hh`, `.hpp`, `.hxx` | | | JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | Go | `.go` | `all`, `none`, `inline`, `block` | | Python | `.py`, `.pyi`, `.pyw` | `all`, `none` | diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 30c5b38..13c1625 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -149,6 +149,64 @@ var _ = Describe("Comment filter", func() { }) }) + Describe("C and C++", func() { + It("should strip all comments without treating literals as comments", func() { + lines := []string{ + "// header comment", + "#include ", + "", + "/* block comment */", + "const char slash = '/';", + "const char* url = \"http://example.org\";", + "int create() { return 1; } // inline comment", + } + + expected := []string{ + "#include ", + "", + "const char slash = '/';", + "const char* url = \"http://example.org\";", + "int create() { return 1; } ", + } + + assertFiltered("sample.cpp", RetainNone, lines, expected) + }) + + It("should keep inline comments", func() { + lines := []string{ + "// header comment", + "int 
create();", + "/* block comment */", + "int count(); // inline comment", + } + + expected := []string{ + "// header comment", + "int create();", + "int count(); // inline comment", + } + + assertFiltered("sample.cpp", RetainInline, lines, expected) + }) + + It("should keep block comments", func() { + lines := []string{ + "// header comment", + "int create();", + "/* block comment */", + "int count(); // inline comment", + } + + expected := []string{ + "int create();", + "/* block comment */", + "int count(); ", + } + + assertFiltered("sample.hpp", RetainBlock, lines, expected) + }) + }) + Describe("Go", func() { It("should strip all comments without treating literals as comments", func() { lines := []string{ diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index a7fc17d..d6c03d7 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -107,6 +107,14 @@ var csharpSyntax = Syntax{ QuoteChars: "\"'`", } +var cStyleSyntax = Syntax{ + Inline: []string{"//"}, + Block: []BlockSyntax{ + {Start: "/*", End: "*/"}, + }, + QuoteChars: "\"'", +} + var goSyntax = Syntax{ Inline: []string{"//"}, Block: []BlockSyntax{ @@ -162,6 +170,16 @@ var filtersByExtension = map[string]filterEntry{ // C# ".cs": newFilterEntry(MarkerCommentFilter{Syntax: csharpSyntax}, allCommentModes), + // C/C++ + ".c": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + ".h": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + ".cc": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + ".cpp": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + ".cxx": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + ".hh": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + ".hpp": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, 
inlineBlockCommentModes), + ".hxx": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + // JavaScript ".js": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), ".jsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), From e021cf0c862f495c8fc184a89b4b752d05209df5 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 10:43:17 +0200 Subject: [PATCH 19/27] Divide JS and Java comment syntaxes. --- embedding/commentfilter/filter_test.go | 17 +++++++++++++++ embedding/commentfilter/syntax.go | 29 ++++++++++++++++++-------- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 13c1625..2f6e1ce 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -117,6 +117,23 @@ var _ = Describe("Comment filter", func() { }) }) + Describe("JavaScript and TypeScript", func() { + It("should strip comments without treating template literals as comments", func() { + lines := []string{ + "// module comment", + "const url = `http://example.org/*not-comment*/`;", + "const value = 42; // inline comment", + } + + expected := []string{ + "const url = `http://example.org/*not-comment*/`;", + "const value = 42; ", + } + + assertFiltered("sample.ts", RetainNone, lines, expected) + }) + }) + Describe("C#", func() { It("should keep XML documentation comments", func() { lines := []string{ diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index d6c03d7..bcd1c39 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -84,7 +84,18 @@ func normalizeExtension(extension string) string { return "." 
+ normalized } -var javaStyleSyntax = Syntax{ +var javaSyntax = Syntax{ + Inline: []string{"//"}, + Block: []BlockSyntax{ + {Start: "/*", End: "*/"}, + }, + Documentation: DocumentationSyntax{ + Block: []BlockSyntax{{Start: "/**", End: "*/"}}, + }, + QuoteChars: "\"'", +} + +var jsSyntax = Syntax{ Inline: []string{"//"}, Block: []BlockSyntax{ {Start: "/*", End: "*/"}, @@ -162,10 +173,10 @@ var regularDocCommentModes = []CommentFilterMode{ var filtersByExtension = map[string]filterEntry{ // Java/Kotlin - ".java": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), - ".kt": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), - ".kts": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), - ".groovy": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".java": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), + ".kt": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), + ".kts": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), + ".groovy": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), // C# ".cs": newFilterEntry(MarkerCommentFilter{Syntax: csharpSyntax}, allCommentModes), @@ -181,10 +192,10 @@ var filtersByExtension = map[string]filterEntry{ ".hxx": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), // JavaScript - ".js": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), - ".jsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), - ".ts": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), - ".tsx": newFilterEntry(MarkerCommentFilter{Syntax: javaStyleSyntax}, allCommentModes), + ".js": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), + ".jsx": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), + ".ts": 
newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), + ".tsx": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), // Go ".go": newFilterEntry(MarkerCommentFilter{Syntax: goSyntax}, inlineBlockCommentModes), From b24b1e5dd38015c9f1bed41455039594ab1e6e4e Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 10:58:51 +0200 Subject: [PATCH 20/27] Add Protobuf support. --- EMBEDDING.md | 24 +++++------ embedding/commentfilter/filter_test.go | 60 ++++++++++++++++++++++++++ embedding/commentfilter/syntax.go | 3 ++ 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 16aa08f..70a3f9f 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -133,18 +133,18 @@ Not all languages has difference between documentation/regular or inline/block c The table below lists the supported languages and useful `comments` modes for them: -| Language | Extensions | Useful `comments` modes | -|------------------------|-----------------------------------------|--------------------------------------------------------------| -| Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | -| C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | -| C, C++ | `.c`, `.h`, `.cc`, `.cpp`, `.cxx`, | `all`, `none`, `inline`, `block` | -| | `.hh`, `.hpp`, `.hxx` | | -| JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | -| Go | `.go` | `all`, `none`, `inline`, `block` | -| Python | `.py`, `.pyi`, `.pyw` | `all`, `none` | -| YAML | `.yml`, `.yaml` | `all`, `none` | -| XML, HTML | `.xml`, `.html`, `.htm` | `all`, `none` | -| Visual Basic | `.vb`, `.bas`, `.vbs`, `.vbscript` | `all`, `none`, `documentation`, `regular` | +| Language | Extensions | Useful `comments` modes | 
+|------------------------|---------------------------------------------------------|--------------------------------------------------------------| +| Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| C, C++ | `.c`, `.h`, `.cc`, `.cpp`, `.cxx`,`.hh`, `.hpp`, `.hxx` | `all`, `none`, `inline`, `block` | +| JavaScript, TypeScript | `.js`, `.jsx`, `.ts`, `.tsx` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | +| Go | `.go` | `all`, `none`, `inline`, `block` | +| Protobuf | `.proto` | `all`, `none`, `inline`, `block` | +| Python | `.py`, `.pyi`, `.pyw` | `all`, `none` | +| YAML | `.yml`, `.yaml` | `all`, `none` | +| XML, HTML | `.xml`, `.html`, `.htm` | `all`, `none` | +| Visual Basic | `.vb`, `.bas`, `.vbs`, `.vbscript` | `all`, `none`, `documentation`, `regular` | ## Advanced use cases diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 2f6e1ce..c5cb638 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -284,6 +284,66 @@ var _ = Describe("Comment filter", func() { }) }) + Describe("Protobuf", func() { + It("should strip all comments without treating literals as comments", func() { + lines := []string{ + "// file comment", + "syntax = \"proto3\";", + "", + "/* message comment */", + "message Sample {", + " string url = 1 [default = 'http://example.org'];", + " int32 count = 2; // inline comment", + "}", + } + + expected := []string{ + "syntax = \"proto3\";", + "", + "message Sample {", + " string url = 1 [default = 'http://example.org'];", + " int32 count = 2; ", + "}", + } + + assertFiltered("sample.proto", RetainNone, lines, expected) + }) + + It("should keep inline comments", func() { + lines := []string{ + "// file comment", + "syntax = \"proto3\";", + "/* message comment */", + "message Sample {} // 
inline comment", + } + + expected := []string{ + "// file comment", + "syntax = \"proto3\";", + "message Sample {} // inline comment", + } + + assertFiltered("sample.proto", RetainInline, lines, expected) + }) + + It("should keep block comments", func() { + lines := []string{ + "// file comment", + "syntax = \"proto3\";", + "/* message comment */", + "message Sample {} // inline comment", + } + + expected := []string{ + "syntax = \"proto3\";", + "/* message comment */", + "message Sample {} ", + } + + assertFiltered("sample.proto", RetainBlock, lines, expected) + }) + }) + Describe("Python", func() { It("should strip all comments", func() { lines := []string{ diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index bcd1c39..11441d8 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -200,6 +200,9 @@ var filtersByExtension = map[string]filterEntry{ // Go ".go": newFilterEntry(MarkerCommentFilter{Syntax: goSyntax}, inlineBlockCommentModes), + // Protobuf + ".proto": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), + // Python ".py": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), ".pyi": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), From 38b205ddbb13d968c8a4fe23b70f77c5be55cc92 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 11:21:35 +0200 Subject: [PATCH 21/27] Remove redundant methods. 
--- .../commentfilter/marker_comment_filter.go | 28 +++---------------- embedding/commentfilter/visual_basic.go | 2 +- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/embedding/commentfilter/marker_comment_filter.go b/embedding/commentfilter/marker_comment_filter.go index f21b15d..5d6a207 100644 --- a/embedding/commentfilter/marker_comment_filter.go +++ b/embedding/commentfilter/marker_comment_filter.go @@ -131,19 +131,19 @@ func (f *markerLineFilter) consumeQuotedSegment() bool { // consumeComment consumes a comment and reports whether it consumed input and ended the line. func (f *markerLineFilter) consumeComment() (bool, bool) { - if _, found := documentationInlineAt(f.line, f.position, f.filter.Syntax); found { + if _, found := prefixAt(f.line, f.position, f.filter.Syntax.Documentation.Inline); found { f.consumeInlineComment(f.mode == RetainDocumentation) return true, true } - if block, found := documentationBlockAt(f.line, f.position, f.filter.Syntax); found { + if block, found := blockAt(f.line, f.position, f.filter.Syntax.Documentation.Block); found { f.startBlockComment(block, f.mode == RetainDocumentation) return true, false } - if _, found := inlineCommentAt(f.line, f.position, f.filter.Syntax); found { + if _, found := prefixAt(f.line, f.position, f.filter.Syntax.Inline); found { f.consumeInlineComment(f.mode == RetainInline || f.mode == RetainRegular) return true, true } - if block, found := blockCommentAt(f.line, f.position, f.filter.Syntax); found { + if block, found := blockAt(f.line, f.position, f.filter.Syntax.Block); found { f.startBlockComment(block, f.mode == RetainBlock || f.mode == RetainRegular) return true, false } @@ -174,26 +174,6 @@ func (f *markerLineFilter) consumeCodeByte() { f.position++ } -// documentationInlineAt reports whether a documentation line comment starts at the position. 
-func documentationInlineAt(line string, position int, syntax Syntax) (string, bool) { - return prefixAt(line, position, syntax.Documentation.Inline) -} - -// documentationBlockAt reports whether a documentation block comment starts at the position. -func documentationBlockAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { - return blockAt(line, position, syntax.Documentation.Block) -} - -// inlineCommentAt reports whether an inline comment starts at the given position. -func inlineCommentAt(line string, position int, syntax Syntax) (string, bool) { - return prefixAt(line, position, syntax.Inline) -} - -// blockCommentAt reports whether a block comment starts at the given position. -func blockCommentAt(line string, position int, syntax Syntax) (BlockSyntax, bool) { - return blockAt(line, position, syntax.Block) -} - // prefixAt reports whether one of the given prefixes starts at the position. func prefixAt(line string, position int, prefixes []string) (string, bool) { for _, prefix := range prefixes { diff --git a/embedding/commentfilter/visual_basic.go b/embedding/commentfilter/visual_basic.go index 888184e..fd05769 100644 --- a/embedding/commentfilter/visual_basic.go +++ b/embedding/commentfilter/visual_basic.go @@ -30,7 +30,7 @@ const ( ) // VisualBasicCommentFilter filters the Visual Basic comment forms: -// - documentation comments starting with `”'`; +// - documentation comments starting with `'''`; // - apostrophe comments starting with `'`; // - REM comments starting with `REM`. type VisualBasicCommentFilter struct{} From 786a6847c793a9708e804d43d9f1ac3e77b61ded Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 11:27:53 +0200 Subject: [PATCH 22/27] Fix naming. 
--- embedding/commentfilter/filter.go | 2 +- embedding/commentfilter/filter_test.go | 2 +- .../commentfilter/marker_comment_filter.go | 27 ++++++++++-- embedding/commentfilter/mode.go | 22 +++++----- embedding/commentfilter/scanner.go | 42 ------------------- embedding/commentfilter/syntax.go | 28 ++++++------- embedding/commentfilter/visual_basic.go | 6 +-- embedding/parsing/instruction.go | 2 +- 8 files changed, 55 insertions(+), 76 deletions(-) delete mode 100644 embedding/commentfilter/scanner.go diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 649d631..5cb82fb 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -22,7 +22,7 @@ package commentfilter func Filter( lines []string, filePath string, - mode CommentFilterMode, + mode Mode, embeddingDocPath string, embeddingLine int, ) []string { diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index c5cb638..b2c0638 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -457,7 +457,7 @@ var _ = Describe("Comment filter", func() { // assertFiltered verifies filtering output for one file path and mode. func assertFiltered( filePath string, - mode CommentFilterMode, + mode Mode, lines []string, expected []string, ) { diff --git a/embedding/commentfilter/marker_comment_filter.go b/embedding/commentfilter/marker_comment_filter.go index 5d6a207..5933649 100644 --- a/embedding/commentfilter/marker_comment_filter.go +++ b/embedding/commentfilter/marker_comment_filter.go @@ -34,7 +34,7 @@ type blockState struct { type markerLineFilter struct { filter MarkerCommentFilter line string - mode CommentFilterMode + mode Mode state *blockState result strings.Builder position int @@ -42,7 +42,7 @@ type markerLineFilter struct { } // Filter removes or preserves recognized comments across all lines. 
-func (f MarkerCommentFilter) Filter(lines []string, mode CommentFilterMode) []string { +func (f MarkerCommentFilter) Filter(lines []string, mode Mode) []string { var filtered []string state := blockState{} for _, line := range lines { @@ -59,7 +59,7 @@ func (f MarkerCommentFilter) Filter(lines []string, mode CommentFilterMode) []st // filterLine removes or preserves recognized comments from a single source line. func (f MarkerCommentFilter) filterLine( line string, - mode CommentFilterMode, + mode Mode, state *blockState, ) (string, bool) { filter := markerLineFilter{ @@ -129,6 +129,27 @@ func (f *markerLineFilter) consumeQuotedSegment() bool { return true } +// quotedSegmentEnd returns the end offset of a quoted string starting at position. +func quotedSegmentEnd(line string, position int, quoteChars string) int { + if position >= len(line) || !strings.ContainsRune(quoteChars, rune(line[position])) { + return position + } + quote := line[position] + cursor := position + 1 + for cursor < len(line) { + if line[cursor] == '\\' { + cursor += 2 + continue + } + if line[cursor] == quote { + return cursor + 1 + } + cursor++ + } + + return len(line) +} + // consumeComment consumes a comment and reports whether it consumed input and ended the line. func (f *markerLineFilter) consumeComment() (bool, bool) { if _, found := prefixAt(f.line, f.position, f.filter.Syntax.Documentation.Inline); found { diff --git a/embedding/commentfilter/mode.go b/embedding/commentfilter/mode.go index 0245915..b5cadda 100644 --- a/embedding/commentfilter/mode.go +++ b/embedding/commentfilter/mode.go @@ -20,31 +20,31 @@ package commentfilter import "fmt" -// CommentFilterMode controls which source comments are retained in embedded snippets. -type CommentFilterMode string +// Mode controls which source comments are retained in embedded snippets. +type Mode string const ( // RetainAll keeps all comments in the embedded source. 
- RetainAll CommentFilterMode = "all" + RetainAll Mode = "all" // RetainNone removes all comments recognized for the source language. - RetainNone CommentFilterMode = "none" + RetainNone Mode = "none" // RetainDocumentation keeps only API documentation comments. - RetainDocumentation CommentFilterMode = "documentation" + RetainDocumentation Mode = "documentation" // RetainRegular keeps inline and block comments that are not documentation comments. - RetainRegular CommentFilterMode = "regular" + RetainRegular Mode = "regular" // RetainInline keeps only inline comments such as `//` and `#`. - RetainInline CommentFilterMode = "inline" + RetainInline Mode = "inline" // RetainBlock keeps only block comments such as `/* */`. - RetainBlock CommentFilterMode = "block" + RetainBlock Mode = "block" ) // ParseMode converts an embed-code `comments` attribute value into a CommentFilterMode. -func ParseMode(value string) (CommentFilterMode, error) { - switch CommentFilterMode(value) { +func ParseMode(value string) (Mode, error) { + switch Mode(value) { case "": return RetainAll, nil case RetainAll, RetainNone, RetainDocumentation, RetainRegular, RetainInline, RetainBlock: - return CommentFilterMode(value), nil + return Mode(value), nil default: return "", fmt.Errorf("unsupported comments value `%s`; expected one of "+ "`all`, `none`, `documentation`, `regular`, `inline`, or `block`", value) diff --git a/embedding/commentfilter/scanner.go b/embedding/commentfilter/scanner.go deleted file mode 100644 index d349d9a..0000000 --- a/embedding/commentfilter/scanner.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2026, TeamDev. All rights reserved. -// -// Redistribution and use in source and/or binary forms, with or without -// modification, must retain the above copyright notice and the following -// disclaimer. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -package commentfilter - -import "strings" - -// quotedSegmentEnd returns the end offset of a quoted string starting at position. -func quotedSegmentEnd(line string, position int, quoteChars string) int { - if position >= len(line) || !strings.ContainsRune(quoteChars, rune(line[position])) { - return position - } - quote := line[position] - cursor := position + 1 - for cursor < len(line) { - if line[cursor] == '\\' { - cursor += 2 - continue - } - if line[cursor] == quote { - return cursor + 1 - } - cursor++ - } - - return len(line) -} diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index 11441d8..66d92bc 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -47,19 +47,19 @@ type Syntax struct { // Filterer removes or preserves source comments according to the requested mode. type Filterer interface { - Filter(lines []string, mode CommentFilterMode) []string + Filter(lines []string, mode Mode) []string } // filterEntry stores a comment filter and the modes that make sense for its language. 
type filterEntry struct { filter Filterer - usefulModes []CommentFilterMode + usefulModes []Mode } // filterFor returns the comment filter registered for the given file path and warns on odd modes. func filterFor( filePath string, - mode CommentFilterMode, + mode Mode, embeddingDocPath string, embeddingLine int, ) (Filterer, bool) { @@ -146,7 +146,7 @@ var xmlSyntax = Syntax{ QuoteChars: "\"'", } -var allCommentModes = []CommentFilterMode{ +var allCommentModes = []Mode{ RetainAll, RetainNone, RetainDocumentation, @@ -155,16 +155,16 @@ var allCommentModes = []CommentFilterMode{ RetainBlock, } -var allNoneCommentModes = []CommentFilterMode{RetainAll, RetainNone} +var allNoneCommentModes = []Mode{RetainAll, RetainNone} -var inlineBlockCommentModes = []CommentFilterMode{ +var inlineBlockCommentModes = []Mode{ RetainAll, RetainNone, RetainInline, RetainBlock, } -var regularDocCommentModes = []CommentFilterMode{ +var regularDocCommentModes = []Mode{ RetainAll, RetainNone, RetainDocumentation, @@ -227,7 +227,7 @@ var filtersByExtension = map[string]filterEntry{ } // newFilterEntry creates a filter registry entry. -func newFilterEntry(filter Filterer, usefulModes []CommentFilterMode) filterEntry { +func newFilterEntry(filter Filterer, usefulModes []Mode) filterEntry { return filterEntry{ filter: filter, usefulModes: usefulModes, @@ -237,7 +237,7 @@ func newFilterEntry(filter Filterer, usefulModes []CommentFilterMode) filterEntr // warnUnsupportedCommentsMode logs when comments filtering is requested for an unsupported file. func warnUnsupportedCommentsMode( filePath string, - mode CommentFilterMode, + mode Mode, embeddingDocPath string, embeddingLine int, ) { @@ -258,10 +258,10 @@ func warnUnsupportedCommentsMode( // warnUselessCommentsMode logs when the selected mode has no distinct meaning for a file. 
func warnUselessCommentsMode( filePath string, - mode CommentFilterMode, + mode Mode, embeddingDocPath string, embeddingLine int, - usefulModes []CommentFilterMode, + usefulModes []Mode, ) { if containsMode(usefulModes, mode) { return @@ -294,8 +294,8 @@ func fileURL(path string, line int) string { } // formatModes formats modes for a warning message. -func formatModes(modes []CommentFilterMode) string { - order := []CommentFilterMode{ +func formatModes(modes []Mode) string { + order := []Mode{ RetainAll, RetainNone, RetainDocumentation, @@ -314,7 +314,7 @@ func formatModes(modes []CommentFilterMode) string { } // containsMode reports whether the list includes the given mode. -func containsMode(modes []CommentFilterMode, mode CommentFilterMode) bool { +func containsMode(modes []Mode, mode Mode) bool { for _, usefulMode := range modes { if usefulMode == mode { return true diff --git a/embedding/commentfilter/visual_basic.go b/embedding/commentfilter/visual_basic.go index fd05769..645d61c 100644 --- a/embedding/commentfilter/visual_basic.go +++ b/embedding/commentfilter/visual_basic.go @@ -30,13 +30,13 @@ const ( ) // VisualBasicCommentFilter filters the Visual Basic comment forms: -// - documentation comments starting with `'''`; +// - documentation comments starting with `'''`; // - apostrophe comments starting with `'`; // - REM comments starting with `REM`. type VisualBasicCommentFilter struct{} // Filter removes or preserves Visual Basic comments according to mode. -func (VisualBasicCommentFilter) Filter(lines []string, mode CommentFilterMode) []string { +func (VisualBasicCommentFilter) Filter(lines []string, mode Mode) []string { var filtered []string for _, line := range lines { filteredLine, hadComment := filterVisualBasicLine(line, mode) @@ -50,7 +50,7 @@ func (VisualBasicCommentFilter) Filter(lines []string, mode CommentFilterMode) [ } // filterVisualBasicLine removes or preserves one Visual Basic comment. 
-func filterVisualBasicLine(line string, mode CommentFilterMode) (string, bool) { +func filterVisualBasicLine(line string, mode Mode) (string, bool) { var result strings.Builder position := 0 for position < len(line) { diff --git a/embedding/parsing/instruction.go b/embedding/parsing/instruction.go index 9c6ea87..13e6089 100644 --- a/embedding/parsing/instruction.go +++ b/embedding/parsing/instruction.go @@ -55,7 +55,7 @@ type Instruction struct { Fragment string StartPattern *Pattern EndPattern *Pattern - CommentMode commentfilter.CommentFilterMode + CommentMode commentfilter.Mode DocumentationFile string DocumentationLine int Configuration configuration.Configuration From d856ee4204128278513a1cd6b0b62de9f7f18845 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 11:35:51 +0200 Subject: [PATCH 23/27] Remove redundant modes formatter. --- embedding/commentfilter/syntax.go | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go index 66d92bc..0606564 100644 --- a/embedding/commentfilter/syntax.go +++ b/embedding/commentfilter/syntax.go @@ -266,6 +266,11 @@ func warnUselessCommentsMode( if containsMode(usefulModes, mode) { return } + var wrappedModes []string + for _, mode := range usefulModes { + wrappedModes = append(wrappedModes, fmt.Sprintf("`%s`", mode)) + } + slog.Warn( fmt.Sprintf( "`comments=\"%s\"` was requested in `%s` for `%s`, but this mode does not have "+ @@ -273,7 +278,7 @@ func warnUselessCommentsMode( mode, fileURL(embeddingDocPath, embeddingLine), filePath, - formatModes(usefulModes), + strings.Join(wrappedModes, ", "), ), ) } @@ -293,26 +298,6 @@ func fileURL(path string, line int) string { return url } -// formatModes formats modes for a warning message. 
-func formatModes(modes []Mode) string { - order := []Mode{ - RetainAll, - RetainNone, - RetainDocumentation, - RetainRegular, - RetainInline, - RetainBlock, - } - var result []string - for _, mode := range order { - if containsMode(modes, mode) { - result = append(result, fmt.Sprintf("`%s`", mode)) - } - } - - return strings.Join(result, ", ") -} - // containsMode reports whether the list includes the given mode. func containsMode(modes []Mode, mode Mode) bool { for _, usefulMode := range modes { From ce6bd709c436d1fd120448349822d338f318ead3 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 12:18:23 +0200 Subject: [PATCH 24/27] Improve readability. --- embedding/commentfilter/config.go | 181 ++++++++++ embedding/commentfilter/filter.go | 110 +++++++ .../commentfilter/marker_comment_filter.go | 32 +- embedding/commentfilter/mode.go | 4 +- embedding/commentfilter/syntax.go | 310 ------------------ 5 files changed, 319 insertions(+), 318 deletions(-) create mode 100644 embedding/commentfilter/config.go delete mode 100644 embedding/commentfilter/syntax.go diff --git a/embedding/commentfilter/config.go b/embedding/commentfilter/config.go new file mode 100644 index 0000000..499e0d8 --- /dev/null +++ b/embedding/commentfilter/config.go @@ -0,0 +1,181 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +// filtersByExtension is a mapping of the file extension to its comment filter. +var filtersByExtension = map[string]filterEntry{ + // Java/Kotlin + ".java": filterConfig(MarkerCommentFilter{Syntax: javaSyntax}, allModes), + ".kt": filterConfig(MarkerCommentFilter{Syntax: javaSyntax}, allModes), + ".kts": filterConfig(MarkerCommentFilter{Syntax: javaSyntax}, allModes), + ".groovy": filterConfig(MarkerCommentFilter{Syntax: javaSyntax}, allModes), + + // C# + ".cs": filterConfig(MarkerCommentFilter{Syntax: csharpSyntax}, allModes), + + // C/C++ + ".c": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".h": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".cc": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".cpp": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".cxx": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".hh": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".hpp": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + ".hxx": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + + // JavaScript + ".js": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), + ".jsx": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), + ".ts": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), + 
".tsx": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), + + // Go + ".go": filterConfig(MarkerCommentFilter{Syntax: goSyntax}, regularModes), + + // Protobuf + ".proto": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), + + // Python + ".py": filterConfig(MarkerCommentFilter{Syntax: hashLineSyntax}, noneMode), + ".pyi": filterConfig(MarkerCommentFilter{Syntax: hashLineSyntax}, noneMode), + ".pyw": filterConfig(MarkerCommentFilter{Syntax: hashLineSyntax}, noneMode), + + // YAML + ".yml": filterConfig(MarkerCommentFilter{Syntax: hashLineSyntax}, noneMode), + ".yaml": filterConfig(MarkerCommentFilter{Syntax: hashLineSyntax}, noneMode), + + // XML + ".xml": filterConfig(MarkerCommentFilter{Syntax: xmlSyntax}, noneMode), + + // HTML + ".html": filterConfig(MarkerCommentFilter{Syntax: xmlSyntax}, noneMode), + ".htm": filterConfig(MarkerCommentFilter{Syntax: xmlSyntax}, noneMode), + + // Visual Basic + ".vb": filterConfig(VisualBasicCommentFilter{}, documentationModes), + ".bas": filterConfig(VisualBasicCommentFilter{}, documentationModes), + ".vbs": filterConfig(VisualBasicCommentFilter{}, documentationModes), + ".vbscript": filterConfig(VisualBasicCommentFilter{}, documentationModes), +} + +// Filterer removes or preserves source comments according to the requested mode. +type Filterer interface { + Filter(lines []string, mode Mode) []string +} + +// filterEntry stores a comment filter and supported modes for its language. 
+type filterEntry struct { + filter Filterer + supportedModes []Mode +} + +var javaSyntax = CommentMarker{ + Inline: []string{"//"}, + Block: []BlockMarker{ + {Start: "/*", End: "*/"}, + }, + Documentation: DocumentationMarker{ + Block: []BlockMarker{{Start: "/**", End: "*/"}}, + }, + QuoteChars: "\"'", +} + +var jsSyntax = CommentMarker{ + Inline: []string{"//"}, + Block: []BlockMarker{ + {Start: "/*", End: "*/"}, + }, + Documentation: DocumentationMarker{ + Block: []BlockMarker{{Start: "/**", End: "*/"}}, + }, + QuoteChars: "\"'`", +} + +var csharpSyntax = CommentMarker{ + Inline: []string{"//"}, + Block: []BlockMarker{ + {Start: "/*", End: "*/"}, + }, + Documentation: DocumentationMarker{ + Inline: []string{"///"}, + Block: []BlockMarker{{Start: "/**", End: "*/"}}, + }, + QuoteChars: "\"'`", +} + +var cStyleSyntax = CommentMarker{ + Inline: []string{"//"}, + Block: []BlockMarker{ + {Start: "/*", End: "*/"}, + }, + QuoteChars: "\"'", +} + +var goSyntax = CommentMarker{ + Inline: []string{"//"}, + Block: []BlockMarker{ + {Start: "/*", End: "*/"}, + }, + QuoteChars: "\"'`", +} + +var hashLineSyntax = CommentMarker{ + Inline: []string{"#"}, + QuoteChars: "\"'", +} + +var xmlSyntax = CommentMarker{ + Block: []BlockMarker{ + {Start: "<!--", End: "-->"}, + }, + QuoteChars: "\"'", +} + +var allModes = []Mode{ + RetainAll, + RetainNone, + RetainDocumentation, + RetainRegular, + RetainInline, + RetainBlock, +} + +var noneMode = []Mode{RetainAll, RetainNone} + +var regularModes = []Mode{ + RetainAll, + RetainNone, + RetainInline, + RetainBlock, +} + +var documentationModes = []Mode{ + RetainAll, + RetainNone, + RetainDocumentation, + RetainRegular, +} + +// filterConfig creates a filter registry entry. 
+func filterConfig(filter Filterer, supportedModes []Mode) filterEntry { + return filterEntry{ + filter: filter, + supportedModes: supportedModes, + } +} diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 5cb82fb..20c82e8 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -18,6 +18,13 @@ package commentfilter +import ( + "fmt" + "log/slog" + "path/filepath" + "strings" +) + // Filter returns source lines with comments stripped according to the requested mode. func Filter( lines []string, @@ -36,3 +43,106 @@ func Filter( return filter.Filter(lines, mode) } + +// filterFor returns the comment filter registered for the given file path and warns on odd modes. +func filterFor( + filePath string, + mode Mode, + embeddingDocPath string, + embeddingLine int, +) (Filterer, bool) { + extension := normalizeExtension(filepath.Ext(filePath)) + entry, found := filtersByExtension[extension] + if !found { + warnUnsupportedFileType(filePath, mode, embeddingDocPath, embeddingLine) + return nil, false + } + warnUnsupportedCommentsMode(filePath, mode, embeddingDocPath, embeddingLine, entry.supportedModes) + + return entry.filter, true +} + +// normalizeExtension returns a lowercase file extension with a leading dot. +func normalizeExtension(extension string) string { + normalized := strings.ToLower(extension) + if normalized == "" || strings.HasPrefix(normalized, ".") { + return normalized + } + + return "." + normalized +} + +// warnUnsupportedFileType logs when comments filtering is requested for an unsupported file. 
+func warnUnsupportedFileType( + filePath string, + mode Mode, + embeddingDocPath string, + embeddingLine int, +) { + if mode == RetainAll { + return + } + slog.Warn( + fmt.Sprintf( + "`comments=\"%s\"` was requested in `%s` for `%s`, "+ + "but comment filtering is not supported for this file extension.", + mode, + fileURL(embeddingDocPath, embeddingLine), + filePath, + ), + ) +} + +// warnUnsupportedCommentsMode logs when the selected mode is not supported for a file. +func warnUnsupportedCommentsMode( + filePath string, + mode Mode, + embeddingDocPath string, + embeddingLine int, + usefulModes []Mode, +) { + if containsMode(usefulModes, mode) { + return + } + var wrappedModes []string + for _, mode := range usefulModes { + wrappedModes = append(wrappedModes, fmt.Sprintf("`%s`", mode)) + } + + slog.Warn( + fmt.Sprintf( + "`comments=\"%s\"` was requested in `%s` for `%s`, but this mode does not have "+ + "a distinct meaning for this file type. Supported modes are: %s.", + mode, + fileURL(embeddingDocPath, embeddingLine), + filePath, + strings.Join(wrappedModes, ", "), + ), + ) +} + +// fileURL returns an absolute file URL for a local path and line. +func fileURL(path string, line int) string { + absolutePath, err := filepath.Abs(path) + if err != nil { + return "file://" + path + } + + url := "file://" + absolutePath + if line > 0 { + url = fmt.Sprintf("%s:%d", url, line) + } + + return url +} + +// containsMode reports whether the list includes the given mode. 
+func containsMode(modes []Mode, mode Mode) bool { + for _, usefulMode := range modes { + if usefulMode == mode { + return true + } + } + + return false +} diff --git a/embedding/commentfilter/marker_comment_filter.go b/embedding/commentfilter/marker_comment_filter.go index 5933649..75a4b3c 100644 --- a/embedding/commentfilter/marker_comment_filter.go +++ b/embedding/commentfilter/marker_comment_filter.go @@ -20,14 +20,34 @@ package commentfilter import "strings" -// MarkerCommentFilter removes comments using lexical markers declared in Syntax. +// BlockMarker describes a block comment marker pair. +type BlockMarker struct { + Start string + End string +} + +// DocumentationMarker describes API documentation comment markers. +type DocumentationMarker struct { + Inline []string + Block []BlockMarker +} + +// CommentMarker describes lexical comment markers and string delimiters for a language family. +type CommentMarker struct { + Inline []string + Block []BlockMarker + Documentation DocumentationMarker + QuoteChars string +} + +// MarkerCommentFilter removes comments using lexical markers declared in CommentMarker. type MarkerCommentFilter struct { - Syntax Syntax + Syntax CommentMarker } type blockState struct { active bool - block BlockSyntax + block BlockMarker keep bool } @@ -182,7 +202,7 @@ func (f *markerLineFilter) consumeInlineComment(keep bool) { } // startBlockComment records the active block comment markers and whether to keep them. -func (f *markerLineFilter) startBlockComment(block BlockSyntax, keep bool) { +func (f *markerLineFilter) startBlockComment(block BlockMarker, keep bool) { f.hadComment = true f.state.active = true f.state.block = block @@ -207,12 +227,12 @@ func prefixAt(line string, position int, prefixes []string) (string, bool) { } // blockAt reports whether one of the given block markers starts at the position. 
-func blockAt(line string, position int, blocks []BlockSyntax) (BlockSyntax, bool) { +func blockAt(line string, position int, blocks []BlockMarker) (BlockMarker, bool) { for _, block := range blocks { if strings.HasPrefix(line[position:], block.Start) { return block, true } } - return BlockSyntax{}, false + return BlockMarker{}, false } diff --git a/embedding/commentfilter/mode.go b/embedding/commentfilter/mode.go index b5cadda..9986471 100644 --- a/embedding/commentfilter/mode.go +++ b/embedding/commentfilter/mode.go @@ -20,7 +20,7 @@ package commentfilter import "fmt" -// Mode CommentFilterMode controls which source comments are retained in embedded snippets. +// Mode controls which source comments are affected by the comment filter. type Mode string const ( @@ -38,7 +38,7 @@ const ( RetainBlock Mode = "block" ) -// ParseMode converts an embed-code `comments` attribute value into a CommentFilterMode. +// ParseMode converts an embed-code `comments` attribute value into a comment filter Mode. func ParseMode(value string) (Mode, error) { switch Mode(value) { case "": diff --git a/embedding/commentfilter/syntax.go b/embedding/commentfilter/syntax.go deleted file mode 100644 index 0606564..0000000 --- a/embedding/commentfilter/syntax.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2026, TeamDev. All rights reserved. -// -// Redistribution and use in source and/or binary forms, with or without -// modification, must retain the above copyright notice and the following -// disclaimer. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -package commentfilter - -import ( - "fmt" - "log/slog" - "path/filepath" - "strings" -) - -// BlockSyntax describes a block comment marker pair. -type BlockSyntax struct { - Start string - End string -} - -// DocumentationSyntax describes API documentation comment markers. -type DocumentationSyntax struct { - Inline []string - Block []BlockSyntax -} - -// Syntax describes lexical comment markers and string delimiters for a language family. -type Syntax struct { - Inline []string - Block []BlockSyntax - Documentation DocumentationSyntax - QuoteChars string -} - -// Filterer removes or preserves source comments according to the requested mode. -type Filterer interface { - Filter(lines []string, mode Mode) []string -} - -// filterEntry stores a comment filter and the modes that make sense for its language. -type filterEntry struct { - filter Filterer - usefulModes []Mode -} - -// filterFor returns the comment filter registered for the given file path and warns on odd modes. 
-func filterFor( - filePath string, - mode Mode, - embeddingDocPath string, - embeddingLine int, -) (Filterer, bool) { - extension := normalizeExtension(filepath.Ext(filePath)) - entry, found := filtersByExtension[extension] - if !found { - warnUnsupportedCommentsMode(filePath, mode, embeddingDocPath, embeddingLine) - return nil, false - } - warnUselessCommentsMode(filePath, mode, embeddingDocPath, embeddingLine, entry.usefulModes) - - return entry.filter, true -} - -// normalizeExtension returns a lowercase file extension with a leading dot. -func normalizeExtension(extension string) string { - normalized := strings.ToLower(extension) - if normalized == "" || strings.HasPrefix(normalized, ".") { - return normalized - } - - return "." + normalized -} - -var javaSyntax = Syntax{ - Inline: []string{"//"}, - Block: []BlockSyntax{ - {Start: "/*", End: "*/"}, - }, - Documentation: DocumentationSyntax{ - Block: []BlockSyntax{{Start: "/**", End: "*/"}}, - }, - QuoteChars: "\"'", -} - -var jsSyntax = Syntax{ - Inline: []string{"//"}, - Block: []BlockSyntax{ - {Start: "/*", End: "*/"}, - }, - Documentation: DocumentationSyntax{ - Block: []BlockSyntax{{Start: "/**", End: "*/"}}, - }, - QuoteChars: "\"'`", -} - -var csharpSyntax = Syntax{ - Inline: []string{"//"}, - Block: []BlockSyntax{ - {Start: "/*", End: "*/"}, - }, - Documentation: DocumentationSyntax{ - Inline: []string{"///"}, - Block: []BlockSyntax{{Start: "/**", End: "*/"}}, - }, - QuoteChars: "\"'`", -} - -var cStyleSyntax = Syntax{ - Inline: []string{"//"}, - Block: []BlockSyntax{ - {Start: "/*", End: "*/"}, - }, - QuoteChars: "\"'", -} - -var goSyntax = Syntax{ - Inline: []string{"//"}, - Block: []BlockSyntax{ - {Start: "/*", End: "*/"}, - }, - QuoteChars: "\"'`", -} - -var hashLineSyntax = Syntax{ - Inline: []string{"#"}, - QuoteChars: "\"'", -} - -var xmlSyntax = Syntax{ - Block: []BlockSyntax{ - {Start: "<!--", End: "-->"}, - }, - QuoteChars: "\"'", -} - -var allCommentModes = []Mode{ - RetainAll, - RetainNone, - 
RetainDocumentation, - RetainRegular, - RetainInline, - RetainBlock, -} - -var allNoneCommentModes = []Mode{RetainAll, RetainNone} - -var inlineBlockCommentModes = []Mode{ - RetainAll, - RetainNone, - RetainInline, - RetainBlock, -} - -var regularDocCommentModes = []Mode{ - RetainAll, - RetainNone, - RetainDocumentation, - RetainRegular, -} - -var filtersByExtension = map[string]filterEntry{ - // Java/Kotlin - ".java": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), - ".kt": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), - ".kts": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), - ".groovy": newFilterEntry(MarkerCommentFilter{Syntax: javaSyntax}, allCommentModes), - - // C# - ".cs": newFilterEntry(MarkerCommentFilter{Syntax: csharpSyntax}, allCommentModes), - - // C/C++ - ".c": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".h": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".cc": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".cpp": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".cxx": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".hh": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".hpp": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - ".hxx": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - - // JavaScript - ".js": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), - ".jsx": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), - ".ts": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), - ".tsx": newFilterEntry(MarkerCommentFilter{Syntax: jsSyntax}, allCommentModes), - - // Go - ".go": 
newFilterEntry(MarkerCommentFilter{Syntax: goSyntax}, inlineBlockCommentModes), - - // Protobuf - ".proto": newFilterEntry(MarkerCommentFilter{Syntax: cStyleSyntax}, inlineBlockCommentModes), - - // Python - ".py": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), - ".pyi": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), - ".pyw": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), - - // YAML - ".yml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), - ".yaml": newFilterEntry(MarkerCommentFilter{Syntax: hashLineSyntax}, allNoneCommentModes), - - // XML - ".xml": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allNoneCommentModes), - - // HTML - ".html": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allNoneCommentModes), - ".htm": newFilterEntry(MarkerCommentFilter{Syntax: xmlSyntax}, allNoneCommentModes), - - // Visual Basic - ".vb": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), - ".bas": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), - ".vbs": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), - ".vbscript": newFilterEntry(VisualBasicCommentFilter{}, regularDocCommentModes), -} - -// newFilterEntry creates a filter registry entry. -func newFilterEntry(filter Filterer, usefulModes []Mode) filterEntry { - return filterEntry{ - filter: filter, - usefulModes: usefulModes, - } -} - -// warnUnsupportedCommentsMode logs when comments filtering is requested for an unsupported file. 
-func warnUnsupportedCommentsMode( - filePath string, - mode Mode, - embeddingDocPath string, - embeddingLine int, -) { - if mode == RetainAll { - return - } - slog.Warn( - fmt.Sprintf( - "`comments=\"%s\"` was requested in `%s` for `%s`, "+ - "but comment filtering is not supported for this file extension.", - mode, - fileURL(embeddingDocPath, embeddingLine), - filePath, - ), - ) -} - -// warnUselessCommentsMode logs when the selected mode has no distinct meaning for a file. -func warnUselessCommentsMode( - filePath string, - mode Mode, - embeddingDocPath string, - embeddingLine int, - usefulModes []Mode, -) { - if containsMode(usefulModes, mode) { - return - } - var wrappedModes []string - for _, mode := range usefulModes { - wrappedModes = append(wrappedModes, fmt.Sprintf("`%s`", mode)) - } - - slog.Warn( - fmt.Sprintf( - "`comments=\"%s\"` was requested in `%s` for `%s`, but this mode does not have "+ - "a distinct meaning for this file type. Supported modes are: %s.", - mode, - fileURL(embeddingDocPath, embeddingLine), - filePath, - strings.Join(wrappedModes, ", "), - ), - ) -} - -// fileURL returns an absolute file URL for a local path and line. -func fileURL(path string, line int) string { - absolutePath, err := filepath.Abs(path) - if err != nil { - return "file://" + path - } - - url := "file://" + absolutePath - if line > 0 { - url = fmt.Sprintf("%s:%d", url, line) - } - - return url -} - -// containsMode reports whether the list includes the given mode. -func containsMode(modes []Mode, mode Mode) bool { - for _, usefulMode := range modes { - if usefulMode == mode { - return true - } - } - - return false -} From 697c66d1feea8a91d8a187d68a966bbf8daa5f52 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 12:31:38 +0200 Subject: [PATCH 25/27] Improve documentation. 
--- embedding/commentfilter/config.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/embedding/commentfilter/config.go b/embedding/commentfilter/config.go index 499e0d8..99541a9 100644 --- a/embedding/commentfilter/config.go +++ b/embedding/commentfilter/config.go @@ -147,6 +147,7 @@ var xmlSyntax = CommentMarker{ QuoteChars: "\"'", } +// allModes lists all comment filtering modes. var allModes = []Mode{ RetainAll, RetainNone, @@ -156,8 +157,11 @@ var allModes = []Mode{ RetainBlock, } +// noneMode lists modes for languages whose comments are not separated into supported subtypes. var noneMode = []Mode{RetainAll, RetainNone} +// regularModes lists modes for languages that distinguish inline and block comments, +// but do not expose documentation comments as a separate supported type. var regularModes = []Mode{ RetainAll, RetainNone, @@ -165,6 +169,8 @@ var regularModes = []Mode{ RetainBlock, } +// documentationModes lists modes for languages that distinguish documentation and regular comments, +// but do not expose inline and block comments as separate supported types. var documentationModes = []Mode{ RetainAll, RetainNone, From a90a6f712748e5a646a0f80cfa4655c8a5a1f6a2 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 12:45:54 +0200 Subject: [PATCH 26/27] Improve readability. --- embedding/commentfilter/config.go | 9 ++------- embedding/commentfilter/filter.go | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/embedding/commentfilter/config.go b/embedding/commentfilter/config.go index 99541a9..913a63d 100644 --- a/embedding/commentfilter/config.go +++ b/embedding/commentfilter/config.go @@ -74,14 +74,9 @@ var filtersByExtension = map[string]filterEntry{ ".vbscript": filterConfig(VisualBasicCommentFilter{}, documentationModes), } -// Filterer removes or preserves source comments according to the requested mode. 
-type Filterer interface { - Filter(lines []string, mode Mode) []string -} - // filterEntry stores a comment filter and supported modes for its language. type filterEntry struct { - filter Filterer + filter CommentFilter supportedModes []Mode } @@ -179,7 +174,7 @@ var documentationModes = []Mode{ } // filterConfig creates a filter registry entry. -func filterConfig(filter Filterer, supportedModes []Mode) filterEntry { +func filterConfig(filter CommentFilter, supportedModes []Mode) filterEntry { return filterEntry{ filter: filter, supportedModes: supportedModes, diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 20c82e8..2148a23 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -25,6 +25,18 @@ import ( "strings" ) +// EmbeddingCommentFilter filters comments for one embed-code instruction. +type EmbeddingCommentFilter struct { + filePath string + embeddingDocPath string + embeddingLine int +} + +// CommentFilter strips source comments according to the requested mode. +type CommentFilter interface { + Filter(lines []string, mode Mode) []string +} + // Filter returns source lines with comments stripped according to the requested mode. func Filter( lines []string, @@ -33,10 +45,21 @@ func Filter( embeddingDocPath string, embeddingLine int, ) []string { + filter := EmbeddingCommentFilter{ + filePath: filePath, + embeddingDocPath: embeddingDocPath, + embeddingLine: embeddingLine, + } + + return filter.Filter(lines, mode) +} + +// Filter strips comments using the filter registered in the filtersByExtension. 
+func (f EmbeddingCommentFilter) Filter(lines []string, mode Mode) []string { if mode == RetainAll { return lines } - filter, found := filterFor(filePath, mode, embeddingDocPath, embeddingLine) + filter, found := filterFor(f.filePath, mode, f.embeddingDocPath, f.embeddingLine) if !found { return lines } @@ -50,7 +73,7 @@ func filterFor( mode Mode, embeddingDocPath string, embeddingLine int, -) (Filterer, bool) { +) (CommentFilter, bool) { extension := normalizeExtension(filepath.Ext(filePath)) entry, found := filtersByExtension[extension] if !found { From e4461ff003cdebc266c30ba1fe7397e6ac1f1799 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Wed, 20 May 2026 13:01:15 +0200 Subject: [PATCH 27/27] Improve readability. --- EMBEDDING.md | 4 ++-- embedding/commentfilter/filter.go | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/EMBEDDING.md b/EMBEDDING.md index 70a3f9f..a83082b 100644 --- a/EMBEDDING.md +++ b/EMBEDDING.md @@ -131,9 +131,9 @@ Unknown extensions are embedded unchanged. Not all languages has difference between documentation/regular or inline/block comments. 
-The table below lists the supported languages and useful `comments` modes for them: +The table below lists the supported languages and supported `comments` modes for them: -| Language | Extensions | Useful `comments` modes | +| Language | Extensions | Supported `comments` modes | |------------------------|---------------------------------------------------------|--------------------------------------------------------------| | Java, Kotlin, Groovy | `.java`, `.kt`, `.kts`, `.groovy` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | | C# | `.cs` | `all`, `none`, `documentation`, `regular`, `inline`, `block` | diff --git a/embedding/commentfilter/filter.go b/embedding/commentfilter/filter.go index 2148a23..39544db 100644 --- a/embedding/commentfilter/filter.go +++ b/embedding/commentfilter/filter.go @@ -122,13 +122,13 @@ func warnUnsupportedCommentsMode( mode Mode, embeddingDocPath string, embeddingLine int, - usefulModes []Mode, + supportedModes []Mode, ) { - if containsMode(usefulModes, mode) { + if containsMode(supportedModes, mode) { return } var wrappedModes []string - for _, mode := range usefulModes { + for _, mode := range supportedModes { wrappedModes = append(wrappedModes, fmt.Sprintf("`%s`", mode)) } @@ -161,8 +161,8 @@ func fileURL(path string, line int) string { // containsMode reports whether the list includes the given mode. func containsMode(modes []Mode, mode Mode) bool { - for _, usefulMode := range modes { - if usefulMode == mode { + for _, supportedMode := range modes { + if supportedMode == mode { return true } }