From c4bced20d5b3bf99d16f9d17fab1cd086ab7c519 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:28:58 -0400 Subject: [PATCH 01/38] Add multi-language support design doc and implementation checklist Co-Authored-By: Claude Opus 4.7 (1M context) --- MULTI_LANGUAGE_SUPPORT.md | 629 ++++++++++++++++++++++++++++++++ docs/rust-typescript-support.md | 413 +++++++++++++++++++++ 2 files changed, 1042 insertions(+) create mode 100644 MULTI_LANGUAGE_SUPPORT.md create mode 100644 docs/rust-typescript-support.md diff --git a/MULTI_LANGUAGE_SUPPORT.md b/MULTI_LANGUAGE_SUPPORT.md new file mode 100644 index 0000000..a1a0a89 --- /dev/null +++ b/MULTI_LANGUAGE_SUPPORT.md @@ -0,0 +1,629 @@ +# Multi-Language Support Guide + +A comprehensive checklist for adding new language support to diffguard. This document covers the one-time repo reorganization needed to enable multi-language support, defines the interfaces each language must implement, and provides a reusable per-language checklist. + +--- + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Repo Reorganization (One-Time)](#repo-reorganization-one-time) +3. [Interface Definitions](#interface-definitions) +4. [Per-Language Implementation Checklist](#per-language-implementation-checklist) +5. [Language-Specific Notes](#language-specific-notes) +6. 
[Key Design Decisions](#key-design-decisions) + +--- + +## Architecture Overview + +### What's Already Language-Agnostic + +These components work for any language with zero changes: + +| Component | Location | What It Does | +|-----------|----------|--------------| +| Report types | `internal/report/report.go` | `Finding`, `Section`, `Severity`, text/JSON output | +| Tier classification | `internal/mutation/tiers.go` | Groups mutation operators into Tier 1/2/3 by name | +| Graph algorithms | `internal/deps/deps.go` | Cycle detection, afferent/efferent coupling, instability, SDP violations | +| Git churn counting | `internal/churn/churn.go` | `git log --oneline --follow` to count commits per file | +| Diff format parsing | `internal/diff/diff.go` | Unified diff hunk header parsing (`@@ -a,b +c,d @@`) | +| CLI/config | `cmd/diffguard/main.go` | Flag parsing, exit code logic, analyzer orchestration | + +### What's Tightly Coupled to Go + +Every item below must be abstracted behind an interface and re-implemented per language: + +| Concern | Current Location | Go-Specific Mechanism | +|---------|------------------|-----------------------| +| File filtering | `diff/diff.go:92,175-177,201-208` | Hardcoded `*.go` glob, `_test.go` exclusion | +| Function identification | `sizes/sizes.go`, `complexity/complexity.go`, `churn/churn.go` | `*ast.FuncDecl` + receiver detection (duplicated 3x) | +| Complexity scoring | `complexity/complexity.go` | Walks `IfStmt`, `ForStmt`, `SwitchStmt`, `SelectStmt`, etc. 
| +| Import parsing | `deps/deps.go` | `parser.ParseDir()` + `go.mod` module path extraction | +| Mutation generation | `mutation/generate.go` | Go AST node pattern matching for 8 operator types | +| Mutation application | `mutation/apply.go` | Go AST rewriting + `go/printer` | +| Disable annotations | `mutation/annotations.go` | Scans Go comments + `*ast.FuncDecl` ranges | +| Test execution | `mutation/mutation.go` | `go test -overlay` (Go build system feature) | + +--- + +## Repo Reorganization (One-Time) + +These steps prepare the repo structure for multiple languages. Each step must leave all existing tests passing. + +### Step 1: Create the language abstraction layer + +- [ ] Create `internal/lang/lang.go` with all interface definitions (see [Interface Definitions](#interface-definitions)) +- [ ] Create `internal/lang/detect.go` with language auto-detection logic +- [ ] Create `internal/lang/registry.go` with a `Register()`/`Get()`/`All()` registry + +### Step 2: Extract Go file filtering + +- [ ] Create `internal/lang/goanalyzer/` package +- [ ] Implement `FileFilter` for Go (extensions: `.go`, test exclusion: `_test.go`, diff globs: `*.go`) +- [ ] Modify `diff.Parse()` and `diff.CollectPaths()` to accept a `FileFilter` parameter instead of hardcoded `.go` checks +- [ ] Update all callers in `cmd/diffguard/main.go` to pass the Go file filter + +### Step 3: Extract Go function extraction + +- [ ] Move function identification logic from `sizes.go`, `complexity.go`, and `churn.go` into `internal/lang/goanalyzer/parse.go` +- [ ] Consolidate the three duplicate `funcName()` implementations into one shared helper +- [ ] Implement `FunctionExtractor` interface for Go +- [ ] Modify `internal/sizes/sizes.go` to call through the interface + +### Step 4: Extract Go complexity scoring + +- [ ] Implement `ComplexityCalculator` interface for Go in `internal/lang/goanalyzer/complexity.go` +- [ ] Implement `ComplexityScorer` interface for Go (can share implementation with 
`ComplexityCalculator`) +- [ ] Modify `internal/complexity/complexity.go` to call through the interface +- [ ] Modify `internal/churn/churn.go` to call through the `ComplexityScorer` interface +- [ ] Delete the duplicated simplified `computeComplexity()` in churn + +### Step 5: Extract Go import resolution + +- [ ] Implement `ImportResolver` interface for Go in `internal/lang/goanalyzer/deps.go` +- [ ] Split `internal/deps/deps.go` into `graph.go` (pure algorithms) and `deps.go` (orchestration) +- [ ] Modify `deps.go` orchestration to call through the interface + +### Step 6: Extract Go mutation interfaces + +- [ ] Implement `MutantGenerator` in `internal/lang/goanalyzer/mutation_generate.go` +- [ ] Implement `MutantApplier` in `internal/lang/goanalyzer/mutation_apply.go` +- [ ] Implement `AnnotationScanner` in `internal/lang/goanalyzer/mutation_annotate.go` +- [ ] Implement `TestRunner` in `internal/lang/goanalyzer/testrunner.go` +- [ ] Modify `internal/mutation/` to call through interfaces +- [ ] Keep `tiers.go` in `internal/mutation/` (it's already language-agnostic) + +### Step 7: Wire up registration and detection + +- [ ] Add `init()` function to `internal/lang/goanalyzer/` that calls `lang.Register()` +- [ ] Add blank import `_ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer"` in `cmd/diffguard/main.go` +- [ ] Add `--language` CLI flag (default: auto-detect) +- [ ] Modify `cmd/diffguard/main.go` to resolve language and pass it through the analyzer pipeline +- [ ] Add tests for language detection and registration + +### Resulting directory structure + +``` +internal/ + lang/ + lang.go # Interface definitions + detect.go # Auto-detection from file extensions / manifest files + registry.go # Register/Get/All + goanalyzer/ # Go implementation + goanalyzer.go # init() + Language interface impl + parse.go # Shared Go AST helpers (funcName, etc.) 
+ complexity.go # ComplexityCalculator + ComplexityScorer + sizes.go # FunctionExtractor + deps.go # ImportResolver + mutation_generate.go # MutantGenerator + mutation_apply.go # MutantApplier + mutation_annotate.go # AnnotationScanner + testrunner.go # TestRunner (go test -overlay) + diff/ # Modified: parameterized file filtering + complexity/ # Modified: delegates to lang.ComplexityCalculator + sizes/ # Modified: delegates to lang.FunctionExtractor + deps/ + graph.go # Pure graph algorithms (extracted, unchanged) + deps.go # Orchestration, delegates to lang.ImportResolver + churn/ # Modified: delegates to lang.ComplexityScorer + mutation/ # Modified: delegates to lang interfaces + tiers.go # Unchanged (already language-agnostic) + report/ # Unchanged +``` + +--- + +## Interface Definitions + +Each language implementation must satisfy a top-level `Language` interface that provides access to all sub-interfaces. + +### Language (top-level) + +``` +Language + Name() string -- identifier: "go", "python", "typescript", etc. + FileFilter() FileFilter -- which files belong to this language + ComplexityCalculator() ComplexityCalculator + FunctionExtractor() FunctionExtractor + ImportResolver() ImportResolver + ComplexityScorer() ComplexityScorer + MutantGenerator() MutantGenerator + MutantApplier() MutantApplier + AnnotationScanner() AnnotationScanner + TestRunner() TestRunner +``` + +### FileFilter + +Controls which files the diff parser includes and which are excluded as test files. + +``` +FileFilter + Extensions []string -- source extensions incl. dot: [".go"], [".py"], [".ts", ".tsx"] + IsTestFile func(path string) bool -- returns true for test files to exclude from analysis + DiffGlobs []string -- globs passed to `git diff -- ` +``` + +### FunctionExtractor + +Parses source files, finds function/method declarations, reports their line ranges and sizes. 
+ +``` +FunctionExtractor + ExtractFunctions(absPath, FileChange) -> ([]FunctionSize, *FileSize, error) + +FunctionInfo { File, Line, EndLine, Name } +FunctionSize { FunctionInfo, Lines } +FileSize { Path, Lines } +``` + +### ComplexityCalculator + +Computes cognitive complexity per function using the language's control flow constructs. + +``` +ComplexityCalculator + AnalyzeFile(absPath, FileChange) -> ([]FunctionComplexity, error) + +FunctionComplexity { FunctionInfo, Complexity int } +``` + +### ComplexityScorer + +Lightweight complexity scoring for churn weighting. May reuse `ComplexityCalculator` or be a faster approximation. + +``` +ComplexityScorer + ScoreFile(absPath, FileChange) -> ([]FunctionComplexity, error) +``` + +### ImportResolver + +Detects the project's module root and scans package-level imports to build the dependency graph. + +``` +ImportResolver + DetectModulePath(repoPath) -> (string, error) + ScanPackageImports(repoPath, pkgDir, modulePath) -> map[string]map[string]bool +``` + +### MutantGenerator + +Finds mutation sites in source code within changed regions. + +``` +MutantGenerator + GenerateMutants(absPath, FileChange, disabledLines map[int]bool) -> ([]MutantSite, error) + +MutantSite { File, Line, Description, Operator } +``` + +Operator names must use the canonical names so tiering works: +`conditional_boundary`, `negate_conditional`, `math_operator`, `return_value`, +`boolean_substitution`, `incdec`, `branch_removal`, `statement_deletion` + +New language-specific operators may be added but must be registered in `tiers.go`. + +### MutantApplier + +Applies a mutation to a source file and returns the modified source bytes. + +``` +MutantApplier + ApplyMutation(absPath, MutantSite) -> ([]byte, error) +``` + +### AnnotationScanner + +Scans source files for `mutator-disable-*` comments and returns the set of source lines to skip. 
+ +``` +AnnotationScanner + ScanAnnotations(absPath) -> (disabledLines map[int]bool, error) +``` + +### TestRunner + +Executes the test suite against mutated code and reports whether the mutation was killed. + +``` +TestRunner + RunTest(TestRunConfig) -> (killed bool, output string, error) + +TestRunConfig { RepoPath, MutantFile, OriginalFile, Timeout, TestPattern, WorkDir, Index } +``` + +--- + +## Per-Language Implementation Checklist + +Copy this checklist when adding Language X. Replace `` with the language name (e.g., `python`, `typescript`). + +### Phase 0: Research and prerequisites + +- [ ] **Parser selection**: Identify how to parse `` source from Go. Options: + - Tree-sitter (`github.com/smacker/go-tree-sitter`) -- works for any language with a grammar + - Shell out to a helper script (`python3 -c "import ast; ..."`) -- simpler but adds runtime dep + - Language-specific Go library (if one exists) +- [ ] **Test runner**: Identify the test command for `` (e.g., `pytest`, `jest`, `cargo test`, `mvn test`) +- [ ] **Test isolation**: Determine mutation isolation strategy (see [Key Design Decisions](#key-design-decisions)) +- [ ] **Module manifest**: Identify the project manifest file (`pyproject.toml`, `package.json`, `Cargo.toml`, `pom.xml`) +- [ ] **Import system**: Document how imports work -- relative vs absolute, aliasing, re-exports +- [ ] **Test file conventions**: Document how test files are identified (naming, directory, annotations) +- [ ] **Comment syntax**: Document single-line and multi-line comment syntax +- [ ] **Function declaration patterns**: Document all forms -- standalone functions, class methods, lambdas, closures, nested functions, arrow functions, etc. 
### Phase 1: FileFilter + +- [ ] Create `internal/lang/{lang}analyzer/` package directory +- [ ] Define source file extensions (e.g., `.py`, `.ts`+`.tsx`, `.rs`, `.java`) +- [ ] Implement `IsTestFile()`: + - Python: `test_*.py`, `*_test.py`, files under `tests/` or `test/` directories + - TypeScript/JS: `*.test.ts`, `*.spec.ts`, `*.test.js`, `*.spec.js`, files under `__tests__/` + - Rust: files under `tests/` directory (inline `#[cfg(test)]` modules are harder -- may need AST) + - Java: `*Test.java`, `*Tests.java`, files under `src/test/` +- [ ] Define `DiffGlobs` for `git diff` +- [ ] **Tests**: correct extensions included, test files excluded, edge cases (e.g., `testutils.py` should NOT be excluded) + +### Phase 2: FunctionExtractor (unlocks sizes analyzer) + +- [ ] Parse source files and identify function/method declarations +- [ ] Extract function name including class/module prefix: + - Python: `ClassName.method_name`, standalone `function_name` + - TypeScript: `ClassName.methodName`, `functionName`, arrow functions assigned to `const` + - Rust: `impl Type::method_name`, standalone `fn function_name` + - Java: `ClassName.methodName` +- [ ] Extract start line and end line for each function +- [ ] Compute line count (`end - start + 1`) +- [ ] Compute total file line count +- [ ] Filter to only functions overlapping the `FileChange` regions +- [ ] **Tests**: empty file, single function, multiple functions, class methods, nested functions, decorators/annotations, out-of-range filtering + +### Phase 3: ComplexityCalculator (unlocks complexity analyzer) + +- [ ] Implement cognitive complexity scoring. 
Map language constructs to increments: + +| Increment | Go (reference) | Python | TypeScript/JS | Rust | Java | +|-----------|----------------|--------|---------------|------|------| +| +1 base | `if`, `for`, `switch`, `select` | `if`, `for`, `while`, `try`, `with` | `if`, `for`, `while`, `switch`, `try` | `if`, `for`, `while`, `loop`, `match` | `if`, `for`, `while`, `switch`, `try` | +| +1 nesting | per nesting level | per nesting level | per nesting level | per nesting level | per nesting level | +| +1 else | `else`, `else if` | `elif`, `else` | `else`, `else if` | `else`, `else if` | `else`, `else if` | +| +1 logical op | `&&`, `\|\|` | `and`, `or` | `&&`, `\|\|` | `&&`, `\|\|` | `&&`, `\|\|` | +| +1 op switch | operator changes in sequence | operator changes in sequence | operator changes in sequence | operator changes in sequence | operator changes in sequence | + +- [ ] Handle language-specific patterns: + - Python: comprehensions (list/dict/set/generator), `lambda`, walrus `:=` in conditions, `except` clauses + - TypeScript/JS: ternary `? :`, optional chaining `?.`, nullish coalescing `??`, arrow functions in callbacks + - Rust: `?` operator, `if let`/`while let`, `match` arms, closure complexity + - Java: ternary `? 
:`, enhanced for-each, try-with-resources, lambda expressions, streams +- [ ] **Tests**: empty function (score=0), each control flow type, nesting penalties, logical operators, language-specific patterns + +### Phase 4: ComplexityScorer (unlocks churn analyzer) + +- [ ] Implement a scoring function for churn weighting +- [ ] Can be the same as `ComplexityCalculator` if fast enough, or a simplified approximation (count control flow keywords) +- [ ] **Tests**: verify scores are consistent with `ComplexityCalculator` (or document the approximation) + +### Phase 5: ImportResolver (unlocks deps analyzer) + +- [ ] Implement `DetectModulePath()`: + - Python: parse `pyproject.toml` `[project] name`, or `setup.py`/`setup.cfg`, or fall back to directory name + - TypeScript/JS: parse `package.json` `name` field + - Rust: parse `Cargo.toml` `[package] name` + - Java: parse `pom.xml` `{groupId}:{artifactId}`, or `build.gradle` `group` + project name +- [ ] Implement `ScanPackageImports()`: + - Python: scan `import X` and `from X import Y` statements, resolve relative imports (`.foo` -> parent package), filter to internal packages + - TypeScript/JS: scan `import {} from './path'` and `require('./path')`, resolve relative paths, filter to internal modules + - Rust: scan `use crate::` and `mod` declarations, map to internal crate modules + - Java: scan `import com.example.foo.Bar` statements, filter by project package prefix +- [ ] Define what "internal" means for this language (same module/package vs third-party) +- [ ] **Tests**: module path detection, internal import identification, external import filtering, relative import resolution + +### Phase 6: AnnotationScanner (for mutation testing) + +- [ ] Define annotation syntax using the language's comment style: + - Python: `# mutator-disable-next-line`, `# mutator-disable-func` + - TypeScript/JS: `// mutator-disable-next-line`, `// mutator-disable-func` + - Rust: `// mutator-disable-next-line`, `// mutator-disable-func` + - Java: `// 
mutator-disable-next-line`, `// mutator-disable-func` +- [ ] Implement function range detection (needed for `mutator-disable-func` to know which lines to skip) +- [ ] Return `map[int]bool` of disabled source line numbers +- [ ] **Tests**: next-line annotation disables the following line, function annotation disables all lines in function, no annotations returns empty map, irrelevant comments are ignored + +### Phase 7: MutantGenerator (for mutation testing) + +- [ ] Map the 8 canonical mutation operators to language-specific patterns: + +| Operator | Category | Go (reference) | Applicability Notes | +|----------|----------|----------------|-------------------| +| `conditional_boundary` | Tier 1 | `>` to `>=`, `<` to `<=` | Universal across all languages | +| `negate_conditional` | Tier 1 | `==` to `!=`, `>` to `<` | Universal. TS/JS: include `===`/`!==` | +| `math_operator` | Tier 1 | `+` to `-`, `*` to `/` | Universal. Python: include `//` (floor div), `**` (power) | +| `return_value` | Tier 1 | Replace returns with `nil` | Language-specific zero values: Python `None`, JS `null`/`undefined`, Rust `Default::default()`, Java `null`/`0`/`false` | +| `boolean_substitution` | Tier 2 | `true` to `false` | Python: `True`/`False`. Rust: same. Universal otherwise | +| `incdec` | Tier 2 | `++` to `--` | Python/Rust: N/A (no `++`/`--` operators). Skip for these languages | +| `branch_removal` | Tier 3 | Empty the body of `if` | Universal. 
Python: replace body with `pass` | +| `statement_deletion` | Tier 3 | Remove bare function calls | Universal | + +- [ ] Consider language-specific additional operators (register in `tiers.go` with appropriate tier): + - Python: `is`/`is not` mutations, `in`/`not in` mutations + - TypeScript: `===`/`!==` mutations, optional chaining `?.` removal, nullish coalescing `??` to `||` + - Rust: `unwrap()` removal, `?` operator removal, `Some(x)` to `None` + - Java: null-check removal, `equals()` to `==` swap, exception swallowing +- [ ] Filter mutants to only changed lines (respect `FileChange` regions) +- [ ] Exclude disabled lines (from `AnnotationScanner`) +- [ ] **Tests**: each operator type generates correct mutants, out-of-range lines are skipped, disabled lines are respected + +### Phase 8: MutantApplier (for mutation testing) + +- [ ] Choose mutation application strategy: + - **AST-based** (preferred if a good parser is available): parse file, modify AST node, render back to source + - **Text-based** (fallback): use line/column positions from `MutantSite` to do string replacement +- [ ] Handle edge cases: multiple operators on the same line, multi-line expressions, comment-only lines +- [ ] Verify that applied mutations produce syntactically valid source code +- [ ] **Tests**: each mutation type applied correctly, parse error returns nil, line mismatch returns nil + +### Phase 9: TestRunner (for mutation testing) + +- [ ] Implement test command construction: + - Python: `pytest [--timeout={seconds}] [-k {pattern}] {dir}` + - TypeScript/JS: `npx jest [--testPathPattern {pattern}] --forceExit` or `npx vitest run` + - Rust: `cargo test [{filter}] -- --test-threads=1` + - Java: `mvn test -Dtest={TestClass} -pl {module}` or `gradle test --tests {TestClass}` +- [ ] Implement mutation isolation strategy: + - **Go (reference)**: Uses `go test -overlay` -- mutant files are overlaid at build time, no file copying needed, fully parallel + - **All other languages**: Use temp-copy strategy: + 1. Copy original file to backup location + 2. 
Write mutated source in place of original + 3. Run test command + 4. Restore original from backup + 5. **Critical**: Mutants on the same file must be serialized (acquire per-file lock). Mutants on different files can run in parallel. + - Alternative per-language isolation (if available): + - Python: `importlib` tricks or `PYTHONPATH` manipulation + - TypeScript: Jest `moduleNameMapper` config + - Rust: `cargo test` doesn't support overlay; temp-copy is the only option +- [ ] Handle test timeout (kill process after `TestRunConfig.Timeout`) +- [ ] Detect kill vs survive: test command exit code != 0 means killed +- [ ] **Tests**: killed mutant (test fails), survived mutant (test passes), timeout handling, file restoration after crash + +### Phase 10: Integration and registration + +- [ ] Create `internal/lang/{lang}analyzer/{lang}analyzer.go` implementing the `Language` interface +- [ ] Add `init()` function calling `lang.Register()` +- [ ] Add blank import to `cmd/diffguard/main.go`: `_ "github.com/.../internal/lang/{lang}analyzer"` +- [ ] Write end-to-end integration test: + - Create a temp directory with a small `{lang}` project (2-3 files, 1 test file) + - Run the full analyzer pipeline + - Assert each report section has expected content +- [ ] Verify all existing Go tests still pass + +### Phase 11: Documentation + +- [ ] Add the language to README sections: + - "Install" -- any additional toolchain requirements + - "Usage" -- language-specific examples + - "What It Measures" -- any scoring differences from the Go reference + - "CLI Reference" -- new flags if any + - "CI Integration" -- workflow examples for the language +- [ ] Document the annotation syntax for the language +- [ ] Document any language-specific mutation operators and their tier assignments +- [ ] Document known limitations (e.g., "Python closures are not analyzed individually") + +--- + +## Language-Specific Notes + +### Python + +**Parser options**: +- **Tree-sitter** (`tree-sitter-python`): Best option from Go. 
No Python runtime needed. CST-based, so node types are strings (`"function_definition"`, `"if_statement"`). +- **Shell out to `python3 -c "import ast; ..."`**: Simpler for prototyping but adds Python as a runtime dependency. + +**Test runner**: `pytest` (most common). Fall back to `unittest` (`python -m pytest` handles both). + +**Isolation**: Temp-copy strategy. Python caches bytecode in `__pycache__/` -- set `PYTHONDONTWRITEBYTECODE=1` when running mutant tests to avoid stale cache. + +**Unique complexity considerations**: +- List/dict/set/generator comprehensions should add +1 each (they're implicit loops) +- `with` statements add +1 (context manager control flow) +- `lambda` expressions: count complexity of the lambda body +- `try`/`except`/`finally`: +1 for `try`, +1 for each `except`, +1 for `finally` +- Decorators: don't count toward complexity (they're applied at definition time) + +**Import system**: +- `import foo` -- absolute import +- `from foo import bar` -- absolute import +- `from . import bar` -- relative import (resolve against package path) +- `from ..foo import bar` -- relative import up two levels +- Distinguish internal vs external by checking if the import path starts with a package in the project + +**Test file conventions**: `test_*.py`, `*_test.py`, files in `tests/` or `test/` directories. Also `conftest.py` (test infrastructure, not test files -- should be excluded from analysis but not treated as test files). + +**Missing operators**: No `++`/`--` -- skip `incdec`. Add `is`/`is not` and `in`/`not in` as `negate_conditional` variants. + +### TypeScript / JavaScript + +**Parser options**: +- **Tree-sitter** (`tree-sitter-typescript`, `tree-sitter-javascript`): Works well. TypeScript and JavaScript need separate grammars. +- **Shell out to Node.js**: Could use `@babel/parser` or `typescript` compiler API via a helper script. 
+ +**Test runner**: Detect from `package.json`: +- `jest` or `@jest/core` in deps -> `npx jest` +- `vitest` in deps -> `npx vitest run` +- `mocha` in deps -> `npx mocha` +- Fall back to `npm test` + +**Isolation**: Temp-copy strategy. Jest supports `moduleNameMapper` in config which could theoretically be used for overlay-like behavior, but temp-copy is simpler and more universal. + +**Unique complexity considerations**: +- Ternary `condition ? a : b` adds +1 (it's a conditional) +- Optional chaining `foo?.bar` -- don't count (it's syntactic sugar, not control flow) +- Nullish coalescing `foo ?? bar` -- don't count (not branching in the cognitive sense) +- Arrow functions used as callbacks: count complexity of the body +- `async`/`await`: `try`/`catch` around `await` adds complexity; `await` alone does not +- Promise chains `.then().catch()` -- each `.catch()` adds +1 + +**Import system**: +- `import { x } from './local'` -- relative import (internal) +- `import { x } from 'package'` -- bare specifier (external) +- `require('./local')` -- CommonJS relative (internal) +- `require('package')` -- CommonJS bare (external) +- Distinguish internal by checking if the import path starts with `.` or `@/` (project alias) + +**Test file conventions**: `*.test.ts`, `*.spec.ts`, `*.test.js`, `*.spec.js`, `*.test.tsx`, `*.spec.tsx`, files under `__tests__/` directories. + +**Additional operators**: `===`/`!==` mutations (map to `negate_conditional`). Optional chaining removal (`foo?.bar` -> `foo.bar`, Tier 2). Nullish coalescing swap (`??` -> `||`, Tier 2). + +### Rust + +**Parser options**: +- **Tree-sitter** (`tree-sitter-rust`): Best option. Mature grammar. +- **Shell out to `rustc`**: Not practical. The `syn` crate is Rust-only. + +**Test runner**: `cargo test`. Always available in Rust projects. + +**Isolation**: Temp-copy strategy. `cargo test` recompiles from source, so replacing the file and running `cargo test` works. 
Set `CARGO_INCREMENTAL=0` to avoid stale incremental caches. + +**Unique complexity considerations**: +- `match` arms: +1 for the `match` statement, +1 for each arm with a guard (`if` condition) +- `if let` / `while let`: +1 each (they're pattern-matching control flow) +- `?` operator: don't count (it's error propagation syntax, not branching) +- `loop` (infinite loop): +1 +- Closures: count complexity of the closure body +- `unsafe` blocks: don't count toward complexity (they're a safety annotation, not control flow) + +**Import system**: +- `use crate::foo::bar` -- internal crate import +- `use other_crate::foo` -- external crate import +- `mod foo;` -- module declaration (internal) +- Distinguish internal by checking if the path starts with `crate::` or `self::` or `super::` + +**Test file conventions**: `tests/` directory contains integration tests. Unit tests are inline `#[cfg(test)] mod tests { ... }` -- these are harder to detect without parsing. For file filtering purposes, treat files in `tests/` as test files. For inline test modules, ignore them during analysis (they share the source file). + +**Missing operators**: No `++`/`--` -- skip `incdec`. Add `unwrap()` removal (Tier 1, return_value variant), `?` removal (Tier 2), `Some(x)` to `None` (Tier 1, return_value variant). + +### Java + +**Parser options**: +- **Tree-sitter** (`tree-sitter-java`): Works well. Mature grammar. +- **Shell out to a Java parser**: Could use JavaParser as a CLI tool. + +**Test runner**: Detect from build file: +- `pom.xml` present -> `mvn test -Dtest={TestClass}` +- `build.gradle` or `build.gradle.kts` present -> `gradle test --tests {TestClass}` + +**Isolation**: Temp-copy strategy. Both Maven and Gradle recompile from source. Replace the `.java` file, run tests, restore. + +**Unique complexity considerations**: +- Enhanced for-each (`for (X x : collection)`) adds +1 +- Try-with-resources: +1 for the `try` block +- `catch` clauses: +1 each +- `finally`: +1 +- Ternary `? 
:`: +1 +- Lambda expressions: count complexity of the lambda body +- Stream operations (`.filter()`, `.map()`, `.reduce()`): don't count individually (they're method calls) +- `synchronized` blocks: don't count (concurrency annotation, not control flow) +- `assert` statements: don't count + +**Import system**: +- `import com.example.foo.Bar` -- fully qualified import +- `import com.example.foo.*` -- wildcard import +- Determine internal by checking if the import matches the project's group/package prefix + +**Test file conventions**: `*Test.java`, `*Tests.java`, `*TestCase.java`, files under `src/test/java/`. + +**Additional operators**: `null` check removal (remove `if (x == null)` guards, Tier 2). `equals()` to `==` swap (Tier 1, negate_conditional variant). Exception swallowing (empty `catch` body, Tier 3). + +--- + +## Key Design Decisions + +### Parser strategy + +**Recommended: Tree-sitter for all non-Go languages.** + +Tree-sitter provides Go bindings (`github.com/smacker/go-tree-sitter`) and has mature grammars for Python, TypeScript, JavaScript, Rust, Java, and many others. This avoids requiring language runtimes as dependencies (no need for Python, Node.js, etc. to be installed). + +Trade-off: Tree-sitter returns a concrete syntax tree with string-based node kinds (`"if_statement"`, `"function_definition"`) rather than typed AST nodes. This means pattern matching is string-based rather than type-switch-based, but the uniformity across languages is worth it. + +Go remains the exception -- it continues to use Go's standard library `go/ast` packages, which provide superior type safety and formatting preservation. 
+ +### Mutation isolation + +| Language | Isolation Mechanism | Parallelism | +|----------|-------------------|-------------| +| Go | `go test -overlay` (build-level file substitution) | Fully parallel -- all mutants can run simultaneously | +| All others | Temp-copy: backup original, write mutant, run tests, restore | Parallel across files, serial within same file | + +For non-Go languages, the `TestRunner` implementation must handle file locking internally. The mutation orchestrator calls `RunTest()` concurrently up to `--mutation-workers` goroutines. Each `TestRunner` acquires a per-file mutex before modifying the source file and releases it after restoration. + +### Language detection + +Auto-detect by scanning for manifest files at the repo root: + +| File | Language | +|------|----------| +| `go.mod` | Go | +| `pyproject.toml`, `setup.py`, `setup.cfg` | Python | +| `package.json` + `.ts`/`.tsx` files | TypeScript | +| `package.json` + `.js`/`.jsx` files (no TS) | JavaScript | +| `Cargo.toml` | Rust | +| `pom.xml`, `build.gradle`, `build.gradle.kts` | Java | + +If multiple languages are detected, require `--language` or analyze each language separately and merge report sections. 
### Annotation syntax + +Use the same annotation names across all languages, with the language-appropriate comment prefix: + +| Language | Line disable | Function disable | +|----------|-------------|-----------------| +| Go | `// mutator-disable-next-line` | `// mutator-disable-func` | +| Python | `# mutator-disable-next-line` | `# mutator-disable-func` | +| TypeScript/JS | `// mutator-disable-next-line` | `// mutator-disable-func` | +| Rust | `// mutator-disable-next-line` | `// mutator-disable-func` | +| Java | `// mutator-disable-next-line` | `// mutator-disable-func` | + +### New CLI flags + +``` +--language string Language to analyze (default: auto-detect) +--test-command string Custom test command override (use {file} and {dir} placeholders) +``` + +The `--test-command` flag is an escape hatch for projects with non-standard test setups. Example: `--test-command "python -m pytest {dir} --timeout=30"`. + +--- + +## Adding a New Language: Quick Reference + +1. Create `internal/lang/{lang}analyzer/` package +2. Implement all 9 sub-interfaces of `Language` +3. Add `init()` calling `lang.Register()` +4. Add blank import in `cmd/diffguard/main.go` +5. Add any new mutation operators to `internal/mutation/tiers.go` +6. Write unit tests for each interface implementation +7. Write one end-to-end integration test +8. Update README with language-specific examples +9. Follow the detailed [Per-Language Implementation Checklist](#per-language-implementation-checklist) above diff --git a/docs/rust-typescript-support.md b/docs/rust-typescript-support.md new file mode 100644 index 0000000..a146241 --- /dev/null +++ b/docs/rust-typescript-support.md @@ -0,0 +1,413 @@ +# Rust + TypeScript support — implementation checklist + +This is the execution checklist for adding Rust and TypeScript analyzer support to diffguard, sized so a single `diffguard` run on a mixed-language repo reports both languages side by side. + +For the deep technical decisions (interface shapes, tree-sitter vs. 
runtime parsers, mutation isolation strategy, per-language parser notes), see `../MULTI_LANGUAGE_SUPPORT.md`. This checklist references that doc rather than duplicating it. + +## Scope + +- **In scope**: Rust, TypeScript (including `.tsx`). All five analyzers (complexity, sizes, deps, churn, mutation). Multi-language single-invocation support. +- **Out of scope**: Java, Python, plain JavaScript-only (JS works incidentally under the TS grammar but the TS path is the supported one). A `--test-command` override flag (add only if a fixture needs it). +- **Left alone**: Go keeps `go/ast`. Only its packaging moves — the parser does not. + +## Legend + +- **[F]** foundation work (blocks both languages) +- **[O]** orchestration (the "simultaneous" piece) +- **[R]** Rust analyzer +- **[T]** TypeScript analyzer +- **[X]** cross-cutting (docs, CI, evals) +- **[EVAL]** correctness-evidence work (proves diffguard catches real issues) + +Parts R and T are disjoint and can be worked in parallel once F and O land. + +--- + +## Part A — Foundation (shared, one-time) [F] + +Repo reorganization so Go becomes one of several registered languages. Every step leaves `go test ./...` green. + +### A1. Language abstraction layer + +- [ ] Add `github.com/smacker/go-tree-sitter` (and sub-packages for `rust`, `typescript`, `tsx`) to `go.mod`. +- [ ] Create `internal/lang/lang.go` with the 9 sub-interfaces (`FileFilter`, `FunctionExtractor`, `ComplexityCalculator`, `ComplexityScorer`, `ImportResolver`, `MutantGenerator`, `MutantApplier`, `AnnotationScanner`, `TestRunner`) and the top-level `Language` interface — shapes from `MULTI_LANGUAGE_SUPPORT.md` §Interface Definitions. +- [ ] Create `internal/lang/registry.go` with `Register(Language)`, `Get(name string)`, and `All()`. +- [ ] Create `internal/lang/detect.go`. Detection rules from `MULTI_LANGUAGE_SUPPORT.md` §Language detection. Return order must be deterministic (sorted by name) so downstream report ordering is stable. 
+- [ ] Unit tests for registry (register/get/all, duplicate registration is an error) and detection (each manifest file → correct language, multi-language repos return multiple, empty repo returns empty). + +### A2. Extract Go → `goanalyzer` + +- [ ] Create `internal/lang/goanalyzer/` package. +- [ ] Move the three duplicate `funcName` helpers (`sizes.go`, `complexity.go`, `churn.go`) into `internal/lang/goanalyzer/parse.go` as a single helper. +- [ ] Implement each of the 9 interfaces in `goanalyzer/` (one file per concern; filenames from `MULTI_LANGUAGE_SUPPORT.md` §Resulting directory structure). +- [ ] `goanalyzer/goanalyzer.go` exposes a `Language` struct and an `init()` that calls `lang.Register(&Language{})`. +- [ ] Blank-import `_ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer"` in `cmd/diffguard/main.go`. + +### A3. Parameterize the diff parser + +- [ ] Replace `isAnalyzableGoFile` (`internal/diff/diff.go:175-177`) with a `FileFilter` parameter. +- [ ] Replace hardcoded `--'*.go'` arg (`internal/diff/diff.go:92`) with globs from `FileFilter.DiffGlobs`. +- [ ] Replace the `+++` handler's `.go`/`_test.go` check (`internal/diff/diff.go:201-208`) with `FileFilter.IsTestFile` + extension check. +- [ ] Update `Parse()` and `CollectPaths()` signatures; callers in `cmd/diffguard/main.go` pass the appropriate filter. +- [ ] Keep `parseUnifiedDiff` and `parseHunkHeader` untouched — they're already language-agnostic. + +### A4. Route existing analyzers through the interface + +- [ ] `internal/complexity/complexity.go`: take a `lang.ComplexityCalculator` parameter, delete the embedded AST walk, call `calc.AnalyzeFile(...)` instead. +- [ ] `internal/sizes/sizes.go`: take a `lang.FunctionExtractor`; delegate. +- [ ] `internal/churn/churn.go`: take a `lang.ComplexityScorer`; delete the simplified `computeComplexity` duplicate; keep `git log --oneline --follow` counting (language-agnostic). 
+- [ ] `internal/deps/`: split into `graph.go` (pure graph math — cycles, afferent/efferent coupling, instability, SDP) and `deps.go` (orchestration taking `lang.ImportResolver`).
+- [ ] `internal/mutation/`: route `Analyze` through `MutantGenerator`, `MutantApplier`, `AnnotationScanner`, `TestRunner`. `tiers.go` stays put; `operatorTier` gets new entries for Rust/TS operators (TBD in R/T phases).
+
+### A5. Regression gate
+
+- [ ] `go test ./...` green.
+- [ ] `diffguard` binary on a self-diff of this repo produces byte-identical output before and after the reorg (record the baseline first).
+- [ ] Wall-clock regression <5% on the self-diff.
+
+---
+
+## Part B — Multi-language orchestration [O]
+
+The "simultaneous" requirement. Lands after A, before R and T.
+
+### B1. CLI
+
+- [ ] Add `--language` flag to `cmd/diffguard/main.go`. Default empty → auto-detect. Accepts comma-separated values (`--language rust,typescript`).
+- [ ] Error messages cite the detected manifest files to help users debug "why did you pick that language".
+
+### B2. Orchestration loop
+
+- [ ] In `run()` (currently `main.go:79-102`), resolve the language set:
+  - [ ] If `--language` empty: call `lang.Detect(repoPath)`.
+  - [ ] Else: split the flag and call `lang.Get()` for each; unknown names are a hard error.
+  - [ ] Empty language set is a hard error with a clear message ("no supported language detected; pass --language to override").
+- [ ] For each resolved language, call `diff.Parse(repoPath, baseBranch, language.FileFilter())` → per-language `diff.Result`.
+- [ ] For each `(language, Result)` with non-empty `Files`, run the full analyzer pipeline using the language's interfaces.
+- [ ] Merge sections from all languages into the single `report.Report`. No concurrency at this layer — analyzers already parallelize where it matters.
+
+### B3. Section naming
+
+- [ ] Section names are suffixed `[<language>]` (e.g., `Complexity [rust]`, `Mutation [typescript]`). 
`report.Section.Name` is already `string`, so no struct change.
+- [ ] Text output groups by language first, then metric, so mixed reports stay readable.
+- [ ] JSON output is stable: sections ordered `(language, metric)` lexicographically.
+
+### B4. Empty-languages behavior
+
+- [ ] If a detected language has no changed files in the diff, it produces no sections (no empty PASS rows). This matches existing Go behavior (`No Go files found.` early return generalizes to "No \<language\> files found." per language, collapsing to the existing message when only one language is present).
+
+### B5. Exit-code aggregation
+
+- [ ] `checkExitCode` unchanged: it already takes a merged `Report` and returns the worst severity. Add a test that a FAIL in any language escalates the whole run.
+
+### B6. Mixed-repo smoke test
+
+- [ ] `cmd/diffguard/main_test.go` gains a test using a temp git repo with a Go file and stub Rust/TS files: run `main()` and assert all three language sections appear. (The Rust/TS analyzer impls are stubs at this point — they register, they return empty results. The point of this test is orchestration, not analysis.)
+
+---
+
+## Part C — Rust analyzer [R]
+
+`internal/lang/rustanalyzer/`. See `MULTI_LANGUAGE_SUPPORT.md` §Rust for parser, complexity, import, and mutation notes.
+
+### C0. Research prerequisites
+
+- [ ] Confirm `github.com/smacker/go-tree-sitter/rust` grammar versions support the Rust edition(s) we care about.
+- [ ] Decide: integration-test crates under `tests/` treated as test files? Inline `#[cfg(test)] mod tests { ... }` treated as live code? (Design doc recommends: `tests/` = test files, inline modules = live code ignored during analysis.)
+
+### C1. FileFilter
+
+- [ ] `.rs` extension. `IsTestFile`: any path segment equal to `tests`.
+- [ ] `DiffGlobs`: `*.rs`.
+- [ ] Tests: fixtures include `src/lib.rs`, `tests/integration.rs`, `src/foo/bar.rs`; assert expected inclusions/exclusions.
+
+### C2. 
FunctionExtractor + +- [ ] Tree-sitter query for `function_item`, `impl_item` → `function_item` (methods), `trait_item` → default methods. +- [ ] Name extraction: standalone `fn foo` → `foo`; `impl Type { fn bar }` → `Type::bar`; `impl Trait for Type { fn baz }` → `Type::baz`. +- [ ] Line range: node start/end lines. File line count from byte count. +- [ ] Filter to functions overlapping `FileChange.Regions`. +- [ ] Tests: each function form, filtering, nested functions (treated as separate). + +### C3. ComplexityCalculator + ComplexityScorer + +- [ ] Base +1 on: `if_expression`, `while_expression`, `for_expression`, `loop_expression`, `match_expression`, `if_let_expression`, `while_let_expression`. +- [ ] +1 per arm of `match_expression` with a guard (the `if` in `pattern if cond =>`). +- [ ] +1 per logical-op token sequence change inside a binary_expression chain (`&&` / `||`). +- [ ] +1 per nesting level for each scope-introducing ancestor. +- [ ] Do **not** count: `?` operator, `unsafe` blocks. +- [ ] `ComplexityScorer` reuses `ComplexityCalculator` (fast enough). +- [ ] Tests: empty fn (0), `match` with N guarded arms (N), nested `if let` inside `for`, logical chains. + +### C4. ImportResolver + +- [ ] `DetectModulePath`: parse `Cargo.toml` `[package] name`. +- [ ] `ScanPackageImports`: find `use_declaration` nodes. Internal iff the path starts with `crate::`, `self::`, or `super::`. Also treat `mod foo;` declarations as an edge to the child module. +- [ ] Map discovered paths back to package directories so the graph uses directory-level nodes consistent with Go's behavior. +- [ ] Tests: crate root detection, relative-path resolution (`super::foo`), external imports filtered out. + +### C5. AnnotationScanner + +- [ ] Scan `line_comment` tokens for `mutator-disable-next-line` and `mutator-disable-func`. +- [ ] Function ranges sourced from C2 so `mutator-disable-func` can expand to every line in the fn. 
+- [ ] Tests: next-line, func-wide, unrelated comments ignored, disabled-line map is complete. + +### C6. MutantGenerator + +- [ ] Canonical operators (names from `MULTI_LANGUAGE_SUPPORT.md` §MutantGenerator): + - [ ] `conditional_boundary`: `>` / `>=` / `<` / `<=` swaps. + - [ ] `negate_conditional`: `==` / `!=` swap; relational flips. + - [ ] `math_operator`: `+` / `-`, `*` / `/` swaps. + - [ ] `return_value`: replace return with `Default::default()` / `None` when the return type is an `Option` / unit. + - [ ] `boolean_substitution`: `true` / `false` swap. + - [ ] `branch_removal`: empty `if` body. + - [ ] `statement_deletion`: remove bare expression statements. +- [ ] Skip `incdec` (Rust has no `++` / `--`). +- [ ] Rust-specific additions: + - [ ] `unwrap_removal` (Tier 1 via `operatorTier` override): strip `.unwrap()` / `.expect(...)`. Register in `internal/mutation/tiers.go`. + - [ ] `some_to_none` (Tier 1): `Some(x)` → `None`. + - [ ] `question_mark_removal` (Tier 2): strip trailing `?`. Register in tiers. +- [ ] Filter mutants to changed regions; exclude disabled lines. +- [ ] Tests: each operator produces the expected mutant, out-of-range skipped, disabled lines honored. + +### C7. MutantApplier + +- [ ] Text-based application using node byte ranges from the CST. Tree-sitter gives us exact byte offsets; simpler than re-rendering the tree. +- [ ] After application, re-parse with tree-sitter and assert no syntax errors; return `nil` if the mutated source doesn't parse (silently skip corrupt mutants rather than running broken tests). +- [ ] Tests: each mutation type applied, re-parse check catches malformed output. + +### C8. TestRunner + +- [ ] Temp-copy isolation strategy (from `MULTI_LANGUAGE_SUPPORT.md` §Mutation isolation). +- [ ] Per-file `sync.Mutex` map so concurrent mutations on the same file serialize but different files run in parallel. +- [ ] Test command: `cargo test` with `CARGO_INCREMENTAL=0`. 
Honor `TestRunConfig.TestPattern` (pass as positional filter).
+- [ ] Restore the original file from a backup after each mutant run; panic-safe via `defer`.
+- [ ] Honor `TestRunConfig.Timeout` via `exec.CommandContext`.
+- [ ] Tests: killed mutant (test fails → killed), survived (test passes → survived), timeout, crash-during-run leaves source restored (simulate via deliberate panic in a helper test).
+
+### C9. Register + wire-up
+
+- [ ] `rustanalyzer/rustanalyzer.go`: `Language` struct, `Name() string { return "rust" }`, `init()` calling `lang.Register`.
+- [ ] Blank import in `cmd/diffguard/main.go`.
+
+---
+
+## Part D — TypeScript analyzer [T]
+
+`internal/lang/tsanalyzer/`. See `MULTI_LANGUAGE_SUPPORT.md` §TypeScript for parser and operator notes.
+
+### D0. Research prerequisites
+
+- [ ] `github.com/smacker/go-tree-sitter/typescript/typescript` for `.ts`, `.../typescript/tsx` for `.tsx`. Use the grammar matching the file extension.
+- [ ] Test runner detection: parse `package.json` devDependencies — prefer `vitest`, then `jest`, then fall back to `npm test`.
+
+### D1. FileFilter
+
+- [ ] Extensions: `.ts`, `.tsx`. Deliberately exclude `.js`, `.jsx`, `.mjs`, `.cjs` for now (JS-only repos out of scope).
+- [ ] `IsTestFile`: suffixes `.test.ts`, `.test.tsx`, `.spec.ts`, `.spec.tsx`; any path segment `__tests__` or `__mocks__`.
+- [ ] `DiffGlobs`: `*.ts`, `*.tsx`.
+- [ ] Tests: glob matches, test-file exclusion, `utils.test-helper.ts` is NOT a test file (edge case).
+
+### D2. FunctionExtractor
+
+- [ ] Tree-sitter queries for: `function_declaration`, `method_definition`, `arrow_function` assigned to `variable_declarator`, `function` expressions assigned similarly, `generator_function`.
+- [ ] Name extraction: `ClassName.method`, `functionName`, arrow assigned to `const x = () =>` → `x`.
+- [ ] Line ranges, filtering, file LOC.
+- [ ] Tests: each form, class methods (including static + private), nested arrow functions, exported vs. local.
+
+### D3. 
ComplexityCalculator + ComplexityScorer + +- [ ] Base +1 on: `if_statement`, `for_statement`, `for_in_statement`, `for_of_statement`, `while_statement`, `switch_statement`, `try_statement`, `ternary_expression`. +- [ ] +1 per `catch_clause`; +1 per `else` branch; +1 per `case` with content (empty fall-through cases don't count). +- [ ] +1 per `.catch(` promise-chain method call (string-match on identifier to avoid CST depth). +- [ ] +1 per `&&` / `||` run change. +- [ ] Do **not** count: optional chaining `?.`, nullish coalescing `??`, `await` alone, `async` keyword, stream method calls. +- [ ] Tests: ternary nest, `try/catch/finally`, logical chains, optional chaining ignored. + +### D4. ImportResolver + +- [ ] `DetectModulePath`: parse `package.json` `name` field. +- [ ] `ScanPackageImports`: `import` and `require(...)`. Internal iff the specifier starts with `.` or a registered project alias (`@/`, `~/`). Resolve relative paths against the source file's directory, fold to dir-level for the graph. +- [ ] Tests: internal vs. external classification, relative resolution, barrel re-exports count as one edge. + +### D5. AnnotationScanner + +- [ ] `// mutator-disable-next-line` and `// mutator-disable-func` comments. +- [ ] Function ranges from D2 for func-scope disables. +- [ ] Tests: same shape as Rust's C5. + +### D6. MutantGenerator + +- [ ] Canonical operators: `conditional_boundary`, `negate_conditional` (include `===` / `!==`), `math_operator`, `return_value` (use `null` / `undefined` appropriately), `boolean_substitution`, `incdec` (JS/TS has `++` / `--`), `branch_removal`, `statement_deletion`. +- [ ] TS-specific additions — register in `internal/mutation/tiers.go`: + - [ ] `strict_equality` (Tier 1): flip `===` ↔ `==` and `!==` ↔ `!=`. + - [ ] `nullish_to_logical_or` (Tier 2): `??` → `||`. + - [ ] `optional_chain_removal` (Tier 2): `foo?.bar` → `foo.bar`. +- [ ] Filter to changed regions, skip disabled lines. 
+- [ ] Tests: each operator emits mutants; TS-specific operators exercised. + +### D7. MutantApplier + +- [ ] Same text-based strategy as Rust's C7. Re-parse check after mutation. +- [ ] Tests: each mutation applied, re-parse catches corrupt output. + +### D8. TestRunner + +- [ ] Temp-copy + per-file lock, identical to Rust. +- [ ] Command selection by detected runner (vitest / jest / npm test). Compose with `--testPathPattern` or `-t` honoring `TestPattern`. +- [ ] Honor `TestRunConfig.Timeout`. +- [ ] Set `CI=true` to suppress interactive prompts. +- [ ] Tests: killed, survived, timeout, restoration after crash. + +### D9. Register + wire-up + +- [ ] `tsanalyzer/tsanalyzer.go`: `Language` with `Name() string { return "typescript" }`, `init()` calls `lang.Register`. +- [ ] Blank import in `cmd/diffguard/main.go`. + +--- + +## Part E — Integration & verification [X] + +### E1. Mixed-repo end-to-end + +- [ ] Fixture at `cmd/diffguard/testdata/mixed-repo/` containing a minimal Cargo crate, a minimal TS package, and (for completeness) a Go file. +- [ ] End-to-end test invoking the built binary (`go build` then `exec`) against the fixture. Assert each language's sections appear with correct suffixes. +- [ ] Negative control: same fixture stripped of violations must produce `WorstSeverity() == PASS`. + +### E2. CI + +- [ ] Extend `.github/workflows/` to install Rust (`rustup`) and Node (for test runners) before running the eval suites. +- [ ] Add `make eval-rust`, `make eval-ts`, `make eval-mixed` targets wrapping the eval Go tests with the right env (e.g., `CARGO_INCREMENTAL=0`, `CI=true`). +- [ ] Cache Cargo and npm artifacts so CI stays fast. + +### E3. README + docs + +- [ ] Update `README.md` top section: tagline no longer says Go-only; list supported languages. +- [ ] Add a per-language "Install" subsection (required toolchain: Rust + cargo, Node + npm). +- [ ] Add `--language` to the CLI reference. +- [ ] Document annotation syntax per language. 
+- [ ] Cross-link from `README.md` to this checklist and to `MULTI_LANGUAGE_SUPPORT.md`. + +--- + +## Evaluation suite [EVAL] — does diffguard actually catch real issues + +Structural tests (Parts A–E) prove the plumbing works. This section proves the analyzers produce correct verdicts on real, seeded problems. Every case is a **positive / negative control pair**: the positive must be flagged with the right severity, the negative must pass. Negative controls are the firewall against rubber-stamping. + +### EVAL-1. Harness + +- [ ] `internal/lang/analyzer/evaldata/` holds fixtures. +- [ ] `eval_test.go` in each analyzer package runs the full pipeline (built binary, full CLI path) against each fixture and diff-compares emitted findings to `expected.json`. +- [ ] Comparison is semantic (file + function + severity), not byte-for-byte, so cosmetic line shifts don't break the eval. +- [ ] Eval runs are deterministic: `--mutation-sample-rate 100`, fixed `--mutation-workers`, a stable seed for any randomized orderings. +- [ ] Each fixture directory has a `README.md` documenting the seeded issue and the expected verdict. + +### EVAL-2. Rust cases + +- [ ] **complexity**: + - Positive `complex_positive.rs`: nested `match` + `if let` + guarded arms, cognitive ≥11 → section FAIL with finding on that fn. + - Negative `complex_negative.rs`: same behavior split into helpers, each <10 → section PASS, zero findings. +- [ ] **sizes (function)**: + - Positive: single `fn` >50 lines → FAIL. + - Negative: same behavior factored across fns, each <50 → PASS. +- [ ] **sizes (file)**: + - Positive: `large_file.rs` >500 LOC → FAIL. + - Negative: <500 LOC → PASS. +- [ ] **deps (cycle)**: + - Positive: `a.rs` ↔ `b.rs` → FAIL with cycle finding. + - Negative: same modules with a shared `types.rs` breaking the cycle → PASS. +- [ ] **deps (SDP)**: + - Positive: unstable concrete module imported by stable abstract one → WARN/FAIL per current SDP severity. 
+  - Negative: reversed dependency direction → PASS.
+- [ ] **churn**:
+  - Positive `hot_complex.rs` with a baked `.git` dir showing 8+ commits on a complex fn → finding present.
+  - Negative `hot_simple.rs` same commit count, trivial fn → no finding.
+- [ ] **mutation (kill)**:
+  - Positive `well_tested.rs`: arithmetic fn + tests covering boundary and sign → Tier-1 ≥90% → PASS.
+  - Negative `untested.rs`: same fn, test covers only one branch → Tier-1 <90% → FAIL.
+- [ ] **mutation (Rust-specific operator)**:
+  - Positive: `unwrap_removal` / `some_to_none` on a tested fn is killed; on an untested fn survives.
+  - Proof that the operator adds signal, not noise.
+- [ ] **mutation (annotation respect)**:
+  - Positive `// mutator-disable-func` suppresses all mutants in that fn.
+  - Negative (same file, annotation removed) regenerates them.
+
+### EVAL-3. TypeScript cases
+
+- [ ] **complexity**:
+  - Positive `complex_positive.ts`: nested ternaries + try/catch + `&&`/`||` chains ≥11 → FAIL.
+  - Negative `complex_negative.ts`: refactored into named helpers → PASS.
+- [ ] **sizes (function)**:
+  - Positive: arrow fn assigned to `const` >50 LOC → FAIL.
+  - Negative: same logic across named exports → PASS.
+- [ ] **sizes (file)**:
+  - Positive `large_file.ts` >500 LOC → FAIL.
+  - Negative: split across files → PASS.
+- [ ] **deps (cycle)**:
+  - Positive `a.ts` ↔ `b.ts` → FAIL.
+  - Negative: shared `types.ts` breaking cycle → PASS.
+- [ ] **deps (internal vs external)**:
+  - Positive: `./foo` appears in internal graph; `import 'lodash'` does NOT.
+  - Assert directly on the graph shape, not just pass/fail.
+- [ ] **churn**:
+  - Positive `hot_complex.ts` with seeded history → finding.
+  - Negative `hot_simple.ts` same history → no finding.
+- [ ] **mutation (kill, with configured runner)**:
+  - Positive: `arithmetic.ts` + tests covering boundary + sign → Tier-1 ≥90% → PASS.
+  - Negative: same fn, test covers one branch → Tier-1 <90% → FAIL. 
+- [ ] **mutation (TS-specific operators)**: + - Positive: `strict_equality` flip killed by tests that rely on strict equality; `nullish_to_logical_or` killed by tests that distinguish `null` from `undefined`. + - Negative: same operators survive when the test only asserts non-distinguishing inputs. Confirms the operators generate meaningful mutants, not noise. +- [ ] **mutation (annotation respect)**: + - Positive `// mutator-disable-next-line` suppresses the next-line mutant. + - Negative: annotation removed, mutant regenerated. + +### EVAL-4. Cross-cutting + +- [ ] **Mixed-repo severity propagation**: + - Rust FAIL + TS PASS → overall FAIL; TS section independently reports PASS. + - Flip: Rust PASS + TS FAIL → overall FAIL; Rust section independently reports PASS. + - Proves language sections don't contaminate each other. +- [ ] **Mutation concurrency safety**: + - Fixture with 3+ Rust and 3+ TS files, each with multiple mutants. Run `--mutation-workers 4`. + - Assert `git status --porcelain` is empty after the run (no temp-copy corruption). + - Assert repeated runs produce identical reports. + - Sweep `--mutation-workers` 1, 2, 4, 8 and assert report stability. +- [ ] **Disabled-line respect under concurrency**: + - A file with `mutator-disable-func` on one fn and live code on another, `--mutation-workers 4`. + - Assert zero mutants generated for the disabled fn; live fn's mutants execute. +- [ ] **False-positive ceiling**: + - Known-clean fixture (well-tested small Rust crate + well-tested small TS module) → `WorstSeverity() == PASS`, zero FAIL findings across all analyzers. + - This is the "does it cry wolf" gate. + +### EVAL-5. Pre-flight calibration (pre-ship) + +- [ ] Rust: run the built diffguard against two open-source crates (one small, one mid-sized). Triage every FAIL and WARN. If >20% are noise, iterate on thresholds/detection before declaring Rust support shipped. +- [ ] TypeScript: repeat with one app and one library project. 
+- [ ] Record triage findings in this document under a "Baseline noise rate" appendix so future changes know what "good" looks like. + +--- + +## Execution order summary + +``` +A (foundation) ──► B (orchestration) ──┬──► C (Rust) ──┬──► E (integration + CI) + └──► D (TypeScript) ──┘ + │ + └──► EVAL runs alongside C/D, per analyzer +``` + +Parts C and D are disjoint packages and can be implemented in parallel by separate agents / PRs, rebased onto the B branch. Part E holds the merge point and the final evaluation gate. + +--- + +## Sign-off criteria + +Before calling this done: + +- [ ] All checklist items above checked. +- [ ] `go test ./...` green. +- [ ] `make eval-rust`, `make eval-ts`, `make eval-mixed` all green in CI. +- [ ] Pre-flight calibration triage documented with <20% noise rate per language. +- [ ] README reflects multi-language support with install instructions for each toolchain. +- [ ] `diffguard` run on this repo's own HEAD produces identical output before and after the reorg (the Go path must be byte-stable). From 924b18ddb40c6c3fea7ad4703d1273218ffad57c Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:36:48 -0400 Subject: [PATCH 02/38] =?UTF-8?q?feat(lang):=20Part=20A1=20=E2=80=94=20lan?= =?UTF-8?q?guage=20abstraction=20layer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce internal/lang/ with the per-language interfaces (FileFilter, FunctionExtractor, ComplexityCalculator, ComplexityScorer, ImportResolver, MutantGenerator, MutantApplier, AnnotationScanner, TestRunner) plus the top-level Language interface and the shared data types the analyzers and language back-ends pass to each other (FunctionInfo, FunctionSize, FileSize, FunctionComplexity, MutantSite, TestRunConfig). Also adds the process-wide registry (Register/Get/All with deterministic sorted ordering) and a manifest-file / custom-detector-based auto-detection hook (Detect) that will be used by the CLI in Part B. 
No behavior change yet — the analyzers still use their hardcoded Go AST paths. Part A2 extracts those paths into an internal/lang/goanalyzer/ package that implements these interfaces. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/detect.go | 103 ++++++++++++++++ internal/lang/detect_test.go | 131 +++++++++++++++++++++ internal/lang/lang.go | 207 +++++++++++++++++++++++++++++++++ internal/lang/lang_test.go | 66 +++++++++++ internal/lang/registry.go | 79 +++++++++++++ internal/lang/registry_test.go | 109 +++++++++++++++++ 6 files changed, 695 insertions(+) create mode 100644 internal/lang/detect.go create mode 100644 internal/lang/detect_test.go create mode 100644 internal/lang/lang.go create mode 100644 internal/lang/lang_test.go create mode 100644 internal/lang/registry.go create mode 100644 internal/lang/registry_test.go diff --git a/internal/lang/detect.go b/internal/lang/detect.go new file mode 100644 index 0000000..6b9cd0a --- /dev/null +++ b/internal/lang/detect.go @@ -0,0 +1,103 @@ +package lang + +import ( + "os" + "path/filepath" + "sort" + "sync" +) + +// manifestFiles maps a repo-root filename to the language Name() that owns +// it. When multiple languages share a manifest (e.g. package.json for JS and +// TS), the ambiguity is resolved inside the language's own detection hook +// — here we only record the canonical owner. +// +// Languages without a manifest (or where the manifest needs extra inspection +// to disambiguate) can add themselves to this map from their init() via +// RegisterManifest so the auto-detector still picks them up. +var ( + manifestMu sync.Mutex + manifests = map[string]string{} +) + +// RegisterManifest associates a repo-root filename with a language name. 
+// A language implementation typically calls this alongside Register(): +// +// func init() { +// lang.Register(&Language{}) +// lang.RegisterManifest("go.mod", "go") +// } +// +// The detector only fires on files that exist at the repository root, so +// sub-directory manifests (e.g. nested Cargo.toml for workspaces) don't +// falsely trigger; languages that need subtree scanning should implement +// their own detection hook via RegisterDetector. +func RegisterManifest(filename, languageName string) { + manifestMu.Lock() + defer manifestMu.Unlock() + manifests[filename] = languageName +} + +// Detector is a per-language hook that reports whether the given repo root +// contains a project of this language. Languages use RegisterDetector when +// manifest-file matching is too coarse — e.g. "package.json + at least one +// .ts file" for TypeScript. +type Detector func(repoPath string) bool + +var ( + detectorMu sync.Mutex + detectors = map[string]Detector{} +) + +// RegisterDetector associates a language name with a custom detection +// function. Both the detector (if present) and the manifest file (if +// registered) are consulted during Detect; a language matches if either +// returns true. +func RegisterDetector(languageName string, d Detector) { + detectorMu.Lock() + defer detectorMu.Unlock() + detectors[languageName] = d +} + +// Detect scans repoPath for per-language manifest files and custom detectors +// and returns the languages whose signatures match. The returned slice is +// sorted by Name() so report ordering stays deterministic across calls. +// +// Only languages that are both (a) registered via Register and (b) match via +// a manifest or detector are returned. That way, adding a new language to +// the binary without a matching manifest entry is inert — nothing misfires. +func Detect(repoPath string) []Language { + matched := map[string]bool{} + + // Manifest-based detection. 
+ manifestMu.Lock() + for filename, name := range manifests { + if _, err := os.Stat(filepath.Join(repoPath, filename)); err == nil { + matched[name] = true + } + } + manifestMu.Unlock() + + // Custom-detector fallback. Languages that can't be distinguished by a + // single manifest file (TypeScript vs. JavaScript, for example) install + // a detector that inspects the tree. + detectorMu.Lock() + for name, d := range detectors { + if d(repoPath) { + matched[name] = true + } + } + detectorMu.Unlock() + + var out []Language + registryMu.RLock() + for name := range matched { + if l, ok := registryMap[name]; ok { + out = append(out, l) + } + } + registryMu.RUnlock() + + sort.Slice(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out +} diff --git a/internal/lang/detect_test.go b/internal/lang/detect_test.go new file mode 100644 index 0000000..d80b1dd --- /dev/null +++ b/internal/lang/detect_test.go @@ -0,0 +1,131 @@ +package lang + +import ( + "os" + "path/filepath" + "testing" +) + +func TestDetect_ManifestMatch(t *testing.T) { + defer UnregisterForTest("test-detect-manifest") + Register(&fakeLang{name: "test-detect-manifest"}) + RegisterManifest("test-detect-marker", "test-detect-manifest") + t.Cleanup(func() { + manifestMu.Lock() + delete(manifests, "test-detect-marker") + manifestMu.Unlock() + }) + + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "test-detect-marker"), []byte("x"), 0644); err != nil { + t.Fatal(err) + } + + found := names(Detect(dir)) + if !contains(found, "test-detect-manifest") { + t.Errorf("Detect returned %v, want it to include test-detect-manifest", found) + } +} + +func TestDetect_CustomDetector(t *testing.T) { + defer UnregisterForTest("test-detect-custom") + Register(&fakeLang{name: "test-detect-custom"}) + RegisterDetector("test-detect-custom", func(string) bool { return true }) + t.Cleanup(func() { + detectorMu.Lock() + delete(detectors, "test-detect-custom") + detectorMu.Unlock() + }) + + dir := 
t.TempDir() + found := names(Detect(dir)) + if !contains(found, "test-detect-custom") { + t.Errorf("Detect returned %v, want it to include test-detect-custom", found) + } +} + +func TestDetect_EmptyRepo(t *testing.T) { + dir := t.TempDir() + // No languages with matching manifests should fire on an empty dir. + // We can't assert len==0 because goanalyzer's init() registered "go" + // with a go.mod manifest, and there's no go.mod in the tempdir so "go" + // should not match. + found := names(Detect(dir)) + if contains(found, "go") { + t.Errorf("Detect on empty dir returned %v, did not expect 'go'", found) + } +} + +func TestDetect_MultipleLanguages(t *testing.T) { + defer UnregisterForTest("test-multi-a") + defer UnregisterForTest("test-multi-b") + Register(&fakeLang{name: "test-multi-a"}) + Register(&fakeLang{name: "test-multi-b"}) + RegisterManifest("marker-a", "test-multi-a") + RegisterManifest("marker-b", "test-multi-b") + t.Cleanup(func() { + manifestMu.Lock() + delete(manifests, "marker-a") + delete(manifests, "marker-b") + manifestMu.Unlock() + }) + + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "marker-a"), []byte("x"), 0644) + os.WriteFile(filepath.Join(dir, "marker-b"), []byte("x"), 0644) + + found := names(Detect(dir)) + if !contains(found, "test-multi-a") || !contains(found, "test-multi-b") { + t.Errorf("Detect returned %v, want both test-multi-a and test-multi-b", found) + } + + // Ordering must be deterministic (sorted by Name()). + idxA, idxB := -1, -1 + for i, n := range found { + if n == "test-multi-a" { + idxA = i + } + if n == "test-multi-b" { + idxB = i + } + } + if idxA > idxB { + t.Errorf("Detect did not sort by name: %v", found) + } +} + +func TestDetect_UnregisteredManifestIgnored(t *testing.T) { + // Register a manifest pointing to a language that is NOT registered. + // Detect should not include it in the results. 
+ RegisterManifest("unknown-manifest", "no-such-language") + t.Cleanup(func() { + manifestMu.Lock() + delete(manifests, "unknown-manifest") + manifestMu.Unlock() + }) + + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "unknown-manifest"), []byte("x"), 0644) + + found := names(Detect(dir)) + if contains(found, "no-such-language") { + t.Errorf("Detect returned unregistered language: %v", found) + } +} + +func names(langs []Language) []string { + out := make([]string, len(langs)) + for i, l := range langs { + out[i] = l.Name() + } + return out +} + +func contains(s []string, want string) bool { + for _, v := range s { + if v == want { + return true + } + } + return false +} diff --git a/internal/lang/lang.go b/internal/lang/lang.go new file mode 100644 index 0000000..79e0661 --- /dev/null +++ b/internal/lang/lang.go @@ -0,0 +1,207 @@ +// Package lang defines the per-language analyzer interfaces that diffguard +// plugs into. A language implementation registers itself via Register() from +// an init() function; the diffguard CLI blank-imports each language package it +// supports so the registration happens at process start. +// +// The types and interfaces declared here are the single source of truth for +// the data passed between the diff parser, the analyzers, and the language +// back-ends. Keeping them in one package avoids import cycles (analyzer +// packages import `lang`; language packages import `lang`; neither imports +// the other). +package lang + +import ( + "time" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// FileFilter controls which files the diff parser includes and which it +// classifies as test files. A language exposes its filter as a plain value +// struct so callers can read the fields directly — the diff parser uses +// Extensions/IsTestFile/DiffGlobs during path walks. +type FileFilter struct { + // Extensions is the list of source file extensions (including the leading + // dot) that belong to this language, e.g. 
[".go"] or [".ts", ".tsx"].
+	Extensions []string
+	// IsTestFile reports whether the given path is a test file that should be
+	// excluded from analysis.
+	IsTestFile func(path string) bool
+	// DiffGlobs is the list of globs passed to `git diff -- ` to scope
+	// the diff output to this language's files.
+	DiffGlobs []string
+}
+
+// MatchesExtension reports whether path has one of the filter's source
+// extensions. It does not apply the IsTestFile check.
+func (f FileFilter) MatchesExtension(path string) bool {
+	for _, ext := range f.Extensions {
+		if hasSuffix(path, ext) {
+			return true
+		}
+	}
+	return false
+}
+
+// IncludesSource reports whether path is an analyzable source file: the
+// extension matches and the file is not a test file.
+func (f FileFilter) IncludesSource(path string) bool {
+	if !f.MatchesExtension(path) {
+		return false
+	}
+	if f.IsTestFile != nil && f.IsTestFile(path) {
+		return false
+	}
+	return true
+}
+
+// hasSuffix mirrors strings.HasSuffix. It exists only to keep this
+// package's import list down to time and diff; the body is the same
+// single expression as the goanalyzer copy so the two helpers stay
+// textually identical and trivially inlinable.
+func hasSuffix(s, suffix string) bool {
+	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
+}
+
+// FunctionInfo identifies a function in a source file. It's embedded by the
+// richer FunctionSize and FunctionComplexity types so analyzers can share one
+// identity struct.
+type FunctionInfo struct {
+	File    string
+	Line    int
+	EndLine int
+	Name    string
+}
+
+// FunctionSize holds size info for a single function.
+type FunctionSize struct {
+	FunctionInfo
+	Lines int
+}
+
+// FileSize holds size info for a single file.
+type FileSize struct {
+	Path  string
+	Lines int
+}
+
+// FunctionComplexity holds a complexity score for a single function.
It's +// used by both the complexity analyzer and the churn analyzer (via the +// ComplexityScorer interface, which may reuse the ComplexityCalculator's +// implementation or provide a lighter approximation). +type FunctionComplexity struct { + FunctionInfo + Complexity int +} + +// MutantSite describes a single potential mutation within changed code. +type MutantSite struct { + File string + Line int + Description string + Operator string +} + +// TestRunConfig carries the parameters needed to run tests against a single +// mutant. The set of fields is deliberately broad so temp-copy runners +// (which need WorkDir and Index to write a scratch copy) and overlay-based +// runners (which only need the MutantFile, OriginalFile, and RepoPath) can +// share one shape. +type TestRunConfig struct { + // RepoPath is the absolute path to the repository root. + RepoPath string + // MutantFile is the absolute path to the file containing the mutated + // source (usually a temp file). For languages that run tests directly on + // the original tree this may be the path to the original file after the + // mutation has been written to it. + MutantFile string + // OriginalFile is the absolute path to the original (unmutated) source + // file. Temp-copy runners use this to restore the original after running + // the tests. + OriginalFile string + // Timeout caps the test run's wall-clock duration. + Timeout time.Duration + // TestPattern, if non-empty, is passed to the runner's test filter flag + // (e.g. `go test -run `). + TestPattern string + // WorkDir is a writable directory private to this run, available for + // overlay files, backups, etc. + WorkDir string + // Index is a monotonically-increasing identifier for the mutant within + // the current run. Useful for naming per-mutant temp files without + // collision. + Index int +} + +// ComplexityCalculator computes cognitive complexity per function for a +// single file's changed regions. 
+type ComplexityCalculator interface { + AnalyzeFile(absPath string, fc diff.FileChange) ([]FunctionComplexity, error) +} + +// ComplexityScorer is a lightweight complexity score for churn weighting. It +// may share its implementation with ComplexityCalculator or be a faster, +// coarser approximation — the churn analyzer only needs a number, not a +// categorized score. +type ComplexityScorer interface { + ScoreFile(absPath string, fc diff.FileChange) ([]FunctionComplexity, error) +} + +// FunctionExtractor parses a single file and reports its function sizes plus +// the overall file size. +type FunctionExtractor interface { + ExtractFunctions(absPath string, fc diff.FileChange) ([]FunctionSize, *FileSize, error) +} + +// ImportResolver drives the deps analyzer. DetectModulePath returns the +// project-level identifier used to classify internal vs. external imports; +// ScanPackageImports returns a per-package adjacency list keyed by the +// importing package's directory-level identifier. +type ImportResolver interface { + DetectModulePath(repoPath string) (string, error) + ScanPackageImports(repoPath, pkgDir, modulePath string) map[string]map[string]bool +} + +// MutantGenerator returns the mutation sites produced for a single file's +// changed regions, after disabled lines have been filtered out. +type MutantGenerator interface { + GenerateMutants(absPath string, fc diff.FileChange, disabledLines map[int]bool) ([]MutantSite, error) +} + +// MutantApplier produces the mutated source bytes for a given mutation site. +// Returning nil signals "skip this mutant" — callers should not treat a nil +// return as an error. +type MutantApplier interface { + ApplyMutation(absPath string, site MutantSite) ([]byte, error) +} + +// AnnotationScanner returns the set of source lines on which mutation +// generation should be suppressed, based on in-source annotations. 
+type AnnotationScanner interface { + ScanAnnotations(absPath string) (map[int]bool, error) +} + +// TestRunner executes the language's test suite against a mutated source +// tree and reports whether any test failed (the mutant was "killed"). +type TestRunner interface { + RunTest(cfg TestRunConfig) (killed bool, output string, err error) +} + +// Language is the top-level per-language interface. Every language +// implementation exposes its sub-components through this one type so the +// orchestrator can iterate `for _, l := range lang.All()` and read out any +// capability it needs. +type Language interface { + Name() string + FileFilter() FileFilter + ComplexityCalculator() ComplexityCalculator + FunctionExtractor() FunctionExtractor + ImportResolver() ImportResolver + ComplexityScorer() ComplexityScorer + MutantGenerator() MutantGenerator + MutantApplier() MutantApplier + AnnotationScanner() AnnotationScanner + TestRunner() TestRunner +} diff --git a/internal/lang/lang_test.go b/internal/lang/lang_test.go new file mode 100644 index 0000000..85ed81c --- /dev/null +++ b/internal/lang/lang_test.go @@ -0,0 +1,66 @@ +package lang + +import "testing" + +func TestFileFilter_MatchesExtension(t *testing.T) { + f := FileFilter{Extensions: []string{".go"}} + tests := []struct { + path string + want bool + }{ + {"foo.go", true}, + {"path/to/foo.go", true}, + {"foo_test.go", true}, + {"foo.txt", false}, + {"", false}, + } + for _, tt := range tests { + if got := f.MatchesExtension(tt.path); got != tt.want { + t.Errorf("MatchesExtension(%q) = %v, want %v", tt.path, got, tt.want) + } + } +} + +func TestFileFilter_IncludesSource(t *testing.T) { + f := FileFilter{ + Extensions: []string{".go"}, + IsTestFile: func(p string) bool { + return len(p) >= len("_test.go") && p[len(p)-len("_test.go"):] == "_test.go" + }, + } + tests := []struct { + path string + want bool + }{ + {"foo.go", true}, + {"foo_test.go", false}, + {"foo.txt", false}, + } + for _, tt := range tests { + if got 
:= f.IncludesSource(tt.path); got != tt.want { + t.Errorf("IncludesSource(%q) = %v, want %v", tt.path, got, tt.want) + } + } +} + +func TestFileFilter_MultipleExtensions(t *testing.T) { + f := FileFilter{Extensions: []string{".ts", ".tsx"}} + if !f.MatchesExtension("foo.ts") { + t.Error("want .ts to match") + } + if !f.MatchesExtension("foo.tsx") { + t.Error("want .tsx to match") + } + if f.MatchesExtension("foo.js") { + t.Error("want .js not to match") + } +} + +func TestFileFilter_NilIsTestFile(t *testing.T) { + // IncludesSource with nil IsTestFile must not panic and should treat + // everything with a matching extension as non-test. + f := FileFilter{Extensions: []string{".go"}} + if !f.IncludesSource("foo_test.go") { + t.Error("with nil IsTestFile, everything with matching ext should be included") + } +} diff --git a/internal/lang/registry.go b/internal/lang/registry.go new file mode 100644 index 0000000..e56b842 --- /dev/null +++ b/internal/lang/registry.go @@ -0,0 +1,79 @@ +package lang + +import ( + "fmt" + "sort" + "sync" +) + +// registry stores the set of languages that have self-registered via init(). +// It is safe for concurrent use; registrations happen during package init so +// the lock is rarely contended in practice, but Get/All are called from the +// main goroutine while other init() calls may still be running when the +// diffguard binary is linked with many language plugins. +var ( + registryMu sync.RWMutex + registryMap = map[string]Language{} +) + +// Register adds a Language to the global registry under its Name(). It +// panics on duplicate registration because registrations always happen from +// init() functions: a duplicate is a programming error in the build graph +// (two packages registering the same language) and should fail loudly before +// main() runs. 
+func Register(l Language) { + if l == nil { + panic("lang.Register: nil Language") + } + name := l.Name() + if name == "" { + panic("lang.Register: Language.Name() returned empty string") + } + registryMu.Lock() + defer registryMu.Unlock() + if _, exists := registryMap[name]; exists { + panic(fmt.Sprintf("lang.Register: language %q already registered", name)) + } + registryMap[name] = l +} + +// Get returns the language registered under the given name, or (nil, false) +// if no such language is registered. +func Get(name string) (Language, bool) { + registryMu.RLock() + defer registryMu.RUnlock() + l, ok := registryMap[name] + return l, ok +} + +// All returns every registered language, sorted by Name(). Deterministic +// ordering keeps report sections stable across runs and hosts. +func All() []Language { + registryMu.RLock() + defer registryMu.RUnlock() + out := make([]Language, 0, len(registryMap)) + for _, l := range registryMap { + out = append(out, l) + } + sort.Slice(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out +} + +// unregisterForTest removes the named language from the registry. It is only +// useful from _test.go files that temporarily register fake languages; the +// production code path never unregisters. +// +// Tests use it by calling `lang.UnregisterForTest("x")` — declared here so +// test packages can access it without exporting an unhygienic symbol. +func unregisterForTest(name string) { + registryMu.Lock() + defer registryMu.Unlock() + delete(registryMap, name) +} + +// UnregisterForTest is the exported entry point into unregisterForTest. +// Production code must never call it; it exists so unit tests can keep the +// registry clean after injecting a fake Language. 
+func UnregisterForTest(name string) { + unregisterForTest(name) +} diff --git a/internal/lang/registry_test.go b/internal/lang/registry_test.go new file mode 100644 index 0000000..53deeac --- /dev/null +++ b/internal/lang/registry_test.go @@ -0,0 +1,109 @@ +package lang + +import ( + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// fakeLang is a minimal Language stub used to exercise the registry. Its +// sub-component accessors all return nil — nothing calls them in the +// registry-only tests. +type fakeLang struct{ name string } + +func (f *fakeLang) Name() string { return f.name } +func (f *fakeLang) FileFilter() FileFilter { return FileFilter{} } +func (f *fakeLang) ComplexityCalculator() ComplexityCalculator { return nil } +func (f *fakeLang) FunctionExtractor() FunctionExtractor { return nil } +func (f *fakeLang) ImportResolver() ImportResolver { return nil } +func (f *fakeLang) ComplexityScorer() ComplexityScorer { return nil } +func (f *fakeLang) MutantGenerator() MutantGenerator { return nil } +func (f *fakeLang) MutantApplier() MutantApplier { return nil } +func (f *fakeLang) AnnotationScanner() AnnotationScanner { return nil } +func (f *fakeLang) TestRunner() TestRunner { return nil } + +// Silence the unused-import check — the import is kept so that fakeLang +// remains plug-compatible with the analyzer interfaces that reference the +// diff package in their method signatures. 
+var _ = diff.FileChange{} + +func TestRegister_And_Get(t *testing.T) { + defer UnregisterForTest("test-registry-1") + + l := &fakeLang{name: "test-registry-1"} + Register(l) + + got, ok := Get("test-registry-1") + if !ok { + t.Fatal("expected Get to find registered language") + } + if got.Name() != "test-registry-1" { + t.Errorf("Get returned %q, want test-registry-1", got.Name()) + } + + if _, ok := Get("no-such-language"); ok { + t.Error("Get should return false for unknown name") + } +} + +func TestRegister_DuplicatePanics(t *testing.T) { + defer UnregisterForTest("test-dup") + + Register(&fakeLang{name: "test-dup"}) + + defer func() { + if r := recover(); r == nil { + t.Error("expected panic on duplicate registration") + } + }() + Register(&fakeLang{name: "test-dup"}) +} + +func TestRegister_NilPanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("expected panic on nil registration") + } + }() + Register(nil) +} + +func TestRegister_EmptyNamePanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Error("expected panic on empty-name registration") + } + }() + Register(&fakeLang{name: ""}) +} + +func TestAll_SortedByName(t *testing.T) { + // Use distinct prefixes so we don't collide with any real language + // registrations coming from goanalyzer/init(). + defer UnregisterForTest("zzz-all-b") + defer UnregisterForTest("zzz-all-a") + defer UnregisterForTest("zzz-all-c") + + Register(&fakeLang{name: "zzz-all-b"}) + Register(&fakeLang{name: "zzz-all-a"}) + Register(&fakeLang{name: "zzz-all-c"}) + + all := All() + // Filter to just our test fakes so real registrations (e.g. "go" from + // goanalyzer) don't disturb the ordering assertion. 
+ var got []string + for _, l := range all { + if len(l.Name()) >= 4 && l.Name()[:4] == "zzz-" { + got = append(got, l.Name()) + } + } + want := []string{"zzz-all-a", "zzz-all-b", "zzz-all-c"} + if len(got) != len(want) { + t.Fatalf("got %v, want %v", got, want) + } + for i := range got { + if got[i] != want[i] { + t.Errorf("All[%d] = %q, want %q", i, got[i], want[i]) + } + } +} From ee45a41e89ea04267d5d22c5e703a484efd9b22b Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:40:44 -0400 Subject: [PATCH 03/38] =?UTF-8?q?feat(lang):=20Part=20A2=20=E2=80=94=20ext?= =?UTF-8?q?ract=20Go=20analyzer=20into=20goanalyzer=20package?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement all 9 lang.* sub-interfaces in internal/lang/goanalyzer/, with one file per concern (parse, complexity, sizes, deps, mutation_generate, mutation_apply, mutation_annotate, testrunner, goanalyzer). The three duplicated funcName helpers from sizes.go, complexity.go, and churn.go collapse into the single definition in parse.go. goanalyzer.init() calls lang.Register(&Language{}) and lang.RegisterManifest("go.mod", "go") so Go is auto-detected by manifest and ready to serve once the package is imported. The old analyzer packages still hold the orchestration code in this commit — A3 parameterizes diff, A4 routes the analyzers through the interfaces, and the old embedded AST paths get deleted there. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/goanalyzer/complexity.go | 208 +++++++++++++++++ internal/lang/goanalyzer/deps.go | 68 ++++++ internal/lang/goanalyzer/goanalyzer.go | 62 +++++ internal/lang/goanalyzer/goanalyzer_test.go | 151 +++++++++++++ internal/lang/goanalyzer/mutation_annotate.go | 95 ++++++++ internal/lang/goanalyzer/mutation_apply.go | 210 +++++++++++++++++ internal/lang/goanalyzer/mutation_generate.go | 213 ++++++++++++++++++ internal/lang/goanalyzer/parse.go | 60 +++++ internal/lang/goanalyzer/sizes.go | 51 +++++ internal/lang/goanalyzer/testrunner.go | 76 +++++++ 10 files changed, 1194 insertions(+) create mode 100644 internal/lang/goanalyzer/complexity.go create mode 100644 internal/lang/goanalyzer/deps.go create mode 100644 internal/lang/goanalyzer/goanalyzer.go create mode 100644 internal/lang/goanalyzer/goanalyzer_test.go create mode 100644 internal/lang/goanalyzer/mutation_annotate.go create mode 100644 internal/lang/goanalyzer/mutation_apply.go create mode 100644 internal/lang/goanalyzer/mutation_generate.go create mode 100644 internal/lang/goanalyzer/parse.go create mode 100644 internal/lang/goanalyzer/sizes.go create mode 100644 internal/lang/goanalyzer/testrunner.go diff --git a/internal/lang/goanalyzer/complexity.go b/internal/lang/goanalyzer/complexity.go new file mode 100644 index 0000000..e38cd12 --- /dev/null +++ b/internal/lang/goanalyzer/complexity.go @@ -0,0 +1,208 @@ +package goanalyzer + +import ( + "go/ast" + "go/token" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// complexityImpl is the Go implementation of both lang.ComplexityCalculator +// and lang.ComplexityScorer. The scorer interface is defined separately so +// a language can ship a faster approximation; for Go the full cognitive +// score is cheap enough that one struct serves both. 
+type complexityImpl struct{} + +// AnalyzeFile returns per-function cognitive complexity for functions whose +// line range overlaps the diff's changed regions. Parse errors return +// (nil, nil) — the old analyzer treated parse failure as "skip the file" +// and we preserve that behavior. +func (complexityImpl) AnalyzeFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + fset, f, err := parseFile(absPath, 0) + if err != nil { + return nil, nil + } + + var results []lang.FunctionComplexity + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok { + return true + } + startLine := fset.Position(fn.Pos()).Line + endLine := fset.Position(fn.End()).Line + if !fc.OverlapsRange(startLine, endLine) { + return false + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: startLine, + EndLine: endLine, + Name: funcName(fn), + }, + Complexity: computeCognitiveComplexity(fn.Body), + }) + return false + }) + return results, nil +} + +// ScoreFile is the ComplexityScorer entry point. Churn weighting only needs +// a number; reusing the full cognitive calculation keeps scores consistent +// between the complexity section and the churn-weighting it feeds into. +func (complexityImpl) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return complexityImpl{}.AnalyzeFile(absPath, fc) +} + +// computeCognitiveComplexity is the exact algorithm that lived in +// internal/complexity/complexity.go before the language split. It's moved +// here verbatim (only the receiver type changed) so byte-identical scores +// are guaranteed. 
+func computeCognitiveComplexity(body *ast.BlockStmt) int { + if body == nil { + return 0 + } + return walkBlock(body.List, 0) +} + +func walkBlock(stmts []ast.Stmt, nesting int) int { + total := 0 + for _, stmt := range stmts { + total += walkStmt(stmt, nesting) + } + return total +} + +func walkStmt(stmt ast.Stmt, nesting int) int { + switch s := stmt.(type) { + case *ast.IfStmt: + return walkIfStmt(s, nesting) + case *ast.ForStmt: + return walkForStmt(s, nesting) + case *ast.RangeStmt: + return 1 + nesting + walkBlock(s.Body.List, nesting+1) + case *ast.SwitchStmt: + return 1 + nesting + walkBlock(s.Body.List, nesting+1) + case *ast.TypeSwitchStmt: + return 1 + nesting + walkBlock(s.Body.List, nesting+1) + case *ast.SelectStmt: + return 1 + nesting + walkBlock(s.Body.List, nesting+1) + case *ast.CaseClause: + return walkBlock(s.Body, nesting) + case *ast.CommClause: + return walkBlock(s.Body, nesting) + case *ast.BlockStmt: + return walkBlock(s.List, nesting) + case *ast.LabeledStmt: + return walkStmt(s.Stmt, nesting) + case *ast.AssignStmt: + return walkExprsForFuncLit(s.Rhs, nesting) + case *ast.ExprStmt: + return walkExprForFuncLit(s.X, nesting) + case *ast.ReturnStmt: + return walkExprsForFuncLit(s.Results, nesting) + case *ast.GoStmt: + return walkExprForFuncLit(s.Call.Fun, nesting) + case *ast.DeferStmt: + return walkExprForFuncLit(s.Call.Fun, nesting) + } + return 0 +} + +func walkIfStmt(s *ast.IfStmt, nesting int) int { + total := 1 + nesting + total += countLogicalOps(s.Cond) + if s.Init != nil { + total += walkStmt(s.Init, nesting) + } + total += walkBlock(s.Body.List, nesting+1) + if s.Else != nil { + total += walkElseChain(s.Else, nesting) + } + return total +} + +func walkForStmt(s *ast.ForStmt, nesting int) int { + total := 1 + nesting + if s.Cond != nil { + total += countLogicalOps(s.Cond) + } + total += walkBlock(s.Body.List, nesting+1) + return total +} + +func walkElseChain(node ast.Node, nesting int) int { + switch e := node.(type) { + case 
*ast.IfStmt: + total := 1 + total += countLogicalOps(e.Cond) + if e.Init != nil { + total += walkStmt(e.Init, nesting) + } + total += walkBlock(e.Body.List, nesting+1) + if e.Else != nil { + total += walkElseChain(e.Else, nesting) + } + return total + case *ast.BlockStmt: + return 1 + walkBlock(e.List, nesting+1) + } + return 0 +} + +func walkExprsForFuncLit(exprs []ast.Expr, nesting int) int { + total := 0 + for _, expr := range exprs { + total += walkExprForFuncLit(expr, nesting) + } + return total +} + +func walkExprForFuncLit(expr ast.Expr, nesting int) int { + total := 0 + ast.Inspect(expr, func(n ast.Node) bool { + if fl, ok := n.(*ast.FuncLit); ok { + total += walkBlock(fl.Body.List, nesting+1) + return false + } + return true + }) + return total +} + +// countLogicalOps counts operator-type changes in a chain of && / ||. +// A run of the same operator counts as 1; each switch to the other +// operator adds 1. No logical ops at all → 0. +func countLogicalOps(expr ast.Expr) int { + if expr == nil { + return 0 + } + ops := flattenLogicalOps(expr) + if len(ops) == 0 { + return 0 + } + count := 1 + for i := 1; i < len(ops); i++ { + if ops[i] != ops[i-1] { + count++ + } + } + return count +} + +func flattenLogicalOps(expr ast.Expr) []token.Token { + bin, ok := expr.(*ast.BinaryExpr) + if !ok { + return nil + } + if bin.Op != token.LAND && bin.Op != token.LOR { + return nil + } + var ops []token.Token + ops = append(ops, flattenLogicalOps(bin.X)...) + ops = append(ops, bin.Op) + ops = append(ops, flattenLogicalOps(bin.Y)...) + return ops +} diff --git a/internal/lang/goanalyzer/deps.go b/internal/lang/goanalyzer/deps.go new file mode 100644 index 0000000..57ad50c --- /dev/null +++ b/internal/lang/goanalyzer/deps.go @@ -0,0 +1,68 @@ +package goanalyzer + +import ( + "fmt" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" +) + +// depsImpl implements lang.ImportResolver for Go. 
It reads the module path
+// from go.mod and uses the standard Go parser to scan each package for
+// internal imports.
+type depsImpl struct{}
+
+// DetectModulePath reads the `module` directive from repoPath/go.mod and
+// returns its path. It tolerates tab or multi-space separation between the
+// directive and the path, trailing line comments, and a quoted module path
+// — forms gofmt never emits but the go.mod grammar allows, so hand-edited
+// files parse too.
+func (depsImpl) DetectModulePath(repoPath string) (string, error) {
+	content, err := os.ReadFile(filepath.Join(repoPath, "go.mod"))
+	if err != nil {
+		return "", fmt.Errorf("reading go.mod: %w", err)
+	}
+	for _, line := range strings.Split(string(content), "\n") {
+		// Drop any trailing line comment before tokenizing so
+		// `module example.com/foo // comment` parses.
+		if i := strings.Index(line, "//"); i >= 0 {
+			line = line[:i]
+		}
+		// strings.Fields handles tabs and runs of spaces, which the old
+		// `module ` single-space prefix check did not.
+		fields := strings.Fields(line)
+		if len(fields) >= 2 && fields[0] == "module" {
+			// The module path may be an interpreted (quoted) string.
+			return strings.Trim(fields[1], `"`), nil
+		}
+	}
+	return "", fmt.Errorf("no module directive found in go.mod")
+}
+
+// ScanPackageImports returns a map with a single entry:
+//
+//	{ pkgImportPath: { importedPath: true, ... } }
+//
+// where pkgImportPath = modulePath + "/" + pkgDir. External imports and
+// `_test` packages are ignored so the graph only contains internal edges,
+// matching the pre-split deps.go behavior.
+func (depsImpl) ScanPackageImports(repoPath, pkgDir, modulePath string) map[string]map[string]bool {
+	absDir := filepath.Join(repoPath, pkgDir)
+	fset := token.NewFileSet()
+	pkgs, err := parser.ParseDir(fset, absDir, nil, parser.ImportsOnly)
+	if err != nil {
+		// Unparseable directory: contribute no edges rather than failing
+		// the whole deps analysis.
+		return nil
+	}
+
+	edges := make(map[string]map[string]bool)
+	pkgImportPath := modulePath + "/" + pkgDir
+	for _, p := range pkgs {
+		if strings.HasSuffix(p.Name, "_test") {
+			continue
+		}
+		for _, f := range p.Files {
+			for _, imp := range f.Imports {
+				importPath := strings.Trim(imp.Path.Value, `"`)
+				// Internal means the module root itself or one of its
+				// subpackages. A bare HasPrefix(importPath, modulePath)
+				// would also match a sibling module that merely shares a
+				// name prefix (example.com/foobar when the module is
+				// example.com/foo), polluting the graph with external
+				// edges.
+				if importPath != modulePath && !strings.HasPrefix(importPath, modulePath+"/") {
+					continue
+				}
+				if edges[pkgImportPath] == nil {
+					edges[pkgImportPath] = make(map[string]bool)
+				}
+				edges[pkgImportPath][importPath] = true
+			}
+		}
+	}
+	return edges
+}
diff --git a/internal/lang/goanalyzer/goanalyzer.go b/internal/lang/goanalyzer/goanalyzer.go
new file mode 100644
index 0000000..6585305
--- /dev/null
+++ b/internal/lang/goanalyzer/goanalyzer.go
@@ -0,0 +1,62 @@
+package goanalyzer
+
+import (
+	"time"
+
+	"github.com/0xPolygon/diffguard/internal/lang"
+)
+
+// defaultGoTestTimeout is the per-mutant test timeout applied when the
+// caller did not set one in TestRunConfig. It matches the fallback the
+// mutation orchestrator used before the language split so behavior is
+// preserved byte-for-byte for existing Go runs.
+const defaultGoTestTimeout = 30 * time.Second
+
+// Language is the Go implementation of lang.Language. It holds no state —
+// the sub-component impls are stateless too — but exists as a concrete
+// type so external tests can construct one without relying on the
+// side-effectful init() registration.
+type Language struct{}
+
+// Name returns the canonical language identifier used by the registry and
+// by report section suffixes.
+func (*Language) Name() string { return "go" } + +// FileFilter returns the Go-specific file selection rules used by the diff +// parser: .go extension, _test.go files excluded from analysis. +func (*Language) FileFilter() lang.FileFilter { + return lang.FileFilter{ + Extensions: []string{".go"}, + IsTestFile: isGoTestFile, + DiffGlobs: []string{"*.go"}, + } +} + +// Sub-component accessors. Every method returns a fresh zero-value impl +// value, which is fine because all impls are stateless. +func (*Language) ComplexityCalculator() lang.ComplexityCalculator { return complexityImpl{} } +func (*Language) ComplexityScorer() lang.ComplexityScorer { return complexityImpl{} } +func (*Language) FunctionExtractor() lang.FunctionExtractor { return sizesImpl{} } +func (*Language) ImportResolver() lang.ImportResolver { return depsImpl{} } +func (*Language) MutantGenerator() lang.MutantGenerator { return mutantGeneratorImpl{} } +func (*Language) MutantApplier() lang.MutantApplier { return mutantApplierImpl{} } +func (*Language) AnnotationScanner() lang.AnnotationScanner { return annotationScannerImpl{} } +func (*Language) TestRunner() lang.TestRunner { return testRunnerImpl{} } + +// isGoTestFile matches the historical internal/diff check: any path ending +// in `_test.go` is a test file. No magic, no parse. +func isGoTestFile(path string) bool { + return hasSuffix(path, "_test.go") +} + +func hasSuffix(s, suffix string) bool { + return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix +} + +// init registers the Go analyzer with the global lang registry. The blank +// import in cmd/diffguard/main.go triggers this; other binaries wishing to +// include the Go analyzer must also blank-import this package. 
+func init() { + lang.Register(&Language{}) + lang.RegisterManifest("go.mod", "go") +} diff --git a/internal/lang/goanalyzer/goanalyzer_test.go b/internal/lang/goanalyzer/goanalyzer_test.go new file mode 100644 index 0000000..e2e8e43 --- /dev/null +++ b/internal/lang/goanalyzer/goanalyzer_test.go @@ -0,0 +1,151 @@ +package goanalyzer + +import ( + "os" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// TestLanguage_Name pins the registered name. Other packages (CLI +// suffixing, tiers.go) key on this string. +func TestLanguage_Name(t *testing.T) { + l := &Language{} + if l.Name() != "go" { + t.Errorf("Name() = %q, want go", l.Name()) + } +} + +func TestLanguage_FileFilter(t *testing.T) { + f := (&Language{}).FileFilter() + if len(f.Extensions) != 1 || f.Extensions[0] != ".go" { + t.Errorf("Extensions = %v, want [.go]", f.Extensions) + } + if !f.IsTestFile("foo_test.go") { + t.Error("IsTestFile(foo_test.go) = false, want true") + } + if f.IsTestFile("foo.go") { + t.Error("IsTestFile(foo.go) = true, want false") + } + if len(f.DiffGlobs) != 1 || f.DiffGlobs[0] != "*.go" { + t.Errorf("DiffGlobs = %v, want [*.go]", f.DiffGlobs) + } +} + +// TestFuncName covers all three canonical forms: free function, value +// receiver method, pointer receiver method. funcName used to live in three +// places pre-split; this test is the canary that the consolidation didn't +// drop a case. 
+func TestFuncName(t *testing.T) { + tests := []struct { + code string + expected string + }{ + {`package p; func Foo() {}`, "Foo"}, + {`package p; type T struct{}; func (t T) Bar() {}`, "(T).Bar"}, + {`package p; type T struct{}; func (t *T) Baz() {}`, "(T).Baz"}, + } + for _, tt := range tests { + t.Run(tt.expected, func(t *testing.T) { + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + if err := os.WriteFile(fp, []byte(tt.code), 0644); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + results, _ := complexityImpl{}.AnalyzeFile(fp, fc) + if len(results) == 0 { + t.Fatal("no results") + } + if results[0].Name != tt.expected { + t.Errorf("Name = %q, want %q", results[0].Name, tt.expected) + } + }) + } +} + +func TestExtractFunctions_SharesShape(t *testing.T) { + code := `package p + +func f() { + x := 1 + _ = x +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "f.go") + os.WriteFile(fp, []byte(code), 0644) + + fc := diff.FileChange{ + Path: "f.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + fns, fsz, err := sizesImpl{}.ExtractFunctions(fp, fc) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if len(fns) != 1 { + t.Fatalf("len(fns) = %d, want 1", len(fns)) + } + if fns[0].Name != "f" { + t.Errorf("Name = %q, want f", fns[0].Name) + } + if fsz == nil || fsz.Lines == 0 { + t.Error("expected non-nil fsz with non-zero Lines") + } +} + +func TestComplexityAndScorer_Agree(t *testing.T) { + // ComplexityScorer.ScoreFile currently delegates to AnalyzeFile, so the + // per-function scores must match exactly. This is the invariant the + // churn analyzer relies on. 
+ code := `package p +func f(x int) { + if x > 0 { + if x > 1 {} + } +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "f.go") + os.WriteFile(fp, []byte(code), 0644) + fc := diff.FileChange{ + Path: "f.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + + analyze, _ := complexityImpl{}.AnalyzeFile(fp, fc) + score, _ := complexityImpl{}.ScoreFile(fp, fc) + if len(analyze) != len(score) { + t.Fatalf("len mismatch: %d vs %d", len(analyze), len(score)) + } + for i := range analyze { + if analyze[i].Complexity != score[i].Complexity { + t.Errorf("[%d] complexity mismatch: %d vs %d", i, analyze[i].Complexity, score[i].Complexity) + } + } +} + +func TestDetectModulePath(t *testing.T) { + dir := t.TempDir() + os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module example.com/foo\n\ngo 1.21\n"), 0644) + mod, err := depsImpl{}.DetectModulePath(dir) + if err != nil { + t.Fatalf("DetectModulePath: %v", err) + } + if mod != "example.com/foo" { + t.Errorf("mod = %q, want example.com/foo", mod) + } +} + +func TestDetectModulePath_Missing(t *testing.T) { + dir := t.TempDir() + _, err := depsImpl{}.DetectModulePath(dir) + if err == nil { + t.Error("expected error when go.mod is missing") + } +} diff --git a/internal/lang/goanalyzer/mutation_annotate.go b/internal/lang/goanalyzer/mutation_annotate.go new file mode 100644 index 0000000..f3bd3c2 --- /dev/null +++ b/internal/lang/goanalyzer/mutation_annotate.go @@ -0,0 +1,95 @@ +package goanalyzer + +import ( + "go/ast" + "go/parser" + "go/token" + "strings" +) + +// annotationScannerImpl implements lang.AnnotationScanner for Go. +// The disable annotations are `// mutator-disable-next-line` (skips the +// following source line) and `// mutator-disable-func` (skips every line of +// the enclosing function, including its signature). Both forms are stripped +// of their comment markers before matching so either `//` or `/* ... */` is +// accepted. 
+type annotationScannerImpl struct{} + +// ScanAnnotations returns the set of source lines on which mutation +// generation should be suppressed for absPath. The returned map is keyed by +// 1-based line number; a `true` value means disabled. +func (annotationScannerImpl) ScanAnnotations(absPath string) (map[int]bool, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) + if err != nil { + return nil, err + } + + disabled := make(map[int]bool) + funcs := funcRanges(fset, f) + for _, cg := range f.Comments { + for _, c := range cg.List { + applyAnnotation(stripCommentMarkers(c.Text), fset.Position(c.Pos()).Line, funcs, disabled) + } + } + return disabled, nil +} + +func stripCommentMarkers(raw string) string { + s := strings.TrimSpace(strings.TrimPrefix(raw, "//")) + s = strings.TrimSpace(strings.TrimPrefix(s, "/*")) + s = strings.TrimSpace(strings.TrimSuffix(s, "*/")) + return s +} + +func applyAnnotation(text string, commentLine int, funcs []funcRange, disabled map[int]bool) { + switch { + case strings.HasPrefix(text, "mutator-disable-next-line"): + disabled[commentLine+1] = true + case strings.HasPrefix(text, "mutator-disable-func"): + disableEnclosingFunc(commentLine, funcs, disabled) + } +} + +func disableEnclosingFunc(commentLine int, funcs []funcRange, disabled map[int]bool) { + for _, r := range funcs { + if isCommentForFunc(commentLine, r) { + markFuncDisabled(r, disabled) + return + } + } +} + +// isCommentForFunc reports whether a comment on commentLine applies to the +// given function, either because it's inside the function or directly +// precedes it (godoc-style, allowing one blank line). 
+func isCommentForFunc(commentLine int, r funcRange) bool { + if commentLine >= r.start && commentLine <= r.end { + return true + } + return r.start > commentLine && r.start-commentLine <= 2 +} + +func markFuncDisabled(r funcRange, disabled map[int]bool) { + for i := r.start; i <= r.end; i++ { + disabled[i] = true + } +} + +type funcRange struct{ start, end int } + +func funcRanges(fset *token.FileSet, f *ast.File) []funcRange { + var ranges []funcRange + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok || fn.Body == nil { + return true + } + ranges = append(ranges, funcRange{ + start: fset.Position(fn.Pos()).Line, + end: fset.Position(fn.End()).Line, + }) + return true + }) + return ranges +} diff --git a/internal/lang/goanalyzer/mutation_apply.go b/internal/lang/goanalyzer/mutation_apply.go new file mode 100644 index 0000000..b9c7da5 --- /dev/null +++ b/internal/lang/goanalyzer/mutation_apply.go @@ -0,0 +1,210 @@ +package goanalyzer + +import ( + "bytes" + "go/ast" + "go/parser" + "go/printer" + "go/token" + "strings" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantApplierImpl implements lang.MutantApplier for Go by re-parsing the +// original file, walking to the line of the mutation, and mutating the +// matching AST node. The caller gets the rendered source bytes back — the +// mutation orchestrator is responsible for writing them to a temp file and +// invoking `go test -overlay`. +type mutantApplierImpl struct{} + +// ApplyMutation returns mutated source bytes, or (nil, nil) if the mutation +// can't be applied (parse error, line/operator mismatch, etc.). Returning a +// nil-without-error is the signal the orchestrator expects for "skip this +// mutant" — matching the pre-split behavior. 
+func (mutantApplierImpl) ApplyMutation(absPath string, site lang.MutantSite) ([]byte, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) + if err != nil { + return nil, nil + } + + var applied bool + if site.Operator == "statement_deletion" { + applied = applyStatementDeletion(fset, f, site) + } else { + applied = applyMutationToAST(fset, f, site) + } + if !applied { + return nil, nil + } + return renderFile(fset, f), nil +} + +func applyMutationToAST(fset *token.FileSet, f *ast.File, m lang.MutantSite) bool { + applied := false + ast.Inspect(f, func(n ast.Node) bool { + if applied || n == nil { + return false + } + if fset.Position(n.Pos()).Line != m.Line { + return true + } + applied = tryApplyMutation(n, m) + return !applied + }) + return applied +} + +// applyStatementDeletion walks BlockStmts instead of the flat ast.Inspect +// used for other operators because it needs the containing block to replace +// a statement. +func applyStatementDeletion(fset *token.FileSet, f *ast.File, m lang.MutantSite) bool { + applied := false + ast.Inspect(f, func(n ast.Node) bool { + if applied { + return false + } + block, ok := n.(*ast.BlockStmt) + if !ok { + return true + } + if tryDeleteInBlock(fset, block, m) { + applied = true + return false + } + return true + }) + return applied +} + +func tryDeleteInBlock(fset *token.FileSet, block *ast.BlockStmt, m lang.MutantSite) bool { + for i, stmt := range block.List { + if fset.Position(stmt.Pos()).Line != m.Line { + continue + } + if _, ok := stmt.(*ast.ExprStmt); !ok { + continue + } + block.List[i] = &ast.EmptyStmt{Semicolon: stmt.Pos()} + return true + } + return false +} + +func tryApplyMutation(n ast.Node, m lang.MutantSite) bool { + switch m.Operator { + case "conditional_boundary", "negate_conditional", "math_operator": + return applyBinaryMutation(n, m) + case "boolean_substitution": + return applyBoolMutation(n, m) + case "return_value": + return 
applyReturnMutation(n) + case "incdec": + return applyIncDecMutation(n) + case "branch_removal": + return applyBranchRemoval(n) + } + return false +} + +func applyBinaryMutation(n ast.Node, m lang.MutantSite) bool { + expr, ok := n.(*ast.BinaryExpr) + if !ok { + return false + } + // Verify the operator matches the mutant description. Without this + // check, the walker would rewrite the first BinaryExpr it finds on + // the line — e.g. the outer `&&` in `a != nil && b`, or the outer + // `-` in `a + b - 1` — producing a no-op instead of the intended + // mutation and leaving a false-surviving mutant. + from, to := parseMutationOp(m.Description) + if to == token.ILLEGAL || expr.Op != from { + return false + } + expr.Op = to + return true +} + +func applyBoolMutation(n ast.Node, m lang.MutantSite) bool { + ident, ok := n.(*ast.Ident) + if !ok || (ident.Name != "true" && ident.Name != "false") { + return false + } + if strings.Contains(m.Description, "-> true") { + ident.Name = "true" + } else { + ident.Name = "false" + } + return true +} + +func applyReturnMutation(n ast.Node) bool { + ret, ok := n.(*ast.ReturnStmt) + if !ok { + return false + } + for i := range ret.Results { + ret.Results[i] = zeroValueExpr(ret.Results[i]) + } + return true +} + +func applyIncDecMutation(n ast.Node) bool { + stmt, ok := n.(*ast.IncDecStmt) + if !ok { + return false + } + switch stmt.Tok { + case token.INC: + stmt.Tok = token.DEC + case token.DEC: + stmt.Tok = token.INC + default: + return false + } + return true +} + +func applyBranchRemoval(n ast.Node) bool { + stmt, ok := n.(*ast.IfStmt) + if !ok || stmt.Body == nil { + return false + } + stmt.Body.List = nil + return true +} + +// parseMutationOp parses a mutant description of the form "X -> Y" into the +// (from, to) operator pair. Either token is ILLEGAL if parsing fails. 
+func parseMutationOp(desc string) (from, to token.Token) { + parts := strings.Split(desc, " -> ") + if len(parts) != 2 { + return token.ILLEGAL, token.ILLEGAL + } + opMap := map[string]token.Token{ + ">": token.GTR, ">=": token.GEQ, + "<": token.LSS, "<=": token.LEQ, + "==": token.EQL, "!=": token.NEQ, + "+": token.ADD, "-": token.SUB, + "*": token.MUL, "/": token.QUO, + } + fromOp, okFrom := opMap[parts[0]] + toOp, okTo := opMap[parts[1]] + if !okFrom || !okTo { + return token.ILLEGAL, token.ILLEGAL + } + return fromOp, toOp +} + +func zeroValueExpr(expr ast.Expr) ast.Expr { + return &ast.Ident{Name: "nil", NamePos: expr.Pos()} +} + +func renderFile(fset *token.FileSet, f *ast.File) []byte { + var buf bytes.Buffer + if err := printer.Fprint(&buf, fset, f); err != nil { + return nil + } + return buf.Bytes() +} diff --git a/internal/lang/goanalyzer/mutation_generate.go b/internal/lang/goanalyzer/mutation_generate.go new file mode 100644 index 0000000..8a4503b --- /dev/null +++ b/internal/lang/goanalyzer/mutation_generate.go @@ -0,0 +1,213 @@ +package goanalyzer + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantGeneratorImpl implements lang.MutantGenerator for Go. The generation +// strategy is unchanged from the pre-split internal/mutation/generate.go — +// the only difference is that mutants are now returned as []lang.MutantSite +// so the mutation orchestrator can stay language-agnostic. +type mutantGeneratorImpl struct{} + +// GenerateMutants re-parses the file (with comments so annotation scanning +// can share the same AST) and emits a MutantSite for each operator that +// applies on a changed, non-disabled line. 
+func (mutantGeneratorImpl) GenerateMutants(absPath string, fc diff.FileChange, disabled map[int]bool) ([]lang.MutantSite, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) + if err != nil { + return nil, err + } + + var mutants []lang.MutantSite + ast.Inspect(f, func(n ast.Node) bool { + if n == nil { + return true + } + line := fset.Position(n.Pos()).Line + if !fc.ContainsLine(line) || disabled[line] { + return true + } + mutants = append(mutants, mutantsFor(fc.Path, line, n)...) + return true + }) + return mutants, nil +} + +func mutantsFor(file string, line int, n ast.Node) []lang.MutantSite { + switch node := n.(type) { + case *ast.BinaryExpr: + return binaryMutants(file, line, node) + case *ast.Ident: + return boolMutants(file, line, node) + case *ast.ReturnStmt: + return returnMutants(file, line, node) + case *ast.IncDecStmt: + return incdecMutants(file, line, node) + case *ast.IfStmt: + return ifBodyMutants(file, line, node) + case *ast.ExprStmt: + return exprStmtMutants(file, line, node) + } + return nil +} + +// binaryMutants covers the conditional_boundary / negate_conditional / +// math_operator operators. Each source operator maps to a single canonical +// replacement; a surviving mutant should never be ambiguous about what +// "the mutation" was. 
+func binaryMutants(file string, line int, expr *ast.BinaryExpr) []lang.MutantSite { + replacements := map[token.Token][]token.Token{ + token.GTR: {token.GEQ}, + token.LSS: {token.LEQ}, + token.GEQ: {token.GTR}, + token.LEQ: {token.LSS}, + token.EQL: {token.NEQ}, + token.NEQ: {token.EQL}, + token.ADD: {token.SUB}, + token.SUB: {token.ADD}, + token.MUL: {token.QUO}, + token.QUO: {token.MUL}, + } + + targets, ok := replacements[expr.Op] + if !ok { + return nil + } + + var mutants []lang.MutantSite + for _, newOp := range targets { + mutants = append(mutants, lang.MutantSite{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", expr.Op, newOp), + Operator: operatorName(expr.Op, newOp), + }) + } + return mutants +} + +// boolMutants generates true <-> false mutations. +func boolMutants(file string, line int, ident *ast.Ident) []lang.MutantSite { + if ident.Name != "true" && ident.Name != "false" { + return nil + } + newVal := "true" + if ident.Name == "true" { + newVal = "false" + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", ident.Name, newVal), + Operator: "boolean_substitution", + }} +} + +// returnMutants generates zero-value return mutations. +// +// Returns whose every result is already the literal identifier `nil` are +// skipped: the zero-value mutation rewrites each result to `nil`, producing +// an identical AST and therefore an equivalent mutant that can never be +// killed. 
+func returnMutants(file string, line int, ret *ast.ReturnStmt) []lang.MutantSite { + if len(ret.Results) == 0 { + return nil + } + if allLiteralNil(ret.Results) { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "replace return values with zero values", + Operator: "return_value", + }} +} + +func allLiteralNil(exprs []ast.Expr) bool { + for _, e := range exprs { + ident, ok := e.(*ast.Ident) + if !ok || ident.Name != "nil" { + return false + } + } + return true +} + +// incdecMutants swaps ++ with -- and vice versa. +func incdecMutants(file string, line int, stmt *ast.IncDecStmt) []lang.MutantSite { + var newTok token.Token + switch stmt.Tok { + case token.INC: + newTok = token.DEC + case token.DEC: + newTok = token.INC + default: + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", stmt.Tok, newTok), + Operator: "incdec", + }} +} + +// ifBodyMutants empties the body of an if statement. +func ifBodyMutants(file string, line int, stmt *ast.IfStmt) []lang.MutantSite { + if stmt.Body == nil || len(stmt.Body.List) == 0 { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove if body", + Operator: "branch_removal", + }} +} + +// exprStmtMutants deletes a bare function-call statement (discards side effects). 
+func exprStmtMutants(file string, line int, stmt *ast.ExprStmt) []lang.MutantSite { + if _, ok := stmt.X.(*ast.CallExpr); !ok { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove call statement", + Operator: "statement_deletion", + }} +} + +func operatorName(from, to token.Token) string { + switch { + case isBoundary(from) || isBoundary(to): + return "conditional_boundary" + case isComparison(from) || isComparison(to): + return "negate_conditional" + case isMath(from) || isMath(to): + return "math_operator" + default: + return "unknown" + } +} + +func isBoundary(t token.Token) bool { + return t == token.GTR || t == token.GEQ || t == token.LSS || t == token.LEQ +} + +func isComparison(t token.Token) bool { + return t == token.EQL || t == token.NEQ +} + +func isMath(t token.Token) bool { + return t == token.ADD || t == token.SUB || t == token.MUL || t == token.QUO +} diff --git a/internal/lang/goanalyzer/parse.go b/internal/lang/goanalyzer/parse.go new file mode 100644 index 0000000..e1e7c4d --- /dev/null +++ b/internal/lang/goanalyzer/parse.go @@ -0,0 +1,60 @@ +// Package goanalyzer implements the lang.Language interface for Go. It is +// blank-imported from cmd/diffguard/main.go so that Go gets registered at +// process start. 
+// +// One file per concern per the top-level design doc: +// - goanalyzer.go -- Language + init()/Register +// - parse.go -- shared AST helpers (funcName, parseFile) +// - complexity.go -- ComplexityCalculator + ComplexityScorer +// - sizes.go -- FunctionExtractor +// - deps.go -- ImportResolver +// - mutation_generate.go-- MutantGenerator +// - mutation_apply.go -- MutantApplier +// - mutation_annotate.go-- AnnotationScanner +// - testrunner.go -- TestRunner (wraps go test -overlay) +package goanalyzer + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" +) + +// funcName returns the canonical identifier for a function or method: +// +// func Foo() -> "Foo" +// func (t T) Bar() -> "(T).Bar" +// func (t *T) Baz() -> "(T).Baz" +// +// This was duplicated in complexity.go, sizes.go, and churn.go before the +// language split; it now lives here as the single shared implementation. +func funcName(fn *ast.FuncDecl) string { + if fn.Recv != nil && len(fn.Recv.List) > 0 { + recv := fn.Recv.List[0] + var typeName string + switch t := recv.Type.(type) { + case *ast.StarExpr: + if ident, ok := t.X.(*ast.Ident); ok { + typeName = ident.Name + } + case *ast.Ident: + typeName = t.Name + } + return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) + } + return fn.Name.Name +} + +// parseFile parses absPath with the given mode. Returning (nil, nil, err) on +// parse failure keeps callers uniform: the existing Go analyzers treated a +// parse error as "skip this file" rather than propagating it up, and we +// preserve that behavior behind the interface. 
+func parseFile(absPath string, mode parser.Mode) (*token.FileSet, *ast.File, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, absPath, nil, mode) + if err != nil { + return nil, nil, err + } + return fset, f, nil +} diff --git a/internal/lang/goanalyzer/sizes.go b/internal/lang/goanalyzer/sizes.go new file mode 100644 index 0000000..5c58ff0 --- /dev/null +++ b/internal/lang/goanalyzer/sizes.go @@ -0,0 +1,51 @@ +package goanalyzer + +import ( + "go/ast" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// sizesImpl implements lang.FunctionExtractor for Go by parsing the file +// and reporting function line ranges plus the overall file line count. +type sizesImpl struct{} + +// ExtractFunctions parses absPath and returns (functions-in-changed-regions, +// file size, error). Parse errors return (nil, nil, nil) to match the +// pre-refactor behavior where parse failure silently skipped the file. +func (sizesImpl) ExtractFunctions(absPath string, fc diff.FileChange) ([]lang.FunctionSize, *lang.FileSize, error) { + fset, f, err := parseFile(absPath, 0) + if err != nil { + return nil, nil, nil + } + + var fileSize *lang.FileSize + if file := fset.File(f.Pos()); file != nil { + fileSize = &lang.FileSize{Path: fc.Path, Lines: file.LineCount()} + } + + var results []lang.FunctionSize + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok { + return true + } + startLine := fset.Position(fn.Pos()).Line + endLine := fset.Position(fn.End()).Line + if !fc.OverlapsRange(startLine, endLine) { + return false + } + results = append(results, lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: startLine, + EndLine: endLine, + Name: funcName(fn), + }, + Lines: endLine - startLine + 1, + }) + return false + }) + return results, fileSize, nil +} diff --git a/internal/lang/goanalyzer/testrunner.go b/internal/lang/goanalyzer/testrunner.go new file mode 100644 
index 0000000..090714b --- /dev/null +++ b/internal/lang/goanalyzer/testrunner.go @@ -0,0 +1,76 @@ +package goanalyzer + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// testRunnerImpl implements lang.TestRunner for Go using `go test -overlay`. +// The overlay mechanism lets mutants run fully in parallel — the build +// system picks up the mutant file without touching the real source — so +// this runner is stateless and safe to call concurrently. +type testRunnerImpl struct{} + +// RunTest writes a build-time overlay that redirects cfg.OriginalFile to +// cfg.MutantFile and invokes `go test` from the directory of the original +// file. A non-nil error from `go test` means at least one test failed — +// the mutant was killed. +// +// The returned (killed, output, err) triple matches the lang.TestRunner +// contract: err is the only error return for "the runner itself could not +// run" (e.g. couldn't write the overlay file); a normal test failure is +// reported via killed=true with the test output in `output`. +func (testRunnerImpl) RunTest(cfg lang.TestRunConfig) (bool, string, error) { + overlayPath := filepath.Join(cfg.WorkDir, fmt.Sprintf("m%d-overlay.json", cfg.Index)) + if err := writeOverlayJSON(overlayPath, cfg.OriginalFile, cfg.MutantFile); err != nil { + return false, "", err + } + + pkgDir := filepath.Dir(cfg.OriginalFile) + cmd := exec.Command("go", buildTestArgs(cfg, overlayPath)...) + cmd.Dir = pkgDir + var stderr bytes.Buffer + cmd.Stderr = &stderr + err := cmd.Run() + + if err != nil { + return true, stderr.String(), nil + } + return false, "", nil +} + +// writeOverlayJSON writes a go build overlay file mapping originalPath to +// mutantPath. See `go help build` -overlay flag for format details. 
+func writeOverlayJSON(path, originalPath, mutantPath string) error { + overlay := struct { + Replace map[string]string `json:"Replace"` + }{ + Replace: map[string]string{originalPath: mutantPath}, + } + data, err := json.Marshal(overlay) + if err != nil { + return err + } + return os.WriteFile(path, data, 0644) +} + +// buildTestArgs constructs the `go test` argv. The overlay argument is +// always present; -run is only added if the caller set TestPattern. +func buildTestArgs(cfg lang.TestRunConfig, overlayPath string) []string { + timeout := cfg.Timeout + if timeout <= 0 { + timeout = defaultGoTestTimeout + } + args := []string{"test", "-overlay=" + overlayPath, "-count=1", "-timeout", timeout.String()} + if cfg.TestPattern != "" { + args = append(args, "-run", cfg.TestPattern) + } + args = append(args, "./...") + return args +} From c3c24db48193be74073650eecfbdee5c83219ca4 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:43:17 -0400 Subject: [PATCH 04/38] =?UTF-8?q?feat(lang):=20Part=20A3=20=E2=80=94=20par?= =?UTF-8?q?ameterize=20diff=20parser=20with=20FileFilter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hardcoded isAnalyzableGoFile, "*.go" diff glob, and the "+++" path suffix checks in internal/diff/diff.go with a caller-supplied diff.Filter. Parse() and CollectPaths() now take the filter as an explicit parameter; parseHunkHeader and parseUnifiedDiff are unchanged at the shape level but thread the filter through. cmd/diffguard/main.go looks up the Go language through the registry (lang.Get("go")) and converts its lang.FileFilter into the narrower diff.Filter shape via a small diffFilter() helper, so the diff package doesn't have to import lang (which would create an import cycle once analyzers plug through interfaces in A4). 
Blank-import _ "internal/lang/goanalyzer" lands in this commit so the init() registration fires; Part A4 deletes the old embedded AST code in the analyzer packages and routes through the interfaces instead. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/diffguard/main.go | 29 +++++++++-- internal/diff/diff.go | 87 ++++++++++++++++++++++---------- internal/diff/diff_extra_test.go | 38 ++++++++------ internal/diff/diff_parse_test.go | 8 +-- internal/diff/diff_test.go | 4 +- internal/diff/helpers_test.go | 16 ++++++ 6 files changed, 128 insertions(+), 54 deletions(-) create mode 100644 internal/diff/helpers_test.go diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index 95bffdb..e046115 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -13,6 +13,8 @@ import ( "github.com/0xPolygon/diffguard/internal/complexity" "github.com/0xPolygon/diffguard/internal/deps" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" "github.com/0xPolygon/diffguard/internal/mutation" "github.com/0xPolygon/diffguard/internal/report" "github.com/0xPolygon/diffguard/internal/sizes" @@ -77,7 +79,13 @@ type Config struct { } func run(repoPath string, cfg Config) error { - d, err := loadFiles(repoPath, cfg) + goLang, ok := lang.Get("go") + if !ok { + return fmt.Errorf("go analyzer not registered") + } + filter := diffFilter(goLang) + + d, err := loadFiles(repoPath, cfg, filter) if err != nil { return err } @@ -180,25 +188,38 @@ func checkExitCode(r report.Report, failOn string) error { return nil } -func loadFiles(repoPath string, cfg Config) (*diff.Result, error) { +func loadFiles(repoPath string, cfg Config, filter diff.Filter) (*diff.Result, error) { if cfg.Paths != "" { paths := strings.Split(cfg.Paths, ",") for i := range paths { paths[i] = strings.TrimSpace(paths[i]) } - d, err := diff.CollectPaths(repoPath, paths) + d, err := diff.CollectPaths(repoPath, 
paths, filter) if err != nil { return nil, fmt.Errorf("collecting paths: %w", err) } return d, nil } - d, err := diff.Parse(repoPath, cfg.BaseBranch) + d, err := diff.Parse(repoPath, cfg.BaseBranch, filter) if err != nil { return nil, fmt.Errorf("parsing diff: %w", err) } return d, nil } +// diffFilter converts a language's lang.FileFilter into the diff.Filter +// shape the parser expects. The two shapes are intentionally different: +// lang.FileFilter exposes the fields languages need to declare their +// territory (extensions, IsTestFile, DiffGlobs), while diff.Filter only +// carries what the parser itself reads on each file (Includes + DiffGlobs). +func diffFilter(l lang.Language) diff.Filter { + f := l.FileFilter() + return diff.Filter{ + DiffGlobs: f.DiffGlobs, + Includes: f.IncludesSource, + } +} + func detectBaseBranch(repoPath string) string { for _, branch := range []string{"develop", "main", "master"} { cmd := exec.Command("git", "rev-parse", "--verify", branch) diff --git a/internal/diff/diff.go b/internal/diff/diff.go index 74fcc8c..4b17085 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -49,7 +49,7 @@ func (fc FileChange) OverlapsRange(start, end int) bool { return false } -// Result holds all changed Go files parsed from a git diff. +// Result holds all changed source files parsed from a git diff. type Result struct { BaseBranch string Files []FileChange @@ -79,8 +79,35 @@ func (r Result) FilesByPackage() map[string][]FileChange { return m } -// Parse runs git diff against the given base branch and parses changed Go files. -func Parse(repoPath, baseBranch string) (*Result, error) { +// Filter describes the subset of the diff the caller cares about. It is a +// narrower shape than lang.FileFilter so the diff package doesn't have to +// import lang (which would pull the full analyzer stack). Callers (usually +// cmd/diffguard) construct a Filter from their chosen language's +// lang.FileFilter and pass it here. 
+type Filter struct { + // DiffGlobs is passed to `git diff -- ` to restrict the raw diff + // to language source files. + DiffGlobs []string + // Includes reports whether an analyzable source path (extension matches, + // not a test file) belongs to the caller's language. + Includes func(path string) bool +} + +// includes returns true iff the filter accepts the path. An empty filter +// (Includes == nil) defaults to accepting every path — but production +// callers always supply one. +func (f Filter) includes(path string) bool { + if f.Includes == nil { + return true + } + return f.Includes(path) +} + +// Parse runs `git diff` against the merge-base of baseBranch..HEAD and +// returns the changed files that pass the filter. The filter is also used to +// restrict the raw `git diff` output via -- globs so the parser never has to +// see files from other languages. +func Parse(repoPath, baseBranch string, filter Filter) (*Result, error) { mergeBaseCmd := exec.Command("git", "merge-base", baseBranch, "HEAD") mergeBaseCmd.Dir = repoPath mergeBaseOut, err := mergeBaseCmd.Output() @@ -89,14 +116,20 @@ func Parse(repoPath, baseBranch string) (*Result, error) { } mergeBase := strings.TrimSpace(string(mergeBaseOut)) - cmd := exec.Command("git", "diff", "-U0", mergeBase, "--", "*.go") + args := []string{"diff", "-U0", mergeBase} + if len(filter.DiffGlobs) > 0 { + args = append(args, "--") + args = append(args, filter.DiffGlobs...) + } + + cmd := exec.Command("git", args...) cmd.Dir = repoPath out, err := cmd.Output() if err != nil { return nil, fmt.Errorf("git diff failed: %w", err) } - files, err := parseUnifiedDiff(string(out)) + files, err := parseUnifiedDiff(string(out), filter) if err != nil { return nil, err } @@ -107,18 +140,19 @@ func Parse(repoPath, baseBranch string) (*Result, error) { }, nil } -// CollectPaths builds a Result by treating each .go file under the given -// paths as fully changed. 
Useful for refactoring mode where you want to -// analyze entire files rather than diffed regions only. +// CollectPaths builds a Result by treating each analyzable source file under +// the given paths as fully changed. Useful for refactoring mode where you +// want to analyze entire files rather than diffed regions only. // // paths may contain individual files or directories (walked recursively). -// Test files (_test.go) are excluded to match Parse's behavior. -func CollectPaths(repoPath string, paths []string) (*Result, error) { +// Files that fail filter.Includes are excluded — test files and non-source +// files never show up in the result. +func CollectPaths(repoPath string, paths []string, filter Filter) (*Result, error) { var files []FileChange seen := make(map[string]bool) for _, p := range paths { - if err := collectPath(repoPath, p, &files, seen); err != nil { + if err := collectPath(repoPath, p, filter, &files, seen); err != nil { return nil, err } } @@ -126,7 +160,7 @@ func CollectPaths(repoPath string, paths []string) (*Result, error) { return &Result{Files: files}, nil } -func collectPath(repoPath, p string, files *[]FileChange, seen map[string]bool) error { +func collectPath(repoPath, p string, filter Filter, files *[]FileChange, seen map[string]bool) error { absPath := p if !filepath.IsAbs(p) { absPath = filepath.Join(repoPath, p) @@ -136,25 +170,25 @@ func collectPath(repoPath, p string, files *[]FileChange, seen map[string]bool) return fmt.Errorf("stat %s: %w", p, err) } if info.IsDir() { - return collectDir(repoPath, absPath, files, seen) + return collectDir(repoPath, absPath, filter, files, seen) } - return addFile(repoPath, absPath, files, seen) + return addFile(repoPath, absPath, filter, files, seen) } -func collectDir(repoPath, absPath string, files *[]FileChange, seen map[string]bool) error { +func collectDir(repoPath, absPath string, filter Filter, files *[]FileChange, seen map[string]bool) error { return filepath.WalkDir(absPath, 
func(path string, d fs.DirEntry, err error) error { if err != nil { return err } - if d.IsDir() || !isAnalyzableGoFile(path) { + if d.IsDir() || !filter.includes(path) { return nil } - return addFile(repoPath, path, files, seen) + return addFile(repoPath, path, filter, files, seen) }) } -func addFile(repoPath, absPath string, files *[]FileChange, seen map[string]bool) error { - if !isAnalyzableGoFile(absPath) { +func addFile(repoPath, absPath string, filter Filter, files *[]FileChange, seen map[string]bool) error { + if !filter.includes(absPath) { return nil } rel, err := filepath.Rel(repoPath, absPath) @@ -172,12 +206,9 @@ func addFile(repoPath, absPath string, files *[]FileChange, seen map[string]bool return nil } -func isAnalyzableGoFile(path string) bool { - return strings.HasSuffix(path, ".go") && !strings.HasSuffix(path, "_test.go") -} - -// parseUnifiedDiff parses the output of git diff -U0 into FileChange entries. -func parseUnifiedDiff(diffOutput string) ([]FileChange, error) { +// parseUnifiedDiff parses the output of git diff -U0 into FileChange entries, +// dropping files that don't match filter.Includes. 
+func parseUnifiedDiff(diffOutput string, filter Filter) ([]FileChange, error) { var files []FileChange var current *FileChange @@ -186,7 +217,7 @@ func parseUnifiedDiff(diffOutput string) ([]FileChange, error) { line := scanner.Text() if strings.HasPrefix(line, "+++ b/") { - current = handleFileLine(line, &files) + current = handleFileLine(line, filter, &files) continue } @@ -198,9 +229,9 @@ func parseUnifiedDiff(diffOutput string) ([]FileChange, error) { return files, scanner.Err() } -func handleFileLine(line string, files *[]FileChange) *FileChange { +func handleFileLine(line string, filter Filter, files *[]FileChange) *FileChange { path := strings.TrimPrefix(line, "+++ b/") - if !strings.HasSuffix(path, ".go") || strings.HasSuffix(path, "_test.go") { + if !filter.includes(path) { return nil } *files = append(*files, FileChange{Path: path}) diff --git a/internal/diff/diff_extra_test.go b/internal/diff/diff_extra_test.go index 62dec06..f322f5b 100644 --- a/internal/diff/diff_extra_test.go +++ b/internal/diff/diff_extra_test.go @@ -11,7 +11,7 @@ func TestCollectPaths_SingleFile(t *testing.T) { fp := filepath.Join(dir, "foo.go") os.WriteFile(fp, []byte("package x\n\nfunc f() {}\n"), 0644) - r, err := CollectPaths(dir, []string{"foo.go"}) + r, err := CollectPaths(dir, []string{"foo.go"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -38,7 +38,7 @@ func TestCollectPaths_SkipsTestFiles(t *testing.T) { os.WriteFile(filepath.Join(dir, "foo.go"), []byte("package x\n"), 0644) os.WriteFile(filepath.Join(dir, "foo_test.go"), []byte("package x\n"), 0644) - r, err := CollectPaths(dir, []string{"foo_test.go"}) + r, err := CollectPaths(dir, []string{"foo_test.go"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -56,7 +56,7 @@ func TestCollectPaths_Directory(t *testing.T) { os.WriteFile(filepath.Join(dir, "README.md"), []byte("readme\n"), 0644) os.WriteFile(filepath.Join(dir, "sub", "c.go"), []byte("package x\n"), 0644) - r, err := CollectPaths(dir, 
[]string{"."}) + r, err := CollectPaths(dir, []string{"."}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -68,7 +68,7 @@ func TestCollectPaths_Directory(t *testing.T) { func TestCollectPaths_NonexistentPath(t *testing.T) { dir := t.TempDir() - _, err := CollectPaths(dir, []string{"nonexistent.go"}) + _, err := CollectPaths(dir, []string{"nonexistent.go"}, goFilter()) if err == nil { t.Error("expected error for nonexistent path") } @@ -81,7 +81,7 @@ func TestCollectPaths_MultiplePaths(t *testing.T) { os.WriteFile(filepath.Join(dir, "pkg1", "a.go"), []byte("package pkg1\n"), 0644) os.WriteFile(filepath.Join(dir, "pkg2", "b.go"), []byte("package pkg2\n"), 0644) - r, err := CollectPaths(dir, []string{"pkg1", "pkg2"}) + r, err := CollectPaths(dir, []string{"pkg1", "pkg2"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -95,7 +95,7 @@ func TestCollectPaths_Deduplicates(t *testing.T) { os.WriteFile(filepath.Join(dir, "a.go"), []byte("package x\n"), 0644) // Pass the same file via both file path and dir - r, err := CollectPaths(dir, []string{"a.go", "."}) + r, err := CollectPaths(dir, []string{"a.go", "."}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -108,7 +108,7 @@ func TestCollectPaths_SkipsNonGoFile(t *testing.T) { dir := t.TempDir() os.WriteFile(filepath.Join(dir, "notes.txt"), []byte("notes"), 0644) - r, err := CollectPaths(dir, []string{"notes.txt"}) + r, err := CollectPaths(dir, []string{"notes.txt"}, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -117,7 +117,13 @@ func TestCollectPaths_SkipsNonGoFile(t *testing.T) { } } -func TestIsAnalyzableGoFile(t *testing.T) { +// TestFilter_IncludesGoFile exercises the path the diff parser takes when +// deciding whether to admit a file from `git diff` output. 
The old +// hardcoded isAnalyzableGoFile function is gone; the same semantic check +// now lives in the caller-supplied Filter.Includes, and this test locks in +// that Filter.includes() routes through it correctly. +func TestFilter_IncludesGoFile(t *testing.T) { + filter := goFilter() tests := []struct { path string want bool @@ -129,8 +135,8 @@ func TestIsAnalyzableGoFile(t *testing.T) { {"path/to/foo_test.go", false}, } for _, tt := range tests { - if got := isAnalyzableGoFile(tt.path); got != tt.want { - t.Errorf("isAnalyzableGoFile(%q) = %v, want %v", tt.path, got, tt.want) + if got := filter.includes(tt.path); got != tt.want { + t.Errorf("filter.includes(%q) = %v, want %v", tt.path, got, tt.want) } } } @@ -145,7 +151,7 @@ func filenames(files []FileChange) []string { func TestHandleFileLine_GoFile(t *testing.T) { var files []FileChange - result := handleFileLine("+++ b/pkg/handler.go", &files) + result := handleFileLine("+++ b/pkg/handler.go", goFilter(), &files) if result == nil { t.Fatal("expected non-nil result for .go file") } @@ -159,7 +165,7 @@ func TestHandleFileLine_GoFile(t *testing.T) { func TestHandleFileLine_TestFile(t *testing.T) { var files []FileChange - result := handleFileLine("+++ b/pkg/handler_test.go", &files) + result := handleFileLine("+++ b/pkg/handler_test.go", goFilter(), &files) if result != nil { t.Error("expected nil for test file") } @@ -170,7 +176,7 @@ func TestHandleFileLine_TestFile(t *testing.T) { func TestHandleFileLine_NonGoFile(t *testing.T) { var files []FileChange - result := handleFileLine("+++ b/README.md", &files) + result := handleFileLine("+++ b/README.md", goFilter(), &files) if result != nil { t.Error("expected nil for non-Go file") } @@ -263,7 +269,7 @@ func TestParseUnifiedDiff_NonGoFile(t *testing.T) { @@ -1,0 +1,5 @@ +new content ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -273,7 +279,7 @@ func 
TestParseUnifiedDiff_NonGoFile(t *testing.T) { } func TestParseUnifiedDiff_EmptyInput(t *testing.T) { - files, err := parseUnifiedDiff("") + files, err := parseUnifiedDiff("", goFilter()) if err != nil { t.Fatalf("error: %v", err) } @@ -361,7 +367,7 @@ diff --git a/b.go b/b.go @@ -10,0 +11,3 @@ +new code ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("error: %v", err) } diff --git a/internal/diff/diff_parse_test.go b/internal/diff/diff_parse_test.go index 2b6f13f..abbec47 100644 --- a/internal/diff/diff_parse_test.go +++ b/internal/diff/diff_parse_test.go @@ -29,7 +29,7 @@ func runGit(t *testing.T, dir string, args ...string) { func TestParse_NotGitRepo(t *testing.T) { dir := t.TempDir() - _, err := Parse(dir, "main") + _, err := Parse(dir, "main", goFilter()) if err == nil { t.Fatal("expected error when running Parse outside a git repo") } @@ -47,7 +47,7 @@ func TestParse_MissingBaseBranch(t *testing.T) { runGit(t, dir, "add", ".") runGit(t, dir, "commit", "-q", "-m", "init") - _, err := Parse(dir, "no-such-branch") + _, err := Parse(dir, "no-such-branch", goFilter()) if err == nil { t.Fatal("expected error for nonexistent base branch") } @@ -71,7 +71,7 @@ func TestParse_SuccessDetectsChangedGoFile(t *testing.T) { runGit(t, dir, "add", ".") runGit(t, dir, "commit", "-q", "-m", "add new.go") - result, err := Parse(dir, "main") + result, err := Parse(dir, "main", goFilter()) if err != nil { t.Fatalf("Parse error: %v", err) } @@ -102,7 +102,7 @@ func TestParse_IgnoresTestFiles(t *testing.T) { runGit(t, dir, "add", ".") runGit(t, dir, "commit", "-q", "-m", "add test") - result, err := Parse(dir, "main") + result, err := Parse(dir, "main", goFilter()) if err != nil { t.Fatalf("Parse error: %v", err) } diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go index c0b1cd7..7531fba 100644 --- a/internal/diff/diff_test.go +++ b/internal/diff/diff_test.go @@ -30,7 +30,7 @@ diff --git 
a/pkg/handler/routes_test.go b/pkg/handler/routes_test.go +test file should be skipped ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("parseUnifiedDiff error: %v", err) } @@ -69,7 +69,7 @@ func TestParseUnifiedDiff_PureDeletion(t *testing.T) { @@ -10,5 +10,0 @@ ` - files, err := parseUnifiedDiff(input) + files, err := parseUnifiedDiff(input, goFilter()) if err != nil { t.Fatalf("parseUnifiedDiff error: %v", err) } diff --git a/internal/diff/helpers_test.go b/internal/diff/helpers_test.go new file mode 100644 index 0000000..5b47ffa --- /dev/null +++ b/internal/diff/helpers_test.go @@ -0,0 +1,16 @@ +package diff + +import "strings" + +// goFilter returns a minimal Filter matching the old hardcoded Go behavior: +// includes any path ending in .go except _test.go. Used by the in-package +// tests so they exercise the filter parameter without pulling in the +// goanalyzer package (which would create a test-time import cycle). +func goFilter() Filter { + return Filter{ + DiffGlobs: []string{"*.go"}, + Includes: func(path string) bool { + return strings.HasSuffix(path, ".go") && !strings.HasSuffix(path, "_test.go") + }, + } +} From afaad02ddb31372fa025505b4ed307875e14509c Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:50:49 -0400 Subject: [PATCH 05/38] =?UTF-8?q?feat(lang):=20Part=20A4=20=E2=80=94=20rou?= =?UTF-8?q?te=20analyzers=20through=20language=20interfaces?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each analyzer package now takes the relevant lang.* interface as a parameter and orchestrates it; the embedded Go-AST walks are gone. - internal/complexity: take lang.ComplexityCalculator, delete the AST walker (moved to goanalyzer/complexity.go; walker unit tests moved to goanalyzer/complexity_walker_test.go). - internal/sizes: take lang.FunctionExtractor, delete analyzeFile / collectFunctionSizes / funcName. 
- internal/churn: take lang.ComplexityScorer, delete the simplified duplicate computeComplexity; keep git log --oneline counting (language-agnostic). - internal/deps: split into graph.go (pure algorithms) and deps.go (orchestration via lang.ImportResolver). detectModulePath + scanPackageImports + collectImports move to goanalyzer/deps.go. - internal/mutation: route through the four mutation-related interfaces (MutantGenerator, MutantApplier, AnnotationScanner, TestRunner). Delete apply.go, generate.go, annotations.go; tiers.go untouched. go test -overlay scaffolding (writeOverlayJSON, buildTestArgs) moves to goanalyzer/testrunner.go. cmd/diffguard/main.go pulls the Go language out of the registry and threads it into each analyzer call. Tests migrated with their code: AST-level tests live next to the AST code in goanalyzer/*_test.go; orchestration tests stay in the analyzer packages but exercise the Go back-end via the registry (blank import). Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/diffguard/main.go | 14 +- internal/churn/churn.go | 112 +--- internal/churn/churn_test.go | 109 +--- internal/complexity/complexity.go | 240 +------- internal/complexity/complexity_extra_test.go | 552 ------------------ internal/complexity/complexity_test.go | 360 ++++++------ internal/deps/deps.go | 220 +------ internal/deps/graph.go | 167 ++++++ .../lang/goanalyzer/complexity_walker_test.go | 245 ++++++++ .../lang/goanalyzer/mutation_annotate_test.go | 138 +++++ .../lang/goanalyzer/mutation_apply_test.go | 345 +++++++++++ .../lang/goanalyzer/mutation_generate_test.go | 313 ++++++++++ internal/lang/goanalyzer/testrunner_test.go | 75 +++ internal/mutation/annotations.go | 86 --- internal/mutation/apply.go | 200 ------- internal/mutation/features_test.go | 374 ------------ internal/mutation/generate.go | 211 ------- internal/mutation/mutation.go | 157 ++--- internal/mutation/mutation_extra_test.go | 369 +----------- internal/mutation/mutation_test.go | 191 ++---- 
internal/sizes/sizes.go | 105 +--- internal/sizes/sizes_test.go | 207 +------ 22 files changed, 1741 insertions(+), 3049 deletions(-) delete mode 100644 internal/complexity/complexity_extra_test.go create mode 100644 internal/deps/graph.go create mode 100644 internal/lang/goanalyzer/complexity_walker_test.go create mode 100644 internal/lang/goanalyzer/mutation_annotate_test.go create mode 100644 internal/lang/goanalyzer/mutation_apply_test.go create mode 100644 internal/lang/goanalyzer/mutation_generate_test.go create mode 100644 internal/lang/goanalyzer/testrunner_test.go delete mode 100644 internal/mutation/annotations.go delete mode 100644 internal/mutation/apply.go delete mode 100644 internal/mutation/features_test.go delete mode 100644 internal/mutation/generate.go diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index e046115..af87fdd 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -97,7 +97,7 @@ func run(repoPath string, cfg Config) error { announceRun(d, cfg) - sections, err := runAnalyses(repoPath, d, cfg) + sections, err := runAnalyses(repoPath, d, cfg, goLang) if err != nil { return err } @@ -117,35 +117,35 @@ func announceRun(d *diff.Result, cfg Config) { } } -func runAnalyses(repoPath string, d *diff.Result, cfg Config) ([]report.Section, error) { +func runAnalyses(repoPath string, d *diff.Result, cfg Config, l lang.Language) ([]report.Section, error) { var sections []report.Section - complexitySection, err := complexity.Analyze(repoPath, d, cfg.ComplexityThreshold) + complexitySection, err := complexity.Analyze(repoPath, d, cfg.ComplexityThreshold, l.ComplexityCalculator()) if err != nil { return nil, fmt.Errorf("complexity analysis: %w", err) } sections = append(sections, complexitySection) - sizesSection, err := sizes.Analyze(repoPath, d, cfg.FunctionSizeThreshold, cfg.FileSizeThreshold) + sizesSection, err := sizes.Analyze(repoPath, d, cfg.FunctionSizeThreshold, cfg.FileSizeThreshold, l.FunctionExtractor()) if err != nil 
{ return nil, fmt.Errorf("size analysis: %w", err) } sections = append(sections, sizesSection) - depsSection, err := deps.Analyze(repoPath, d) + depsSection, err := deps.Analyze(repoPath, d, l.ImportResolver()) if err != nil { return nil, fmt.Errorf("dependency analysis: %w", err) } sections = append(sections, depsSection) - churnSection, err := churn.Analyze(repoPath, d, cfg.ComplexityThreshold) + churnSection, err := churn.Analyze(repoPath, d, cfg.ComplexityThreshold, l.ComplexityScorer()) if err != nil { return nil, fmt.Errorf("churn analysis: %w", err) } sections = append(sections, churnSection) if !cfg.SkipMutation { - mutationSection, err := mutation.Analyze(repoPath, d, mutation.Options{ + mutationSection, err := mutation.Analyze(repoPath, d, l, mutation.Options{ SampleRate: cfg.MutationSampleRate, TestTimeout: cfg.TestTimeout, TestPattern: cfg.TestPattern, diff --git a/internal/churn/churn.go b/internal/churn/churn.go index 9d5fd8d..c3be9a1 100644 --- a/internal/churn/churn.go +++ b/internal/churn/churn.go @@ -1,11 +1,13 @@ +// Package churn cross-references git log with per-function complexity scores +// using a language-supplied lang.ComplexityScorer. The AST-level work lives +// in the language back-end (for Go: goanalyzer/complexity.go); this file +// owns the git log counting (which is language-agnostic) and the severity +// derivation. package churn import ( "bufio" "fmt" - "go/ast" - "go/parser" - "go/token" "os/exec" "path/filepath" "sort" @@ -13,6 +15,7 @@ import ( "strings" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) @@ -26,10 +29,14 @@ type FunctionChurn struct { Score float64 } -// Analyze cross-references git log with cognitive complexity for changed functions. 
-func Analyze(repoPath string, d *diff.Result, complexityThreshold int) (report.Section, error) { +// Analyze cross-references git log with per-function complexity scores for +// the diff's changed files. +func Analyze(repoPath string, d *diff.Result, complexityThreshold int, scorer lang.ComplexityScorer) (report.Section, error) { fileCommits := collectFileCommits(repoPath, d.Files) - results := collectChurnResults(repoPath, d.Files, fileCommits) + results, err := collectChurnResults(repoPath, d.Files, fileCommits, scorer) + if err != nil { + return report.Section{}, err + } return buildSection(results, complexityThreshold), nil } @@ -41,49 +48,37 @@ func collectFileCommits(repoPath string, files []diff.FileChange) map[string]int return commits } -func collectChurnResults(repoPath string, files []diff.FileChange, fileCommits map[string]int) []FunctionChurn { +func collectChurnResults(repoPath string, files []diff.FileChange, fileCommits map[string]int, scorer lang.ComplexityScorer) ([]FunctionChurn, error) { var results []FunctionChurn for _, fc := range files { - results = append(results, analyzeFileChurn(repoPath, fc, fileCommits[fc.Path])...) + fnResults, err := analyzeFileChurn(repoPath, fc, fileCommits[fc.Path], scorer) + if err != nil { + return nil, fmt.Errorf("analyzing %s: %w", fc.Path, err) + } + results = append(results, fnResults...) 
} - return results + return results, nil } -func analyzeFileChurn(repoPath string, fc diff.FileChange, commits int) []FunctionChurn { +func analyzeFileChurn(repoPath string, fc diff.FileChange, commits int, scorer lang.ComplexityScorer) ([]FunctionChurn, error) { absPath := filepath.Join(repoPath, fc.Path) - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, 0) + scores, err := scorer.ScoreFile(absPath, fc) if err != nil { - return nil + return nil, err } - var results []FunctionChurn - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok { - return true - } - - startLine := fset.Position(fn.Pos()).Line - endLine := fset.Position(fn.End()).Line - - if !fc.OverlapsRange(startLine, endLine) { - return false - } - - complexity := computeComplexity(fn.Body) + results := make([]FunctionChurn, 0, len(scores)) + for _, s := range scores { results = append(results, FunctionChurn{ - File: fc.Path, - Line: startLine, - Name: funcName(fn), + File: s.File, + Line: s.Line, + Name: s.Name, Commits: commits, - Complexity: complexity, - Score: float64(commits) * float64(complexity), + Complexity: s.Complexity, + Score: float64(commits) * float64(s.Complexity), }) - - return false - }) - return results + } + return results, nil } // countFileCommits counts the total number of commits that touched a file. @@ -100,52 +95,9 @@ func countFileCommits(repoPath, filePath string) int { for scanner.Scan() { count++ } - - return count -} - -// computeComplexity is a simplified cognitive complexity counter. 
-func computeComplexity(body *ast.BlockStmt) int { - if body == nil { - return 0 - } - var count int - ast.Inspect(body, func(n ast.Node) bool { - switch n.(type) { - case *ast.IfStmt: - count++ - case *ast.ForStmt, *ast.RangeStmt: - count++ - case *ast.SwitchStmt, *ast.TypeSwitchStmt, *ast.SelectStmt: - count++ - case *ast.BinaryExpr: - bin := n.(*ast.BinaryExpr) - if bin.Op == token.LAND || bin.Op == token.LOR { - count++ - } - } - return true - }) return count } -func funcName(fn *ast.FuncDecl) string { - if fn.Recv != nil && len(fn.Recv.List) > 0 { - recv := fn.Recv.List[0] - var typeName string - switch t := recv.Type.(type) { - case *ast.StarExpr: - if ident, ok := t.X.(*ast.Ident); ok { - typeName = ident.Name - } - case *ast.Ident: - typeName = t.Name - } - return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) - } - return fn.Name.Name -} - func collectChurnFindings(results []FunctionChurn, complexityThreshold int) ([]report.Finding, int) { var findings []report.Finding var warnCount int diff --git a/internal/churn/churn_test.go b/internal/churn/churn_test.go index 2e367ad..927560d 100644 --- a/internal/churn/churn_test.go +++ b/internal/churn/churn_test.go @@ -1,63 +1,23 @@ package churn import ( - "go/ast" - "go/parser" - "go/token" "os" "path/filepath" "testing" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" "github.com/0xPolygon/diffguard/internal/report" ) -func TestComputeComplexity(t *testing.T) { - tests := []struct { - name string - code string - expected int - }{ - {"empty", `package p; func f() {}`, 0}, - {"single if", `package p; func f(x int) { if x > 0 {} }`, 1}, - {"for loop", `package p; func f() { for i := 0; i < 10; i++ {} }`, 1}, - {"switch", `package p; func f(x int) { switch x { case 1: } }`, 1}, - {"range", `package p; func f(s []int) { for range s {} }`, 1}, - {"select", `package p; func f(c chan int) { select { case <-c: } 
}`, 1}, - {"type switch", `package p; func f(x any) { switch x.(type) { case int: } }`, 1}, - {"logical and", `package p; func f(a, b bool) { if a && b {} }`, 2}, - {"logical or", `package p; func f(a, b bool) { if a || b {} }`, 2}, - {"nested", `package p; func f(x int) { if x > 0 { for x > 0 {} } }`, 2}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } - - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - - got := computeComplexity(fn.Body) - if got != tt.expected { - t.Errorf("computeComplexity = %d, want %d", got, tt.expected) - } - }) - } -} - -func TestComputeComplexity_NilBody(t *testing.T) { - if got := computeComplexity(nil); got != 0 { - t.Errorf("computeComplexity(nil) = %d, want 0", got) +func goScorer(t *testing.T) lang.ComplexityScorer { + t.Helper() + l, ok := lang.Get("go") + if !ok { + t.Fatal("go language not registered") } + return l.ComplexityScorer() } func TestCollectChurnFindings(t *testing.T) { @@ -81,7 +41,6 @@ func TestCollectChurnFindings(t *testing.T) { } func TestCollectChurnFindings_LimitExceeds(t *testing.T) { - // Fewer results than limit of 10 results := []FunctionChurn{ {File: "a.go", Score: 5, Commits: 1, Complexity: 5}, } @@ -92,16 +51,14 @@ func TestCollectChurnFindings_LimitExceeds(t *testing.T) { } func TestCollectChurnFindings_BoundaryCondition(t *testing.T) { - // Exactly at threshold — should NOT warn results := []FunctionChurn{ {File: "a.go", Score: 60, Commits: 6, Complexity: 10}, } _, warnCount := collectChurnFindings(results, 10) if warnCount != 0 { - t.Errorf("warnCount = %d, want 0 (complexity at threshold, not over)", warnCount) + t.Errorf("warnCount = %d, want 0", warnCount) } - // Over threshold and commits > 5 — should warn results2 := []FunctionChurn{ {File: "a.go", Score: 
66, Commits: 6, Complexity: 11}, } @@ -171,29 +128,6 @@ func TestFormatTopScore(t *testing.T) { } } -func TestFuncName(t *testing.T) { - tests := []struct { - code string - expected string - }{ - {`package p; func Foo() {}`, "Foo"}, - {`package p; type T struct{}; func (t T) Bar() {}`, "(T).Bar"}, - {`package p; type T struct{}; func (t *T) Baz() {}`, "(T).Baz"}, - } - - for _, tt := range tests { - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", tt.code, 0) - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - if got := funcName(fd); got != tt.expected { - t.Errorf("funcName = %q, want %q", got, tt.expected) - } - } - } - } -} - func TestAnalyzeFileChurn(t *testing.T) { code := `package test @@ -218,7 +152,10 @@ func complex_fn(a, b int) int { Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, } - results := analyzeFileChurn(dir, fc, 5) + results, err := analyzeFileChurn(dir, fc, 5, goScorer(t)) + if err != nil { + t.Fatalf("analyzeFileChurn: %v", err) + } if len(results) != 2 { t.Fatalf("expected 2 results, got %d", len(results)) } @@ -232,25 +169,10 @@ func complex_fn(a, b int) int { } } -func TestAnalyzeFileChurn_ParseError(t *testing.T) { - dir := t.TempDir() - fc := diff.FileChange{ - Path: "nonexistent.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 10}}, - } - - results := analyzeFileChurn(dir, fc, 0) - if results != nil { - t.Error("expected nil for parse error") - } -} - func TestCollectFileCommits(t *testing.T) { - // Use the actual repo to test files := []diff.FileChange{ {Path: "internal/churn/churn.go"}, } - // This will either work or return 0, both are valid commits := collectFileCommits("../..", files) if commits == nil { t.Error("expected non-nil map") @@ -271,7 +193,10 @@ func f() {} } commits := map[string]int{"test.go": 3} - results := collectChurnResults(dir, files, commits) + results, err := collectChurnResults(dir, files, commits, goScorer(t)) + if err != nil { + 
t.Fatalf("collectChurnResults: %v", err) + } if len(results) != 1 { t.Fatalf("expected 1 result, got %d", len(results)) } diff --git a/internal/complexity/complexity.go b/internal/complexity/complexity.go index 9bf4252..c3bf9a8 100644 --- a/internal/complexity/complexity.go +++ b/internal/complexity/complexity.go @@ -1,235 +1,43 @@ +// Package complexity runs a language's ComplexityCalculator across a diff's +// changed files and formats the results into a report.Section. +// +// All AST-level work happens in the language back-end (for Go: +// internal/lang/goanalyzer/complexity.go). This package is now a thin +// orchestrator — threshold check, severity derivation, per-language stats +// summary — so new languages inherit the analyzer for free by implementing +// lang.ComplexityCalculator. package complexity import ( "fmt" - "go/ast" - "go/parser" - "go/token" "math" "path/filepath" "sort" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) -// FunctionComplexity holds the complexity result for a single function. -type FunctionComplexity struct { - File string - Line int - Name string - Complexity int -} - -// Analyze computes cognitive complexity for all functions in changed regions of the diff. -func Analyze(repoPath string, d *diff.Result, threshold int) (report.Section, error) { - var results []FunctionComplexity - +// Analyze computes cognitive complexity for all functions in the diff's +// changed regions using the supplied language calculator, then produces the +// "Cognitive Complexity" report section. Parse errors are swallowed at the +// calculator layer (returning nil) so a single malformed file doesn't fail +// the whole run. 
+func Analyze(repoPath string, d *diff.Result, threshold int, calc lang.ComplexityCalculator) (report.Section, error) { + var results []lang.FunctionComplexity for _, fc := range d.Files { - results = append(results, analyzeFile(repoPath, fc)...) - } - - return buildSection(results, threshold), nil -} - -func analyzeFile(repoPath string, fc diff.FileChange) []FunctionComplexity { - absPath := filepath.Join(repoPath, fc.Path) - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, 0) - if err != nil { - return nil - } - - var results []FunctionComplexity - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok { - return true - } - - startLine := fset.Position(fn.Pos()).Line - endLine := fset.Position(fn.End()).Line - - if !fc.OverlapsRange(startLine, endLine) { - return false - } - - results = append(results, FunctionComplexity{ - File: fc.Path, - Line: startLine, - Name: funcName(fn), - Complexity: computeComplexity(fn.Body), - }) - - return false - }) - return results -} - -// computeComplexity calculates cognitive complexity of a function body. 
-func computeComplexity(body *ast.BlockStmt) int { - if body == nil { - return 0 - } - return walkBlock(body.List, 0) -} - -func walkBlock(stmts []ast.Stmt, nesting int) int { - total := 0 - for _, stmt := range stmts { - total += walkStmt(stmt, nesting) - } - return total -} - -func walkStmt(stmt ast.Stmt, nesting int) int { - switch s := stmt.(type) { - case *ast.IfStmt: - return walkIfStmt(s, nesting) - case *ast.ForStmt: - return walkForStmt(s, nesting) - case *ast.RangeStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.SwitchStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.TypeSwitchStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.SelectStmt: - return 1 + nesting + walkBlock(s.Body.List, nesting+1) - case *ast.CaseClause: - return walkBlock(s.Body, nesting) - case *ast.CommClause: - return walkBlock(s.Body, nesting) - case *ast.BlockStmt: - return walkBlock(s.List, nesting) - case *ast.LabeledStmt: - return walkStmt(s.Stmt, nesting) - case *ast.AssignStmt: - return walkExprsForFuncLit(s.Rhs, nesting) - case *ast.ExprStmt: - return walkExprForFuncLit(s.X, nesting) - case *ast.ReturnStmt: - return walkExprsForFuncLit(s.Results, nesting) - case *ast.GoStmt: - return walkExprForFuncLit(s.Call.Fun, nesting) - case *ast.DeferStmt: - return walkExprForFuncLit(s.Call.Fun, nesting) - } - return 0 -} - -func walkIfStmt(s *ast.IfStmt, nesting int) int { - total := 1 + nesting - total += countLogicalOps(s.Cond) - if s.Init != nil { - total += walkStmt(s.Init, nesting) - } - total += walkBlock(s.Body.List, nesting+1) - if s.Else != nil { - total += walkElseChain(s.Else, nesting) - } - return total -} - -func walkForStmt(s *ast.ForStmt, nesting int) int { - total := 1 + nesting - if s.Cond != nil { - total += countLogicalOps(s.Cond) - } - total += walkBlock(s.Body.List, nesting+1) - return total -} - -func walkElseChain(node ast.Node, nesting int) int { - switch e := node.(type) { - case 
*ast.IfStmt: - total := 1 - total += countLogicalOps(e.Cond) - if e.Init != nil { - total += walkStmt(e.Init, nesting) + absPath := filepath.Join(repoPath, fc.Path) + fnResults, err := calc.AnalyzeFile(absPath, fc) + if err != nil { + return report.Section{}, fmt.Errorf("analyzing %s: %w", fc.Path, err) } - total += walkBlock(e.Body.List, nesting+1) - if e.Else != nil { - total += walkElseChain(e.Else, nesting) - } - return total - case *ast.BlockStmt: - return 1 + walkBlock(e.List, nesting+1) - } - return 0 -} - -func walkExprsForFuncLit(exprs []ast.Expr, nesting int) int { - total := 0 - for _, expr := range exprs { - total += walkExprForFuncLit(expr, nesting) + results = append(results, fnResults...) } - return total -} - -func walkExprForFuncLit(expr ast.Expr, nesting int) int { - total := 0 - ast.Inspect(expr, func(n ast.Node) bool { - if fl, ok := n.(*ast.FuncLit); ok { - total += walkBlock(fl.Body.List, nesting+1) - return false - } - return true - }) - return total -} - -// countLogicalOps counts sequences of && and || in an expression. -func countLogicalOps(expr ast.Expr) int { - if expr == nil { - return 0 - } - ops := flattenLogicalOps(expr) - if len(ops) == 0 { - return 0 - } - count := 1 - for i := 1; i < len(ops); i++ { - if ops[i] != ops[i-1] { - count++ - } - } - return count -} - -func flattenLogicalOps(expr ast.Expr) []token.Token { - bin, ok := expr.(*ast.BinaryExpr) - if !ok { - return nil - } - if bin.Op != token.LAND && bin.Op != token.LOR { - return nil - } - var ops []token.Token - ops = append(ops, flattenLogicalOps(bin.X)...) - ops = append(ops, bin.Op) - ops = append(ops, flattenLogicalOps(bin.Y)...) 
- return ops -} - -func funcName(fn *ast.FuncDecl) string { - if fn.Recv != nil && len(fn.Recv.List) > 0 { - recv := fn.Recv.List[0] - var typeName string - switch t := recv.Type.(type) { - case *ast.StarExpr: - if ident, ok := t.X.(*ast.Ident); ok { - typeName = ident.Name - } - case *ast.Ident: - typeName = t.Name - } - return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) - } - return fn.Name.Name + return buildSection(results, threshold), nil } -func collectComplexityFindings(results []FunctionComplexity, threshold int) ([]report.Finding, []float64, int) { +func collectComplexityFindings(results []lang.FunctionComplexity, threshold int) ([]report.Finding, []float64, int) { var findings []report.Finding var values []float64 failCount := 0 @@ -258,7 +66,7 @@ func collectComplexityFindings(results []FunctionComplexity, threshold int) ([]r return findings, values, failCount } -func buildSection(results []FunctionComplexity, threshold int) report.Section { +func buildSection(results []lang.FunctionComplexity, threshold int) report.Section { if len(results) == 0 { return report.Section{ Name: "Cognitive Complexity", diff --git a/internal/complexity/complexity_extra_test.go b/internal/complexity/complexity_extra_test.go deleted file mode 100644 index 8a0f82b..0000000 --- a/internal/complexity/complexity_extra_test.go +++ /dev/null @@ -1,552 +0,0 @@ -package complexity - -import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "testing" - - "github.com/0xPolygon/diffguard/internal/diff" - "github.com/0xPolygon/diffguard/internal/report" -) - -func TestWalkStmt_NestingPenalty(t *testing.T) { - // Nesting penalty must be additive, not subtractive. - // If `1 + nesting` were mutated to `1 - nesting`, nested constructs - // would produce wrong (lower) values. 
- tests := []struct { - name string - code string - expected int - }{ - { - "range at nesting 1 with body", - `package p; func f(x int) { - if x > 0 { - for range []int{} { - if x > 0 {} - } - } - }`, - // if(1+0) + range(1+1) + inner_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - { - "switch at nesting 1 with body", - `package p; func f(x int) { - if x > 0 { - switch x { - case 1: - if x > 0 {} - } - } - }`, - // if(1+0) + switch(1+1) + case_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - { - "select at nesting 1 with body", - `package p; func f(x int, c chan int) { - if x > 0 { - select { - case <-c: - if x > 0 {} - } - } - }`, - // if(1+0) + select(1+1) + case_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - { - "type switch at nesting 1 with body", - `package p; func f(x int, v any) { - if x > 0 { - switch v.(type) { - case int: - if x > 0 {} - } - } - }`, - // if(1+0) + typeswitch(1+1) + case_if(1+2) = 1 + 2 + 3 = 6 - 6, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - if got != tt.expected { - t.Errorf("complexity = %d, want %d", got, tt.expected) - } - }) - } -} - -func TestWalkForStmt_WithLogicalCondition(t *testing.T) { - // Tests that for-loop conditions with logical ops are counted. - // If `s.Cond != nil` were mutated to `s.Cond == nil`, the logical - // ops in the condition would be missed. 
- code := `package p; func f(a, b bool) { for a && b {} }` - // for(1) + &&(1) = 2 - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - if got != 2 { - t.Errorf("complexity = %d, want 2 (for + logical op)", got) - } -} - -func TestWalkIfStmt_WithElseChain(t *testing.T) { - code := `package p -func f(x int) { - if x > 0 { - } else if x < 0 { - } else { - } -}` - // if(1) + else if(1) + else(1) = 3 - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - if got != 3 { - t.Errorf("complexity = %d, want 3", got) - } -} - -func TestWalkIfStmt_WithInit(t *testing.T) { - // Tests that if-init is processed for complexity. - code := `package p -func f() error { - if err := g(); err != nil { - } - return nil -} -func g() error { return nil } -` - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok && fd.Name.Name == "f" { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // if(1+0) = 1 (init is an assignment with no control flow) - if got != 1 { - t.Errorf("complexity = %d, want 1", got) - } -} - -func TestWalkElseChain_NestedInit(t *testing.T) { - code := `package p -func f(x int) error { - if x > 0 { - } else if err := g(); err != nil { - } - return nil -} -func g() error { return nil } -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok && fd.Name.Name == "f" { - fn = fd - break - 
} - } - got := computeComplexity(fn.Body) - // if(1) + else-if(1) = 2 - if got != 2 { - t.Errorf("complexity = %d, want 2", got) - } -} - -func TestWalkElseChain_WithNestedBody(t *testing.T) { - // Tests that nesting+1 is correctly applied in walkElseChain's body. - code := `package p -func f(x int) { - if x > 0 { - } else if x < 0 { - if x < -10 { - } - } -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // if(1+0) + else-if(1) + nested-if(1+1nesting) = 1 + 1 + 2 = 4 - if got != 4 { - t.Errorf("complexity = %d, want 4", got) - } -} - -func TestBuildSection_StatsValues(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "f1", Complexity: 4}, - {File: "b.go", Line: 1, Name: "f2", Complexity: 8}, - {File: "c.go", Line: 1, Name: "f3", Complexity: 12}, - } - - s := buildSection(results, 10) - - stats := s.Stats.(map[string]any) - if stats["total_functions"] != 3 { - t.Errorf("total_functions = %v, want 3", stats["total_functions"]) - } - if stats["violations"] != 1 { - t.Errorf("violations = %v, want 1", stats["violations"]) - } - // mean = (4+8+12)/3 = 8.0 - if stats["mean"] != 8.0 { - t.Errorf("mean = %v, want 8.0", stats["mean"]) - } - // median of [4,8,12] = 8 - if stats["median"] != 8.0 { - t.Errorf("median = %v, want 8.0", stats["median"]) - } - // max = 12 - if stats["max"] != 12.0 { - t.Errorf("max = %v, want 12.0", stats["max"]) - } -} - -func TestComputeComplexity_NilBody(t *testing.T) { - if got := computeComplexity(nil); got != 0 { - t.Errorf("computeComplexity(nil) = %d, want 0", got) - } -} - -func TestAnalyzeFile(t *testing.T) { - code := `package test - -func simple() { - x := 1 - _ = x -} - -func withIf(a int) { - if a > 0 { - } -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 
0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - results := analyzeFile(dir, fc) - if len(results) != 2 { - t.Fatalf("expected 2 results, got %d", len(results)) - } - - // simple should have complexity 0 - if results[0].Complexity != 0 { - t.Errorf("simple complexity = %d, want 0", results[0].Complexity) - } - // withIf should have complexity 1 - if results[1].Complexity != 1 { - t.Errorf("withIf complexity = %d, want 1", results[1].Complexity) - } -} - -func TestAnalyzeFile_ParseError(t *testing.T) { - dir := t.TempDir() - fc := diff.FileChange{ - Path: "nonexistent.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 10}}, - } - - results := analyzeFile(dir, fc) - if results != nil { - t.Error("expected nil for parse error") - } -} - -func TestAnalyzeFile_MultipleFunctions(t *testing.T) { - // If the ast.Inspect callback's `return true` (for non-FuncDecl nodes) - // were mutated to `return false`, only the first function would be found. 
- code := `package test - -type S struct{} - -func (s S) Method1() { - if true {} -} - -func (s *S) Method2() { - if true {} -} - -func TopLevel() { - if true {} -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - results := analyzeFile(dir, fc) - if len(results) != 3 { - t.Errorf("expected 3 functions, got %d", len(results)) - } -} - -func TestAnalyzeFile_OutOfRange(t *testing.T) { - code := `package test - -func f() { - x := 1 - _ = x -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 100, EndLine: 200}}, - } - - results := analyzeFile(dir, fc) - if len(results) != 0 { - t.Errorf("expected 0 results for out-of-range, got %d", len(results)) - } -} - -func TestCollectComplexityFindings(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "low", Complexity: 5}, - {File: "b.go", Line: 1, Name: "high", Complexity: 15}, - {File: "c.go", Line: 1, Name: "medium", Complexity: 10}, - } - - findings, values, failCount := collectComplexityFindings(results, 10) - - if failCount != 1 { - t.Errorf("failCount = %d, want 1", failCount) - } - if len(findings) != 1 { - t.Errorf("findings = %d, want 1", len(findings)) - } - if len(values) != 3 { - t.Errorf("values = %d, want 3", len(values)) - } -} - -func TestCollectComplexityFindings_AtBoundary(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "exact", Complexity: 10}, - {File: "b.go", Line: 1, Name: "over", Complexity: 11}, - } - - _, _, failCount := collectComplexityFindings(results, 10) - if failCount != 1 { - t.Errorf("failCount = %d, want 1 (11 > 10, 10 is not > 10)", failCount) - } -} - -func TestBuildSection_Empty(t *testing.T) { - s := buildSection(nil, 
10) - if s.Severity != report.SeverityPass { - t.Errorf("severity = %v, want PASS", s.Severity) - } -} - -func TestBuildSection_WithViolations(t *testing.T) { - results := []FunctionComplexity{ - {File: "a.go", Line: 1, Name: "complex", Complexity: 20}, - {File: "b.go", Line: 1, Name: "simple", Complexity: 3}, - } - - s := buildSection(results, 10) - if s.Severity != report.SeverityFail { - t.Errorf("severity = %v, want FAIL", s.Severity) - } - if len(s.Findings) != 1 { - t.Errorf("findings = %d, want 1", len(s.Findings)) - } -} - -func TestMean(t *testing.T) { - if got := mean(nil); got != 0 { - t.Errorf("mean(nil) = %f, want 0", got) - } - if got := mean([]float64{2, 4, 6}); got != 4 { - t.Errorf("mean([2,4,6]) = %f, want 4", got) - } -} - -func TestMedian(t *testing.T) { - if got := median(nil); got != 0 { - t.Errorf("median(nil) = %f, want 0", got) - } - // Odd count - if got := median([]float64{3, 1, 2}); got != 2 { - t.Errorf("median([3,1,2]) = %f, want 2", got) - } - // Even count - if got := median([]float64{4, 1, 3, 2}); got != 2.5 { - t.Errorf("median([4,1,3,2]) = %f, want 2.5", got) - } -} - -func TestMax(t *testing.T) { - if got := max(nil); got != 0 { - t.Errorf("max(nil) = %f, want 0", got) - } - if got := max([]float64{3, 7, 1, 5}); got != 7 { - t.Errorf("max([3,7,1,5]) = %f, want 7", got) - } -} - -func TestWalkStmt_LabeledStmt(t *testing.T) { - code := `package p -func f(x int) { -outer: - for x > 0 { - _ = x - break outer - } -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // labeled for(1) = 1 - if got != 1 { - t.Errorf("complexity = %d, want 1", got) - } -} - -func TestWalkStmt_GoAndDefer(t *testing.T) { - code := `package p -func f() { - go func() { - if true {} - }() - defer func() { - if true {} - }() -}` - fset := token.NewFileSet() - f, _ := 
parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // go func: if(1+1nesting) = 2 - // defer func: if(1+1nesting) = 2 - // total = 4 - if got != 4 { - t.Errorf("complexity = %d, want 4", got) - } -} - -func TestWalkStmt_FuncLitInAssign(t *testing.T) { - code := `package p -func f() { - x := func() { - if true {} - } - _ = x -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // func lit with if at nesting 1: if(1+1) = 2 - if got != 2 { - t.Errorf("complexity = %d, want 2", got) - } -} - -func TestWalkStmt_FuncLitInReturn(t *testing.T) { - code := `package p -func f() func() { - return func() { - if true {} - } -}` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, 0) - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - got := computeComplexity(fn.Body) - // return func lit with if at nesting 1: if(1+1) = 2 - if got != 2 { - t.Errorf("complexity = %d, want 2", got) - } -} diff --git a/internal/complexity/complexity_test.go b/internal/complexity/complexity_test.go index 185241b..1f8dcfb 100644 --- a/internal/complexity/complexity_test.go +++ b/internal/complexity/complexity_test.go @@ -1,214 +1,204 @@ package complexity import ( - "go/ast" - "go/parser" - "go/token" + "os" + "path/filepath" "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" + "github.com/0xPolygon/diffguard/internal/report" ) -func TestComputeComplexity(t *testing.T) { - tests := []struct { - name string - code string - expected int - }{ - 
{ - name: "empty function", - code: `package p; func f() {}`, - expected: 0, - }, - { - name: "single if", - code: `package p; func f(x int) { if x > 0 {} }`, - expected: 1, - }, - { - name: "if-else", - code: `package p; func f(x int) { if x > 0 {} else {} }`, - expected: 2, // +1 if, +1 else - }, - { - name: "if-else if-else", - code: `package p; func f(x int) { if x > 0 {} else if x < 0 {} else {} }`, - expected: 3, // +1 if, +1 else if, +1 else - }, - { - name: "nested if", - code: `package p; func f(x, y int) { if x > 0 { if y > 0 {} } }`, - expected: 3, // +1 outer if (nesting=0), +1 inner if + 1 nesting penalty - }, - { - name: "for loop", - code: `package p; func f() { for i := 0; i < 10; i++ {} }`, - expected: 1, - }, - { - name: "nested for", - code: `package p; func f() { for i := 0; i < 10; i++ { for j := 0; j < 10; j++ {} } }`, - expected: 3, // +1 outer for, +1 inner for + 1 nesting - }, - { - name: "switch with cases", - code: `package p; func f(x int) { switch x { case 1: case 2: case 3: } }`, - expected: 1, // +1 for switch, cases don't add complexity - }, - { - name: "logical operators same type", - code: `package p; func f(a, b, c bool) { if a && b && c {} }`, - expected: 2, // +1 if, +1 for &&-sequence (same operator = 1) - }, - { - name: "logical operators mixed", - code: `package p; func f(a, b, c bool) { if a && b || c {} }`, - expected: 3, // +1 if, +2 for mixed && then || - }, - { - name: "range loop", - code: `package p; func f(s []int) { for range s {} }`, - expected: 1, - }, - { - name: "select statement", - code: `package p; func f(c chan int) { select { case <-c: } }`, - expected: 1, - }, - { - name: "deeply nested", - code: `package p -func f(x, y, z int) { - if x > 0 { // +1 (nesting=0) - for y > 0 { // +1 +1 nesting (nesting=1) - if z > 0 { // +1 +2 nesting (nesting=2) +// goCalc returns the registered Go ComplexityCalculator. 
The goanalyzer +// package is blank-imported above so its init() has run by the time this +// helper is called. +func goCalc(t *testing.T) lang.ComplexityCalculator { + t.Helper() + l, ok := lang.Get("go") + if !ok { + t.Fatal("go language not registered") + } + return l.ComplexityCalculator() +} + +// TestAnalyze_WithGoCalc is the integration-shape replacement for the old +// tree of "exercise the AST walker directly" tests that lived here before +// the complexity AST logic moved into goanalyzer. The walker tests now live +// next to the walker in goanalyzer/complexity_walker_test.go; this test +// locks in the orchestration: calculator is consulted, findings are +// aggregated, summary severity and stats shape are correct. +func TestAnalyze_WithGoCalc(t *testing.T) { + code := `package test + +func simple() {} + +func complex_fn(x int) { + if x > 0 { + if x > 10 { + if x > 100 { + if x > 1000 { + if x > 10000 { + if x > 100000 { + _ = x + } + } + } } } } -}`, - expected: 6, // 1 + 2 + 3 +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + if err := os.WriteFile(fp, []byte(code), 0644); err != nil { + t.Fatal(err) + } + + d := &diff.Result{ + Files: []diff.FileChange{ + {Path: "test.go", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } + section, err := Analyze(dir, d, 10, goCalc(t)) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + // complex_fn has 6 nested ifs — cognitive score > 10 triggers FAIL. 
+ if section.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL", section.Severity) + } + if len(section.Findings) != 1 { + t.Fatalf("findings = %d, want 1", len(section.Findings)) + } + if section.Findings[0].Function != "complex_fn" { + t.Errorf("finding function = %q, want complex_fn", section.Findings[0].Function) + } +} - var fn *ast.FuncDecl - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - fn = fd - break - } - } - if fn == nil { - t.Fatal("no function found") - } +func TestAnalyze_EmptyResult(t *testing.T) { + d := &diff.Result{} // no files + section, err := Analyze(t.TempDir(), d, 10, goCalc(t)) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + if section.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS", section.Severity) + } + if section.Name != "Cognitive Complexity" { + t.Errorf("name = %q", section.Name) + } +} - got := computeComplexity(fn.Body) - if got != tt.expected { - t.Errorf("complexity = %d, want %d", got, tt.expected) - } - }) +func TestBuildSection_StatsValues(t *testing.T) { + results := []lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "f1"}, Complexity: 4}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "f2"}, Complexity: 8}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "f3"}, Complexity: 12}, + } + + s := buildSection(results, 10) + + stats := s.Stats.(map[string]any) + if stats["total_functions"] != 3 { + t.Errorf("total_functions = %v, want 3", stats["total_functions"]) + } + if stats["violations"] != 1 { + t.Errorf("violations = %v, want 1", stats["violations"]) + } + if stats["mean"] != 8.0 { + t.Errorf("mean = %v, want 8.0", stats["mean"]) + } + if stats["median"] != 8.0 { + t.Errorf("median = %v, want 8.0", stats["median"]) + } + if stats["max"] != 12.0 { + t.Errorf("max = %v, want 12.0", stats["max"]) } } -func TestFuncName(t *testing.T) { - tests := []struct { - code string - 
expected string - }{ - { - code: `package p; func Foo() {}`, - expected: "Foo", - }, - { - code: `package p; type T struct{}; func (t T) Foo() {}`, - expected: "(T).Foo", - }, - { - code: `package p; type T struct{}; func (t *T) Foo() {}`, - expected: "(T).Foo", - }, +func TestBuildSection_Empty(t *testing.T) { + s := buildSection(nil, 10) + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS", s.Severity) } +} - for _, tt := range tests { - t.Run(tt.expected, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } +func TestBuildSection_WithViolations(t *testing.T) { + results := []lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "complex"}, Complexity: 20}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "simple"}, Complexity: 3}, + } - for _, decl := range f.Decls { - if fd, ok := decl.(*ast.FuncDecl); ok { - got := funcName(fd) - if got != tt.expected { - t.Errorf("funcName = %q, want %q", got, tt.expected) - } - return - } - } - t.Fatal("no function found") - }) + s := buildSection(results, 10) + if s.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL", s.Severity) + } + if len(s.Findings) != 1 { + t.Errorf("findings = %d, want 1", len(s.Findings)) } } -func TestCountLogicalOps(t *testing.T) { - tests := []struct { - name string - code string - expected int - }{ - { - name: "no logical ops", - code: `package p; var x = 1 + 2`, - expected: 0, - }, - { - name: "single and", - code: `package p; var x = true && false`, - expected: 1, - }, - { - name: "chain same op", - code: `package p; var x = true && false && true`, - expected: 1, // same operator sequence counts as 1 - }, - { - name: "mixed ops", - code: `package p; var x = true && false || true`, - expected: 2, // switch from && to || - }, +func TestCollectComplexityFindings(t *testing.T) { + results := 
[]lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "low"}, Complexity: 5}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "high"}, Complexity: 15}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "medium"}, Complexity: 10}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", tt.code, 0) - if err != nil { - t.Fatalf("parse error: %v", err) - } + findings, values, failCount := collectComplexityFindings(results, 10) - // Find the expression in the var declaration - var expr ast.Expr - ast.Inspect(f, func(n ast.Node) bool { - if vs, ok := n.(*ast.ValueSpec); ok && len(vs.Values) > 0 { - expr = vs.Values[0] - return false - } - return true - }) - if expr == nil { - t.Fatal("no expression found") - } + if failCount != 1 { + t.Errorf("failCount = %d, want 1", failCount) + } + if len(findings) != 1 { + t.Errorf("findings = %d, want 1", len(findings)) + } + if len(values) != 3 { + t.Errorf("values = %d, want 3", len(values)) + } +} - got := countLogicalOps(expr) - if got != tt.expected { - t.Errorf("countLogicalOps = %d, want %d", got, tt.expected) - } - }) +func TestCollectComplexityFindings_AtBoundary(t *testing.T) { + results := []lang.FunctionComplexity{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "exact"}, Complexity: 10}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "over"}, Complexity: 11}, + } + + _, _, failCount := collectComplexityFindings(results, 10) + if failCount != 1 { + t.Errorf("failCount = %d, want 1 (11 > 10, 10 is not > 10)", failCount) + } +} + +func TestMean(t *testing.T) { + if got := mean(nil); got != 0 { + t.Errorf("mean(nil) = %f, want 0", got) + } + if got := mean([]float64{2, 4, 6}); got != 4 { + t.Errorf("mean([2,4,6]) = %f, want 4", got) + } +} + +func TestMedian(t *testing.T) { + if got := median(nil); got != 0 { + t.Errorf("median(nil) = %f, 
want 0", got) + } + if got := median([]float64{3, 1, 2}); got != 2 { + t.Errorf("median([3,1,2]) = %f, want 2", got) + } + if got := median([]float64{4, 1, 3, 2}); got != 2.5 { + t.Errorf("median([4,1,3,2]) = %f, want 2.5", got) + } +} + +func TestMax(t *testing.T) { + if got := max(nil); got != 0 { + t.Errorf("max(nil) = %f, want 0", got) + } + if got := max([]float64{3, 7, 1, 5}); got != 7 { + t.Errorf("max([3,7,1,5]) = %f, want 7", got) } } diff --git a/internal/deps/deps.go b/internal/deps/deps.go index 0472dc7..135954c 100644 --- a/internal/deps/deps.go +++ b/internal/deps/deps.go @@ -2,51 +2,18 @@ package deps import ( "fmt" - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" "sort" - "strings" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) -// Graph represents the internal package dependency graph. -type Graph struct { - Edges map[string]map[string]bool - ModulePath string -} - -// PackageMetrics holds coupling and instability metrics for a package. -type PackageMetrics struct { - Package string - Afferent int - Efferent int - Instability float64 -} - -// Cycle represents a circular dependency chain. -type Cycle []string - -func (c Cycle) String() string { - return strings.Join(c, " -> ") + " -> " + c[0] -} - -// SDPViolation represents a Stable Dependencies Principle violation. -type SDPViolation struct { - Package string - Dependency string - PackageInstability float64 - DependencyInstability float64 -} - -// Analyze examines import changes in the diff, builds a dependency graph, -// and reports cycles, coupling, instability, and SDP violations. -func Analyze(repoPath string, d *diff.Result) (report.Section, error) { - modulePath, err := detectModulePath(repoPath) +// Analyze examines import changes in the diff, builds a dependency graph +// via the supplied ImportResolver, and reports cycles, coupling, +// instability, and SDP violations. 
+func Analyze(repoPath string, d *diff.Result, resolver lang.ImportResolver) (report.Section, error) { + modulePath, err := resolver.DetectModulePath(repoPath) if err != nil { return report.Section{ Name: "Dependency Structure", @@ -62,7 +29,8 @@ func Analyze(repoPath string, d *diff.Result) (report.Section, error) { changedPkgs := d.ChangedPackages() for _, pkg := range changedPkgs { - scanPackageImports(g, repoPath, pkg) + edges := resolver.ScanPackageImports(repoPath, pkg, modulePath) + mergeEdges(g.Edges, edges) } cycles := detectCycles(g) @@ -72,159 +40,19 @@ func Analyze(repoPath string, d *diff.Result) (report.Section, error) { return buildSection(g, cycles, metrics, sdpViolations, changedPkgs), nil } -func scanPackageImports(g *Graph, repoPath, pkg string) { - absDir := filepath.Join(repoPath, pkg) - fset := token.NewFileSet() - pkgs, err := parser.ParseDir(fset, absDir, nil, parser.ImportsOnly) - if err != nil { - return - } - - pkgImportPath := g.ModulePath + "/" + pkg - for _, p := range pkgs { - if strings.HasSuffix(p.Name, "_test") { - continue - } - collectImports(g, p, pkgImportPath) - } -} - -func collectImports(g *Graph, p *ast.Package, pkgImportPath string) { - for _, f := range p.Files { - for _, imp := range f.Imports { - importPath := strings.Trim(imp.Path.Value, `"`) - if !strings.HasPrefix(importPath, g.ModulePath) { - continue - } - if g.Edges[pkgImportPath] == nil { - g.Edges[pkgImportPath] = make(map[string]bool) - } - g.Edges[pkgImportPath][importPath] = true - } - } -} - -func detectModulePath(repoPath string) (string, error) { - goModPath := filepath.Join(repoPath, "go.mod") - content, err := readFile(goModPath) - if err != nil { - return "", fmt.Errorf("reading go.mod: %w", err) - } - for _, line := range strings.Split(content, "\n") { - line = strings.TrimSpace(line) - if strings.HasPrefix(line, "module ") { - return strings.TrimSpace(strings.TrimPrefix(line, "module ")), nil - } - } - return "", fmt.Errorf("no module directive found 
in go.mod") -} - -// detectCycles finds all cycles in the dependency graph using DFS. -func detectCycles(g *Graph) []Cycle { - var cycles []Cycle - visited := make(map[string]bool) - inStack := make(map[string]bool) - var stack []string - - var dfs func(node string) - dfs = func(node string) { - visited[node] = true - inStack[node] = true - stack = append(stack, node) - - for dep := range g.Edges[node] { - if !visited[dep] { - dfs(dep) - } else if inStack[dep] { - cycles = append(cycles, extractCycle(stack, dep)) - } - } - - stack = stack[:len(stack)-1] - inStack[node] = false - } - - for node := range g.Edges { - if !visited[node] { - dfs(node) - } - } - - return cycles -} - -func extractCycle(stack []string, target string) Cycle { - var cycle Cycle - for i := len(stack) - 1; i >= 0; i-- { - cycle = append([]string{stack[i]}, cycle...) - if stack[i] == target { - break - } - } - return cycle -} - -// computeMetrics calculates afferent/efferent coupling and instability. -func computeMetrics(g *Graph) map[string]*PackageMetrics { - metrics := make(map[string]*PackageMetrics) - - getOrCreate := func(pkg string) *PackageMetrics { - if m, ok := metrics[pkg]; ok { - return m - } - m := &PackageMetrics{Package: pkg} - metrics[pkg] = m - return m - } - - for pkg, imports := range g.Edges { - m := getOrCreate(pkg) - m.Efferent = len(imports) - for dep := range imports { - dm := getOrCreate(dep) - dm.Afferent++ - } - } - - for _, m := range metrics { - total := m.Afferent + m.Efferent - if total > 0 { - m.Instability = float64(m.Efferent) / float64(total) - } - } - - return metrics -} - -func detectSDPViolations(g *Graph, metrics map[string]*PackageMetrics) []SDPViolation { - var violations []SDPViolation - for pkg, imports := range g.Edges { - pkgMetric := metrics[pkg] - if pkgMetric == nil { - continue - } - violations = append(violations, checkSDPForPackage(pkgMetric, imports, metrics)...) 
- } - return violations -} - -func checkSDPForPackage(pkgMetric *PackageMetrics, imports map[string]bool, metrics map[string]*PackageMetrics) []SDPViolation { - var violations []SDPViolation - for dep := range imports { - depMetric := metrics[dep] - if depMetric == nil { - continue +// mergeEdges folds the resolver's per-package adjacency map into the running +// graph. Resolvers typically return a single-entry map on each call, but +// the interface is broad enough that a resolver could return edges for +// sub-packages too — so merge instead of assign. +func mergeEdges(dst, src map[string]map[string]bool) { + for from, tos := range src { + if dst[from] == nil { + dst[from] = make(map[string]bool) } - if depMetric.Instability > pkgMetric.Instability { - violations = append(violations, SDPViolation{ - Package: pkgMetric.Package, - Dependency: dep, - PackageInstability: pkgMetric.Instability, - DependencyInstability: depMetric.Instability, - }) + for to := range tos { + dst[from][to] = true } } - return violations } func buildSection(g *Graph, cycles []Cycle, metrics map[string]*PackageMetrics, sdpViolations []SDPViolation, changedPkgs []string) report.Section { @@ -285,15 +113,3 @@ func buildDepsStats(changedPkgs []string, cycles []Cycle, sdpViolations []SDPVio "metrics": metricsList, } } - -func trimModule(pkg, modulePath string) string { - return strings.TrimPrefix(pkg, modulePath+"/") -} - -func readFile(path string) (string, error) { - b, err := os.ReadFile(path) - if err != nil { - return "", err - } - return string(b), nil -} diff --git a/internal/deps/graph.go b/internal/deps/graph.go new file mode 100644 index 0000000..5664ca7 --- /dev/null +++ b/internal/deps/graph.go @@ -0,0 +1,167 @@ +// Package deps runs dependency-structure analysis on the files changed in a +// diff. It relies on a language-supplied lang.ImportResolver to turn source +// files into the adjacency map the graph algorithms operate on. 
+// +// graph.go contains the pure-math primitives: cycle detection, coupling, +// instability, SDP violation detection. deps.go wires them up to an +// ImportResolver and builds a report.Section. Splitting the two makes the +// graph algorithms reusable for any language without dragging the +// orchestration (module-path detection, section formatting) along. +package deps + +import "strings" + +// Graph represents an internal package dependency graph. Nodes are +// package-level identifiers (typically the module path plus the package +// directory, e.g. "example.com/mod/internal/foo"). Edges point from +// importer to importee. +type Graph struct { + Edges map[string]map[string]bool + ModulePath string +} + +// PackageMetrics holds coupling and instability metrics for a package. +// Afferent = how many other packages import this one ("fan-in"). +// Efferent = how many other packages this one imports ("fan-out"). +// Instability = Efferent / (Afferent + Efferent), range [0,1]. +type PackageMetrics struct { + Package string + Afferent int + Efferent int + Instability float64 +} + +// Cycle represents a circular dependency chain. +type Cycle []string + +// String formats the cycle as "a -> b -> c -> a" (closing back to the +// start). Used in report findings. +func (c Cycle) String() string { + return strings.Join(c, " -> ") + " -> " + c[0] +} + +// SDPViolation represents a Stable Dependencies Principle violation: a +// package with low instability (stable) imports a package with higher +// instability (unstable). +type SDPViolation struct { + Package string + Dependency string + PackageInstability float64 + DependencyInstability float64 +} + +// detectCycles finds all cycles in the dependency graph using DFS. 
+func detectCycles(g *Graph) []Cycle { + var cycles []Cycle + visited := make(map[string]bool) + inStack := make(map[string]bool) + var stack []string + + var dfs func(node string) + dfs = func(node string) { + visited[node] = true + inStack[node] = true + stack = append(stack, node) + + for dep := range g.Edges[node] { + if !visited[dep] { + dfs(dep) + } else if inStack[dep] { + cycles = append(cycles, extractCycle(stack, dep)) + } + } + + stack = stack[:len(stack)-1] + inStack[node] = false + } + + for node := range g.Edges { + if !visited[node] { + dfs(node) + } + } + + return cycles +} + +func extractCycle(stack []string, target string) Cycle { + var cycle Cycle + for i := len(stack) - 1; i >= 0; i-- { + cycle = append([]string{stack[i]}, cycle...) + if stack[i] == target { + break + } + } + return cycle +} + +// computeMetrics calculates afferent/efferent coupling and instability. +func computeMetrics(g *Graph) map[string]*PackageMetrics { + metrics := make(map[string]*PackageMetrics) + + getOrCreate := func(pkg string) *PackageMetrics { + if m, ok := metrics[pkg]; ok { + return m + } + m := &PackageMetrics{Package: pkg} + metrics[pkg] = m + return m + } + + for pkg, imports := range g.Edges { + m := getOrCreate(pkg) + m.Efferent = len(imports) + for dep := range imports { + dm := getOrCreate(dep) + dm.Afferent++ + } + } + + for _, m := range metrics { + total := m.Afferent + m.Efferent + if total > 0 { + m.Instability = float64(m.Efferent) / float64(total) + } + } + + return metrics +} + +// detectSDPViolations returns the package->dependency edges that violate +// the Stable Dependencies Principle (a package depending on something less +// stable than itself). 
+func detectSDPViolations(g *Graph, metrics map[string]*PackageMetrics) []SDPViolation { + var violations []SDPViolation + for pkg, imports := range g.Edges { + pkgMetric := metrics[pkg] + if pkgMetric == nil { + continue + } + violations = append(violations, checkSDPForPackage(pkgMetric, imports, metrics)...) + } + return violations +} + +func checkSDPForPackage(pkgMetric *PackageMetrics, imports map[string]bool, metrics map[string]*PackageMetrics) []SDPViolation { + var violations []SDPViolation + for dep := range imports { + depMetric := metrics[dep] + if depMetric == nil { + continue + } + if depMetric.Instability > pkgMetric.Instability { + violations = append(violations, SDPViolation{ + Package: pkgMetric.Package, + Dependency: dep, + PackageInstability: pkgMetric.Instability, + DependencyInstability: depMetric.Instability, + }) + } + } + return violations +} + +// trimModule strips the module prefix from a package path for display. +func trimModule(pkg, modulePath string) string { + return strings.TrimPrefix(pkg, modulePath+"/") +} diff --git a/internal/lang/goanalyzer/complexity_walker_test.go b/internal/lang/goanalyzer/complexity_walker_test.go new file mode 100644 index 0000000..a893db8 --- /dev/null +++ b/internal/lang/goanalyzer/complexity_walker_test.go @@ -0,0 +1,245 @@ +package goanalyzer + +import ( + "go/ast" + "go/parser" + "go/token" + "testing" +) + +// Most of these tests are imported verbatim from the pre-split +// internal/complexity package. They exercise the walker directly (rather +// than going through AnalyzeFile + a tempdir file) so failures localize to +// the exact construct that broke. 
+ +func TestComputeComplexity(t *testing.T) { + tests := []struct { + name string + code string + expected int + }{ + {"empty function", `package p; func f() {}`, 0}, + {"single if", `package p; func f(x int) { if x > 0 {} }`, 1}, + {"if-else", `package p; func f(x int) { if x > 0 {} else {} }`, 2}, + {"if-else if-else", `package p; func f(x int) { if x > 0 {} else if x < 0 {} else {} }`, 3}, + {"nested if", `package p; func f(x, y int) { if x > 0 { if y > 0 {} } }`, 3}, + {"for loop", `package p; func f() { for i := 0; i < 10; i++ {} }`, 1}, + {"nested for", `package p; func f() { for i := 0; i < 10; i++ { for j := 0; j < 10; j++ {} } }`, 3}, + {"switch with cases", `package p; func f(x int) { switch x { case 1: case 2: case 3: } }`, 1}, + {"logical operators same type", `package p; func f(a, b, c bool) { if a && b && c {} }`, 2}, + {"logical operators mixed", `package p; func f(a, b, c bool) { if a && b || c {} }`, 3}, + {"range loop", `package p; func f(s []int) { for range s {} }`, 1}, + {"select statement", `package p; func f(c chan int) { select { case <-c: } }`, 1}, + {"deeply nested", `package p +func f(x, y, z int) { + if x > 0 { + for y > 0 { + if z > 0 { + } + } + } +}`, 6}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + body := parseFuncBody(t, tt.code) + if got := computeCognitiveComplexity(body); got != tt.expected { + t.Errorf("complexity = %d, want %d", got, tt.expected) + } + }) + } +} + +func TestComputeComplexity_NilBody(t *testing.T) { + if got := computeCognitiveComplexity(nil); got != 0 { + t.Errorf("computeCognitiveComplexity(nil) = %d, want 0", got) + } +} + +func TestWalkStmt_NestingPenalty(t *testing.T) { + tests := []struct { + name string + code string + expected int + }{ + {"range at nesting 1", `package p; func f(x int) { + if x > 0 { + for range []int{} { + if x > 0 {} + } + } + }`, 6}, + {"switch at nesting 1", `package p; func f(x int) { + if x > 0 { + switch x { + case 1: + if x > 0 {} + } + } + }`, 6}, 
+ {"select at nesting 1", `package p; func f(x int, c chan int) { + if x > 0 { + select { + case <-c: + if x > 0 {} + } + } + }`, 6}, + {"type switch at nesting 1", `package p; func f(x int, v any) { + if x > 0 { + switch v.(type) { + case int: + if x > 0 {} + } + } + }`, 6}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + body := parseFuncBody(t, tt.code) + if got := computeCognitiveComplexity(body); got != tt.expected { + t.Errorf("complexity = %d, want %d", got, tt.expected) + } + }) + } +} + +func TestWalkForStmt_WithLogicalCondition(t *testing.T) { + body := parseFuncBody(t, `package p; func f(a, b bool) { for a && b {} }`) + if got := computeCognitiveComplexity(body); got != 2 { + t.Errorf("complexity = %d, want 2", got) + } +} + +func TestWalkIfStmt_WithElseChain(t *testing.T) { + body := parseFuncBody(t, `package p +func f(x int) { + if x > 0 { + } else if x < 0 { + } else { + } +}`) + if got := computeCognitiveComplexity(body); got != 3 { + t.Errorf("complexity = %d, want 3", got) + } +} + +func TestWalkIfStmt_WithInit(t *testing.T) { + body := parseFuncBody(t, `package p +func f() error { + if err := g(); err != nil { + } + return nil +} +func g() error { return nil }`) + if got := computeCognitiveComplexity(body); got != 1 { + t.Errorf("complexity = %d, want 1", got) + } +} + +func TestWalkStmt_LabeledStmt(t *testing.T) { + body := parseFuncBody(t, `package p +func f(x int) { +outer: + for x > 0 { + _ = x + break outer + } +}`) + if got := computeCognitiveComplexity(body); got != 1 { + t.Errorf("complexity = %d, want 1", got) + } +} + +func TestWalkStmt_GoAndDefer(t *testing.T) { + body := parseFuncBody(t, `package p +func f() { + go func() { + if true {} + }() + defer func() { + if true {} + }() +}`) + if got := computeCognitiveComplexity(body); got != 4 { + t.Errorf("complexity = %d, want 4", got) + } +} + +func TestWalkStmt_FuncLitInAssign(t *testing.T) { + body := parseFuncBody(t, `package p +func f() { + x := func() { + if 
true {} + } + _ = x +}`) + if got := computeCognitiveComplexity(body); got != 2 { + t.Errorf("complexity = %d, want 2", got) + } +} + +func TestWalkStmt_FuncLitInReturn(t *testing.T) { + body := parseFuncBody(t, `package p +func f() func() { + return func() { + if true {} + } +}`) + if got := computeCognitiveComplexity(body); got != 2 { + t.Errorf("complexity = %d, want 2", got) + } +} + +func TestCountLogicalOps(t *testing.T) { + tests := []struct { + name string + code string + expected int + }{ + {"no logical ops", `package p; var x = 1 + 2`, 0}, + {"single and", `package p; var x = true && false`, 1}, + {"chain same op", `package p; var x = true && false && true`, 1}, + {"mixed ops", `package p; var x = true && false || true`, 2}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "test.go", tt.code, 0) + if err != nil { + t.Fatalf("parse: %v", err) + } + var expr ast.Expr + ast.Inspect(f, func(n ast.Node) bool { + if vs, ok := n.(*ast.ValueSpec); ok && len(vs.Values) > 0 { + expr = vs.Values[0] + return false + } + return true + }) + if got := countLogicalOps(expr); got != tt.expected { + t.Errorf("countLogicalOps = %d, want %d", got, tt.expected) + } + }) + } +} + +// parseFuncBody parses code and returns the body of the first FuncDecl. +// All the walker tests use this rather than open-coding the parse loop. 
+func parseFuncBody(t *testing.T, code string) *ast.BlockStmt { + t.Helper() + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "test.go", code, 0) + if err != nil { + t.Fatalf("parse: %v", err) + } + for _, decl := range f.Decls { + if fd, ok := decl.(*ast.FuncDecl); ok { + return fd.Body + } + } + t.Fatal("no function found") + return nil +} diff --git a/internal/lang/goanalyzer/mutation_annotate_test.go b/internal/lang/goanalyzer/mutation_annotate_test.go new file mode 100644 index 0000000..17f25b4 --- /dev/null +++ b/internal/lang/goanalyzer/mutation_annotate_test.go @@ -0,0 +1,138 @@ +package goanalyzer + +import ( + "go/parser" + "go/token" + "os" + "path/filepath" + "testing" +) + +func TestScanAnnotations_DisableNextLine(t *testing.T) { + code := `package p + +func f() { + // mutator-disable-next-line + if true { + } +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + if err := os.WriteFile(fp, []byte(code), 0644); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + if !disabled[5] { + t.Errorf("expected line 5 disabled, got %v", disabled) + } + if disabled[4] { + t.Error("comment line should not be disabled") + } + if disabled[6] { + t.Error("line 6 should not be disabled") + } +} + +func TestScanAnnotations_DisableFunc(t *testing.T) { + code := `package p + +// mutator-disable-func +func f() { + if true { + } + x := 1 + _ = x +} + +func g() { + if true { + } +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + os.WriteFile(fp, []byte(code), 0644) + + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + + for i := 4; i <= 9; i++ { + if !disabled[i] { + t.Errorf("expected line %d disabled (inside f)", i) + } + } + if disabled[12] { + t.Error("g()'s line 12 should not be disabled") + } +} + +func TestScanAnnotations_NoAnnotations(t *testing.T) { + code := `package p + +func f() { + if true {} +} 
+` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + os.WriteFile(fp, []byte(code), 0644) + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("expected empty disabled map, got %v", disabled) + } +} + +func TestScanAnnotations_IrrelevantComment(t *testing.T) { + code := `package p + +// this is just a regular comment +func f() { + if true {} +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "t.go") + os.WriteFile(fp, []byte(code), 0644) + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("regular comments should not disable mutations, got %v", disabled) + } +} + +// TestFuncRanges_IncludesSignatureAndBody ensures funcRanges spans the +// whole FuncDecl (signature + body), since that's what mutator-disable-func +// should cover. +func TestFuncRanges_IncludesSignatureAndBody(t *testing.T) { + code := `package p +func f() { + if true {} +} +` + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, "t.go", code, parser.ParseComments) + if err != nil { + t.Fatal(err) + } + ranges := funcRanges(fset, f) + if len(ranges) != 1 { + t.Fatalf("expected 1 range, got %d", len(ranges)) + } + if ranges[0].start != 2 { + t.Errorf("start = %d, want 2", ranges[0].start) + } + if ranges[0].end < ranges[0].start { + t.Errorf("end=%d < start=%d", ranges[0].end, ranges[0].start) + } +} diff --git a/internal/lang/goanalyzer/mutation_apply_test.go b/internal/lang/goanalyzer/mutation_apply_test.go new file mode 100644 index 0000000..2181908 --- /dev/null +++ b/internal/lang/goanalyzer/mutation_apply_test.go @@ -0,0 +1,345 @@ +package goanalyzer + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +func TestApplyBinaryMutation_Success(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.GTR} + site := 
lang.MutantSite{Description: "> -> >=", Operator: "conditional_boundary"} + if !applyBinaryMutation(expr, site) { + t.Error("expected successful apply") + } + if expr.Op != token.GEQ { + t.Errorf("op = %v, want GEQ", expr.Op) + } +} + +func TestApplyBinaryMutation_WrongNodeType(t *testing.T) { + ident := &ast.Ident{Name: "x"} + site := lang.MutantSite{Description: "> -> >=", Operator: "conditional_boundary"} + if applyBinaryMutation(ident, site) { + t.Error("expected false for non-BinaryExpr") + } +} + +func TestApplyBinaryMutation_IllegalOp(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.GTR} + site := lang.MutantSite{Description: "invalid", Operator: "conditional_boundary"} + if applyBinaryMutation(expr, site) { + t.Error("expected false for invalid description") + } +} + +// TestApplyBinaryMutation_OperatorMismatch locks in the fix for a bug where +// applyBinaryMutation rewrote the first BinaryExpr found on a line even +// when its operator differed from the mutant's intended `from` op. 
+func TestApplyBinaryMutation_OperatorMismatch(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.LAND} + site := lang.MutantSite{Description: "!= -> ==", Operator: "negate_conditional"} + if applyBinaryMutation(expr, site) { + t.Error("expected false when expr.Op (&&) does not match mutant's from-op (!=)") + } + if expr.Op != token.LAND { + t.Errorf("expr.Op = %v, want LAND", expr.Op) + } +} + +func TestApplyBinaryMutation_MathOperatorMismatch(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.SUB} + site := lang.MutantSite{Description: "+ -> -", Operator: "math_operator"} + if applyBinaryMutation(expr, site) { + t.Error("expected false when expr.Op (-) does not match from-op (+)") + } +} + +func TestApplyBoolMutation_TrueToFalse(t *testing.T) { + ident := &ast.Ident{Name: "true"} + site := lang.MutantSite{Description: "true -> false", Operator: "boolean_substitution"} + if !applyBoolMutation(ident, site) { + t.Error("expected successful apply") + } + if ident.Name != "false" { + t.Errorf("name = %q, want false", ident.Name) + } +} + +func TestApplyBoolMutation_FalseToTrue(t *testing.T) { + ident := &ast.Ident{Name: "false"} + site := lang.MutantSite{Description: "false -> true", Operator: "boolean_substitution"} + if !applyBoolMutation(ident, site) { + t.Error("expected successful apply") + } + if ident.Name != "true" { + t.Errorf("name = %q, want true", ident.Name) + } +} + +func TestApplyBoolMutation_WrongNodeType(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.ADD} + site := lang.MutantSite{Description: "true -> false", Operator: "boolean_substitution"} + if applyBoolMutation(expr, site) { + t.Error("expected false for non-Ident") + } +} + +func TestApplyReturnMutation_Success(t *testing.T) { + ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x", NamePos: 1}}} + if !applyReturnMutation(ret) { + t.Error("expected successful apply") + } + if ident, ok := ret.Results[0].(*ast.Ident); !ok || ident.Name != "nil" { + t.Error("expected result 
replaced with nil") + } +} + +func TestApplyReturnMutation_WrongNodeType(t *testing.T) { + ident := &ast.Ident{Name: "x"} + if applyReturnMutation(ident) { + t.Error("expected false for non-ReturnStmt") + } +} + +func TestApplyIncDecMutation_Inc(t *testing.T) { + stmt := &ast.IncDecStmt{Tok: token.INC} + if !applyIncDecMutation(stmt) { + t.Error("expected successful apply") + } + if stmt.Tok != token.DEC { + t.Errorf("tok = %v, want DEC", stmt.Tok) + } +} + +func TestApplyIncDecMutation_Dec(t *testing.T) { + stmt := &ast.IncDecStmt{Tok: token.DEC} + if !applyIncDecMutation(stmt) { + t.Error("expected successful apply") + } + if stmt.Tok != token.INC { + t.Errorf("tok = %v, want INC", stmt.Tok) + } +} + +func TestApplyIncDecMutation_WrongNodeType(t *testing.T) { + if applyIncDecMutation(&ast.Ident{Name: "x"}) { + t.Error("expected false for non-IncDecStmt") + } +} + +func TestApplyBranchRemoval(t *testing.T) { + body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} + ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} + if !applyBranchRemoval(ifStmt) { + t.Error("expected successful apply") + } + if len(ifStmt.Body.List) != 0 { + t.Errorf("expected body emptied, got %d stmts", len(ifStmt.Body.List)) + } +} + +func TestApplyBranchRemoval_WrongType(t *testing.T) { + if applyBranchRemoval(&ast.Ident{Name: "x"}) { + t.Error("expected false for non-IfStmt") + } +} + +func TestTryApplyMutation_Binary(t *testing.T) { + expr := &ast.BinaryExpr{Op: token.ADD} + site := lang.MutantSite{Description: "+ -> -", Operator: "math_operator"} + if !tryApplyMutation(expr, site) { + t.Error("expected successful apply") + } + if expr.Op != token.SUB { + t.Errorf("op = %v, want SUB", expr.Op) + } +} + +func TestTryApplyMutation_Bool(t *testing.T) { + ident := &ast.Ident{Name: "true"} + site := lang.MutantSite{Description: "true -> false", Operator: "boolean_substitution"} + if !tryApplyMutation(ident, site) { + t.Error("expected successful 
apply") + } +} + +func TestTryApplyMutation_Return(t *testing.T) { + ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x", NamePos: 1}}} + site := lang.MutantSite{Operator: "return_value"} + if !tryApplyMutation(ret, site) { + t.Error("expected successful apply") + } +} + +func TestTryApplyMutation_Unknown(t *testing.T) { + ident := &ast.Ident{Name: "x"} + site := lang.MutantSite{Operator: "unknown_operator"} + if tryApplyMutation(ident, site) { + t.Error("expected false for unknown operator") + } +} + +func TestApplyMutationToAST(t *testing.T) { + code := `package test + +func f() bool { + return true +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, fp, nil, parser.ParseComments) + + site := lang.MutantSite{Line: 4, Description: "true -> false", Operator: "boolean_substitution"} + if !applyMutationToAST(fset, f, site) { + t.Error("expected mutation to be applied") + } +} + +func TestApplyMutationToAST_NoMatch(t *testing.T) { + code := `package test + +func f() int { + return 42 +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, fp, nil, parser.ParseComments) + + site := lang.MutantSite{Line: 999, Description: "true -> false", Operator: "boolean_substitution"} + if applyMutationToAST(fset, f, site) { + t.Error("expected no mutation applied") + } +} + +func TestApplyMutation_Full(t *testing.T) { + code := `package test + +func f(a, b int) bool { + return a > b +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + site := lang.MutantSite{File: "test.go", Line: 4, Description: "> -> >=", Operator: "conditional_boundary"} + result, _ := mutantApplierImpl{}.ApplyMutation(fp, site) + if result == nil { + t.Fatal("expected non-nil result") + } + if 
!strings.Contains(string(result), ">=") { + t.Error("expected mutated code to contain >=") + } +} + +func TestApplyMutation_ParseError(t *testing.T) { + site := lang.MutantSite{Line: 1, Operator: "boolean_substitution"} + result, _ := mutantApplierImpl{}.ApplyMutation("/nonexistent/file.go", site) + if result != nil { + t.Error("expected nil for parse error") + } +} + +func TestApplyMutation_NoMatch(t *testing.T) { + code := `package test + +func f() {} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + site := lang.MutantSite{Line: 999, Operator: "boolean_substitution", Description: "true -> false"} + result, _ := mutantApplierImpl{}.ApplyMutation(fp, site) + if result != nil { + t.Error("expected nil when mutation can't be applied") + } +} + +func TestApplyStatementDeletion(t *testing.T) { + code := `package test + +func f() { + doThing() + x := 1 + _ = x +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + site := lang.MutantSite{Line: 4, Operator: "statement_deletion"} + result, _ := mutantApplierImpl{}.ApplyMutation(fp, site) + if result == nil { + t.Fatal("expected non-nil result") + } + if strings.Contains(string(result), "doThing()") { + t.Errorf("expected doThing() removed, got:\n%s", string(result)) + } +} + +func TestRenderFile(t *testing.T) { + code := `package test + +func f() {} +` + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) + + result := renderFile(fset, f) + if result == nil { + t.Fatal("expected non-nil render result") + } + if !strings.Contains(string(result), "package test") { + t.Error("rendered file should contain package declaration") + } +} + +func TestZeroValueExpr(t *testing.T) { + original := &ast.Ident{Name: "x", NamePos: 42} + result := zeroValueExpr(original) + ident, ok := result.(*ast.Ident) + if !ok { + t.Fatal("expected *ast.Ident") + } + if ident.Name != "nil" { + 
t.Errorf("name = %q, want nil", ident.Name) + } +} + +func TestParseMutationOp(t *testing.T) { + tests := []struct { + desc string + wantFrom token.Token + wantTo token.Token + }{ + {"> -> >=", token.GTR, token.GEQ}, + {"== -> !=", token.EQL, token.NEQ}, + {"+ -> -", token.ADD, token.SUB}, + {"invalid", token.ILLEGAL, token.ILLEGAL}, + {"+ -> unknown", token.ILLEGAL, token.ILLEGAL}, + } + for _, tt := range tests { + gotFrom, gotTo := parseMutationOp(tt.desc) + if gotFrom != tt.wantFrom || gotTo != tt.wantTo { + t.Errorf("parseMutationOp(%q) = (%v, %v), want (%v, %v)", + tt.desc, gotFrom, gotTo, tt.wantFrom, tt.wantTo) + } + } +} diff --git a/internal/lang/goanalyzer/mutation_generate_test.go b/internal/lang/goanalyzer/mutation_generate_test.go new file mode 100644 index 0000000..3ecd8cf --- /dev/null +++ b/internal/lang/goanalyzer/mutation_generate_test.go @@ -0,0 +1,313 @@ +package goanalyzer + +import ( + "go/ast" + "go/token" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +func TestBinaryMutants(t *testing.T) { + tests := []struct { + name string + op token.Token + expected int + }{ + {"greater than", token.GTR, 1}, + {"less than", token.LSS, 1}, + {"equal", token.EQL, 1}, + {"not equal", token.NEQ, 1}, + {"add", token.ADD, 1}, + {"subtract", token.SUB, 1}, + {"multiply", token.MUL, 1}, + {"divide", token.QUO, 1}, + {"and (no mutation)", token.LAND, 0}, + {"or (no mutation)", token.LOR, 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expr := &ast.BinaryExpr{Op: tt.op} + mutants := binaryMutants("test.go", 1, expr) + if len(mutants) != tt.expected { + t.Errorf("binaryMutants(%v) = %d mutants, want %d", tt.op, len(mutants), tt.expected) + } + }) + } +} + +func TestBoolMutants(t *testing.T) { + tests := []struct { + name string + ident string + expected int + }{ + {"true", "true", 1}, + {"false", "false", 1}, + {"other", "x", 0}, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + ident := &ast.Ident{Name: tt.ident} + mutants := boolMutants("test.go", 1, ident) + if len(mutants) != tt.expected { + t.Errorf("boolMutants(%q) = %d, want %d", tt.ident, len(mutants), tt.expected) + } + }) + } +} + +func TestReturnMutants(t *testing.T) { + ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x"}}} + mutants := returnMutants("test.go", 1, ret) + if len(mutants) != 1 { + t.Errorf("returnMutants with values: got %d, want 1", len(mutants)) + } + + bareRet := &ast.ReturnStmt{} + mutants = returnMutants("test.go", 1, bareRet) + if len(mutants) != 0 { + t.Errorf("returnMutants bare: got %d, want 0", len(mutants)) + } +} + +func TestIncDecMutants(t *testing.T) { + incStmt := &ast.IncDecStmt{Tok: token.INC} + m := incdecMutants("a.go", 5, incStmt) + if len(m) != 1 { + t.Fatalf("expected 1 mutant for ++, got %d", len(m)) + } + if m[0].Operator != "incdec" { + t.Errorf("operator = %q, want incdec", m[0].Operator) + } + if !strings.Contains(m[0].Description, "--") { + t.Errorf("description = %q", m[0].Description) + } + + decStmt := &ast.IncDecStmt{Tok: token.DEC} + m = incdecMutants("a.go", 5, decStmt) + if len(m) != 1 { + t.Fatalf("expected 1 mutant for --, got %d", len(m)) + } + + other := &ast.IncDecStmt{Tok: token.ADD} + if ms := incdecMutants("a.go", 5, other); len(ms) != 0 { + t.Errorf("unexpected mutants for non-incdec tok: %+v", ms) + } +} + +func TestIfBodyMutants(t *testing.T) { + body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} + ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} + m := ifBodyMutants("a.go", 5, ifStmt) + if len(m) != 1 { + t.Fatalf("expected 1 mutant, got %d", len(m)) + } + if m[0].Operator != "branch_removal" { + t.Errorf("operator = %q, want branch_removal", m[0].Operator) + } + + empty := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: &ast.BlockStmt{}} + if ms := ifBodyMutants("a.go", 5, empty); len(ms) != 0 { + t.Errorf("expected 
no mutants for empty if body, got %d", len(ms)) + } +} + +func TestExprStmtMutants_CallExpr(t *testing.T) { + call := &ast.ExprStmt{X: &ast.CallExpr{Fun: &ast.Ident{Name: "foo"}}} + m := exprStmtMutants("a.go", 5, call) + if len(m) != 1 { + t.Fatalf("expected 1 mutant, got %d", len(m)) + } + if m[0].Operator != "statement_deletion" { + t.Errorf("operator = %q", m[0].Operator) + } +} + +func TestExprStmtMutants_NonCall(t *testing.T) { + stmt := &ast.ExprStmt{X: &ast.Ident{Name: "x"}} + if ms := exprStmtMutants("a.go", 5, stmt); len(ms) != 0 { + t.Errorf("expected no mutants for non-call, got %d", len(ms)) + } +} + +func TestOperatorName(t *testing.T) { + tests := []struct { + from, to token.Token + expected string + }{ + {token.GTR, token.GEQ, "conditional_boundary"}, + {token.EQL, token.NEQ, "negate_conditional"}, + {token.ADD, token.SUB, "math_operator"}, + } + for _, tt := range tests { + got := operatorName(tt.from, tt.to) + if got != tt.expected { + t.Errorf("operatorName(%v, %v) = %q, want %q", tt.from, tt.to, got, tt.expected) + } + } +} + +func TestIsBoundary(t *testing.T) { + if !isBoundary(token.GTR) { + t.Error("GTR should be boundary") + } + if !isBoundary(token.GEQ) { + t.Error("GEQ should be boundary") + } + if isBoundary(token.EQL) { + t.Error("EQL should not be boundary") + } +} + +func TestIsComparison(t *testing.T) { + if !isComparison(token.EQL) { + t.Error("EQL should be comparison") + } + if isComparison(token.GTR) { + t.Error("GTR should not be comparison") + } +} + +func TestIsMath(t *testing.T) { + if !isMath(token.ADD) { + t.Error("ADD should be math") + } + if isMath(token.EQL) { + t.Error("EQL should not be math") + } +} + +func TestGenerateMutants_EndToEnd(t *testing.T) { + code := `package test + +func add(a, b int) int { + if a > b { + return a + b + } + return a - b +} +` + dir := t.TempDir() + filePath := filepath.Join(dir, "test.go") + if err := os.WriteFile(filePath, []byte(code), 0644); err != nil { + t.Fatal(err) + } + + fc := 
diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 8}}, + } + + mutants, err := mutantGeneratorImpl{}.GenerateMutants(filePath, fc, nil) + if err != nil { + t.Fatalf("GenerateMutants: %v", err) + } + if len(mutants) == 0 { + t.Error("expected mutants, got none") + } + + operators := make(map[string]int) + for _, m := range mutants { + operators[m.Operator]++ + } + + if operators["conditional_boundary"] == 0 { + t.Error("expected conditional_boundary mutants") + } + if operators["math_operator"] == 0 { + t.Error("expected math_operator mutants") + } +} + +func TestGenerateMutants_WithAllTypes(t *testing.T) { + code := `package test + +func f(a, b int) bool { + if a > b { + return true + } + x := a + b + _ = x + return false +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 20}}, + } + + mutants, err := mutantGeneratorImpl{}.GenerateMutants(fp, fc, nil) + if err != nil { + t.Fatalf("GenerateMutants: %v", err) + } + + operators := make(map[string]int) + for _, m := range mutants { + operators[m.Operator]++ + } + + for _, want := range []string{"conditional_boundary", "boolean_substitution", "math_operator", "return_value"} { + if operators[want] == 0 { + t.Errorf("missing %s mutants", want) + } + } +} + +func TestGenerateMutants_HonorsDisableNextLine(t *testing.T) { + code := `package test + +func f(x int) bool { + // mutator-disable-next-line + if x > 0 { + return true + } + if x < 0 { + return false + } + return false +} +` + dir := t.TempDir() + fp := filepath.Join(dir, "test.go") + os.WriteFile(fp, []byte(code), 0644) + + fc := diff.FileChange{ + Path: "test.go", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, + } + + disabled, err := annotationScannerImpl{}.ScanAnnotations(fp) + if err != nil { + t.Fatal(err) + } + mutants, err := 
mutantGeneratorImpl{}.GenerateMutants(fp, fc, disabled) + if err != nil { + t.Fatal(err) + } + + for _, m := range mutants { + if m.Line == 5 { + t.Errorf("expected no mutants on annotated line 5, got: %+v", m) + } + } + + foundAt8 := false + for _, m := range mutants { + if m.Line == 8 { + foundAt8 = true + } + } + if !foundAt8 { + t.Error("expected mutants on un-annotated line 8") + } +} diff --git a/internal/lang/goanalyzer/testrunner_test.go b/internal/lang/goanalyzer/testrunner_test.go new file mode 100644 index 0000000..54f604a --- /dev/null +++ b/internal/lang/goanalyzer/testrunner_test.go @@ -0,0 +1,75 @@ +package goanalyzer + +import ( + "os" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +func TestWriteOverlayJSON(t *testing.T) { + dir := t.TempDir() + overlayPath := filepath.Join(dir, "overlay.json") + if err := writeOverlayJSON(overlayPath, "/orig/foo.go", "/tmp/mutated.go"); err != nil { + t.Fatalf("writeOverlayJSON error: %v", err) + } + data, err := os.ReadFile(overlayPath) + if err != nil { + t.Fatal(err) + } + // Must be the exact shape go test -overlay expects: + // {"Replace":{"":""}} + expected := `{"Replace":{"/orig/foo.go":"/tmp/mutated.go"}}` + if string(data) != expected { + t.Errorf("overlay JSON = %q, want %q", string(data), expected) + } +} + +func TestBuildTestArgs_Default(t *testing.T) { + args := buildTestArgs(lang.TestRunConfig{}, "/tmp/overlay.json") + if args[0] != "test" { + t.Errorf("args[0] = %q, want test", args[0]) + } + foundOverlay := false + for _, a := range args { + if a == "-overlay=/tmp/overlay.json" { + foundOverlay = true + } + } + if !foundOverlay { + t.Errorf("expected -overlay in args, got %v", args) + } + for _, a := range args { + if a == "-run" { + t.Error("did not expect -run in default args") + } + } +} + +func TestBuildTestArgs_WithPattern(t *testing.T) { + args := buildTestArgs(lang.TestRunConfig{TestPattern: "TestFoo"}, "/tmp/overlay.json") + found := false + for i, a 
:= range args { + if a == "-run" && i+1 < len(args) && args[i+1] == "TestFoo" { + found = true + } + } + if !found { + t.Errorf("expected -run TestFoo in args, got %v", args) + } +} + +func TestBuildTestArgs_TimeoutPassed(t *testing.T) { + args := buildTestArgs(lang.TestRunConfig{}, "/tmp/overlay.json") + // Default timeout (30s) should be formatted as "30s" + found := false + for i, a := range args { + if a == "-timeout" && i+1 < len(args) && args[i+1] == "30s" { + found = true + } + } + if !found { + t.Errorf("expected -timeout 30s in args, got %v", args) + } +} diff --git a/internal/mutation/annotations.go b/internal/mutation/annotations.go deleted file mode 100644 index 6910cde..0000000 --- a/internal/mutation/annotations.go +++ /dev/null @@ -1,86 +0,0 @@ -package mutation - -import ( - "go/ast" - "go/token" - "strings" -) - -// scanAnnotations returns the set of source lines where mutation generation -// should be suppressed based on mutator-disable-* comment annotations. -// -// Supported annotations: -// - // mutator-disable-next-line : skips mutations on the following line -// - // mutator-disable-func : skips mutations in the enclosing function -func scanAnnotations(fset *token.FileSet, f *ast.File) map[int]bool { - disabled := make(map[int]bool) - funcs := funcRanges(fset, f) - - for _, cg := range f.Comments { - for _, c := range cg.List { - applyAnnotation(stripCommentMarkers(c.Text), fset.Position(c.Pos()).Line, funcs, disabled) - } - } - return disabled -} - -func stripCommentMarkers(raw string) string { - s := strings.TrimSpace(strings.TrimPrefix(raw, "//")) - s = strings.TrimSpace(strings.TrimPrefix(s, "/*")) - s = strings.TrimSpace(strings.TrimSuffix(s, "*/")) - return s -} - -func applyAnnotation(text string, commentLine int, funcs []funcRange, disabled map[int]bool) { - switch { - case strings.HasPrefix(text, "mutator-disable-next-line"): - disabled[commentLine+1] = true - case strings.HasPrefix(text, "mutator-disable-func"): - 
disableEnclosingFunc(commentLine, funcs, disabled) - } -} - -func disableEnclosingFunc(commentLine int, funcs []funcRange, disabled map[int]bool) { - for _, r := range funcs { - if isCommentForFunc(commentLine, r) { - markFuncDisabled(r, disabled) - return - } - } -} - -// isCommentForFunc reports whether a comment on commentLine applies to the -// given function, either because it's inside the function or directly -// precedes it (godoc-style, allowing one blank line). -func isCommentForFunc(commentLine int, r funcRange) bool { - if commentLine >= r.start && commentLine <= r.end { - return true - } - return r.start > commentLine && r.start-commentLine <= 2 -} - -func markFuncDisabled(r funcRange, disabled map[int]bool) { - for i := r.start; i <= r.end; i++ { - disabled[i] = true - } -} - -type funcRange struct { - start, end int -} - -func funcRanges(fset *token.FileSet, f *ast.File) []funcRange { - var ranges []funcRange - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok || fn.Body == nil { - return true - } - ranges = append(ranges, funcRange{ - start: fset.Position(fn.Pos()).Line, - end: fset.Position(fn.End()).Line, - }) - return true - }) - return ranges -} diff --git a/internal/mutation/apply.go b/internal/mutation/apply.go deleted file mode 100644 index 08d95dd..0000000 --- a/internal/mutation/apply.go +++ /dev/null @@ -1,200 +0,0 @@ -package mutation - -import ( - "bytes" - "go/ast" - "go/parser" - "go/printer" - "go/token" - "strings" -) - -// applyMutation re-parses the file and applies the specific mutation. 
-func applyMutation(absPath string, m *Mutant) []byte { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) - if err != nil { - return nil - } - - var applied bool - if m.Operator == "statement_deletion" { - applied = applyStatementDeletion(fset, f, m) - } else { - applied = applyMutationToAST(fset, f, m) - } - - if !applied { - return nil - } - return renderFile(fset, f) -} - -func applyMutationToAST(fset *token.FileSet, f *ast.File, m *Mutant) bool { - applied := false - ast.Inspect(f, func(n ast.Node) bool { - if applied || n == nil { - return false - } - if fset.Position(n.Pos()).Line != m.Line { - return true - } - applied = tryApplyMutation(n, m) - return !applied - }) - return applied -} - -// applyStatementDeletion needs the containing block to replace a statement, -// so it walks BlockStmts instead of the flat ast.Inspect used for other ops. -func applyStatementDeletion(fset *token.FileSet, f *ast.File, m *Mutant) bool { - applied := false - ast.Inspect(f, func(n ast.Node) bool { - if applied { - return false - } - block, ok := n.(*ast.BlockStmt) - if !ok { - return true - } - if tryDeleteInBlock(fset, block, m) { - applied = true - return false - } - return true - }) - return applied -} - -func tryDeleteInBlock(fset *token.FileSet, block *ast.BlockStmt, m *Mutant) bool { - for i, stmt := range block.List { - if fset.Position(stmt.Pos()).Line != m.Line { - continue - } - if _, ok := stmt.(*ast.ExprStmt); !ok { - continue - } - block.List[i] = &ast.EmptyStmt{Semicolon: stmt.Pos()} - return true - } - return false -} - -func tryApplyMutation(n ast.Node, m *Mutant) bool { - switch m.Operator { - case "conditional_boundary", "negate_conditional", "math_operator": - return applyBinaryMutation(n, m) - case "boolean_substitution": - return applyBoolMutation(n, m) - case "return_value": - return applyReturnMutation(n) - case "incdec": - return applyIncDecMutation(n) - case "branch_removal": - return 
applyBranchRemoval(n) - } - return false -} - -func applyBinaryMutation(n ast.Node, m *Mutant) bool { - expr, ok := n.(*ast.BinaryExpr) - if !ok { - return false - } - // Verify the operator matches the mutant description. Without this - // check, the walker would rewrite the first BinaryExpr it finds on - // the line — e.g. the outer `&&` in `a != nil && b`, or the outer - // `-` in `a + b - 1` — producing a no-op instead of the intended - // mutation and leaving a false-surviving mutant. - from, to := parseMutationOp(m.Description) - if to == token.ILLEGAL || expr.Op != from { - return false - } - expr.Op = to - return true -} - -func applyBoolMutation(n ast.Node, m *Mutant) bool { - ident, ok := n.(*ast.Ident) - if !ok || (ident.Name != "true" && ident.Name != "false") { - return false - } - if strings.Contains(m.Description, "-> true") { - ident.Name = "true" - } else { - ident.Name = "false" - } - return true -} - -func applyReturnMutation(n ast.Node) bool { - ret, ok := n.(*ast.ReturnStmt) - if !ok { - return false - } - for i := range ret.Results { - ret.Results[i] = zeroValueExpr(ret.Results[i]) - } - return true -} - -func applyIncDecMutation(n ast.Node) bool { - stmt, ok := n.(*ast.IncDecStmt) - if !ok { - return false - } - switch stmt.Tok { - case token.INC: - stmt.Tok = token.DEC - case token.DEC: - stmt.Tok = token.INC - default: - return false - } - return true -} - -func applyBranchRemoval(n ast.Node) bool { - stmt, ok := n.(*ast.IfStmt) - if !ok || stmt.Body == nil { - return false - } - stmt.Body.List = nil - return true -} - -// parseMutationOp parses a mutant description of the form "X -> Y" into -// the (from, to) operator pair. Either token is ILLEGAL if parsing fails. 
-func parseMutationOp(desc string) (from, to token.Token) { - parts := strings.Split(desc, " -> ") - if len(parts) != 2 { - return token.ILLEGAL, token.ILLEGAL - } - - opMap := map[string]token.Token{ - ">": token.GTR, ">=": token.GEQ, - "<": token.LSS, "<=": token.LEQ, - "==": token.EQL, "!=": token.NEQ, - "+": token.ADD, "-": token.SUB, - "*": token.MUL, "/": token.QUO, - } - - fromOp, okFrom := opMap[parts[0]] - toOp, okTo := opMap[parts[1]] - if !okFrom || !okTo { - return token.ILLEGAL, token.ILLEGAL - } - return fromOp, toOp -} - -func zeroValueExpr(expr ast.Expr) ast.Expr { - return &ast.Ident{Name: "nil", NamePos: expr.Pos()} -} - -func renderFile(fset *token.FileSet, f *ast.File) []byte { - var buf bytes.Buffer - if err := printer.Fprint(&buf, fset, f); err != nil { - return nil - } - return buf.Bytes() -} diff --git a/internal/mutation/features_test.go b/internal/mutation/features_test.go deleted file mode 100644 index e5d4cf9..0000000 --- a/internal/mutation/features_test.go +++ /dev/null @@ -1,374 +0,0 @@ -package mutation - -import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "runtime" - "strings" - "testing" - - "github.com/0xPolygon/diffguard/internal/diff" -) - -// --- Annotation tests --- - -func TestScanAnnotations_DisableNextLine(t *testing.T) { - code := `package p - -func f() { - // mutator-disable-next-line - if true { - } -} -` - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - if err != nil { - t.Fatal(err) - } - disabled := scanAnnotations(fset, f) - // Comment is on line 4, so line 5 should be disabled - if !disabled[5] { - t.Errorf("expected line 5 disabled, got disabled=%v", disabled) - } - if disabled[4] { - t.Error("comment line should not be disabled") - } - if disabled[6] { - t.Error("line 6 should not be disabled") - } -} - -func TestScanAnnotations_DisableFunc(t *testing.T) { - code := `package p - -// mutator-disable-func -func f() { - if true { - } - x := 
1 - _ = x -} - -func g() { - if true { - } -} -` - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - if err != nil { - t.Fatal(err) - } - disabled := scanAnnotations(fset, f) - - // All lines of f() (4-9) should be disabled - for i := 4; i <= 9; i++ { - if !disabled[i] { - t.Errorf("expected line %d disabled (inside f)", i) - } - } - // g() should not be disabled - if disabled[12] { - t.Error("g()'s line 12 should not be disabled") - } -} - -func TestScanAnnotations_NoAnnotations(t *testing.T) { - code := `package p - -func f() { - if true {} -} -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - disabled := scanAnnotations(fset, f) - if len(disabled) != 0 { - t.Errorf("expected empty disabled map, got %v", disabled) - } -} - -func TestScanAnnotations_IrrelevantComment(t *testing.T) { - code := `package p - -// this is just a regular comment -func f() { - if true {} -} -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - disabled := scanAnnotations(fset, f) - if len(disabled) != 0 { - t.Errorf("regular comments should not disable mutations, got %v", disabled) - } -} - -func TestGenerateMutants_HonorsDisableNextLine(t *testing.T) { - code := `package test - -func f(x int) bool { - // mutator-disable-next-line - if x > 0 { - return true - } - if x < 0 { - return false - } - return false -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - mutants, err := generateMutants(fp, fc) - if err != nil { - t.Fatal(err) - } - - // The `x > 0` line is annotated — no mutants for line 5 - for _, m := range mutants { - if m.Line == 5 { - t.Errorf("expected no mutants on annotated line 5, got: %+v", m) - } - } - - // The `x < 0` line should still have mutants 
- foundAt8 := false - for _, m := range mutants { - if m.Line == 8 { - foundAt8 = true - } - } - if !foundAt8 { - t.Error("expected mutants on un-annotated line 8") - } -} - -// --- New operator tests --- - -func TestIncDecMutants(t *testing.T) { - // x++ -> x-- - incStmt := &ast.IncDecStmt{Tok: token.INC} - m := incdecMutants("a.go", 5, incStmt) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for ++, got %d", len(m)) - } - if m[0].Operator != "incdec" { - t.Errorf("operator = %q, want incdec", m[0].Operator) - } - if !strings.Contains(m[0].Description, "--") { - t.Errorf("description = %q, expected it to mention --", m[0].Description) - } - - // x-- -> x++ - decStmt := &ast.IncDecStmt{Tok: token.DEC} - m = incdecMutants("a.go", 5, decStmt) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for --, got %d", len(m)) - } - - // Other tokens produce nothing - other := &ast.IncDecStmt{Tok: token.ADD} - if ms := incdecMutants("a.go", 5, other); len(ms) != 0 { - t.Errorf("unexpected mutants for non-incdec tok: %+v", ms) - } -} - -func TestIfBodyMutants(t *testing.T) { - // If with body - body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} - ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} - m := ifBodyMutants("a.go", 5, ifStmt) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for non-empty if body, got %d", len(m)) - } - if m[0].Operator != "branch_removal" { - t.Errorf("operator = %q, want branch_removal", m[0].Operator) - } - - // If with empty body — no mutant - empty := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: &ast.BlockStmt{}} - if ms := ifBodyMutants("a.go", 5, empty); len(ms) != 0 { - t.Errorf("expected no mutants for empty if body, got %d", len(ms)) - } -} - -func TestExprStmtMutants_CallExpr(t *testing.T) { - call := &ast.ExprStmt{X: &ast.CallExpr{Fun: &ast.Ident{Name: "foo"}}} - m := exprStmtMutants("a.go", 5, call) - if len(m) != 1 { - t.Fatalf("expected 1 mutant for call expr, got %d", len(m)) - } - if 
m[0].Operator != "statement_deletion" { - t.Errorf("operator = %q, want statement_deletion", m[0].Operator) - } -} - -func TestExprStmtMutants_NonCall(t *testing.T) { - // ExprStmt wrapping a non-call (e.g., an ident) — skip - stmt := &ast.ExprStmt{X: &ast.Ident{Name: "x"}} - if ms := exprStmtMutants("a.go", 5, stmt); len(ms) != 0 { - t.Errorf("expected no mutants for non-call expr, got %d", len(ms)) - } -} - -func TestApplyIncDecMutation_Inc(t *testing.T) { - stmt := &ast.IncDecStmt{Tok: token.INC} - if !applyIncDecMutation(stmt) { - t.Error("expected successful apply") - } - if stmt.Tok != token.DEC { - t.Errorf("tok = %v, want DEC", stmt.Tok) - } -} - -func TestApplyIncDecMutation_Dec(t *testing.T) { - stmt := &ast.IncDecStmt{Tok: token.DEC} - if !applyIncDecMutation(stmt) { - t.Error("expected successful apply") - } - if stmt.Tok != token.INC { - t.Errorf("tok = %v, want INC", stmt.Tok) - } -} - -func TestApplyIncDecMutation_WrongNodeType(t *testing.T) { - if applyIncDecMutation(&ast.Ident{Name: "x"}) { - t.Error("expected false for non-IncDecStmt") - } -} - -func TestApplyBranchRemoval(t *testing.T) { - body := &ast.BlockStmt{List: []ast.Stmt{&ast.ExprStmt{X: &ast.Ident{Name: "x"}}}} - ifStmt := &ast.IfStmt{Cond: &ast.Ident{Name: "cond"}, Body: body} - if !applyBranchRemoval(ifStmt) { - t.Error("expected successful apply") - } - if len(ifStmt.Body.List) != 0 { - t.Errorf("expected body emptied, got %d stmts", len(ifStmt.Body.List)) - } -} - -func TestApplyBranchRemoval_WrongType(t *testing.T) { - if applyBranchRemoval(&ast.Ident{Name: "x"}) { - t.Error("expected false for non-IfStmt") - } -} - -func TestApplyStatementDeletion(t *testing.T) { - code := `package test - -func f() { - doThing() - x := 1 - _ = x -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - m := &Mutant{Line: 4, Operator: "statement_deletion"} - result := applyMutation(fp, m) - if result == nil { - t.Fatal("expected non-nil result") - } 
- // doThing() should be removed (replaced with empty stmt) - if strings.Contains(string(result), "doThing()") { - t.Errorf("expected doThing() removed, got:\n%s", string(result)) - } -} - -// --- Options tests --- - -func TestOptionsTimeout_Default(t *testing.T) { - opts := Options{} - if opts.timeout() != 30*1000*1000*1000 { // 30 seconds in ns - t.Errorf("default timeout = %v, want 30s", opts.timeout()) - } -} - -func TestOptionsWorkers(t *testing.T) { - // Zero → NumCPU. - zero := Options{} - if got, want := zero.workers(), runtime.NumCPU(); got != want { - t.Errorf("zero workers = %d, want runtime.NumCPU() = %d", got, want) - } - - // Negative → NumCPU (treat as unset). - neg := Options{Workers: -4} - if got, want := neg.workers(), runtime.NumCPU(); got != want { - t.Errorf("negative workers = %d, want runtime.NumCPU() = %d", got, want) - } - - // Explicit positive value is honored. - explicit := Options{Workers: 3} - if got := explicit.workers(); got != 3 { - t.Errorf("explicit workers = %d, want 3", got) - } -} - -func TestWriteOverlayJSON(t *testing.T) { - dir := t.TempDir() - overlayPath := filepath.Join(dir, "overlay.json") - if err := writeOverlayJSON(overlayPath, "/orig/foo.go", "/tmp/mutated.go"); err != nil { - t.Fatalf("writeOverlayJSON error: %v", err) - } - data, err := os.ReadFile(overlayPath) - if err != nil { - t.Fatal(err) - } - // Must be the exact shape go test -overlay expects: - // {"Replace":{"":""}} - expected := `{"Replace":{"/orig/foo.go":"/tmp/mutated.go"}}` - if string(data) != expected { - t.Errorf("overlay JSON = %q, want %q", string(data), expected) - } -} - -func TestBuildTestArgs_Default(t *testing.T) { - args := buildTestArgs(Options{}, "/tmp/overlay.json") - if args[0] != "test" { - t.Errorf("args[0] = %q, want test", args[0]) - } - // -overlay must always be present - foundOverlay := false - for _, a := range args { - if a == "-overlay=/tmp/overlay.json" { - foundOverlay = true - } - } - if !foundOverlay { - t.Errorf("expected 
-overlay=/tmp/overlay.json in args, got %v", args) - } - // No -run flag in default case - for _, a := range args { - if a == "-run" { - t.Error("did not expect -run in default args") - } - } -} - -func TestBuildTestArgs_WithPattern(t *testing.T) { - args := buildTestArgs(Options{TestPattern: "TestFoo"}, "/tmp/overlay.json") - found := false - for i, a := range args { - if a == "-run" && i+1 < len(args) && args[i+1] == "TestFoo" { - found = true - } - } - if !found { - t.Errorf("expected -run TestFoo in args, got %v", args) - } -} diff --git a/internal/mutation/generate.go b/internal/mutation/generate.go deleted file mode 100644 index ab345bf..0000000 --- a/internal/mutation/generate.go +++ /dev/null @@ -1,211 +0,0 @@ -package mutation - -import ( - "fmt" - "go/ast" - "go/parser" - "go/token" - - "github.com/0xPolygon/diffguard/internal/diff" -) - -// generateMutants parses a file and creates mutants for changed regions. -// Lines disabled via mutator-disable-* annotations are skipped. -func generateMutants(absPath string, fc diff.FileChange) ([]Mutant, error) { - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, parser.ParseComments) - if err != nil { - return nil, err - } - - disabled := scanAnnotations(fset, f) - var mutants []Mutant - - ast.Inspect(f, func(n ast.Node) bool { - if n == nil { - return true - } - line := fset.Position(n.Pos()).Line - if !fc.ContainsLine(line) || disabled[line] { - return true - } - mutants = append(mutants, mutantsFor(fc.Path, line, n)...) 
- return true - }) - - return mutants, nil -} - -func mutantsFor(file string, line int, n ast.Node) []Mutant { - switch node := n.(type) { - case *ast.BinaryExpr: - return binaryMutants(file, line, node) - case *ast.Ident: - return boolMutants(file, line, node) - case *ast.ReturnStmt: - return returnMutants(file, line, node) - case *ast.IncDecStmt: - return incdecMutants(file, line, node) - case *ast.IfStmt: - return ifBodyMutants(file, line, node) - case *ast.ExprStmt: - return exprStmtMutants(file, line, node) - } - return nil -} - -// binaryMutants generates mutations for binary expressions. -func binaryMutants(file string, line int, expr *ast.BinaryExpr) []Mutant { - replacements := map[token.Token][]token.Token{ - token.GTR: {token.GEQ}, - token.LSS: {token.LEQ}, - token.GEQ: {token.GTR}, - token.LEQ: {token.LSS}, - token.EQL: {token.NEQ}, - token.NEQ: {token.EQL}, - token.ADD: {token.SUB}, - token.SUB: {token.ADD}, - token.MUL: {token.QUO}, - token.QUO: {token.MUL}, - } - - targets, ok := replacements[expr.Op] - if !ok { - return nil - } - - var mutants []Mutant - for _, newOp := range targets { - mutants = append(mutants, Mutant{ - File: file, - Line: line, - Description: fmt.Sprintf("%s -> %s", expr.Op, newOp), - Operator: operatorName(expr.Op, newOp), - }) - } - - return mutants -} - -// boolMutants generates true <-> false mutations. -func boolMutants(file string, line int, ident *ast.Ident) []Mutant { - if ident.Name != "true" && ident.Name != "false" { - return nil - } - - newVal := "true" - if ident.Name == "true" { - newVal = "false" - } - - return []Mutant{{ - File: file, - Line: line, - Description: fmt.Sprintf("%s -> %s", ident.Name, newVal), - Operator: "boolean_substitution", - }} -} - -// returnMutants generates zero-value return mutations. 
-// -// Returns whose every result is already the literal identifier `nil` are -// skipped: the zero-value mutation rewrites each result to `nil`, producing -// an identical AST and therefore an equivalent mutant that can never be -// killed. Including them only adds noise to the score. -func returnMutants(file string, line int, ret *ast.ReturnStmt) []Mutant { - if len(ret.Results) == 0 { - return nil - } - if allLiteralNil(ret.Results) { - return nil - } - - return []Mutant{{ - File: file, - Line: line, - Description: "replace return values with zero values", - Operator: "return_value", - }} -} - -// allLiteralNil reports whether every expression is the bare identifier -// `nil`. See returnMutants for why this suppresses mutant generation. -func allLiteralNil(exprs []ast.Expr) bool { - for _, e := range exprs { - ident, ok := e.(*ast.Ident) - if !ok || ident.Name != "nil" { - return false - } - } - return true -} - -// incdecMutants swaps ++ with -- and vice versa. -func incdecMutants(file string, line int, stmt *ast.IncDecStmt) []Mutant { - var newTok token.Token - switch stmt.Tok { - case token.INC: - newTok = token.DEC - case token.DEC: - newTok = token.INC - default: - return nil - } - return []Mutant{{ - File: file, - Line: line, - Description: fmt.Sprintf("%s -> %s", stmt.Tok, newTok), - Operator: "incdec", - }} -} - -// ifBodyMutants empties the body of an if statement. -func ifBodyMutants(file string, line int, stmt *ast.IfStmt) []Mutant { - if stmt.Body == nil || len(stmt.Body.List) == 0 { - return nil - } - return []Mutant{{ - File: file, - Line: line, - Description: "remove if body", - Operator: "branch_removal", - }} -} - -// exprStmtMutants deletes a bare function-call statement (discards side effects). 
-func exprStmtMutants(file string, line int, stmt *ast.ExprStmt) []Mutant { - if _, ok := stmt.X.(*ast.CallExpr); !ok { - return nil - } - return []Mutant{{ - File: file, - Line: line, - Description: "remove call statement", - Operator: "statement_deletion", - }} -} - -func operatorName(from, to token.Token) string { - switch { - case isBoundary(from) || isBoundary(to): - return "conditional_boundary" - case isComparison(from) || isComparison(to): - return "negate_conditional" - case isMath(from) || isMath(to): - return "math_operator" - default: - return "unknown" - } -} - -func isBoundary(t token.Token) bool { - return t == token.GTR || t == token.GEQ || t == token.LSS || t == token.LEQ -} - -func isComparison(t token.Token) bool { - return t == token.EQL || t == token.NEQ -} - -func isMath(t token.Token) bool { - return t == token.ADD || t == token.SUB || t == token.MUL || t == token.QUO -} diff --git a/internal/mutation/mutation.go b/internal/mutation/mutation.go index fff7ee8..74bf795 100644 --- a/internal/mutation/mutation.go +++ b/internal/mutation/mutation.go @@ -1,12 +1,15 @@ +// Package mutation orchestrates mutation testing across a diff's changed +// files. The AST-level work (generating mutants, applying them, scanning +// annotations, running tests) is provided by the language back-end via +// lang.MutantGenerator / lang.MutantApplier / lang.AnnotationScanner / +// lang.TestRunner. This package owns the scheduling, tiering, and report +// formatting — pieces that don't depend on any particular language. package mutation import ( - "bytes" - "encoding/json" "fmt" "math/rand" "os" - "os/exec" "path/filepath" "runtime" "strings" @@ -14,6 +17,7 @@ import ( "time" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) @@ -31,33 +35,34 @@ type Mutant struct { type Options struct { // SampleRate is the percentage (0-100) of generated mutants to actually test. 
SampleRate float64 - // TestTimeout is the per-mutant timeout passed to `go test -timeout`. + // TestTimeout is the per-mutant timeout. // Zero means use the default (30s). TestTimeout time.Duration - // TestPattern, if non-empty, is passed to `go test -run` to scope tests. + // TestPattern, if non-empty, is passed to the language's test runner to + // scope tests. TestPattern string // Tier1Threshold is the minimum killed-percentage for Tier 1 operators - // (logic mutations) below which the section is reported as FAIL. Zero - // falls back to defaultTier1Threshold. + // below which the section is reported as FAIL. Zero falls back to + // defaultTier1Threshold. Tier1Threshold float64 // Tier2Threshold is the minimum killed-percentage for Tier 2 operators - // (semantic mutations) below which the section is reported as WARN. Zero - // falls back to defaultTier2Threshold. + // below which the section is reported as WARN. Zero falls back to + // defaultTier2Threshold. Tier2Threshold float64 - // Workers caps the number of packages processed concurrently. Zero or - // negative means use runtime.NumCPU(). Mutants within a single package - // always run sequentially regardless of this setting. + // Workers caps the number of mutants processed concurrently. Zero or + // negative means use runtime.NumCPU(). Workers int } const ( defaultTier1Threshold = 90.0 defaultTier2Threshold = 70.0 + defaultTestTimeout = 30 * time.Second ) func (o Options) timeout() time.Duration { if o.TestTimeout <= 0 { - return 30 * time.Second + return defaultTestTimeout } return o.TestTimeout } @@ -83,14 +88,16 @@ func (o Options) workers() int { return o.Workers } -// Analyze applies mutation operators to changed code and runs tests. +// Analyze applies mutation operators to changed code (via the language's +// MutantGenerator/Applier) and runs the language's TestRunner against each +// mutant. 
// -// Each mutant is tested in isolation using `go test -overlay` so mutants -// never touch the real source files on disk. This means mutants can be -// fully parallelized — including mutants on the same file or package — -// up to opts.workers() concurrent go test invocations. -func Analyze(repoPath string, d *diff.Result, opts Options) (report.Section, error) { - allMutants := collectMutants(repoPath, d) +// Parallelism is controlled by Options.Workers; concurrency safety is the +// TestRunner's responsibility (Go's overlay-based runner is safe to call +// concurrently; temp-copy runners for other languages must serialize +// per-file internally). +func Analyze(repoPath string, d *diff.Result, l lang.Language, opts Options) (report.Section, error) { + allMutants := collectMutants(repoPath, d, l) if len(allMutants) == 0 { return report.Section{ @@ -110,27 +117,49 @@ func Analyze(repoPath string, d *diff.Result, opts Options) (report.Section, err } defer os.RemoveAll(workDir) - killed := runMutantsParallel(repoPath, allMutants, opts, workDir) + killed := runMutantsParallel(repoPath, allMutants, l, opts, workDir) return buildSection(allMutants, killed, opts), nil } -func collectMutants(repoPath string, d *diff.Result) []Mutant { +// collectMutants gathers mutation sites for every changed file, honoring +// the language's annotation scanner so lines marked +// `// mutator-disable-*` never produce mutants. +func collectMutants(repoPath string, d *diff.Result, l lang.Language) []Mutant { + gen := l.MutantGenerator() + scanner := l.AnnotationScanner() + var all []Mutant for _, fc := range d.Files { absPath := filepath.Join(repoPath, fc.Path) - mutants, err := generateMutants(absPath, fc) + disabled, err := scanner.ScanAnnotations(absPath) + if err != nil { + continue + } + sites, err := gen.GenerateMutants(absPath, fc, disabled) if err != nil { continue } - all = append(all, mutants...) 
+ for _, s := range sites { + all = append(all, Mutant{ + File: s.File, + Line: s.Line, + Description: s.Description, + Operator: s.Operator, + }) + } } return all } -// runMutantsParallel processes mutants fully in parallel (including mutants -// on the same file) up to opts.workers() concurrent workers. Isolation -// between mutants is provided by `go test -overlay`, not by serialization. -func runMutantsParallel(repoPath string, mutants []Mutant, opts Options, workDir string) int { +// runMutantsParallel processes mutants concurrently up to opts.workers(). +// Each mutant goes through ApplyMutation -> TestRunner.RunTest; the +// TestRunner implementation is responsible for isolating concurrent +// invocations (the Go runner uses `go test -overlay`; non-Go runners use +// per-file temp-copy + mutex). +func runMutantsParallel(repoPath string, mutants []Mutant, l lang.Language, opts Options, workDir string) int { + applier := l.MutantApplier() + runner := l.TestRunner() + var wg sync.WaitGroup sem := make(chan struct{}, opts.workers()) @@ -140,7 +169,7 @@ func runMutantsParallel(repoPath string, mutants []Mutant, opts Options, workDir go func(idx int) { defer wg.Done() defer func() { <-sem }() - mutants[idx].Killed = runMutant(repoPath, &mutants[idx], opts, workDir, idx) + mutants[idx].Killed = runMutant(repoPath, &mutants[idx], applier, runner, opts, workDir, idx) }(i) } wg.Wait() @@ -154,63 +183,43 @@ func runMutantsParallel(repoPath string, mutants []Mutant, opts Options, workDir return killed } -// runMutant applies a mutation to a temp file, uses go test -overlay to -// have the test compile against the temp file (leaving the real source -// untouched), and returns whether any test failed. -func runMutant(repoPath string, m *Mutant, opts Options, workDir string, idx int) bool { +// runMutant applies the mutation, writes the mutated source to a temp file +// inside workDir, and hands it to the language's TestRunner. 
The runner +// returns (killed, output, err); on runner error we skip the mutant. +func runMutant(repoPath string, m *Mutant, applier lang.MutantApplier, runner lang.TestRunner, opts Options, workDir string, idx int) bool { absPath := filepath.Join(repoPath, m.File) - mutated := applyMutation(absPath, m) - if mutated == nil { + mutated, err := applier.ApplyMutation(absPath, lang.MutantSite{ + File: m.File, + Line: m.Line, + Description: m.Description, + Operator: m.Operator, + }) + if err != nil || mutated == nil { return false } - mutantFile := filepath.Join(workDir, fmt.Sprintf("m%d.go", idx)) + mutantFile := filepath.Join(workDir, fmt.Sprintf("m%d%s", idx, filepath.Ext(absPath))) if err := os.WriteFile(mutantFile, mutated, 0644); err != nil { return false } - overlayPath := filepath.Join(workDir, fmt.Sprintf("m%d-overlay.json", idx)) - if err := writeOverlayJSON(overlayPath, absPath, mutantFile); err != nil { - return false - } - - pkgDir := filepath.Dir(absPath) - cmd := exec.Command("go", buildTestArgs(opts, overlayPath)...) - cmd.Dir = pkgDir - var stderr bytes.Buffer - cmd.Stderr = &stderr - err := cmd.Run() - - if err != nil { - m.TestOutput = stderr.String() - return true - } - return false -} - -// writeOverlayJSON writes a go build overlay file mapping originalPath to -// mutantPath. See `go help build` -overlay flag for format details. 
-func writeOverlayJSON(path, originalPath, mutantPath string) error { - overlay := struct { - Replace map[string]string `json:"Replace"` - }{ - Replace: map[string]string{originalPath: mutantPath}, - } - data, err := json.Marshal(overlay) + killed, output, err := runner.RunTest(lang.TestRunConfig{ + RepoPath: repoPath, + MutantFile: mutantFile, + OriginalFile: absPath, + Timeout: opts.timeout(), + TestPattern: opts.TestPattern, + WorkDir: workDir, + Index: idx, + }) if err != nil { - return err + return false } - return os.WriteFile(path, data, 0644) -} - -func buildTestArgs(opts Options, overlayPath string) []string { - args := []string{"test", "-overlay=" + overlayPath, "-count=1", "-timeout", opts.timeout().String()} - if opts.TestPattern != "" { - args = append(args, "-run", opts.TestPattern) + if killed { + m.TestOutput = output } - args = append(args, "./...") - return args + return killed } func sampleMutants(mutants []Mutant, rate float64) []Mutant { diff --git a/internal/mutation/mutation_extra_test.go b/internal/mutation/mutation_extra_test.go index fd29586..8c9e87b 100644 --- a/internal/mutation/mutation_extra_test.go +++ b/internal/mutation/mutation_extra_test.go @@ -1,284 +1,14 @@ package mutation import ( - "go/ast" - "go/parser" - "go/token" - "os" - "path/filepath" - "strings" "testing" - "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/report" ) -func TestApplyBinaryMutation_Success(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.GTR} - m := &Mutant{Description: "> -> >=", Operator: "conditional_boundary"} - if !applyBinaryMutation(expr, m) { - t.Error("expected successful apply") - } - if expr.Op != token.GEQ { - t.Errorf("op = %v, want GEQ", expr.Op) - } -} - -func TestApplyBinaryMutation_WrongNodeType(t *testing.T) { - ident := &ast.Ident{Name: "x"} - m := &Mutant{Description: "> -> >=", Operator: "conditional_boundary"} - if applyBinaryMutation(ident, m) { - t.Error("expected false for non-BinaryExpr") 
- } -} - -func TestApplyBinaryMutation_IllegalOp(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.GTR} - m := &Mutant{Description: "invalid", Operator: "conditional_boundary"} - if applyBinaryMutation(expr, m) { - t.Error("expected false for invalid description") - } -} - -// TestApplyBinaryMutation_OperatorMismatch locks in the fix for a bug where -// applyBinaryMutation rewrote the first BinaryExpr found on a line even -// when its operator differed from the mutant's intended `from` op. E.g. -// given mutant "!= -> ==", applying it to the outer `&&` of `a != nil && b` -// must NOT succeed — otherwise `&&` gets replaced and the inner `!=` stays -// untouched, producing a false-surviving mutant. -func TestApplyBinaryMutation_OperatorMismatch(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.LAND} - m := &Mutant{Description: "!= -> ==", Operator: "negate_conditional"} - if applyBinaryMutation(expr, m) { - t.Error("expected false when expr.Op (&&) does not match mutant's from-op (!=)") - } - if expr.Op != token.LAND { - t.Errorf("expr.Op = %v, want LAND (unchanged)", expr.Op) - } -} - -// TestApplyBinaryMutation_MathOperatorMismatch: same fix for math operators -// — `start + count - 1` parses with an outer SUB, and mutant "+ -> -" must -// not no-op on that outer SUB. 
-func TestApplyBinaryMutation_MathOperatorMismatch(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.SUB} - m := &Mutant{Description: "+ -> -", Operator: "math_operator"} - if applyBinaryMutation(expr, m) { - t.Error("expected false when expr.Op (-) does not match mutant's from-op (+)") - } - if expr.Op != token.SUB { - t.Errorf("expr.Op = %v, want SUB (unchanged)", expr.Op) - } -} - -func TestApplyBoolMutation_TrueToFalse(t *testing.T) { - ident := &ast.Ident{Name: "true"} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if !applyBoolMutation(ident, m) { - t.Error("expected successful apply") - } - if ident.Name != "false" { - t.Errorf("name = %q, want false", ident.Name) - } -} - -func TestApplyBoolMutation_FalseToTrue(t *testing.T) { - ident := &ast.Ident{Name: "false"} - m := &Mutant{Description: "false -> true", Operator: "boolean_substitution"} - if !applyBoolMutation(ident, m) { - t.Error("expected successful apply") - } - if ident.Name != "true" { - t.Errorf("name = %q, want true", ident.Name) - } -} - -func TestApplyBoolMutation_WrongNodeType(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.ADD} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if applyBoolMutation(expr, m) { - t.Error("expected false for non-Ident") - } -} - -func TestApplyBoolMutation_NonBoolIdent(t *testing.T) { - ident := &ast.Ident{Name: "x"} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if applyBoolMutation(ident, m) { - t.Error("expected false for non-bool ident") - } -} - -func TestApplyReturnMutation_Success(t *testing.T) { - ret := &ast.ReturnStmt{ - Results: []ast.Expr{ - &ast.Ident{Name: "x", NamePos: 1}, - }, - } - if !applyReturnMutation(ret) { - t.Error("expected successful apply") - } - if ident, ok := ret.Results[0].(*ast.Ident); !ok || ident.Name != "nil" { - t.Error("expected result replaced with nil") - } -} - -func TestApplyReturnMutation_WrongNodeType(t *testing.T) { 
- ident := &ast.Ident{Name: "x"} - if applyReturnMutation(ident) { - t.Error("expected false for non-ReturnStmt") - } -} - -func TestTryApplyMutation_Binary(t *testing.T) { - expr := &ast.BinaryExpr{Op: token.ADD} - m := &Mutant{Description: "+ -> -", Operator: "math_operator"} - if !tryApplyMutation(expr, m) { - t.Error("expected successful apply for math_operator") - } - if expr.Op != token.SUB { - t.Errorf("op = %v, want SUB", expr.Op) - } -} - -func TestTryApplyMutation_Bool(t *testing.T) { - ident := &ast.Ident{Name: "true"} - m := &Mutant{Description: "true -> false", Operator: "boolean_substitution"} - if !tryApplyMutation(ident, m) { - t.Error("expected successful apply for boolean_substitution") - } -} - -func TestTryApplyMutation_Return(t *testing.T) { - ret := &ast.ReturnStmt{Results: []ast.Expr{&ast.Ident{Name: "x", NamePos: 1}}} - m := &Mutant{Operator: "return_value"} - if !tryApplyMutation(ret, m) { - t.Error("expected successful apply for return_value") - } -} - -func TestTryApplyMutation_Unknown(t *testing.T) { - ident := &ast.Ident{Name: "x"} - m := &Mutant{Operator: "unknown_operator"} - if tryApplyMutation(ident, m) { - t.Error("expected false for unknown operator") - } -} - -func TestApplyMutationToAST(t *testing.T) { - code := `package test - -func f() bool { - return true -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, fp, nil, parser.ParseComments) - - m := &Mutant{Line: 4, Description: "true -> false", Operator: "boolean_substitution"} - if !applyMutationToAST(fset, f, m) { - t.Error("expected mutation to be applied") - } -} - -func TestApplyMutationToAST_NoMatch(t *testing.T) { - code := `package test - -func f() int { - return 42 -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, fp, nil, 
parser.ParseComments) - - m := &Mutant{Line: 999, Description: "true -> false", Operator: "boolean_substitution"} - if applyMutationToAST(fset, f, m) { - t.Error("expected no mutation applied for wrong line") - } -} - -func TestApplyMutation_Full(t *testing.T) { - code := `package test - -func f(a, b int) bool { - return a > b -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - m := &Mutant{Line: 4, Description: "> -> >=", Operator: "conditional_boundary"} - result := applyMutation(fp, m) - if result == nil { - t.Fatal("expected non-nil result") - } - if !strings.Contains(string(result), ">=") { - t.Error("expected mutated code to contain >=") - } -} - -func TestApplyMutation_ParseError(t *testing.T) { - m := &Mutant{Line: 1, Operator: "boolean_substitution"} - result := applyMutation("/nonexistent/file.go", m) - if result != nil { - t.Error("expected nil for parse error") - } -} - -func TestApplyMutation_NoMatch(t *testing.T) { - code := `package test - -func f() {} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - m := &Mutant{Line: 999, Operator: "boolean_substitution", Description: "true -> false"} - result := applyMutation(fp, m) - if result != nil { - t.Error("expected nil when mutation can't be applied") - } -} - -func TestRenderFile(t *testing.T) { - code := `package test - -func f() {} -` - fset := token.NewFileSet() - f, _ := parser.ParseFile(fset, "test.go", code, parser.ParseComments) - - result := renderFile(fset, f) - if result == nil { - t.Fatal("expected non-nil render result") - } - if !strings.Contains(string(result), "package test") { - t.Error("rendered file should contain package declaration") - } -} - -func TestZeroValueExpr(t *testing.T) { - original := &ast.Ident{Name: "x", NamePos: 42} - result := zeroValueExpr(original) - ident, ok := result.(*ast.Ident) - if !ok { - t.Fatal("expected *ast.Ident") - } - if ident.Name != "nil" { - 
t.Errorf("name = %q, want nil", ident.Name) - } -} - +// TestBuildSection_HighScore confirms a fully-killed Tier-1 run reports +// PASS. This is the "100% kill rate ⇒ PASS" invariant the CI gate relies +// on. func TestBuildSection_HighScore(t *testing.T) { mutants := []Mutant{ {File: "a.go", Line: 1, Killed: true, Operator: "negate_conditional"}, @@ -289,7 +19,7 @@ func TestBuildSection_HighScore(t *testing.T) { } s := buildSection(mutants, 5, Options{}) if s.Severity != report.SeverityPass { - t.Errorf("severity = %v, want PASS (100%% kill rate)", s.Severity) + t.Errorf("severity = %v, want PASS", s.Severity) } } @@ -305,10 +35,10 @@ func TestBuildSection_LowScore(t *testing.T) { } s := buildSection(mutants, 1, Options{}) if s.Severity != report.SeverityFail { - t.Errorf("severity = %v, want FAIL (Tier 1 at 20%% < default 90%%)", s.Severity) + t.Errorf("severity = %v, want FAIL", s.Severity) } if len(s.Findings) != 4 { - t.Errorf("findings = %d, want 4 (survived mutants)", len(s.Findings)) + t.Errorf("findings = %d, want 4", len(s.Findings)) } } @@ -324,99 +54,16 @@ func TestBuildSection_MediumScore(t *testing.T) { } s := buildSection(mutants, killed, Options{}) if s.Severity != report.SeverityWarn { - t.Errorf("severity = %v, want WARN (Tier 2 at 60%% < default 70%%)", s.Severity) + t.Errorf("severity = %v, want WARN", s.Severity) } } func TestBuildSection_ZeroMutants(t *testing.T) { s := buildSection(nil, 0, Options{}) - // No mutants means nothing to gate on — severity should be PASS and - // stats should still be populated. 
if s.Severity != report.SeverityPass { - t.Errorf("severity = %v, want PASS (no mutants to gate on)", s.Severity) + t.Errorf("severity = %v, want PASS", s.Severity) } if s.Stats == nil { t.Error("expected non-nil stats") } } - -func TestGenerateMutants_WithAllTypes(t *testing.T) { - code := `package test - -func f(a, b int) bool { - if a > b { - return true - } - x := a + b - _ = x - return false -} -` - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 20}}, - } - - mutants, err := generateMutants(fp, fc) - if err != nil { - t.Fatalf("error: %v", err) - } - - operators := make(map[string]int) - for _, m := range mutants { - operators[m.Operator]++ - } - - if operators["conditional_boundary"] == 0 { - t.Error("missing conditional_boundary mutants") - } - if operators["boolean_substitution"] == 0 { - t.Error("missing boolean_substitution mutants") - } - if operators["math_operator"] == 0 { - t.Error("missing math_operator mutants") - } - if operators["return_value"] == 0 { - t.Error("missing return_value mutants") - } -} - -func TestIsBoundary(t *testing.T) { - if !isBoundary(token.GTR) { - t.Error("GTR should be boundary") - } - if !isBoundary(token.GEQ) { - t.Error("GEQ should be boundary") - } - if isBoundary(token.EQL) { - t.Error("EQL should not be boundary") - } -} - -func TestIsComparison(t *testing.T) { - if !isComparison(token.EQL) { - t.Error("EQL should be comparison") - } - if !isComparison(token.NEQ) { - t.Error("NEQ should be comparison") - } - if isComparison(token.GTR) { - t.Error("GTR should not be comparison") - } -} - -func TestIsMath(t *testing.T) { - if !isMath(token.ADD) { - t.Error("ADD should be math") - } - if !isMath(token.MUL) { - t.Error("MUL should be math") - } - if isMath(token.EQL) { - t.Error("EQL should not be math") - } -} diff --git a/internal/mutation/mutation_test.go 
b/internal/mutation/mutation_test.go index 5c63307..7a1b4c5 100644 --- a/internal/mutation/mutation_test.go +++ b/internal/mutation/mutation_test.go @@ -1,128 +1,17 @@ package mutation import ( - "go/ast" - "go/token" - "os" + "runtime" "testing" - - "github.com/0xPolygon/diffguard/internal/diff" ) -func TestBinaryMutants(t *testing.T) { - tests := []struct { - name string - op token.Token - expected int - }{ - {"greater than", token.GTR, 1}, - {"less than", token.LSS, 1}, - {"equal", token.EQL, 1}, - {"not equal", token.NEQ, 1}, - {"add", token.ADD, 1}, - {"subtract", token.SUB, 1}, - {"multiply", token.MUL, 1}, - {"divide", token.QUO, 1}, - {"and (no mutation)", token.LAND, 0}, - {"or (no mutation)", token.LOR, 0}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - expr := &ast.BinaryExpr{Op: tt.op} - mutants := binaryMutants("test.go", 1, expr) - if len(mutants) != tt.expected { - t.Errorf("binaryMutants(%v) produced %d mutants, want %d", tt.op, len(mutants), tt.expected) - } - }) - } -} - -func TestBoolMutants(t *testing.T) { - tests := []struct { - name string - ident string - expected int - }{ - {"true", "true", 1}, - {"false", "false", 1}, - {"other", "x", 0}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - ident := &ast.Ident{Name: tt.ident} - mutants := boolMutants("test.go", 1, ident) - if len(mutants) != tt.expected { - t.Errorf("boolMutants(%q) produced %d mutants, want %d", tt.ident, len(mutants), tt.expected) - } - }) - } -} - -func TestReturnMutants(t *testing.T) { - // Return with values - ret := &ast.ReturnStmt{ - Results: []ast.Expr{&ast.Ident{Name: "x"}}, - } - mutants := returnMutants("test.go", 1, ret) - if len(mutants) != 1 { - t.Errorf("returnMutants with values: got %d, want 1", len(mutants)) - } - - // Bare return - bareRet := &ast.ReturnStmt{} - mutants = returnMutants("test.go", 1, bareRet) - if len(mutants) != 0 { - t.Errorf("returnMutants bare: got %d, want 0", len(mutants)) - } -} - 
-func TestGenerateMutants(t *testing.T) { - code := `package test - -func add(a, b int) int { - if a > b { - return a + b - } - return a - b -} -` - dir := t.TempDir() - filePath := dir + "/test.go" - if err := os.WriteFile(filePath, []byte(code), 0644); err != nil { - t.Fatalf("writeTestFile: %v", err) - } - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{ - {StartLine: 1, EndLine: 8}, - }, - } - - mutants, err := generateMutants(filePath, fc) - if err != nil { - t.Fatalf("generateMutants error: %v", err) - } - - if len(mutants) == 0 { - t.Error("expected mutants, got none") - } - - // Should have mutations for: > (boundary), + (math), - (math) - operators := make(map[string]int) - for _, m := range mutants { - operators[m.Operator]++ - } - - if operators["conditional_boundary"] == 0 { - t.Error("expected conditional_boundary mutants") - } - if operators["math_operator"] == 0 { - t.Error("expected math_operator mutants") - } -} +// Most of what used to be tested here was the Go AST machinery: +// binaryMutants, boolMutants, applyBoolMutation, writeOverlayJSON, +// buildTestArgs, scanAnnotations, etc. All of that now lives in +// internal/lang/goanalyzer/ next to the code, and the tests moved with it. +// +// What remains here exercises the orchestration: options defaults, mutant +// sampling, tier aggregation, and section formatting. 
func TestSampleMutants(t *testing.T) { mutants := make([]Mutant, 100) @@ -141,42 +30,46 @@ func TestSampleMutants(t *testing.T) { } } -func TestOperatorName(t *testing.T) { - tests := []struct { - from, to token.Token - expected string - }{ - {token.GTR, token.GEQ, "conditional_boundary"}, - {token.EQL, token.NEQ, "negate_conditional"}, - {token.ADD, token.SUB, "math_operator"}, +func TestOptionsTimeout_Default(t *testing.T) { + opts := Options{} + if opts.timeout() != defaultTestTimeout { + t.Errorf("default timeout = %v, want %v", opts.timeout(), defaultTestTimeout) + } +} + +func TestOptionsWorkers(t *testing.T) { + zero := Options{} + if got, want := zero.workers(), runtime.NumCPU(); got != want { + t.Errorf("zero workers = %d, want NumCPU = %d", got, want) + } + + neg := Options{Workers: -4} + if got, want := neg.workers(), runtime.NumCPU(); got != want { + t.Errorf("negative workers = %d, want NumCPU = %d", got, want) } - for _, tt := range tests { - got := operatorName(tt.from, tt.to) - if got != tt.expected { - t.Errorf("operatorName(%v, %v) = %q, want %q", tt.from, tt.to, got, tt.expected) - } + explicit := Options{Workers: 3} + if got := explicit.workers(); got != 3 { + t.Errorf("explicit workers = %d, want 3", got) } } -func TestParseMutationOp(t *testing.T) { - tests := []struct { - desc string - wantFrom token.Token - wantTo token.Token - }{ - {"> -> >=", token.GTR, token.GEQ}, - {"== -> !=", token.EQL, token.NEQ}, - {"+ -> -", token.ADD, token.SUB}, - {"invalid", token.ILLEGAL, token.ILLEGAL}, - {"+ -> unknown", token.ILLEGAL, token.ILLEGAL}, +func TestOptionsTiers(t *testing.T) { + // Defaults kick in when thresholds are zero. 
+ zero := Options{} + if got := zero.tier1Threshold(); got != defaultTier1Threshold { + t.Errorf("tier1 default = %v, want %v", got, defaultTier1Threshold) + } + if got := zero.tier2Threshold(); got != defaultTier2Threshold { + t.Errorf("tier2 default = %v, want %v", got, defaultTier2Threshold) } - for _, tt := range tests { - gotFrom, gotTo := parseMutationOp(tt.desc) - if gotFrom != tt.wantFrom || gotTo != tt.wantTo { - t.Errorf("parseMutationOp(%q) = (%v, %v), want (%v, %v)", - tt.desc, gotFrom, gotTo, tt.wantFrom, tt.wantTo) - } + // Explicit values are honored. + explicit := Options{Tier1Threshold: 75, Tier2Threshold: 50} + if got := explicit.tier1Threshold(); got != 75 { + t.Errorf("tier1 explicit = %v, want 75", got) + } + if got := explicit.tier2Threshold(); got != 50 { + t.Errorf("tier2 explicit = %v, want 50", got) } } diff --git a/internal/sizes/sizes.go b/internal/sizes/sizes.go index 027730c..bf67a5e 100644 --- a/internal/sizes/sizes.go +++ b/internal/sizes/sizes.go @@ -1,38 +1,30 @@ +// Package sizes reports function and file line counts for diff-scoped files +// using a language-supplied lang.FunctionExtractor. The per-language AST +// work lives in the language back-end (for Go: goanalyzer/sizes.go). package sizes import ( "fmt" - "go/ast" - "go/parser" - "go/token" "path/filepath" "sort" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" ) -// FunctionSize holds size info for a single function. -type FunctionSize struct { - File string - Line int - Name string - Lines int -} - -// FileSize holds size info for a single file. -type FileSize struct { - Path string - Lines int -} - -// Analyze measures lines of code for changed functions and files. 
-func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int) (report.Section, error) { - var funcResults []FunctionSize - var fileResults []FileSize +// Analyze measures lines of code for changed functions and files using the +// supplied language extractor. +func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, extractor lang.FunctionExtractor) (report.Section, error) { + var funcResults []lang.FunctionSize + var fileResults []lang.FileSize for _, fc := range d.Files { - funcs, fileSize := analyzeFile(repoPath, fc) + absPath := filepath.Join(repoPath, fc.Path) + funcs, fileSize, err := extractor.ExtractFunctions(absPath, fc) + if err != nil { + return report.Section{}, fmt.Errorf("analyzing %s: %w", fc.Path, err) + } funcResults = append(funcResults, funcs...) if fileSize != nil { fileResults = append(fileResults, *fileSize) @@ -42,64 +34,7 @@ func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int) return buildSection(funcResults, fileResults, funcThreshold, fileThreshold), nil } -func analyzeFile(repoPath string, fc diff.FileChange) ([]FunctionSize, *FileSize) { - absPath := filepath.Join(repoPath, fc.Path) - fset := token.NewFileSet() - f, err := parser.ParseFile(fset, absPath, nil, 0) - if err != nil { - return nil, nil - } - - var fileSize *FileSize - file := fset.File(f.Pos()) - if file != nil { - fileSize = &FileSize{Path: fc.Path, Lines: file.LineCount()} - } - - return collectFunctionSizes(fset, f, fc), fileSize -} - -func collectFunctionSizes(fset *token.FileSet, f *ast.File, fc diff.FileChange) []FunctionSize { - var results []FunctionSize - ast.Inspect(f, func(n ast.Node) bool { - fn, ok := n.(*ast.FuncDecl) - if !ok { - return true - } - startLine := fset.Position(fn.Pos()).Line - endLine := fset.Position(fn.End()).Line - if !fc.OverlapsRange(startLine, endLine) { - return false - } - results = append(results, FunctionSize{ - File: fc.Path, - Line: startLine, - Name: funcName(fn), - 
Lines: endLine - startLine + 1, - }) - return false - }) - return results -} - -func funcName(fn *ast.FuncDecl) string { - if fn.Recv != nil && len(fn.Recv.List) > 0 { - recv := fn.Recv.List[0] - var typeName string - switch t := recv.Type.(type) { - case *ast.StarExpr: - if ident, ok := t.X.(*ast.Ident); ok { - typeName = ident.Name - } - case *ast.Ident: - typeName = t.Name - } - return fmt.Sprintf("(%s).%s", typeName, fn.Name.Name) - } - return fn.Name.Name -} - -func checkFuncSizes(funcs []FunctionSize, threshold int) []report.Finding { +func checkFuncSizes(funcs []lang.FunctionSize, threshold int) []report.Finding { var findings []report.Finding for _, f := range funcs { if f.Lines > threshold { @@ -117,7 +52,7 @@ func checkFuncSizes(funcs []FunctionSize, threshold int) []report.Finding { return findings } -func checkFileSizes(files []FileSize, threshold int) []report.Finding { +func checkFileSizes(files []lang.FileSize, threshold int) []report.Finding { var findings []report.Finding for _, f := range files { if f.Lines > threshold { @@ -133,7 +68,7 @@ func checkFileSizes(files []FileSize, threshold int) []report.Finding { return findings } -func buildSection(funcs []FunctionSize, files []FileSize, funcThreshold, fileThreshold int) report.Section { +func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshold, fileThreshold int) report.Section { if len(funcs) == 0 && len(files) == 0 { return report.Section{ Name: "Code Sizes", @@ -163,10 +98,10 @@ func buildSection(funcs []FunctionSize, files []FileSize, funcThreshold, fileThr Findings: findings, Stats: map[string]any{ "total_functions": len(funcs), - "total_files": len(files), - "violations": len(findings), + "total_files": len(files), + "violations": len(findings), "function_threshold": funcThreshold, - "file_threshold": fileThreshold, + "file_threshold": fileThreshold, }, } } diff --git a/internal/sizes/sizes_test.go b/internal/sizes/sizes_test.go index 1b7df27..d53f11d 100644 --- 
a/internal/sizes/sizes_test.go +++ b/internal/sizes/sizes_test.go @@ -6,139 +6,25 @@ import ( "testing" "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" "github.com/0xPolygon/diffguard/internal/report" ) -func TestAnalyzeFile(t *testing.T) { - code := `package test - -func short() { - x := 1 - _ = x -} - -func longer() { - a := 1 - b := 2 - c := 3 - d := 4 - e := 5 - _ = a + b + c + d + e -} -` - dir := t.TempDir() - filePath := filepath.Join(dir, "test.go") - os.WriteFile(filePath, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - funcs, fileSize := analyzeFile(dir, fc) - - if fileSize == nil { - t.Fatal("expected non-nil fileSize") - } - if fileSize.Lines == 0 { - t.Error("file should have non-zero lines") - } - if fileSize.Path != "test.go" { - t.Errorf("fileSize.Path = %q, want test.go", fileSize.Path) - } - - if len(funcs) != 2 { - t.Fatalf("expected 2 functions, got %d", len(funcs)) - } - if funcs[0].Name != "short" { - t.Errorf("funcs[0].Name = %q, want short", funcs[0].Name) - } - if funcs[0].Lines <= 0 { - t.Error("function lines should be > 0") - } -} - -func TestAnalyzeFile_ParseError(t *testing.T) { - dir := t.TempDir() - fc := diff.FileChange{ - Path: "nonexistent.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 10}}, - } - - funcs, fileSize := analyzeFile(dir, fc) - if funcs != nil { - t.Error("expected nil funcs for parse error") - } - if fileSize != nil { - t.Error("expected nil fileSize for parse error") - } -} - -func TestCollectFunctionSizes_OnlyInRange(t *testing.T) { - code := `package test - -func inRange() { - x := 1 - _ = x -} - -func outOfRange() { - y := 2 - _ = y -} -` - dir := t.TempDir() - filePath := filepath.Join(dir, "test.go") - os.WriteFile(filePath, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: 
[]diff.ChangedRegion{{StartLine: 3, EndLine: 6}}, - } - - funcs, _ := analyzeFile(dir, fc) - if len(funcs) != 1 { - t.Fatalf("expected 1 function in range, got %d", len(funcs)) - } - if funcs[0].Name != "inRange" { - t.Errorf("expected inRange, got %s", funcs[0].Name) - } -} - -func TestCollectFunctionSizes_LineCalc(t *testing.T) { - code := `package test - -func f() { - a := 1 - b := 2 - c := 3 - _ = a + b + c -} -` - dir := t.TempDir() - filePath := filepath.Join(dir, "test.go") - os.WriteFile(filePath, []byte(code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - - funcs, _ := analyzeFile(dir, fc) - if len(funcs) != 1 { - t.Fatalf("expected 1 function, got %d", len(funcs)) - } - // func f() { starts at line 3, } at line 8 = 6 lines - if funcs[0].Lines != 6 { - t.Errorf("function lines = %d, want 6", funcs[0].Lines) +func goExtractor(t *testing.T) lang.FunctionExtractor { + t.Helper() + l, ok := lang.Get("go") + if !ok { + t.Fatal("go language not registered") } + return l.FunctionExtractor() } func TestCheckFuncSizes(t *testing.T) { - funcs := []FunctionSize{ - {File: "a.go", Line: 1, Name: "small", Lines: 10}, - {File: "b.go", Line: 1, Name: "big", Lines: 60}, - {File: "c.go", Line: 1, Name: "huge", Lines: 100}, + funcs := []lang.FunctionSize{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "small"}, Lines: 10}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "big"}, Lines: 60}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "huge"}, Lines: 100}, } findings := checkFuncSizes(funcs, 50) @@ -153,9 +39,9 @@ func TestCheckFuncSizes(t *testing.T) { } func TestCheckFuncSizes_AtBoundary(t *testing.T) { - funcs := []FunctionSize{ - {File: "a.go", Line: 1, Name: "exact", Lines: 50}, - {File: "b.go", Line: 1, Name: "over", Lines: 51}, + funcs := []lang.FunctionSize{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "exact"}, Lines: 50}, 
+ {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "over"}, Lines: 51}, } findings := checkFuncSizes(funcs, 50) @@ -165,7 +51,7 @@ func TestCheckFuncSizes_AtBoundary(t *testing.T) { } func TestCheckFileSizes(t *testing.T) { - files := []FileSize{ + files := []lang.FileSize{ {Path: "small.go", Lines: 100}, {Path: "big.go", Lines: 600}, } @@ -177,7 +63,7 @@ func TestCheckFileSizes(t *testing.T) { } func TestCheckFileSizes_AtBoundary(t *testing.T) { - files := []FileSize{ + files := []lang.FileSize{ {Path: "exact.go", Lines: 500}, {Path: "over.go", Lines: 501}, } @@ -199,7 +85,7 @@ func TestBuildSection_Empty(t *testing.T) { } func TestBuildSection_WithViolations(t *testing.T) { - funcs := []FunctionSize{{File: "a.go", Line: 1, Name: "big", Lines: 100}} + funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "big"}, Lines: 100}} s := buildSection(funcs, nil, 50, 500) if s.Severity != report.SeverityFail { t.Errorf("section severity = %v, want FAIL", s.Severity) @@ -210,8 +96,8 @@ func TestBuildSection_WithViolations(t *testing.T) { } func TestBuildSection_NoViolations(t *testing.T) { - funcs := []FunctionSize{{File: "a.go", Line: 1, Name: "small", Lines: 10}} - files := []FileSize{{Path: "a.go", Lines: 100}} + funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "small"}, Lines: 10}} + files := []lang.FileSize{{Path: "a.go", Lines: 100}} s := buildSection(funcs, files, 50, 500) if s.Severity != report.SeverityPass { t.Errorf("severity = %v, want PASS", s.Severity) @@ -219,10 +105,10 @@ func TestBuildSection_NoViolations(t *testing.T) { } func TestBuildSection_SortedByValue(t *testing.T) { - funcs := []FunctionSize{ - {File: "a.go", Line: 1, Name: "medium", Lines: 60}, - {File: "b.go", Line: 1, Name: "huge", Lines: 200}, - {File: "c.go", Line: 1, Name: "big", Lines: 80}, + funcs := []lang.FunctionSize{ + {FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "medium"}, Lines: 
60}, + {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "huge"}, Lines: 200}, + {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "big"}, Lines: 80}, } s := buildSection(funcs, nil, 50, 500) if len(s.Findings) != 3 { @@ -233,41 +119,10 @@ func TestBuildSection_SortedByValue(t *testing.T) { } } -func TestFuncName(t *testing.T) { - tests := []struct { - code string - expected string - }{ - {`package p; func Foo() {}`, "Foo"}, - {`package p; type T struct{}; func (t T) Bar() {}`, "(T).Bar"}, - {`package p; type T struct{}; func (t *T) Baz() {}`, "(T).Baz"}, - } - - for _, tt := range tests { - t.Run(tt.expected, func(t *testing.T) { - dir := t.TempDir() - fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(tt.code), 0644) - - fc := diff.FileChange{ - Path: "test.go", - Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, - } - funcs, _ := analyzeFile(dir, fc) - found := false - for _, f := range funcs { - if f.Name == tt.expected { - found = true - } - } - if !found { - t.Errorf("funcName not found: want %q, got %v", tt.expected, funcs) - } - }) - } -} - -func TestAnalyze(t *testing.T) { +// TestAnalyze_WithGoExtractor is the integration replacement for the old +// analyzeFile-based unit tests. The AST walk logic now lives in goanalyzer +// and has its own tests; here we only verify the orchestration wiring. 
+func TestAnalyze_WithGoExtractor(t *testing.T) { code := `package test func small() { @@ -277,7 +132,9 @@ func small() { ` dir := t.TempDir() fp := filepath.Join(dir, "test.go") - os.WriteFile(fp, []byte(code), 0644) + if err := os.WriteFile(fp, []byte(code), 0644); err != nil { + t.Fatal(err) + } d := &diff.Result{ Files: []diff.FileChange{ @@ -285,7 +142,7 @@ func small() { }, } - section, err := Analyze(dir, d, 50, 500) + section, err := Analyze(dir, d, 50, 500, goExtractor(t)) if err != nil { t.Fatalf("Analyze error: %v", err) } From 2c579416451ae89d0074248b7e23ec5d6b109c10 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:53:07 -0400 Subject: [PATCH 06/38] =?UTF-8?q?feat(lang):=20Part=20A5=20=E2=80=94=20reg?= =?UTF-8?q?ression=20gate;=20preserve=20churn=20score=20simple-counter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The churn analyzer's pre-split computeComplexity was a coarse "+1 per branching node" counter, not the full cognitive-complexity walker. Moving to lang.ComplexityScorer had caused the churn section to reuse AnalyzeFile — producing higher scores and breaking byte-identical output on the regression baseline. Restore the coarser counter as computeSimpleComplexity in goanalyzer and have ComplexityScorer.ScoreFile use it. Lock in the difference between the two scorers with a test (nested ifs = 2 via the counter, 3 via the cognitive walker). 
Regression gate commands used: go build -o /tmp/diffguard-baseline ./cmd/diffguard (pre-refactor HEAD) go build -o /tmp/diffguard-after ./cmd/diffguard (post-refactor) # text /tmp/diffguard-baseline --base 6f359df --skip-mutation --fail-on none /repo > base.txt /tmp/diffguard-after --base 6f359df --skip-mutation --fail-on none /repo > after.txt diff base.txt after.txt -> byte-identical # json (after normalizing metrics[] ordering, which was already # non-deterministic pre-refactor due to sort ties + map iteration) -> normalized-identical # wall clock baseline median ~0.491s, after median ~0.484s -> within 5% Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/goanalyzer/complexity.go | 67 +++++++++++++++++++-- internal/lang/goanalyzer/goanalyzer_test.go | 25 ++++---- 2 files changed, 77 insertions(+), 15 deletions(-) diff --git a/internal/lang/goanalyzer/complexity.go b/internal/lang/goanalyzer/complexity.go index e38cd12..efb5820 100644 --- a/internal/lang/goanalyzer/complexity.go +++ b/internal/lang/goanalyzer/complexity.go @@ -49,11 +49,70 @@ func (complexityImpl) AnalyzeFile(absPath string, fc diff.FileChange) ([]lang.Fu return results, nil } -// ScoreFile is the ComplexityScorer entry point. Churn weighting only needs -// a number; reusing the full cognitive calculation keeps scores consistent -// between the complexity section and the churn-weighting it feeds into. +// ScoreFile is the ComplexityScorer entry point used by the churn analyzer. +// It deliberately uses a simplified counter (bump by 1 for each if/for/ +// switch/select/logical-op node) rather than the full cognitive complexity +// walker, matching the pre-split churn.computeComplexity. The churn score +// only needs a relative ordering of "hotter" functions; a coarse counter is +// faster to compute and keeps the churn output byte-identical to the +// pre-refactor numbers. 
func (complexityImpl) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { - return complexityImpl{}.AnalyzeFile(absPath, fc) + fset, f, err := parseFile(absPath, 0) + if err != nil { + return nil, nil + } + + var results []lang.FunctionComplexity + ast.Inspect(f, func(n ast.Node) bool { + fn, ok := n.(*ast.FuncDecl) + if !ok { + return true + } + startLine := fset.Position(fn.Pos()).Line + endLine := fset.Position(fn.End()).Line + if !fc.OverlapsRange(startLine, endLine) { + return false + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: startLine, + EndLine: endLine, + Name: funcName(fn), + }, + Complexity: computeSimpleComplexity(fn.Body), + }) + return false + }) + return results, nil +} + +// computeSimpleComplexity is the simplified counter used by the churn +// analyzer: +1 per branching construct, +1 per && / || operator. No +// nesting penalty and no operator-change accounting. Matches the +// pre-split internal/churn.computeComplexity so churn scores stay +// byte-identical. 
+func computeSimpleComplexity(body *ast.BlockStmt) int { + if body == nil { + return 0 + } + count := 0 + ast.Inspect(body, func(n ast.Node) bool { + switch v := n.(type) { + case *ast.IfStmt: + count++ + case *ast.ForStmt, *ast.RangeStmt: + count++ + case *ast.SwitchStmt, *ast.TypeSwitchStmt, *ast.SelectStmt: + count++ + case *ast.BinaryExpr: + if v.Op == token.LAND || v.Op == token.LOR { + count++ + } + } + return true + }) + return count } // computeCognitiveComplexity is the exact algorithm that lived in diff --git a/internal/lang/goanalyzer/goanalyzer_test.go b/internal/lang/goanalyzer/goanalyzer_test.go index e2e8e43..30daf4c 100644 --- a/internal/lang/goanalyzer/goanalyzer_test.go +++ b/internal/lang/goanalyzer/goanalyzer_test.go @@ -99,10 +99,10 @@ func f() { } } -func TestComplexityAndScorer_Agree(t *testing.T) { - // ComplexityScorer.ScoreFile currently delegates to AnalyzeFile, so the - // per-function scores must match exactly. This is the invariant the - // churn analyzer relies on. +// TestScorer_SimpleCounter locks in the ScoreFile behavior: it's the +// simpler "bump by 1 per branch" counter, not the full cognitive walker. +// Two nested if statements score 2 (not 3 — no nesting penalty). 
+func TestScorer_SimpleCounter(t *testing.T) { code := `package p func f(x int) { if x > 0 { @@ -118,15 +118,18 @@ func f(x int) { Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}, } - analyze, _ := complexityImpl{}.AnalyzeFile(fp, fc) score, _ := complexityImpl{}.ScoreFile(fp, fc) - if len(analyze) != len(score) { - t.Fatalf("len mismatch: %d vs %d", len(analyze), len(score)) + if len(score) != 1 { + t.Fatalf("len(score) = %d, want 1", len(score)) + } + if score[0].Complexity != 2 { + t.Errorf("score = %d, want 2 (+1 per if, no nesting)", score[0].Complexity) } - for i := range analyze { - if analyze[i].Complexity != score[i].Complexity { - t.Errorf("[%d] complexity mismatch: %d vs %d", i, analyze[i].Complexity, score[i].Complexity) - } + + // The full calculator gives the same code a higher score due to nesting. + analyze, _ := complexityImpl{}.AnalyzeFile(fp, fc) + if analyze[0].Complexity != 3 { + t.Errorf("AnalyzeFile = %d, want 3 (cognitive with nesting)", analyze[0].Complexity) } } From f8d711012ffe486488c8f3edd3191521bb14eb6a Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:55:31 -0400 Subject: [PATCH 07/38] =?UTF-8?q?feat(lang):=20Part=20B=20=E2=80=94=20mult?= =?UTF-8?q?i-language=20orchestration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add --language CLI flag (comma-separated, empty = auto-detect) and rewire run() to loop over the resolved language set, running the analyzer pipeline once per language and merging sections into one report. Section naming: when only one language contributes (the common case today, since only Go is registered), section names stay unsuffixed so the output is byte-identical to pre-multi-language diffguard. When two or more languages contribute, each section name gets a "[<language>]" suffix and the merged sections are sorted (language, metric) lexicographically for stable report ordering. 
Empty-diff UX preserved for the single-language case ("No Go files found.") and generalized per-language in the multi-language case. An unknown --language value is a hard error listing the registered names. Includes the B6 smoke test (TestRun_SingleLanguageGo) using a temp git repo, plus resolveLanguages unit tests and a B5 checkExitCode escalation test. A TODO on the smoke test notes it will extend to assert per-language section suffixes when Rust/TS land. Regression gate re-run: byte-identical text output, wall-clock median 0.462s vs baseline 0.491s (-6%). Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/diffguard/main.go | 150 +++++++++++++++++++++---- cmd/diffguard/main_test.go | 221 +++++++++++++++++++++++++++++++++++++ 2 files changed, 352 insertions(+), 19 deletions(-) create mode 100644 cmd/diffguard/main_test.go diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index af87fdd..e5fe510 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -6,6 +6,7 @@ import ( "os" "os/exec" "path/filepath" + "sort" "strings" "time" @@ -36,6 +37,7 @@ func main() { flag.StringVar(&cfg.FailOn, "fail-on", "warn", "Exit non-zero if thresholds breached: none, warn, all") flag.StringVar(&cfg.BaseBranch, "base", "", "Base branch to diff against (default: auto-detect)") flag.StringVar(&cfg.Paths, "paths", "", "Comma-separated files/dirs to analyze in full (refactoring mode); skips git diff") + flag.StringVar(&cfg.Language, "language", "", "Comma-separated languages to analyze (e.g. 'go' or 'rust,typescript'); empty = auto-detect") flag.Parse() if flag.NArg() < 1 { @@ -76,44 +78,154 @@ type Config struct { FailOn string BaseBranch string Paths string + Language string } +// run resolves the language set (explicit --language flag or auto-detect via +// manifest scan), then invokes the analyzer pipeline once per language and +// merges the resulting sections into a single report. 
func run(repoPath string, cfg Config) error { - goLang, ok := lang.Get("go") - if !ok { - return fmt.Errorf("go analyzer not registered") - } - filter := diffFilter(goLang) - - d, err := loadFiles(repoPath, cfg, filter) + languages, err := resolveLanguages(repoPath, cfg.Language) if err != nil { return err } - if len(d.Files) == 0 { - fmt.Println("No Go files found.") + // Collect per-language analysis. suffix-per-section only when more than + // one language contributes, so the single-language invocation stays + // byte-identical to the pre-multi-language output. + type langResult struct { + lang lang.Language + diff *diff.Result + sections []report.Section + } + var results []langResult + for _, l := range languages { + d, err := loadFiles(repoPath, cfg, diffFilter(l)) + if err != nil { + return err + } + if len(d.Files) == 0 { + // Empty language: report nothing for it. When only one language + // is in play we preserve the legacy UX with a specific message. + if len(languages) == 1 { + fmt.Printf("No %s files found.\n", languageNoun(l)) + return nil + } + fmt.Fprintf(os.Stderr, "No %s files found; skipping.\n", languageNoun(l)) + continue + } + announceRun(d, cfg, l, len(languages)) + sections, err := runAnalyses(repoPath, d, cfg, l) + if err != nil { + return err + } + results = append(results, langResult{lang: l, diff: d, sections: sections}) + } + + if len(results) == 0 { + fmt.Printf("No %s files found.\n", languageNoun(languages[0])) return nil } - announceRun(d, cfg) + var allSections []report.Section + multi := len(results) > 1 + for _, r := range results { + for _, s := range r.sections { + if multi { + s.Name = fmt.Sprintf("%s [%s]", s.Name, r.lang.Name()) + } + allSections = append(allSections, s) + } + } - sections, err := runAnalyses(repoPath, d, cfg, goLang) - if err != nil { - return err + // When multi-language, sort by (language, metric) lexicographically so + // section ordering is stable across runs and hosts. 
+ if multi { + sort.SliceStable(allSections, func(i, j int) bool { + return allSections[i].Name < allSections[j].Name + }) } - r := report.Report{Sections: sections} - if err := writeReport(r, cfg.Output); err != nil { + rpt := report.Report{Sections: allSections} + if err := writeReport(rpt, cfg.Output); err != nil { return err } - return checkExitCode(r, cfg.FailOn) + return checkExitCode(rpt, cfg.FailOn) } -func announceRun(d *diff.Result, cfg Config) { +// resolveLanguages turns the --language flag value (or auto-detect) into a +// concrete list of Language implementations. Unknown names in the flag are +// a hard error; an empty detection set is a hard error with a suggestion +// to pass --language. +func resolveLanguages(repoPath, flagValue string) ([]lang.Language, error) { + if flagValue == "" { + langs := lang.Detect(repoPath) + if len(langs) == 0 { + return nil, fmt.Errorf("no supported language detected; pass --language to override (see --help)") + } + return langs, nil + } + + var out []lang.Language + seen := map[string]bool{} + for _, name := range strings.Split(flagValue, ",") { + name = strings.TrimSpace(name) + if name == "" || seen[name] { + continue + } + seen[name] = true + l, ok := lang.Get(name) + if !ok { + return nil, fmt.Errorf("unknown language %q (registered: %s)", name, strings.Join(registeredNames(), ", ")) + } + out = append(out, l) + } + if len(out) == 0 { + return nil, fmt.Errorf("empty --language flag") + } + // Sort for determinism, matching lang.All()/Detect() behavior. + sort.Slice(out, func(i, j int) bool { return out[i].Name() < out[j].Name() }) + return out, nil +} + +func registeredNames() []string { + all := lang.All() + names := make([]string, len(all)) + for i, l := range all { + names[i] = l.Name() + } + return names +} + +// languageNoun returns the human-friendly noun for status messages. For Go +// we preserve the legacy capitalized form ("No Go files found.") so +// single-language output stays byte-identical. 
+func languageNoun(l lang.Language) string { + switch l.Name() { + case "go": + return "Go" + case "rust": + return "Rust" + case "typescript": + return "TypeScript" + default: + return l.Name() + } +} + +func announceRun(d *diff.Result, cfg Config, l lang.Language, numLanguages int) { + noun := languageNoun(l) + // For a single-language run, preserve the legacy message exactly: + // "Analyzing N changed Go files against main..." / refactoring-mode + // phrasing. Multi-language adds a bracketed suffix. + suffix := "" + if numLanguages > 1 { + suffix = fmt.Sprintf(" [%s]", l.Name()) + } if cfg.Paths != "" { - fmt.Fprintf(os.Stderr, "Analyzing %d Go files (refactoring mode)...\n", len(d.Files)) + fmt.Fprintf(os.Stderr, "Analyzing %d %s files (refactoring mode)%s...\n", len(d.Files), noun, suffix) } else { - fmt.Fprintf(os.Stderr, "Analyzing %d changed Go files against %s...\n", len(d.Files), cfg.BaseBranch) + fmt.Fprintf(os.Stderr, "Analyzing %d changed %s files against %s%s...\n", len(d.Files), noun, cfg.BaseBranch, suffix) } } diff --git a/cmd/diffguard/main_test.go b/cmd/diffguard/main_test.go new file mode 100644 index 0000000..a02ea39 --- /dev/null +++ b/cmd/diffguard/main_test.go @@ -0,0 +1,221 @@ +package main + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" + "github.com/0xPolygon/diffguard/internal/report" +) + +// TestRun_SingleLanguageGo is the B6 smoke test: runs the orchestrator +// against a temp git repo with a single .go file change. Exercises the +// end-to-end path (CLI config → language resolution → diff parse → +// analyzer pipeline → report build → exit code) without spawning a +// subprocess. +// +// TODO: once Rust and TypeScript analyzers land, extend this test with +// fixture files in the same temp repo and assert all three language +// sections appear in the output. 
The current test only has the Go +// analyzer registered, so multi-language section naming isn't exercised. +func TestRun_SingleLanguageGo(t *testing.T) { + repo := initTempGoRepo(t) + + cfg := Config{ + ComplexityThreshold: 10, + FunctionSizeThreshold: 50, + FileSizeThreshold: 500, + SkipMutation: true, + Output: "text", + FailOn: "none", + BaseBranch: "main", + } + + // Redirect stdout/stderr so the test doesn't pollute output. We don't + // assert on exact content here — the byte-identical regression gate + // covers that — but we do assert run() returns no error. + withSuppressedStdio(t, func() { + if err := run(repo, cfg); err != nil { + t.Fatalf("run returned error: %v", err) + } + }) +} + +// TestRun_UnknownLanguageHardError locks in that an unknown --language +// value fails with a clear error rather than silently falling back to +// auto-detect. +func TestRun_UnknownLanguageHardError(t *testing.T) { + repo := initTempGoRepo(t) + cfg := Config{ + Output: "text", + FailOn: "none", + BaseBranch: "main", + Language: "cobol", + } + err := run(repo, cfg) + if err == nil { + t.Fatal("expected error for unknown language, got nil") + } + if !strings.Contains(err.Error(), "cobol") { + t.Errorf("error = %q, want it to mention 'cobol'", err.Error()) + } +} + +// TestResolveLanguages_ExplicitGo verifies the comma-split path. +func TestResolveLanguages_ExplicitGo(t *testing.T) { + repo := initTempGoRepo(t) + langs, err := resolveLanguages(repo, "go") + if err != nil { + t.Fatalf("resolveLanguages: %v", err) + } + if len(langs) != 1 || langs[0].Name() != "go" { + t.Errorf("langs = %v, want [go]", names(langs)) + } +} + +// TestResolveLanguages_AutoDetect verifies that a repo with go.mod is +// auto-detected as Go. 
+func TestResolveLanguages_AutoDetect(t *testing.T) { + repo := initTempGoRepo(t) + langs, err := resolveLanguages(repo, "") + if err != nil { + t.Fatalf("resolveLanguages: %v", err) + } + if len(langs) != 1 || langs[0].Name() != "go" { + t.Errorf("langs = %v, want [go]", names(langs)) + } +} + +// TestResolveLanguages_EmptyDetection fails cleanly when nothing is +// detectable and no --language is provided. +func TestResolveLanguages_EmptyDetection(t *testing.T) { + dir := t.TempDir() + _, err := resolveLanguages(dir, "") + if err == nil { + t.Fatal("expected error for empty detection") + } + if !strings.Contains(err.Error(), "--language") { + t.Errorf("error = %q, expected hint about --language", err.Error()) + } +} + +// TestResolveLanguages_Deduplicates ensures passing "go,go" returns one +// Language, not two. +func TestResolveLanguages_Deduplicates(t *testing.T) { + repo := initTempGoRepo(t) + langs, err := resolveLanguages(repo, "go,go") + if err != nil { + t.Fatalf("resolveLanguages: %v", err) + } + if len(langs) != 1 { + t.Errorf("len = %d, want 1 (dedup)", len(langs)) + } +} + +// initTempGoRepo creates a minimal git repo with a single committed Go +// file on main, plus an additional file on HEAD so the diff has content. +// Returns the absolute path to the repo. +func initTempGoRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + run := func(args ...string) { + cmd := exec.Command("git", args...) + cmd.Dir = dir + cmd.Env = append(os.Environ(), "GIT_CONFIG_GLOBAL=/dev/null") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + + // init + author config + run("init", "-q", "--initial-branch=main") + run("config", "user.email", "test@example.com") + run("config", "user.name", "Test") + run("config", "commit.gpgsign", "false") + + // base commit with go.mod + a base file so Parse has something to + // merge-base against. 
+ if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module example.com/testrepo\n\ngo 1.21\n"), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "base.go"), []byte("package main\n"), 0644); err != nil { + t.Fatal(err) + } + run("add", ".") + run("commit", "-q", "-m", "base") + + // Feature commit adds a new file with a small function. This is what + // appears in the diff. + if err := os.WriteFile(filepath.Join(dir, "new.go"), []byte("package main\n\nfunc helper(x int) int {\n\tif x > 0 {\n\t\treturn x\n\t}\n\treturn -x\n}\n"), 0644); err != nil { + t.Fatal(err) + } + run("add", ".") + run("commit", "-q", "-m", "add new.go") + + return dir +} + +// withSuppressedStdio redirects os.Stdout/Stderr to /dev/null for the +// duration of fn. Restores on return. +func withSuppressedStdio(t *testing.T, fn func()) { + t.Helper() + devnull, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0) + if err != nil { + t.Fatal(err) + } + defer devnull.Close() + + origOut, origErr := os.Stdout, os.Stderr + os.Stdout = devnull + os.Stderr = devnull + defer func() { + os.Stdout = origOut + os.Stderr = origErr + }() + fn() +} + +func names(langs []lang.Language) []string { + out := make([]string, len(langs)) + for i, l := range langs { + out[i] = l.Name() + } + return out +} + +// TestCheckExitCode_FailInAnyLanguageEscalates covers B5: a FAIL section +// in any language must escalate the overall exit code, regardless of how +// many languages contribute sections. checkExitCode already takes a +// merged report, so this is a unit-level check on WorstSeverity behavior +// mirrored through checkExitCode. 
+func TestCheckExitCode_FailInAnyLanguageEscalates(t *testing.T) { + fail := report.Section{Name: "Complexity [rust]", Severity: report.SeverityFail} + pass := report.Section{Name: "Complexity [go]", Severity: report.SeverityPass} + warn := report.Section{Name: "Sizes [typescript]", Severity: report.SeverityWarn} + + merged := report.Report{Sections: []report.Section{pass, fail, warn}} + + // fail-on=warn: any FAIL escalates. + if err := checkExitCode(merged, "warn"); err == nil { + t.Error("fail-on=warn with FAIL section should return error") + } + + // fail-on=all: any non-PASS escalates (FAIL or WARN). + if err := checkExitCode(merged, "all"); err == nil { + t.Error("fail-on=all with FAIL section should return error") + } + + // fail-on=none: never escalates. + if err := checkExitCode(merged, "none"); err != nil { + t.Errorf("fail-on=none should not error, got %v", err) + } + + // All PASS: no error. + allPass := report.Report{Sections: []report.Section{pass, pass}} + if err := checkExitCode(allPass, "warn"); err != nil { + t.Errorf("all-PASS should not error, got %v", err) + } +} From 27c18c8fb8d637809696548ca205c95352247771 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 14:59:35 -0400 Subject: [PATCH 08/38] fix(diff): restore .go detection after Filter refactor Pass explicit --src-prefix=a/ --dst-prefix=b/ to git diff so the unified diff output always carries the "+++ b/" prefix that parseUnifiedDiff expects. Without this, users with diff.mnemonicPrefix=true (or the diff.noPrefix / custom prefix variants) see git emit "+++ w/", handleFileLine rejects every path, and Parse returns zero files. This bug predates the Filter refactor, but exposing it via a parameter made TestParse_SuccessDetectsChangedGoFile flake under local git configs that the previous CI-only run never saw. The baseline regression gate (pre-refactor vs HEAD, --skip-mutation, --fail-on none) remains byte-identical in a clean env where the prefix default is already a/b/. 
--- internal/diff/diff.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/diff/diff.go b/internal/diff/diff.go index 4b17085..27391cb 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -116,7 +116,7 @@ func Parse(repoPath, baseBranch string, filter Filter) (*Result, error) { } mergeBase := strings.TrimSpace(string(mergeBaseOut)) - args := []string{"diff", "-U0", mergeBase} + args := []string{"diff", "--src-prefix=a/", "--dst-prefix=b/", "-U0", mergeBase} if len(filter.DiffGlobs) > 0 { args = append(args, "--") args = append(args, filter.DiffGlobs...) From 31a561b5c971aca58850362b0e75112f2677e314 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:10:05 -0400 Subject: [PATCH 09/38] feat(rust): C0-C1 scaffold rustanalyzer package with FileFilter Adds the rustanalyzer package directory, depends on tree-sitter-rust via go-tree-sitter, and registers a minimal Rust Language with diffguard so Part C checklist items C0 (research prerequisites) and C1 (FileFilter) are complete. * FileFilter: .rs extension, IsTestFile treats any `tests` path segment as a test file, DiffGlobs = [*.rs]. * init() registers the language and its Cargo.toml manifest. * Stub implementations for the remaining sub-interfaces are in place so the package compiles; they're filled out by subsequent commits. * Rust-specific operators registered in internal/mutation/tiers.go: unwrap_removal + some_to_none (Tier 1), question_mark_removal (Tier 2). * Blank import added in cmd/diffguard/main.go. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/diffguard/main.go | 1 + go.mod | 2 + go.sum | 10 + internal/lang/rustanalyzer/complexity.go | 275 +++++++++++++++ internal/lang/rustanalyzer/deps.go | 257 ++++++++++++++ .../lang/rustanalyzer/mutation_annotate.go | 108 ++++++ internal/lang/rustanalyzer/mutation_apply.go | 327 ++++++++++++++++++ .../lang/rustanalyzer/mutation_generate.go | 304 ++++++++++++++++ internal/lang/rustanalyzer/parse.go | 108 ++++++ internal/lang/rustanalyzer/rustanalyzer.go | 65 ++++ .../lang/rustanalyzer/rustanalyzer_test.go | 70 ++++ internal/lang/rustanalyzer/sizes.go | 198 +++++++++++ internal/lang/rustanalyzer/testrunner.go | 186 ++++++++++ internal/mutation/tiers.go | 16 +- 14 files changed, 1925 insertions(+), 2 deletions(-) create mode 100644 go.sum create mode 100644 internal/lang/rustanalyzer/complexity.go create mode 100644 internal/lang/rustanalyzer/deps.go create mode 100644 internal/lang/rustanalyzer/mutation_annotate.go create mode 100644 internal/lang/rustanalyzer/mutation_apply.go create mode 100644 internal/lang/rustanalyzer/mutation_generate.go create mode 100644 internal/lang/rustanalyzer/parse.go create mode 100644 internal/lang/rustanalyzer/rustanalyzer.go create mode 100644 internal/lang/rustanalyzer/rustanalyzer_test.go create mode 100644 internal/lang/rustanalyzer/sizes.go create mode 100644 internal/lang/rustanalyzer/testrunner.go diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index e5fe510..a8f03e4 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -16,6 +16,7 @@ import ( "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/lang" _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" + _ "github.com/0xPolygon/diffguard/internal/lang/rustanalyzer" "github.com/0xPolygon/diffguard/internal/mutation" "github.com/0xPolygon/diffguard/internal/report" "github.com/0xPolygon/diffguard/internal/sizes" diff --git a/go.mod b/go.mod index ee2a376..bd17a28 
100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/0xPolygon/diffguard go 1.26.1 + +require github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..702e57b --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4= +github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/lang/rustanalyzer/complexity.go b/internal/lang/rustanalyzer/complexity.go new file mode 100644 index 0000000..bafd992 --- /dev/null +++ b/internal/lang/rustanalyzer/complexity.go @@ -0,0 +1,275 @@ +package rustanalyzer + +import ( + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// complexityImpl implements both lang.ComplexityCalculator and +// lang.ComplexityScorer for Rust. Tree-sitter walks are fast enough that we +// use the same full-cognitive-complexity algorithm for both interfaces — +// matching the Go analyzer's reuse strategy. 
+type complexityImpl struct{} + +// AnalyzeFile returns per-function cognitive complexity for every function +// that overlaps the diff's changed regions. +func (complexityImpl) AnalyzeFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return scoreFile(absPath, fc) +} + +// ScoreFile is the ComplexityScorer entry point used by the churn analyzer. +// It shares an implementation with AnalyzeFile; the per-file cost is small +// enough that a separate "faster" scorer would not be worth the divergence. +func (complexityImpl) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return scoreFile(absPath, fc) +} + +func scoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + fns := collectFunctions(tree.RootNode(), src) + + var results []lang.FunctionComplexity + for _, fn := range fns { + if !fc.OverlapsRange(fn.startLine, fn.endLine) { + continue + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: fn.startLine, + EndLine: fn.endLine, + Name: fn.name, + }, + Complexity: cognitiveComplexity(fn.body, src), + }) + } + + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, nil +} + +// cognitiveComplexity computes the Rust cognitive-complexity score for the +// body block of a function. 
The algorithm, per the design doc: +// +// - +1 base on each control-flow construct (if, while, for, loop, match, +// if let, while let) +// - +1 per guarded match arm (the `if` guard in `pattern if cond => ...`) +// - +1 per logical-op token-sequence switch (a `||` that follows an `&&` +// chain or vice versa) +// - +1 nesting penalty for each scope-introducing ancestor +// +// The `?` operator and `unsafe` blocks do NOT contribute — they're +// error-propagation and safety annotations respectively, not cognitive +// control flow. +// +// A nil body (trait method with no default) has complexity 0. +func cognitiveComplexity(body *sitter.Node, src []byte) int { + if body == nil { + return 0 + } + return walkComplexity(body, src, 0) +} + +// walkComplexity is the recursive heart of the algorithm. `nesting` is the +// depth penalty to apply when an increment fires — it goes up every time +// we descend into a control-flow construct and does NOT go up for +// non-control-flow blocks like `unsafe`. 
+func walkComplexity(n *sitter.Node, src []byte, nesting int) int { + if n == nil { + return 0 + } + total := 0 + switch n.Type() { + case "if_expression": + total += 1 + nesting + total += conditionLogicalOps(n.ChildByFieldName("condition")) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "while_expression": + total += 1 + nesting + total += conditionLogicalOps(n.ChildByFieldName("condition")) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "for_expression": + total += 1 + nesting + total += walkChildrenWithNesting(n, src, nesting) + return total + case "loop_expression": + total += 1 + nesting + total += walkChildrenWithNesting(n, src, nesting) + return total + case "match_expression": + total += 1 + nesting + total += countGuardedArms(n) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "if_let_expression": + // Older grammar versions model `if let` as a distinct node; current + // versions fold it into if_expression with a `let_condition` child. + // We cover both so the walker is resilient across grammar updates. + total += 1 + nesting + total += walkChildrenWithNesting(n, src, nesting) + return total + case "while_let_expression": + total += 1 + nesting + total += walkChildrenWithNesting(n, src, nesting) + return total + case "closure_expression": + // A closure body introduces its own nesting context and doesn't + // inherit the outer nesting depth — same treatment as Go's FuncLit. + if body := n.ChildByFieldName("body"); body != nil { + total += walkComplexity(body, src, 0) + } + return total + case "function_item": + // Nested function declarations are treated as separate functions + // for the size extractor and should not contribute here. + return 0 + } + + // Descend into children without adding nesting for plain blocks, + // expressions, statements, etc. 
+ for i := 0; i < int(n.ChildCount()); i++ { + total += walkComplexity(n.Child(i), src, nesting) + } + return total +} + +// walkChildrenWithNesting recurses into the subtrees whose bodies belong to +// the construct at `n`. We identify those by looking at `body`, `alternative` +// ('else' branch), and `consequence` fields where present; other children +// (the condition expression, the header) keep the current nesting level so +// logical-op counting doesn't get a bonus point for being inside an `if`. +func walkChildrenWithNesting(n *sitter.Node, src []byte, nesting int) int { + total := 0 + // Tree-sitter exposes the sub-trees we want via named fields. Any + // field we haven't handled explicitly is walked as a body for safety. + for i := 0; i < int(n.ChildCount()); i++ { + c := n.Child(i) + if c == nil { + continue + } + fieldName := n.FieldNameForChild(i) + switch fieldName { + case "condition", "value", "pattern", "type": + // Condition expressions stay at the current nesting: a && chain + // inside an `if` is already being counted by conditionLogicalOps; + // re-descending here would double-count. + total += walkComplexity(c, src, nesting) + case "body", "consequence", "alternative": + total += walkComplexity(c, src, nesting+1) + default: + total += walkComplexity(c, src, nesting) + } + } + return total +} + +// countGuardedArms walks the arms of a match_expression and counts how many +// have an `if` guard. Grammar shape: +// +// (match_expression +// value: ... +// body: (match_block +// (match_arm pattern: (...) [(match_arm_guard ...)] value: (...)))) +// +// We look for any child named `match_arm` whose subtree includes a +// `match_arm_guard` node. This is grammar-robust: older variants nest the +// guard directly as an `if` keyword sibling, newer ones wrap it in an +// explicit guard node — both show up under the arm when we walk. 
+func countGuardedArms(match *sitter.Node) int { + block := match.ChildByFieldName("body") + if block == nil { + return 0 + } + count := 0 + walk(block, func(n *sitter.Node) bool { + if n.Type() == "match_arm" { + if hasGuard(n) { + count++ + } + // Descend: arms can contain nested match expressions. + return true + } + return true + }) + return count +} + +// hasGuard reports whether a match_arm node carries an `if` guard. +func hasGuard(arm *sitter.Node) bool { + for i := 0; i < int(arm.ChildCount()); i++ { + c := arm.Child(i) + if c == nil { + continue + } + if c.Type() == "match_arm_guard" { + return true + } + } + return false +} + +// conditionLogicalOps returns the operator-switch count for the chain of +// `&&`/`||` operators directly inside an `if`/`while` condition. See +// countLogicalOps in the Go analyzer for the algorithm — a run of the same +// operator counts as 1, each switch to the other adds 1. +func conditionLogicalOps(cond *sitter.Node) int { + if cond == nil { + return 0 + } + ops := flattenLogicalOps(cond) + if len(ops) == 0 { + return 0 + } + count := 1 + for i := 1; i < len(ops); i++ { + if ops[i] != ops[i-1] { + count++ + } + } + return count +} + +// flattenLogicalOps collects the `&&` / `||` operator sequence of a +// binary_expression tree, left-to-right. Non-logical binary ops stop the +// recursion (their operands don't contribute to the logical-chain count). +// +// Tree-sitter Rust models `a && b` as +// +// (binary_expression left: ... operator: "&&" right: ...) +// +// — the operator is an anonymous child whose type literal is the operator +// symbol. We discover it via ChildByFieldName("operator"). 
+func flattenLogicalOps(n *sitter.Node) []string { + if n == nil || n.Type() != "binary_expression" { + return nil + } + op := n.ChildByFieldName("operator") + if op == nil { + return nil + } + opText := op.Type() + if opText != "&&" && opText != "||" { + return nil + } + var out []string + out = append(out, flattenLogicalOps(n.ChildByFieldName("left"))...) + out = append(out, opText) + out = append(out, flattenLogicalOps(n.ChildByFieldName("right"))...) + return out +} diff --git a/internal/lang/rustanalyzer/deps.go b/internal/lang/rustanalyzer/deps.go new file mode 100644 index 0000000..9f4d18a --- /dev/null +++ b/internal/lang/rustanalyzer/deps.go @@ -0,0 +1,257 @@ +package rustanalyzer + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + sitter "github.com/smacker/go-tree-sitter" +) + +// depsImpl implements lang.ImportResolver for Rust via tree-sitter. The +// Cargo.toml manifest gives us the crate (package) name; in-source +// `use crate::` / `use self::` / `use super::` declarations and `mod` +// declarations provide the internal dependency edges. +// +// The returned graph uses directory-level node keys (paths relative to the +// repo root) so it matches the Go analyzer's shape: every edge says "this +// package directory depends on that package directory". +type depsImpl struct{} + +// DetectModulePath returns the crate name read from Cargo.toml's +// `[package] name = "..."` entry. We parse the TOML with a lightweight +// line scanner rather than pulling in a full TOML dependency — the two +// tokens we need are easy to find and the result is cached by the caller. 
+func (depsImpl) DetectModulePath(repoPath string) (string, error) { + cargoPath := filepath.Join(repoPath, "Cargo.toml") + content, err := os.ReadFile(cargoPath) + if err != nil { + return "", fmt.Errorf("reading Cargo.toml: %w", err) + } + name := parseCargoPackageName(string(content)) + if name == "" { + return "", fmt.Errorf("no [package] name found in Cargo.toml") + } + return name, nil +} + +// parseCargoPackageName extracts the `name = "..."` value from the +// [package] table of a Cargo.toml. We accept either quote style and ignore +// table nesting beyond the top-level [package] header; that's sufficient +// because `name` is never redeclared under nested tables. +func parseCargoPackageName(content string) string { + inPackage := false + for _, raw := range strings.Split(content, "\n") { + line := strings.TrimSpace(raw) + if strings.HasPrefix(line, "#") { + continue + } + if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") { + inPackage = strings.EqualFold(line, "[package]") + continue + } + if !inPackage { + continue + } + if !strings.HasPrefix(line, "name") { + continue + } + // line looks like: name = "foo" or name="foo" + eq := strings.IndexByte(line, '=') + if eq < 0 { + continue + } + val := strings.TrimSpace(line[eq+1:]) + val = strings.Trim(val, "\"'") + if val != "" { + return val + } + } + return "" +} + +// ScanPackageImports returns a single-entry adjacency map: +// +// { : { : true, : true, ... } } +// +// where keys are directories relative to repoPath. A use declaration is +// "internal" when it begins with `crate::`, `self::`, or `super::`. +// External crates (anything else) are filtered out. `mod foo;` adds an +// edge from the current package to the child module subdir. 
+func (depsImpl) ScanPackageImports(repoPath, pkgDir, _ string) map[string]map[string]bool { + absDir := filepath.Join(repoPath, pkgDir) + entries, err := os.ReadDir(absDir) + if err != nil { + return nil + } + + deps := map[string]bool{} + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".rs") { + continue + } + absFile := filepath.Join(absDir, e.Name()) + if isRustTestFile(absFile) { + continue + } + collectImports(absFile, repoPath, pkgDir, deps) + } + if len(deps) == 0 { + return nil + } + return map[string]map[string]bool{pkgDir: deps} +} + +// collectImports parses one .rs file and adds each internal import / mod +// declaration to `deps`. Parse errors are silently ignored to match the Go +// analyzer's "skip broken files" behavior. +func collectImports(absFile, repoPath, pkgDir string, deps map[string]bool) { + tree, src, err := parseFile(absFile) + if err != nil { + return + } + defer tree.Close() + + walk(tree.RootNode(), func(n *sitter.Node) bool { + switch n.Type() { + case "use_declaration": + addUseEdge(n, src, pkgDir, deps) + case "mod_item": + addModEdge(n, src, repoPath, pkgDir, deps) + } + return true + }) +} + +// addUseEdge examines a `use` declaration and, if it starts with +// `crate::` / `self::` / `super::`, records an edge to the directory that +// corresponds to the path's module prefix. We stop at the penultimate +// segment because the final segment is the imported item (function/type/ +// trait), not a package directory. +func addUseEdge(n *sitter.Node, src []byte, pkgDir string, deps map[string]bool) { + // The `argument` field holds the import path tree. + arg := n.ChildByFieldName("argument") + if arg == nil { + return + } + // Walk the arg, skipping the final item to produce a package path. 
+ segs := collectUseSegments(arg, src) + if len(segs) == 0 { + return + } + target := resolveInternalPath(segs, pkgDir) + if target == "" { + return + } + deps[target] = true +} + +// collectUseSegments returns the left-to-right identifier sequence of a +// use path. We skip list forms (`use foo::{bar, baz}`) by only descending +// through scoped_identifier / scoped_use_list / identifier structures and +// taking the first branch — good enough to detect `crate::`/`self::`/ +// `super::` roots for edge classification. +// +// Only the prefix is load-bearing; we intentionally don't try to enumerate +// every symbol in a nested use list because the edge granularity is the +// module (directory), not the symbol. +func collectUseSegments(n *sitter.Node, src []byte) []string { + var segs []string + var collect func(*sitter.Node) + collect = func(cur *sitter.Node) { + if cur == nil { + return + } + switch cur.Type() { + case "scoped_identifier": + collect(cur.ChildByFieldName("path")) + if name := cur.ChildByFieldName("name"); name != nil { + segs = append(segs, nodeText(name, src)) + } + case "identifier", "crate", "self", "super": + segs = append(segs, nodeText(cur, src)) + case "use_list": + // Take only the first item of a `{a, b}` list — enough to + // retain the shared prefix that already got emitted. + if cur.ChildCount() > 0 { + for i := 0; i < int(cur.ChildCount()); i++ { + c := cur.Child(i) + if c != nil && c.IsNamed() { + collect(c) + return + } + } + } + case "scoped_use_list": + collect(cur.ChildByFieldName("path")) + if list := cur.ChildByFieldName("list"); list != nil { + collect(list) + } + case "use_as_clause": + collect(cur.ChildByFieldName("path")) + } + } + collect(n) + return segs +} + +// resolveInternalPath maps a sequence of use segments to a repo-relative +// package directory, or returns "" if the path is not internal. 
+//
+// crate::foo::bar::Baz -> src/foo/bar (relative to crate root 'src')
+// self::foo            -> pkgDir/foo (sibling module)
+// super::foo           -> <parent of pkgDir>/foo
+//
+// We assume a standard Cargo layout: crate root lives at `src/` under the
+// repo root for library crates and `src/bin/<name>.rs` / similar for
+// binaries. For this analyzer, `crate::x::y::Z` resolves to `src/x/y` —
+// which is the directory the imported module lives in. The final segment
+// (`Z`) is dropped because we want package-level, not symbol-level, edges.
+//
+// NOTE(review): this assumes a single crate rooted at the repo's `src/`.
+// In a Cargo workspace (crates/foo/src/...), `crate::` paths would still
+// map to the top-level `src/` — confirm against the design doc before
+// relying on workspace graphs.
+func resolveInternalPath(segs []string, pkgDir string) string {
+	if len(segs) == 0 {
+		return ""
+	}
+	// Drop the final segment (imported item) to get the module directory.
+	// A single-segment import like `use crate::foo;` still lands at the
+	// crate root directory since `foo` is the item, not a directory.
+	modSegs := segs[:len(segs)-1]
+	if len(modSegs) == 0 {
+		return ""
+	}
+
+	switch modSegs[0] {
+	case "crate":
+		// `crate::` roots at `src/`.
+		parts := append([]string{"src"}, modSegs[1:]...)
+		return filepath.ToSlash(filepath.Join(parts...))
+	case "self":
+		// `self::` is relative to the importing file's own directory.
+		parts := append([]string{pkgDir}, modSegs[1:]...)
+		return filepath.ToSlash(filepath.Join(parts...))
+	case "super":
+		// `super::` climbs one directory; normalize the repo-root case
+		// ("." / "/") to the empty string so Join doesn't emit "./".
+		parent := filepath.Dir(pkgDir)
+		if parent == "." || parent == "/" {
+			parent = ""
+		}
+		parts := append([]string{parent}, modSegs[1:]...)
+		p := filepath.Join(parts...)
+		return filepath.ToSlash(p)
+	}
+	// Any other root (external crate, bare identifier) is not internal.
+	return ""
+}
+
+// addModEdge records an edge for `mod foo;` declarations: the module
+// always resolves to a sibling directory (or sibling file) inside pkgDir.
+// We emit the directory path so the graph stays at directory granularity.
+func addModEdge(n *sitter.Node, src []byte, _, pkgDir string, deps map[string]bool) { + name := n.ChildByFieldName("name") + if name == nil { + return + } + modName := nodeText(name, src) + if modName == "" { + return + } + target := filepath.ToSlash(filepath.Join(pkgDir, modName)) + deps[target] = true +} diff --git a/internal/lang/rustanalyzer/mutation_annotate.go b/internal/lang/rustanalyzer/mutation_annotate.go new file mode 100644 index 0000000..78d6fb0 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_annotate.go @@ -0,0 +1,108 @@ +package rustanalyzer + +import ( + "strings" + + sitter "github.com/smacker/go-tree-sitter" +) + +// annotationScannerImpl implements lang.AnnotationScanner for Rust. The +// disable annotations are identical to the Go forms: +// +// // mutator-disable-next-line +// // mutator-disable-func +// +// `//` and `/* ... */` comments are both accepted — tree-sitter exposes +// them as `line_comment` and `block_comment` respectively. +type annotationScannerImpl struct{} + +// ScanAnnotations returns the set of 1-based source lines on which mutation +// generation should be suppressed. +func (annotationScannerImpl) ScanAnnotations(absPath string) (map[int]bool, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, err + } + defer tree.Close() + + disabled := map[int]bool{} + funcRanges := collectFuncRanges(tree.RootNode(), src) + + walk(tree.RootNode(), func(n *sitter.Node) bool { + switch n.Type() { + case "line_comment", "block_comment": + applyAnnotation(n, src, funcRanges, disabled) + } + return true + }) + return disabled, nil +} + +// applyAnnotation consumes a single comment node and, if it carries a +// known annotation, disables the appropriate line(s) in `disabled`. 
+func applyAnnotation(comment *sitter.Node, src []byte, funcs []funcRange, disabled map[int]bool) { + text := stripCommentMarkers(nodeText(comment, src)) + line := nodeLine(comment) + switch { + case strings.HasPrefix(text, "mutator-disable-next-line"): + disabled[line+1] = true + case strings.HasPrefix(text, "mutator-disable-func"): + disableEnclosingFunc(line, funcs, disabled) + } +} + +// stripCommentMarkers strips `//`, `/*`, `*/` and surrounding whitespace. +// Matches the Go analyzer's helper so annotation behavior stays uniform +// across languages. +func stripCommentMarkers(raw string) string { + s := strings.TrimSpace(raw) + s = strings.TrimPrefix(s, "//") + s = strings.TrimPrefix(s, "/*") + s = strings.TrimSuffix(s, "*/") + return strings.TrimSpace(s) +} + +// disableEnclosingFunc marks every line of the function the comment +// belongs to as disabled. A comment belongs to a function when it sits +// inside the function's range, or when it directly precedes the function +// (at most one blank line between them, matching the Go analyzer). +func disableEnclosingFunc(commentLine int, funcs []funcRange, disabled map[int]bool) { + for _, r := range funcs { + if isCommentForFunc(commentLine, r) { + for i := r.start; i <= r.end; i++ { + disabled[i] = true + } + return + } + } +} + +func isCommentForFunc(commentLine int, r funcRange) bool { + if commentLine >= r.start && commentLine <= r.end { + return true + } + return r.start > commentLine && r.start-commentLine <= 2 +} + +// funcRange is the 1-based inclusive line span of a function_item node. +// The same range shape is used by the annotation scanner and by the mutant +// generator (via its filtering of "which lines belong to a function"). +type funcRange struct{ start, end int } + +// collectFuncRanges returns one funcRange per function_item in the file. +// Methods inside impl blocks are included too — same source-line universe +// the mutant generator cares about. 
+func collectFuncRanges(root *sitter.Node, _ []byte) []funcRange { + var ranges []funcRange + walk(root, func(n *sitter.Node) bool { + if n.Type() != "function_item" { + return true + } + ranges = append(ranges, funcRange{ + start: nodeLine(n), + end: nodeEndLine(n), + }) + return true + }) + return ranges +} diff --git a/internal/lang/rustanalyzer/mutation_apply.go b/internal/lang/rustanalyzer/mutation_apply.go new file mode 100644 index 0000000..1bdbeb9 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_apply.go @@ -0,0 +1,327 @@ +package rustanalyzer + +import ( + "strings" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantApplierImpl implements lang.MutantApplier for Rust. Unlike the Go +// analyzer, which rewrites the AST and re-renders with go/printer, we +// operate on source bytes directly: tree-sitter reports exact byte offsets +// for every node, and text-level edits keep formatting intact without a +// dedicated Rust formatter. +// +// After every mutation we re-parse the output with tree-sitter and check +// for ERROR nodes. If the mutation produced syntactically invalid code we +// return nil (no bytes, no error) — the mutation orchestrator treats that +// as "skip this mutant", matching the Go analyzer's contract. +type mutantApplierImpl struct{} + +// ApplyMutation returns the mutated file bytes, or (nil, nil) if the +// mutation can't be applied cleanly. +func (mutantApplierImpl) ApplyMutation(absPath string, site lang.MutantSite) ([]byte, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + mutated := applyBySite(tree.RootNode(), src, site) + if mutated == nil { + return nil, nil + } + if !isValidRust(mutated) { + // Re-parse check per the design doc: don't ship corrupt mutants. + return nil, nil + } + return mutated, nil +} + +// applyBySite dispatches to the operator-specific helper. 
Each helper +// returns either the mutated byte slice or nil if it couldn't find a +// matching node on the target line. +func applyBySite(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + switch site.Operator { + case "conditional_boundary", "negate_conditional", "math_operator": + return applyBinary(root, src, site) + case "boolean_substitution": + return applyBool(root, src, site) + case "return_value": + return applyReturnValue(root, src, site) + case "some_to_none": + return applySomeToNone(root, src, site) + case "branch_removal": + return applyBranchRemoval(root, src, site) + case "statement_deletion": + return applyStatementDeletion(root, src, site) + case "unwrap_removal": + return applyUnwrapRemoval(root, src, site) + case "question_mark_removal": + return applyQuestionMarkRemoval(root, src, site) + } + return nil +} + +// findOnLine returns the first node matching `pred` whose start line +// equals `line`. We keep it small: the CST walks are tiny and predicates +// stay decidable in one pass. +func findOnLine(root *sitter.Node, line int, pred func(*sitter.Node) bool) *sitter.Node { + var hit *sitter.Node + walk(root, func(n *sitter.Node) bool { + if hit != nil { + return false + } + if nodeLine(n) != line { + // We're still searching; descend into children that might + // reach the target line. + if int(n.StartPoint().Row)+1 > line || int(n.EndPoint().Row)+1 < line { + return false + } + return true + } + if pred(n) { + hit = n + return false + } + return true + }) + return hit +} + +// replaceRange returns src with the bytes [start, end) replaced by `with`. +func replaceRange(src []byte, start, end uint32, with []byte) []byte { + out := make([]byte, 0, len(src)-int(end-start)+len(with)) + out = append(out, src[:start]...) + out = append(out, with...) + out = append(out, src[end:]...) + return out +} + +// applyBinary swaps the operator of a binary_expression on the target line. 
+// We honor the site description so overlapping binaries on the same line +// (`a == b && c > d`) mutate the exact one the generator emitted. +func applyBinary(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + fromOp, toOp := parseBinaryDesc(site.Description) + if fromOp == "" { + return nil + } + var target *sitter.Node + walk(root, func(n *sitter.Node) bool { + if target != nil { + return false + } + if n.Type() != "binary_expression" || nodeLine(n) != site.Line { + return true + } + op := n.ChildByFieldName("operator") + if op != nil && op.Type() == fromOp { + target = n + return false + } + return true + }) + if target == nil { + return nil + } + op := target.ChildByFieldName("operator") + return replaceRange(src, op.StartByte(), op.EndByte(), []byte(toOp)) +} + +// parseBinaryDesc parses "X -> Y" from the mutant description. +func parseBinaryDesc(desc string) (string, string) { + parts := strings.SplitN(desc, " -> ", 2) + if len(parts) != 2 { + return "", "" + } + return parts[0], parts[1] +} + +// applyBool flips a boolean literal on the target line. +func applyBool(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + n := findOnLine(root, site.Line, func(n *sitter.Node) bool { + if n.Type() != "boolean_literal" { + return false + } + txt := nodeText(n, src) + return txt == "true" || txt == "false" + }) + if n == nil { + return nil + } + txt := nodeText(n, src) + flipped := "true" + if txt == "true" { + flipped = "false" + } + return replaceRange(src, n.StartByte(), n.EndByte(), []byte(flipped)) +} + +// applyReturnValue replaces the returned expression with +// `Default::default()`. Works for any non-unit return; tests on Option / +// unit / numeric returns will all observe either a type mismatch (caught +// by the re-parse step — wait, rustc type errors won't show in +// tree-sitter; so this is a Tier-1 operator that can produce equivalent +// mutants on some types, which we accept). 
+func applyReturnValue(root *sitter.Node, src []byte, site lang.MutantSite) []byte {
+	ret := findOnLine(root, site.Line, func(n *sitter.Node) bool {
+		return n.Type() == "return_expression"
+	})
+	if ret == nil {
+		return nil
+	}
+	// The returned expression is the return_expression's only named
+	// child; a bare `return;` has none, so there is nothing to mutate.
+	value := firstNamedChild(ret)
+	if value == nil {
+		return nil
+	}
+	return replaceRange(src, value.StartByte(), value.EndByte(), []byte("Default::default()"))
+}
+
+// firstNamedChild returns n's first named child, or nil when n is nil or
+// has no named children. Replaces the repeated loop-and-break pattern for
+// grabbing a node's single payload child.
+func firstNamedChild(n *sitter.Node) *sitter.Node {
+	if n == nil || n.NamedChildCount() == 0 {
+		return nil
+	}
+	return n.NamedChild(0)
+}
+
+// applySomeToNone replaces `return Some(x)` with `return None`.
+func applySomeToNone(root *sitter.Node, src []byte, site lang.MutantSite) []byte {
+	ret := findOnLine(root, site.Line, func(n *sitter.Node) bool {
+		return n.Type() == "return_expression"
+	})
+	if ret == nil {
+		return nil
+	}
+	value := firstNamedChild(ret)
+	if value == nil || value.Type() != "call_expression" {
+		return nil
+	}
+	fn := value.ChildByFieldName("function")
+	if fn == nil || nodeText(fn, src) != "Some" {
+		return nil
+	}
+	return replaceRange(src, value.StartByte(), value.EndByte(), []byte("None"))
+}
+
+// applyBranchRemoval empties the consequence block of an if_expression.
+// We replace the block contents with nothing so the braces remain and
+// the code still parses.
+func applyBranchRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte {
+	ifNode := findOnLine(root, site.Line, func(n *sitter.Node) bool {
+		return n.Type() == "if_expression"
+	})
+	if ifNode == nil {
+		return nil
+	}
+	body := ifNode.ChildByFieldName("consequence")
+	if body == nil {
+		return nil
+	}
+	// Preserve the outer braces; replace inner bytes with an empty body.
+	inner := bodyInnerRange(body, src)
+	if inner == nil {
+		return nil
+	}
+	return replaceRange(src, inner[0], inner[1], []byte{})
+}
+
+// bodyInnerRange returns [openBracePlusOne, closeBrace) for a block node —
+// i.e. the byte range strictly inside the braces. Returns nil if the
+// node doesn't look like a block with braces.
+func bodyInnerRange(block *sitter.Node, src []byte) []uint32 {
+	start := block.StartByte()
+	end := block.EndByte()
+	if start >= end {
+		return nil
+	}
+	if src[start] != '{' || src[end-1] != '}' {
+		return nil
+	}
+	return []uint32{start + 1, end - 1}
+}
+
+// applyStatementDeletion replaces a bare call statement with the empty
+// expression `();`. Keeps the source parseable and kills the side effect.
+func applyStatementDeletion(root *sitter.Node, src []byte, site lang.MutantSite) []byte {
+	stmt := findOnLine(root, site.Line, func(n *sitter.Node) bool {
+		return n.Type() == "expression_statement"
+	})
+	if stmt == nil {
+		return nil
+	}
+	return replaceRange(src, stmt.StartByte(), stmt.EndByte(), []byte("();"))
+}
+
+// applyUnwrapRemoval strips `.unwrap()` / `.expect(...)` from a call,
+// leaving the receiver. We find the outer call_expression, then rewrite
+// the whole call to be just the receiver.
+func applyUnwrapRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte {
+	call := findOnLine(root, site.Line, func(n *sitter.Node) bool {
+		if n.Type() != "call_expression" {
+			return false
+		}
+		fn := n.ChildByFieldName("function")
+		if fn == nil || fn.Type() != "field_expression" {
+			return false
+		}
+		field := fn.ChildByFieldName("field")
+		if field == nil {
+			return false
+		}
+		name := nodeText(field, src)
+		return name == "unwrap" || name == "expect"
+	})
+	if call == nil {
+		return nil
+	}
+	fn := call.ChildByFieldName("function")
+	receiver := fn.ChildByFieldName("value")
+	if receiver == nil {
+		return nil
+	}
+	return replaceRange(src, call.StartByte(), call.EndByte(),
+		src[receiver.StartByte():receiver.EndByte()])
+}
+
+// applyQuestionMarkRemoval strips the trailing `?` from a try_expression:
+// `expr?` parses as (try_expression <expr>) with the `?` token sitting
+// after the inner expression's end byte.
+func applyQuestionMarkRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + try := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "try_expression" + }) + if try == nil { + return nil + } + // The inner expression is the first (and only) named child. + var inner *sitter.Node + for i := 0; i < int(try.NamedChildCount()); i++ { + inner = try.NamedChild(i) + break + } + if inner == nil { + return nil + } + return replaceRange(src, try.StartByte(), try.EndByte(), + src[inner.StartByte():inner.EndByte()]) +} + +// isValidRust re-parses the mutated source and reports whether tree-sitter +// encountered any syntax errors. tree-sitter marks malformed regions with +// ERROR nodes (or sets HasError on ancestors); we check both. +func isValidRust(src []byte) bool { + tree, err := parseBytes(src) + if err != nil || tree == nil { + return false + } + defer tree.Close() + root := tree.RootNode() + if root == nil { + return false + } + return !root.HasError() +} + diff --git a/internal/lang/rustanalyzer/mutation_generate.go b/internal/lang/rustanalyzer/mutation_generate.go new file mode 100644 index 0000000..7e3bc7f --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_generate.go @@ -0,0 +1,304 @@ +package rustanalyzer + +import ( + "fmt" + "sort" + "strings" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantGeneratorImpl implements lang.MutantGenerator for Rust. It emits +// canonical operators (conditional_boundary, negate_conditional, +// math_operator, return_value, boolean_substitution, branch_removal, +// statement_deletion) plus the Rust-specific operators defined in the +// design doc: unwrap_removal, some_to_none, question_mark_removal. +// +// `incdec` is deliberately absent — Rust has no `++`/`--` operators. 
+type mutantGeneratorImpl struct{} + +// GenerateMutants walks the CST and emits a MutantSite for each qualifying +// node on a changed, non-disabled line. The output is deterministic: we +// sort by (line, operator, description) before returning. +func (mutantGeneratorImpl) GenerateMutants(absPath string, fc diff.FileChange, disabled map[int]bool) ([]lang.MutantSite, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, err + } + defer tree.Close() + + var out []lang.MutantSite + walk(tree.RootNode(), func(n *sitter.Node) bool { + line := nodeLine(n) + if !fc.ContainsLine(line) || disabled[line] { + return true + } + out = append(out, mutantsFor(fc.Path, line, n, src)...) + return true + }) + sort.SliceStable(out, func(i, j int) bool { + if out[i].Line != out[j].Line { + return out[i].Line < out[j].Line + } + if out[i].Operator != out[j].Operator { + return out[i].Operator < out[j].Operator + } + return out[i].Description < out[j].Description + }) + return out, nil +} + +// mutantsFor dispatches on the node kind. Nodes that don't match any +// operator return nil — the walker simply moves on. +func mutantsFor(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + switch n.Type() { + case "binary_expression": + return binaryMutants(file, line, n, src) + case "boolean_literal": + return boolMutants(file, line, n, src) + case "return_expression": + return returnMutants(file, line, n, src) + case "if_expression": + return ifMutants(file, line, n, src) + case "expression_statement": + return exprStmtMutants(file, line, n, src) + case "call_expression": + return unwrapMutants(file, line, n, src) + case "try_expression": + return tryMutants(file, line, n) + case "scoped_identifier", "identifier": + return nil + } + return nil +} + +// binaryMutants covers conditional_boundary, negate_conditional, and +// math_operator. Shape: (binary_expression operator: "" ...). Skip +// unhandled operators so we don't mutate e.g. 
bit-shift tokens. +func binaryMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + opNode := n.ChildByFieldName("operator") + if opNode == nil { + return nil + } + op := opNode.Type() + replacements := map[string]string{ + ">": ">=", + "<": "<=", + ">=": ">", + "<=": "<", + "==": "!=", + "!=": "==", + "+": "-", + "-": "+", + "*": "/", + "/": "*", + } + newOp, ok := replacements[op] + if !ok { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", op, newOp), + Operator: binaryOperatorName(op, newOp), + }} +} + +// binaryOperatorName classifies a source/target operator pair into one of +// the canonical tier-1 operator names. The classification matches the Go +// analyzer so operator stats stay comparable across languages. +func binaryOperatorName(from, to string) string { + if isBoundary(from) || isBoundary(to) { + return "conditional_boundary" + } + if isComparison(from) || isComparison(to) { + return "negate_conditional" + } + if isMath(from) || isMath(to) { + return "math_operator" + } + return "unknown" +} + +func isBoundary(op string) bool { + return op == ">" || op == ">=" || op == "<" || op == "<=" +} + +func isComparison(op string) bool { + return op == "==" || op == "!=" +} + +func isMath(op string) bool { + return op == "+" || op == "-" || op == "*" || op == "/" +} + +// boolMutants flips true <-> false. Tree-sitter exposes boolean literals +// as boolean_literal whose Type() is literally "boolean_literal"; the +// source text is either "true" or "false". 
+func boolMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + text := nodeText(n, src) + if text != "true" && text != "false" { + return nil + } + flipped := "true" + if text == "true" { + flipped = "false" + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", text, flipped), + Operator: "boolean_substitution", + }} +} + +// returnMutants covers two Rust-specific cases under the canonical +// return_value operator name: `Default::default()` substitution and +// `Some(x) -> None` (an optional-return swap called some_to_none in the +// design doc). +// +// A bare `return;` (unit return) has no expression to mutate, so we skip. +func returnMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + // A return_expression has at most one named child — the returned value. + var value *sitter.Node + for i := 0; i < int(n.NamedChildCount()); i++ { + value = n.NamedChild(i) + break + } + if value == nil { + // `return;` — nothing to mutate. + return nil + } + + var out []lang.MutantSite + if someVal, ok := matchSome(value, src); ok { + out = append(out, lang.MutantSite{ + File: file, + Line: line, + Description: fmt.Sprintf("Some(%s) -> None", someVal), + Operator: "some_to_none", + }) + } + out = append(out, lang.MutantSite{ + File: file, + Line: line, + Description: "replace return value with Default::default()", + Operator: "return_value", + }) + return out +} + +// matchSome reports whether value is a `Some(expr)` call expression and +// returns the inner expression text if so. We use this to generate a +// descriptive mutant description ("Some(x) -> None") rather than a generic +// "return_value" blurb. Tree-sitter parses `Some(x)` as a call_expression +// whose function is the identifier `Some`. 
+func matchSome(value *sitter.Node, src []byte) (string, bool) { + if value == nil || value.Type() != "call_expression" { + return "", false + } + fn := value.ChildByFieldName("function") + if fn == nil || nodeText(fn, src) != "Some" { + return "", false + } + args := value.ChildByFieldName("arguments") + if args == nil { + return "", false + } + // Grab the text between the parens, trimmed. + argText := nodeText(args, src) + argText = strings.TrimPrefix(argText, "(") + argText = strings.TrimSuffix(argText, ")") + return strings.TrimSpace(argText), true +} + +// ifMutants empties an if_expression body (branch_removal). +func ifMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + body := n.ChildByFieldName("consequence") + if body == nil || body.NamedChildCount() == 0 { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove if body", + Operator: "branch_removal", + }} +} + +// exprStmtMutants deletes a bare call statement — the Rust analog of the +// Go statement_deletion case. A semicolon-terminated expression whose +// payload is a call_expression is the canonical candidate; other bare +// statements (assignments, let bindings) are left alone because deleting +// them tends to produce un-killable dead-code mutants. +func exprStmtMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + var payload *sitter.Node + for i := 0; i < int(n.NamedChildCount()); i++ { + c := n.NamedChild(i) + if c == nil { + continue + } + payload = c + break + } + if payload == nil || payload.Type() != "call_expression" { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove call statement", + Operator: "statement_deletion", + }} +} + +// unwrapMutants emits the Rust-specific unwrap_removal operator: a method +// call whose name is `unwrap` or `expect` has its receiver preserved but +// the trailing `.unwrap()` / `.expect(...)` stripped. 
Tree-sitter exposes +// `foo.unwrap()` as: +// +// (call_expression +// function: (field_expression value: ... field: (field_identifier))) +// +// We look for that shape with field name "unwrap" or "expect". +func unwrapMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + fn := n.ChildByFieldName("function") + if fn == nil || fn.Type() != "field_expression" { + return nil + } + field := fn.ChildByFieldName("field") + if field == nil { + return nil + } + name := nodeText(field, src) + if name != "unwrap" && name != "expect" { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("strip .%s()", name), + Operator: "unwrap_removal", + }} +} + +// tryMutants emits the question_mark_removal operator for try expressions +// (`expr?`). Tree-sitter models `foo()?` as (try_expression ...), making +// detection straightforward. +func tryMutants(file string, line int, n *sitter.Node) []lang.MutantSite { + // A try_expression always has exactly one inner expression; if that's + // missing we have malformed input, so bail. + if n.NamedChildCount() == 0 { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "strip trailing ?", + Operator: "question_mark_removal", + }} +} diff --git a/internal/lang/rustanalyzer/parse.go b/internal/lang/rustanalyzer/parse.go new file mode 100644 index 0000000..a4eae7e --- /dev/null +++ b/internal/lang/rustanalyzer/parse.go @@ -0,0 +1,108 @@ +// Package rustanalyzer implements the lang.Language interface for Rust. It +// is blank-imported from cmd/diffguard/main.go so Rust gets registered at +// process start. 
+// +// One file per concern, mirroring the Go analyzer layout: +// - rustanalyzer.go -- Language + init()/Register +// - parse.go -- tree-sitter setup, CST helpers +// - sizes.go -- FunctionExtractor +// - complexity.go -- ComplexityCalculator + ComplexityScorer +// - deps.go -- ImportResolver +// - mutation_generate.go-- MutantGenerator +// - mutation_apply.go -- MutantApplier +// - mutation_annotate.go-- AnnotationScanner +// - testrunner.go -- TestRunner (wraps cargo test) +package rustanalyzer + +import ( + "context" + "os" + "sync" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/rust" +) + +// rustLang is the cached tree-sitter Rust grammar handle. Because building +// the grammar involves cgo bridging, we do it once and reuse the pointer +// rather than paying for it on every parse. Lazy-init keeps process start +// fast — diffguard binaries that never touch a .rs file pay nothing. +var ( + rustLangOnce sync.Once + rustLang *sitter.Language +) + +// rustLanguage returns the tree-sitter Rust grammar, building it on first +// use. The sitter.Language struct is safe to share across goroutines. +func rustLanguage() *sitter.Language { + rustLangOnce.Do(func() { + rustLang = rust.GetLanguage() + }) + return rustLang +} + +// parseFile reads absPath from disk and returns the parsed tree plus the +// source bytes. Callers get back (nil, nil, err) on read error. +func parseFile(absPath string) (*sitter.Tree, []byte, error) { + src, err := os.ReadFile(absPath) + if err != nil { + return nil, nil, err + } + tree, err := parseBytes(src) + if err != nil { + return nil, nil, err + } + return tree, src, nil +} + +// parseBytes returns a *sitter.Tree for src. Unlike sitter.Parse which +// returns only the root node, we return the Tree so callers can hold onto +// it and Close it when done to release the underlying C allocation. 
+func parseBytes(src []byte) (*sitter.Tree, error) { + parser := sitter.NewParser() + parser.SetLanguage(rustLanguage()) + return parser.ParseCtx(context.Background(), nil, src) +} + +// walk invokes fn on every node in the subtree rooted at n. The walk is a +// plain depth-first pre-order traversal using NamedChildCount/NamedChild — +// matches the style used by the sitter example code and avoids the trickier +// TreeCursor API. Returning false from fn prunes the subtree. +func walk(n *sitter.Node, fn func(*sitter.Node) bool) { + if n == nil { + return + } + if !fn(n) { + return + } + count := int(n.ChildCount()) + for i := 0; i < count; i++ { + walk(n.Child(i), fn) + } +} + +// nodeLine returns the 1-based start line of n. tree-sitter uses 0-based +// coordinates internally; every diffguard interface (FunctionInfo, MutantSite) +// is 1-based, so we convert here once. +func nodeLine(n *sitter.Node) int { + return int(n.StartPoint().Row) + 1 +} + +// nodeEndLine returns the 1-based end line of n (inclusive of the last line +// any part of n occupies). We subtract one when EndPoint is exactly at a +// line boundary (column 0) because tree-sitter reports the position one past +// the last byte — e.g. a function whose closing brace is the last char on +// line 10 has EndPoint at (11, 0). Without the adjustment we'd report end +// lines that disagree with the Go analyzer's behavior. +func nodeEndLine(n *sitter.Node) int { + end := n.EndPoint() + if end.Column == 0 && end.Row > 0 { + return int(end.Row) + } + return int(end.Row) + 1 +} + +// nodeText returns the byte slice of src covering n. 
+func nodeText(n *sitter.Node, src []byte) string { + return string(src[n.StartByte():n.EndByte()]) +} diff --git a/internal/lang/rustanalyzer/rustanalyzer.go b/internal/lang/rustanalyzer/rustanalyzer.go new file mode 100644 index 0000000..7b514a0 --- /dev/null +++ b/internal/lang/rustanalyzer/rustanalyzer.go @@ -0,0 +1,65 @@ +package rustanalyzer + +import ( + "strings" + "time" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// defaultRustTestTimeout is the per-mutant test timeout applied when the +// caller did not set one in TestRunConfig. Rust `cargo test` cold-starts +// are slow (compile + link per mutant) so the default is generous. +const defaultRustTestTimeout = 120 * time.Second + +// Language is the Rust implementation of lang.Language. Like the Go +// analyzer, it holds no state; sub-component impls are stateless. +type Language struct{} + +// Name returns the canonical language identifier used by the registry and +// by report section suffixes. +func (*Language) Name() string { return "rust" } + +// FileFilter returns the Rust-specific file selection rules used by the +// diff parser: .rs extension; any path segment literally equal to `tests` +// marks the file as an integration test (i.e. excluded from analysis). +func (*Language) FileFilter() lang.FileFilter { + return lang.FileFilter{ + Extensions: []string{".rs"}, + IsTestFile: isRustTestFile, + DiffGlobs: []string{"*.rs"}, + } +} + +// Sub-component accessors. Stateless impls return fresh zero-value structs. 
+func (*Language) ComplexityCalculator() lang.ComplexityCalculator { return complexityImpl{} }
+func (*Language) ComplexityScorer() lang.ComplexityScorer         { return complexityImpl{} }
+func (*Language) FunctionExtractor() lang.FunctionExtractor       { return sizesImpl{} }
+func (*Language) ImportResolver() lang.ImportResolver             { return depsImpl{} }
+func (*Language) MutantGenerator() lang.MutantGenerator           { return mutantGeneratorImpl{} }
+func (*Language) MutantApplier() lang.MutantApplier               { return mutantApplierImpl{} }
+func (*Language) AnnotationScanner() lang.AnnotationScanner       { return annotationScannerImpl{} }
+func (*Language) TestRunner() lang.TestRunner                     { return newTestRunner() }
+
+// isRustTestFile reports whether path points at a Rust integration test.
+// Per the design doc, a file counts as a test file exactly when some path
+// segment is literally `tests`. Inline `#[cfg(test)] mod tests { ... }`
+// blocks live inside ordinary source files, which are still analyzed, so
+// they cannot be detected from the path alone and are ignored here.
+func isRustTestFile(path string) bool {
+	// Treat `\` like `/` so Windows-style inputs classify identically.
+	normalized := strings.ReplaceAll(path, "\\", "/")
+	for _, segment := range strings.Split(normalized, "/") {
+		if segment == "tests" {
+			return true
+		}
+	}
+	return false
+}
+
+// init registers the Rust analyzer. The blank import in cmd/diffguard/main.go
+// triggers this; external callers wanting Rust must also blank-import.
+func init() {
+	lang.Register(&Language{})
+	lang.RegisterManifest("Cargo.toml", "rust")
+}
diff --git a/internal/lang/rustanalyzer/rustanalyzer_test.go b/internal/lang/rustanalyzer/rustanalyzer_test.go
new file mode 100644
index 0000000..cba5c52
--- /dev/null
+++ b/internal/lang/rustanalyzer/rustanalyzer_test.go
@@ -0,0 +1,70 @@
+package rustanalyzer
+
+import (
+	"testing"
+
+	"github.com/0xPolygon/diffguard/internal/lang"
+)
+
+// TestLanguageRegistration verifies the Rust analyzer registered itself
+// and exposes the correct name + file filter. The init() function runs on
+// package load so the registry should already contain "rust" by the time
+// this test executes.
+func TestLanguageRegistration(t *testing.T) {
+	l, ok := lang.Get("rust")
+	if !ok {
+		t.Fatal("rust language not registered")
+	}
+	if l.Name() != "rust" {
+		t.Errorf("Name() = %q, want %q", l.Name(), "rust")
+	}
+	ff := l.FileFilter()
+	if len(ff.Extensions) != 1 || ff.Extensions[0] != ".rs" {
+		t.Errorf("Extensions = %v, want [.rs]", ff.Extensions)
+	}
+	if len(ff.DiffGlobs) != 1 || ff.DiffGlobs[0] != "*.rs" {
+		t.Errorf("DiffGlobs = %v, want [*.rs]", ff.DiffGlobs)
+	}
+}
+
+func TestIsRustTestFile(t *testing.T) {
+	cases := []struct {
+		path string
+		want bool
+	}{
+		// Integration tests live under a `tests` directory at any depth.
+		{"tests/integration.rs", true},
+		{"crates/foo/tests/integration.rs", true},
+		{"tests/subdir/more.rs", true},
+		// Source files never count as tests, even when the path mentions
+		// the word "test" in a non-segment context.
+		{"src/lib.rs", false},
+		{"src/tester.rs", false},
+		{"src/foo/bar.rs", false},
+		// A filename that merely starts with "tests" is not a segment.
+		{"src/tests_common.rs", false},
+		// Windows separators should behave the same for consistency
+		// across platforms.
+ {`tests\integration.rs`, true}, + } + for _, tc := range cases { + got := isRustTestFile(tc.path) + if got != tc.want { + t.Errorf("isRustTestFile(%q) = %v, want %v", tc.path, got, tc.want) + } + } +} + +func TestFileFilterIncludesSource(t *testing.T) { + l, _ := lang.Get("rust") + ff := l.FileFilter() + if !ff.IncludesSource("src/lib.rs") { + t.Error("expected src/lib.rs to be included") + } + if ff.IncludesSource("tests/integration.rs") { + t.Error("expected tests/integration.rs to be excluded") + } + if ff.IncludesSource("build.py") { + t.Error("expected non-.rs files to be excluded") + } +} diff --git a/internal/lang/rustanalyzer/sizes.go b/internal/lang/rustanalyzer/sizes.go new file mode 100644 index 0000000..01181ec --- /dev/null +++ b/internal/lang/rustanalyzer/sizes.go @@ -0,0 +1,198 @@ +package rustanalyzer + +import ( + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// sizesImpl implements lang.FunctionExtractor for Rust via tree-sitter. A +// single walk produces both the per-function sizes and the overall file +// size — the file-size row is cheap to compute from the raw byte buffer so +// we don't bother the CST for that number. +type sizesImpl struct{} + +// ExtractFunctions parses absPath and returns functions overlapping the +// diff's changed regions plus the overall file size. A parse failure is +// treated as "skip this file" to match the Go analyzer's (nil, nil, nil) +// return convention. 
+func (sizesImpl) ExtractFunctions(absPath string, fc diff.FileChange) ([]lang.FunctionSize, *lang.FileSize, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil, nil + } + defer tree.Close() + + fns := collectFunctions(tree.RootNode(), src) + fileSize := &lang.FileSize{Path: fc.Path, Lines: countLines(src)} + + var results []lang.FunctionSize + for _, fn := range fns { + if !fc.OverlapsRange(fn.startLine, fn.endLine) { + continue + } + results = append(results, lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: fn.startLine, + EndLine: fn.endLine, + Name: fn.name, + }, + Lines: fn.endLine - fn.startLine + 1, + }) + } + + // Deterministic order matters for report stability: sort by start line, + // then by name so two functions declared on the same line never flip. + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, fileSize, nil +} + +// rustFunction is the internal record produced by the extractor. It's +// deliberately wider than FunctionSize/FunctionComplexity because the +// complexity analyzer needs the node to walk the body; keeping one record +// shape avoids re-parsing or re-walking. +type rustFunction struct { + name string + startLine int + endLine int + body *sitter.Node // the body block, or nil for e.g. trait methods with no default impl + node *sitter.Node // the entire function_item / declaration node +} + +// collectFunctions walks the CST and returns every function_item and every +// method inside an impl_item. Nested functions are reported as separate +// entries to match the spec. Trait default methods are included too — +// their function_item has a body. 
+// +// Name extraction rules: +// +// fn foo() -> "foo" +// impl Type { fn bar() } -> "Type::bar" +// impl Trait for Type { fn baz() } -> "Type::baz" +// impl Foo { fn qux() } -> "Foo::qux" +// +// The grammar uses a uniform node kind `function_item` for every function +// definition regardless of context; its parent (`declaration_list` of an +// `impl_item`) tells us the receiver type. +func collectFunctions(root *sitter.Node, src []byte) []rustFunction { + var fns []rustFunction + walk(root, func(n *sitter.Node) bool { + if n.Type() != "function_item" { + return true + } + fn := buildRustFunction(n, src) + if fn != nil { + fns = append(fns, *fn) + } + // Keep descending: a function may contain nested closures or + // function items the spec treats as separate entries. + return true + }) + return fns +} + +// buildRustFunction constructs a rustFunction record from a function_item +// node. Returns nil if the name is unparseable. +func buildRustFunction(n *sitter.Node, src []byte) *rustFunction { + nameNode := n.ChildByFieldName("name") + if nameNode == nil { + return nil + } + baseName := nodeText(nameNode, src) + + fullName := baseName + if typeName := enclosingImplType(n, src); typeName != "" { + fullName = typeName + "::" + baseName + } + + body := n.ChildByFieldName("body") + return &rustFunction{ + name: fullName, + startLine: nodeLine(n), + endLine: nodeEndLine(n), + body: body, + node: n, + } +} + +// enclosingImplType walks up parents looking for an impl_item and returns +// its "type" field's text (the `Type` in `impl Type { ... }` or +// `impl Trait for Type { ... }`). Returns "" if the function is not inside +// an impl block. 
+func enclosingImplType(n *sitter.Node, src []byte) string { + for parent := n.Parent(); parent != nil; parent = parent.Parent() { + if parent.Type() == "impl_item" { + typeNode := parent.ChildByFieldName("type") + if typeNode == nil { + return "" + } + return simpleTypeName(typeNode, src) + } + } + return "" +} + +// simpleTypeName strips generics and pathing from a type node, returning +// just the trailing identifier (`Foo` from `path::to::Foo`). The +// impl-type field is usually already simple but the grammar allows any +// type expression here, including `generic_type` with a `type_arguments` +// child and `scoped_type_identifier` with a `path::`/`name` pair. +func simpleTypeName(n *sitter.Node, src []byte) string { + switch n.Type() { + case "type_identifier", "primitive_type": + return nodeText(n, src) + case "generic_type": + if inner := n.ChildByFieldName("type"); inner != nil { + return simpleTypeName(inner, src) + } + case "scoped_type_identifier": + if name := n.ChildByFieldName("name"); name != nil { + return nodeText(name, src) + } + case "reference_type": + if inner := n.ChildByFieldName("type"); inner != nil { + return simpleTypeName(inner, src) + } + } + // Fallback: take the last identifier-looking child so unusual shapes + // don't collapse to an empty name. + for i := int(n.ChildCount()) - 1; i >= 0; i-- { + c := n.Child(i) + if c == nil { + continue + } + if c.Type() == "type_identifier" || c.Type() == "identifier" { + return nodeText(c, src) + } + } + return nodeText(n, src) +} + +// countLines returns the number of source lines in src. An empty file is +// 0, a file without a trailing newline still counts its final line, a file +// with a trailing newline counts exactly that many newline-terminated +// lines. 
+func countLines(src []byte) int { + if len(src) == 0 { + return 0 + } + count := 0 + for _, b := range src { + if b == '\n' { + count++ + } + } + if src[len(src)-1] != '\n' { + count++ + } + return count +} diff --git a/internal/lang/rustanalyzer/testrunner.go b/internal/lang/rustanalyzer/testrunner.go new file mode 100644 index 0000000..a530e59 --- /dev/null +++ b/internal/lang/rustanalyzer/testrunner.go @@ -0,0 +1,186 @@ +package rustanalyzer + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// testRunnerImpl implements lang.TestRunner for Rust using `cargo test`. +// Unlike Go's overlay-based runner, Cargo has no build-time file +// substitution, so we use a temp-copy isolation strategy: +// +// 1. Acquire a per-file mutex so concurrent mutants on the same file +// serialize. Different files run in parallel. +// 2. Back the original up. +// 3. Copy the mutant bytes over the original in place. +// 4. Run `cargo test` with a timeout. +// 5. Restore the original from the backup — always, via defer — even +// if cargo panics or we panic. +type testRunnerImpl struct { + // cmd is the executable to run. Normally "cargo"; tests override this + // with a fake binary that exercises the kill / survive / timeout paths + // without needing a real Cargo toolchain. + cmd string + // extraArgs are prepended before the normal cargo test args. Tests use + // this to swap in a no-op command ("sh -c 'exit 0'") by setting + // cmd="sh" and extraArgs=["-c","..."]. + extraArgs []string + + mu sync.Mutex + locks map[string]*sync.Mutex +} + +// newTestRunner builds a fresh runner. All fields are zero-value except +// the cmd which defaults to "cargo". Tests construct their own via +// newTestRunnerWithCommand. 
+func newTestRunner() *testRunnerImpl { + return &testRunnerImpl{cmd: "cargo"} +} + +// fileLock returns the per-file mutex for the given path, lazily +// initializing the entry on first access. The outer lock (r.mu) guards +// only the map; the returned mutex is what the caller actually holds +// while mutating the source file. +func (r *testRunnerImpl) fileLock(path string) *sync.Mutex { + r.mu.Lock() + defer r.mu.Unlock() + if r.locks == nil { + r.locks = map[string]*sync.Mutex{} + } + m, ok := r.locks[path] + if !ok { + m = &sync.Mutex{} + r.locks[path] = m + } + return m +} + +// RunTest implements the lang.TestRunner contract. Returning (true, ..., +// nil) signals the mutant was killed (test exit != 0); (false, ..., nil) +// signals survived (tests passed); (false, "", err) signals the runner +// itself couldn't run. +func (r *testRunnerImpl) RunTest(cfg lang.TestRunConfig) (bool, string, error) { + // Per-file serialization: two concurrent mutants on the same file + // would race on the in-place swap below. + lock := r.fileLock(cfg.OriginalFile) + lock.Lock() + defer lock.Unlock() + + mutantBytes, err := os.ReadFile(cfg.MutantFile) + if err != nil { + return false, "", fmt.Errorf("reading mutant file: %w", err) + } + originalBytes, err := os.ReadFile(cfg.OriginalFile) + if err != nil { + return false, "", fmt.Errorf("reading original file: %w", err) + } + + // Defer restore BEFORE writing the mutant so a panic between the + // write and the test run can't leave a corrupt source file behind. + restore := func() { + // Best-effort restore; we don't have a sane way to report an + // error here and the harness is expected to panic-safely run. 
+ _ = os.WriteFile(cfg.OriginalFile, originalBytes, 0644) + } + defer restore() + + if err := os.WriteFile(cfg.OriginalFile, mutantBytes, 0644); err != nil { + return false, "", fmt.Errorf("writing mutant over original: %w", err) + } + + timeout := cfg.Timeout + if timeout <= 0 { + timeout = defaultRustTestTimeout + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + args := r.buildArgs(cfg) + cmd := exec.CommandContext(ctx, r.cmd, args...) + cmd.Dir = cfg.RepoPath + cmd.Env = append(os.Environ(), "CARGO_INCREMENTAL=0") + var combined bytes.Buffer + cmd.Stdout = &combined + cmd.Stderr = &combined + + runErr := cmd.Run() + output := combined.String() + + // A timeout is reported as "killed" — the mutant made tests so slow + // they couldn't finish within the allotted window, which is a + // meaningful signal in line with the Go analyzer's treatment. + if ctx.Err() == context.DeadlineExceeded { + return true, output, nil + } + if runErr != nil { + return true, output, nil + } + return false, output, nil +} + +// buildArgs returns the argv after the command name. When the caller +// supplied extraArgs (tests), we honor those; otherwise we build a normal +// `cargo test` invocation with the pattern as a positional filter. +func (r *testRunnerImpl) buildArgs(cfg lang.TestRunConfig) []string { + if len(r.extraArgs) > 0 { + return append([]string(nil), r.extraArgs...) + } + args := []string{"test"} + if cfg.TestPattern != "" { + args = append(args, cfg.TestPattern) + } + return args +} + +// cargoTestArgs is exposed to tests so they can assert the argv we'd send +// to cargo when no overrides are in play. +func cargoTestArgs(cfg lang.TestRunConfig) []string { + r := &testRunnerImpl{} + return r.buildArgs(cfg) +} + +// backupAndRestore is exposed for tests that want to verify the +// restore-on-panic guarantee without actually invoking cargo. 
+// +// It writes `mutantBytes` over `path`, runs `work`, and restores +// `originalBytes` via defer. Returns the original unmodified bytes so the +// caller can assert restoration. +// +//nolint:unused // used by testrunner_test.go +func backupAndRestore(path string, originalBytes, mutantBytes []byte, work func()) (restored []byte, err error) { + defer func() { + _ = os.WriteFile(path, originalBytes, 0644) + restored, err = os.ReadFile(path) + }() + if err := os.WriteFile(path, mutantBytes, 0644); err != nil { + return nil, err + } + work() + return nil, nil +} + +// AtomicCopy copies src to dst; used to build a file-level "backup" +// location if a caller prefers backing up to a sibling path rather than +// holding bytes in memory. We don't use this from RunTest (in-memory is +// cheap for source files) but leave it here for future runners that may +// need on-disk backups. +// +//nolint:unused +func AtomicCopy(src, dst string) error { + data, err := os.ReadFile(src) + if err != nil { + return err + } + tmp := filepath.Join(filepath.Dir(dst), ".diffguard-backup-tmp") + if err := os.WriteFile(tmp, data, 0644); err != nil { + return err + } + return os.Rename(tmp, dst) +} diff --git a/internal/mutation/tiers.go b/internal/mutation/tiers.go index 14e9bb9..3133b48 100644 --- a/internal/mutation/tiers.go +++ b/internal/mutation/tiers.go @@ -46,11 +46,23 @@ func (t Tier) String() string { // operatorTier maps a mutation operator name (as set on Mutant.Operator) to // its tier. Unknown operators default to TierSemantic so a new operator // doesn't silently become report-only noise. +// +// Language-specific operators are listed alongside the canonical ones — the +// tier reflects the signal quality of a surviving mutant, which is a +// property of what the mutation encodes rather than which language it +// targets. 
Rust's `unwrap_removal` and `some_to_none` strip error-handling +// that well-tested code almost always exercises, so both sit in Tier 1 +// alongside negate_conditional. Rust's `question_mark_removal` also +// disables error propagation but equivalent-mutant rate is higher (early +// returns can be substituted by the caller's own match), so it lands in +// Tier 2. func operatorTier(op string) Tier { switch op { - case "negate_conditional", "conditional_boundary", "return_value", "math_operator": + case "negate_conditional", "conditional_boundary", "return_value", "math_operator", + "unwrap_removal", "some_to_none": return TierLogic - case "boolean_substitution", "incdec": + case "boolean_substitution", "incdec", + "question_mark_removal": return TierSemantic case "statement_deletion", "branch_removal": return TierObservability From e4e0b1a4ebd2ae1e530d50e7e641a54c24ce139e Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:11:10 -0400 Subject: [PATCH 10/38] =?UTF-8?q?feat(rust):=20C2=20=E2=80=94=20FunctionEx?= =?UTF-8?q?tractor=20via=20tree-sitter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts standalone functions, impl methods (inherent and trait-impl), and nested functions as separate entries. Method names are prefixed with their impl type: `impl Counter { fn new }` -> "Counter::new", `impl Named for Counter { fn name }` -> "Counter::name". Nested functions inside method bodies do NOT inherit the impl prefix (the walk-up stops at the nearest function boundary), matching the spec's "treated as separate" requirement. Line ranges are 1-based and inclusive, consistent with the Go analyzer. Testdata fixture covers every function form plus the filter-by-changed- region path; countLines is unit-tested for edge cases (empty, trailing newline, bare text). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/rustanalyzer/helpers_test.go | 10 ++ internal/lang/rustanalyzer/sizes.go | 22 ++- internal/lang/rustanalyzer/sizes_test.go | 163 ++++++++++++++++++ .../lang/rustanalyzer/testdata/functions.rs | 35 ++++ 4 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 internal/lang/rustanalyzer/helpers_test.go create mode 100644 internal/lang/rustanalyzer/sizes_test.go create mode 100644 internal/lang/rustanalyzer/testdata/functions.rs diff --git a/internal/lang/rustanalyzer/helpers_test.go b/internal/lang/rustanalyzer/helpers_test.go new file mode 100644 index 0000000..09c5b10 --- /dev/null +++ b/internal/lang/rustanalyzer/helpers_test.go @@ -0,0 +1,10 @@ +package rustanalyzer + +import "os" + +// writeFile is a tiny helper shared across the rustanalyzer test files. +// We define it here (rather than importing testutil) so each _test.go +// file can stay self-contained in what it inspects. +func writeFile(path string, data []byte) error { + return os.WriteFile(path, data, 0644) +} diff --git a/internal/lang/rustanalyzer/sizes.go b/internal/lang/rustanalyzer/sizes.go index 01181ec..bf0271d 100644 --- a/internal/lang/rustanalyzer/sizes.go +++ b/internal/lang/rustanalyzer/sizes.go @@ -124,13 +124,25 @@ func buildRustFunction(n *sitter.Node, src []byte) *rustFunction { } } -// enclosingImplType walks up parents looking for an impl_item and returns -// its "type" field's text (the `Type` in `impl Type { ... }` or -// `impl Trait for Type { ... }`). Returns "" if the function is not inside -// an impl block. +// enclosingImplType walks up parents looking for the closest enclosing +// impl_item and returns its "type" field's text (the `Type` in +// `impl Type { ... }` or `impl Trait for Type { ... }`). If we encounter +// a function_item or closure_expression first, the candidate function is +// nested inside another function and should not inherit an impl prefix — +// it stays a bare standalone name. 
+// +// Tree-sitter Rust uses the "type" field name for `impl Type` and +// `impl Trait for Type` alike (the trait, when present, lives under the +// "trait" field), so the same lookup works for both forms. func enclosingImplType(n *sitter.Node, src []byte) string { for parent := n.Parent(); parent != nil; parent = parent.Parent() { - if parent.Type() == "impl_item" { + switch parent.Type() { + case "function_item", "closure_expression": + // Reached a nesting boundary before any impl — the function + // is defined inside another function's body and should not + // carry the outer impl's type prefix. + return "" + case "impl_item": typeNode := parent.ChildByFieldName("type") if typeNode == nil { return "" diff --git a/internal/lang/rustanalyzer/sizes_test.go b/internal/lang/rustanalyzer/sizes_test.go new file mode 100644 index 0000000..6b63265 --- /dev/null +++ b/internal/lang/rustanalyzer/sizes_test.go @@ -0,0 +1,163 @@ +package rustanalyzer + +import ( + "math" + "path/filepath" + "sort" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// fullRegion returns a FileChange covering every line so tests can assert +// against every function in the fixture without threading line numbers. +func fullRegion(path string) diff.FileChange { + return diff.FileChange{ + Path: path, + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } +} + +func TestExtractFunctions_AllForms(t *testing.T) { + absPath, err := filepath.Abs("testdata/functions.rs") + if err != nil { + t.Fatal(err) + } + s := sizesImpl{} + fns, fsize, err := s.ExtractFunctions(absPath, fullRegion("testdata/functions.rs")) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if fsize == nil { + t.Fatal("expected non-nil file size") + } + if fsize.Lines == 0 { + t.Error("file size reports zero lines") + } + + // Collect names and assert the expected set appears. 
Tolerate order
+	// by sorting; ExtractFunctions already sorts by (line, name) but
+	// asserting on a set is more resilient to minor CST shape changes.
+	names := make([]string, 0, len(fns))
+	for _, fn := range fns {
+		names = append(names, fn.Name)
+	}
+	sort.Strings(names)
+
+	expected := map[string]bool{
+		"standalone":         false,
+		"Counter::new":       false,
+		"Counter::increment": false,
+		"nested_helper":      false, // nested fns are separate entries
+		"Named::name":        false, // default (trait-declared) method is not in this fixture
+		"Counter::name":      false, // trait-impl methods attach to the impl type, not the trait
+	}
+	for _, name := range names {
+		if _, ok := expected[name]; ok {
+			expected[name] = true
+		}
+	}
+
+	mustHave := []string{"standalone", "Counter::new", "Counter::increment", "nested_helper", "Counter::name"}
+	for _, n := range mustHave {
+		if !expected[n] {
+			t.Errorf("missing expected function %q (got %v)", n, names)
+		}
+	}
+}
+
+func TestExtractFunctions_LineRanges(t *testing.T) {
+	absPath, _ := filepath.Abs("testdata/functions.rs")
+	fns, _, err := sizesImpl{}.ExtractFunctions(absPath, fullRegion("testdata/functions.rs"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, fn := range fns {
+		if fn.Line <= 0 {
+			t.Errorf("%s: Line = %d, want > 0 (1-based)", fn.Name, fn.Line)
+		}
+		if fn.EndLine < fn.Line {
+			t.Errorf("%s: EndLine %d < Line %d", fn.Name, fn.EndLine, fn.Line)
+		}
+		if fn.Lines != fn.EndLine-fn.Line+1 {
+			t.Errorf("%s: Lines = %d, want %d", fn.Name, fn.Lines, fn.EndLine-fn.Line+1)
+		}
+	}
+}
+
+func TestExtractFunctions_FilterToChangedRegion(t *testing.T) {
+	absPath, _ := filepath.Abs("testdata/functions.rs")
+
+	// Narrow region that only covers the standalone fn (lines 5-7 in the
+	// fixture). The impl methods should be filtered out.
+ fc := diff.FileChange{ + Path: "testdata/functions.rs", + Regions: []diff.ChangedRegion{{StartLine: 5, EndLine: 7}}, + } + fns, _, err := sizesImpl{}.ExtractFunctions(absPath, fc) + if err != nil { + t.Fatal(err) + } + names := []string{} + for _, fn := range fns { + names = append(names, fn.Name) + } + sort.Strings(names) + + // Must contain "standalone" and exclude the impl methods. + foundStandalone := false + for _, n := range names { + if n == "standalone" { + foundStandalone = true + } + if n == "Counter::new" || n == "Counter::name" { + t.Errorf("unexpected function %q in narrow region, got %v", n, names) + } + } + if !foundStandalone { + t.Errorf("expected standalone in narrow region, got %v", names) + } +} + +func TestExtractFunctions_EmptyFile(t *testing.T) { + // Tree-sitter tolerates an empty file and produces an empty source_file + // node — we should return no functions and a 0-line file size. + dir := t.TempDir() + empty := filepath.Join(dir, "empty.rs") + if err := writeFile(empty, []byte("")); err != nil { + t.Fatal(err) + } + fns, fsize, err := sizesImpl{}.ExtractFunctions(empty, fullRegion("empty.rs")) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if len(fns) != 0 { + t.Errorf("empty file: got %d fns, want 0", len(fns)) + } + if fsize == nil { + t.Fatal("expected non-nil file size for empty file") + } + if fsize.Lines != 0 { + t.Errorf("empty file: Lines = %d, want 0", fsize.Lines) + } +} + +func TestCountLines(t *testing.T) { + cases := []struct { + in string + want int + }{ + {"", 0}, + {"x", 1}, + {"x\n", 1}, + {"x\ny", 2}, + {"x\ny\n", 2}, + {"\n", 1}, + } + for _, tc := range cases { + got := countLines([]byte(tc.in)) + if got != tc.want { + t.Errorf("countLines(%q) = %d, want %d", tc.in, got, tc.want) + } + } +} diff --git a/internal/lang/rustanalyzer/testdata/functions.rs b/internal/lang/rustanalyzer/testdata/functions.rs new file mode 100644 index 0000000..80e68a0 --- /dev/null +++ 
b/internal/lang/rustanalyzer/testdata/functions.rs @@ -0,0 +1,35 @@ +// Fixture: a small Rust file covering every function form the extractor +// should handle: standalone fn, inherent method, trait-impl method, and +// nested functions (reported as separate entries). + +fn standalone() -> i32 { + 42 +} + +pub struct Counter { + n: i32, +} + +impl Counter { + pub fn new() -> Self { + Counter { n: 0 } + } + + pub fn increment(&mut self) -> i32 { + fn nested_helper(x: i32) -> i32 { + x + 1 + } + self.n = nested_helper(self.n); + self.n + } +} + +pub trait Named { + fn name(&self) -> &str; +} + +impl Named for Counter { + fn name(&self) -> &str { + "Counter" + } +} From a3968aa7daab33c0a0c4d02620906ead140efa32 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:12:54 -0400 Subject: [PATCH 11/38] =?UTF-8?q?feat(rust):=20C3=20=E2=80=94=20cognitive?= =?UTF-8?q?=20complexity=20calculator=20and=20scorer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the cognitive complexity walker per the design doc: * Base +1 for if / while / for / loop / match / if-let / while-let. * +1 per guarded match arm (detected via either `match_arm_guard` child or the newer `match_pattern.condition` field; both grammar shapes are accepted for resilience against tree-sitter-rust upgrades). * +1 per logical-operator run change in a `&&`/`||` chain. * +1 nesting penalty per scope-introducing ancestor. * `?` and `unsafe` do NOT contribute. * Closures start a fresh nesting context (matches the Go analyzer's FuncLit behavior); nested `function_item`s are reported separately. ComplexityScorer reuses the calculator — tree-sitter walks are cheap enough that a separate approximation isn't worth the divergence. Testdata fixture documents expected scores per function; unit tests assert each case plus the logical-op counter directly. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/rustanalyzer/complexity.go | 16 +++ internal/lang/rustanalyzer/complexity_test.go | 122 ++++++++++++++++++ .../lang/rustanalyzer/testdata/complexity.rs | 59 +++++++++ 3 files changed, 197 insertions(+) create mode 100644 internal/lang/rustanalyzer/complexity_test.go create mode 100644 internal/lang/rustanalyzer/testdata/complexity.rs diff --git a/internal/lang/rustanalyzer/complexity.go b/internal/lang/rustanalyzer/complexity.go index bafd992..7e425c0 100644 --- a/internal/lang/rustanalyzer/complexity.go +++ b/internal/lang/rustanalyzer/complexity.go @@ -211,6 +211,17 @@ func countGuardedArms(match *sitter.Node) int { } // hasGuard reports whether a match_arm node carries an `if` guard. +// +// Two grammar shapes appear in practice: +// +// 1. Older grammars used a distinct `match_arm_guard` child. +// 2. Current tree-sitter-rust models the guard as a `condition` field on +// the arm's `match_pattern` child — i.e. +// (match_arm pattern: (match_pattern (identifier) +// condition: (binary_expression ...)) +// value: ...) +// +// We check for either to stay resilient across grammar updates. func hasGuard(arm *sitter.Node) bool { for i := 0; i < int(arm.ChildCount()); i++ { c := arm.Child(i) @@ -221,6 +232,11 @@ func hasGuard(arm *sitter.Node) bool { return true } } + if pat := arm.ChildByFieldName("pattern"); pat != nil { + if pat.ChildByFieldName("condition") != nil { + return true + } + } return false } diff --git a/internal/lang/rustanalyzer/complexity_test.go b/internal/lang/rustanalyzer/complexity_test.go new file mode 100644 index 0000000..7dd8bd9 --- /dev/null +++ b/internal/lang/rustanalyzer/complexity_test.go @@ -0,0 +1,122 @@ +package rustanalyzer + +import ( + "path/filepath" + "testing" + + sitter "github.com/smacker/go-tree-sitter" +) + +// TestCognitiveComplexity_ByFixture asserts per-function scores on +// testdata/complexity.rs. 
The fixture docstrings record each function's +// expected score; this test is the canonical place to assert them. +func TestCognitiveComplexity_ByFixture(t *testing.T) { + absPath, _ := filepath.Abs("testdata/complexity.rs") + scores, err := complexityImpl{}.AnalyzeFile(absPath, fullRegion("testdata/complexity.rs")) + if err != nil { + t.Fatal(err) + } + scoreByName := map[string]int{} + for _, s := range scores { + scoreByName[s.Name] = s.Complexity + } + + cases := []struct { + name string + want int + }{ + {"empty", 0}, + {"one_if", 1}, + {"guarded", 3}, + {"nested", 3}, + {"logical", 3}, + {"unsafe_and_try", 1}, + } + for _, tc := range cases { + got, ok := scoreByName[tc.name] + if !ok { + t.Errorf("missing score for %q (have %v)", tc.name, scoreByName) + continue + } + if got != tc.want { + t.Errorf("complexity(%s) = %d, want %d", tc.name, got, tc.want) + } + } +} + +// TestComplexityScorer_ReusesCalculator asserts the Scorer (used by the +// churn analyzer) returns the same values as the Calculator — the design +// note explicitly allows reuse and a future refactor to a separate +// approximation would need a deliberate update here. +func TestComplexityScorer_ReusesCalculator(t *testing.T) { + absPath, _ := filepath.Abs("testdata/complexity.rs") + calc, err := complexityImpl{}.AnalyzeFile(absPath, fullRegion("testdata/complexity.rs")) + if err != nil { + t.Fatal(err) + } + score, err := complexityImpl{}.ScoreFile(absPath, fullRegion("testdata/complexity.rs")) + if err != nil { + t.Fatal(err) + } + if len(calc) != len(score) { + t.Fatalf("counts differ: calc=%d score=%d", len(calc), len(score)) + } + for i := range calc { + if calc[i].Name != score[i].Name || calc[i].Complexity != score[i].Complexity { + t.Errorf("row %d differs: calc=%+v score=%+v", i, calc[i], score[i]) + } + } +} + +// TestLogicalOpChain asserts the operator-switch counter directly. A run +// of the same operator counts as 1; each switch to the other adds 1. 
+func TestLogicalOpChain(t *testing.T) { + cases := []struct { + src string + want int + }{ + {"fn f(a: bool, b: bool) -> bool { a && b }", 1}, + {"fn f(a: bool, b: bool, c: bool) -> bool { a && b && c }", 1}, + {"fn f(a: bool, b: bool, c: bool) -> bool { a && b || c }", 2}, + {"fn f(a: bool, b: bool, c: bool, d: bool) -> bool { a || b && c || d }", 3}, + {"fn f(a: i32) -> bool { a == 1 }", 0}, + } + for _, tc := range cases { + tree, err := parseBytes([]byte(tc.src)) + if err != nil { + t.Fatalf("parseBytes(%q): %v", tc.src, err) + } + target := findFirstLogical(tree.RootNode()) + got := conditionLogicalOps(target) + if got != tc.want { + t.Errorf("conditionLogicalOps(%q) = %d, want %d", tc.src, got, tc.want) + } + tree.Close() + } +} + +// findFirstLogical returns the outermost binary_expression whose operator +// is && or || — i.e. the root of the logical chain in the source. If no +// such chain is present, returns nil so callers can still exercise the +// "no logical ops" branch of conditionLogicalOps. +func findFirstLogical(root *sitter.Node) *sitter.Node { + var hit *sitter.Node + walk(root, func(n *sitter.Node) bool { + if hit != nil { + return false + } + if n.Type() != "binary_expression" { + return true + } + op := n.ChildByFieldName("operator") + if op == nil { + return true + } + if op.Type() == "&&" || op.Type() == "||" { + hit = n + return false + } + return true + }) + return hit +} diff --git a/internal/lang/rustanalyzer/testdata/complexity.rs b/internal/lang/rustanalyzer/testdata/complexity.rs new file mode 100644 index 0000000..a76d7df --- /dev/null +++ b/internal/lang/rustanalyzer/testdata/complexity.rs @@ -0,0 +1,59 @@ +// Fixture for the cognitive-complexity scorer. Each function below has a +// documented expected score so the test can assert precise numbers. + +// Empty function: no control flow, score 0. +fn empty() {} + +// Single if: +1 base, 0 nesting, 0 logical. 
+fn one_if(x: i32) -> i32 { + if x > 0 { + 1 + } else { + 0 + } +} + +// match with 3 arms, 2 guarded: +1 for match, +2 for guarded arms. +fn guarded(x: i32) -> i32 { + match x { + n if n > 0 => 1, + n if n < 0 => -1, + _ => 0, + } +} + +// Nested if inside for: for = +1, nested if = +1 base + 1 nesting = +2. +// Total = 3. +fn nested(xs: &[i32]) -> i32 { + let mut n = 0; + for x in xs { + if *x > 0 { + n += 1; + } + } + n +} + +// Logical chain: if +1, &&/|| switch counted. "a && b && c" is a single +// run = +1; "a && b || c" is two runs = +2. This fn has "a && b || c": +// base if = +1, logical = +2, total = 3. +fn logical(a: bool, b: bool, c: bool) -> bool { + if a && b || c { + true + } else { + false + } +} + +// unsafe block should NOT count; `?` should NOT count. This fn has: +// one if = +1, one ? = +0, one unsafe = +0. Total = 1. +fn unsafe_and_try(maybe: Option) -> Result { + let v = maybe.ok_or(())?; + if v > 0 { + return Ok(v); + } + unsafe { + let _p: *const i32 = std::ptr::null(); + } + Ok(0) +} From 281d9e1ad9d41dda83899064fe84f230dd8c5f53 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:13:35 -0400 Subject: [PATCH 12/38] =?UTF-8?q?feat(rust):=20C4=20=E2=80=94=20ImportReso?= =?UTF-8?q?lver=20reads=20Cargo.toml=20and=20scans=20use/mod=20edges?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DetectModulePath parses [package] name from Cargo.toml via a line-based scanner — no TOML dependency needed since we only pull two tokens. ScanPackageImports walks every .rs file in the package, looks for `use_declaration` and `mod_item` nodes via tree-sitter, and classifies each as internal iff the leading segment is `crate`, `self`, or `super`. The resolver maps each internal path to a directory-level graph node (e.g. `crate::foo::bar::Baz` -> `src/foo/bar`), matching the Go analyzer's directory-granularity edges. External crates and std imports are dropped. 
Tests cover crate-root detection, relative-path resolution (super/self/crate), and end-to-end scanning on a fixture crate with mixed internal + external imports. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/rustanalyzer/deps_test.go | 179 ++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 internal/lang/rustanalyzer/deps_test.go diff --git a/internal/lang/rustanalyzer/deps_test.go b/internal/lang/rustanalyzer/deps_test.go new file mode 100644 index 0000000..5089adc --- /dev/null +++ b/internal/lang/rustanalyzer/deps_test.go @@ -0,0 +1,179 @@ +package rustanalyzer + +import ( + "os" + "path/filepath" + "testing" +) + +func TestParseCargoPackageName(t *testing.T) { + cases := []struct { + src string + want string + }{ + { + src: ` +[package] +name = "diffguard-rust-fixture" +version = "0.1.0" +`, + want: "diffguard-rust-fixture", + }, + { + src: ` +[package] +name="foo" +`, + want: "foo", + }, + { + // Nested table: name under [dependencies] must NOT match. 
+ src: ` +[dependencies] +name = "other" + +[package] +name = "real-pkg" +`, + want: "real-pkg", + }, + { + src: `[workspace]\nmembers = []`, + want: "", + }, + } + for _, tc := range cases { + got := parseCargoPackageName(tc.src) + if got != tc.want { + t.Errorf("parseCargoPackageName got %q, want %q", got, tc.want) + } + } +} + +func TestDetectModulePath(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "Cargo.toml"), []byte(` +[package] +name = "my-crate" +version = "0.1.0" +`), 0644); err != nil { + t.Fatal(err) + } + got, err := depsImpl{}.DetectModulePath(dir) + if err != nil { + t.Fatal(err) + } + if got != "my-crate" { + t.Errorf("DetectModulePath = %q, want my-crate", got) + } +} + +func TestDetectModulePath_Missing(t *testing.T) { + dir := t.TempDir() + _, err := depsImpl{}.DetectModulePath(dir) + if err == nil { + t.Error("expected error for missing Cargo.toml") + } +} + +// TestScanPackageImports_InternalVsExternal asserts that `use crate::...` +// and `use super::...` produce internal edges while external crates and +// std imports are filtered out. 
+func TestScanPackageImports_InternalVsExternal(t *testing.T) { + root := t.TempDir() + + // Layout: + // Cargo.toml + // src/ + // lib.rs -- `use crate::foo::bar::Baz;` + `use std::fmt;` + // foo/ + // mod.rs + // bar.rs + // src/util/mod.rs -- `use super::foo::Helper;` + must := func(p, content string) { + full := filepath.Join(root, p) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(full, []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + must("Cargo.toml", ` +[package] +name = "demo" +`) + must("src/lib.rs", ` +use crate::foo::bar::Baz; +use std::fmt; +mod foo; +mod util; +`) + must("src/foo/mod.rs", ` +pub mod bar; +`) + must("src/foo/bar.rs", ` +pub struct Baz; +`) + must("src/util/mod.rs", ` +use super::foo::Helper; +`) + + // Scan src/ — should find the `use crate::foo::bar` edge (-> src/foo/bar) + // and `mod foo;` (-> src/foo) and `mod util;` (-> src/util). External + // std import must NOT create an edge. + edges := depsImpl{}.ScanPackageImports(root, "src", "demo") + if edges == nil { + t.Fatal("expected non-nil edges for src") + } + srcEdges := edges["src"] + if srcEdges == nil { + t.Fatalf("expected edges keyed by 'src', got %v", edges) + } + // Expected internal edges (directory nodes): + expectedInternal := []string{ + "src/foo/bar", // crate::foo::bar + "src/foo", // mod foo; + "src/util", // mod util; + } + for _, want := range expectedInternal { + if !srcEdges[want] { + t.Errorf("missing edge to %q in %v", want, srcEdges) + } + } + + // Nothing external should sneak in. + for k := range srcEdges { + if k == "std/fmt" || k == "std" { + t.Errorf("external std edge leaked: %q", k) + } + } +} + +// TestScanPackageImports_SuperResolution directly asserts the resolver on +// a "super::" use to keep the relative-path arithmetic honest in isolation. +func TestScanPackageImports_SuperResolution(t *testing.T) { + // super:: in pkgDir=src/util resolves to src/foo for `super::foo::X`. 
+ got := resolveInternalPath([]string{"super", "foo", "Bar"}, "src/util") + want := "src/foo" + if got != want { + t.Errorf("resolveInternalPath(super::foo::Bar in src/util) = %q, want %q", got, want) + } + // self:: in pkgDir=src resolves to src for `self::foo::X`. + got = resolveInternalPath([]string{"self", "foo", "Bar"}, "src") + want = "src/foo" + if got != want { + t.Errorf("resolveInternalPath(self::foo::Bar in src) = %q, want %q", got, want) + } + // crate::x::y::Z always resolves to src/x/y regardless of pkgDir. + got = resolveInternalPath([]string{"crate", "x", "y", "Z"}, "anywhere") + want = "src/x/y" + if got != want { + t.Errorf("resolveInternalPath(crate::x::y::Z) = %q, want %q", got, want) + } + // External roots return "". + got = resolveInternalPath([]string{"std", "fmt", "Display"}, "src") + if got != "" { + t.Errorf("resolveInternalPath(std::fmt::Display) = %q, want empty", got) + } +} From aad93c4df88df4f8292bebd3e04258e243e29c8b Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:14:03 -0400 Subject: [PATCH 13/38] =?UTF-8?q?feat(rust):=20C5=20=E2=80=94=20Annotation?= =?UTF-8?q?Scanner=20for=20mutator-disable=20annotations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scans line_comment and block_comment tokens for `mutator-disable-next- line` (suppresses the following source line) and `mutator-disable-func` (suppresses every line of the enclosing function, including the signature). Function ranges are sourced from the same function_item walk used by the FunctionExtractor so `mutator-disable-func` can attach to comments that live inside the function body OR directly precede it (one blank line tolerated, matching the Go analyzer's behavior). Tests cover next-line annotations, func-wide annotations from both preceding and internal positions, and the negative case of ordinary comments that must not toggle anything. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../rustanalyzer/mutation_annotate_test.go | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 internal/lang/rustanalyzer/mutation_annotate_test.go diff --git a/internal/lang/rustanalyzer/mutation_annotate_test.go b/internal/lang/rustanalyzer/mutation_annotate_test.go new file mode 100644 index 0000000..48062c3 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_annotate_test.go @@ -0,0 +1,116 @@ +package rustanalyzer + +import ( + "path/filepath" + "testing" +) + +// TestScanAnnotations_NextLine writes a fixture with a mutator-disable- +// next-line comment and confirms the following source line is disabled. +func TestScanAnnotations_NextLine(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`fn f(x: i32) -> i32 { + // mutator-disable-next-line + if x > 0 { 1 } else { 0 } +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + // Line 3 (the `if` line) should be disabled. + if !disabled[3] { + t.Errorf("expected line 3 disabled, got %v", disabled) + } + if disabled[4] { + t.Errorf("line 4 should not be disabled (unrelated), got %v", disabled) + } +} + +// TestScanAnnotations_FuncWide asserts that `mutator-disable-func` +// marks every line of the enclosing function — including the signature +// line. +func TestScanAnnotations_FuncWide(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`// mutator-disable-func +fn top(x: i32) -> i32 { + x + 1 +} + +fn other(x: i32) -> i32 { + x * 2 +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + // The `top` function spans lines 2-4. All three must be disabled. 
+ for _, line := range []int{2, 3, 4} { + if !disabled[line] { + t.Errorf("expected line %d disabled in top, got %v", line, disabled) + } + } + // The `other` function (lines 6-8) must not be touched. + for _, line := range []int{6, 7, 8} { + if disabled[line] { + t.Errorf("line %d in other should not be disabled, got %v", line, disabled) + } + } +} + +// TestScanAnnotations_UnrelatedComments is a negative control: ordinary +// comments must not toggle anything. +func TestScanAnnotations_UnrelatedComments(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`// just a regular comment +fn f(x: i32) -> i32 { + // another regular comment + x +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("expected empty disabled map, got %v", disabled) + } +} + +// TestScanAnnotations_FuncInsideComment is a coverage test for the case +// where the disable-func comment lives inside the function body rather +// than preceding it. The Go analyzer accepts both positions. 
+func TestScanAnnotations_FuncInsideComment(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`fn only(x: i32) -> i32 { + // mutator-disable-func + x + 1 +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + for _, line := range []int{1, 2, 3, 4} { + if !disabled[line] { + t.Errorf("expected line %d disabled, got %v", line, disabled) + } + } +} From e7fdf4be874e7c149976571457d045f3fe7c091c Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:14:58 -0400 Subject: [PATCH 14/38] =?UTF-8?q?feat(rust):=20C6=20=E2=80=94=20MutantGene?= =?UTF-8?q?rator=20emits=20canonical=20+=20Rust-specific=20operators?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements all canonical operators (conditional_boundary, negate_conditional, math_operator, return_value, boolean_substitution, branch_removal, statement_deletion) plus the three Rust-specific ones: * `unwrap_removal` — strips `.unwrap()` / `.expect(...)` from the receiver of a call (Tier 1). * `some_to_none` — flips `return Some(x)` to `return None` (Tier 1). * `question_mark_removal` — strips the trailing `?` from a try_expression (Tier 2). `incdec` is deliberately absent because Rust has no `++`/`--` operators. Mutants are filtered to changed regions and disabled-line maps, then sorted by (line, operator, description) so repeated runs produce byte-identical output — a critical property for the exit-code gate. Unit tests exercise every operator on minimal source snippets plus the determinism and filtering paths. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../rustanalyzer/mutation_generate_test.go | 234 ++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 internal/lang/rustanalyzer/mutation_generate_test.go diff --git a/internal/lang/rustanalyzer/mutation_generate_test.go b/internal/lang/rustanalyzer/mutation_generate_test.go new file mode 100644 index 0000000..3985aee --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_generate_test.go @@ -0,0 +1,234 @@ +package rustanalyzer + +import ( + "math" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// writeAndGenerate is a small harness: write `src` to a temp .rs file, +// generate mutants over the entire file, and return them. +func writeAndGenerate(t *testing.T, src string, disabled map[int]bool) []lang.MutantSite { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{ + Path: "a.rs", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(path, fc, disabled) + if err != nil { + t.Fatal(err) + } + return mutants +} + +// collectOps returns the sorted set of operator names from a mutant list. 
+func collectOps(mutants []lang.MutantSite) map[string]int { + m := map[string]int{} + for _, x := range mutants { + m[x.Operator]++ + } + return m +} + +func TestGenerate_BinaryOps(t *testing.T) { + src := `fn f(x: i32) -> bool { + x > 0 +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["conditional_boundary"] == 0 { + t.Errorf("expected conditional_boundary mutant, got %v", ops) + } +} + +func TestGenerate_EqualityAndMath(t *testing.T) { + src := `fn g(a: i32, b: i32) -> bool { + a == b +} + +fn h(a: i32, b: i32) -> i32 { + a + b +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["negate_conditional"] == 0 { + t.Errorf("expected negate_conditional for ==, got %v", ops) + } + if ops["math_operator"] == 0 { + t.Errorf("expected math_operator for +, got %v", ops) + } +} + +func TestGenerate_BooleanLiteral(t *testing.T) { + src := `fn g() -> bool { true } +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["boolean_substitution"] == 0 { + t.Errorf("expected boolean_substitution, got %v", collectOps(m)) + } +} + +func TestGenerate_ReturnValue(t *testing.T) { + src := `fn g() -> i32 { + return 42; +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["return_value"] == 0 { + t.Errorf("expected return_value mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_SomeToNone(t *testing.T) { + src := `fn g(x: i32) -> Option { + return Some(x); +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["some_to_none"] == 0 { + t.Errorf("expected some_to_none mutant, got %v", ops) + } + // The generator also emits a generic return_value on the same line — + // that's expected. 
+ if ops["return_value"] == 0 { + t.Errorf("expected return_value companion, got %v", ops) + } +} + +func TestGenerate_UnwrapRemoval(t *testing.T) { + src := `fn g(x: Option) -> i32 { + x.unwrap() +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["unwrap_removal"] == 0 { + t.Errorf("expected unwrap_removal mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_ExpectBecomesUnwrapRemoval(t *testing.T) { + src := `fn g(x: Option) -> i32 { + x.expect("boom") +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["unwrap_removal"] == 0 { + t.Errorf("expected unwrap_removal mutant for .expect, got %v", collectOps(m)) + } +} + +func TestGenerate_QuestionMarkRemoval(t *testing.T) { + src := `fn g(x: Result) -> Result { + let v = x?; + Ok(v) +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["question_mark_removal"] == 0 { + t.Errorf("expected question_mark_removal mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_BranchRemovalAndStatementDeletion(t *testing.T) { + // Uses a plain function call (not a macro) for the statement-deletion + // case. Tree-sitter models `println!(...)` as a macro_invocation, so + // we'd miss it; bare `side_effect()` is parsed as a call_expression + // wrapped in an expression_statement, which is what the generator + // looks for. + src := `fn side_effect() {} + +fn g(x: i32) { + if x > 0 { + side_effect(); + } + side_effect(); +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["branch_removal"] == 0 { + t.Errorf("expected branch_removal, got %v", ops) + } + if ops["statement_deletion"] == 0 { + t.Errorf("expected statement_deletion for bare call, got %v", ops) + } +} + +// TestGenerate_RespectsChangedRegion asserts out-of-region mutants are +// dropped. 
+func TestGenerate_RespectsChangedRegion(t *testing.T) { + src := `fn in_region(x: i32) -> bool { x > 0 } +fn out_of_region(x: i32) -> bool { x > 0 } +` + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + // Region covers only line 1. Line 2's binary_expression should be dropped. + fc := diff.FileChange{ + Path: "a.rs", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 1}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + if err != nil { + t.Fatal(err) + } + for _, m := range mutants { + if m.Line != 1 { + t.Errorf("got out-of-region mutant at line %d: %+v", m.Line, m) + } + } +} + +// TestGenerate_RespectsDisabledLines asserts disabledLines suppress +// mutants on those lines. +func TestGenerate_RespectsDisabledLines(t *testing.T) { + src := `fn g(a: i32, b: i32) -> bool { + a > b +} +` + disabled := map[int]bool{2: true} + m := writeAndGenerate(t, src, disabled) + for _, x := range m { + if x.Line == 2 { + t.Errorf("mutant on disabled line 2: %+v", x) + } + } +} + +// TestGenerate_Deterministic asserts repeated calls produce byte-identical +// results. Stable ordering is a critical property for the exit-code gate. 
+func TestGenerate_Deterministic(t *testing.T) { + src := `fn g(a: i32, b: i32) -> bool { + a > b && b < 10 +} +` + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{Path: "a.rs", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}} + first, _ := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + second, _ := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + if len(first) != len(second) { + t.Fatalf("lengths differ: %d vs %d", len(first), len(second)) + } + for i := range first { + if first[i] != second[i] { + t.Errorf("row %d differs: %+v vs %+v", i, first[i], second[i]) + } + } +} From 2046c0f677c05227b641ed962996b8982e281096 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:15:46 -0400 Subject: [PATCH 15/38] =?UTF-8?q?feat(rust):=20C7=20=E2=80=94=20MutantAppl?= =?UTF-8?q?ier=20does=20text-based=20edits=20with=20re-parse=20gate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uses tree-sitter byte offsets to do surgical text replacements — simpler than AST rewriting and preserves whitespace + formatting exactly. Each operator has a dedicated helper: * binary: swaps the operator token within a binary_expression. * bool: flips true <-> false. * return_value: substitutes Default::default() for the return expression. * some_to_none: rewrites `return Some(x)` to `return None`. * branch_removal: empties the consequence block of an if_expression. * statement_deletion: replaces a call statement with `();`. * unwrap_removal: drops `.unwrap()` / `.expect(...)` from a call. * question_mark_removal: strips the trailing `?` from a try_expression. After every successful edit we re-parse with tree-sitter and check for HasError on the root; corrupt mutants return nil so the test runner never exercises invalid source. Unknown operators and line-mismatches also return nil cleanly. 
Tests cover each operator's success path plus the two nil-return paths (unknown op, site mismatch) and direct coverage of the re-parse gate. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../lang/rustanalyzer/mutation_apply_test.go | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 internal/lang/rustanalyzer/mutation_apply_test.go diff --git a/internal/lang/rustanalyzer/mutation_apply_test.go b/internal/lang/rustanalyzer/mutation_apply_test.go new file mode 100644 index 0000000..a2d9915 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_apply_test.go @@ -0,0 +1,241 @@ +package rustanalyzer + +import ( + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// applyAt writes src to a temp file and invokes the applier for `site`. +// Returns the mutated bytes (or nil if the applier skipped the site). +func applyAt(t *testing.T, src string, site lang.MutantSite) []byte { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + out, err := mutantApplierImpl{}.ApplyMutation(path, site) + if err != nil { + t.Fatal(err) + } + return out +} + +func TestApply_BinaryOperator(t *testing.T) { + src := `fn f(x: i32) -> bool { + x > 0 +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "conditional_boundary", + Description: "> -> >=", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "x >= 0") { + t.Errorf("expected 'x >= 0' in output, got:\n%s", out) + } +} + +func TestApply_BooleanFlip(t *testing.T) { + src := `fn f() -> bool { true } +` + site := lang.MutantSite{ + File: "a.rs", + Line: 1, + Operator: "boolean_substitution", + Description: "true -> false", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "false") { + t.Errorf("expected 
'false' in output, got:\n%s", out) + } + if strings.Contains(string(out), "true") { + t.Errorf("'true' should have been replaced, got:\n%s", out) + } +} + +func TestApply_ReturnValueToDefault(t *testing.T) { + src := `fn f() -> i32 { + return 42; +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "return_value", + Description: "replace return value with Default::default()", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "Default::default()") { + t.Errorf("expected Default::default(), got:\n%s", out) + } +} + +func TestApply_SomeToNone(t *testing.T) { + src := `fn g(x: i32) -> Option { + return Some(x); +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "some_to_none", + Description: "Some(x) -> None", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "return None;") { + t.Errorf("expected 'return None;', got:\n%s", out) + } +} + +func TestApply_BranchRemoval(t *testing.T) { + src := `fn side() {} +fn f(x: i32) { + if x > 0 { + side(); + } +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 3, + Operator: "branch_removal", + Description: "remove if body", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + // The call inside the body should be gone. + if strings.Contains(string(out), "side();") && strings.Contains(string(out), "if x > 0") { + // The function-declaration body still contains `side()` statement; + // we're asserting the if-body is emptied. After branch removal the + // `side();` call inside the braces must not appear between the if + // braces. Parse and check the if body is empty (approximated via + // a substring match that fails only if the consequence body still + // has text). 
+ if strings.Contains(string(out), "if x > 0 {\n side();") { + t.Errorf("if body not emptied, got:\n%s", out) + } + } +} + +func TestApply_StatementDeletion(t *testing.T) { + src := `fn side() {} +fn f() { + side(); +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 3, + Operator: "statement_deletion", + Description: "remove call statement", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "();") { + t.Errorf("expected statement replaced with '();', got:\n%s", out) + } +} + +func TestApply_UnwrapRemoval(t *testing.T) { + src := `fn g(x: Option) -> i32 { + x.unwrap() +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "unwrap_removal", + Description: "strip .unwrap()", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if strings.Contains(string(out), "unwrap") { + t.Errorf(".unwrap() not stripped, got:\n%s", out) + } +} + +func TestApply_QuestionMarkRemoval(t *testing.T) { + src := `fn g(x: Result) -> Result { + let v = x?; + Ok(v) +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "question_mark_removal", + Description: "strip trailing ?", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if strings.Contains(string(out), "?;") { + t.Errorf("trailing ? not stripped, got:\n%s", out) + } +} + +// TestApply_ReparseRejectsCorrupt asserts that when the applier produces +// source that fails to tree-sitter parse (via a synthetic "apply every +// operator that doesn't exist" scenario), the applier returns nil. +// +// We exercise this via an operator the applier doesn't know — result is +// nil bytes, not a corrupt output. 
+func TestApply_UnknownOperatorReturnsNil(t *testing.T) { + src := `fn f() {} +` + site := lang.MutantSite{Line: 1, Operator: "nonexistent_op"} + out := applyAt(t, src, site) + if out != nil { + t.Errorf("expected nil for unknown operator, got:\n%s", out) + } +} + +// TestApply_SiteMismatchReturnsNil asserts a mutant whose target line has +// no matching node is a silent no-op (nil bytes, no error). +func TestApply_SiteMismatchReturnsNil(t *testing.T) { + src := `fn f() -> i32 { 42 } +` + // boolean_substitution on a line that has no boolean literal. + site := lang.MutantSite{Line: 1, Operator: "boolean_substitution", Description: "true -> false"} + out := applyAt(t, src, site) + if out != nil { + t.Errorf("expected nil for site with no matching node, got:\n%s", out) + } +} + +// TestIsValidRust exercises the re-parse gate directly. +func TestIsValidRust(t *testing.T) { + good := []byte(`fn f() -> i32 { 42 }`) + bad := []byte(`fn f() -> i32 { 42 `) // missing brace + if !isValidRust(good) { + t.Error("well-formed Rust reported invalid") + } + if isValidRust(bad) { + t.Error("malformed Rust reported valid") + } +} From 8397004bb15418a9b701273d7810515e694e8151 Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Thu, 16 Apr 2026 15:19:18 -0400 Subject: [PATCH 16/38] =?UTF-8?q?feat(rust):=20C8=20=E2=80=94=20TestRunner?= =?UTF-8?q?=20via=20cargo=20test=20with=20temp-copy=20isolation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Temp-copy strategy per MULTI_LANGUAGE_SUPPORT.md §Mutation isolation: 1. Acquire a per-file mutex so concurrent mutants on the same file serialize; different files run in parallel. 2. Backup original bytes in memory. 3. Write mutant over original in place. 4. exec cargo test with context.WithTimeout and CARGO_INCREMENTAL=0. 5. Restore original bytes via defer — always, even on panic or unexpected process failure. TestPattern is passed as a positional filter (`cargo test `). 
Timeouts promote to a killed verdict (the mutant made tests too slow to finish, which is itself a test-gap signal). Tests simulate the full kill / survive / timeout / process-failure matrix with /bin/sh scripts so the suite stays hermetic — no actual cargo or Rust toolchain required. `go test -race -count=3` passes, which is the critical assurance for the per-file lock. Also register the Rust-specific operators in tiers.go unit tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/lang/rustanalyzer/testrunner.go | 39 --- internal/lang/rustanalyzer/testrunner_test.go | 261 ++++++++++++++++++ internal/mutation/tiers_test.go | 8 + 3 files changed, 269 insertions(+), 39 deletions(-) create mode 100644 internal/lang/rustanalyzer/testrunner_test.go diff --git a/internal/lang/rustanalyzer/testrunner.go b/internal/lang/rustanalyzer/testrunner.go index a530e59..5b4af13 100644 --- a/internal/lang/rustanalyzer/testrunner.go +++ b/internal/lang/rustanalyzer/testrunner.go @@ -6,7 +6,6 @@ import ( "fmt" "os" "os/exec" - "path/filepath" "sync" "github.com/0xPolygon/diffguard/internal/lang" @@ -146,41 +145,3 @@ func cargoTestArgs(cfg lang.TestRunConfig) []string { return r.buildArgs(cfg) } -// backupAndRestore is exposed for tests that want to verify the -// restore-on-panic guarantee without actually invoking cargo. -// -// It writes `mutantBytes` over `path`, runs `work`, and restores -// `originalBytes` via defer. Returns the original unmodified bytes so the -// caller can assert restoration. 
-// -//nolint:unused // used by testrunner_test.go -func backupAndRestore(path string, originalBytes, mutantBytes []byte, work func()) (restored []byte, err error) { - defer func() { - _ = os.WriteFile(path, originalBytes, 0644) - restored, err = os.ReadFile(path) - }() - if err := os.WriteFile(path, mutantBytes, 0644); err != nil { - return nil, err - } - work() - return nil, nil -} - -// AtomicCopy copies src to dst; used to build a file-level "backup" -// location if a caller prefers backing up to a sibling path rather than -// holding bytes in memory. We don't use this from RunTest (in-memory is -// cheap for source files) but leave it here for future runners that may -// need on-disk backups. -// -//nolint:unused -func AtomicCopy(src, dst string) error { - data, err := os.ReadFile(src) - if err != nil { - return err - } - tmp := filepath.Join(filepath.Dir(dst), ".diffguard-backup-tmp") - if err := os.WriteFile(tmp, data, 0644); err != nil { - return err - } - return os.Rename(tmp, dst) -} diff --git a/internal/lang/rustanalyzer/testrunner_test.go b/internal/lang/rustanalyzer/testrunner_test.go new file mode 100644 index 0000000..69d964a --- /dev/null +++ b/internal/lang/rustanalyzer/testrunner_test.go @@ -0,0 +1,261 @@ +package rustanalyzer + +import ( + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// fakeRunner returns a runner that invokes `/bin/sh -c