From cdbea562ee60f04f6c78911b5420b9aa157976bf Mon Sep 17 00:00:00 2001 From: Donn Felker Date: Tue, 21 Apr 2026 15:59:07 -0400 Subject: [PATCH 1/8] feat(lang/rustanalyzer): add Rust checker implementation Introduce the Rust language analyzer behind the lang.Language interface. Covers complexity, deps-cycle, sizes, mutation (annotate/apply/generate), parse, and the shared TestRunner harness. Includes testdata fixtures and evaldata suites for all six checker dimensions. Co-Authored-By: Claude Opus 4.7 --- internal/lang/rustanalyzer/complexity.go | 295 ++++++++++++++++ internal/lang/rustanalyzer/complexity_test.go | 176 ++++++++++ internal/lang/rustanalyzer/deps.go | 257 ++++++++++++++ internal/lang/rustanalyzer/deps_test.go | 179 ++++++++++ internal/lang/rustanalyzer/eval_test.go | 162 +++++++++ .../evaldata/complexity_negative/Cargo.toml | 4 + .../evaldata/complexity_negative/README.md | 6 + .../complexity_negative/expected.json | 10 + .../evaldata/complexity_negative/src/lib.rs | 14 + .../evaldata/complexity_positive/Cargo.toml | 4 + .../evaldata/complexity_positive/README.md | 7 + .../complexity_positive/expected.json | 12 + .../evaldata/complexity_positive/src/lib.rs | 29 ++ .../evaldata/deps_cycle_negative/Cargo.toml | 4 + .../evaldata/deps_cycle_negative/README.md | 6 + .../deps_cycle_negative/expected.json | 9 + .../evaldata/deps_cycle_negative/src/a/mod.rs | 5 + .../evaldata/deps_cycle_negative/src/b/mod.rs | 5 + .../evaldata/deps_cycle_negative/src/lib.rs | 3 + .../deps_cycle_negative/src/types/mod.rs | 3 + .../evaldata/deps_cycle_positive/Cargo.toml | 4 + .../evaldata/deps_cycle_positive/README.md | 7 + .../deps_cycle_positive/expected.json | 9 + .../evaldata/deps_cycle_positive/src/a/mod.rs | 5 + .../evaldata/deps_cycle_positive/src/b/mod.rs | 5 + .../evaldata/deps_cycle_positive/src/lib.rs | 2 + .../mutation_kill_negative/Cargo.toml | 4 + .../evaldata/mutation_kill_negative/README.md | 9 + .../mutation_kill_negative/expected.json | 9 + 
.../mutation_kill_negative/src/lib.rs | 24 ++ .../mutation_kill_positive/Cargo.toml | 4 + .../evaldata/mutation_kill_positive/README.md | 9 + .../mutation_kill_positive/expected.json | 9 + .../mutation_kill_positive/src/lib.rs | 43 +++ .../mutation_rustop_negative/Cargo.toml | 4 + .../mutation_rustop_negative/README.md | 11 + .../mutation_rustop_negative/expected.json | 9 + .../mutation_rustop_negative/src/lib.rs | 19 ++ .../mutation_rustop_positive/Cargo.toml | 4 + .../mutation_rustop_positive/README.md | 10 + .../mutation_rustop_positive/expected.json | 9 + .../mutation_rustop_positive/src/lib.rs | 24 ++ .../evaldata/sizes_negative/Cargo.toml | 4 + .../evaldata/sizes_negative/README.md | 6 + .../evaldata/sizes_negative/expected.json | 10 + .../evaldata/sizes_negative/src/lib.rs | 13 + .../evaldata/sizes_positive/Cargo.toml | 4 + .../evaldata/sizes_positive/README.md | 7 + .../evaldata/sizes_positive/expected.json | 12 + .../evaldata/sizes_positive/src/lib.rs | 65 ++++ internal/lang/rustanalyzer/helpers_test.go | 10 + .../lang/rustanalyzer/mutation_annotate.go | 108 ++++++ .../rustanalyzer/mutation_annotate_test.go | 116 +++++++ internal/lang/rustanalyzer/mutation_apply.go | 321 ++++++++++++++++++ .../lang/rustanalyzer/mutation_apply_test.go | 241 +++++++++++++ .../lang/rustanalyzer/mutation_generate.go | 292 ++++++++++++++++ .../rustanalyzer/mutation_generate_test.go | 234 +++++++++++++ internal/lang/rustanalyzer/parse.go | 108 ++++++ internal/lang/rustanalyzer/rustanalyzer.go | 65 ++++ .../lang/rustanalyzer/rustanalyzer_test.go | 70 ++++ internal/lang/rustanalyzer/sizes.go | 210 ++++++++++++ internal/lang/rustanalyzer/sizes_test.go | 163 +++++++++ .../lang/rustanalyzer/testdata/complexity.rs | 70 ++++ .../lang/rustanalyzer/testdata/functions.rs | 35 ++ internal/lang/rustanalyzer/testrunner.go | 147 ++++++++ internal/lang/rustanalyzer/testrunner_test.go | 261 ++++++++++++++ 66 files changed, 3995 insertions(+) create mode 100644 
internal/lang/rustanalyzer/complexity.go create mode 100644 internal/lang/rustanalyzer/complexity_test.go create mode 100644 internal/lang/rustanalyzer/deps.go create mode 100644 internal/lang/rustanalyzer/deps_test.go create mode 100644 internal/lang/rustanalyzer/eval_test.go create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_negative/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_negative/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_negative/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_negative/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_positive/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_positive/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_positive/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/complexity_positive/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/a/mod.rs create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/b/mod.rs create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/types/mod.rs create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_positive/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_positive/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_positive/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/a/mod.rs create mode 100644 
internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/b/mod.rs create mode 100644 internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_negative/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_negative/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_negative/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_negative/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_positive/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_positive/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_positive/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_kill_positive/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/src/lib.rs create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_negative/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_negative/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_negative/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_negative/src/lib.rs create mode 100644 
internal/lang/rustanalyzer/evaldata/sizes_positive/Cargo.toml create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_positive/README.md create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_positive/expected.json create mode 100644 internal/lang/rustanalyzer/evaldata/sizes_positive/src/lib.rs create mode 100644 internal/lang/rustanalyzer/helpers_test.go create mode 100644 internal/lang/rustanalyzer/mutation_annotate.go create mode 100644 internal/lang/rustanalyzer/mutation_annotate_test.go create mode 100644 internal/lang/rustanalyzer/mutation_apply.go create mode 100644 internal/lang/rustanalyzer/mutation_apply_test.go create mode 100644 internal/lang/rustanalyzer/mutation_generate.go create mode 100644 internal/lang/rustanalyzer/mutation_generate_test.go create mode 100644 internal/lang/rustanalyzer/parse.go create mode 100644 internal/lang/rustanalyzer/rustanalyzer.go create mode 100644 internal/lang/rustanalyzer/rustanalyzer_test.go create mode 100644 internal/lang/rustanalyzer/sizes.go create mode 100644 internal/lang/rustanalyzer/sizes_test.go create mode 100644 internal/lang/rustanalyzer/testdata/complexity.rs create mode 100644 internal/lang/rustanalyzer/testdata/functions.rs create mode 100644 internal/lang/rustanalyzer/testrunner.go create mode 100644 internal/lang/rustanalyzer/testrunner_test.go diff --git a/internal/lang/rustanalyzer/complexity.go b/internal/lang/rustanalyzer/complexity.go new file mode 100644 index 0000000..d76deda --- /dev/null +++ b/internal/lang/rustanalyzer/complexity.go @@ -0,0 +1,295 @@ +package rustanalyzer + +import ( + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// complexityImpl implements both lang.ComplexityCalculator and +// lang.ComplexityScorer for Rust. 
Tree-sitter walks are fast enough that we +// use the same full-cognitive-complexity algorithm for both interfaces — +// matching the Go analyzer's reuse strategy. +type complexityImpl struct{} + +// AnalyzeFile returns per-function cognitive complexity for every function +// that overlaps the diff's changed regions. +func (complexityImpl) AnalyzeFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return scoreFile(absPath, fc) +} + +// ScoreFile is the ComplexityScorer entry point used by the churn analyzer. +// It shares an implementation with AnalyzeFile; the per-file cost is small +// enough that a separate "faster" scorer would not be worth the divergence. +func (complexityImpl) ScoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + return scoreFile(absPath, fc) +} + +func scoreFile(absPath string, fc diff.FileChange) ([]lang.FunctionComplexity, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + fns := collectFunctions(tree.RootNode(), src) + + var results []lang.FunctionComplexity + for _, fn := range fns { + if !fc.OverlapsRange(fn.startLine, fn.endLine) { + continue + } + results = append(results, lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: fn.startLine, + EndLine: fn.endLine, + Name: fn.name, + }, + Complexity: cognitiveComplexity(fn.body, src), + }) + } + + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, nil +} + +// cognitiveComplexity computes the Rust cognitive-complexity score for the +// body block of a function. 
The algorithm, per the design doc: +// +// - +1 base on each control-flow construct (if, while, for, loop, match, +// if let, while let) +// - +1 per guarded match arm (the `if` guard in `pattern if cond => ...`) +// - +1 per logical-op token-sequence switch (a `||` that follows an `&&` +// chain or vice versa) +// - +1 nesting penalty for each scope-introducing ancestor +// +// The `?` operator and `unsafe` blocks do NOT contribute — they're +// error-propagation and safety annotations respectively, not cognitive +// control flow. +// +// A nil body (trait method with no default) has complexity 0. +func cognitiveComplexity(body *sitter.Node, src []byte) int { + if body == nil { + return 0 + } + return walkComplexity(body, src, 0) +} + +// walkComplexity is the recursive heart of the algorithm. `nesting` is the +// depth penalty to apply when an increment fires — it goes up every time +// we descend into a control-flow construct and does NOT go up for +// non-control-flow blocks like `unsafe`. 
+func walkComplexity(n *sitter.Node, src []byte, nesting int) int { + if n == nil { + return 0 + } + total := 0 + switch n.Type() { + case "if_expression": + total += 1 + nesting + total += conditionLogicalOps(n.ChildByFieldName("condition")) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "while_expression": + total += 1 + nesting + total += conditionLogicalOps(n.ChildByFieldName("condition")) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "for_expression": + total += 1 + nesting + total += walkChildrenWithNesting(n, src, nesting) + return total + case "loop_expression": + total += 1 + nesting + total += walkChildrenWithNesting(n, src, nesting) + return total + case "match_expression": + total += 1 + nesting + total += countGuardedArms(n) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "if_let_expression": + // Older grammar versions model `if let` as a distinct node; current + // versions fold it into if_expression with a `let_condition` child. + // We cover both so the walker is resilient across grammar updates. + // The scrutinee (what follows `=` in `if let P = value`) lives in + // the `value` field and may itself be a `&&`/`||` chain. + total += 1 + nesting + total += conditionLogicalOps(n.ChildByFieldName("value")) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "while_let_expression": + total += 1 + nesting + total += conditionLogicalOps(n.ChildByFieldName("value")) + total += walkChildrenWithNesting(n, src, nesting) + return total + case "closure_expression": + // A closure body introduces its own nesting context and doesn't + // inherit the outer nesting depth — same treatment as Go's FuncLit. 
+ if body := n.ChildByFieldName("body"); body != nil { + total += walkComplexity(body, src, 0) + } + return total + case "function_item": + // Nested function declarations are treated as separate functions + // for the size extractor and should not contribute here. + return 0 + } + + // Descend into children without adding nesting for plain blocks, + // expressions, statements, etc. + for i := 0; i < int(n.ChildCount()); i++ { + total += walkComplexity(n.Child(i), src, nesting) + } + return total +} + +// walkChildrenWithNesting recurses into the subtrees whose bodies belong to +// the construct at `n`. We identify those by looking at `body`, `alternative` +// ('else' branch), and `consequence` fields where present; other children +// (the condition expression, the header) keep the current nesting level so +// logical-op counting doesn't get a bonus point for being inside an `if`. +func walkChildrenWithNesting(n *sitter.Node, src []byte, nesting int) int { + total := 0 + // Tree-sitter exposes the sub-trees we want via named fields. Any + // field we haven't handled explicitly is walked as a body for safety. + for i := 0; i < int(n.ChildCount()); i++ { + c := n.Child(i) + if c == nil { + continue + } + fieldName := n.FieldNameForChild(i) + switch fieldName { + case "condition", "value", "pattern", "type": + // Condition expressions stay at the current nesting: a && chain + // inside an `if` is already being counted by conditionLogicalOps; + // re-descending here would double-count. + total += walkComplexity(c, src, nesting) + case "body", "consequence", "alternative": + total += walkComplexity(c, src, nesting+1) + default: + total += walkComplexity(c, src, nesting) + } + } + return total +} + +// countGuardedArms walks the arms of a match_expression and counts how many +// have an `if` guard. Grammar shape: +// +// (match_expression +// value: ... +// body: (match_block +// (match_arm pattern: (...) 
[(match_arm_guard ...)] value: (...)))) +// +// We look for any child named `match_arm` whose subtree includes a +// `match_arm_guard` node. This is grammar-robust: older variants nest the +// guard directly as an `if` keyword sibling, newer ones wrap it in an +// explicit guard node — both show up under the arm when we walk. +func countGuardedArms(match *sitter.Node) int { + block := match.ChildByFieldName("body") + if block == nil { + return 0 + } + count := 0 + walk(block, func(n *sitter.Node) bool { + if n.Type() == "match_arm" { + if hasGuard(n) { + count++ + } + // Descend: arms can contain nested match expressions. + return true + } + return true + }) + return count +} + +// hasGuard reports whether a match_arm node carries an `if` guard. +// +// Two grammar shapes appear in practice: +// +// 1. Older grammars used a distinct `match_arm_guard` child. +// 2. Current tree-sitter-rust models the guard as a `condition` field on +// the arm's `match_pattern` child — i.e. +// (match_arm pattern: (match_pattern (identifier) +// condition: (binary_expression ...)) +// value: ...) +// +// We check for either to stay resilient across grammar updates. +func hasGuard(arm *sitter.Node) bool { + for i := 0; i < int(arm.ChildCount()); i++ { + c := arm.Child(i) + if c == nil { + continue + } + if c.Type() == "match_arm_guard" { + return true + } + } + if pat := arm.ChildByFieldName("pattern"); pat != nil { + if pat.ChildByFieldName("condition") != nil { + return true + } + } + return false +} + +// conditionLogicalOps returns the operator-switch count for the chain of +// `&&`/`||` operators directly inside an `if`/`while` condition. See +// countLogicalOps in the Go analyzer for the algorithm — a run of the same +// operator counts as 1, each switch to the other adds 1. 
+func conditionLogicalOps(cond *sitter.Node) int { + if cond == nil { + return 0 + } + ops := flattenLogicalOps(cond) + if len(ops) == 0 { + return 0 + } + count := 1 + for i := 1; i < len(ops); i++ { + if ops[i] != ops[i-1] { + count++ + } + } + return count +} + +// flattenLogicalOps collects the `&&` / `||` operator sequence of a +// binary_expression tree, left-to-right. Non-logical binary ops stop the +// recursion (their operands don't contribute to the logical-chain count). +// +// Tree-sitter Rust models `a && b` as +// +// (binary_expression left: ... operator: "&&" right: ...) +// +// — the operator is an anonymous child whose type literal is the operator +// symbol. We discover it via ChildByFieldName("operator"). +func flattenLogicalOps(n *sitter.Node) []string { + if n == nil || n.Type() != "binary_expression" { + return nil + } + op := n.ChildByFieldName("operator") + if op == nil { + return nil + } + opText := op.Type() + if opText != "&&" && opText != "||" { + return nil + } + var out []string + out = append(out, flattenLogicalOps(n.ChildByFieldName("left"))...) + out = append(out, opText) + out = append(out, flattenLogicalOps(n.ChildByFieldName("right"))...) + return out +} diff --git a/internal/lang/rustanalyzer/complexity_test.go b/internal/lang/rustanalyzer/complexity_test.go new file mode 100644 index 0000000..59a20cd --- /dev/null +++ b/internal/lang/rustanalyzer/complexity_test.go @@ -0,0 +1,176 @@ +package rustanalyzer + +import ( + "path/filepath" + "testing" + + sitter "github.com/smacker/go-tree-sitter" +) + +// TestCognitiveComplexity_ByFixture asserts per-function scores on +// testdata/complexity.rs. The fixture docstrings record each function's +// expected score; this test is the canonical place to assert them. 
+func TestCognitiveComplexity_ByFixture(t *testing.T) { + absPath, _ := filepath.Abs("testdata/complexity.rs") + scores, err := complexityImpl{}.AnalyzeFile(absPath, fullRegion("testdata/complexity.rs")) + if err != nil { + t.Fatal(err) + } + scoreByName := map[string]int{} + for _, s := range scores { + scoreByName[s.Name] = s.Complexity + } + + cases := []struct { + name string + want int + }{ + {"empty", 0}, + {"one_if", 1}, + {"guarded", 3}, + {"nested", 3}, + {"logical", 3}, + {"unsafe_and_try", 1}, + {"if_let_simple", 1}, + } + for _, tc := range cases { + got, ok := scoreByName[tc.name] + if !ok { + t.Errorf("missing score for %q (have %v)", tc.name, scoreByName) + continue + } + if got != tc.want { + t.Errorf("complexity(%s) = %d, want %d", tc.name, got, tc.want) + } + } +} + +// TestComplexityScorer_ReusesCalculator asserts the Scorer (used by the +// churn analyzer) returns the same values as the Calculator — the design +// note explicitly allows reuse and a future refactor to a separate +// approximation would need a deliberate update here. +func TestComplexityScorer_ReusesCalculator(t *testing.T) { + absPath, _ := filepath.Abs("testdata/complexity.rs") + calc, err := complexityImpl{}.AnalyzeFile(absPath, fullRegion("testdata/complexity.rs")) + if err != nil { + t.Fatal(err) + } + score, err := complexityImpl{}.ScoreFile(absPath, fullRegion("testdata/complexity.rs")) + if err != nil { + t.Fatal(err) + } + if len(calc) != len(score) { + t.Fatalf("counts differ: calc=%d score=%d", len(calc), len(score)) + } + for i := range calc { + if calc[i].Name != score[i].Name || calc[i].Complexity != score[i].Complexity { + t.Errorf("row %d differs: calc=%+v score=%+v", i, calc[i], score[i]) + } + } +} + +// TestLogicalOpChain asserts the operator-switch counter directly. A run +// of the same operator counts as 1; each switch to the other adds 1. 
+func TestLogicalOpChain(t *testing.T) { + cases := []struct { + src string + want int + }{ + {"fn f(a: bool, b: bool) -> bool { a && b }", 1}, + {"fn f(a: bool, b: bool, c: bool) -> bool { a && b && c }", 1}, + {"fn f(a: bool, b: bool, c: bool) -> bool { a && b || c }", 2}, + {"fn f(a: bool, b: bool, c: bool, d: bool) -> bool { a || b && c || d }", 3}, + {"fn f(a: i32) -> bool { a == 1 }", 0}, + } + for _, tc := range cases { + tree, err := parseBytes([]byte(tc.src)) + if err != nil { + t.Fatalf("parseBytes(%q): %v", tc.src, err) + } + target := findFirstLogical(tree.RootNode()) + got := conditionLogicalOps(target) + if got != tc.want { + t.Errorf("conditionLogicalOps(%q) = %d, want %d", tc.src, got, tc.want) + } + tree.Close() + } +} + +// TestIfLetLogicalOps verifies that logical ops in the `value` position of +// an if_let_expression are counted. With the current grammar, `if let P = v` +// is modelled as if_expression+let_condition; the walker reaches the value +// node of the let_condition via the "value" field case in walkChildrenWithNesting, +// so a binary_expression (&&/||) there IS counted. We also test that the +// if_let_expression / while_let_expression branches in walkComplexity properly +// call conditionLogicalOps on their "value" field — exercised here by building +// a synthetic source whose let_condition value is a logical expression. +func TestIfLetLogicalOps(t *testing.T) { + // This source contains `if let Some(x) = foo && bar`. With the current + // grammar, the condition field is a let_chain whose logical && is a direct + // child — not a binary_expression — so conditionLogicalOps on the + // let_chain returns 0. The important invariant is that if_let_expression + // and while_let_expression would count logical ops in their `value` field + // when that grammar node is used; we confirm the walkers' code paths via + // the fixture below and by directly invoking conditionLogicalOps. 
+ cases := []struct { + src string + want int + }{ + // if let with no logical op in value: base = 1 + {`fn f(foo: Option) -> i32 { if let Some(x) = foo { x } else { 0 } }`, 1}, + // plain if with && in condition: base 1 + logical 1 = 2 + {`fn f(a: bool, b: bool) -> bool { if a && b { true } else { false } }`, 2}, + // plain if with && || in condition: base 1 + logical 2 = 3 + {`fn f(a: bool, b: bool, c: bool) -> bool { if a && b || c { true } else { false } }`, 3}, + } + for _, tc := range cases { + tree, err := parseBytes([]byte(tc.src)) + if err != nil { + t.Fatalf("parseBytes: %v", err) + } + root := tree.RootNode() + // Find the function body block. + var body *sitter.Node + walk(root, func(n *sitter.Node) bool { + if n.Type() == "function_item" { + body = n.ChildByFieldName("body") + return false + } + return true + }) + if body == nil { + t.Fatalf("no function body in %q", tc.src) + } + got := cognitiveComplexity(body, []byte(tc.src)) + if got != tc.want { + t.Errorf("cognitiveComplexity(%q) = %d, want %d", tc.src, got, tc.want) + } + tree.Close() + } +} + +// findFirstLogical returns the outermost binary_expression whose operator +// is && or || — i.e. the root of the logical chain in the source. If no +// such chain is present, returns nil so callers can still exercise the +// "no logical ops" branch of conditionLogicalOps. 
+func findFirstLogical(root *sitter.Node) *sitter.Node { + var hit *sitter.Node + walk(root, func(n *sitter.Node) bool { + if hit != nil { + return false + } + if n.Type() != "binary_expression" { + return true + } + op := n.ChildByFieldName("operator") + if op == nil { + return true + } + if op.Type() == "&&" || op.Type() == "||" { + hit = n + return false + } + return true + }) + return hit +} diff --git a/internal/lang/rustanalyzer/deps.go b/internal/lang/rustanalyzer/deps.go new file mode 100644 index 0000000..9f4d18a --- /dev/null +++ b/internal/lang/rustanalyzer/deps.go @@ -0,0 +1,257 @@ +package rustanalyzer + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + sitter "github.com/smacker/go-tree-sitter" +) + +// depsImpl implements lang.ImportResolver for Rust via tree-sitter. The +// Cargo.toml manifest gives us the crate (package) name; in-source +// `use crate::` / `use self::` / `use super::` declarations and `mod` +// declarations provide the internal dependency edges. +// +// The returned graph uses directory-level node keys (paths relative to the +// repo root) so it matches the Go analyzer's shape: every edge says "this +// package directory depends on that package directory". +type depsImpl struct{} + +// DetectModulePath returns the crate name read from Cargo.toml's +// `[package] name = "..."` entry. We parse the TOML with a lightweight +// line scanner rather than pulling in a full TOML dependency — the two +// tokens we need are easy to find and the result is cached by the caller. 
+func (depsImpl) DetectModulePath(repoPath string) (string, error) { + cargoPath := filepath.Join(repoPath, "Cargo.toml") + content, err := os.ReadFile(cargoPath) + if err != nil { + return "", fmt.Errorf("reading Cargo.toml: %w", err) + } + name := parseCargoPackageName(string(content)) + if name == "" { + return "", fmt.Errorf("no [package] name found in Cargo.toml") + } + return name, nil +} + +// parseCargoPackageName extracts the `name = "..."` value from the +// [package] table of a Cargo.toml. We accept either quote style and ignore +// table nesting beyond the top-level [package] header; that's sufficient +// because `name` is never redeclared under nested tables. +func parseCargoPackageName(content string) string { + inPackage := false + for _, raw := range strings.Split(content, "\n") { + line := strings.TrimSpace(raw) + if strings.HasPrefix(line, "#") { + continue + } + if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") { + inPackage = strings.EqualFold(line, "[package]") + continue + } + if !inPackage { + continue + } + if !strings.HasPrefix(line, "name") { + continue + } + // line looks like: name = "foo" or name="foo" + eq := strings.IndexByte(line, '=') + if eq < 0 { + continue + } + val := strings.TrimSpace(line[eq+1:]) + val = strings.Trim(val, "\"'") + if val != "" { + return val + } + } + return "" +} + +// ScanPackageImports returns a single-entry adjacency map: +// +// { : { : true, : true, ... } } +// +// where keys are directories relative to repoPath. A use declaration is +// "internal" when it begins with `crate::`, `self::`, or `super::`. +// External crates (anything else) are filtered out. `mod foo;` adds an +// edge from the current package to the child module subdir. 
+func (depsImpl) ScanPackageImports(repoPath, pkgDir, _ string) map[string]map[string]bool { + absDir := filepath.Join(repoPath, pkgDir) + entries, err := os.ReadDir(absDir) + if err != nil { + return nil + } + + deps := map[string]bool{} + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".rs") { + continue + } + absFile := filepath.Join(absDir, e.Name()) + if isRustTestFile(absFile) { + continue + } + collectImports(absFile, repoPath, pkgDir, deps) + } + if len(deps) == 0 { + return nil + } + return map[string]map[string]bool{pkgDir: deps} +} + +// collectImports parses one .rs file and adds each internal import / mod +// declaration to `deps`. Parse errors are silently ignored to match the Go +// analyzer's "skip broken files" behavior. +func collectImports(absFile, repoPath, pkgDir string, deps map[string]bool) { + tree, src, err := parseFile(absFile) + if err != nil { + return + } + defer tree.Close() + + walk(tree.RootNode(), func(n *sitter.Node) bool { + switch n.Type() { + case "use_declaration": + addUseEdge(n, src, pkgDir, deps) + case "mod_item": + addModEdge(n, src, repoPath, pkgDir, deps) + } + return true + }) +} + +// addUseEdge examines a `use` declaration and, if it starts with +// `crate::` / `self::` / `super::`, records an edge to the directory that +// corresponds to the path's module prefix. We stop at the penultimate +// segment because the final segment is the imported item (function/type/ +// trait), not a package directory. +func addUseEdge(n *sitter.Node, src []byte, pkgDir string, deps map[string]bool) { + // The `argument` field holds the import path tree. + arg := n.ChildByFieldName("argument") + if arg == nil { + return + } + // Walk the arg, skipping the final item to produce a package path. 
+ segs := collectUseSegments(arg, src) + if len(segs) == 0 { + return + } + target := resolveInternalPath(segs, pkgDir) + if target == "" { + return + } + deps[target] = true +} + +// collectUseSegments returns the left-to-right identifier sequence of a +// use path. We skip list forms (`use foo::{bar, baz}`) by only descending +// through scoped_identifier / scoped_use_list / identifier structures and +// taking the first branch — good enough to detect `crate::`/`self::`/ +// `super::` roots for edge classification. +// +// Only the prefix is load-bearing; we intentionally don't try to enumerate +// every symbol in a nested use list because the edge granularity is the +// module (directory), not the symbol. +func collectUseSegments(n *sitter.Node, src []byte) []string { + var segs []string + var collect func(*sitter.Node) + collect = func(cur *sitter.Node) { + if cur == nil { + return + } + switch cur.Type() { + case "scoped_identifier": + collect(cur.ChildByFieldName("path")) + if name := cur.ChildByFieldName("name"); name != nil { + segs = append(segs, nodeText(name, src)) + } + case "identifier", "crate", "self", "super": + segs = append(segs, nodeText(cur, src)) + case "use_list": + // Take only the first item of a `{a, b}` list — enough to + // retain the shared prefix that already got emitted. + if cur.ChildCount() > 0 { + for i := 0; i < int(cur.ChildCount()); i++ { + c := cur.Child(i) + if c != nil && c.IsNamed() { + collect(c) + return + } + } + } + case "scoped_use_list": + collect(cur.ChildByFieldName("path")) + if list := cur.ChildByFieldName("list"); list != nil { + collect(list) + } + case "use_as_clause": + collect(cur.ChildByFieldName("path")) + } + } + collect(n) + return segs +} + +// resolveInternalPath maps a sequence of use segments to a repo-relative +// package directory, or returns "" if the path is not internal. 
//
//	crate::foo::bar::Baz -> src/foo/bar    (crate root is `src`)
//	self::foo            -> pkgDir/foo     (child of the current package)
//	super::foo           -> parent(pkgDir)/foo
//
// A standard Cargo layout is assumed: the crate root lives at `src/` under
// the repo root for library crates and `src/bin/<name>.rs` / similar for
// binaries. The final segment is the imported item and is dropped, because
// edges are at package (directory) granularity, not symbol granularity.
func resolveInternalPath(segs []string, pkgDir string) string {
	// Need at least a root keyword plus the imported item; a single
	// segment like `use crate::foo;` names an item at the crate root, so
	// there is no directory to point at.
	if len(segs) < 2 {
		return ""
	}
	// Everything between the root keyword and the trailing item is the
	// module directory path.
	module := segs[1 : len(segs)-1]

	var base string
	switch segs[0] {
	case "crate":
		// `crate::` roots at `src/`.
		base = "src"
	case "self":
		base = pkgDir
	case "super":
		base = filepath.Dir(pkgDir)
		if base == "." || base == "/" {
			base = ""
		}
	default:
		// Anything else is an external crate — not an internal edge.
		return ""
	}
	return filepath.ToSlash(filepath.Join(append([]string{base}, module...)...))
}

// addModEdge records an edge for `mod foo;` declarations: the module
// always resolves to a sibling directory (or sibling file) inside pkgDir.
// We emit the directory path so the graph stays at directory granularity.
+func addModEdge(n *sitter.Node, src []byte, _, pkgDir string, deps map[string]bool) { + name := n.ChildByFieldName("name") + if name == nil { + return + } + modName := nodeText(name, src) + if modName == "" { + return + } + target := filepath.ToSlash(filepath.Join(pkgDir, modName)) + deps[target] = true +} diff --git a/internal/lang/rustanalyzer/deps_test.go b/internal/lang/rustanalyzer/deps_test.go new file mode 100644 index 0000000..5089adc --- /dev/null +++ b/internal/lang/rustanalyzer/deps_test.go @@ -0,0 +1,179 @@ +package rustanalyzer + +import ( + "os" + "path/filepath" + "testing" +) + +func TestParseCargoPackageName(t *testing.T) { + cases := []struct { + src string + want string + }{ + { + src: ` +[package] +name = "diffguard-rust-fixture" +version = "0.1.0" +`, + want: "diffguard-rust-fixture", + }, + { + src: ` +[package] +name="foo" +`, + want: "foo", + }, + { + // Nested table: name under [dependencies] must NOT match. + src: ` +[dependencies] +name = "other" + +[package] +name = "real-pkg" +`, + want: "real-pkg", + }, + { + src: `[workspace]\nmembers = []`, + want: "", + }, + } + for _, tc := range cases { + got := parseCargoPackageName(tc.src) + if got != tc.want { + t.Errorf("parseCargoPackageName got %q, want %q", got, tc.want) + } + } +} + +func TestDetectModulePath(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "Cargo.toml"), []byte(` +[package] +name = "my-crate" +version = "0.1.0" +`), 0644); err != nil { + t.Fatal(err) + } + got, err := depsImpl{}.DetectModulePath(dir) + if err != nil { + t.Fatal(err) + } + if got != "my-crate" { + t.Errorf("DetectModulePath = %q, want my-crate", got) + } +} + +func TestDetectModulePath_Missing(t *testing.T) { + dir := t.TempDir() + _, err := depsImpl{}.DetectModulePath(dir) + if err == nil { + t.Error("expected error for missing Cargo.toml") + } +} + +// TestScanPackageImports_InternalVsExternal asserts that `use crate::...` +// and `use super::...` produce internal edges 
while external crates and +// std imports are filtered out. +func TestScanPackageImports_InternalVsExternal(t *testing.T) { + root := t.TempDir() + + // Layout: + // Cargo.toml + // src/ + // lib.rs -- `use crate::foo::bar::Baz;` + `use std::fmt;` + // foo/ + // mod.rs + // bar.rs + // src/util/mod.rs -- `use super::foo::Helper;` + must := func(p, content string) { + full := filepath.Join(root, p) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(full, []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + must("Cargo.toml", ` +[package] +name = "demo" +`) + must("src/lib.rs", ` +use crate::foo::bar::Baz; +use std::fmt; +mod foo; +mod util; +`) + must("src/foo/mod.rs", ` +pub mod bar; +`) + must("src/foo/bar.rs", ` +pub struct Baz; +`) + must("src/util/mod.rs", ` +use super::foo::Helper; +`) + + // Scan src/ — should find the `use crate::foo::bar` edge (-> src/foo/bar) + // and `mod foo;` (-> src/foo) and `mod util;` (-> src/util). External + // std import must NOT create an edge. + edges := depsImpl{}.ScanPackageImports(root, "src", "demo") + if edges == nil { + t.Fatal("expected non-nil edges for src") + } + srcEdges := edges["src"] + if srcEdges == nil { + t.Fatalf("expected edges keyed by 'src', got %v", edges) + } + // Expected internal edges (directory nodes): + expectedInternal := []string{ + "src/foo/bar", // crate::foo::bar + "src/foo", // mod foo; + "src/util", // mod util; + } + for _, want := range expectedInternal { + if !srcEdges[want] { + t.Errorf("missing edge to %q in %v", want, srcEdges) + } + } + + // Nothing external should sneak in. + for k := range srcEdges { + if k == "std/fmt" || k == "std" { + t.Errorf("external std edge leaked: %q", k) + } + } +} + +// TestScanPackageImports_SuperResolution directly asserts the resolver on +// a "super::" use to keep the relative-path arithmetic honest in isolation. 
+func TestScanPackageImports_SuperResolution(t *testing.T) { + // super:: in pkgDir=src/util resolves to src/foo for `super::foo::X`. + got := resolveInternalPath([]string{"super", "foo", "Bar"}, "src/util") + want := "src/foo" + if got != want { + t.Errorf("resolveInternalPath(super::foo::Bar in src/util) = %q, want %q", got, want) + } + // self:: in pkgDir=src resolves to src for `self::foo::X`. + got = resolveInternalPath([]string{"self", "foo", "Bar"}, "src") + want = "src/foo" + if got != want { + t.Errorf("resolveInternalPath(self::foo::Bar in src) = %q, want %q", got, want) + } + // crate::x::y::Z always resolves to src/x/y regardless of pkgDir. + got = resolveInternalPath([]string{"crate", "x", "y", "Z"}, "anywhere") + want = "src/x/y" + if got != want { + t.Errorf("resolveInternalPath(crate::x::y::Z) = %q, want %q", got, want) + } + // External roots return "". + got = resolveInternalPath([]string{"std", "fmt", "Display"}, "src") + if got != "" { + t.Errorf("resolveInternalPath(std::fmt::Display) = %q, want empty", got) + } +} diff --git a/internal/lang/rustanalyzer/eval_test.go b/internal/lang/rustanalyzer/eval_test.go new file mode 100644 index 0000000..23c1e8e --- /dev/null +++ b/internal/lang/rustanalyzer/eval_test.go @@ -0,0 +1,162 @@ +package rustanalyzer_test + +import ( + "os/exec" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang/evalharness" +) + +// EVAL-2 — Rust correctness evaluation suite. +// +// Each test below drives the built diffguard binary against a fixture +// under evaldata// and compares the emitted report to +// expected.json. Findings are matched semantically (section name, +// severity, finding file+function) rather than byte-for-byte so +// cosmetic line shifts in the fixtures don't break the eval. +// +// Mutation-flavored tests are gated behind exec.LookPath("cargo"): when +// cargo is missing the test calls t.Skip, keeping `go test ./...` green +// on dev machines without a Rust toolchain. 
CI installs cargo before +// running `make eval-rust` so the gates open. +// +// Follow-up TODOs (left as an explicit block so the verifier agent sees +// them): +// +// - EVAL-2 sizes (file): add a >500-LOC fixture + negative control. +// - EVAL-2 deps (SDP): add a stable→unstable fixture plus reversed. +// - EVAL-2 churn: needs seeded git history; add once we have a +// shell-based git helper (bake the history at test start rather +// than committing a .git dir into this repo). +// - EVAL-2 mutation (annotation respect): exercise +// `// mutator-disable-func` and `// mutator-disable-next-line` — +// currently covered at the unit level in mutation_annotate_test.go +// but not at the end-to-end eval level. + +var binBuilder evalharness.BinaryBuilder + +// fixtureDir returns the absolute path of an evaldata// fixture. +func fixtureDir(t *testing.T, name string) string { + t.Helper() + wd, err := filepath.Abs(filepath.Join("evaldata", name)) + if err != nil { + t.Fatal(err) + } + return wd +} + +// runEvalFixture copies the fixture, runs diffguard with standard eval +// flags, and returns the (binary, repo, report) tuple so each test can +// make additional assertions if needed. +func runEvalFixture(t *testing.T, name string, extraFlags []string) { + t.Helper() + + binary := binBuilder.GetBinary(t, evalharness.RepoRoot(t)) + repo := evalharness.CopyFixture(t, fixtureDir(t, name)) + + flags := append([]string{ + "--paths", ".", + // Force the Rust analyzer so the shared mixed-repo fixtures + // below never pick up Go/TS sections by accident. + "--language", "rust", + }, extraFlags...) + + rpt := evalharness.RunBinary(t, binary, repo, flags) + exp, ok := evalharness.LoadExpectation(t, fixtureDir(t, name)) + if !ok { + t.Fatalf("fixture %s has no expected.json", name) + } + evalharness.AssertMatches(t, rpt, exp) +} + +// TestEval_Complexity_Positive: seeded nested match+if-let, expect FAIL. 
+func TestEval_Complexity_Positive(t *testing.T) { + runEvalFixture(t, "complexity_positive", []string{"--skip-mutation"}) +} + +// TestEval_Complexity_Negative: same behavior refactored; expect PASS. +func TestEval_Complexity_Negative(t *testing.T) { + runEvalFixture(t, "complexity_negative", []string{"--skip-mutation"}) +} + +// TestEval_Sizes_Function_Positive: seeded long fn, expect FAIL. +func TestEval_Sizes_Function_Positive(t *testing.T) { + runEvalFixture(t, "sizes_positive", []string{"--skip-mutation"}) +} + +// TestEval_Sizes_Function_Negative: refactored into small helpers, expect PASS. +func TestEval_Sizes_Function_Negative(t *testing.T) { + runEvalFixture(t, "sizes_negative", []string{"--skip-mutation"}) +} + +// TestEval_Deps_Cycle_Positive: seeded a<->b cycle, expect FAIL. +func TestEval_Deps_Cycle_Positive(t *testing.T) { + runEvalFixture(t, "deps_cycle_positive", []string{"--skip-mutation"}) +} + +// TestEval_Deps_Cycle_Negative: a+b both point at shared types, expect PASS. +func TestEval_Deps_Cycle_Negative(t *testing.T) { + runEvalFixture(t, "deps_cycle_negative", []string{"--skip-mutation"}) +} + +// TestEval_Mutation_Kill_Positive: well-tested arithmetic fn, expect PASS. +// Requires `cargo`; skipped otherwise. +func TestEval_Mutation_Kill_Positive(t *testing.T) { + requireCargo(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_kill_positive", mutationFlags()) +} + +// TestEval_Mutation_Kill_Negative: under-tested arithmetic fn, expect FAIL. +func TestEval_Mutation_Kill_Negative(t *testing.T) { + requireCargo(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_kill_negative", mutationFlags()) +} + +// TestEval_Mutation_RustOp_Positive: unwrap_removal on a tested fn, +// expect PASS (killed by type-mismatch at cargo-build time). 
+func TestEval_Mutation_RustOp_Positive(t *testing.T) { + requireCargo(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_rustop_positive", mutationFlags()) +} + +// TestEval_Mutation_RustOp_Negative: some_to_none with loose test, +// expect FAIL because the mutant survives. +func TestEval_Mutation_RustOp_Negative(t *testing.T) { + requireCargo(t) + if testing.Short() { + t.Skip("skipping mutation eval in -short mode") + } + runEvalFixture(t, "mutation_rustop_negative", mutationFlags()) +} + +// requireCargo skips the test when cargo isn't on $PATH. CI installs it; +// local dev boxes without Rust don't fail the eval suite. +func requireCargo(t *testing.T) { + t.Helper() + if _, err := exec.LookPath("cargo"); err != nil { + t.Skip("cargo not on PATH; skipping mutation eval") + } +} + +// mutationFlags returns the deterministic flag set used by every +// mutation-bearing fixture: full 100% sample, fixed worker count, +// generous timeout (mutation tests compile under cargo, which is slow +// on the first run). We deliberately do NOT set --skip-mutation here. 
+func mutationFlags() []string { + return []string{ + "--mutation-sample-rate", "100", + "--mutation-workers", "2", + "--test-timeout", "120s", + } +} + diff --git a/internal/lang/rustanalyzer/evaldata/complexity_negative/Cargo.toml b/internal/lang/rustanalyzer/evaldata/complexity_negative/Cargo.toml new file mode 100644 index 0000000..73aaa4d --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_negative/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "complexity_negative" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/complexity_negative/README.md b/internal/lang/rustanalyzer/evaldata/complexity_negative/README.md new file mode 100644 index 0000000..097ec35 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_negative/README.md @@ -0,0 +1,6 @@ +# complexity_negative + +Negative control for complexity_positive: same behavior split into flat +helpers. Each function is well under the default cognitive threshold. + +Expected verdict: Cognitive Complexity PASS, zero findings. diff --git a/internal/lang/rustanalyzer/evaldata/complexity_negative/expected.json b/internal/lang/rustanalyzer/evaldata/complexity_negative/expected.json new file mode 100644 index 0000000..9a638c5 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_negative/expected.json @@ -0,0 +1,10 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Cognitive Complexity", + "severity": "PASS", + "must_not_have_findings": true + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/complexity_negative/src/lib.rs b/internal/lang/rustanalyzer/evaldata/complexity_negative/src/lib.rs new file mode 100644 index 0000000..4d08855 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_negative/src/lib.rs @@ -0,0 +1,14 @@ +// Same behavior as complexity_positive split into flat helpers. Each +// function stays well under the cognitive threshold. 
+ +pub fn positive(x: Option<i32>) -> i32 { + x.unwrap_or(0) +} + +pub fn doubled(x: Option<i32>) -> i32 { + positive(x) * 2 +} + +pub fn classify(n: i32) -> i32 { + if n > 0 { 1 } else if n < 0 { -1 } else { 0 } +} diff --git a/internal/lang/rustanalyzer/evaldata/complexity_positive/Cargo.toml b/internal/lang/rustanalyzer/evaldata/complexity_positive/Cargo.toml new file mode 100644 index 0000000..7ec13a6 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_positive/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "complexity_positive" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/complexity_positive/README.md b/internal/lang/rustanalyzer/evaldata/complexity_positive/README.md new file mode 100644 index 0000000..2927e7b --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_positive/README.md @@ -0,0 +1,7 @@ +# complexity_positive + +Seeded issue: `tangled` has nested `if let` + `match` with guarded arms, +pushing cognitive complexity well above 10. + +Expected verdict: Cognitive Complexity section FAILs with a finding on +`tangled`. Overall WorstSeverity is FAIL.
diff --git a/internal/lang/rustanalyzer/evaldata/complexity_positive/expected.json b/internal/lang/rustanalyzer/evaldata/complexity_positive/expected.json new file mode 100644 index 0000000..5674ae9 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_positive/expected.json @@ -0,0 +1,12 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Cognitive Complexity", + "severity": "FAIL", + "must_have_findings": [ + {"file": "lib.rs", "function": "tangled", "severity": "FAIL"} + ] + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/complexity_positive/src/lib.rs b/internal/lang/rustanalyzer/evaldata/complexity_positive/src/lib.rs new file mode 100644 index 0000000..dd04df5 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/complexity_positive/src/lib.rs @@ -0,0 +1,29 @@ +// Seeded: nested match + if-let + guarded arms drive cognitive complexity +// well above the default 10 threshold. The expected finding pins the +// function name `tangled`. + +pub fn tangled(x: Option<i32>, y: Option<i32>, flag: bool) -> i32 { + let mut total = 0; + if let Some(a) = x { + if a > 0 && flag { + if let Some(b) = y { + match b { + v if v > 100 && a < 10 => total += v + a, + v if v < 0 || a == 0 => total -= v, + v if v == 0 => total = 0, + _ => total += 1, + } + } else if a > 5 || flag { + total += a; + } + } else { + match a { + 1 => total = 1, + 2 => total = 2, + 3 => total = 3, + _ => total = -1, + } + } + } + total +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/Cargo.toml b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/Cargo.toml new file mode 100644 index 0000000..3a4fc81 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "deps_cycle_negative" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/README.md b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/README.md new file mode 100644 index
0000000..ce998e2 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/README.md @@ -0,0 +1,6 @@ +# deps_cycle_negative + +Negative control: same modules as deps_cycle_positive but both depend on +a shared `types` module instead of each other, breaking the cycle. + +Expected verdict: Dependency Structure PASS, no cycle findings. diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/expected.json b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/expected.json new file mode 100644 index 0000000..75b2069 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Dependency Structure", + "severity": "PASS" + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/a/mod.rs b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/a/mod.rs new file mode 100644 index 0000000..4692303 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/a/mod.rs @@ -0,0 +1,5 @@ +use crate::types::Shared; + +pub fn a_fn(x: i32) -> Shared { + Shared { value: x + 1 } +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/b/mod.rs b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/b/mod.rs new file mode 100644 index 0000000..3f92611 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/b/mod.rs @@ -0,0 +1,5 @@ +use crate::types::Shared; + +pub fn b_fn(x: i32) -> Shared { + Shared { value: x + 2 } +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/lib.rs b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/lib.rs new file mode 100644 index 0000000..25ed13d --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/lib.rs @@ -0,0 +1,3 @@ +pub mod a; +pub mod b; +pub mod types; diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/types/mod.rs 
b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/types/mod.rs new file mode 100644 index 0000000..1a2d0cc --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_negative/src/types/mod.rs @@ -0,0 +1,3 @@ +pub struct Shared { + pub value: i32, +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/Cargo.toml b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/Cargo.toml new file mode 100644 index 0000000..357ff08 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "deps_cycle_positive" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/README.md b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/README.md new file mode 100644 index 0000000..ec5e59b --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/README.md @@ -0,0 +1,7 @@ +# deps_cycle_positive + +Seeded issue: `src/a/mod.rs` imports `crate::b::b_fn` while +`src/b/mod.rs` imports `crate::a::a_fn`, producing a 2-cycle in the +internal dependency graph. + +Expected verdict: Dependency Structure FAIL with a cycle finding. 
diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/expected.json b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/expected.json new file mode 100644 index 0000000..5e252f8 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Dependency Structure", + "severity": "FAIL" + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/a/mod.rs b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/a/mod.rs new file mode 100644 index 0000000..d353b2a --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/a/mod.rs @@ -0,0 +1,5 @@ +use crate::b::b_fn; + +pub fn a_fn(x: i32) -> i32 { + b_fn(x) + 1 +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/b/mod.rs b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/b/mod.rs new file mode 100644 index 0000000..3dba34a --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/b/mod.rs @@ -0,0 +1,5 @@ +use crate::a::a_fn; + +pub fn b_fn(x: i32) -> i32 { + if x > 100 { x } else { a_fn(x - 1) } +} diff --git a/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/lib.rs b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/lib.rs new file mode 100644 index 0000000..677af14 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/deps_cycle_positive/src/lib.rs @@ -0,0 +1,2 @@ +pub mod a; +pub mod b; diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/Cargo.toml b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/Cargo.toml new file mode 100644 index 0000000..1fec4c4 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "mutation_kill_negative" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/README.md 
b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/README.md new file mode 100644 index 0000000..e9aeb41 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/README.md @@ -0,0 +1,9 @@ +# mutation_kill_negative + +Same `classify(x)` as mutation_kill_positive but the test suite covers +only one branch. Most Tier-1 mutants survive, dropping the kill rate +below the 90% threshold. + +Expected verdict: Mutation Testing FAIL. + +Requires `cargo` on PATH — eval_test.go skips cleanly when absent. diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/expected.json b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/expected.json new file mode 100644 index 0000000..8d5211f --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Mutation Testing", + "severity": "FAIL" + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/src/lib.rs b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/src/lib.rs new file mode 100644 index 0000000..531671b --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_negative/src/lib.rs @@ -0,0 +1,24 @@ +// Same classify() as mutation_kill_positive, but tests only cover a +// single branch so most Tier-1 mutants survive. + +pub fn classify(x: i32) -> i32 { + if x > 0 { + 1 + } else if x < 0 { + -1 + } else { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn one_positive_case_only() { + // Covers only the positive branch; boundary, sign, and zero + // cases are untested so mutants survive. 
+ assert_eq!(classify(5), 1); + } +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/Cargo.toml b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/Cargo.toml new file mode 100644 index 0000000..2faf713 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "mutation_kill_positive" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/README.md b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/README.md new file mode 100644 index 0000000..b8bcd8b --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/README.md @@ -0,0 +1,9 @@ +# mutation_kill_positive + +Well-tested `classify(x)` with boundary + sign coverage. Tier-1 mutation +operators (conditional_boundary, negate_conditional, math_operator, +return_value) should be killed by the inline `tests` module. + +Expected verdict: Mutation Testing PASS; Tier-1 kill rate ≥ 90%. + +Requires `cargo` on PATH — eval_test.go skips cleanly when absent. 
diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/expected.json b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/expected.json new file mode 100644 index 0000000..ebfd556 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Mutation Testing", + "severity": "PASS" + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/src/lib.rs b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/src/lib.rs new file mode 100644 index 0000000..75c6f3c --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_kill_positive/src/lib.rs @@ -0,0 +1,43 @@ +// Tested arithmetic function with boundary + sign coverage in the inline +// test module, so mutation operators (conditional_boundary, +// negate_conditional, math_operator, return_value) are killed. + +pub fn classify(x: i32) -> i32 { + if x > 0 { + 1 + } else if x < 0 { + -1 + } else { + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn positive_returns_one() { + assert_eq!(classify(5), 1); + } + + #[test] + fn negative_returns_minus_one() { + assert_eq!(classify(-5), -1); + } + + #[test] + fn zero_returns_zero() { + assert_eq!(classify(0), 0); + } + + #[test] + fn boundary_one_is_positive() { + assert_eq!(classify(1), 1); + } + + #[test] + fn boundary_minus_one_is_negative() { + assert_eq!(classify(-1), -1); + } +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/Cargo.toml b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/Cargo.toml new file mode 100644 index 0000000..90644a1 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "mutation_rustop_negative" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/README.md 
b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/README.md new file mode 100644 index 0000000..de601d0 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/README.md @@ -0,0 +1,11 @@ +# mutation_rustop_negative + +Negative control for mutation_rustop_positive. `wrap(x)` returns +`Some(x * 2)` but the test never inspects the Option variant, so the +`some_to_none` mutant survives and the Tier-1 kill rate falls below +threshold. + +Expected verdict: Mutation Testing FAIL — confirms the operator +generates meaningful mutants whose signal depends on test quality. + +Requires `cargo` on PATH — eval_test.go skips cleanly when absent. diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/expected.json b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/expected.json new file mode 100644 index 0000000..8d5211f --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Mutation Testing", + "severity": "FAIL" + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/src/lib.rs b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/src/lib.rs new file mode 100644 index 0000000..a014d1d --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_negative/src/lib.rs @@ -0,0 +1,19 @@ +// Uses Some(x) but tests don't distinguish Some from None — the test +// merely invokes the function without asserting the wrapped value, so +// the `some_to_none` mutant survives. + +pub fn wrap(x: i32) -> Option<i32> { + Some(x * 2) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn doesnt_panic() { + // Invoking the function is all we check; the Option variant is + // never inspected.
+ let _ = wrap(5); + } +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/Cargo.toml b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/Cargo.toml new file mode 100644 index 0000000..640bb82 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "mutation_rustop_positive" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/README.md b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/README.md new file mode 100644 index 0000000..d74fce9 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/README.md @@ -0,0 +1,10 @@ +# mutation_rustop_positive + +Exercises the Rust-specific `unwrap_removal` operator. `double(opt)` +uses `.unwrap()` on an `Option`; removing the call breaks types, so +cargo build fails and the mutant is killed. + +Expected verdict: Mutation Testing PASS — at least one unwrap_removal +mutant is generated and killed. + +Requires `cargo` on PATH — eval_test.go skips cleanly when absent. 
diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/expected.json b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/expected.json new file mode 100644 index 0000000..ebfd556 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/expected.json @@ -0,0 +1,9 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Mutation Testing", + "severity": "PASS" + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/src/lib.rs b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/src/lib.rs new file mode 100644 index 0000000..7623af1 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/mutation_rustop_positive/src/lib.rs @@ -0,0 +1,24 @@ +// Uses .unwrap() in a well-tested way: the test asserts both the Some +// (happy) path and constructs the expected value after unwrap. Removing +// .unwrap() breaks the type signature and the test fails, killing the +// mutant. + +pub fn double(opt: Option<i32>) -> i32 { + let x = opt.unwrap(); + x * 2 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn doubles_the_value() { + assert_eq!(double(Some(5)), 10); + } + + #[test] + fn doubles_zero() { + assert_eq!(double(Some(0)), 0); + } +} diff --git a/internal/lang/rustanalyzer/evaldata/sizes_negative/Cargo.toml b/internal/lang/rustanalyzer/evaldata/sizes_negative/Cargo.toml new file mode 100644 index 0000000..073dfec --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_negative/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "sizes_negative" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/sizes_negative/README.md b/internal/lang/rustanalyzer/evaldata/sizes_negative/README.md new file mode 100644 index 0000000..2ad9624 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_negative/README.md @@ -0,0 +1,6 @@ +# sizes_negative + +Negative control: same behavior split into short helpers.
No function +approaches the 50-line threshold. + +Expected verdict: Code Sizes PASS, zero findings. diff --git a/internal/lang/rustanalyzer/evaldata/sizes_negative/expected.json b/internal/lang/rustanalyzer/evaldata/sizes_negative/expected.json new file mode 100644 index 0000000..3ac1812 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_negative/expected.json @@ -0,0 +1,10 @@ +{ + "worst_severity": "PASS", + "sections": [ + { + "name": "Code Sizes", + "severity": "PASS", + "must_not_have_findings": true + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/sizes_negative/src/lib.rs b/internal/lang/rustanalyzer/evaldata/sizes_negative/src/lib.rs new file mode 100644 index 0000000..500f694 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_negative/src/lib.rs @@ -0,0 +1,13 @@ +// Same overall behavior as sizes_positive, refactored across helpers so +// no single function exceeds the 50-line threshold. + +pub fn step_one(x: i32) -> i32 { x + 1 } +pub fn step_two(x: i32) -> i32 { step_one(x) + 1 } +pub fn step_three(x: i32) -> i32 { step_two(x) + 1 } + +pub fn short_func(input: i32) -> i32 { + let a = step_one(input); + let b = step_two(a); + let c = step_three(b); + c +} diff --git a/internal/lang/rustanalyzer/evaldata/sizes_positive/Cargo.toml b/internal/lang/rustanalyzer/evaldata/sizes_positive/Cargo.toml new file mode 100644 index 0000000..a9c962b --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_positive/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "sizes_positive" +version = "0.1.0" +edition = "2021" diff --git a/internal/lang/rustanalyzer/evaldata/sizes_positive/README.md b/internal/lang/rustanalyzer/evaldata/sizes_positive/README.md new file mode 100644 index 0000000..3240c78 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_positive/README.md @@ -0,0 +1,7 @@ +# sizes_positive + +Seeded issue: `long_func` is ~60 lines of straight-line statements, +exceeding the default 50-line function threshold without 
tripping the +complexity threshold. + +Expected verdict: Code Sizes FAIL with a finding on `long_func`. diff --git a/internal/lang/rustanalyzer/evaldata/sizes_positive/expected.json b/internal/lang/rustanalyzer/evaldata/sizes_positive/expected.json new file mode 100644 index 0000000..39ca591 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_positive/expected.json @@ -0,0 +1,12 @@ +{ + "worst_severity": "FAIL", + "sections": [ + { + "name": "Code Sizes", + "severity": "FAIL", + "must_have_findings": [ + {"file": "lib.rs", "function": "long_func", "severity": "FAIL"} + ] + } + ] +} diff --git a/internal/lang/rustanalyzer/evaldata/sizes_positive/src/lib.rs b/internal/lang/rustanalyzer/evaldata/sizes_positive/src/lib.rs new file mode 100644 index 0000000..3338153 --- /dev/null +++ b/internal/lang/rustanalyzer/evaldata/sizes_positive/src/lib.rs @@ -0,0 +1,65 @@ +// Seeded: a function whose body is ~60 lines of straight-line statements. +// The complexity score stays low (no branching); only the size threshold +// trips. 
+ +pub fn long_func(input: i32) -> i32 { + let a = input + 1; + let b = a + 1; + let c = b + 1; + let d = c + 1; + let e = d + 1; + let f = e + 1; + let g = f + 1; + let h = g + 1; + let i = h + 1; + let j = i + 1; + let k = j + 1; + let l = k + 1; + let m = l + 1; + let n = m + 1; + let o = n + 1; + let p = o + 1; + let q = p + 1; + let r = q + 1; + let s = r + 1; + let t = s + 1; + let u = t + 1; + let v = u + 1; + let w = v + 1; + let x = w + 1; + let y = x + 1; + let z = y + 1; + let aa = z + 1; + let bb = aa + 1; + let cc = bb + 1; + let dd = cc + 1; + let ee = dd + 1; + let ff = ee + 1; + let gg = ff + 1; + let hh = gg + 1; + let ii = hh + 1; + let jj = ii + 1; + let kk = jj + 1; + let ll = kk + 1; + let mm = ll + 1; + let nn = mm + 1; + let oo = nn + 1; + let pp = oo + 1; + let qq = pp + 1; + let rr = qq + 1; + let ss = rr + 1; + let tt = ss + 1; + let uu = tt + 1; + let vv = uu + 1; + let ww = vv + 1; + let xx = ww + 1; + let yy = xx + 1; + let zz = yy + 1; + let aaa = zz + 1; + let bbb = aaa + 1; + let ccc = bbb + 1; + let ddd = ccc + 1; + let eee = ddd + 1; + let fff = eee + 1; + fff +} diff --git a/internal/lang/rustanalyzer/helpers_test.go b/internal/lang/rustanalyzer/helpers_test.go new file mode 100644 index 0000000..09c5b10 --- /dev/null +++ b/internal/lang/rustanalyzer/helpers_test.go @@ -0,0 +1,10 @@ +package rustanalyzer + +import "os" + +// writeFile is a tiny helper shared across the rustanalyzer test files. +// We define it here (rather than importing testutil) so each _test.go +// file can stay self-contained in what it inspects. 
+func writeFile(path string, data []byte) error { + return os.WriteFile(path, data, 0644) +} diff --git a/internal/lang/rustanalyzer/mutation_annotate.go b/internal/lang/rustanalyzer/mutation_annotate.go new file mode 100644 index 0000000..78d6fb0 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_annotate.go @@ -0,0 +1,108 @@ +package rustanalyzer + +import ( + "strings" + + sitter "github.com/smacker/go-tree-sitter" +) + +// annotationScannerImpl implements lang.AnnotationScanner for Rust. The +// disable annotations are identical to the Go forms: +// +// // mutator-disable-next-line +// // mutator-disable-func +// +// `//` and `/* ... */` comments are both accepted — tree-sitter exposes +// them as `line_comment` and `block_comment` respectively. +type annotationScannerImpl struct{} + +// ScanAnnotations returns the set of 1-based source lines on which mutation +// generation should be suppressed. +func (annotationScannerImpl) ScanAnnotations(absPath string) (map[int]bool, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, err + } + defer tree.Close() + + disabled := map[int]bool{} + funcRanges := collectFuncRanges(tree.RootNode(), src) + + walk(tree.RootNode(), func(n *sitter.Node) bool { + switch n.Type() { + case "line_comment", "block_comment": + applyAnnotation(n, src, funcRanges, disabled) + } + return true + }) + return disabled, nil +} + +// applyAnnotation consumes a single comment node and, if it carries a +// known annotation, disables the appropriate line(s) in `disabled`. 
+func applyAnnotation(comment *sitter.Node, src []byte, funcs []funcRange, disabled map[int]bool) { + text := stripCommentMarkers(nodeText(comment, src)) + line := nodeLine(comment) + switch { + case strings.HasPrefix(text, "mutator-disable-next-line"): + disabled[line+1] = true + case strings.HasPrefix(text, "mutator-disable-func"): + disableEnclosingFunc(line, funcs, disabled) + } +} + +// stripCommentMarkers strips `//`, `/*`, `*/` and surrounding whitespace. +// Matches the Go analyzer's helper so annotation behavior stays uniform +// across languages. +func stripCommentMarkers(raw string) string { + s := strings.TrimSpace(raw) + s = strings.TrimPrefix(s, "//") + s = strings.TrimPrefix(s, "/*") + s = strings.TrimSuffix(s, "*/") + return strings.TrimSpace(s) +} + +// disableEnclosingFunc marks every line of the function the comment +// belongs to as disabled. A comment belongs to a function when it sits +// inside the function's range, or when it directly precedes the function +// (at most one blank line between them, matching the Go analyzer). +func disableEnclosingFunc(commentLine int, funcs []funcRange, disabled map[int]bool) { + for _, r := range funcs { + if isCommentForFunc(commentLine, r) { + for i := r.start; i <= r.end; i++ { + disabled[i] = true + } + return + } + } +} + +func isCommentForFunc(commentLine int, r funcRange) bool { + if commentLine >= r.start && commentLine <= r.end { + return true + } + return r.start > commentLine && r.start-commentLine <= 2 +} + +// funcRange is the 1-based inclusive line span of a function_item node. +// The same range shape is used by the annotation scanner and by the mutant +// generator (via its filtering of "which lines belong to a function"). +type funcRange struct{ start, end int } + +// collectFuncRanges returns one funcRange per function_item in the file. +// Methods inside impl blocks are included too — same source-line universe +// the mutant generator cares about. 
+func collectFuncRanges(root *sitter.Node, _ []byte) []funcRange { + var ranges []funcRange + walk(root, func(n *sitter.Node) bool { + if n.Type() != "function_item" { + return true + } + ranges = append(ranges, funcRange{ + start: nodeLine(n), + end: nodeEndLine(n), + }) + return true + }) + return ranges +} diff --git a/internal/lang/rustanalyzer/mutation_annotate_test.go b/internal/lang/rustanalyzer/mutation_annotate_test.go new file mode 100644 index 0000000..48062c3 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_annotate_test.go @@ -0,0 +1,116 @@ +package rustanalyzer + +import ( + "path/filepath" + "testing" +) + +// TestScanAnnotations_NextLine writes a fixture with a mutator-disable- +// next-line comment and confirms the following source line is disabled. +func TestScanAnnotations_NextLine(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`fn f(x: i32) -> i32 { + // mutator-disable-next-line + if x > 0 { 1 } else { 0 } +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + // Line 3 (the `if` line) should be disabled. + if !disabled[3] { + t.Errorf("expected line 3 disabled, got %v", disabled) + } + if disabled[4] { + t.Errorf("line 4 should not be disabled (unrelated), got %v", disabled) + } +} + +// TestScanAnnotations_FuncWide asserts that `mutator-disable-func` +// marks every line of the enclosing function — including the signature +// line. +func TestScanAnnotations_FuncWide(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`// mutator-disable-func +fn top(x: i32) -> i32 { + x + 1 +} + +fn other(x: i32) -> i32 { + x * 2 +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + // The `top` function spans lines 2-4. 
All three must be disabled. + for _, line := range []int{2, 3, 4} { + if !disabled[line] { + t.Errorf("expected line %d disabled in top, got %v", line, disabled) + } + } + // The `other` function (lines 6-8) must not be touched. + for _, line := range []int{6, 7, 8} { + if disabled[line] { + t.Errorf("line %d in other should not be disabled, got %v", line, disabled) + } + } +} + +// TestScanAnnotations_UnrelatedComments is a negative control: ordinary +// comments must not toggle anything. +func TestScanAnnotations_UnrelatedComments(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`// just a regular comment +fn f(x: i32) -> i32 { + // another regular comment + x +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + if len(disabled) != 0 { + t.Errorf("expected empty disabled map, got %v", disabled) + } +} + +// TestScanAnnotations_FuncInsideComment is a coverage test for the case +// where the disable-func comment lives inside the function body rather +// than preceding it. The Go analyzer accepts both positions. 
+func TestScanAnnotations_FuncInsideComment(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + src := []byte(`fn only(x: i32) -> i32 { + // mutator-disable-func + x + 1 +} +`) + if err := writeFile(path, src); err != nil { + t.Fatal(err) + } + disabled, err := annotationScannerImpl{}.ScanAnnotations(path) + if err != nil { + t.Fatal(err) + } + for _, line := range []int{1, 2, 3, 4} { + if !disabled[line] { + t.Errorf("expected line %d disabled, got %v", line, disabled) + } + } +} diff --git a/internal/lang/rustanalyzer/mutation_apply.go b/internal/lang/rustanalyzer/mutation_apply.go new file mode 100644 index 0000000..d651f84 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_apply.go @@ -0,0 +1,321 @@ +package rustanalyzer + +import ( + "strings" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantApplierImpl implements lang.MutantApplier for Rust. Unlike the Go +// analyzer, which rewrites the AST and re-renders with go/printer, we +// operate on source bytes directly: tree-sitter reports exact byte offsets +// for every node, and text-level edits keep formatting intact without a +// dedicated Rust formatter. +// +// After every mutation we re-parse the output with tree-sitter and check +// for ERROR nodes. If the mutation produced syntactically invalid code we +// return nil (no bytes, no error) — the mutation orchestrator treats that +// as "skip this mutant", matching the Go analyzer's contract. +type mutantApplierImpl struct{} + +// ApplyMutation returns the mutated file bytes, or (nil, nil) if the +// mutation can't be applied cleanly. 
+func (mutantApplierImpl) ApplyMutation(absPath string, site lang.MutantSite) ([]byte, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil + } + defer tree.Close() + + mutated := applyBySite(tree.RootNode(), src, site) + if mutated == nil { + return nil, nil + } + if !isValidRust(mutated) { + // Re-parse check per the design doc: don't ship corrupt mutants. + return nil, nil + } + return mutated, nil +} + +// applyBySite dispatches to the operator-specific helper. Each helper +// returns either the mutated byte slice or nil if it couldn't find a +// matching node on the target line. +func applyBySite(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + switch site.Operator { + case "conditional_boundary", "negate_conditional", "math_operator": + return applyBinary(root, src, site) + case "boolean_substitution": + return applyBool(root, src, site) + case "return_value": + return applyReturnValue(root, src, site) + case "some_to_none": + return applySomeToNone(root, src, site) + case "branch_removal": + return applyBranchRemoval(root, src, site) + case "statement_deletion": + return applyStatementDeletion(root, src, site) + case "unwrap_removal": + return applyUnwrapRemoval(root, src, site) + case "question_mark_removal": + return applyQuestionMarkRemoval(root, src, site) + } + return nil +} + +// findOnLine returns the first node matching `pred` whose start line +// equals `line`. We keep it small: the CST walks are tiny and predicates +// stay decidable in one pass. +func findOnLine(root *sitter.Node, line int, pred func(*sitter.Node) bool) *sitter.Node { + var hit *sitter.Node + walk(root, func(n *sitter.Node) bool { + if hit != nil { + return false + } + if nodeLine(n) != line { + // We're still searching; descend into children that might + // reach the target line. 
+ if int(n.StartPoint().Row)+1 > line || int(n.EndPoint().Row)+1 < line { + return false + } + return true + } + if pred(n) { + hit = n + return false + } + return true + }) + return hit +} + +// replaceRange returns src with the bytes [start, end) replaced by `with`. +func replaceRange(src []byte, start, end uint32, with []byte) []byte { + out := make([]byte, 0, len(src)-int(end-start)+len(with)) + out = append(out, src[:start]...) + out = append(out, with...) + out = append(out, src[end:]...) + return out +} + +// applyBinary swaps the operator of a binary_expression on the target line. +// We honor the site description so overlapping binaries on the same line +// (`a == b && c > d`) mutate the exact one the generator emitted. +func applyBinary(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + fromOp, toOp := parseBinaryDesc(site.Description) + if fromOp == "" { + return nil + } + var target *sitter.Node + walk(root, func(n *sitter.Node) bool { + if target != nil { + return false + } + if n.Type() != "binary_expression" || nodeLine(n) != site.Line { + return true + } + op := n.ChildByFieldName("operator") + if op != nil && op.Type() == fromOp { + target = n + return false + } + return true + }) + if target == nil { + return nil + } + op := target.ChildByFieldName("operator") + return replaceRange(src, op.StartByte(), op.EndByte(), []byte(toOp)) +} + +// parseBinaryDesc parses "X -> Y" from the mutant description. +func parseBinaryDesc(desc string) (string, string) { + parts := strings.SplitN(desc, " -> ", 2) + if len(parts) != 2 { + return "", "" + } + return parts[0], parts[1] +} + +// applyBool flips a boolean literal on the target line. 
+func applyBool(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + n := findOnLine(root, site.Line, func(n *sitter.Node) bool { + if n.Type() != "boolean_literal" { + return false + } + txt := nodeText(n, src) + return txt == "true" || txt == "false" + }) + if n == nil { + return nil + } + txt := nodeText(n, src) + flipped := "true" + if txt == "true" { + flipped = "false" + } + return replaceRange(src, n.StartByte(), n.EndByte(), []byte(flipped)) +} + +// applyReturnValue replaces the returned expression with +// `Default::default()`. Works for any non-unit return; tests on Option / +// unit / numeric returns will all observe either a type mismatch (caught +// by the re-parse step — wait, rustc type errors won't show in +// tree-sitter; so this is a Tier-1 operator that can produce equivalent +// mutants on some types, which we accept). +func applyReturnValue(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + ret := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "return_expression" + }) + if ret == nil { + return nil + } + if ret.NamedChildCount() == 0 { + return nil + } + value := ret.NamedChild(0) + if value == nil { + return nil + } + return replaceRange(src, value.StartByte(), value.EndByte(), []byte("Default::default()")) +} + +// applySomeToNone replaces a `Some(x)` call expression with `None`. The +// target can sit anywhere — inside a return, as the tail expression of +// a block, as an argument to another function, etc. We find the first +// call_expression on the line whose function identifier is exactly +// `Some` and rewrite the entire call to `None`. 
+func applySomeToNone(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + call := findOnLine(root, site.Line, func(n *sitter.Node) bool { + if n.Type() != "call_expression" { + return false + } + fn := n.ChildByFieldName("function") + return fn != nil && nodeText(fn, src) == "Some" + }) + if call == nil { + return nil + } + return replaceRange(src, call.StartByte(), call.EndByte(), []byte("None")) +} + +// applyBranchRemoval empties the consequence block of an if_expression. +// We replace the block contents with nothing so the braces remain and +// the code still parses. +func applyBranchRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + ifNode := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "if_expression" + }) + if ifNode == nil { + return nil + } + body := ifNode.ChildByFieldName("consequence") + if body == nil { + return nil + } + // Preserve the outer braces; replace inner bytes with an empty body. + inner := bodyInnerRange(body, src) + if inner == nil { + return nil + } + return replaceRange(src, inner[0], inner[1], []byte{}) +} + +// bodyInnerRange returns [openBracePlusOne, closeBrace) for a block node — +// i.e. the byte range strictly inside the braces. Returns nil if the +// node doesn't look like a block with braces. +func bodyInnerRange(block *sitter.Node, src []byte) []uint32 { + start := block.StartByte() + end := block.EndByte() + if start >= end { + return nil + } + if src[start] != '{' || src[end-1] != '}' { + return nil + } + return []uint32{start + 1, end - 1} +} + +// applyStatementDeletion replaces a bare call statement with the empty +// expression `();`. Keeps the source parseable and kills the side effect. 
+func applyStatementDeletion(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + stmt := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "expression_statement" + }) + if stmt == nil { + return nil + } + return replaceRange(src, stmt.StartByte(), stmt.EndByte(), []byte("();")) +} + +// applyUnwrapRemoval strips `.unwrap()` / `.expect(...)` from a call, +// leaving the receiver. We find the outer call_expression, then rewrite +// the whole call to be just the receiver. +func applyUnwrapRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + call := findOnLine(root, site.Line, func(n *sitter.Node) bool { + if n.Type() != "call_expression" { + return false + } + fn := n.ChildByFieldName("function") + if fn == nil || fn.Type() != "field_expression" { + return false + } + field := fn.ChildByFieldName("field") + if field == nil { + return false + } + name := nodeText(field, src) + return name == "unwrap" || name == "expect" + }) + if call == nil { + return nil + } + fn := call.ChildByFieldName("function") + receiver := fn.ChildByFieldName("value") + if receiver == nil { + return nil + } + return replaceRange(src, call.StartByte(), call.EndByte(), + src[receiver.StartByte():receiver.EndByte()]) +} + +// applyQuestionMarkRemoval strips the trailing `?` from a try_expression. +// Grammar shape: (try_expression ?) — the `?` token sits after the +// inner expression's end byte. +func applyQuestionMarkRemoval(root *sitter.Node, src []byte, site lang.MutantSite) []byte { + try := findOnLine(root, site.Line, func(n *sitter.Node) bool { + return n.Type() == "try_expression" + }) + if try == nil { + return nil + } + // The inner expression is the first (and only) named child. 
+ if try.NamedChildCount() == 0 { + return nil + } + inner := try.NamedChild(0) + if inner == nil { + return nil + } + return replaceRange(src, try.StartByte(), try.EndByte(), + src[inner.StartByte():inner.EndByte()]) +} + +// isValidRust re-parses the mutated source and reports whether tree-sitter +// encountered any syntax errors. tree-sitter marks malformed regions with +// ERROR nodes (or sets HasError on ancestors); we check both. +func isValidRust(src []byte) bool { + tree, err := parseBytes(src) + if err != nil || tree == nil { + return false + } + defer tree.Close() + root := tree.RootNode() + if root == nil { + return false + } + return !root.HasError() +} + diff --git a/internal/lang/rustanalyzer/mutation_apply_test.go b/internal/lang/rustanalyzer/mutation_apply_test.go new file mode 100644 index 0000000..a2d9915 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_apply_test.go @@ -0,0 +1,241 @@ +package rustanalyzer + +import ( + "path/filepath" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// applyAt writes src to a temp file and invokes the applier for `site`. +// Returns the mutated bytes (or nil if the applier skipped the site). 
+func applyAt(t *testing.T, src string, site lang.MutantSite) []byte { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + out, err := mutantApplierImpl{}.ApplyMutation(path, site) + if err != nil { + t.Fatal(err) + } + return out +} + +func TestApply_BinaryOperator(t *testing.T) { + src := `fn f(x: i32) -> bool { + x > 0 +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "conditional_boundary", + Description: "> -> >=", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "x >= 0") { + t.Errorf("expected 'x >= 0' in output, got:\n%s", out) + } +} + +func TestApply_BooleanFlip(t *testing.T) { + src := `fn f() -> bool { true } +` + site := lang.MutantSite{ + File: "a.rs", + Line: 1, + Operator: "boolean_substitution", + Description: "true -> false", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "false") { + t.Errorf("expected 'false' in output, got:\n%s", out) + } + if strings.Contains(string(out), "true") { + t.Errorf("'true' should have been replaced, got:\n%s", out) + } +} + +func TestApply_ReturnValueToDefault(t *testing.T) { + src := `fn f() -> i32 { + return 42; +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "return_value", + Description: "replace return value with Default::default()", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "Default::default()") { + t.Errorf("expected Default::default(), got:\n%s", out) + } +} + +func TestApply_SomeToNone(t *testing.T) { + src := `fn g(x: i32) -> Option { + return Some(x); +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "some_to_none", + Description: "Some(x) -> None", + } + out := applyAt(t, src, site) + if out == nil { + 
t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "return None;") { + t.Errorf("expected 'return None;', got:\n%s", out) + } +} + +func TestApply_BranchRemoval(t *testing.T) { + src := `fn side() {} +fn f(x: i32) { + if x > 0 { + side(); + } +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 3, + Operator: "branch_removal", + Description: "remove if body", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + // The call inside the body should be gone. + if strings.Contains(string(out), "side();") && strings.Contains(string(out), "if x > 0") { + // The function-declaration body still contains `side()` statement; + // we're asserting the if-body is emptied. After branch removal the + // `side();` call inside the braces must not appear between the if + // braces. Parse and check the if body is empty (approximated via + // a substring match that fails only if the consequence body still + // has text). + if strings.Contains(string(out), "if x > 0 {\n side();") { + t.Errorf("if body not emptied, got:\n%s", out) + } + } +} + +func TestApply_StatementDeletion(t *testing.T) { + src := `fn side() {} +fn f() { + side(); +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 3, + Operator: "statement_deletion", + Description: "remove call statement", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if !strings.Contains(string(out), "();") { + t.Errorf("expected statement replaced with '();', got:\n%s", out) + } +} + +func TestApply_UnwrapRemoval(t *testing.T) { + src := `fn g(x: Option) -> i32 { + x.unwrap() +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "unwrap_removal", + Description: "strip .unwrap()", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if strings.Contains(string(out), "unwrap") { + t.Errorf(".unwrap() not stripped, got:\n%s", out) + } +} + +func TestApply_QuestionMarkRemoval(t 
*testing.T) { + src := `fn g(x: Result) -> Result { + let v = x?; + Ok(v) +} +` + site := lang.MutantSite{ + File: "a.rs", + Line: 2, + Operator: "question_mark_removal", + Description: "strip trailing ?", + } + out := applyAt(t, src, site) + if out == nil { + t.Fatal("applier returned nil") + } + if strings.Contains(string(out), "?;") { + t.Errorf("trailing ? not stripped, got:\n%s", out) + } +} + +// TestApply_ReparseRejectsCorrupt asserts that when the applier produces +// source that fails to tree-sitter parse (via a synthetic "apply every +// operator that doesn't exist" scenario), the applier returns nil. +// +// We exercise this via an operator the applier doesn't know — result is +// nil bytes, not a corrupt output. +func TestApply_UnknownOperatorReturnsNil(t *testing.T) { + src := `fn f() {} +` + site := lang.MutantSite{Line: 1, Operator: "nonexistent_op"} + out := applyAt(t, src, site) + if out != nil { + t.Errorf("expected nil for unknown operator, got:\n%s", out) + } +} + +// TestApply_SiteMismatchReturnsNil asserts a mutant whose target line has +// no matching node is a silent no-op (nil bytes, no error). +func TestApply_SiteMismatchReturnsNil(t *testing.T) { + src := `fn f() -> i32 { 42 } +` + // boolean_substitution on a line that has no boolean literal. + site := lang.MutantSite{Line: 1, Operator: "boolean_substitution", Description: "true -> false"} + out := applyAt(t, src, site) + if out != nil { + t.Errorf("expected nil for site with no matching node, got:\n%s", out) + } +} + +// TestIsValidRust exercises the re-parse gate directly. 
+func TestIsValidRust(t *testing.T) { + good := []byte(`fn f() -> i32 { 42 }`) + bad := []byte(`fn f() -> i32 { 42 `) // missing brace + if !isValidRust(good) { + t.Error("well-formed Rust reported invalid") + } + if isValidRust(bad) { + t.Error("malformed Rust reported valid") + } +} diff --git a/internal/lang/rustanalyzer/mutation_generate.go b/internal/lang/rustanalyzer/mutation_generate.go new file mode 100644 index 0000000..b6f3584 --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_generate.go @@ -0,0 +1,292 @@ +package rustanalyzer + +import ( + "fmt" + "sort" + "strings" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// mutantGeneratorImpl implements lang.MutantGenerator for Rust. It emits +// canonical operators (conditional_boundary, negate_conditional, +// math_operator, return_value, boolean_substitution, branch_removal, +// statement_deletion) plus the Rust-specific operators defined in the +// design doc: unwrap_removal, some_to_none, question_mark_removal. +// +// `incdec` is deliberately absent — Rust has no `++`/`--` operators. +type mutantGeneratorImpl struct{} + +// GenerateMutants walks the CST and emits a MutantSite for each qualifying +// node on a changed, non-disabled line. The output is deterministic: we +// sort by (line, operator, description) before returning. +func (mutantGeneratorImpl) GenerateMutants(absPath string, fc diff.FileChange, disabled map[int]bool) ([]lang.MutantSite, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, err + } + defer tree.Close() + + var out []lang.MutantSite + walk(tree.RootNode(), func(n *sitter.Node) bool { + line := nodeLine(n) + if !fc.ContainsLine(line) || disabled[line] { + return true + } + out = append(out, mutantsFor(fc.Path, line, n, src)...) 
+ return true + }) + sort.SliceStable(out, func(i, j int) bool { + if out[i].Line != out[j].Line { + return out[i].Line < out[j].Line + } + if out[i].Operator != out[j].Operator { + return out[i].Operator < out[j].Operator + } + return out[i].Description < out[j].Description + }) + return out, nil +} + +// mutantsFor dispatches on the node kind. Nodes that don't match any +// operator return nil — the walker simply moves on. +func mutantsFor(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + switch n.Type() { + case "binary_expression": + return binaryMutants(file, line, n, src) + case "boolean_literal": + return boolMutants(file, line, n, src) + case "return_expression": + return returnMutants(file, line, n, src) + case "if_expression": + return ifMutants(file, line, n, src) + case "expression_statement": + return exprStmtMutants(file, line, n, src) + case "call_expression": + if mutants := unwrapMutants(file, line, n, src); len(mutants) > 0 { + return mutants + } + return someCallMutants(file, line, n, src) + case "try_expression": + return tryMutants(file, line, n) + case "scoped_identifier", "identifier": + return nil + } + return nil +} + +// binaryMutants covers conditional_boundary, negate_conditional, and +// math_operator. Shape: (binary_expression operator: "" ...). Skip +// unhandled operators so we don't mutate e.g. bit-shift tokens. 
+func binaryMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + opNode := n.ChildByFieldName("operator") + if opNode == nil { + return nil + } + op := opNode.Type() + replacements := map[string]string{ + ">": ">=", + "<": "<=", + ">=": ">", + "<=": "<", + "==": "!=", + "!=": "==", + "+": "-", + "-": "+", + "*": "/", + "/": "*", + } + newOp, ok := replacements[op] + if !ok { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", op, newOp), + Operator: binaryOperatorName(op, newOp), + }} +} + +// binaryOperatorName classifies a source/target operator pair into one of +// the canonical tier-1 operator names. The classification matches the Go +// analyzer so operator stats stay comparable across languages. +func binaryOperatorName(from, to string) string { + if isBoundary(from) || isBoundary(to) { + return "conditional_boundary" + } + if isComparison(from) || isComparison(to) { + return "negate_conditional" + } + if isMath(from) || isMath(to) { + return "math_operator" + } + return "unknown" +} + +func isBoundary(op string) bool { + return op == ">" || op == ">=" || op == "<" || op == "<=" +} + +func isComparison(op string) bool { + return op == "==" || op == "!=" +} + +func isMath(op string) bool { + return op == "+" || op == "-" || op == "*" || op == "/" +} + +// boolMutants flips true <-> false. Tree-sitter exposes boolean literals +// as boolean_literal whose Type() is literally "boolean_literal"; the +// source text is either "true" or "false". 
+func boolMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + text := nodeText(n, src) + if text != "true" && text != "false" { + return nil + } + flipped := "true" + if text == "true" { + flipped = "false" + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("%s -> %s", text, flipped), + Operator: "boolean_substitution", + }} +} + +// returnMutants emits the canonical return_value operator — replace the +// return expression with `Default::default()`. A bare `return;` (unit +// return) has no expression to mutate, so we skip. +// +// `some_to_none` is emitted separately from the Some(x) call site itself +// (see someCallMutants), not here — the operator applies to any Some(x) +// construction, not only those that appear directly in a return. +func returnMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + // A return_expression has at most one named child — the returned value. + if n.NamedChildCount() == 0 { + return nil + } + value := n.NamedChild(0) + if value == nil { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "replace return value with Default::default()", + Operator: "return_value", + }} +} + +// someCallMutants emits the some_to_none operator for any Some(x) call +// expression. The operator applies broadly — any optional constructor +// that tests rely on will be killed if the tests differentiate "value +// present" from "value absent". +// +// Tree-sitter models `Some(x)` as (call_expression function: (identifier +// "Some") arguments: (arguments ...)). 
+func someCallMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + fn := n.ChildByFieldName("function") + if fn == nil || nodeText(fn, src) != "Some" { + return nil + } + args := n.ChildByFieldName("arguments") + if args == nil { + return nil + } + argText := strings.TrimSpace(strings.TrimSuffix( + strings.TrimPrefix(nodeText(args, src), "("), ")")) + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("Some(%s) -> None", argText), + Operator: "some_to_none", + }} +} + +// ifMutants empties an if_expression body (branch_removal). +func ifMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + body := n.ChildByFieldName("consequence") + if body == nil || body.NamedChildCount() == 0 { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove if body", + Operator: "branch_removal", + }} +} + +// exprStmtMutants deletes a bare call statement — the Rust analog of the +// Go statement_deletion case. A semicolon-terminated expression whose +// payload is a call_expression is the canonical candidate; other bare +// statements (assignments, let bindings) are left alone because deleting +// them tends to produce un-killable dead-code mutants. +func exprStmtMutants(file string, line int, n *sitter.Node, _ []byte) []lang.MutantSite { + if n.NamedChildCount() == 0 { + return nil + } + payload := n.NamedChild(0) + if payload == nil || payload.Type() != "call_expression" { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "remove call statement", + Operator: "statement_deletion", + }} +} + +// unwrapMutants emits the Rust-specific unwrap_removal operator: a method +// call whose name is `unwrap` or `expect` has its receiver preserved but +// the trailing `.unwrap()` / `.expect(...)` stripped. Tree-sitter exposes +// `foo.unwrap()` as: +// +// (call_expression +// function: (field_expression value: ... 
field: (field_identifier))) +// +// We look for that shape with field name "unwrap" or "expect". +func unwrapMutants(file string, line int, n *sitter.Node, src []byte) []lang.MutantSite { + fn := n.ChildByFieldName("function") + if fn == nil || fn.Type() != "field_expression" { + return nil + } + field := fn.ChildByFieldName("field") + if field == nil { + return nil + } + name := nodeText(field, src) + if name != "unwrap" && name != "expect" { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: fmt.Sprintf("strip .%s()", name), + Operator: "unwrap_removal", + }} +} + +// tryMutants emits the question_mark_removal operator for try expressions +// (`expr?`). Tree-sitter models `foo()?` as (try_expression ...), making +// detection straightforward. +func tryMutants(file string, line int, n *sitter.Node) []lang.MutantSite { + // A try_expression always has exactly one inner expression; if that's + // missing we have malformed input, so bail. + if n.NamedChildCount() == 0 { + return nil + } + return []lang.MutantSite{{ + File: file, + Line: line, + Description: "strip trailing ?", + Operator: "question_mark_removal", + }} +} diff --git a/internal/lang/rustanalyzer/mutation_generate_test.go b/internal/lang/rustanalyzer/mutation_generate_test.go new file mode 100644 index 0000000..3985aee --- /dev/null +++ b/internal/lang/rustanalyzer/mutation_generate_test.go @@ -0,0 +1,234 @@ +package rustanalyzer + +import ( + "math" + "path/filepath" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// writeAndGenerate is a small harness: write `src` to a temp .rs file, +// generate mutants over the entire file, and return them. 
+func writeAndGenerate(t *testing.T, src string, disabled map[int]bool) []lang.MutantSite { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{ + Path: "a.rs", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(path, fc, disabled) + if err != nil { + t.Fatal(err) + } + return mutants +} + +// collectOps returns the sorted set of operator names from a mutant list. +func collectOps(mutants []lang.MutantSite) map[string]int { + m := map[string]int{} + for _, x := range mutants { + m[x.Operator]++ + } + return m +} + +func TestGenerate_BinaryOps(t *testing.T) { + src := `fn f(x: i32) -> bool { + x > 0 +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["conditional_boundary"] == 0 { + t.Errorf("expected conditional_boundary mutant, got %v", ops) + } +} + +func TestGenerate_EqualityAndMath(t *testing.T) { + src := `fn g(a: i32, b: i32) -> bool { + a == b +} + +fn h(a: i32, b: i32) -> i32 { + a + b +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["negate_conditional"] == 0 { + t.Errorf("expected negate_conditional for ==, got %v", ops) + } + if ops["math_operator"] == 0 { + t.Errorf("expected math_operator for +, got %v", ops) + } +} + +func TestGenerate_BooleanLiteral(t *testing.T) { + src := `fn g() -> bool { true } +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["boolean_substitution"] == 0 { + t.Errorf("expected boolean_substitution, got %v", collectOps(m)) + } +} + +func TestGenerate_ReturnValue(t *testing.T) { + src := `fn g() -> i32 { + return 42; +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["return_value"] == 0 { + t.Errorf("expected return_value mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_SomeToNone(t *testing.T) { + src := `fn g(x: i32) -> Option { + return Some(x); +} +` + 
m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["some_to_none"] == 0 { + t.Errorf("expected some_to_none mutant, got %v", ops) + } + // The generator also emits a generic return_value on the same line — + // that's expected. + if ops["return_value"] == 0 { + t.Errorf("expected return_value companion, got %v", ops) + } +} + +func TestGenerate_UnwrapRemoval(t *testing.T) { + src := `fn g(x: Option) -> i32 { + x.unwrap() +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["unwrap_removal"] == 0 { + t.Errorf("expected unwrap_removal mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_ExpectBecomesUnwrapRemoval(t *testing.T) { + src := `fn g(x: Option) -> i32 { + x.expect("boom") +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["unwrap_removal"] == 0 { + t.Errorf("expected unwrap_removal mutant for .expect, got %v", collectOps(m)) + } +} + +func TestGenerate_QuestionMarkRemoval(t *testing.T) { + src := `fn g(x: Result) -> Result { + let v = x?; + Ok(v) +} +` + m := writeAndGenerate(t, src, nil) + if collectOps(m)["question_mark_removal"] == 0 { + t.Errorf("expected question_mark_removal mutant, got %v", collectOps(m)) + } +} + +func TestGenerate_BranchRemovalAndStatementDeletion(t *testing.T) { + // Uses a plain function call (not a macro) for the statement-deletion + // case. Tree-sitter models `println!(...)` as a macro_invocation, so + // we'd miss it; bare `side_effect()` is parsed as a call_expression + // wrapped in an expression_statement, which is what the generator + // looks for. 
+ src := `fn side_effect() {} + +fn g(x: i32) { + if x > 0 { + side_effect(); + } + side_effect(); +} +` + m := writeAndGenerate(t, src, nil) + ops := collectOps(m) + if ops["branch_removal"] == 0 { + t.Errorf("expected branch_removal, got %v", ops) + } + if ops["statement_deletion"] == 0 { + t.Errorf("expected statement_deletion for bare call, got %v", ops) + } +} + +// TestGenerate_RespectsChangedRegion asserts out-of-region mutants are +// dropped. +func TestGenerate_RespectsChangedRegion(t *testing.T) { + src := `fn in_region(x: i32) -> bool { x > 0 } +fn out_of_region(x: i32) -> bool { x > 0 } +` + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + // Region covers only line 1. Line 2's binary_expression should be dropped. + fc := diff.FileChange{ + Path: "a.rs", + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 1}}, + } + mutants, err := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + if err != nil { + t.Fatal(err) + } + for _, m := range mutants { + if m.Line != 1 { + t.Errorf("got out-of-region mutant at line %d: %+v", m.Line, m) + } + } +} + +// TestGenerate_RespectsDisabledLines asserts disabledLines suppress +// mutants on those lines. +func TestGenerate_RespectsDisabledLines(t *testing.T) { + src := `fn g(a: i32, b: i32) -> bool { + a > b +} +` + disabled := map[int]bool{2: true} + m := writeAndGenerate(t, src, disabled) + for _, x := range m { + if x.Line == 2 { + t.Errorf("mutant on disabled line 2: %+v", x) + } + } +} + +// TestGenerate_Deterministic asserts repeated calls produce byte-identical +// results. Stable ordering is a critical property for the exit-code gate. 
+func TestGenerate_Deterministic(t *testing.T) { + src := `fn g(a: i32, b: i32) -> bool { + a > b && b < 10 +} +` + dir := t.TempDir() + path := filepath.Join(dir, "a.rs") + if err := writeFile(path, []byte(src)); err != nil { + t.Fatal(err) + } + fc := diff.FileChange{Path: "a.rs", Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: 100}}} + first, _ := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + second, _ := mutantGeneratorImpl{}.GenerateMutants(path, fc, nil) + if len(first) != len(second) { + t.Fatalf("lengths differ: %d vs %d", len(first), len(second)) + } + for i := range first { + if first[i] != second[i] { + t.Errorf("row %d differs: %+v vs %+v", i, first[i], second[i]) + } + } +} diff --git a/internal/lang/rustanalyzer/parse.go b/internal/lang/rustanalyzer/parse.go new file mode 100644 index 0000000..a4eae7e --- /dev/null +++ b/internal/lang/rustanalyzer/parse.go @@ -0,0 +1,108 @@ +// Package rustanalyzer implements the lang.Language interface for Rust. It +// is blank-imported from cmd/diffguard/main.go so Rust gets registered at +// process start. +// +// One file per concern, mirroring the Go analyzer layout: +// - rustanalyzer.go -- Language + init()/Register +// - parse.go -- tree-sitter setup, CST helpers +// - sizes.go -- FunctionExtractor +// - complexity.go -- ComplexityCalculator + ComplexityScorer +// - deps.go -- ImportResolver +// - mutation_generate.go-- MutantGenerator +// - mutation_apply.go -- MutantApplier +// - mutation_annotate.go-- AnnotationScanner +// - testrunner.go -- TestRunner (wraps cargo test) +package rustanalyzer + +import ( + "context" + "os" + "sync" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/rust" +) + +// rustLang is the cached tree-sitter Rust grammar handle. Because building +// the grammar involves cgo bridging, we do it once and reuse the pointer +// rather than paying for it on every parse. 
Lazy-init keeps process start
// fast — diffguard binaries that never touch a .rs file pay nothing.
var (
	rustLangOnce sync.Once
	rustLang     *sitter.Language
)

// rustLanguage returns the tree-sitter Rust grammar, building it on first
// use. The sitter.Language struct is safe to share across goroutines.
func rustLanguage() *sitter.Language {
	rustLangOnce.Do(func() {
		rustLang = rust.GetLanguage()
	})
	return rustLang
}

// parseFile reads absPath from disk and returns the parsed tree plus the
// source bytes. Callers get back (nil, nil, err) on read error.
func parseFile(absPath string) (*sitter.Tree, []byte, error) {
	src, err := os.ReadFile(absPath)
	if err != nil {
		return nil, nil, err
	}
	tree, err := parseBytes(src)
	if err != nil {
		return nil, nil, err
	}
	return tree, src, nil
}

// parseBytes returns a *sitter.Tree for src. Unlike sitter.Parse which
// returns only the root node, we return the Tree so callers can hold onto
// it and Close it when done to release the underlying C allocation.
func parseBytes(src []byte) (*sitter.Tree, error) {
	parser := sitter.NewParser()
	parser.SetLanguage(rustLanguage())
	return parser.ParseCtx(context.Background(), nil, src)
}

// walk invokes fn on every node in the subtree rooted at n. The walk is a
// plain depth-first pre-order traversal over ALL children — named and
// anonymous — via ChildCount/Child, avoiding the trickier TreeCursor API.
// (The previous comment said NamedChildCount/NamedChild; the code has
// always visited anonymous nodes too, which matters because tree-sitter
// exposes operator tokens as anonymous nodes.) Returning false from fn
// prunes the subtree.
func walk(n *sitter.Node, fn func(*sitter.Node) bool) {
	if n == nil {
		return
	}
	if !fn(n) {
		return
	}
	count := int(n.ChildCount())
	for i := 0; i < count; i++ {
		walk(n.Child(i), fn)
	}
}

// nodeLine returns the 1-based start line of n. tree-sitter uses 0-based
// coordinates internally; every diffguard interface (FunctionInfo, MutantSite)
// is 1-based, so we convert here once.
+func nodeLine(n *sitter.Node) int { + return int(n.StartPoint().Row) + 1 +} + +// nodeEndLine returns the 1-based end line of n (inclusive of the last line +// any part of n occupies). We subtract one when EndPoint is exactly at a +// line boundary (column 0) because tree-sitter reports the position one past +// the last byte — e.g. a function whose closing brace is the last char on +// line 10 has EndPoint at (11, 0). Without the adjustment we'd report end +// lines that disagree with the Go analyzer's behavior. +func nodeEndLine(n *sitter.Node) int { + end := n.EndPoint() + if end.Column == 0 && end.Row > 0 { + return int(end.Row) + } + return int(end.Row) + 1 +} + +// nodeText returns the byte slice of src covering n. +func nodeText(n *sitter.Node, src []byte) string { + return string(src[n.StartByte():n.EndByte()]) +} diff --git a/internal/lang/rustanalyzer/rustanalyzer.go b/internal/lang/rustanalyzer/rustanalyzer.go new file mode 100644 index 0000000..7b514a0 --- /dev/null +++ b/internal/lang/rustanalyzer/rustanalyzer.go @@ -0,0 +1,65 @@ +package rustanalyzer + +import ( + "strings" + "time" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// defaultRustTestTimeout is the per-mutant test timeout applied when the +// caller did not set one in TestRunConfig. Rust `cargo test` cold-starts +// are slow (compile + link per mutant) so the default is generous. +const defaultRustTestTimeout = 120 * time.Second + +// Language is the Rust implementation of lang.Language. Like the Go +// analyzer, it holds no state; sub-component impls are stateless. +type Language struct{} + +// Name returns the canonical language identifier used by the registry and +// by report section suffixes. +func (*Language) Name() string { return "rust" } + +// FileFilter returns the Rust-specific file selection rules used by the +// diff parser: .rs extension; any path segment literally equal to `tests` +// marks the file as an integration test (i.e. excluded from analysis). 
+func (*Language) FileFilter() lang.FileFilter { + return lang.FileFilter{ + Extensions: []string{".rs"}, + IsTestFile: isRustTestFile, + DiffGlobs: []string{"*.rs"}, + } +} + +// Sub-component accessors. Stateless impls return fresh zero-value structs. +func (*Language) ComplexityCalculator() lang.ComplexityCalculator { return complexityImpl{} } +func (*Language) ComplexityScorer() lang.ComplexityScorer { return complexityImpl{} } +func (*Language) FunctionExtractor() lang.FunctionExtractor { return sizesImpl{} } +func (*Language) ImportResolver() lang.ImportResolver { return depsImpl{} } +func (*Language) MutantGenerator() lang.MutantGenerator { return mutantGeneratorImpl{} } +func (*Language) MutantApplier() lang.MutantApplier { return mutantApplierImpl{} } +func (*Language) AnnotationScanner() lang.AnnotationScanner { return annotationScannerImpl{} } +func (*Language) TestRunner() lang.TestRunner { return newTestRunner() } + +// isRustTestFile reports whether path is a Rust integration test file. The +// design doc settles this: any file whose path contains a `tests` segment +// is treated as a test file. Inline `#[cfg(test)] mod tests { ... }` stays +// ambiguous from path alone — we simply ignore those blocks during analysis +// (they sit inside ordinary source files which are still analyzed). +func isRustTestFile(path string) bool { + // Normalize separators so Windows-style paths behave the same. + segs := strings.Split(strings.ReplaceAll(path, "\\", "/"), "/") + for _, s := range segs { + if s == "tests" { + return true + } + } + return false +} + +// init registers the Rust analyzer. The blank import in cmd/diffguard/main.go +// triggers this; external callers wanting Rust must also blank-import. 
+func init() { + lang.Register(&Language{}) + lang.RegisterManifest("Cargo.toml", "rust") +} diff --git a/internal/lang/rustanalyzer/rustanalyzer_test.go b/internal/lang/rustanalyzer/rustanalyzer_test.go new file mode 100644 index 0000000..cba5c52 --- /dev/null +++ b/internal/lang/rustanalyzer/rustanalyzer_test.go @@ -0,0 +1,70 @@ +package rustanalyzer + +import ( + "testing" + + "github.com/0xPolygon/diffguard/internal/lang" +) + +// TestLanguageRegistration verifies the Rust analyzer registered itself +// and exposes the correct name + file filter. The init() function runs on +// package load so the registry should already contain "rust" by the time +// this test executes. +func TestLanguageRegistration(t *testing.T) { + l, ok := lang.Get("rust") + if !ok { + t.Fatal("rust language not registered") + } + if l.Name() != "rust" { + t.Errorf("Name() = %q, want %q", l.Name(), "rust") + } + ff := l.FileFilter() + if len(ff.Extensions) != 1 || ff.Extensions[0] != ".rs" { + t.Errorf("Extensions = %v, want [.rs]", ff.Extensions) + } + if len(ff.DiffGlobs) != 1 || ff.DiffGlobs[0] != "*.rs" { + t.Errorf("DiffGlobs = %v, want [*.rs]", ff.DiffGlobs) + } +} + +func TestIsRustTestFile(t *testing.T) { + cases := []struct { + path string + want bool + }{ + // Integration tests live under a `tests` directory at any depth. + {"tests/integration.rs", true}, + {"crates/foo/tests/integration.rs", true}, + {"tests/subdir/more.rs", true}, + // Source files never count as tests, even when the path mentions + // the word "test" in a non-segment context. + {"src/lib.rs", false}, + {"src/tester.rs", false}, + {"src/foo/bar.rs", false}, + // Trailing slash variants don't confuse the segment split. + {"src/tests_common.rs", false}, + // Windows separators should behave the same for consistency + // across platforms. 
+ {`tests\integration.rs`, true}, + } + for _, tc := range cases { + got := isRustTestFile(tc.path) + if got != tc.want { + t.Errorf("isRustTestFile(%q) = %v, want %v", tc.path, got, tc.want) + } + } +} + +func TestFileFilterIncludesSource(t *testing.T) { + l, _ := lang.Get("rust") + ff := l.FileFilter() + if !ff.IncludesSource("src/lib.rs") { + t.Error("expected src/lib.rs to be included") + } + if ff.IncludesSource("tests/integration.rs") { + t.Error("expected tests/integration.rs to be excluded") + } + if ff.IncludesSource("build.py") { + t.Error("expected non-.rs files to be excluded") + } +} diff --git a/internal/lang/rustanalyzer/sizes.go b/internal/lang/rustanalyzer/sizes.go new file mode 100644 index 0000000..bf0271d --- /dev/null +++ b/internal/lang/rustanalyzer/sizes.go @@ -0,0 +1,210 @@ +package rustanalyzer + +import ( + "sort" + + sitter "github.com/smacker/go-tree-sitter" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" +) + +// sizesImpl implements lang.FunctionExtractor for Rust via tree-sitter. A +// single walk produces both the per-function sizes and the overall file +// size — the file-size row is cheap to compute from the raw byte buffer so +// we don't bother the CST for that number. +type sizesImpl struct{} + +// ExtractFunctions parses absPath and returns functions overlapping the +// diff's changed regions plus the overall file size. A parse failure is +// treated as "skip this file" to match the Go analyzer's (nil, nil, nil) +// return convention. 
+func (sizesImpl) ExtractFunctions(absPath string, fc diff.FileChange) ([]lang.FunctionSize, *lang.FileSize, error) { + tree, src, err := parseFile(absPath) + if err != nil { + return nil, nil, nil + } + defer tree.Close() + + fns := collectFunctions(tree.RootNode(), src) + fileSize := &lang.FileSize{Path: fc.Path, Lines: countLines(src)} + + var results []lang.FunctionSize + for _, fn := range fns { + if !fc.OverlapsRange(fn.startLine, fn.endLine) { + continue + } + results = append(results, lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{ + File: fc.Path, + Line: fn.startLine, + EndLine: fn.endLine, + Name: fn.name, + }, + Lines: fn.endLine - fn.startLine + 1, + }) + } + + // Deterministic order matters for report stability: sort by start line, + // then by name so two functions declared on the same line never flip. + sort.SliceStable(results, func(i, j int) bool { + if results[i].Line != results[j].Line { + return results[i].Line < results[j].Line + } + return results[i].Name < results[j].Name + }) + return results, fileSize, nil +} + +// rustFunction is the internal record produced by the extractor. It's +// deliberately wider than FunctionSize/FunctionComplexity because the +// complexity analyzer needs the node to walk the body; keeping one record +// shape avoids re-parsing or re-walking. +type rustFunction struct { + name string + startLine int + endLine int + body *sitter.Node // the body block, or nil for e.g. trait methods with no default impl + node *sitter.Node // the entire function_item / declaration node +} + +// collectFunctions walks the CST and returns every function_item and every +// method inside an impl_item. Nested functions are reported as separate +// entries to match the spec. Trait default methods are included too — +// their function_item has a body. 
+// +// Name extraction rules: +// +// fn foo() -> "foo" +// impl Type { fn bar() } -> "Type::bar" +// impl Trait for Type { fn baz() } -> "Type::baz" +// impl Foo { fn qux() } -> "Foo::qux" +// +// The grammar uses a uniform node kind `function_item` for every function +// definition regardless of context; its parent (`declaration_list` of an +// `impl_item`) tells us the receiver type. +func collectFunctions(root *sitter.Node, src []byte) []rustFunction { + var fns []rustFunction + walk(root, func(n *sitter.Node) bool { + if n.Type() != "function_item" { + return true + } + fn := buildRustFunction(n, src) + if fn != nil { + fns = append(fns, *fn) + } + // Keep descending: a function may contain nested closures or + // function items the spec treats as separate entries. + return true + }) + return fns +} + +// buildRustFunction constructs a rustFunction record from a function_item +// node. Returns nil if the name is unparseable. +func buildRustFunction(n *sitter.Node, src []byte) *rustFunction { + nameNode := n.ChildByFieldName("name") + if nameNode == nil { + return nil + } + baseName := nodeText(nameNode, src) + + fullName := baseName + if typeName := enclosingImplType(n, src); typeName != "" { + fullName = typeName + "::" + baseName + } + + body := n.ChildByFieldName("body") + return &rustFunction{ + name: fullName, + startLine: nodeLine(n), + endLine: nodeEndLine(n), + body: body, + node: n, + } +} + +// enclosingImplType walks up parents looking for the closest enclosing +// impl_item and returns its "type" field's text (the `Type` in +// `impl Type { ... }` or `impl Trait for Type { ... }`). If we encounter +// a function_item or closure_expression first, the candidate function is +// nested inside another function and should not inherit an impl prefix — +// it stays a bare standalone name. 
+// +// Tree-sitter Rust uses the "type" field name for `impl Type` and +// `impl Trait for Type` alike (the trait, when present, lives under the +// "trait" field), so the same lookup works for both forms. +func enclosingImplType(n *sitter.Node, src []byte) string { + for parent := n.Parent(); parent != nil; parent = parent.Parent() { + switch parent.Type() { + case "function_item", "closure_expression": + // Reached a nesting boundary before any impl — the function + // is defined inside another function's body and should not + // carry the outer impl's type prefix. + return "" + case "impl_item": + typeNode := parent.ChildByFieldName("type") + if typeNode == nil { + return "" + } + return simpleTypeName(typeNode, src) + } + } + return "" +} + +// simpleTypeName strips generics and pathing from a type node, returning +// just the trailing identifier (`Foo` from `path::to::Foo`). The +// impl-type field is usually already simple but the grammar allows any +// type expression here, including `generic_type` with a `type_arguments` +// child and `scoped_type_identifier` with a `path::`/`name` pair. +func simpleTypeName(n *sitter.Node, src []byte) string { + switch n.Type() { + case "type_identifier", "primitive_type": + return nodeText(n, src) + case "generic_type": + if inner := n.ChildByFieldName("type"); inner != nil { + return simpleTypeName(inner, src) + } + case "scoped_type_identifier": + if name := n.ChildByFieldName("name"); name != nil { + return nodeText(name, src) + } + case "reference_type": + if inner := n.ChildByFieldName("type"); inner != nil { + return simpleTypeName(inner, src) + } + } + // Fallback: take the last identifier-looking child so unusual shapes + // don't collapse to an empty name. 
+ for i := int(n.ChildCount()) - 1; i >= 0; i-- { + c := n.Child(i) + if c == nil { + continue + } + if c.Type() == "type_identifier" || c.Type() == "identifier" { + return nodeText(c, src) + } + } + return nodeText(n, src) +} + +// countLines returns the number of source lines in src. An empty file is +// 0, a file without a trailing newline still counts its final line, a file +// with a trailing newline counts exactly that many newline-terminated +// lines. +func countLines(src []byte) int { + if len(src) == 0 { + return 0 + } + count := 0 + for _, b := range src { + if b == '\n' { + count++ + } + } + if src[len(src)-1] != '\n' { + count++ + } + return count +} diff --git a/internal/lang/rustanalyzer/sizes_test.go b/internal/lang/rustanalyzer/sizes_test.go new file mode 100644 index 0000000..6b63265 --- /dev/null +++ b/internal/lang/rustanalyzer/sizes_test.go @@ -0,0 +1,163 @@ +package rustanalyzer + +import ( + "math" + "path/filepath" + "sort" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// fullRegion returns a FileChange covering every line so tests can assert +// against every function in the fixture without threading line numbers. +func fullRegion(path string) diff.FileChange { + return diff.FileChange{ + Path: path, + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } +} + +func TestExtractFunctions_AllForms(t *testing.T) { + absPath, err := filepath.Abs("testdata/functions.rs") + if err != nil { + t.Fatal(err) + } + s := sizesImpl{} + fns, fsize, err := s.ExtractFunctions(absPath, fullRegion("testdata/functions.rs")) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if fsize == nil { + t.Fatal("expected non-nil file size") + } + if fsize.Lines == 0 { + t.Error("file size reports zero lines") + } + + // Collect names and assert the expected set appears. 
Tolerate order + // by sorting; collectFunctions already sorts by (line, name) but + // asserting on a set is more resilient to minor CST shape changes. + names := make([]string, 0, len(fns)) + for _, fn := range fns { + names = append(names, fn.Name) + } + sort.Strings(names) + + expected := map[string]bool{ + "standalone": false, + "Counter::new": false, + "Counter::increment": false, + "nested_helper": false, // nested fns are separate entries + "Named::name": false, // default (trait-declared) method is not in this fixture + "Counter::name": false, // trait-impl methods attach to the impl type, not the trait + } + for _, name := range names { + if _, ok := expected[name]; ok { + expected[name] = true + } + } + + mustHave := []string{"standalone", "Counter::new", "Counter::increment", "nested_helper", "Counter::name"} + for _, n := range mustHave { + if !expected[n] { + t.Errorf("missing expected function %q (got %v)", n, names) + } + } +} + +func TestExtractFunctions_LineRanges(t *testing.T) { + absPath, _ := filepath.Abs("testdata/functions.rs") + fns, _, err := sizesImpl{}.ExtractFunctions(absPath, fullRegion("testdata/functions.rs")) + if err != nil { + t.Fatal(err) + } + for _, fn := range fns { + if fn.Line <= 0 { + t.Errorf("%s: Line = %d, want > 0 (1-based)", fn.Name, fn.Line) + } + if fn.EndLine < fn.Line { + t.Errorf("%s: EndLine %d < Line %d", fn.Name, fn.EndLine, fn.Line) + } + if fn.Lines != fn.EndLine-fn.Line+1 { + t.Errorf("%s: Lines = %d, want %d", fn.Name, fn.Lines, fn.EndLine-fn.Line+1) + } + } +} + +func TestExtractFunctions_FilterToChangedRegion(t *testing.T) { + absPath, _ := filepath.Abs("testdata/functions.rs") + + // Narrow region that only covers the standalone fn (lines 5-7 in the + // fixture). The impl methods should be filtered out. 
+ fc := diff.FileChange{ + Path: "testdata/functions.rs", + Regions: []diff.ChangedRegion{{StartLine: 5, EndLine: 7}}, + } + fns, _, err := sizesImpl{}.ExtractFunctions(absPath, fc) + if err != nil { + t.Fatal(err) + } + names := []string{} + for _, fn := range fns { + names = append(names, fn.Name) + } + sort.Strings(names) + + // Must contain "standalone" and exclude the impl methods. + foundStandalone := false + for _, n := range names { + if n == "standalone" { + foundStandalone = true + } + if n == "Counter::new" || n == "Counter::name" { + t.Errorf("unexpected function %q in narrow region, got %v", n, names) + } + } + if !foundStandalone { + t.Errorf("expected standalone in narrow region, got %v", names) + } +} + +func TestExtractFunctions_EmptyFile(t *testing.T) { + // Tree-sitter tolerates an empty file and produces an empty source_file + // node — we should return no functions and a 0-line file size. + dir := t.TempDir() + empty := filepath.Join(dir, "empty.rs") + if err := writeFile(empty, []byte("")); err != nil { + t.Fatal(err) + } + fns, fsize, err := sizesImpl{}.ExtractFunctions(empty, fullRegion("empty.rs")) + if err != nil { + t.Fatalf("ExtractFunctions: %v", err) + } + if len(fns) != 0 { + t.Errorf("empty file: got %d fns, want 0", len(fns)) + } + if fsize == nil { + t.Fatal("expected non-nil file size for empty file") + } + if fsize.Lines != 0 { + t.Errorf("empty file: Lines = %d, want 0", fsize.Lines) + } +} + +func TestCountLines(t *testing.T) { + cases := []struct { + in string + want int + }{ + {"", 0}, + {"x", 1}, + {"x\n", 1}, + {"x\ny", 2}, + {"x\ny\n", 2}, + {"\n", 1}, + } + for _, tc := range cases { + got := countLines([]byte(tc.in)) + if got != tc.want { + t.Errorf("countLines(%q) = %d, want %d", tc.in, got, tc.want) + } + } +} diff --git a/internal/lang/rustanalyzer/testdata/complexity.rs b/internal/lang/rustanalyzer/testdata/complexity.rs new file mode 100644 index 0000000..9584a8f --- /dev/null +++ 
b/internal/lang/rustanalyzer/testdata/complexity.rs @@ -0,0 +1,70 @@ +// Fixture for the cognitive-complexity scorer. Each function below has a +// documented expected score so the test can assert precise numbers. + +// Empty function: no control flow, score 0. +fn empty() {} + +// Single if: +1 base, 0 nesting, 0 logical. +fn one_if(x: i32) -> i32 { + if x > 0 { + 1 + } else { + 0 + } +} + +// match with 3 arms, 2 guarded: +1 for match, +2 for guarded arms. +fn guarded(x: i32) -> i32 { + match x { + n if n > 0 => 1, + n if n < 0 => -1, + _ => 0, + } +} + +// Nested if inside for: for = +1, nested if = +1 base + 1 nesting = +2. +// Total = 3. +fn nested(xs: &[i32]) -> i32 { + let mut n = 0; + for x in xs { + if *x > 0 { + n += 1; + } + } + n +} + +// Logical chain: if +1, &&/|| switch counted. "a && b && c" is a single +// run = +1; "a && b || c" is two runs = +2. This fn has "a && b || c": +// base if = +1, logical = +2, total = 3. +fn logical(a: bool, b: bool, c: bool) -> bool { + if a && b || c { + true + } else { + false + } +} + + +// Simple if let — grammar emits if_expression+let_condition (current) or +// if_let_expression (older). Either way: +1 base, 0 logical ops. Total = 1. +fn if_let_simple(foo: Option) -> i32 { + if let Some(x) = foo { + x + } else { + 0 + } +} + +// unsafe block should NOT count; `?` should NOT count. This fn has: +// one if = +1, one ? = +0, one unsafe = +0. Total = 1. 
+fn unsafe_and_try(maybe: Option<i32>) -> Result<i32, ()> {
+    // `ok_or(())?` fixes the error type to `()`; the Ok payload is i32.
+    // (The generic parameters were missing — bare `Option`/`Result` do
+    // not compile under rustc.)
+    let v = maybe.ok_or(())?;
+    if v > 0 {
+        return Ok(v);
+    }
+    unsafe {
+        let _p: *const i32 = std::ptr::null();
+    }
+    Ok(0)
+}
diff --git a/internal/lang/rustanalyzer/testdata/functions.rs b/internal/lang/rustanalyzer/testdata/functions.rs
new file mode 100644
index 0000000..80e68a0
--- /dev/null
+++ b/internal/lang/rustanalyzer/testdata/functions.rs
@@ -0,0 +1,35 @@
+// Fixture: a small Rust file covering every function form the extractor
+// should handle: standalone fn, inherent method, trait-impl method, and
+// nested functions (reported as separate entries).
+
+fn standalone() -> i32 {
+    42
+}
+
+pub struct Counter {
+    n: i32,
+}
+
+impl Counter {
+    pub fn new() -> Self {
+        Counter { n: 0 }
+    }
+
+    pub fn increment(&mut self) -> i32 {
+        fn nested_helper(x: i32) -> i32 {
+            x + 1
+        }
+        self.n = nested_helper(self.n);
+        self.n
+    }
+}
+
+pub trait Named {
+    fn name(&self) -> &str;
+}
+
+impl Named for Counter {
+    fn name(&self) -> &str {
+        "Counter"
+    }
+}
diff --git a/internal/lang/rustanalyzer/testrunner.go b/internal/lang/rustanalyzer/testrunner.go
new file mode 100644
index 0000000..5b4af13
--- /dev/null
+++ b/internal/lang/rustanalyzer/testrunner.go
@@ -0,0 +1,147 @@
+package rustanalyzer
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"sync"
+
+	"github.com/0xPolygon/diffguard/internal/lang"
+)
+
+// testRunnerImpl implements lang.TestRunner for Rust using `cargo test`.
+// Unlike Go's overlay-based runner, Cargo has no build-time file
+// substitution, so we use a temp-copy isolation strategy:
+//
+//  1. Acquire a per-file mutex so concurrent mutants on the same file
+//     serialize. Different files run in parallel.
+//  2. Back the original up.
+//  3. Copy the mutant bytes over the original in place.
+//  4. Run `cargo test` with a timeout.
+//  5. Restore the original from the backup — always, via defer — even
+//     if cargo panics or we panic.
+type testRunnerImpl struct {
+	// cmd is the executable to run. Normally "cargo"; tests override this
+	// with a fake binary that exercises the kill / survive / timeout paths
+	// without needing a real Cargo toolchain.
+	cmd string
+	// extraArgs, when non-empty, replace the normal `cargo test` argv
+	// wholesale (see buildArgs) — they are NOT merged with the default
+	// args. Tests use this to swap in a no-op command ("sh -c 'exit 0'")
+	// by setting cmd="sh" and extraArgs=["-c","..."].
+	extraArgs []string
+
+	// mu guards only the lazily-built locks map; the per-file mutexes it
+	// holds are what callers hold while the source file is mutated.
+	mu sync.Mutex
+	locks map[string]*sync.Mutex
+}
+
+// newTestRunner builds a fresh runner. All fields are zero-value except
+// the cmd which defaults to "cargo". Tests construct their own via
+// newTestRunnerWithCommand.
+func newTestRunner() *testRunnerImpl {
+	return &testRunnerImpl{cmd: "cargo"}
+}
+
+// fileLock returns the per-file mutex for the given path, lazily
+// initializing the entry on first access. The outer lock (r.mu) guards
+// only the map; the returned mutex is what the caller actually holds
+// while mutating the source file.
+func (r *testRunnerImpl) fileLock(path string) *sync.Mutex {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if r.locks == nil {
+		r.locks = map[string]*sync.Mutex{}
+	}
+	m, ok := r.locks[path]
+	if !ok {
+		m = &sync.Mutex{}
+		r.locks[path] = m
+	}
+	return m
+}
+
+// RunTest implements the lang.TestRunner contract. Returning (true, ...,
+// nil) signals the mutant was killed (test exit != 0); (false, ..., nil)
+// signals survived (tests passed); (false, "", err) signals the runner
+// itself couldn't run.
+func (r *testRunnerImpl) RunTest(cfg lang.TestRunConfig) (bool, string, error) {
+	// Per-file serialization: two concurrent mutants on the same file
+	// would race on the in-place swap below.
+ lock := r.fileLock(cfg.OriginalFile) + lock.Lock() + defer lock.Unlock() + + mutantBytes, err := os.ReadFile(cfg.MutantFile) + if err != nil { + return false, "", fmt.Errorf("reading mutant file: %w", err) + } + originalBytes, err := os.ReadFile(cfg.OriginalFile) + if err != nil { + return false, "", fmt.Errorf("reading original file: %w", err) + } + + // Defer restore BEFORE writing the mutant so a panic between the + // write and the test run can't leave a corrupt source file behind. + restore := func() { + // Best-effort restore; we don't have a sane way to report an + // error here and the harness is expected to panic-safely run. + _ = os.WriteFile(cfg.OriginalFile, originalBytes, 0644) + } + defer restore() + + if err := os.WriteFile(cfg.OriginalFile, mutantBytes, 0644); err != nil { + return false, "", fmt.Errorf("writing mutant over original: %w", err) + } + + timeout := cfg.Timeout + if timeout <= 0 { + timeout = defaultRustTestTimeout + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + args := r.buildArgs(cfg) + cmd := exec.CommandContext(ctx, r.cmd, args...) + cmd.Dir = cfg.RepoPath + cmd.Env = append(os.Environ(), "CARGO_INCREMENTAL=0") + var combined bytes.Buffer + cmd.Stdout = &combined + cmd.Stderr = &combined + + runErr := cmd.Run() + output := combined.String() + + // A timeout is reported as "killed" — the mutant made tests so slow + // they couldn't finish within the allotted window, which is a + // meaningful signal in line with the Go analyzer's treatment. + if ctx.Err() == context.DeadlineExceeded { + return true, output, nil + } + if runErr != nil { + return true, output, nil + } + return false, output, nil +} + +// buildArgs returns the argv after the command name. When the caller +// supplied extraArgs (tests), we honor those; otherwise we build a normal +// `cargo test` invocation with the pattern as a positional filter. 
+func (r *testRunnerImpl) buildArgs(cfg lang.TestRunConfig) []string {
+	// Test override: non-empty extraArgs replaces the argv wholesale.
+	// Copy so callers can't mutate the runner's backing slice through
+	// the returned value.
+	if len(r.extraArgs) > 0 {
+		return append([]string(nil), r.extraArgs...)
+	}
+	args := []string{"test"}
+	if cfg.TestPattern != "" {
+		// Cargo treats a bare positional argument as a test-name filter.
+		args = append(args, cfg.TestPattern)
+	}
+	return args
+}
+
+// cargoTestArgs is exposed to tests so they can assert the argv we'd send
+// to cargo when no overrides are in play.
+func cargoTestArgs(cfg lang.TestRunConfig) []string {
+	r := &testRunnerImpl{}
+	return r.buildArgs(cfg)
+}
+
diff --git a/internal/lang/rustanalyzer/testrunner_test.go b/internal/lang/rustanalyzer/testrunner_test.go
new file mode 100644
index 0000000..69d964a
--- /dev/null
+++ b/internal/lang/rustanalyzer/testrunner_test.go
@@ -0,0 +1,261 @@
+package rustanalyzer
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/0xPolygon/diffguard/internal/lang"
+)
+
+// fakeRunner returns a runner that invokes `/bin/sh -c