From 8aa0e06c65cf72a4fd31a0273cd85f4d900f4138 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Tue, 5 May 2026 18:10:14 +0300 Subject: [PATCH 1/2] fix PR-E1 follow-up contract and docs Restore analyze_pr risk_score normalization to [0,1], rename call-match ontology constant with alias compatibility, and document deferred Tier 1B behavior updates from the PR-E1 plan. Co-authored-by: Cursor --- README.md | 6 + build_ast_graph.py | 9 +- java_ontology.py | 5 +- pr_analysis.py | 7 +- .../TIER1B-HTTP-ASYNC-EDGES-PROPOSE.md | 1 + server.py | 2 +- tests/test_pr_analysis.py | 130 ++++++++++++++++++ 7 files changed, 152 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 877bdd9..01b9a53 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,12 @@ layer emits method-level outgoing calls, built-in outgoing calls for that same method are replaced (not appended) to avoid double-counting one network call site. +When a brownfield caller override specifies only part of what built-in detection +would produce, missing fields are inherited from the built-in result. Partial +overrides are therefore non-destructive (tightening instead of replacing). To +fully replace the built-in result for a method, supply all relevant fields in +the override; otherwise unspecified fields default to built-in values. + For source stubs, copy `@CodebaseClient` / `@CodebaseClients` and `@CodebaseProducer` / `@CodebaseProducers` from `tests/fixtures/brownfield_client_stubs/` (same "simple-name only" behavior as diff --git a/build_ast_graph.py b/build_ast_graph.py index 2c565a0..93da783 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -63,7 +63,7 @@ symbol_id, ) from path_filtering import LayeredIgnore, iter_java_source_files -from java_ontology import VALID_HTTP_CALL_MATCHES +from java_ontology import VALID_CALL_MATCHES log = logging.getLogger(__name__) @@ -1589,6 +1589,7 @@ def _match_call_edge( candidates: list[RouteRow] = [] if call.client_kind == "feign_method": + # Both sides must provide a non-empty Feign name; empty values are unresolved. candidates = [ r for r in routes if r.feign_name and call.feign_target_name and r.feign_name == call.feign_target_name @@ -1640,6 +1641,8 @@ def pass6_match_edges( all_routes = [r for r in tables.routes_rows if r.microservice] member_by_id = {m.node_id: m for m in tables.members} + # Pass 6 is idempotent for full rebuilds: each run fully re-derives match outcomes. + # If incremental rebuild lands later, this reset must remain pass-scoped. tables.call_edge_stats.http_calls_match_breakdown.clear() tables.call_edge_stats.async_calls_match_breakdown.clear() tables.call_edge_stats.cross_service_calls_total = 0 @@ -1674,7 +1677,7 @@ def _micro_factor(member: MemberEntry | None) -> float: end_line=member.decl.end_line if member else 0, ) outcome, candidates = _match_call_edge(call, all_routes, member.microservice if member else "") - if outcome in VALID_HTTP_CALL_MATCHES: + if outcome in VALID_CALL_MATCHES: row.match = outcome if outcome in ("cross_service", "intra_service") and len(candidates) == 1: row.route_id = candidates[0].id @@ -1710,7 +1713,7 @@ def _micro_factor(member: MemberEntry | None) -> float: end_line=member.decl.end_line if member else 0, ) outcome, candidates = _match_call_edge(call, all_routes, member.microservice if member else "") - if outcome in VALID_HTTP_CALL_MATCHES: + if outcome in VALID_CALL_MATCHES: row.match = outcome if outcome in ("cross_service", "intra_service") and len(candidates) == 1: row.route_id = candidates[0].id diff --git a/java_ontology.py b/java_ontology.py index f1a9df6..16fc279 100644 --- a/java_ontology.py +++ b/java_ontology.py @@ -66,13 +66,15 @@ "unresolved", )) -VALID_HTTP_CALL_MATCHES: frozenset[str] = frozenset(( +VALID_CALL_MATCHES: frozenset[str] = frozenset(( "cross_service", "intra_service", "ambiguous", "phantom", "unresolved", )) +# Back-compat alias; retained for one release while callers migrate. +VALID_HTTP_CALL_MATCHES = VALID_CALL_MATCHES __all__ = [ "VALID_ROLES", @@ -82,5 +84,6 @@ "VALID_CLIENT_KINDS", "VALID_HTTP_CALL_STRATEGIES", "VALID_ASYNC_CALL_STRATEGIES", + "VALID_CALL_MATCHES", "VALID_HTTP_CALL_MATCHES", ] diff --git a/pr_analysis.py b/pr_analysis.py index 8055a74..3c812d6 100644 --- a/pr_analysis.py +++ b/pr_analysis.py @@ -380,8 +380,9 @@ def _route_ids_for_symbol(graph: Any, symbol_id: str) -> list[str]: def compute_risk(graph: Any, changed: list[ChangedSymbol]) -> PrRiskReport: """Aggregate blast radius, routes, cross-service callers, and v1 risk score. - Risk score includes a cross-service route-caller bump: +1.0 per caller - (capped at +5.0) across changed methods that expose routes. + Risk score stays in [0, 1]. Cross-service route callers add a bounded + bump (up to +1.0) after normalization so they influence rank while + preserving the public scalar contract. """ notes: list[str] = [] blast_by: dict[str, int] = {} @@ -472,7 +473,7 @@ def _normalize(x: float, ceiling: float) -> float: 5.0, float(sum(c.cross_service_callers_count for c in enriched_changed)), ) - score = max(0.0, raw + cross_service_bonus) + score = max(0.0, min(1.0, raw + (cross_service_bonus / 5.0))) if score < 0.3: band = "low" elif score < 0.7: diff --git a/propose/completed/TIER1B-HTTP-ASYNC-EDGES-PROPOSE.md b/propose/completed/TIER1B-HTTP-ASYNC-EDGES-PROPOSE.md index 51d0b71..36e7227 100644 --- a/propose/completed/TIER1B-HTTP-ASYNC-EDGES-PROPOSE.md +++ b/propose/completed/TIER1B-HTTP-ASYNC-EDGES-PROPOSE.md @@ -123,6 +123,7 @@ to match. The fields are: | Field | Source | Notes | | -------------------- | ------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | | `client_kind` | `feign_method` / `rest_template` / `web_client` / `kafka_send` / `stream_bridge_send` | Picks the matcher branch. | +| `channel` | `"http"` or `"async"` | Durable discriminator used by `_match_call_edge` to choose HTTP vs async matching. | | `feign_target_name` | `@FeignClient(name=…)` on the interface the caller's method belongs to | Resolution: literal → SpEL → constant. Same three-strategy ladder as B2a §4.4.5. | | `path_template_call` | URI argument of `RestTemplate.exchange` etc., curly-collapsed via B2a's normalizer | Re-use B2a's normalizer — do not re-implement. | | `method_call` | `HttpMethod.GET` etc., or extracted from the called function (`getForObject` → `GET`) | `''` means "couldn't determine". | diff --git a/server.py b/server.py index e04f883..46b1a4b 100644 --- a/server.py +++ b/server.py @@ -1268,7 +1268,7 @@ async def impact_analysis( "Map a unified diff to changed indexed symbols and estimate blast radius / risk. " "Pass full unified-diff text (e.g. `git diff` output). Returns JSON-serializable " "risk report: changed_symbols, blast_radius_total, cross_service_callers, " - "routes_touched (Route ids via EXPOSES), risk_score, risk_band, notes. " + "routes_touched (Route ids via EXPOSES), risk_score ([0,1]), risk_band, notes. " "Binary hunks and file renames are skipped for symbol mapping and surfaced in notes." ), ) diff --git a/tests/test_pr_analysis.py b/tests/test_pr_analysis.py index c954a66..21c75fd 100644 --- a/tests/test_pr_analysis.py +++ b/tests/test_pr_analysis.py @@ -166,6 +166,136 @@ def fake_fc(self, name, **kwargs): assert rep.routes_touched +def test_35a_compute_risk_cross_service_bonus_saturates_to_one(monkeypatch) -> None: + from pr_analysis import ChangedSymbol + + class _FakeGraph: + def _rows(self, query, params): + if "MATCH (s:Symbol) WHERE s.id = $id RETURN" in query: + return [{ + "id": params["id"], + "kind": "method", + "name": "handler", + "fqn": "com.acme.Controller#handler()", + "package": "com.acme", + "module": "svc-a", + "microservice": "svc-a", + "filename": "src/main/java/com/acme/Controller.java", + "start_line": 10, + "end_line": 20, + "start_byte": 0, + "end_byte": 0, + "modifiers": [], + "annotations": [], + "capabilities": [], + "role": "CONTROLLER", + "signature": "handler()", + "parent_id": "", + "resolved": True, + }] + if "MATCH (s:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(r:Route {id: $rid})" in query: + return [{"id": str(i)} for i in range(6)] + return [] + + def impact_analysis(self, name, **kwargs): + del name, kwargs + return [] + + def find_callers(self, name, **kwargs): + del name, kwargs + return [] + + monkeypatch.setattr("pr_analysis._route_ids_for_symbol", lambda graph, sid: ["route-1"]) + rep = compute_risk( + _FakeGraph(), + [ + ChangedSymbol( + symbol_id="sym-1", + fqn="com.acme.Controller#handler()", + kind="method", + change_type="modified", + file="src/main/java/com/acme/Controller.java", + hunk_lines=[12], + ), + ], + ) + assert rep.risk_score == 1.0 + + +def test_35b_compute_risk_single_cross_service_bonus_is_point_two(monkeypatch) -> None: + from pr_analysis import ChangedSymbol + + class _FakeGraph: + def __init__(self, *, include_callers: bool) -> None: + self._include_callers = include_callers + + def _rows(self, query, params): + if "MATCH (s:Symbol) WHERE s.id = $id RETURN" in query: + return [{ + "id": params["id"], + "kind": "method", + "name": "handler", + "fqn": "com.acme.Controller#handler()", + "package": "com.acme", + "module": "svc-a", + "microservice": "svc-a", + "filename": "src/main/java/com/acme/Controller.java", + "start_line": 10, + "end_line": 20, + "start_byte": 0, + "end_byte": 0, + "modifiers": [], + "annotations": [], + "capabilities": [], + "role": "CONTROLLER", + "signature": "handler()", + "parent_id": "", + "resolved": True, + }] + if "MATCH (s:Symbol)-[e:HTTP_CALLS|ASYNC_CALLS]->(r:Route {id: $rid})" in query: + if self._include_callers: + return [{"id": "caller-1"}] + return [] + return [] + + def impact_analysis(self, name, **kwargs): + del name, kwargs + return [] + + def find_callers(self, name, **kwargs): + del name, kwargs + return [] + + monkeypatch.setattr("pr_analysis._route_ids_for_symbol", lambda graph, sid: ["route-1"]) + rep = compute_risk( + _FakeGraph(include_callers=True), + [ + ChangedSymbol( + symbol_id="sym-1", + fqn="com.acme.Controller#handler()", + kind="method", + change_type="modified", + file="src/main/java/com/acme/Controller.java", + hunk_lines=[12], + ), + ], + ) + baseline = compute_risk( + _FakeGraph(include_callers=False), + [ + ChangedSymbol( + symbol_id="sym-1", + fqn="com.acme.Controller#handler()", + kind="method", + change_type="modified", + file="src/main/java/com/acme/Controller.java", + hunk_lines=[12], + ), + ], + ) + assert rep.risk_score - baseline.risk_score == 0.2 + + def test_36_removed_symbol_from_minus_only_hunk(kuzu_graph) -> None: diff = """diff --git a/chat-assign/src/main/java/com/bank/chat/assign/service/ChatManagementService.java b/chat-assign/src/main/java/com/bank/chat/assign/service/ChatManagementService.java --- a/chat-assign/src/main/java/com/bank/chat/assign/service/ChatManagementService.java From 1bca06cb76ad21d1d4040c8c0a9b7ae4d6740e28 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Tue, 5 May 2026 18:25:43 +0300 Subject: [PATCH 2/2] polish PR-E1 follow-up docs and test clarity Add a concrete partial-override README example, tighten the risk delta test to isolate raw terms, and annotate pass6 idempotency with Tier-2 incremental-rebuild context. Co-authored-by: Cursor --- README.md | 3 +++ build_ast_graph.py | 2 +- tests/test_pr_analysis.py | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 01b9a53..3997a71 100644 --- a/README.md +++ b/README.md @@ -389,6 +389,9 @@ would produce, missing fields are inherited from the built-in result. Partial overrides are therefore non-destructive (tightening instead of replacing). To fully replace the built-in result for a method, supply all relevant fields in the override; otherwise unspecified fields default to built-in values. +Example: if built-in detection produces `client_kind=rest_template`, `method=GET`, +`path=/users/{id}`, and an override sets only `path=/users/me`, the final call +keeps `client_kind=rest_template` and `method=GET` while changing only the path. For source stubs, copy `@CodebaseClient` / `@CodebaseClients` and `@CodebaseProducer` / `@CodebaseProducers` from diff --git a/build_ast_graph.py b/build_ast_graph.py index 93da783..ed69fe1 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -1642,7 +1642,7 @@ def pass6_match_edges( member_by_id = {m.node_id: m for m in tables.members} # Pass 6 is idempotent for full rebuilds: each run fully re-derives match outcomes. - # If incremental rebuild lands later, this reset must remain pass-scoped. + # If incremental rebuild lands later (Tier-2 follow-up), this reset must remain pass-scoped. tables.call_edge_stats.http_calls_match_breakdown.clear() tables.call_edge_stats.async_calls_match_breakdown.clear() tables.call_edge_stats.cross_service_calls_total = 0 diff --git a/tests/test_pr_analysis.py b/tests/test_pr_analysis.py index 21c75fd..372df34 100644 --- a/tests/test_pr_analysis.py +++ b/tests/test_pr_analysis.py @@ -293,7 +293,9 @@ def find_callers(self, name, **kwargs): ), ], ) - assert rep.risk_score - baseline.risk_score == 0.2 + # Keep raw terms identical in both runs; only cross-service route-callers differ. + assert rep.routes_touched == baseline.routes_touched == ["route-1"] + assert abs((rep.risk_score - baseline.risk_score) - 0.2) < 1e-9 def test_36_removed_symbol_from_minus_only_hunk(kuzu_graph) -> None: