Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions build_ast_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class CallResolutionStats:
phantom_chained: int = 0
phantom_other: int = 0
callee_unresolved: int = 0
skipped_cross_service: int = 0


@dataclass
Expand Down Expand Up @@ -252,6 +253,7 @@ class GraphTables:
methods_by_type: dict[str, list[MemberEntry]] = field(default_factory=dict)
parse_errors: int = 0
skipped_files: int = 0
pass3_skipped_cross_service: int = 0


# ---------- file walk (see `path_filtering.iter_java_source_files`) ----------
Expand Down Expand Up @@ -1042,6 +1044,7 @@ def _resolve_and_emit_call(
recv_type, strat, conf = _resolve_receiver_type(call, scope=scope, member=member, ast=ast, tables=tables)

if strat == "chained_receiver":
# Chained-receiver phantoms have no microservice attribution, so they cannot violate cross-service CALLS invariants.
pid = _phantom_method_id(
tables, receiver_fqn=None, receiver_expr=call.receiver_expr,
callee=call.callee_simple, arg_count=call.arg_count,
Expand All @@ -1053,6 +1056,7 @@ def _resolve_and_emit_call(
return

if recv_type is None:
# Unresolved-receiver phantoms also carry empty microservice attribution.
pid = _phantom_method_id(
tables, receiver_fqn=None, receiver_expr=call.receiver_expr,
callee=call.callee_simple, arg_count=call.arg_count,
Expand All @@ -1067,6 +1071,22 @@ def _resolve_and_emit_call(
recv_type, call.callee_simple, call.arg_count, tables, ast,
)

# Guard relies on `_lookup_method_candidates` returning a same-ms candidate when one exists; revisit if pass3 scopes lookups per-microservice.
if member.microservice:
same_ms = [c for c in candidates if c.microservice == member.microservice]
if same_ms and len(same_ms) != len(candidates):
for c in candidates:
if c.microservice and c.microservice != member.microservice:
log.warning(
"skipping cross-microservice CALLS edge %s -> %s "
"(caller=%s, callee=%s)",
f"{member.parent_fqn}#{member.decl.signature}",
f"{c.parent_fqn}#{c.decl.signature}",
member.microservice, c.microservice,
)
stats.skipped_cross_service += 1
candidates = same_ms

# Compute the call-shape strategy / confidence override BEFORE the
# empty-candidates check so they are preserved even when the callee cannot
# be located on the resolved receiver type (B3 fix).
Expand Down Expand Up @@ -1106,11 +1126,12 @@ def _resolve_and_emit_call(
return

if len(candidates) == 1:
candidate = candidates[0]
ref_arity: int | None = None
if call.arg_count < 0:
ref_arity = len(candidates[0].decl.parameters)
ref_arity = len(candidate.decl.parameters)
_emit_call_edge(
tables, stats, src_id=member.node_id, dst_id=candidates[0].node_id, call=call,
tables, stats, src_id=member.node_id, dst_id=candidate.node_id, call=call,
confidence=edge_conf, strategy=edge_strat, resolved=True,
edge_arg_count=ref_arity,
)
Expand Down Expand Up @@ -1165,11 +1186,13 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
pct_chained = 100.0 * stats.phantom_chained / max(1, stats.total)
pct_callee_unres = 100.0 * stats.callee_unresolved / max(1, stats.total)
pct_phantom_recv = 100.0 * stats.phantom_other / max(1, stats.total)
tables.pass3_skipped_cross_service = int(stats.skipped_cross_service)
msg = (
f"Call resolution: {stats.total} sites, {stats.phantom_chained} chained phantoms "
f"({pct_chained:.1f}%), {stats.callee_unresolved} unresolved callee "
f"({pct_callee_unres:.1f}%), {stats.phantom_other} phantom receiver "
f"({pct_phantom_recv:.1f}%), strategies: {dict(stats.by_strategy)}"
f"({pct_phantom_recv:.1f}%), {stats.skipped_cross_service} skipped cross-service, "
f"strategies: {dict(stats.by_strategy)}"
)
log.info(msg)
if verbose:
Expand Down Expand Up @@ -1781,7 +1804,8 @@ def _micro_factor(member: MemberEntry | None) -> float:
"async_producers_from_brownfield_pct DOUBLE, "
"http_calls_match_breakdown STRING, "
"async_calls_match_breakdown STRING, "
"cross_service_calls_total INT64"
"cross_service_calls_total INT64, "
"pass3_skipped_cross_service INT64"
")"
)

Expand Down Expand Up @@ -2158,7 +2182,8 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -
"async_producers_from_brownfield_pct: $async_producers_from_brownfield_pct, "
"http_calls_match_breakdown: $http_calls_match_breakdown, "
"async_calls_match_breakdown: $async_calls_match_breakdown, "
"cross_service_calls_total: $cross_service_calls_total})",
"cross_service_calls_total: $cross_service_calls_total, "
"pass3_skipped_cross_service: $pass3_skipped_cross_service})",
{
"k": "graph",
"ov": ONTOLOGY_VERSION,
Expand All @@ -2183,6 +2208,7 @@ def _write_meta(conn: kuzu.Connection, tables: GraphTables, source_root: Path) -
"http_calls_match_breakdown": json.dumps(http_match),
"async_calls_match_breakdown": json.dumps(async_match),
"cross_service_calls_total": int(call_stats.cross_service_calls_total),
"pass3_skipped_cross_service": int(tables.pass3_skipped_cross_service),
},
)

Expand Down
46 changes: 36 additions & 10 deletions kuzu_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,26 @@ def _rows(self, query: str, params: dict[str, Any] | None = None) -> list[dict[s
# ---- meta ----

def meta(self) -> dict[str, Any]:
_META_FULL = (
_META_PR_E3 = (
"MATCH (m:GraphMeta) RETURN m.key AS key, m.ontology_version AS ontology_version, "
"m.built_at AS built_at, m.source_root AS source_root, "
"m.counts_json AS counts_json, m.parse_errors AS parse_errors, "
"m.routes_total AS routes_total, m.exposes_total AS exposes_total, "
"m.routes_by_framework AS routes_by_framework, "
"m.routes_resolved_pct AS routes_resolved_pct, "
"m.routes_from_brownfield_pct AS routes_from_brownfield_pct, "
"m.routes_by_layer AS routes_by_layer, "
"m.http_calls_total AS http_calls_total, m.async_calls_total AS async_calls_total, "
"m.http_calls_by_strategy AS http_calls_by_strategy, m.async_calls_by_strategy AS async_calls_by_strategy, "
"m.http_calls_resolved_pct AS http_calls_resolved_pct, m.async_calls_resolved_pct AS async_calls_resolved_pct, "
"m.http_clients_from_brownfield_pct AS http_clients_from_brownfield_pct, "
"m.async_producers_from_brownfield_pct AS async_producers_from_brownfield_pct, "
"m.http_calls_match_breakdown AS http_calls_match_breakdown, "
"m.async_calls_match_breakdown AS async_calls_match_breakdown, "
"m.cross_service_calls_total AS cross_service_calls_total, "
"m.pass3_skipped_cross_service AS pass3_skipped_cross_service"
)
_META_PRE_E3 = (
"MATCH (m:GraphMeta) RETURN m.key AS key, m.ontology_version AS ontology_version, "
"m.built_at AS built_at, m.source_root AS source_root, "
"m.counts_json AS counts_json, m.parse_errors AS parse_errors, "
Expand Down Expand Up @@ -361,19 +380,23 @@ def meta(self) -> dict[str, Any]:
"m.counts_json AS counts_json, m.parse_errors AS parse_errors"
)
rows: list[dict[str, Any]]
meta_mode = "full"
meta_mode = "pr_e3"
try:
rows = self._rows(_META_FULL)
rows = self._rows(_META_PR_E3)
except Exception:
meta_mode = "pr_a2"
meta_mode = "pre_e3"
try:
rows = self._rows(_META_PR_A2)
rows = self._rows(_META_PRE_E3)
except Exception:
meta_mode = "legacy"
meta_mode = "pr_a2"
try:
rows = self._rows(_META_LEGACY)
except Exception as e:
return {"error": f"{e}"}
rows = self._rows(_META_PR_A2)
except Exception:
meta_mode = "legacy"
try:
rows = self._rows(_META_LEGACY)
except Exception as e:
return {"error": f"{e}"}
if not rows:
return {"error": "no GraphMeta node"}
row = rows[0]
Expand All @@ -398,6 +421,7 @@ def meta(self) -> dict[str, Any]:
http_calls_match_breakdown: dict[str, Any] = {}
async_calls_match_breakdown: dict[str, Any] = {}
cross_service_calls_total = 0
pass3_skipped_cross_service = 0
if meta_mode != "legacy":
rfw_raw = row.get("routes_by_framework") or "{}"
try:
Expand All @@ -409,7 +433,7 @@ def meta(self) -> dict[str, Any]:
routes_total = int(row.get("routes_total") or 0)
exposes_total = int(row.get("exposes_total") or 0)
routes_resolved_pct = float(row.get("routes_resolved_pct") or 0.0)
if meta_mode == "full":
if meta_mode in ("pr_e3", "pre_e3"):
routes_from_brownfield_pct = float(row.get("routes_from_brownfield_pct") or 0.0)
rbl_raw = row.get("routes_by_layer") or "{}"
try:
Expand Down Expand Up @@ -453,6 +477,7 @@ def meta(self) -> dict[str, Any]:
if not isinstance(async_calls_match_breakdown, dict):
async_calls_match_breakdown = {}
cross_service_calls_total = int(row.get("cross_service_calls_total") or 0)
pass3_skipped_cross_service = int(row.get("pass3_skipped_cross_service") or 0)
return {
"ontology_version": int(row.get("ontology_version") or 0),
"built_at": int(row.get("built_at") or 0),
Expand All @@ -476,6 +501,7 @@ def meta(self) -> dict[str, Any]:
"http_calls_match_breakdown": http_calls_match_breakdown,
"async_calls_match_breakdown": async_calls_match_breakdown,
"cross_service_calls_total": cross_service_calls_total,
"pass3_skipped_cross_service": pass3_skipped_cross_service,
"db_path": self.db_path,
}

Expand Down
6 changes: 4 additions & 2 deletions plans/PLAN-POST-TIER1B-FOLLOWUPS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Links go to the review comment so the original context survives.
| 5 | [PR-D2 #13](https://github.com/HumanBean17/java-enterprise-codebase-rag/pull/13#issuecomment-4378995637) post-D3 follow-up 1 | README: document `anchor`-fills-from-builtin behaviour for partial brownfield overrides | low (doc) |
| 6 | [PR-D2 #13](https://github.com/HumanBean17/java-enterprise-codebase-rag/pull/13#issuecomment-4378995637) post-D3 follow-up 2 | Proposal §6: add `channel` field to the `OutgoingCallDecl` schema sketch as durable | low (doc) |
| 7 | [PR-D1 #12](https://github.com/HumanBean17/java-enterprise-codebase-rag/pull/12#issuecomment-4378723605) obs 2 | Second copy of strategy ladder still in `graph_enrich.py:720-724` (annotation/spel/constant_ref) — known consolidation candidate | medium (tech debt) |
| 8 | [PR-E1 #19 review reply](https://github.com/HumanBean17/java-enterprise-codebase-rag/pull/19#issuecomment-4380659734) | `pass3_calls` doesn't enforce the intra-JVM invariant for `CALLS` edges. Today no cross-microservice CALLS edge is emitted on any fixture (verified on `cross_service_smoke`: 9 CALLS edges, 0 cross), but the cleanliness is incidental — no `caller.microservice == callee.microservice` guard exists. FQN collisions across services or brownfield supertype overrides could in principle break the invariant silently. | low-to-medium (invariant) |
| 8 | [PR-E1 #19 review reply](https://github.com/HumanBean17/java-enterprise-codebase-rag/pull/19#issuecomment-4380659734) | `pass3_calls` doesn't enforce the intra-JVM invariant for `CALLS` edges. Today no cross-microservice CALLS edge is emitted on any fixture (verified on `cross_service_smoke`: 9 CALLS edges, 0 cross), but the cleanliness is incidental — no `caller.microservice == callee.microservice` guard exists. FQN collisions across services or brownfield supertype overrides could in principle break the invariant silently. **✅ shipped in PR-E3** | low-to-medium (invariant) |

## Recommended PR boundaries

Expand Down Expand Up @@ -313,7 +313,9 @@ is surgical, not over-eager.
(verified by extending one existing graph_meta assertion in
`tests/test_kuzu_meta.py` or equivalent).
5. Pass3 verbose log mentions the new counter.
6. `260+` (current baseline + 1 new test) tests still pass.
6. `260+` (current baseline + 1 new test) tests still pass.

Status: shipped as PR-E3 in [#22](https://github.com/HumanBean17/java-enterprise-codebase-rag/pull/22).

### Risk

Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures/fqn_collision_smoke/svc-x/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>smoke</groupId>
<artifactId>svc-x</artifactId>
<version>1.0.0</version>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.example;

/**
 * Fixture class in package {@code com.example} whose only method makes a
 * single call to {@code SharedDto.process()}.
 */
public class Caller {
    /** Constructs a new {@code SharedDto} and invokes {@code process()} on it. */
    public void run() {
        new SharedDto().process();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.example;

/**
 * Fixture class in package {@code com.example} exposing one empty method.
 * NOTE(review): presumably one of several same-named copies used to provoke a
 * fully-qualified-name collision across service modules; confirm against the
 * fixture directory layout.
 */
public class SharedDto {
    /** Empty body; the method exists only so that a call to it can be resolved. */
    public void process() {
    }
}
8 changes: 8 additions & 0 deletions tests/fixtures/fqn_collision_smoke/svc-y/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>smoke</groupId>
<artifactId>svc-y</artifactId>
<version>1.0.0</version>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.example;

/**
 * Fixture class in package {@code com.example} exposing one empty method.
 * NOTE(review): presumably a deliberate duplicate of a same-named class in
 * another service module, so that both share one fully-qualified name; confirm
 * against the fixture directory layout.
 */
public class SharedDto {
    /** Empty body; no behaviour is implemented here. */
    public void process() {
    }
}
53 changes: 53 additions & 0 deletions tests/test_call_invariant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations

from pathlib import Path

import kuzu

from build_ast_graph import GraphTables, pass1_parse, pass2_edges, pass3_calls, write_kuzu
from kuzu_queries import KuzuGraph


# Both fixture trees live in the `fixtures` directory next to this test module.
_FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures"
_FQN_COLLISION_FIXTURE = _FIXTURES_DIR / "fqn_collision_smoke"
_CROSS_SERVICE_FIXTURE = _FIXTURES_DIR / "cross_service_smoke"


def _build(root: Path, db_path: Path) -> None:
    """Run passes 1-3 over the sources under `root` and write the graph to `db_path`.

    Route extraction is deliberately not run: the tests in this module only
    assert on `pass3_skipped_cross_service`, which needs nothing beyond pass 3.
    """
    graph_tables = GraphTables()
    parsed_asts = pass1_parse(root, graph_tables, verbose=False)
    pass2_edges(graph_tables, parsed_asts, verbose=False)
    pass3_calls(graph_tables, parsed_asts, verbose=False)
    write_kuzu(db_path, graph_tables, source_root=root, verbose=False)


def _scalar(db_path: Path, query: str) -> int:
    """Run `query` on the database at `db_path` and return its first cell as an int.

    The database is opened read-only. Returns 0 when the query produces no
    rows, or when the first cell of the first row is falsy (e.g. NULL).

    The query result, connection and database handles are closed explicitly
    (innermost first) rather than left to garbage collection, because the
    calling tests reopen the same database path immediately afterwards.
    """
    # Local import keeps this helper self-contained without touching the
    # module's import block.
    from contextlib import closing

    with closing(kuzu.Database(str(db_path), read_only=True)) as database:
        with closing(kuzu.Connection(database)) as conn:
            with closing(conn.execute(query)) as result:
                if not result.has_next():
                    return 0
                return int(result.get_next()[0] or 0)


def test_call_invariant_blocks_cross_microservice_edges(tmp_path: Path) -> None:
    """On the FQN-collision fixture, no CALLS edge may join two different microservices.

    Also checks that the guard recorded at least one skipped edge in the
    graph metadata.
    """
    graph_db = tmp_path / "fqn_collision.kuzu"
    _build(_FQN_COLLISION_FIXTURE, graph_db)

    # Count CALLS edges whose two endpoints both carry a microservice and disagree.
    cross_edge_query = (
        "MATCH (a:Symbol)-[:CALLS]->(b:Symbol) "
        "WHERE a.microservice <> '' AND b.microservice <> '' "
        "AND a.microservice <> b.microservice "
        "RETURN count(*)"
    )
    assert _scalar(graph_db, cross_edge_query) == 0

    skipped = KuzuGraph(str(graph_db)).meta()["pass3_skipped_cross_service"]
    assert skipped >= 1


def test_call_invariant_inert_on_clean_fixtures(tmp_path: Path) -> None:
    """The guard must report zero skipped edges on the `cross_service_smoke` fixture."""
    graph_db = tmp_path / "cross_service_smoke.kuzu"
    _build(_CROSS_SERVICE_FIXTURE, graph_db)
    graph_meta = KuzuGraph(str(graph_db)).meta()
    assert graph_meta["pass3_skipped_cross_service"] == 0


def test_call_invariant_inert_on_bank_chat_system(tmp_path: Path, corpus_root: Path) -> None:
    """The guard must report zero skipped edges on the tree supplied by `corpus_root`."""
    graph_db = tmp_path / "bank_chat.kuzu"
    _build(corpus_root, graph_db)
    graph_meta = KuzuGraph(str(graph_db)).meta()
    assert graph_meta["pass3_skipped_cross_service"] == 0