From 42ddb4567a6d26d79978db34ceeec5953dce6c57 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Thu, 7 May 2026 20:40:06 +0300 Subject: [PATCH 1/2] accept json-encoded filter strings in mcp search/find/neighbors Co-authored-by: Cursor --- README.md | 8 +++++--- mcp_v2.py | 45 ++++++++++++++++++++++++++++++++++++------ server.py | 22 +++++++++++++++------ tests/test_mcp_v2.py | 47 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 5b6d719..fa36ef5 100644 --- a/README.md +++ b/README.md @@ -154,10 +154,12 @@ The DB is dropped and rebuilt from scratch on each run (Phase 1 is a full rebuil | Tool | Purpose | Args | Example | |------|---------|------|---------| -| `search` | locate nodes by NL/code text | `query: str`, `table: str="java"`, `hybrid: bool=False`, `limit: int=5`, `offset: int=0`, `path_contains: str \| None`, `filter: NodeFilter \| None` | `{"query":"join operator flow","limit":5}` | -| `find` | locate nodes by structured filter | `kind: "symbol"\|"route"\|"client"`, `filter: NodeFilter`, `limit: int=25`, `offset: int=0` | `{"kind":"symbol","filter":{"role":"CONTROLLER"}}` | +| `search` | locate nodes by NL/code text | `query: str`, `table: str="java"`, `hybrid: bool=False`, `limit: int=5`, `offset: int=0`, `path_contains: str \| None`, `filter: NodeFilter \| str \| None` | `{"query":"join operator flow","limit":5}` | +| `find` | locate nodes by structured filter | `kind: "symbol"\|"route"\|"client"`, `filter: NodeFilter \| str`, `limit: int=25`, `offset: int=0` | `{"kind":"symbol","filter":{"role":"CONTROLLER"}}` | | `describe` | full record + edge counts for one node | `id: str` | `{"id":"sym:com.bank.chat.core.api.ChatController#joinOperator(JoinOperatorRequest)"}` | -| `neighbors` | one-hop walk; REQUIRED direction + edge_types | `ids: str \| list[str]`, `direction: "in"\|"out"`, `edge_types: list[str]`, `limit: int=25`, `offset: int=0`, `filter: NodeFilter \| None` | `{"ids":"route:chat-core:POST:/chat/joinOperator","direction":"in","edge_types":["HTTP_CALLS","ASYNC_CALLS"]}` | +| `neighbors` | one-hop walk; REQUIRED direction + edge_types | `ids: str \| list[str]`, `direction: "in"\|"out"`, `edge_types: list[str]`, `limit: int=25`, `offset: int=0`, `filter: NodeFilter \| str \| None` | `{"ids":"route:chat-core:POST:/chat/joinOperator","direction":"in","edge_types":["HTTP_CALLS","ASYNC_CALLS"]}` | + +- `filter` may be passed as a JSON-encoded string for clients that flatten objects in tool calls; it is decoded server-side and validated against `NodeFilter`. ## CLI reference diff --git a/mcp_v2.py b/mcp_v2.py index 9abb420..6fab587 100644 --- a/mcp_v2.py +++ b/mcp_v2.py @@ -51,6 +51,26 @@ class NodeFilter(BaseModel): client_method: str | None = None +def _coerce_filter( + value: NodeFilter | dict[str, Any] | str | None, +) -> NodeFilter | dict[str, Any] | None: + """Normalize MCP tool input: weak clients sometimes pass JSON-encoded strings.""" + if value is None or isinstance(value, NodeFilter): + return value + if isinstance(value, str): + s = value.strip() + if not s: + return None + try: + decoded = json.loads(s) + except json.JSONDecodeError as exc: + raise ValueError(f"filter must be a JSON object; invalid JSON: {exc.msg}") from exc + if not isinstance(decoded, dict): + raise ValueError(f"filter must decode to a JSON object, got {type(decoded).__name__}") + return decoded + return value + + class SearchHit(BaseModel): chunk_id: str symbol_id: str | None = None @@ -322,7 +342,7 @@ def search_v2( limit: int = 5, offset: int = 0, path_contains: str | None = None, - filter: NodeFilter | dict[str, Any] | None = None, + filter: NodeFilter | dict[str, Any] | str | None = None, graph: KuzuGraph | None = None, ) -> SearchOutput: try: @@ -346,7 +366,12 @@ def search_v2( device=device, model=model, ) - nf = NodeFilter.model_validate(filter) if filter is not None and not isinstance(filter, NodeFilter) else filter + raw_filter = _coerce_filter(filter) + nf = ( + NodeFilter.model_validate(raw_filter) + if raw_filter is not None and not isinstance(raw_filter, NodeFilter) + else raw_filter + ) hits: list[SearchHit] = [] for row in rows: if path_contains and path_contains not in str(row.get("filename") or ""): @@ -363,14 +388,17 @@ def search_v2( def find_v2( kind: Literal["symbol", "route", "client"], - filter: NodeFilter | dict[str, Any], + filter: NodeFilter | dict[str, Any] | str, limit: int = 25, offset: int = 0, graph: KuzuGraph | None = None, ) -> FindOutput: try: g = graph or KuzuGraph.get() - nf = NodeFilter.model_validate(filter) + raw_filter = _coerce_filter(filter) + if raw_filter is None: + raw_filter = {} + nf = NodeFilter.model_validate(raw_filter) if not isinstance(raw_filter, NodeFilter) else raw_filter if kind == "symbol": where, params = _symbol_where_from_filter(nf) params["lim"] = int(limit) + int(offset) @@ -434,13 +462,18 @@ def neighbors_v2( edge_types: list[str] = Field(...), limit: int = 25, offset: int = 0, - filter: NodeFilter | dict[str, Any] | None = None, + filter: NodeFilter | dict[str, Any] | str | None = None, graph: Any | None = None, ) -> NeighborsOutput: try: _NEIGHBOR_EDGE_TYPES_ADAPTER.validate_python(edge_types) g = graph or KuzuGraph.get() - nf = NodeFilter.model_validate(filter) if filter is not None and not isinstance(filter, NodeFilter) else filter + raw_filter = _coerce_filter(filter) + nf = ( + NodeFilter.model_validate(raw_filter) + if raw_filter is not None and not isinstance(raw_filter, NodeFilter) + else raw_filter + ) origins = [ids] if isinstance(ids, str) else list(ids) results: list[Edge] = [] for origin_id in origins: diff --git a/server.py b/server.py index e1e99b6..a5a4737 100644 --- a/server.py +++ b/server.py @@ -20,6 +20,7 @@ "Java codebase graph navigator (LanceDB + Kuzu). " "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge counts), " "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults). " + "NodeFilter `filter` arguments may be passed as JSON-encoded strings. " "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, DECLARES, DECLARES_CLIENT, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS. " "Rebuild, meta, tables, diagnose-ignore, analyze-pr: use user-rag CLI — not MCP." ) @@ -290,9 +291,12 @@ async def search( default=None, description="Substring match on file path (pre-filter from index)", ), - filter: dict[str, Any] | None = Field( + filter: dict[str, Any] | str | None = Field( default=None, - description="Optional NodeFilter (symbol-oriented keys) applied to each hit after search", + description=( + "Optional NodeFilter (symbol-oriented keys) applied to each hit after search; " + "a JSON-encoded string is also accepted" + ), ), ) -> mcp_v2.SearchOutput: return await asyncio.to_thread( @@ -310,9 +314,12 @@ async def search( @mcp.tool(name="find", description="locate nodes by structured filter") async def find( kind: Literal["symbol", "route", "client"] = Field(description="symbol | route | client"), - filter: dict[str, Any] = Field( + filter: dict[str, Any] | str = Field( ..., - description="Required NodeFilter object (shared schema; irrelevant keys ignored per kind)", + description=( + "Required NodeFilter object (shared schema; irrelevant keys ignored per kind); " + "a JSON-encoded string is also accepted" + ), ), limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"), offset: int = Field(default=0, ge=0, le=499, description="Skip this many nodes (pagination)"), @@ -351,9 +358,12 @@ async def neighbors( le=1000, description="Skip this many edges after merge (pagination)", ), - filter: dict[str, Any] | None = Field( + filter: dict[str, Any] | str | None = Field( default=None, - description="Optional NodeFilter applied to the other endpoint of each edge", + description=( + "Optional NodeFilter applied to the other endpoint of each edge; " + "a JSON-encoded string is also accepted" + ), ), ) -> mcp_v2.NeighborsOutput: return await asyncio.to_thread( diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py index 8348cc1..256f67e 100644 --- a/tests/test_mcp_v2.py +++ b/tests/test_mcp_v2.py @@ -304,3 +304,50 @@ def test_neighbors_empty_edge_types_rejected(kuzu_graph) -> None: mid = _method_id_with_calls(kuzu_graph, "out") with pytest.raises(ValidationError): neighbors_v2(mid, direction="in", edge_types=[], graph=kuzu_graph) + + +def test_search_filter_accepts_json_string(monkeypatch, kuzu_graph) -> None: + monkeypatch.setattr("mcp_v2.run_search", lambda *args, **kwargs: _fake_search_rows()) + want = {"microservice": "chat-assign"} + out_dict = search_v2("ChatService", filter=want, graph=kuzu_graph) + out_str = search_v2("ChatService", filter='{"microservice":"chat-assign"}', graph=kuzu_graph) + assert out_dict.success is True + assert out_str.success is True + assert out_dict.results == out_str.results + + +def test_search_filter_empty_string_treated_as_none(monkeypatch, kuzu_graph) -> None: + monkeypatch.setattr("mcp_v2.run_search", lambda *args, **kwargs: _fake_search_rows()) + baseline = search_v2("ChatService", graph=kuzu_graph) + empty = search_v2("ChatService", filter="", graph=kuzu_graph) + whitespace = search_v2("ChatService", filter=" ", graph=kuzu_graph) + assert baseline.success is True + assert empty.success is True + assert whitespace.success is True + assert baseline.results == empty.results == whitespace.results + + +def test_find_filter_accepts_json_string(kuzu_graph) -> None: + out_dict = find_v2("symbol", {"role": "CONTROLLER"}, graph=kuzu_graph) + out_str = find_v2("symbol", '{"role":"CONTROLLER"}', graph=kuzu_graph) + assert out_dict.success is True + assert out_str.success is True + assert out_dict.results == out_str.results + + +def test_neighbors_filter_accepts_json_string(kuzu_graph) -> None: + mid = _method_id_with_calls(kuzu_graph, "out") + flt = {"role": "SERVICE"} + out_dict = neighbors_v2(mid, direction="out", edge_types=["CALLS"], filter=flt, graph=kuzu_graph) + out_str = neighbors_v2(mid, direction="out", edge_types=["CALLS"], filter='{"role":"SERVICE"}', graph=kuzu_graph) + assert out_dict.success is True + assert out_str.success is True + assert out_dict.results == out_str.results + + +def test_filter_invalid_json_returns_failure(monkeypatch, kuzu_graph) -> None: + monkeypatch.setattr("mcp_v2.run_search", lambda *args, **kwargs: _fake_search_rows()) + out = search_v2("ChatService", filter="{not json", graph=kuzu_graph) + assert out.success is False + assert out.message is not None + assert "JSON" in out.message From 71d84f2d5ae4f7431c5db743959e45be7ed95046 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Thu, 7 May 2026 20:49:36 +0300 Subject: [PATCH 2/2] address pr review: prefer-object copy, json null filter, docs and tests Co-authored-by: Cursor --- README.md | 3 ++- mcp_v2.py | 2 ++ server.py | 14 +++++++------- tests/test_mcp_v2.py | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fa36ef5..6c51358 100644 --- a/README.md +++ b/README.md @@ -159,7 +159,8 @@ The DB is dropped and rebuilt from scratch on each run (Phase 1 is a full rebuil | `describe` | full record + edge counts for one node | `id: str` | `{"id":"sym:com.bank.chat.core.api.ChatController#joinOperator(JoinOperatorRequest)"}` | | `neighbors` | one-hop walk; REQUIRED direction + edge_types | `ids: str \| list[str]`, `direction: "in"\|"out"`, `edge_types: list[str]`, `limit: int=25`, `offset: int=0`, `filter: NodeFilter \| str \| None` | `{"ids":"route:chat-core:POST:/chat/joinOperator","direction":"in","edge_types":["HTTP_CALLS","ASYNC_CALLS"]}` | -- `filter` may be passed as a JSON-encoded string for clients that flatten objects in tool calls; it is decoded server-side and validated against `NodeFilter`. +- `filter` is a JSON object matching the `NodeFilter` schema (wire types are `object` or, as a fallback, a JSON-encoded string for clients that flatten objects in tool calls). Prefer the object form when the client supports it. +- For `find`, an empty or whitespace-only filter string, or the JSON literal `null`, is treated like `{}` (match anything). ## CLI reference diff --git a/mcp_v2.py b/mcp_v2.py index 6fab587..0b7f13d 100644 --- a/mcp_v2.py +++ b/mcp_v2.py @@ -65,6 +65,8 @@ def _coerce_filter( decoded = json.loads(s) except json.JSONDecodeError as exc: raise ValueError(f"filter must be a JSON object; invalid JSON: {exc.msg}") from exc + if decoded is None: + return None if not isinstance(decoded, dict): raise ValueError(f"filter must decode to a JSON object, got {type(decoded).__name__}") return decoded diff --git a/server.py b/server.py index a5a4737..fbadf01 100644 --- a/server.py +++ b/server.py @@ -20,7 +20,7 @@ "Java codebase graph navigator (LanceDB + Kuzu). " "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge counts), " "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults). " - "NodeFilter `filter` arguments may be passed as JSON-encoded strings. " + "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. " "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, DECLARES, DECLARES_CLIENT, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS. " "Rebuild, meta, tables, diagnose-ignore, analyze-pr: use user-rag CLI — not MCP." ) @@ -294,8 +294,8 @@ async def search( filter: dict[str, Any] | str | None = Field( default=None, description=( - "Optional NodeFilter (symbol-oriented keys) applied to each hit after search; " - "a JSON-encoded string is also accepted" + "Optional NodeFilter (symbol-oriented keys) applied to each hit after search. " + "Prefer a JSON object; a JSON-encoded string is accepted as a fallback." ), ), ) -> mcp_v2.SearchOutput: @@ -317,8 +317,8 @@ async def find( filter: dict[str, Any] | str = Field( ..., description=( - "Required NodeFilter object (shared schema; irrelevant keys ignored per kind); " - "a JSON-encoded string is also accepted" + "Required NodeFilter (shared schema; irrelevant keys ignored per kind). " + "Prefer a JSON object; a JSON-encoded string is accepted as a fallback." ), ), limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"), @@ -361,8 +361,8 @@ async def neighbors( filter: dict[str, Any] | str | None = Field( default=None, description=( - "Optional NodeFilter applied to the other endpoint of each edge; " - "a JSON-encoded string is also accepted" + "Optional NodeFilter applied to the other endpoint of each edge. " + "Prefer a JSON object; a JSON-encoded string is accepted as a fallback." ), ), ) -> mcp_v2.NeighborsOutput: diff --git a/tests/test_mcp_v2.py b/tests/test_mcp_v2.py index 256f67e..f5bbb6b 100644 --- a/tests/test_mcp_v2.py +++ b/tests/test_mcp_v2.py @@ -327,6 +327,23 @@ def test_search_filter_empty_string_treated_as_none(monkeypatch, kuzu_graph) -> assert baseline.results == empty.results == whitespace.results +def test_search_filter_json_null_treated_as_none(monkeypatch, kuzu_graph) -> None: + monkeypatch.setattr("mcp_v2.run_search", lambda *args, **kwargs: _fake_search_rows()) + baseline = search_v2("ChatService", graph=kuzu_graph) + out = search_v2("ChatService", filter="null", graph=kuzu_graph) + assert baseline.success is True + assert out.success is True + assert baseline.results == out.results + + +def test_find_filter_json_null_treated_as_empty_filter(kuzu_graph) -> None: + empty = find_v2("symbol", {}, graph=kuzu_graph) + out = find_v2("symbol", "null", graph=kuzu_graph) + assert empty.success is True + assert out.success is True + assert empty.results == out.results + + def test_find_filter_accepts_json_string(kuzu_graph) -> None: out_dict = find_v2("symbol", {"role": "CONTROLLER"}, graph=kuzu_graph) out_str = find_v2("symbol", '{"role":"CONTROLLER"}', graph=kuzu_graph)