From 921dccdacff4f987bfa86674d77a44d1926b604b Mon Sep 17 00:00:00 2001 From: Dvir Dukhan Date: Wed, 27 May 2026 12:43:52 +0300 Subject: [PATCH] feat(graph): per-branch graph identity (T17 #651) Refactor FalkorDB graph naming so each (project, branch) pair gets its own graph: 'code:{project}:{branch}'. This lets concurrent agents working on different branches of the same repo index in parallel without overwriting each other. Changes: - api/graph.py: add DEFAULT_BRANCH, compose_graph_name(), parse_graph_name(); Graph and AsyncGraphQuery constructors now accept (name, branch=None); Graph.from_raw_name() classmethod for internal callers that need to bypass composition (e.g. clone()); get_repos()/async_get_repos() now return {project, branch, graph} dicts. - api/info.py: branch-aware Redis hash keys ('{repo}:{branch}_info'); reads fall back to legacy '{repo}_info' for un-migrated graphs. - api/git_utils: GitRepoName() and switch_commit() thread branch through; LegacyGitRepoName() retained for the migration helper. - api/project.py: detect_branch() via 'git rev-parse --abbrev-ref HEAD'; Project.__init__ / from_git_repository / from_local_repository accept branch. - api/index.py: all Pydantic request models gain 'branch: Optional[str]'; endpoints thread it into AsyncGraphQuery + info functions; responses include 'branch'. - api/cli.py: --branch flag on index / index-repo / search / neighbors / paths / info; new 'cgraph migrate' command. - api/migrations/per_branch.py (NEW): idempotent migration that renames legacy '' graphs to 'code::_default', '{}_info' Redis keys to '{}:_default_info', and '{}_git' graphs to '{}:_default_git'. Supports --dry-run. Tests: - tests/test_per_branch_graphs.py (NEW): 24 unit tests covering compose/parse helpers, Graph constructor branch awareness, AsyncGraphQuery, info-key shape, GitRepoName shape, and migration idempotency (with mocked FalkorDB). - tests/test_async_graph.py, tests/test_cli.py, tests/endpoints/test_list_repos.py: updated assertions for the new dict return shape from get_repos / async_get_repos. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/source_analyzer.py | 9 +- api/auto_complete.py | 10 +- api/cli.py | 86 ++++++++-- api/code_coverage/lcov/lcov.py | 6 +- api/git_utils/git_utils.py | 45 +++-- api/graph.py | 174 ++++++++++++++++++-- api/index.py | 67 ++++---- api/info.py | 72 ++++++-- api/migrations/__init__.py | 4 + api/migrations/per_branch.py | 151 +++++++++++++++++ api/project.py | 54 ++++-- pyproject.toml | 6 + tests/endpoints/test_list_repos.py | 4 +- tests/test_async_graph.py | 7 +- tests/test_cli.py | 4 +- tests/test_per_branch_graphs.py | 256 +++++++++++++++++++++++++++++ uv.lock | 35 ++++ 17 files changed, 872 insertions(+), 118 deletions(-) create mode 100644 api/migrations/__init__.py create mode 100644 api/migrations/per_branch.py create mode 100644 tests/test_per_branch_graphs.py diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 9046abcf..e500caea 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -208,21 +208,26 @@ def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] logging.info("Done analyzing path") - def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None) -> Graph: + def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None, branch: Optional[str] = None) -> Graph: """ Analyze a local Git repository. Args: path (str): Path to a local git repository ignore (List(str)): List of paths to skip + branch (Optional[str]): Branch name. Auto-detected from the + checkout when ``None``. """ if ignore is None: ignore = [] from pygit2.repository import Repository + from ..project import detect_branch proj_name = Path(path).name - graph = Graph(proj_name) + if branch is None: + branch = detect_branch(Path(path)) + graph = Graph(proj_name, branch=branch) self.analyze_local_folder(path, graph, ignore) # Save processed commit hash to the DB diff --git a/api/auto_complete.py b/api/auto_complete.py index deebc827..8c69bc8b 100644 --- a/api/auto_complete.py +++ b/api/auto_complete.py @@ -1,15 +1,17 @@ +from typing import Optional + from .graph import Graph, AsyncGraphQuery -def prefix_search(repo: str, prefix: str) -> str: +def prefix_search(repo: str, prefix: str, branch: Optional[str] = None) -> str: """ Returns a list of all entities in the repository that start with the given prefix. """ - g = Graph(repo) + g = Graph(repo, branch=branch) return g.prefix_search(prefix) -async def async_prefix_search(repo: str, prefix: str) -> list: +async def async_prefix_search(repo: str, prefix: str, branch: Optional[str] = None) -> list: """Async version of prefix_search using AsyncGraphQuery.""" - g = AsyncGraphQuery(repo) + g = AsyncGraphQuery(repo, branch=branch) try: return await g.prefix_search(prefix) finally: diff --git a/api/cli.py b/api/cli.py index 960f8f39..bf6d9d46 100644 --- a/api/cli.py +++ b/api/cli.py @@ -172,6 +172,9 @@ def index( repo: Optional[str] = typer.Option( None, "--repo", help="Graph name (defaults to folder name)" ), + branch: Optional[str] = typer.Option( + None, "--branch", help="Branch to associate with this index (auto-detected from git checkout when omitted; '_default' for non-git paths)" + ), ) -> None: """Index a local folder into the knowledge graph.""" from .project import Project @@ -204,14 +207,14 @@ def index( _stderr(f"Indexing {folder} as '{name}'…") try: - project = Project(name, folder, url) + project = Project(name, folder, url, branch=branch) graph = project.analyze_sources(ignore=list(ignore) if ignore else []) stats = graph.stats() except Exception as e: _json_error(str(e)) - _stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges") - _json_out({"status": "ok", "repo": name, **stats}) + _stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges (branch={project.branch})") + _json_out({"status": "ok", "repo": name, "branch": project.branch, **stats}) # ── index-repo ───────────────────────────────────────────────────────── @@ -223,6 +226,9 @@ def index_repo( ignore: Optional[List[str]] = typer.Option( None, "--ignore", help="Directories to ignore (repeatable)" ), + branch: Optional[str] = typer.Option( + None, "--branch", help="Branch to associate with this index (auto-detected from the cloned checkout when omitted)" + ), ) -> None: """Clone a git repository and index it into the knowledge graph.""" from .project import Project @@ -233,14 +239,14 @@ def index_repo( import io import contextlib with contextlib.redirect_stdout(io.StringIO()): - project = Project.from_git_repository(url) + project = Project.from_git_repository(url, branch=branch) graph = project.analyze_sources(ignore=list(ignore) if ignore else []) stats = graph.stats() except Exception as e: _json_error(str(e)) - _stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges") - _json_out({"status": "ok", "repo": project.name, **stats}) + _stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges (branch={project.branch})") + _json_out({"status": "ok", "repo": project.name, "branch": project.branch, **stats}) # ── list ─────────────────────────────────────────────────────────────── @@ -248,7 +254,7 @@ def index_repo( @app.command("list") def list_repos() -> None: - """List all indexed repositories.""" + """List all indexed (project, branch) pairs.""" from .graph import get_repos try: @@ -259,6 +265,30 @@ def list_repos() -> None: _json_out({"repos": repos}) +# ── migrate ──────────────────────────────────────────────────────────── + + +@app.command("migrate") +def migrate( + dry_run: bool = typer.Option(False, "--dry-run", help="Print actions without performing them"), +) -> None: + """Promote legacy (pre-T17) graphs and Redis keys into the per-branch namespace. + + Renames each legacy ```` graph to ``code::_default``, + each ``{project}_info`` Redis key to ``{project}:_default_info``, and + each ``{project}_git`` graph to ``{project}:_default_git``. Idempotent. + """ + + from .migrations.per_branch import run_migration + + try: + result = run_migration(dry_run=dry_run) + except Exception as e: + _json_error(str(e)) + + _json_out(result) + + # ── search ───────────────────────────────────────────────────────────── @@ -268,18 +298,24 @@ def search( repo: Optional[str] = typer.Option( None, "--repo", help="Repository name (defaults to CWD name)" ), + branch: Optional[str] = typer.Option( + None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)" + ), ) -> None: """Search for entities by prefix (full-text search).""" from .graph import Graph + from .project import detect_branch name = _default_repo(repo) + if branch is None: + branch = detect_branch(Path.cwd()) try: - g = Graph(name) + g = Graph(name, branch=branch) results = g.prefix_search(query) except Exception as e: _json_error(str(e)) - _json_out({"repo": name, "results": results}) + _json_out({"repo": name, "branch": branch, "results": results}) # ── neighbors ────────────────────────────────────────────────────────── @@ -297,18 +333,24 @@ def neighbors( label: Optional[str] = typer.Option( None, "--label", help="Filter by destination label (e.g. Function, Class)" ), + branch: Optional[str] = typer.Option( + None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)" + ), ) -> None: """Get neighboring entities of the given node(s).""" from .graph import Graph + from .project import detect_branch name = _default_repo(repo) + if branch is None: + branch = detect_branch(Path.cwd()) try: - g = Graph(name) + g = Graph(name, branch=branch) result = g.get_neighbors(node_ids, rel=rel, lbl=label) except Exception as e: _json_error(str(e)) - _json_out({"repo": name, **result}) + _json_out({"repo": name, "branch": branch, **result}) # ── paths ────────────────────────────────────────────────────────────── @@ -321,18 +363,24 @@ def paths( repo: Optional[str] = typer.Option( None, "--repo", help="Repository name (defaults to CWD name)" ), + branch: Optional[str] = typer.Option( + None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)" + ), ) -> None: """Find call-chain paths between two nodes.""" from .graph import Graph + from .project import detect_branch name = _default_repo(repo) + if branch is None: + branch = detect_branch(Path.cwd()) try: - g = Graph(name) + g = Graph(name, branch=branch) result = g.find_paths(src, dest) except Exception as e: _json_error(str(e)) - _json_out({"repo": name, "paths": result}) + _json_out({"repo": name, "branch": branch, "paths": result}) # ── info ─────────────────────────────────────────────────────────────── @@ -343,20 +391,26 @@ def info( repo: Optional[str] = typer.Option( None, "--repo", help="Repository name (defaults to CWD name)" ), + branch: Optional[str] = typer.Option( + None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)" + ), ) -> None: """Show repository statistics and metadata.""" from .graph import Graph from .info import get_repo_info + from .project import detect_branch name = _default_repo(repo) + if branch is None: + branch = detect_branch(Path.cwd()) try: - g = Graph(name) + g = Graph(name, branch=branch) stats = g.stats() - metadata = get_repo_info(name) or {} + metadata = get_repo_info(name, branch) or {} except Exception as e: _json_error(str(e)) - _json_out({"repo": name, **stats, "metadata": metadata}) + _json_out({"repo": name, "branch": branch, **stats, "metadata": metadata}) if __name__ == "__main__": diff --git a/api/code_coverage/lcov/lcov.py b/api/code_coverage/lcov/lcov.py index 734f285d..94daae31 100644 --- a/api/code_coverage/lcov/lcov.py +++ b/api/code_coverage/lcov/lcov.py @@ -1,5 +1,7 @@ import os import sys +from typing import Optional + from ...graph import Graph def lcovparse(content): @@ -124,7 +126,7 @@ def _line(l, report): else: sys.stdout.write("Unknown method name %s" % method) -def process_lcov(repo: str, lcov_file: str) -> None: +def process_lcov(repo: str, lcov_file: str, branch: Optional[str] = None) -> None: # create report from coverage lcov file with open(lcov_file, "r") as file: content = file.read() # Reads the entire file as a single string @@ -134,7 +136,7 @@ def process_lcov(repo: str, lcov_file: str) -> None: # SF:/__w/FalkorDB/FalkorDB/src/algorithms/detect_cycle.c prefix = "/__w/FalkorDB/FalkorDB/" # prefix to remove - g = Graph(repo) + g = Graph(repo, branch=branch) #--------------------------------------------------------------------------- # Process report diff --git a/api/git_utils/git_utils.py b/api/git_utils/git_utils.py index 0c723b03..e9cbad19 100644 --- a/api/git_utils/git_utils.py +++ b/api/git_utils/git_utils.py @@ -14,8 +14,21 @@ # Configure logging logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s') -def GitRepoName(repo_name): - """ Returns the git repository name """ +def GitRepoName(repo_name, branch=None): + """ Returns the git transitions graph key for ``(repo_name, branch)``. + + Format: ``{repo_name}:{branch}_git``. Hash-tag stays on ``repo_name`` + so the git-graph key lives on the same FalkorDB cluster slot as its + sibling code graph and ``*_info`` Redis hash. + """ + from ..graph import DEFAULT_BRANCH + if branch is None or branch == "": + branch = DEFAULT_BRANCH + return "{" + repo_name + "}" + ":" + branch + "_git" + + +def LegacyGitRepoName(repo_name): + """Pre-T17 git graph key shape — kept for the migration helper.""" return "{" + repo_name + "}_git" def is_ignored(file_path: str, ignore_list: List[str]) -> bool: @@ -70,7 +83,7 @@ def classify_changes( return added, deleted, modified # build a graph capturing the git commit history -def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, ignore_list: Optional[List[str]] = None) -> GitGraph: +def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, ignore_list: Optional[List[str]] = None, branch: Optional[str] = None) -> GitGraph: """ Builds a graph representation of the git commit history. @@ -78,6 +91,7 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno path (str): Path to the git repository. repo_name (str): Name of the repository. ignore_list (List[str], optional): List of file patterns to ignore. + branch (Optional[str]): Branch name. Defaults to ``_default``. Returns: GitGraph: Graph object representing the commit history. @@ -86,13 +100,15 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno if ignore_list is None: ignore_list = [] - # Copy the graph into a temporary graph - logging.info("Cloning source graph %s -> %s_tmp", repo_name, repo_name) - # Will be deleted at the end of this function - g = Graph(repo_name).clone(repo_name + "_tmp") + # Copy the graph into a temporary graph (sibling key with `_tmp` suffix on + # the branch component so the clone lands on the same cluster slot). + source = Graph(repo_name, branch=branch) + tmp_name = source.name + "_tmp" + logging.info("Cloning source graph %s -> %s", source.name, tmp_name) + g = source.clone(tmp_name) g.enable_backlog() - git_graph = GitGraph(GitRepoName(repo_name)) + git_graph = GitGraph(GitRepoName(repo_name, branch)) supported_types = analyzer.supported_types() # Initialize with the current commit @@ -252,12 +268,12 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno # Delete temporaty graph g.disable_backlog() - logging.debug(f"Deleting temporary graph {repo_name + '_tmp'}") + logging.debug(f"Deleting temporary graph {g.name}") g.delete() return git_graph -def switch_commit(repo: str, to: str): +def switch_commit(repo: str, to: str, branch: Optional[str] = None): """ Switches the state of a graph repository from its current commit to the given commit. @@ -268,6 +284,7 @@ def switch_commit(repo: str, to: str): Args: repo (str): The name of the graph repository to switch commits. to (str): The target commit hash to switch the graph to. + branch (Optional[str]): The branch. Defaults to ``_default``. """ # Validate input arguments @@ -280,11 +297,11 @@ def switch_commit(repo: str, to: str): logging.info(f"Switching to commit: {to}") # Initialize the graph and GitGraph objects - g = Graph(repo) - git_graph = GitGraph(GitRepoName(repo)) + g = Graph(repo, branch=branch) + git_graph = GitGraph(GitRepoName(repo, branch)) # Get the current commit hash of the graph - current_hash = get_repo_commit(repo) + current_hash = get_repo_commit(repo, branch) logging.info(f"Current graph commit: {current_hash}") if current_hash == to: @@ -329,5 +346,5 @@ def switch_commit(repo: str, to: str): g.rerun_query(_q, _p) # Update the graph's commit to the new target commit - set_repo_commit(repo, to) + set_repo_commit(repo, to, branch) logging.info(f"Graph commit updated to {to}") diff --git a/api/graph.py b/api/graph.py index eda72e63..5ec1863c 100644 --- a/api/graph.py +++ b/api/graph.py @@ -1,4 +1,5 @@ import os +import re import time from .entities import * from typing import Optional @@ -10,6 +11,56 @@ logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s') + +# --------------------------------------------------------------------------- +# Branch-aware graph naming (T17) +# +# Each indexed (project, branch) pair gets its own FalkorDB graph so that +# concurrent agents indexing the same repo on different branches do not +# overwrite each other. The format is:: +# +# code:{project_name}:{branch} +# +# When ``branch`` is omitted it defaults to ``_default`` — that is also the +# name the one-shot migration uses when promoting legacy ``{project_name}`` +# graphs into the new namespace. +# --------------------------------------------------------------------------- + +DEFAULT_BRANCH = "_default" +_GRAPH_NAME_RE = re.compile(r"^code:(?P[^:]+):(?P.+)$") + + +def compose_graph_name(project_name: str, branch: Optional[str] = None) -> str: + """Compose the FalkorDB graph name for a (project, branch) pair. + + Args: + project_name: The repository / project name (typically the + directory basename). + branch: Branch name. ``None`` is treated as :data:`DEFAULT_BRANCH`. + + Returns: + ``"code:{project_name}:{branch}"``. + """ + + if branch is None or branch == "": + branch = DEFAULT_BRANCH + return f"code:{project_name}:{branch}" + + +def parse_graph_name(graph_name: str) -> Optional[tuple[str, str]]: + """Inverse of :func:`compose_graph_name`. + + Returns ``(project, branch)`` if ``graph_name`` follows the new + ``code:{project}:{branch}`` format, otherwise ``None`` so callers can + treat it as a legacy / unrelated graph. + """ + + m = _GRAPH_NAME_RE.match(graph_name) + if not m: + return None + return m.group("project"), m.group("branch") + + def graph_exists(name: str): db = FalkorDB(host=os.getenv('FALKORDB_HOST', 'localhost'), port=os.getenv('FALKORDB_PORT', 6379), @@ -18,9 +69,21 @@ def graph_exists(name: str): return name in db.list_graphs() -def get_repos() -> list[str]: + +def _is_internal_suffix(graph_name: str) -> bool: + """Internal helper graph suffixes that should never be listed as repos.""" + return graph_name.endswith('_git') or graph_name.endswith('_schema') or graph_name.endswith('_tmp') + + +def get_repos() -> list[dict]: """ - List processed repositories + List processed (project, branch) pairs. + + Returns a list of ``{"project": ..., "branch": ..., "graph": ...}`` + dicts for every graph that matches the new ``code:{project}:{branch}`` + format. Legacy graphs (created before T17) are returned with + ``branch == DEFAULT_BRANCH`` so callers can keep treating them as a + single graph until the migration is run. """ db = FalkorDB(host=os.getenv('FALKORDB_HOST', 'localhost'), @@ -28,22 +91,52 @@ def get_repos() -> list[str]: username=os.getenv('FALKORDB_USERNAME', None), password=os.getenv('FALKORDB_PASSWORD', None)) - graphs = db.list_graphs() - graphs = [g for g in graphs if not (g.endswith('_git') or g.endswith('_schema'))] - return graphs + repos = [] + for g in db.list_graphs(): + if _is_internal_suffix(g): + continue + parsed = parse_graph_name(g) + if parsed is None: + # Legacy graph (pre-T17): synthesize a virtual entry so it stays + # discoverable until the migration helper promotes it. + repos.append({"project": g, "branch": DEFAULT_BRANCH, "graph": g}) + else: + project, branch = parsed + repos.append({"project": project, "branch": branch, "graph": g}) + return repos class Graph(): """ Represents a connection to a graph database using FalkorDB. + + The underlying graph is named ``code:{project_name}:{branch}`` so that + concurrent agents working on different branches of the same repo do + not corrupt each other's data (see T17, issue #651). + + For backwards compatibility ``name`` may be either a bare project name + (``"falkordb"``) or a fully composed graph name (``"code:falkordb:main"``); + in the former case the composition is performed automatically using + ``branch`` (default :data:`DEFAULT_BRANCH`). """ - def __init__(self, name: str) -> None: - self.name = name + def __init__(self, name: str, branch: Optional[str] = None) -> None: + # Accept either an already-composed graph name or a bare project + # name + branch. ``parse_graph_name`` returns ``None`` for legacy / + # bare names, signalling that we need to compose. + parsed = parse_graph_name(name) + if parsed is not None: + self.project, self.branch = parsed + self.name = name + else: + self.project = name + self.branch = branch if branch is not None else DEFAULT_BRANCH + self.name = compose_graph_name(self.project, self.branch) + self.db = FalkorDB(host=os.getenv('FALKORDB_HOST', 'localhost'), port=os.getenv('FALKORDB_PORT', 6379), username=os.getenv('FALKORDB_USERNAME', None), password=os.getenv('FALKORDB_PASSWORD', None)) - self.g = self.db.select_graph(name) + self.g = self.db.select_graph(self.name) # Initialize the backlog as disabled by default self.backlog = None @@ -62,9 +155,34 @@ def __init__(self, name: str) -> None: except Exception: pass + @classmethod + def from_raw_name(cls, raw_name: str) -> "Graph": + """Construct a :class:`Graph` from an already-composed (or raw) name. + + Used by :meth:`clone` and the migration helper, where the caller + already knows the final FalkorDB key. Bypasses + :func:`compose_graph_name`. + """ + + obj = cls.__new__(cls) + obj.name = raw_name + parsed = parse_graph_name(raw_name) + if parsed is None: + obj.project = raw_name + obj.branch = DEFAULT_BRANCH + else: + obj.project, obj.branch = parsed + obj.db = FalkorDB(host=os.getenv('FALKORDB_HOST', 'localhost'), + port=os.getenv('FALKORDB_PORT', 6379), + username=os.getenv('FALKORDB_USERNAME', None), + password=os.getenv('FALKORDB_PASSWORD', None)) + obj.g = obj.db.select_graph(raw_name) + obj.backlog = None + return obj + def clone(self, clone: str) -> "Graph": """ - Create a copy of the graph under the name clone + Create a copy of the graph under the name clone (raw FalkorDB key). Returns: a new instance of Graph @@ -81,7 +199,7 @@ def clone(self, clone: str) -> "Graph": # TODO: add a waiting limit time.sleep(1) - return Graph(clone) + return Graph.from_raw_name(clone) def delete(self) -> None: @@ -639,12 +757,24 @@ async def async_graph_exists(name: str) -> bool: await db.aclose() -async def async_get_repos() -> list[str]: - """List processed repositories (async version).""" +async def async_get_repos() -> list[dict]: + """List processed (project, branch) pairs (async version). + + Mirrors :func:`get_repos`; see that function for the return shape. + """ db = _async_db() try: - graphs = await db.list_graphs() - return [g for g in graphs if not (g.endswith('_git') or g.endswith('_schema'))] + repos = [] + for g in await db.list_graphs(): + if _is_internal_suffix(g): + continue + parsed = parse_graph_name(g) + if parsed is None: + repos.append({"project": g, "branch": DEFAULT_BRANCH, "graph": g}) + else: + project, branch = parsed + repos.append({"project": project, "branch": branch, "graph": g}) + return repos finally: await db.aclose() @@ -654,12 +784,22 @@ class AsyncGraphQuery: Uses falkordb.asyncio under the hood. No index creation or backlog — indexes already exist from the sync Graph used during analysis. + + Accepts either a bare project name + branch or a fully composed graph + name (``code:{project}:{branch}``); see :class:`Graph` for details. """ - def __init__(self, name: str) -> None: - self.name = name + def __init__(self, name: str, branch: Optional[str] = None) -> None: + parsed = parse_graph_name(name) + if parsed is not None: + self.project, self.branch = parsed + self.name = name + else: + self.project = name + self.branch = branch if branch is not None else DEFAULT_BRANCH + self.name = compose_graph_name(self.project, self.branch) self.db = _async_db() - self.g = self.db.select_graph(name) + self.g = self.db.select_graph(self.name) async def graph_exists(self) -> bool: """Check if this graph exists, reusing the current connection.""" diff --git a/api/index.py b/api/index.py index 38dfb61d..b41023d6 100644 --- a/api/index.py +++ b/api/index.py @@ -3,6 +3,7 @@ import asyncio import logging from pathlib import Path +from typing import Optional from dotenv import load_dotenv from fastapi import Depends, FastAPI, Header, HTTPException, Query @@ -15,7 +16,7 @@ from api.graph import Graph, AsyncGraphQuery, async_get_repos from api.info import async_get_repo_info from api.llm import ask -from api.project import Project +from api.project import Project, detect_branch # Load environment variables from .env file @@ -56,35 +57,43 @@ def token_required(authorization: str | None = Header(None)): class RepoRequest(BaseModel): repo: str + branch: Optional[str] = None class NeighborsRequest(BaseModel): repo: str node_ids: list[int] + branch: Optional[str] = None class AutoCompleteRequest(BaseModel): repo: str prefix: str + branch: Optional[str] = None class FindPathsRequest(BaseModel): repo: str src: int dest: int + branch: Optional[str] = None class ChatRequest(BaseModel): repo: str msg: str + branch: Optional[str] = None class AnalyzeFolderRequest(BaseModel): path: str ignore: list[str] = [] + branch: Optional[str] = None class AnalyzeRepoRequest(BaseModel): repo_url: str ignore: list[str] = [] + branch: Optional[str] = None class SwitchCommitRequest(BaseModel): repo: str commit: str + branch: Optional[str] = None # --------------------------------------------------------------------------- # Application @@ -105,23 +114,23 @@ class SwitchCommitRequest(BaseModel): # --------------------------------------------------------------------------- @app.get('/api/graph_entities') -async def graph_entities(repo: str = Query(None), _=Depends(public_or_auth)): +async def graph_entities(repo: str = Query(None), branch: Optional[str] = Query(None), _=Depends(public_or_auth)): """Fetch sub-graph entities from a given repository.""" if not repo: logging.error("Missing 'repo' parameter in request.") return JSONResponse({"status": "Missing 'repo' parameter"}, status_code=400) - g = AsyncGraphQuery(repo) + g = AsyncGraphQuery(repo, branch=branch) try: if not await g.graph_exists(): - logging.error("Missing project %s", repo) + logging.error("Missing project %s (branch=%s)", repo, g.branch) return JSONResponse({"status": f"Missing project {repo}"}, status_code=400) sub_graph = await g.get_sub_graph(500) - logging.info("Successfully retrieved sub-graph for repo: %s", repo) - return {"status": "success", "entities": sub_graph} + logging.info("Successfully retrieved sub-graph for repo: %s (branch=%s)", repo, g.branch) + return {"status": "success", "branch": g.branch, "entities": sub_graph} except Exception as e: logging.exception("Error retrieving sub-graph for repo '%s': %s", repo, e) @@ -134,26 +143,26 @@ async def graph_entities(repo: str = Query(None), _=Depends(public_or_auth)): async def get_neighbors(data: NeighborsRequest, _=Depends(public_or_auth)): """Get neighbors of a nodes list in the graph.""" - g = AsyncGraphQuery(data.repo) + g = AsyncGraphQuery(data.repo, branch=data.branch) try: if not await g.graph_exists(): - logging.error("Missing project %s", data.repo) + logging.error("Missing project %s (branch=%s)", data.repo, g.branch) return JSONResponse({"status": f"Missing project {data.repo}"}, status_code=400) neighbors = await g.get_neighbors(data.node_ids) finally: await g.close() - logging.info("Successfully retrieved neighbors for node IDs %s in repo '%s'.", - data.node_ids, data.repo) - return {"status": "success", "neighbors": neighbors} + logging.info("Successfully retrieved neighbors for node IDs %s in repo '%s' (branch=%s).", + data.node_ids, data.repo, g.branch) + return {"status": "success", "branch": g.branch, "neighbors": neighbors} @app.post('/api/auto_complete') async def auto_complete(data: AutoCompleteRequest, _=Depends(public_or_auth)): """Process auto-completion requests for a repository based on a prefix.""" - g = AsyncGraphQuery(data.repo) + g = AsyncGraphQuery(data.repo, branch=data.branch) try: if not await g.graph_exists(): return JSONResponse({"status": f"Missing project {data.repo}"}, status_code=400) @@ -161,12 +170,12 @@ async def auto_complete(data: AutoCompleteRequest, _=Depends(public_or_auth)): completions = await g.prefix_search(data.prefix) finally: await g.close() - return {"status": "success", "completions": completions} + return {"status": "success", "branch": g.branch, "completions": completions} @app.get('/api/list_repos') async def list_repos(_=Depends(public_or_auth)): - """List all available repositories.""" + """List all available repositories (returns (project, branch) pairs).""" repos = await async_get_repos() return {"status": "success", "repositories": repos} @@ -176,7 +185,7 @@ async def list_repos(_=Depends(public_or_auth)): async def repo_info(data: RepoRequest, _=Depends(public_or_auth)): """Retrieve information about a specific repository.""" - g = AsyncGraphQuery(data.repo) + g = AsyncGraphQuery(data.repo, branch=data.branch) try: if not await g.graph_exists(): return JSONResponse({"status": f'Missing repository "{data.repo}"'}, status_code=400) @@ -184,29 +193,29 @@ async def repo_info(data: RepoRequest, _=Depends(public_or_auth)): stats = await g.stats() finally: await g.close() - info = await async_get_repo_info(data.repo) + info = await async_get_repo_info(data.repo, data.branch) if info is None: return JSONResponse({"status": f'Missing repository "{data.repo}"'}, status_code=400) stats |= info - return {"status": "success", "info": stats} + return {"status": "success", "branch": g.branch, "info": stats} @app.post('/api/find_paths') async def find_paths(data: FindPathsRequest, _=Depends(public_or_auth)): """Find all paths between a source and destination node in the graph.""" - g = AsyncGraphQuery(data.repo) + g = AsyncGraphQuery(data.repo, branch=data.branch) try: if not await g.graph_exists(): - logging.error("Missing project %s", data.repo) + logging.error("Missing project %s (branch=%s)", data.repo, g.branch) return JSONResponse({"status": f"Missing project {data.repo}"}, status_code=400) paths = await g.find_paths(data.src, data.dest) finally: await g.close() - return {"status": "success", "paths": paths} + return {"status": "success", "branch": g.branch, "paths": paths} @app.post('/api/chat') @@ -241,33 +250,35 @@ async def analyze_folder(data: AnalyzeFolderRequest, _=Depends(token_required)): status_code=400) proj_name = resolved_path.name + branch = data.branch if data.branch is not None else detect_branch(resolved_path) def _analyze(): - g = Graph(proj_name) + g = Graph(proj_name, branch=branch) analyzer = SourceAnalyzer() analyzer.analyze_local_folder(str(resolved_path), g, data.ignore) loop = asyncio.get_running_loop() await loop.run_in_executor(None, _analyze) - return {"status": "success", "project": proj_name} + return {"status": "success", "project": proj_name, "branch": branch} @app.post('/api/analyze_repo') async def analyze_repo(data: AnalyzeRepoRequest, _=Depends(token_required)): """Analyze a GitHub repository. Always requires a valid token.""" - logger.debug('Received repo_url: %s', data.repo_url) + logger.debug('Received repo_url: %s branch: %s', data.repo_url, data.branch) def _analyze(): - proj = Project.from_git_repository(data.repo_url) + proj = Project.from_git_repository(data.repo_url, branch=data.branch) proj.analyze_sources(data.ignore) proj.process_git_history(data.ignore) + return proj.branch loop = asyncio.get_running_loop() - await loop.run_in_executor(None, _analyze) + resolved_branch = await loop.run_in_executor(None, _analyze) - return {"status": "success"} + return {"status": "success", "branch": resolved_branch} @app.post('/api/switch_commit') @@ -275,7 +286,7 @@ async def switch_commit(data: SwitchCommitRequest, _=Depends(token_required)): """Switch a repository to a specific commit. Always requires a valid token.""" loop = asyncio.get_running_loop() - await loop.run_in_executor(None, git_utils.switch_commit, data.repo, data.commit) + await loop.run_in_executor(None, git_utils.switch_commit, data.repo, data.commit, data.branch) return {"status": "success"} @@ -283,7 +294,7 @@ async def switch_commit(data: SwitchCommitRequest, _=Depends(token_required)): async def list_commits(data: RepoRequest, _=Depends(public_or_auth)): """List all commits of a specified repository.""" - git_graph = AsyncGitGraph(git_utils.GitRepoName(data.repo)) + git_graph = AsyncGitGraph(git_utils.GitRepoName(data.repo, data.branch)) try: commits = await git_graph.list_commits() finally: diff --git a/api/info.py b/api/info.py index b1d9ea7e..3b41d042 100644 --- a/api/info.py +++ b/api/info.py @@ -4,10 +4,31 @@ import logging from typing import Optional, Dict +from .graph import DEFAULT_BRANCH + # Configure logging logging.basicConfig(level=logging.INFO) -def _repo_info_key(repo_name: str) -> str: + +def _normalize_branch(branch: Optional[str]) -> str: + if branch is None or branch == "": + return DEFAULT_BRANCH + return branch + + +def _repo_info_key(repo_name: str, branch: Optional[str] = None) -> str: + """Compose the Redis hash key holding ``(repo, branch)`` metadata. + + The curly-brace hash-tag stays on ``repo_name`` so per-branch metadata + keys land on the same FalkorDB cluster slot as the equivalent graph + keys (e.g. ``{repo}:{branch}_git``). + """ + branch = _normalize_branch(branch) + return f"{{{repo_name}}}:{branch}_info" + + +def _legacy_repo_info_key(repo_name: str) -> str: + """Pre-T17 key shape, retained for the migration helper / fallback reads.""" return f"{{{repo_name}}}_info" def get_redis_connection() -> redis.Redis: @@ -30,12 +51,12 @@ def get_redis_connection() -> redis.Redis: raise -def set_repo_commit(repo_name: str, commit_hash: str) -> None: - """Save processed commit hash to the DB""" +def set_repo_commit(repo_name: str, commit_hash: str, branch: Optional[str] = None) -> None: + """Save processed commit hash to the DB for ``(repo_name, branch)``.""" try: r = get_redis_connection() - key = _repo_info_key(repo_name) # Safely format the key + key = _repo_info_key(repo_name, branch) # Safely format the key # Save the repository URL r.hset(key, 'commit', commit_hash) @@ -46,15 +67,19 @@ def set_repo_commit(repo_name: str, commit_hash: str) -> None: raise -def get_repo_commit(repo_name: str) -> str: - """Get the current commit the repo is at""" +def get_repo_commit(repo_name: str, branch: Optional[str] = None) -> str: + """Get the current commit the repo is at for ``(repo_name, branch)``.""" try: r = get_redis_connection() - key = _repo_info_key(repo_name) + key = _repo_info_key(repo_name, branch) # Retrieve all information about the repository commit_hash = r.hget(key, "commit") + if not commit_hash: + # Fall back to the legacy single-key shape, so reads against + # un-migrated graphs still succeed. + commit_hash = r.hget(_legacy_repo_info_key(repo_name), "commit") if not commit_hash: logging.warning(f"Failed to retrieve {repo_name} current commit hash") return None @@ -67,18 +92,20 @@ def get_repo_commit(repo_name: str) -> str: raise -def save_repo_info(repo_name: str, repo_url: str) -> None: +def save_repo_info(repo_name: str, repo_url: str, branch: Optional[str] = None) -> None: """ - Saves repository information (URL) to Redis under a hash named {repo_name}_info. + Saves repository information (URL) to Redis under a hash named + ``{repo_name}:{branch}_info``. Args: repo_name (str): The name of the repository. repo_url (str): The URL of the repository. + branch (Optional[str]): The branch. Defaults to ``_default``. """ try: r = get_redis_connection() - key = _repo_info_key(repo_name) + key = _repo_info_key(repo_name, branch) # Save the repository URL r.hset(key, 'repo_url', repo_url) @@ -88,27 +115,34 @@ def save_repo_info(repo_name: str, repo_url: str) -> None: logging.error(f"Error saving repo info for '{repo_name}': {e}") raise -def get_repo_info(repo_name: str) -> Optional[Dict[str, str]]: +def get_repo_info(repo_name: str, branch: Optional[str] = None) -> Optional[Dict[str, str]]: """ - Retrieves repository information from Redis. + Retrieves repository information from Redis for ``(repo_name, branch)``. + + Falls back to the legacy single-key shape so pre-migration graphs + remain readable. Args: repo_name (str): The name of the repository. + branch (Optional[str]): The branch. Defaults to ``_default``. Returns: - Optional[Dict[str, str]]: A dictionary of repository information, or None if not found. + Optional[Dict[str, str]]: A dictionary of repository information, + or ``None`` if not found. """ try: r = get_redis_connection() - key = _repo_info_key(repo_name) - + key = _repo_info_key(repo_name, branch) + # Retrieve all information about the repository repo_info = r.hgetall(key) + if not repo_info: + repo_info = r.hgetall(_legacy_repo_info_key(repo_name)) if not repo_info: logging.warning(f"No repository info found for {repo_name}") return None - + logging.info(f"Repository info retrieved for {repo_name}") return repo_info @@ -131,12 +165,14 @@ async def async_get_redis_connection() -> aioredis.Redis: ) -async def async_get_repo_info(repo_name: str) -> Optional[Dict[str, str]]: +async def async_get_repo_info(repo_name: str, branch: Optional[str] = None) -> Optional[Dict[str, str]]: try: r = await async_get_redis_connection() try: - key = _repo_info_key(repo_name) + key = _repo_info_key(repo_name, branch) repo_info = await r.hgetall(key) + if not repo_info: + repo_info = await r.hgetall(_legacy_repo_info_key(repo_name)) if not repo_info: logging.warning(f"No repository info found for {repo_name}") return None diff --git a/api/migrations/__init__.py b/api/migrations/__init__.py new file mode 100644 index 00000000..e6f73896 --- /dev/null +++ b/api/migrations/__init__.py @@ -0,0 +1,4 @@ +"""One-shot data migrations for code-graph. + +Each migration is idempotent and safe to re-run. +""" diff --git a/api/migrations/per_branch.py b/api/migrations/per_branch.py new file mode 100644 index 00000000..f54f2223 --- /dev/null +++ b/api/migrations/per_branch.py @@ -0,0 +1,151 @@ +"""T17 (#651): promote legacy graphs into the per-branch namespace. + +Before T17 a repo named ``myrepo`` lived at: + + * Graph key: ``myrepo`` + * Info hash: ``{myrepo}_info`` + * Git graph: ``{myrepo}_git`` + +After T17, the new format is: + + * Graph key: ``code:myrepo:_default`` + * Info hash: ``{myrepo}:_default_info`` + * Git graph: ``{myrepo}:_default_git`` + +This module renames every legacy artifact in place. It is safe to run +multiple times — already-migrated graphs are skipped — and exposes a +``--dry-run`` mode via ``cgraph migrate`` for previewing changes. +""" + +from __future__ import annotations + +import logging +import os +from typing import Iterable + +from falkordb import FalkorDB + +from ..graph import ( + DEFAULT_BRANCH, + compose_graph_name, + parse_graph_name, +) +from ..info import _legacy_repo_info_key, _repo_info_key + + +logger = logging.getLogger(__name__) + + +def _connect() -> FalkorDB: + return FalkorDB( + host=os.getenv("FALKORDB_HOST", "localhost"), + port=os.getenv("FALKORDB_PORT", 6379), + username=os.getenv("FALKORDB_USERNAME", None), + password=os.getenv("FALKORDB_PASSWORD", None), + ) + + +def _legacy_graphs(all_graphs: Iterable[str]) -> list[str]: + """Return graphs that look like the pre-T17 single-name shape.""" + + legacy = [] + for name in all_graphs: + # Already migrated? + if parse_graph_name(name) is not None: + continue + # System graphs etc. — also skip the ``_tmp`` mid-migration name and + # the per-repo ``_git`` companion (handled separately, see below). + if name.endswith("_tmp") or name.endswith("_schema") or name.endswith("_git"): + continue + legacy.append(name) + return legacy + + +def _rename_graph(db: FalkorDB, src: str, dst: str, *, dry_run: bool) -> bool: + """Copy ``src`` to ``dst`` and delete ``src``. Returns ``True`` on success. + + Skips with ``False`` (and a warning) when ``dst`` already exists. + """ + + if db.connection.exists(dst): + logger.warning("Target graph %s already exists — skipping rename of %s", dst, src) + return False + if dry_run: + logger.info("[dry-run] would rename graph %s -> %s", src, dst) + return True + g = db.select_graph(src) + g.copy(dst) + g.delete() + logger.info("Renamed graph %s -> %s", src, dst) + return True + + +def _rename_redis_key(db: FalkorDB, src: str, dst: str, *, dry_run: bool) -> bool: + """Rename a Redis hash key. Idempotent — skips when ``dst`` already exists. + + Uses the underlying ``redis-py`` connection on the FalkorDB client so + we don't need a separate Redis connection. + """ + + conn = db.connection + if not conn.exists(src): + return False + if conn.exists(dst): + logger.warning("Target Redis key %s already exists — skipping rename of %s", dst, src) + return False + if dry_run: + logger.info("[dry-run] would rename Redis key %s -> %s", src, dst) + return True + conn.rename(src, dst) + logger.info("Renamed Redis key %s -> %s", src, dst) + return True + + +def run_migration(*, dry_run: bool = False) -> dict: + """Run the per-branch migration once. + + Returns a small summary dict suitable for the CLI to print as JSON:: + + {"status": "ok", "graphs_renamed": int, "info_renamed": int, + "git_renamed": int, "dry_run": bool, "skipped": list[str]} + """ + + db = _connect() + all_graphs = list(db.list_graphs()) + legacy = _legacy_graphs(all_graphs) + + graphs_renamed = 0 + info_renamed = 0 + git_renamed = 0 + skipped: list[str] = [] + + for project in legacy: + # Rename the main code graph itself. + new_graph = compose_graph_name(project, DEFAULT_BRANCH) + if _rename_graph(db, project, new_graph, dry_run=dry_run): + graphs_renamed += 1 + else: + skipped.append(project) + continue + + # Rename the sibling Redis info key, if any. + legacy_info = _legacy_repo_info_key(project) + new_info = _repo_info_key(project, DEFAULT_BRANCH) + if _rename_redis_key(db, legacy_info, new_info, dry_run=dry_run): + info_renamed += 1 + + # Rename the sibling ``{project}_git`` graph, if any. + legacy_git = "{" + project + "}_git" + new_git = "{" + project + "}:" + DEFAULT_BRANCH + "_git" + if legacy_git in all_graphs: + if _rename_graph(db, legacy_git, new_git, dry_run=dry_run): + git_renamed += 1 + + return { + "status": "ok", + "dry_run": dry_run, + "graphs_renamed": graphs_renamed, + "info_renamed": info_renamed, + "git_renamed": git_renamed, + "skipped": skipped, + } diff --git a/api/project.py b/api/project.py index aed5a9e7..2478c7c4 100644 --- a/api/project.py +++ b/api/project.py @@ -6,7 +6,7 @@ from pygit2.repository import Repository from .info import * from pathlib import Path -from .graph import Graph +from .graph import Graph, DEFAULT_BRANCH from typing import Optional, List from urllib.parse import urlparse from .analyzers import SourceAnalyzer @@ -15,6 +15,30 @@ # Configure logging logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + +def detect_branch(path: Path) -> str: + """Resolve the current branch name for a checkout at ``path``. + + Uses ``git rev-parse --abbrev-ref HEAD``. Returns + :data:`api.graph.DEFAULT_BRANCH` when the path is not a git checkout + or when HEAD is detached (the ``rev-parse`` call returns ``HEAD``). + """ + + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + cwd=str(path), + capture_output=True, + text=True, + check=True, + ) + branch = (result.stdout or "").strip() + if not branch or branch == "HEAD": + return DEFAULT_BRANCH + return branch + except (FileNotFoundError, subprocess.CalledProcessError): + return DEFAULT_BRANCH + def _clone_source(url: str, name: str) -> Path: # path to local repositories path = Path.cwd() / "repositories" / name @@ -37,17 +61,21 @@ def _clone_source(url: str, name: str) -> Path: return path class Project(): - def __init__(self, name: str, path: Path, url: Optional[str]): - self.url = url - self.name = name - self.path = path - self.graph = Graph(name) + def __init__(self, name: str, path: Path, url: Optional[str], branch: Optional[str] = None): + self.url = url + self.name = name + self.path = path + # Auto-detect branch from the working tree when not explicitly given. + if branch is None: + branch = detect_branch(path) if path is not None and Path(path).exists() else DEFAULT_BRANCH + self.branch = branch + self.graph = Graph(name, branch=self.branch) if url is not None: - save_repo_info(name, url) + save_repo_info(name, url, self.branch) @classmethod - def from_git_repository(cls, url: str): + def from_git_repository(cls, url: str, branch: Optional[str] = None): # Validate url if not validators.url(url): raise Exception(f"invalid url: {url}") @@ -57,10 +85,10 @@ def from_git_repository(cls, url: str): name = parsed_url.path.split('/')[-1] path = _clone_source(url, name) - return cls(name, path, url) + return cls(name, path, url, branch=branch) @classmethod - def from_local_repository(cls, path: Path|str): + def from_local_repository(cls, path: Path|str, branch: Optional[str] = None): path = Path(path) if isinstance(path, str) else path # Validate path exists @@ -74,7 +102,7 @@ def from_local_repository(cls, path: Path|str): name = path.name - return cls(name, path, url) + return cls(name, path, url, branch=branch) def analyze_sources(self, ignore: Optional[List[str]] = None) -> Graph: if ignore is None: @@ -86,7 +114,7 @@ def analyze_sources(self, ignore: Optional[List[str]] = None) -> Graph: # Save processed commit hash to the DB repo = Repository(self.path) current_commit = repo.walk(repo.head.target).__next__() - set_repo_commit(self.name, current_commit.short_id) + set_repo_commit(self.name, current_commit.short_id, self.branch) except Exception: # Probably not .git folder is missing pass @@ -103,7 +131,7 @@ def process_git_history(self, ignore: Optional[List[str]] = []) -> GitGraph: logging.info(f"Switching current working directory to: {self.path}") os.chdir(self.path) - git_graph = build_commit_graph(self.path, self.analyzer, self.name, ignore) + git_graph = build_commit_graph(self.path, self.analyzer, self.name, ignore, branch=self.branch) # Restore original working directory logging.info(f"Restoring current working directory to: {original_dir}") diff --git a/pyproject.toml b/pyproject.toml index 5cdfa914..be0a3186 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,3 +44,9 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = ["."] + +[dependency-groups] +dev = [ + "pytest>=9.0.2", + "pytest-anyio>=0.0.0", +] diff --git a/tests/endpoints/test_list_repos.py b/tests/endpoints/test_list_repos.py index 198f7c2a..28d8cb7f 100644 --- a/tests/endpoints/test_list_repos.py +++ b/tests/endpoints/test_list_repos.py @@ -47,7 +47,9 @@ def test_list_repos(client): # Expecting an empty response assert status == "success" - assert repositories == ['git_repo'] + assert repositories == [ + {"project": "git_repo", "branch": "_default", "graph": "code:git_repo:_default"} + ] def test_list_repos_with_auth(monkeypatch): diff --git a/tests/test_async_graph.py b/tests/test_async_graph.py index ccd1e000..32e445b1 100644 --- a/tests/test_async_graph.py +++ b/tests/test_async_graph.py @@ -61,14 +61,17 @@ async def test_async_graph_exists_closes_on_error(): async def test_async_get_repos_filters_suffixes(): mock_db = MagicMock() mock_db.list_graphs = AsyncMock( - return_value=["repo1", "repo1_git", "repo1_schema", "repo2"] + return_value=["repo1", "code:repo2:main", "repo1_git", "repo1_schema", "code:repo2:main_git"] ) mock_db.aclose = AsyncMock() with patch("api.graph._async_db", return_value=mock_db): repos = await async_get_repos() - assert repos == ["repo1", "repo2"] + assert repos == [ + {"project": "repo1", "branch": "_default", "graph": "repo1"}, + {"project": "repo2", "branch": "main", "graph": "code:repo2:main"}, + ] mock_db.aclose.assert_awaited_once() diff --git a/tests/test_cli.py b/tests/test_cli.py index 9b7f2eeb..6087f257 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -88,7 +88,9 @@ def test_list_includes_repo(self): result = runner.invoke(app, ["list"]) self.assertEqual(result.exit_code, 0) data = _parse_json(result.output) - self.assertIn(self.REPO_NAME, data["repos"]) + # T17: repos are now {project, branch, graph} dicts. + projects = [r["project"] for r in data["repos"]] + self.assertIn(self.REPO_NAME, projects) class TestCLIHelp(unittest.TestCase): diff --git a/tests/test_per_branch_graphs.py b/tests/test_per_branch_graphs.py new file mode 100644 index 00000000..5c738b5d --- /dev/null +++ b/tests/test_per_branch_graphs.py @@ -0,0 +1,256 @@ +"""Unit tests for the per-branch graph identity refactor (T17, issue #651). + +Exercises only the name-composition / parsing helpers and the in-memory +behaviour of the ``Graph`` / ``AsyncGraphQuery`` constructors plus the +``run_migration`` orchestration. Anything that would require a live +FalkorDB / Redis instance is mocked. +""" + +from unittest.mock import MagicMock, AsyncMock, patch + +import pytest + +from api.graph import ( + DEFAULT_BRANCH, + Graph, + AsyncGraphQuery, + compose_graph_name, + parse_graph_name, + async_get_repos, +) +from api.info import _repo_info_key, _legacy_repo_info_key, _normalize_branch +from api.git_utils.git_utils import GitRepoName, LegacyGitRepoName + + +# --------------------------------------------------------------------------- +# compose_graph_name / parse_graph_name +# --------------------------------------------------------------------------- + +def test_compose_graph_name_default_branch(): + assert compose_graph_name("falkordb") == f"code:falkordb:{DEFAULT_BRANCH}" + + +def test_compose_graph_name_explicit_branch(): + assert compose_graph_name("falkordb", "main") == "code:falkordb:main" + + +def test_compose_graph_name_empty_branch_falls_back_to_default(): + assert compose_graph_name("foo", "") == f"code:foo:{DEFAULT_BRANCH}" + + +def test_compose_graph_name_branch_with_slashes(): + # Real-world branches like `dvir/feature` must survive composition. + assert compose_graph_name("foo", "dvir/feature") == "code:foo:dvir/feature" + + +def test_parse_graph_name_round_trip(): + name = compose_graph_name("falkordb", "main") + assert parse_graph_name(name) == ("falkordb", "main") + + +def test_parse_graph_name_legacy_returns_none(): + assert parse_graph_name("legacy_bare_name") is None + + +def test_parse_graph_name_handles_branch_with_colons(): + # Branch names cannot contain ':' in git, so we don't try to support it. + # But branch may contain other unusual characters: the regex is + # ``code:{project}:(.+)`` so anything after the second colon is the branch. + assert parse_graph_name("code:repo:feature/x") == ("repo", "feature/x") + + +# --------------------------------------------------------------------------- +# Graph constructor branch-awareness +# --------------------------------------------------------------------------- + +def _patched_falkordb(): + """Return a MagicMock that replaces ``FalkorDB`` in ``api.graph``.""" + mock_db = MagicMock() + mock_db.select_graph = MagicMock(return_value=MagicMock()) + return patch("api.graph.FalkorDB", return_value=mock_db) + + +def test_graph_default_branch_composes_name(): + with _patched_falkordb(): + g = Graph("foo") + assert g.project == "foo" + assert g.branch == DEFAULT_BRANCH + assert g.name == f"code:foo:{DEFAULT_BRANCH}" + + +def test_graph_explicit_branch_composes_name(): + with _patched_falkordb(): + g = Graph("foo", branch="main") + assert g.project == "foo" + assert g.branch == "main" + assert g.name == "code:foo:main" + + +def test_graph_accepts_already_composed_name(): + with _patched_falkordb(): + g = Graph("code:bar:dev") + assert g.project == "bar" + assert g.branch == "dev" + assert g.name == "code:bar:dev" + + +def test_graph_two_branches_produce_distinct_names(): + with _patched_falkordb(): + a = Graph("foo", branch="main") + b = Graph("foo", branch="feat") + assert a.name != b.name + assert a.project == b.project == "foo" + + +def test_graph_from_raw_name_bypasses_composition(): + with _patched_falkordb(): + g = Graph.from_raw_name("code:foo:main_tmp") + # raw name preserved exactly, even though it isn't a well-formed + # ``code:project:branch`` graph. + assert g.name == "code:foo:main_tmp" + + +# --------------------------------------------------------------------------- +# AsyncGraphQuery constructor branch-awareness +# --------------------------------------------------------------------------- + +def test_async_graph_query_default_branch(): + with patch("api.graph._async_db") as mock_db: + mock_db.return_value = MagicMock() + g = AsyncGraphQuery("foo") + assert g.project == "foo" + assert g.branch == DEFAULT_BRANCH + assert g.name == f"code:foo:{DEFAULT_BRANCH}" + + +def test_async_graph_query_branch_param(): + with patch("api.graph._async_db") as mock_db: + mock_db.return_value = MagicMock() + g = AsyncGraphQuery("foo", branch="develop") + assert g.name == "code:foo:develop" + + +# --------------------------------------------------------------------------- +# async_get_repos returns (project, branch, graph) dicts +# --------------------------------------------------------------------------- + +@pytest.mark.anyio +async def test_async_get_repos_mixes_legacy_and_new(): + mock_db = MagicMock() + mock_db.list_graphs = AsyncMock( + return_value=[ + "legacy_proj", # legacy: no `code:` prefix + "code:newproj:main", # new style + "code:newproj:feat", # same project, different branch + "code:newproj:main_git", # internal suffix, must be filtered + "code:newproj:main_schema", # internal suffix, must be filtered + ] + ) + mock_db.aclose = AsyncMock() + + with patch("api.graph._async_db", return_value=mock_db): + repos = await async_get_repos() + + assert repos == [ + {"project": "legacy_proj", "branch": DEFAULT_BRANCH, "graph": "legacy_proj"}, + {"project": "newproj", "branch": "main", "graph": "code:newproj:main"}, + {"project": "newproj", "branch": "feat", "graph": "code:newproj:feat"}, + ] + + +# --------------------------------------------------------------------------- +# Redis info key composition (api.info) +# --------------------------------------------------------------------------- + +def test_repo_info_key_default_branch(): + assert _repo_info_key("falkordb") == "{falkordb}:_default_info" + + +def test_repo_info_key_explicit_branch(): + assert _repo_info_key("falkordb", "main") == "{falkordb}:main_info" + + +def test_legacy_repo_info_key_for_fallback(): + assert _legacy_repo_info_key("falkordb") == "{falkordb}_info" + + +def test_normalize_branch_handles_none_and_empty(): + assert _normalize_branch(None) == DEFAULT_BRANCH + assert _normalize_branch("") == DEFAULT_BRANCH + assert _normalize_branch("main") == "main" + + +# --------------------------------------------------------------------------- +# Git graph naming (api.git_utils.git_utils) +# --------------------------------------------------------------------------- + +def test_git_repo_name_default_branch(): + assert GitRepoName("repo") == "{repo}:_default_git" + + +def test_git_repo_name_explicit_branch(): + assert GitRepoName("repo", "main") == "{repo}:main_git" + + +def test_legacy_git_repo_name_for_fallback(): + assert LegacyGitRepoName("repo") == "{repo}_git" + + +# --------------------------------------------------------------------------- +# Migration helper (api.migrations.per_branch) +# --------------------------------------------------------------------------- + +def test_migration_dry_run_reports_actions_without_renaming(): + from api.migrations import per_branch + + fake_conn = MagicMock() + fake_conn.exists = MagicMock(return_value=0) + fake_graph = MagicMock() + + mock_db = MagicMock() + mock_db.connection = fake_conn + mock_db.list_graphs = MagicMock( + return_value=["legacy_proj", "{legacy_proj}_git", "code:already:main"] + ) + mock_db.select_graph = MagicMock(return_value=fake_graph) + + with patch.object(per_branch, "_connect", return_value=mock_db): + result = per_branch.run_migration(dry_run=True) + + # Dry-run must never copy or delete any graph. + fake_graph.copy.assert_not_called() + fake_graph.delete.assert_not_called() + fake_conn.rename.assert_not_called() + + # ``code:already:main`` is already migrated and must not be reported. + assert result["dry_run"] is True + assert result["graphs_renamed"] == 1 # legacy_proj + # the {legacy_proj}_git companion graph was also planned, but only when + # info-key existed; the mock returns exists=0 so info_renamed stays 0. + assert result["git_renamed"] == 1 + + +def test_migration_is_idempotent_when_no_legacy_graphs(): + from api.migrations import per_branch + + fake_conn = MagicMock() + fake_conn.exists = MagicMock(return_value=0) + fake_graph = MagicMock() + + mock_db = MagicMock() + mock_db.connection = fake_conn + # Already-migrated layout: only new-style names present. + mock_db.list_graphs = MagicMock( + return_value=["code:proj:_default", "{proj}:_default_git"] + ) + mock_db.select_graph = MagicMock(return_value=fake_graph) + + with patch.object(per_branch, "_connect", return_value=mock_db): + result = per_branch.run_migration(dry_run=False) + + fake_graph.copy.assert_not_called() + fake_graph.delete.assert_not_called() + fake_conn.rename.assert_not_called() + assert result["graphs_renamed"] == 0 + assert result["info_renamed"] == 0 + assert result["git_renamed"] == 0 diff --git a/uv.lock b/uv.lock index 2f57f302..0258bad8 100644 --- a/uv.lock +++ b/uv.lock @@ -373,13 +373,21 @@ dependencies = [ [package.optional-dependencies] test = [ + { name = "anyio" }, { name = "httpx" }, { name = "pytest" }, { name = "ruff" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-anyio" }, +] + [package.metadata] requires-dist = [ + { name = "anyio", marker = "extra == 'test'", specifier = ">=4.0,<5.0" }, { name = "falkordb", specifier = ">=1.1.3,<2.0.0" }, { name = "falkordb-multilspy", specifier = ">=0.1.0,<1.0.0" }, { name = "fastapi", specifier = ">=0.115.0,<1.0.0" }, @@ -404,6 +412,12 @@ requires-dist = [ ] provides-extras = ["test"] +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=9.0.2" }, + { name = "pytest-anyio", specifier = ">=0.0.0" }, +] + [[package]] name = "falkordb-multilspy" version = "0.1.0" @@ -1326,6 +1340,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-anyio" +version = "0.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/44/a02e5877a671b0940f21a7a0d9704c22097b123ed5cdbcca9cab39f17acc/pytest-anyio-0.0.0.tar.gz", hash = "sha256:b41234e9e9ad7ea1dbfefcc1d6891b23d5ef7c9f07ccf804c13a9cc338571fd3", size = 1560, upload-time = "2021-06-29T22:57:30.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/25/bd6493ae85d0a281b6a0f248d0fdb1d9aa2b31f18bcd4a8800cf397d8209/pytest_anyio-0.0.0-py2.py3-none-any.whl", hash = "sha256:dc8b5c4741cb16ff90be37fddd585ca943ed12bbeb563de7ace6cd94441d8746", size = 1999, upload-time = "2021-06-29T22:57:29.158Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1775,6 +1802,14 @@ version = "0.23.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/22/85/a61c782afbb706a47d990eaee6977e7c2bd013771c5bf5c81c617684f286/tree_sitter_c_sharp-0.23.1.tar.gz", hash = "sha256:322e2cfd3a547a840375276b2aea3335fa6458aeac082f6c60fec3f745c967eb", size = 1317728, upload-time = "2024-11-11T05:25:32.535Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/dc/d4a0ad9e466263728f80f9dac399609473af01c1aba2ea3ea8879ce56276/tree_sitter_c_sharp-0.23.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:e87be7572991552606a3155d2f6c2045ded8bce94bfd9f74bf521d949c219a1c", size = 333661, upload-time = "2026-04-14T15:11:14.227Z" }, + { url = "https://files.pythonhosted.org/packages/61/7a/5c862770460a2e27079e725585ad2718100373c09448c14e36934ef44414/tree_sitter_c_sharp-0.23.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:86c2fdf178c66474a1be2965602818d30780e4e3ed890e3c206931f65d9a154c", size = 376295, upload-time = "2026-04-14T15:11:15.346Z" }, + { url = "https://files.pythonhosted.org/packages/67/18/0571a3a34c0feda60a9c37cf6dd5edfdbc24f8fcb1e48b6b6eb0f324ad2a/tree_sitter_c_sharp-0.23.1-cp310-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:035d259e64c41d02cc45afc3b8b46388b232e7d16d84734d851cca7334761da5", size = 358331, upload-time = "2026-04-14T15:11:16.418Z" }, + { url = "https://files.pythonhosted.org/packages/44/65/0f7e1f50f6365338eb700f01710da0adc49a49fa9a8443e5a90ea4f29491/tree_sitter_c_sharp-0.23.1-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fa472cb9de7e14fee9408e144f29f68384cd8e9c677dff0002da19f361a59bdf", size = 359444, upload-time = "2026-04-14T15:11:17.509Z" }, + { url = "https://files.pythonhosted.org/packages/98/60/129bd56d5ef22b4ae254940a09b6d3ed873093218868a3f9635d571d514e/tree_sitter_c_sharp-0.23.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1a0ea86eccff74e85ab4a2cf77c813fad7c84162962ce242dff0c51601028832", size = 358143, upload-time = "2026-04-14T15:11:18.755Z" }, + { url = "https://files.pythonhosted.org/packages/7c/cd/e12cdca47e0c56151cb4b156d48091b7bc1d968e072c1656cf6b73fe7218/tree_sitter_c_sharp-0.23.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8ab26dc998bbd4b4287b129f67c10ca715deb402ed77d0645674490ea509097e", size = 357524, upload-time = "2026-04-14T15:11:19.717Z" }, + { url = "https://files.pythonhosted.org/packages/6a/2c/f742d60f818cba83760f4975c7158d1c96c36b5807e95a843db7fb8c64b7/tree_sitter_c_sharp-0.23.1-cp310-abi3-win_amd64.whl", hash = "sha256:d4486653feaff3314ef45534dcb6f9ea8ab3aa160896287c6473788f88eb38be", size = 338755, upload-time = "2026-04-14T15:11:20.883Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e4/8a8642b9bba86248ac2facc81ffb187c06c6768efa56c79d61fab70d736b/tree_sitter_c_sharp-0.23.1-cp310-abi3-win_arm64.whl", hash = "sha256:e7a14b76ec23cc8386cf662d5ea602d81331376c93ca6299a97b174047790345", size = 337261, upload-time = "2026-04-14T15:11:22.111Z" }, { url = "https://files.pythonhosted.org/packages/58/04/f6c2df4c53a588ccd88d50851155945cff8cd887bd70c175e00aaade7edf/tree_sitter_c_sharp-0.23.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2b612a6e5bd17bb7fa2aab4bb6fc1fba45c94f09cb034ab332e45603b86e32fd", size = 372235, upload-time = "2024-11-11T05:25:19.424Z" }, { url = "https://files.pythonhosted.org/packages/99/10/1aa9486f1e28fc22810fa92cbdc54e1051e7f5536a5e5b5e9695f609b31e/tree_sitter_c_sharp-0.23.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a8b98f62bc53efcd4d971151950c9b9cd5cbe3bacdb0cd69fdccac63350d83e", size = 419046, upload-time = "2024-11-11T05:25:20.679Z" }, { url = "https://files.pythonhosted.org/packages/0f/21/13df29f8fcb9ba9f209b7b413a4764b673dfd58989a0dd67e9c7e19e9c2e/tree_sitter_c_sharp-0.23.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:986e93d845a438ec3c4416401aa98e6a6f6631d644bbbc2e43fcb915c51d255d", size = 415999, upload-time = "2024-11-11T05:25:22.359Z" },