Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions api/analyzers/source_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,21 +208,26 @@ def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]]

logging.info("Done analyzing path")

def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None) -> Graph:
def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None, branch: Optional[str] = None) -> Graph:
"""
Analyze a local Git repository.

Args:
path (str): Path to a local git repository
ignore (List[str]): List of paths to skip
branch (Optional[str]): Branch name. Auto-detected from the
checkout when ``None``.
"""
if ignore is None:
ignore = []

from pygit2.repository import Repository
from ..project import detect_branch

proj_name = Path(path).name
graph = Graph(proj_name)
if branch is None:
branch = detect_branch(Path(path))
graph = Graph(proj_name, branch=branch)
self.analyze_local_folder(path, graph, ignore)

# Save processed commit hash to the DB
Expand Down
10 changes: 6 additions & 4 deletions api/auto_complete.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from typing import Optional

from .graph import Graph, AsyncGraphQuery


def prefix_search(repo: str, prefix: str) -> str:
def prefix_search(repo: str, prefix: str, branch: Optional[str] = None) -> str:
""" Returns a list of all entities in the repository that start with the given prefix. """
g = Graph(repo)
g = Graph(repo, branch=branch)
return g.prefix_search(prefix)


async def async_prefix_search(repo: str, prefix: str) -> list:
async def async_prefix_search(repo: str, prefix: str, branch: Optional[str] = None) -> list:
"""Async version of prefix_search using AsyncGraphQuery."""
g = AsyncGraphQuery(repo)
g = AsyncGraphQuery(repo, branch=branch)
try:
return await g.prefix_search(prefix)
finally:
Expand Down
86 changes: 70 additions & 16 deletions api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ def index(
repo: Optional[str] = typer.Option(
None, "--repo", help="Graph name (defaults to folder name)"
),
branch: Optional[str] = typer.Option(
None, "--branch", help="Branch to associate with this index (auto-detected from git checkout when omitted; '_default' for non-git paths)"
),
) -> None:
"""Index a local folder into the knowledge graph."""
from .project import Project
Expand Down Expand Up @@ -204,14 +207,14 @@ def index(

_stderr(f"Indexing {folder} as '{name}'…")
try:
project = Project(name, folder, url)
project = Project(name, folder, url, branch=branch)
graph = project.analyze_sources(ignore=list(ignore) if ignore else [])
stats = graph.stats()
except Exception as e:
_json_error(str(e))

_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges")
_json_out({"status": "ok", "repo": name, **stats})
_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges (branch={project.branch})")
_json_out({"status": "ok", "repo": name, "branch": project.branch, **stats})


# ── index-repo ─────────────────────────────────────────────────────────
Expand All @@ -223,6 +226,9 @@ def index_repo(
ignore: Optional[List[str]] = typer.Option(
None, "--ignore", help="Directories to ignore (repeatable)"
),
branch: Optional[str] = typer.Option(
None, "--branch", help="Branch to associate with this index (auto-detected from the cloned checkout when omitted)"
),
) -> None:
"""Clone a git repository and index it into the knowledge graph."""
from .project import Project
Expand All @@ -233,22 +239,22 @@ def index_repo(
import io
import contextlib
with contextlib.redirect_stdout(io.StringIO()):
project = Project.from_git_repository(url)
project = Project.from_git_repository(url, branch=branch)
graph = project.analyze_sources(ignore=list(ignore) if ignore else [])
stats = graph.stats()
except Exception as e:
_json_error(str(e))

_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges")
_json_out({"status": "ok", "repo": project.name, **stats})
_stderr(f"Done — {stats['node_count']} nodes, {stats['edge_count']} edges (branch={project.branch})")
_json_out({"status": "ok", "repo": project.name, "branch": project.branch, **stats})


# ── list ───────────────────────────────────────────────────────────────


@app.command("list")
def list_repos() -> None:
"""List all indexed repositories."""
"""List all indexed (project, branch) pairs."""
from .graph import get_repos

try:
Expand All @@ -259,6 +265,30 @@ def list_repos() -> None:
_json_out({"repos": repos})


# ── migrate ────────────────────────────────────────────────────────────


@app.command("migrate")
def migrate(
    dry_run: bool = typer.Option(
        False, "--dry-run", help="Print actions without performing them"
    ),
) -> None:
    """Promote legacy (pre-T17) graphs and Redis keys into the per-branch namespace.

    Renames each legacy ``<project>`` graph to ``code:<project>:_default``,
    each ``{project}_info`` Redis key to ``{project}:_default_info``, and
    each ``{project}_git`` graph to ``{project}:_default_git``. Idempotent.
    """
    # Imported inside the command body rather than at module level.
    from .migrations.per_branch import run_migration

    try:
        summary = run_migration(dry_run=dry_run)
    except Exception as exc:
        # Report any migration failure through _json_error.
        _json_error(str(exc))

    # Emit whatever run_migration returned as the command's JSON output.
    _json_out(summary)


# ── search ─────────────────────────────────────────────────────────────


Expand All @@ -268,18 +298,24 @@ def search(
repo: Optional[str] = typer.Option(
None, "--repo", help="Repository name (defaults to CWD name)"
),
branch: Optional[str] = typer.Option(
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
),
) -> None:
"""Search for entities by prefix (full-text search)."""
from .graph import Graph
from .project import detect_branch

name = _default_repo(repo)
if branch is None:
branch = detect_branch(Path.cwd())
try:
g = Graph(name)
g = Graph(name, branch=branch)
results = g.prefix_search(query)
except Exception as e:
_json_error(str(e))

_json_out({"repo": name, "results": results})
_json_out({"repo": name, "branch": branch, "results": results})


# ── neighbors ──────────────────────────────────────────────────────────
Expand All @@ -297,18 +333,24 @@ def neighbors(
label: Optional[str] = typer.Option(
None, "--label", help="Filter by destination label (e.g. Function, Class)"
),
branch: Optional[str] = typer.Option(
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
),
) -> None:
"""Get neighboring entities of the given node(s)."""
from .graph import Graph
from .project import detect_branch

name = _default_repo(repo)
if branch is None:
branch = detect_branch(Path.cwd())
try:
g = Graph(name)
g = Graph(name, branch=branch)
result = g.get_neighbors(node_ids, rel=rel, lbl=label)
except Exception as e:
_json_error(str(e))

_json_out({"repo": name, **result})
_json_out({"repo": name, "branch": branch, **result})


# ── paths ──────────────────────────────────────────────────────────────
Expand All @@ -321,18 +363,24 @@ def paths(
repo: Optional[str] = typer.Option(
None, "--repo", help="Repository name (defaults to CWD name)"
),
branch: Optional[str] = typer.Option(
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
),
) -> None:
"""Find call-chain paths between two nodes."""
from .graph import Graph
from .project import detect_branch

name = _default_repo(repo)
if branch is None:
branch = detect_branch(Path.cwd())
try:
g = Graph(name)
g = Graph(name, branch=branch)
result = g.find_paths(src, dest)
except Exception as e:
_json_error(str(e))

_json_out({"repo": name, "paths": result})
_json_out({"repo": name, "branch": branch, "paths": result})


# ── info ───────────────────────────────────────────────────────────────
Expand All @@ -343,20 +391,26 @@ def info(
repo: Optional[str] = typer.Option(
None, "--repo", help="Repository name (defaults to CWD name)"
),
branch: Optional[str] = typer.Option(
None, "--branch", help="Branch (auto-detected from CWD; '_default' for non-git paths)"
),
) -> None:
"""Show repository statistics and metadata."""
from .graph import Graph
from .info import get_repo_info
from .project import detect_branch

name = _default_repo(repo)
if branch is None:
branch = detect_branch(Path.cwd())
try:
g = Graph(name)
g = Graph(name, branch=branch)
stats = g.stats()
metadata = get_repo_info(name) or {}
metadata = get_repo_info(name, branch) or {}
except Exception as e:
_json_error(str(e))

_json_out({"repo": name, **stats, "metadata": metadata})
_json_out({"repo": name, "branch": branch, **stats, "metadata": metadata})


if __name__ == "__main__":
Expand Down
6 changes: 4 additions & 2 deletions api/code_coverage/lcov/lcov.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
import sys
from typing import Optional

from ...graph import Graph

def lcovparse(content):
Expand Down Expand Up @@ -124,7 +126,7 @@ def _line(l, report):
else:
sys.stdout.write("Unknown method name %s" % method)

def process_lcov(repo: str, lcov_file: str) -> None:
def process_lcov(repo: str, lcov_file: str, branch: Optional[str] = None) -> None:
# create report from coverage lcov file
with open(lcov_file, "r") as file:
content = file.read() # Reads the entire file as a single string
Expand All @@ -134,7 +136,7 @@ def process_lcov(repo: str, lcov_file: str) -> None:
# SF:/__w/FalkorDB/FalkorDB/src/algorithms/detect_cycle.c
prefix = "/__w/FalkorDB/FalkorDB/" # prefix to remove

g = Graph(repo)
g = Graph(repo, branch=branch)

#---------------------------------------------------------------------------
# Process report
Expand Down
45 changes: 31 additions & 14 deletions api/git_utils/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,21 @@
# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')

def GitRepoName(repo_name):
""" Returns the git repository name """
def GitRepoName(repo_name, branch=None):
    """Build the key of the git transitions graph for ``(repo_name, branch)``.

    The key has the shape ``{repo_name}:{branch}_git``. The hash-tag braces
    wrap only ``repo_name`` so the git-graph key lives on the same FalkorDB
    cluster slot as its sibling code graph and ``*_info`` Redis hash.

    Args:
        repo_name: Repository (project) name.
        branch: Branch name. ``None`` or an empty string falls back to
            ``DEFAULT_BRANCH``.

    Returns:
        The git graph key as a string.
    """
    from ..graph import DEFAULT_BRANCH

    effective_branch = DEFAULT_BRANCH if branch is None or branch == "" else branch
    return "".join(("{", repo_name, "}", ":", effective_branch, "_git"))


def LegacyGitRepoName(repo_name):
    """Return the pre-T17 git graph key, ``{repo_name}_git``.

    Kept for the migration helper, which still needs the old key shape.
    """
    return "".join(("{", repo_name, "}_git"))

def is_ignored(file_path: str, ignore_list: List[str]) -> bool:
Expand Down Expand Up @@ -70,14 +83,15 @@ def classify_changes(
return added, deleted, modified

# build a graph capturing the git commit history
def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, ignore_list: Optional[List[str]] = None) -> GitGraph:
def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, ignore_list: Optional[List[str]] = None, branch: Optional[str] = None) -> GitGraph:
"""
Builds a graph representation of the git commit history.

Args:
path (str): Path to the git repository.
repo_name (str): Name of the repository.
ignore_list (List[str], optional): List of file patterns to ignore.
branch (Optional[str]): Branch name. Defaults to ``_default``.

Returns:
GitGraph: Graph object representing the commit history.
Expand All @@ -86,13 +100,15 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno
if ignore_list is None:
ignore_list = []

# Copy the graph into a temporary graph
logging.info("Cloning source graph %s -> %s_tmp", repo_name, repo_name)
# Will be deleted at the end of this function
g = Graph(repo_name).clone(repo_name + "_tmp")
# Copy the graph into a temporary graph (sibling key with `_tmp` suffix on
# the branch component so the clone lands on the same cluster slot).
source = Graph(repo_name, branch=branch)
tmp_name = source.name + "_tmp"
logging.info("Cloning source graph %s -> %s", source.name, tmp_name)
g = source.clone(tmp_name)
g.enable_backlog()

git_graph = GitGraph(GitRepoName(repo_name))
git_graph = GitGraph(GitRepoName(repo_name, branch))
supported_types = analyzer.supported_types()

# Initialize with the current commit
Expand Down Expand Up @@ -252,12 +268,12 @@ def build_commit_graph(path: str, analyzer: SourceAnalyzer, repo_name: str, igno
# Delete temporary graph
g.disable_backlog()

logging.debug(f"Deleting temporary graph {repo_name + '_tmp'}")
logging.debug(f"Deleting temporary graph {g.name}")
g.delete()

return git_graph

def switch_commit(repo: str, to: str):
def switch_commit(repo: str, to: str, branch: Optional[str] = None):
"""
Switches the state of a graph repository from its current commit to the given commit.

Expand All @@ -268,6 +284,7 @@ def switch_commit(repo: str, to: str):
Args:
repo (str): The name of the graph repository to switch commits.
to (str): The target commit hash to switch the graph to.
branch (Optional[str]): The branch. Defaults to ``_default``.
"""

# Validate input arguments
Expand All @@ -280,11 +297,11 @@ def switch_commit(repo: str, to: str):
logging.info(f"Switching to commit: {to}")

# Initialize the graph and GitGraph objects
g = Graph(repo)
git_graph = GitGraph(GitRepoName(repo))
g = Graph(repo, branch=branch)
git_graph = GitGraph(GitRepoName(repo, branch))

# Get the current commit hash of the graph
current_hash = get_repo_commit(repo)
current_hash = get_repo_commit(repo, branch)
logging.info(f"Current graph commit: {current_hash}")

if current_hash == to:
Expand Down Expand Up @@ -329,5 +346,5 @@ def switch_commit(repo: str, to: str):
g.rerun_query(_q, _p)

# Update the graph's commit to the new target commit
set_repo_commit(repo, to)
set_repo_commit(repo, to, branch)
logging.info(f"Graph commit updated to {to}")
Loading