From c85cbd82e30f5d29b53191fb2271972345bfad57 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Tue, 12 May 2026 14:50:42 +0300 Subject: [PATCH 01/10] ci: incrementally update docs graph on PR merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On PR merge to main touching any .md file, this workflow runs the generic incremental update from FalkorDB/GraphRAG-UI: python -m server.scripts.update_graph --graph-id docs_benchmark ... It checks out this repo (the docs content) AND GraphRAG-UI (where the Python lives). The action's Python is source-agnostic — it reads ingestion config from the :Graph node in the org graph, not from this workflow. The only docs-specific input is `--graph-id docs_benchmark`; the rest (LLM/embedder/chunker/extractor/resolver/globs/skip_list/ smoke-test questions) is data in the org graph. Future user-created widgets share the same code path with no workflow change. Concurrency group serializes runs on `main` so two PRs merging within seconds queue rather than race. cancel-in-progress: false because each run costs LLM credit. Secrets required on this repo before the first run: FALKORDB_HOST, FALKORDB_PORT, FALKORDB_PASSWORD AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_DEPLOYMENT GRAPHRAG_UI_CHECKOUT_PAT (only while FalkorDB/GraphRAG-UI is private; drop the `token:` line when it becomes public) Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 83 ++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 .github/workflows/update-graph.yml diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml new file mode 100644 index 00000000..04bd7b35 --- /dev/null +++ b/.github/workflows/update-graph.yml @@ -0,0 +1,83 @@ +# Incrementally updates the FalkorDB docs knowledge graph on PR merge. +# +# On merge to main of a PR touching .md files, this workflow: +# 1. 
Checks out this repo (the content) and FalkorDB/GraphRAG-UI (the Python). +# 2. Runs `python -m server.scripts.update_graph --graph-id docs_benchmark` +# from the GraphRAG-UI checkout. That script: +# - looks up the :Graph node for docs_benchmark in the org graph +# - reads ingestion config (LLM/embedder/chunker/extractor/resolver/globs) +# from the node, NOT from this workflow — the workflow stays generic +# - copies the live graph to docs_v{N+1}_pending via redis-cli GRAPH.COPY +# - SDK.apply_changes(added, modified, deleted) + SDK.finalize() on pending +# - smoke-tests the `questions` from the :Graph node against the pending +# - on success: atomic Cypher flip of active_graph; drops old previous +# - on failure: exits non-zero, leaves pending graph for inspection +# +# The only docs-specific input from this file is `--graph-id docs_benchmark`. +# Everything else lives as data in the org graph so user-created widgets +# share the same code path with no workflow changes. + +name: Update graph (incremental) + +on: + pull_request: + types: [closed] + branches: [main] + paths: + - "**/*.md" + +# Merge target is always main, so all runs share one group. Bursts of PR +# merges queue rather than race. cancel-in-progress: false because each +# run consumes LLM credit and we'd rather pay the wait than re-do work. +concurrency: + group: update-graph-${{ github.ref }} + cancel-in-progress: false + +jobs: + update-graph: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout docs (this repo) + uses: actions/checkout@v4 + with: + fetch-depth: 0 # need history for git diff against an arbitrary base + path: docs + + - name: Checkout GraphRAG-UI (action code lives there) + uses: actions/checkout@v4 + with: + repository: FalkorDB/GraphRAG-UI + # Required if GraphRAG-UI is private. Drop this line entirely + # when the repo becomes public. 
+ token: ${{ secrets.GRAPHRAG_UI_CHECKOUT_PAT }} + path: graphrag-ui + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install GraphRAG-UI server deps + run: pip install -r graphrag-ui/server/requirements.txt + + - name: Run incremental update + env: + # FalkorDB connection (the action talks via redis-cli AND via the SDK). + FALKORDB_HOST: ${{ secrets.FALKORDB_HOST }} + FALKORDB_PORT: ${{ secrets.FALKORDB_PORT }} + FALKORDB_PASSWORD: ${{ secrets.FALKORDB_PASSWORD }} + + # Azure OpenAI credentials — read at factory time inside + # IngestionConfig.{LLM,Embedder}Config.to_*(). Never stored in + # the org graph alongside the rest of the config. + AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} + AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} + AZURE_OPENAI_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_DEPLOYMENT }} + working-directory: graphrag-ui + run: | + python -m server.scripts.update_graph \ + --graph-id docs_benchmark \ + --source-root ../docs \ + --base-sha ${{ github.event.pull_request.base.sha }} \ + --head-sha ${{ github.sha }} From 160fd4a34aabdd68506fbbd61578604d533d83e4 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Tue, 12 May 2026 16:40:40 +0300 Subject: [PATCH 02/10] ci: simplify update-graph workflow to a single endpoint call (Design B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous version of this workflow: - Checked out FalkorDB/GraphRAG-UI (needed a PAT secret) - pip-installed graphrag-sdk + dev deps - Ran ``python -m server.scripts.update_graph`` locally on the runner - Required FALKORDB_HOST/PORT/PASSWORD and AZURE_OPENAI_API_KEY/ ENDPOINT/DEPLOYMENT secrets GraphRAG-UI now exposes /api/admin/update-graph that does all that work server-side using its existing credentials. This workflow drops to: 1. Checkout docs (this repo) with full history 2. 
Inline Python: parse ``git diff``, read .md content for added+modified, build a JSON payload with {graph_id, files:{added,modified,deleted}} 3. curl POST the payload with a bearer token Secrets required on this repo, total: - ``UPDATE_GRAPH_TOKEN`` — shared bearer token for the endpoint Repo/environment variable required: - ``GRAPHRAG_UI_URL`` — base URL of the GraphRAG-UI deployment (e.g., https://api.staging.../ or https://api.prod.../) What's gone vs. the previous version: - FALKORDB_HOST / FALKORDB_PORT / FALKORDB_PASSWORD - AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_DEPLOYMENT - GRAPHRAG_UI_CHECKOUT_PAT - The whole graphrag-ui sibling checkout and pip-install steps Behavior notes: - The diff payload includes only .md files (path filter on the trigger catches non-.md PRs; the inline Python also re-filters for safety). - Renames are split into (delete old) + (add new with current content). - If the post-filter diff is empty, the workflow exits clean before the POST. The endpoint also short-circuits on empty diff but skipping saves one round-trip + the bearer-token cost. - curl --fail-with-body bubbles HTTP non-2xx (400 bad path, 401 wrong token, 422 smoke fail, 500 server config issue) up as CI failures with the server's detail message in the output. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 128 ++++++++++++++++++----------- 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index 04bd7b35..673c90c2 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -1,21 +1,16 @@ # Incrementally updates the FalkorDB docs knowledge graph on PR merge. # # On merge to main of a PR touching .md files, this workflow: -# 1. Checks out this repo (the content) and FalkorDB/GraphRAG-UI (the Python). -# 2. 
Runs `python -m server.scripts.update_graph --graph-id docs_benchmark` -# from the GraphRAG-UI checkout. That script: -# - looks up the :Graph node for docs_benchmark in the org graph -# - reads ingestion config (LLM/embedder/chunker/extractor/resolver/globs) -# from the node, NOT from this workflow — the workflow stays generic -# - copies the live graph to docs_v{N+1}_pending via redis-cli GRAPH.COPY -# - SDK.apply_changes(added, modified, deleted) + SDK.finalize() on pending -# - smoke-tests the `questions` from the :Graph node against the pending -# - on success: atomic Cypher flip of active_graph; drops old previous -# - on failure: exits non-zero, leaves pending graph for inspection +# 1. Computes the diff (added / modified / deleted .md files). +# 2. Reads the content of added + modified files. +# 3. POSTs the payload to GraphRAG-UI's /api/admin/update-graph endpoint. +# 4. The endpoint does all the SDK / FalkorDB / smoke-test work server-side +# using its existing credentials. This workflow only needs ONE secret: +# ``UPDATE_GRAPH_TOKEN`` — a shared-secret bearer token. # -# The only docs-specific input from this file is `--graph-id docs_benchmark`. -# Everything else lives as data in the org graph so user-created widgets -# share the same code path with no workflow changes. +# No Azure OpenAI keys, no FalkorDB credentials, no PAT for cross-repo +# checkout. Failures from the server come back as HTTP non-2xx with a +# detail message; curl --fail-with-body bubbles them up as a CI failure. name: Update graph (incremental) @@ -38,46 +33,83 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest timeout-minutes: 30 + env: + GRAPH_ID: docs_benchmark + # The base URL of the GraphRAG-UI deployment. Set this as a repo or + # environment variable so it can differ between staging and prod. 
+ GRAPHRAG_UI_URL: ${{ vars.GRAPHRAG_UI_URL }} steps: - - name: Checkout docs (this repo) + - name: Checkout docs uses: actions/checkout@v4 with: - fetch-depth: 0 # need history for git diff against an arbitrary base - path: docs + fetch-depth: 0 # need history for diff against the PR base - - name: Checkout GraphRAG-UI (action code lives there) - uses: actions/checkout@v4 - with: - repository: FalkorDB/GraphRAG-UI - # Required if GraphRAG-UI is private. Drop this line entirely - # when the repo becomes public. - token: ${{ secrets.GRAPHRAG_UI_CHECKOUT_PAT }} - path: graphrag-ui + - name: Build diff payload + id: payload + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.sha }} + run: | + python3 - <<'PY' + import json, os, pathlib, subprocess, sys - - uses: actions/setup-python@v5 - with: - python-version: "3.12" + base, head = os.environ["BASE_SHA"], os.environ["HEAD_SHA"] + out = subprocess.run( + ["git", "diff", "--name-status", base, head], + capture_output=True, text=True, check=True, + ).stdout - - name: Install GraphRAG-UI server deps - run: pip install -r graphrag-ui/server/requirements.txt + added, modified, deleted = {}, {}, [] + for line in out.splitlines(): + parts = line.split("\t") + if not parts: + continue + status = parts[0][0] # R100 -> R + if status == "R" and len(parts) >= 3: + old, new = parts[1], parts[2] + if old.endswith(".md"): + deleted.append(old) + if new.endswith(".md"): + try: + added[new] = pathlib.Path(new).read_text(encoding="utf-8") + except FileNotFoundError: + pass + continue + if len(parts) < 2 or not parts[1].endswith(".md"): + continue + path = parts[1] + if status == "A": + added[path] = pathlib.Path(path).read_text(encoding="utf-8") + elif status == "M": + modified[path] = pathlib.Path(path).read_text(encoding="utf-8") + elif status == "D": + deleted.append(path) - - name: Run incremental update - env: - # FalkorDB connection (the action talks via redis-cli AND via the SDK). 
- FALKORDB_HOST: ${{ secrets.FALKORDB_HOST }} - FALKORDB_PORT: ${{ secrets.FALKORDB_PORT }} - FALKORDB_PASSWORD: ${{ secrets.FALKORDB_PASSWORD }} + payload = { + "graph_id": os.environ.get("GRAPH_ID", "docs_benchmark"), + "files": {"added": added, "modified": modified, "deleted": deleted}, + } + if not (added or modified or deleted): + # Path filter on the workflow trigger should make this rare, but + # be explicit: no work to do, exit clean before the POST. + print("::notice::No .md changes after filtering — skipping graph update.", file=sys.stderr) + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write("skip=true\n") + sys.exit(0) + + pathlib.Path("payload.json").write_text(json.dumps(payload)) + print(f"::notice::Diff: +{len(added)} ~{len(modified)} -{len(deleted)} files") + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write("skip=false\n") + PY - # Azure OpenAI credentials — read at factory time inside - # IngestionConfig.{LLM,Embedder}Config.to_*(). Never stored in - # the org graph alongside the rest of the config. 
- AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} - AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} - AZURE_OPENAI_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_DEPLOYMENT }} - working-directory: graphrag-ui + - name: Call admin update-graph endpoint + if: steps.payload.outputs.skip != 'true' run: | - python -m server.scripts.update_graph \ - --graph-id docs_benchmark \ - --source-root ../docs \ - --base-sha ${{ github.event.pull_request.base.sha }} \ - --head-sha ${{ github.sha }} + curl -X POST "$GRAPHRAG_UI_URL/api/admin/update-graph" \ + -H "Authorization: Bearer ${{ secrets.UPDATE_GRAPH_TOKEN }}" \ + -H "Content-Type: application/json" \ + --data-binary @payload.json \ + --fail-with-body \ + --show-error \ + --max-time 1800 From c7ae19a8662ffd6f5b12446028bdaaa098abef5e Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:14:31 +0300 Subject: [PATCH 03/10] fix(ci): correct concurrency group target + add least-privilege permissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues flagged by CodeRabbit + CodeQL on PR #478: 1. concurrency.group used github.ref, which in a pull_request event resolves to refs/pull/<N>/merge — a per-PR value. Two PRs merging to main simultaneously would have ended up in *different* concurrency groups and run in parallel, defeating the queue. Server-side CAS in /api/admin/update-graph (FalkorDB/GraphRAG-UI#152) would have caught the race, but parallel runs would still cost 2× LLM credit for what should be one ingestion. Use github.event.pull_request.base.ref so all merges to main share update-graph-main and queue properly. 2. The job ran with default GITHUB_TOKEN permissions. The work only needs to read repo source (for the git diff); nothing writes back to the repo. Added `permissions: { contents: read }`. Closes CodeQL alerts #14 + #15 ("workflow does not contain permissions").
No functional change beyond serializing concurrent merges and restricting the GITHUB_TOKEN scope. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index 673c90c2..c4092a69 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -24,8 +24,13 @@ on: # Merge target is always main, so all runs share one group. Bursts of PR # merges queue rather than race. cancel-in-progress: false because each # run consumes LLM credit and we'd rather pay the wait than re-do work. +# +# IMPORTANT: ``github.ref`` resolves to ``refs/pull//merge`` in a +# pull_request event — a different value per PR. Using it here would +# defeat the queue (each PR would get its own group). Use the target +# branch ref instead so all merges to main share one group. concurrency: - group: update-graph-${{ github.ref }} + group: update-graph-${{ github.event.pull_request.base.ref }} cancel-in-progress: false jobs: @@ -33,6 +38,12 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest timeout-minutes: 30 + # Least-privilege GITHUB_TOKEN: this job only needs to read repo + # source to compute the diff. No writes back to the repo, no comments, + # no status updates. Closes CodeQL "workflow does not contain + # permissions" alerts. + permissions: + contents: read env: GRAPH_ID: docs_benchmark # The base URL of the GraphRAG-UI deployment. 
Set this as a repo or From 03c58e6b43f7eb1df420e858ef5a4a2f3d6b33ef Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:30:03 +0300 Subject: [PATCH 04/10] fix(ci): use merge_commit_sha for HEAD_SHA, not github.sha (Naseem) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a pull_request event, github.sha resolves to the temporary test-merge commit (refs/pull/<N>/merge) GitHub creates for CI, NOT the commit that actually lands on main when the merge button is clicked. Using it for HEAD_SHA in `git diff $BASE_SHA $HEAD_SHA` would silently corrupt the payload in three concrete scenarios: 1. Squash merges — the squash commit on main is a different object than the test-merge; tree diffs *should* match but corner cases exist. 2. Rebase merges — definitely different commits per rebased PR commit. 3. PRs updated between last test-merge and actual merge (user clicked "Update branch" or rebased after CI's last run) — test-merge SHA is stale. Failure mode is silent: workflow succeeds, endpoint accepts the payload, graph ingests a different file set than what's actually on main. First symptom would be widget answers referencing files that don't match live docs. Authoritative post-merge SHA is github.event.pull_request.merge_commit_sha, populated on closed+merged events. One subtle wrinkle worth knowing: pull_request.base.sha reflects main's tip when the PR was *last updated*, not at *merge time*. If main moved forward between those points, our diff includes the interim changes too. Content-hash short-circuit in update_graph.py makes re-ingesting identical content a no-op, so this is wasted-LLM not wrong-graph. Tighter fix (use `merge_commit_sha^1` for base) is a separate refinement that can land later if the edge case ever matters.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index c4092a69..72f33124 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -58,8 +58,14 @@ jobs: - name: Build diff payload id: payload env: + # ``github.sha`` in a pull_request event resolves to the temporary + # test-merge commit (``refs/pull//merge``) GitHub creates for CI, + # NOT the commit that actually lands on the target branch when the + # merge button is clicked. Use ``merge_commit_sha`` so the diff + # reflects what really landed — critical for squash/rebase merges + # and for PRs updated between the last test-merge and merge. BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.sha }} + HEAD_SHA: ${{ github.event.pull_request.merge_commit_sha }} run: | python3 - <<'PY' import json, os, pathlib, subprocess, sys From 836fb6de5111ba7de8eb54aecea1605a35d9d3e2 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:38:21 +0300 Subject: [PATCH 05/10] ci: switch trigger from pull_request to push:branches:[main] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per product direction: "changes on main" is the source of truth, not "PR closed." Switching the trigger has a couple of nice second-order effects: 1. Catches direct pushes to main, not just PR merges. If anyone bypasses the PR flow (rare given branch protection, but possible for emergency fixes or admin pushes), the graph still updates. 2. The Naseem-flagged ``github.sha`` vs ``merge_commit_sha`` quirk goes away. On push events github.sha IS the actual commit on main, not a synthetic test-merge commit. The diff payload can use github.event.before + github.sha directly with no special-casing. 
Changes: on: - pull_request: - types: [closed] - branches: [main] + push: + branches: + - main paths: - "**/*.md" jobs: update-graph: - if: github.event.pull_request.merged == true # not needed on push concurrency: - group: update-graph-${{ github.event.pull_request.base.ref }} + group: update-graph-${{ github.ref_name }} # = "main" env BASE_SHA: - ${{ github.event.pull_request.base.sha }} + ${{ github.event.before }} # parent of new HEAD env HEAD_SHA: - ${{ github.event.pull_request.merge_commit_sha }} + ${{ github.sha }} # new HEAD on main Added defensive handling for the all-zero ``before`` SHA (carried by the first push to a brand-new branch) — falls back to diffing against git's empty-tree object so the workflow doesn't crash if main is ever recreated. Module docstring updated to reflect the new trigger. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 71 ++++++++++++++++++------------ 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index 72f33124..540e4cc9 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -1,7 +1,15 @@ -# Incrementally updates the FalkorDB docs knowledge graph on PR merge. +# Incrementally updates the FalkorDB docs knowledge graph on push to main. # -# On merge to main of a PR touching .md files, this workflow: -# 1. Computes the diff (added / modified / deleted .md files). +# Fires whenever a commit lands on main and at least one .md file changed. +# Covers PR merges (squash/rebase/merge-commit) and direct pushes alike — +# the trigger is "main moved," not "a PR closed." +# +# On each push the workflow: +# 1. Computes the diff (added / modified / deleted .md files) between the +# previous main HEAD (``github.event.before``) and the new HEAD +# (``github.sha``). 
On push events both values point at real commits +# on main, unlike pull_request where ``github.sha`` is the synthetic +# test-merge commit. # 2. Reads the content of added + modified files. # 3. POSTs the payload to GraphRAG-UI's /api/admin/update-graph endpoint. # 4. The endpoint does all the SDK / FalkorDB / smoke-test work server-side @@ -15,27 +23,21 @@ name: Update graph (incremental) on: - pull_request: - types: [closed] - branches: [main] + push: + branches: + - main paths: - "**/*.md" -# Merge target is always main, so all runs share one group. Bursts of PR -# merges queue rather than race. cancel-in-progress: false because each -# run consumes LLM credit and we'd rather pay the wait than re-do work. -# -# IMPORTANT: ``github.ref`` resolves to ``refs/pull//merge`` in a -# pull_request event — a different value per PR. Using it here would -# defeat the queue (each PR would get its own group). Use the target -# branch ref instead so all merges to main share one group. +# All pushes to main share one queue. Bursts of merges process in order +# rather than racing. cancel-in-progress: false because each run consumes +# LLM credit and we'd rather pay the wait than re-do work. 
concurrency: - group: update-graph-${{ github.event.pull_request.base.ref }} + group: update-graph-${{ github.ref_name }} cancel-in-progress: false jobs: update-graph: - if: github.event.pull_request.merged == true runs-on: ubuntu-latest timeout-minutes: 30 # Least-privilege GITHUB_TOKEN: this job only needs to read repo @@ -53,24 +55,39 @@ jobs: - name: Checkout docs uses: actions/checkout@v4 with: - fetch-depth: 0 # need history for diff against the PR base + fetch-depth: 0 # need history for diff against the previous HEAD - name: Build diff payload id: payload env: - # ``github.sha`` in a pull_request event resolves to the temporary - # test-merge commit (``refs/pull//merge``) GitHub creates for CI, - # NOT the commit that actually lands on the target branch when the - # merge button is clicked. Use ``merge_commit_sha`` so the diff - # reflects what really landed — critical for squash/rebase merges - # and for PRs updated between the last test-merge and merge. - BASE_SHA: ${{ github.event.pull_request.base.sha }} - HEAD_SHA: ${{ github.event.pull_request.merge_commit_sha }} + # On push events: + # ``github.event.before`` = parent of the new HEAD on main + # ``github.sha`` = the new HEAD (what just landed) + # ``git diff $before $sha`` is exactly what this push introduced — + # the merge/rebase/direct-push contribution. No test-merge quirks. + BASE_SHA: ${{ github.event.before }} + HEAD_SHA: ${{ github.sha }} run: | python3 - <<'PY' import json, os, pathlib, subprocess, sys base, head = os.environ["BASE_SHA"], os.environ["HEAD_SHA"] + + # First push to a newly-created branch carries an all-zero + # ``before`` SHA. main already exists in this repo so this is + # a safety net, not an expected path. Treat it as "no prior + # history — diff the whole tree against the empty tree." 
+ if set(base) == {"0"}: + print( + "::notice::No prior HEAD on main (all-zero before); " + "diffing against the empty tree.", + file=sys.stderr, + ) + # 4b825dc642cb6eb9a060e54bf8d69288fbee4904 is the well-known + # SHA of git's empty tree object — diff against it yields + # every file as added. + base = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + out = subprocess.run( ["git", "diff", "--name-status", base, head], capture_output=True, text=True, check=True, @@ -107,8 +124,8 @@ jobs: "files": {"added": added, "modified": modified, "deleted": deleted}, } if not (added or modified or deleted): - # Path filter on the workflow trigger should make this rare, but - # be explicit: no work to do, exit clean before the POST. + # Path filter on the workflow trigger should make this rare, + # but be explicit: no work to do, exit clean before the POST. print("::notice::No .md changes after filtering — skipping graph update.", file=sys.stderr) with open(os.environ["GITHUB_OUTPUT"], "a") as f: f.write("skip=true\n") From e443e393d2a161b4a71684a9afafc7d4287526a3 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:40:07 +0300 Subject: [PATCH 06/10] style(ci): strip unnecessary commentary from update-graph.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header block + verbose multi-line explanations were editorial, not load-bearing — anyone reading the workflow can see what it does from the step names. Kept one-line WHY notes only where the choice isn't obvious from the code (empty-tree fallback for all-zero before-SHA, fetch-depth=0 for the diff). No behavior change. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 59 +++--------------------------- 1 file changed, 6 insertions(+), 53 deletions(-) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index 540e4cc9..9dd34fc9 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -1,25 +1,3 @@ -# Incrementally updates the FalkorDB docs knowledge graph on push to main. -# -# Fires whenever a commit lands on main and at least one .md file changed. -# Covers PR merges (squash/rebase/merge-commit) and direct pushes alike — -# the trigger is "main moved," not "a PR closed." -# -# On each push the workflow: -# 1. Computes the diff (added / modified / deleted .md files) between the -# previous main HEAD (``github.event.before``) and the new HEAD -# (``github.sha``). On push events both values point at real commits -# on main, unlike pull_request where ``github.sha`` is the synthetic -# test-merge commit. -# 2. Reads the content of added + modified files. -# 3. POSTs the payload to GraphRAG-UI's /api/admin/update-graph endpoint. -# 4. The endpoint does all the SDK / FalkorDB / smoke-test work server-side -# using its existing credentials. This workflow only needs ONE secret: -# ``UPDATE_GRAPH_TOKEN`` — a shared-secret bearer token. -# -# No Azure OpenAI keys, no FalkorDB credentials, no PAT for cross-repo -# checkout. Failures from the server come back as HTTP non-2xx with a -# detail message; curl --fail-with-body bubbles them up as a CI failure. - name: Update graph (incremental) on: @@ -29,9 +7,7 @@ on: paths: - "**/*.md" -# All pushes to main share one queue. Bursts of merges process in order -# rather than racing. cancel-in-progress: false because each run consumes -# LLM credit and we'd rather pay the wait than re-do work. +# `github.ref_name` is "main"; all pushes to main share one queue. 
concurrency: group: update-graph-${{ github.ref_name }} cancel-in-progress: false @@ -40,31 +16,20 @@ jobs: update-graph: runs-on: ubuntu-latest timeout-minutes: 30 - # Least-privilege GITHUB_TOKEN: this job only needs to read repo - # source to compute the diff. No writes back to the repo, no comments, - # no status updates. Closes CodeQL "workflow does not contain - # permissions" alerts. permissions: contents: read env: GRAPH_ID: docs_benchmark - # The base URL of the GraphRAG-UI deployment. Set this as a repo or - # environment variable so it can differ between staging and prod. GRAPHRAG_UI_URL: ${{ vars.GRAPHRAG_UI_URL }} steps: - name: Checkout docs uses: actions/checkout@v4 with: - fetch-depth: 0 # need history for diff against the previous HEAD + fetch-depth: 0 # full history needed for the diff below - name: Build diff payload id: payload env: - # On push events: - # ``github.event.before`` = parent of the new HEAD on main - # ``github.sha`` = the new HEAD (what just landed) - # ``git diff $before $sha`` is exactly what this push introduced — - # the merge/rebase/direct-push contribution. No test-merge quirks. BASE_SHA: ${{ github.event.before }} HEAD_SHA: ${{ github.sha }} run: | @@ -73,19 +38,9 @@ jobs: base, head = os.environ["BASE_SHA"], os.environ["HEAD_SHA"] - # First push to a newly-created branch carries an all-zero - # ``before`` SHA. main already exists in this repo so this is - # a safety net, not an expected path. Treat it as "no prior - # history — diff the whole tree against the empty tree." + # All-zero "before" = first push to a newly-created branch. + # Fall back to git's empty-tree SHA so every file shows as added. if set(base) == {"0"}: - print( - "::notice::No prior HEAD on main (all-zero before); " - "diffing against the empty tree.", - file=sys.stderr, - ) - # 4b825dc642cb6eb9a060e54bf8d69288fbee4904 is the well-known - # SHA of git's empty tree object — diff against it yields - # every file as added. 
base = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" out = subprocess.run( @@ -98,7 +53,7 @@ jobs: parts = line.split("\t") if not parts: continue - status = parts[0][0] # R100 -> R + status = parts[0][0] # strip rename similarity score if status == "R" and len(parts) >= 3: old, new = parts[1], parts[2] if old.endswith(".md"): @@ -124,9 +79,7 @@ jobs: "files": {"added": added, "modified": modified, "deleted": deleted}, } if not (added or modified or deleted): - # Path filter on the workflow trigger should make this rare, - # but be explicit: no work to do, exit clean before the POST. - print("::notice::No .md changes after filtering — skipping graph update.", file=sys.stderr) + print("::notice::No .md changes — skipping graph update.", file=sys.stderr) with open(os.environ["GITHUB_OUTPUT"], "a") as f: f.write("skip=true\n") sys.exit(0) From 4307c21e8c2daa59b971bf156b8a0fd12ca59226 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:41:51 +0300 Subject: [PATCH 07/10] docs(ci): add 4-line header explaining update-graph.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brief header so a reader skimming the file knows what it does without reading the steps. No editorial or marketing — just trigger, action, and where the heavy lifting lives. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index 9dd34fc9..c82802b5 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -1,3 +1,8 @@ +# Incrementally updates the FalkorDB docs knowledge graph whenever .md +# files change on main. Computes the diff against the previous HEAD, +# POSTs it to GraphRAG-UI's /api/admin/update-graph endpoint, which does +# the SDK ingestion + smoke test + atomic alias flip server-side. 
+ name: Update graph (incremental) on: From 1d935e91aa2601c6d1e4342b0a169fdc522f064e Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:51:56 +0300 Subject: [PATCH 08/10] refactor(ci): extract diff-payload Python to .github/scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inline heredoc moved to .github/scripts/build_diff_payload.py. The workflow YAML drops from ~110 lines to ~50; the script gains a ``main()`` + helper functions and is syntax-highlighted / lintable / unit-testable like normal Python. ``.github/`` is the CI-config corner of the repo, so this preserves the spirit of "docs repo stays content-only" — Python lives in the ops directory, not in source/content paths. No behavior change. ``BASE_SHA`` / ``HEAD_SHA`` / ``GRAPH_ID`` / ``GITHUB_OUTPUT`` are still read from env exactly as before. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/scripts/build_diff_payload.py | 108 ++++++++++++++++++++++++++ .github/workflows/update-graph.yml | 58 +------------- 2 files changed, 109 insertions(+), 57 deletions(-) create mode 100644 .github/scripts/build_diff_payload.py diff --git a/.github/scripts/build_diff_payload.py b/.github/scripts/build_diff_payload.py new file mode 100644 index 00000000..251bba85 --- /dev/null +++ b/.github/scripts/build_diff_payload.py @@ -0,0 +1,108 @@ +"""Build the JSON payload sent to GraphRAG-UI's /api/admin/update-graph. + +Invoked from .github/workflows/update-graph.yml after a push to main: +reads BASE_SHA + HEAD_SHA from env, computes the .md diff, reads file +content for added+modified entries, and writes payload.json. Sets the +``skip`` step output to ``true`` when nothing ingestable changed so the +workflow can short-circuit before the network call. 
+""" + +from __future__ import annotations + +import json +import os +import pathlib +import subprocess +import sys + +# git's well-known empty-tree SHA — used as the "before" when a push +# carries an all-zero ``before`` (i.e., first push to a brand-new branch). +EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + + +def _git_diff_name_status(base: str, head: str) -> str: + """Return the raw ``git diff --name-status`` output between two SHAs.""" + return subprocess.run( + ["git", "diff", "--name-status", base, head], + capture_output=True, text=True, check=True, + ).stdout + + +def _collect_md_changes( + diff_output: str, +) -> tuple[dict[str, str], dict[str, str], list[str]]: + """Parse ``git diff --name-status`` and bucket .md changes. + + Renames (``R``) are split into delete-old + add-new so the SDK + re-extracts the content under the new path. Non-.md files are + skipped. + """ + added: dict[str, str] = {} + modified: dict[str, str] = {} + deleted: list[str] = [] + + for line in diff_output.splitlines(): + parts = line.split("\t") + if not parts: + continue + status = parts[0][0] # strip rename similarity score, e.g. 
R100 → R + + if status == "R" and len(parts) >= 3: + old, new = parts[1], parts[2] + if old.endswith(".md"): + deleted.append(old) + if new.endswith(".md"): + try: + added[new] = pathlib.Path(new).read_text(encoding="utf-8") + except FileNotFoundError: + pass + continue + + if len(parts) < 2 or not parts[1].endswith(".md"): + continue + path = parts[1] + if status == "A": + added[path] = pathlib.Path(path).read_text(encoding="utf-8") + elif status == "M": + modified[path] = pathlib.Path(path).read_text(encoding="utf-8") + elif status == "D": + deleted.append(path) + + return added, modified, deleted + + +def _set_output(name: str, value: str) -> None: + """Append to GITHUB_OUTPUT so subsequent steps can branch on it.""" + out = os.environ.get("GITHUB_OUTPUT") + if not out: # local dev / standalone run + return + with open(out, "a", encoding="utf-8") as f: + f.write(f"{name}={value}\n") + + +def main() -> int: + base = os.environ["BASE_SHA"] + head = os.environ["HEAD_SHA"] + if set(base) == {"0"}: + base = EMPTY_TREE_SHA + + diff = _git_diff_name_status(base, head) + added, modified, deleted = _collect_md_changes(diff) + + if not (added or modified or deleted): + print("::notice::No .md changes — skipping graph update.", file=sys.stderr) + _set_output("skip", "true") + return 0 + + payload = { + "graph_id": os.environ.get("GRAPH_ID", "docs_benchmark"), + "files": {"added": added, "modified": modified, "deleted": deleted}, + } + pathlib.Path("payload.json").write_text(json.dumps(payload), encoding="utf-8") + print(f"::notice::Diff: +{len(added)} ~{len(modified)} -{len(deleted)} files") + _set_output("skip", "false") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index c82802b5..9bf9df5f 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -37,63 +37,7 @@ jobs: env: BASE_SHA: ${{ github.event.before }} HEAD_SHA: ${{ github.sha }} - 
run: | - python3 - <<'PY' - import json, os, pathlib, subprocess, sys - - base, head = os.environ["BASE_SHA"], os.environ["HEAD_SHA"] - - # All-zero "before" = first push to a newly-created branch. - # Fall back to git's empty-tree SHA so every file shows as added. - if set(base) == {"0"}: - base = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" - - out = subprocess.run( - ["git", "diff", "--name-status", base, head], - capture_output=True, text=True, check=True, - ).stdout - - added, modified, deleted = {}, {}, [] - for line in out.splitlines(): - parts = line.split("\t") - if not parts: - continue - status = parts[0][0] # strip rename similarity score - if status == "R" and len(parts) >= 3: - old, new = parts[1], parts[2] - if old.endswith(".md"): - deleted.append(old) - if new.endswith(".md"): - try: - added[new] = pathlib.Path(new).read_text(encoding="utf-8") - except FileNotFoundError: - pass - continue - if len(parts) < 2 or not parts[1].endswith(".md"): - continue - path = parts[1] - if status == "A": - added[path] = pathlib.Path(path).read_text(encoding="utf-8") - elif status == "M": - modified[path] = pathlib.Path(path).read_text(encoding="utf-8") - elif status == "D": - deleted.append(path) - - payload = { - "graph_id": os.environ.get("GRAPH_ID", "docs_benchmark"), - "files": {"added": added, "modified": modified, "deleted": deleted}, - } - if not (added or modified or deleted): - print("::notice::No .md changes — skipping graph update.", file=sys.stderr) - with open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write("skip=true\n") - sys.exit(0) - - pathlib.Path("payload.json").write_text(json.dumps(payload)) - print(f"::notice::Diff: +{len(added)} ~{len(modified)} -{len(deleted)} files") - with open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write("skip=false\n") - PY + run: python3 .github/scripts/build_diff_payload.py - name: Call admin update-graph endpoint if: steps.payload.outputs.skip != 'true' From 83792edabacc717e0bcb6bbb91da6df541253f0a Mon Sep 17 
00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 14:56:28 +0300 Subject: [PATCH 09/10] fix(ci): read file content from git object store, not working tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced by a local dry-run against a historical commit pair: the script was calling ``pathlib.Path(path).read_text()`` to grab the content of added/modified files. That works in CI because ``actions/checkout`` puts HEAD on disk, but it fails locally when the working directory isn't at HEAD_SHA (e.g., running the script against a past commit to debug). Switched to ``git show <sha>:<path>``, which pulls the blob from the object store regardless of what's checked out. Returns None on "not present at that commit" and the caller skips silently — same permissive behavior as the previous ``except FileNotFoundError: pass`` in the rename branch, now applied uniformly. Verified locally on commit pair 5f90004..3ce7b18 (PR #477 merge): script produces +1 ~1 -0 with correct file contents from the object store. CI behavior unchanged because the checked-out HEAD's blobs match the object-store blobs. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/scripts/build_diff_payload.py | 42 +++++++++++++++++++++------ 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/.github/scripts/build_diff_payload.py b/.github/scripts/build_diff_payload.py index 251bba85..b6e51da9 100644 --- a/.github/scripts/build_diff_payload.py +++ b/.github/scripts/build_diff_payload.py @@ -28,14 +28,35 @@ def _git_diff_name_status(base: str, head: str) -> str: ).stdout +def _read_at(head: str, path: str) -> str | None: + """Read a file's content at a specific commit, regardless of what's + currently checked out in the working tree. + + Uses ``git show <head>:<path>``, which pulls the blob from the + object store.
Reading from disk via ``pathlib`` would only work if + the runner had already checked out ``head``; this is more robust + and lets the script be exercised locally against historical + commits without checking them out first. Returns None if the path + doesn't exist at ``head`` (e.g. rare rename edge cases). + """ + proc = subprocess.run( + ["git", "show", f"{head}:{path}"], + capture_output=True, text=True, check=False, + ) + if proc.returncode != 0: + return None + return proc.stdout + + def _collect_md_changes( - diff_output: str, + diff_output: str, head: str, ) -> tuple[dict[str, str], dict[str, str], list[str]]: """Parse ``git diff --name-status`` and bucket .md changes. Renames (``R``) are split into delete-old + add-new so the SDK re-extracts the content under the new path. Non-.md files are - skipped. + skipped. File content for added/modified entries is read from + the git object store at ``head``, not from disk. """ added: dict[str, str] = {} modified: dict[str, str] = {} @@ -52,19 +73,22 @@ def _collect_md_changes( if old.endswith(".md"): deleted.append(old) if new.endswith(".md"): - try: - added[new] = pathlib.Path(new).read_text(encoding="utf-8") - except FileNotFoundError: - pass + content = _read_at(head, new) + if content is not None: + added[new] = content continue if len(parts) < 2 or not parts[1].endswith(".md"): continue path = parts[1] if status == "A": - added[path] = pathlib.Path(path).read_text(encoding="utf-8") + content = _read_at(head, path) + if content is not None: + added[path] = content elif status == "M": - modified[path] = pathlib.Path(path).read_text(encoding="utf-8") + content = _read_at(head, path) + if content is not None: + modified[path] = content elif status == "D": deleted.append(path) @@ -87,7 +111,7 @@ def main() -> int: base = EMPTY_TREE_SHA diff = _git_diff_name_status(base, head) - added, modified, deleted = _collect_md_changes(diff) + added, modified, deleted = _collect_md_changes(diff, head) if not (added or modified 
or deleted): print("::notice::No .md changes — skipping graph update.", file=sys.stderr) From b745c178207e07696f683bed916e608ed9101442 Mon Sep 17 00:00:00 2001 From: Gal Shubeli Date: Wed, 13 May 2026 16:51:47 +0300 Subject: [PATCH 10/10] ci(workflow): pin actions/checkout to commit SHA Matches the existing convention from ``.github/workflows/spellcheck.yml`` (``actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6``). Protects against tag squatting and silent action retargeting; addresses Copilot review comment on PR #478. Note that this is not a pure pin: the workflow previously used ``actions/checkout@v4``, so this change also moves it to the commit annotated as v6. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-graph.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update-graph.yml b/.github/workflows/update-graph.yml index 9bf9df5f..590b76ae 100644 --- a/.github/workflows/update-graph.yml +++ b/.github/workflows/update-graph.yml @@ -28,7 +28,7 @@ jobs: GRAPHRAG_UI_URL: ${{ vars.GRAPHRAG_UI_URL }} steps: - name: Checkout docs - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: fetch-depth: 0 # full history needed for the diff below