From 1e9629565363180debfe1e41cd91098b238c46c7 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:01:20 -0300 Subject: [PATCH 1/8] fix: cache notebook builds to avoid failures from flaky upstream models The build-notebooks CI executes all tutorial notebooks on every run. When an upstream model (e.g. black-forest-labs/flux.2-pro) is down, the entire docs build fails even if no notebooks changed. Add per-notebook caching based on source file SHA-256 hashes. Unchanged notebooks are served from cache, and only modified ones are re-executed. On the first CI run (empty cache), the workflow seeds the cache from the last successful build artifact. Also add a minimal test script (test_flux_image_gen.py) to reproduce the flux.2-pro health check failure locally. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 41 ++++++++++++++++++- .gitignore | 3 ++ Makefile | 10 ++++- docs/scripts/build_notebooks_cached.sh | 55 ++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 4 deletions(-) create mode 100755 docs/scripts/build_notebooks_cached.sh diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index 99571bc1..e70f8d55 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -22,8 +22,45 @@ jobs: version: "0.9.5" - name: Set up Python run: uv python install 3.11 - - name: Convert and execute notebooks - run: make convert-execute-notebooks + - name: Restore notebook cache + id: cache + uses: actions/cache@v4 + with: + path: .notebook-cache + key: notebooks-${{ hashFiles('docs/notebook_source/*.py') }} + restore-keys: | + notebooks- + - name: Seed cache from last successful artifact + if: steps.cache.outputs.cache-hit != 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "Cache miss — seeding from last successful build artifact..." + LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --limit 1 --json databaseId -q '.[0].databaseId' 2>/dev/null || true) + if [ -n "$LAST_RUN_ID" ]; then + TMPDIR=$(mktemp -d) + if gh run download "$LAST_RUN_ID" --name notebooks --dir "$TMPDIR" 2>/dev/null; then + mkdir -p .notebook-cache + for src in docs/notebook_source/*.py; do + name="$(basename "$src" .py)" + nb="$TMPDIR/${name}.ipynb" + if [ -f "$nb" ]; then + hash="$(sha256sum "$src" | cut -d' ' -f1)" + cp "$nb" ".notebook-cache/${name}.ipynb" + echo "$hash" > ".notebook-cache/${name}.sha256" + echo " Seeded: ${name}" + fi + done + echo "Cache seeded from run $LAST_RUN_ID" + else + echo "Could not download artifact from run $LAST_RUN_ID, proceeding without cache" + fi + rm -rf "$TMPDIR" + else + echo "No previous successful run found, proceeding without cache" + fi + - name: Convert and execute notebooks (cached) + run: make convert-execute-notebooks USE_CACHE=1 - name: Upload notebooks as artifacts uses: actions/upload-artifact@v4 with: diff --git a/.gitignore b/.gitignore index 99f6e26c..9f3b78f6 100644 --- a/.gitignore +++ b/.gitignore @@ -99,6 +99,9 @@ NOTEPAD.md # Build-time copy of README for data-designer package (copied from top-level during build) packages/data-designer/README.md +# Notebook build cache +.notebook-cache/ + # Cerebro knowledge base .cerebro/ .cursor/rules/cerebro.mdc diff --git a/Makefile b/Makefile index e2c61db0..c4b86798 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ help: @echo " clean-dist - Remove dist directories from all packages" @echo " verify-imports - Verify all package imports work" @echo " show-versions - Show versions of all packages" - @echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext" + @echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext (USE_CACHE=1 to skip unchanged)" @echo " generate-colab-notebooks - Generate Colab-compatible notebooks" @echo " serve-docs-locally - Serve documentation locally" @echo " check-license-headers - Check if all files have license headers" @@ -461,6 +461,10 @@ serve-docs-locally: uv run mkdocs serve --livereload convert-execute-notebooks: +ifeq ($(USE_CACHE),1) + @echo "📓 Converting Python tutorials to notebooks (with caching)..." + @bash docs/scripts/build_notebooks_cached.sh +else @echo "📓 Converting Python tutorials to notebooks and executing..." @mkdir -p docs/notebooks cp docs/notebook_source/_README.md docs/notebooks/README.md @@ -470,6 +474,7 @@ convert-execute-notebooks: rm -r docs/notebook_source/artifacts rm docs/notebook_source/*.csv @echo "✅ Notebooks created in docs/notebooks/" +endif generate-colab-notebooks: @echo "📓 Generating Colab-compatible notebooks..." @@ -624,7 +629,8 @@ clean-test-coverage: check-all check-all-fix check-config check-engine check-interface \ check-license-headers \ clean clean-dist clean-notebooks clean-pycache clean-test-coverage \ - convert-execute-notebooks coverage coverage-config coverage-engine coverage-interface \ + convert-execute-notebooks \ + coverage coverage-config coverage-engine coverage-interface \ format format-check format-check-config format-check-engine format-check-interface \ format-config format-engine format-interface \ generate-colab-notebooks help \ diff --git a/docs/scripts/build_notebooks_cached.sh b/docs/scripts/build_notebooks_cached.sh new file mode 100755 index 00000000..81093e94 --- /dev/null +++ b/docs/scripts/build_notebooks_cached.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Build notebooks with per-file caching. Only re-executes notebooks whose +# source .py file changed since the last cached build. +# +# Usage: +# ./docs/scripts/build_notebooks_cached.sh [CACHE_DIR] +# +# CACHE_DIR defaults to .notebook-cache + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +SOURCE_DIR="$REPO_ROOT/docs/notebook_source" +OUTPUT_DIR="$REPO_ROOT/docs/notebooks" +CACHE_DIR="${1:-$REPO_ROOT/.notebook-cache}" + +mkdir -p "$OUTPUT_DIR" "$CACHE_DIR" + +# Copy static files +cp "$SOURCE_DIR/_README.md" "$OUTPUT_DIR/README.md" +cp "$SOURCE_DIR/_pyproject.toml" "$OUTPUT_DIR/pyproject.toml" + +needs_cleanup=false + +for src in "$SOURCE_DIR"/*.py; do + name="$(basename "$src" .py)" + hash="$(shasum -a 256 "$src" | cut -d' ' -f1)" + cached_hash_file="$CACHE_DIR/${name}.sha256" + cached_notebook="$CACHE_DIR/${name}.ipynb" + + if [ -f "$cached_hash_file" ] && [ -f "$cached_notebook" ] && [ "$(cat "$cached_hash_file")" = "$hash" ]; then + echo " ✅ $name.ipynb — cached (unchanged)" + cp "$cached_notebook" "$OUTPUT_DIR/${name}.ipynb" + else + echo " 🔄 $name.ipynb — executing..." + uv run --all-packages --group notebooks --group docs jupytext --to ipynb --execute "$src" + mv "$SOURCE_DIR/${name}.ipynb" "$OUTPUT_DIR/${name}.ipynb" + needs_cleanup=true + + # Update cache + cp "$OUTPUT_DIR/${name}.ipynb" "$cached_notebook" + echo "$hash" > "$cached_hash_file" + fi +done + +if [ "$needs_cleanup" = true ]; then + # Clean up artifacts from executed notebooks + [ -d "$SOURCE_DIR/artifacts" ] && rm -rf "$SOURCE_DIR/artifacts" + find "$SOURCE_DIR" -name '*.csv' -delete 2>/dev/null || true +fi + +echo "✅ Notebooks ready in $OUTPUT_DIR" From 9a9a56c60e066dc2543277a29599ecfbdb2a199c Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:08:28 -0300 Subject: [PATCH 2/8] fix: address review comments on notebook caching - Don't write .sha256 during seeding so changed notebooks are detected - Rename TMPDIR to SEED_TMPDIR to avoid shadowing the POSIX env var - Use portable sha256 helper (sha256sum with shasum fallback) Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 10 ++++------ docs/scripts/build_notebooks_cached.sh | 10 +++++++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index e70f8d55..81e17112 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -38,16 +38,14 @@ jobs: echo "Cache miss — seeding from last successful build artifact..." LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --limit 1 --json databaseId -q '.[0].databaseId' 2>/dev/null || true) if [ -n "$LAST_RUN_ID" ]; then - TMPDIR=$(mktemp -d) - if gh run download "$LAST_RUN_ID" --name notebooks --dir "$TMPDIR" 2>/dev/null; then + SEED_TMPDIR=$(mktemp -d) + if gh run download "$LAST_RUN_ID" --name notebooks --dir "$SEED_TMPDIR" 2>/dev/null; then mkdir -p .notebook-cache for src in docs/notebook_source/*.py; do name="$(basename "$src" .py)" - nb="$TMPDIR/${name}.ipynb" + nb="$SEED_TMPDIR/${name}.ipynb" if [ -f "$nb" ]; then - hash="$(sha256sum "$src" | cut -d' ' -f1)" cp "$nb" ".notebook-cache/${name}.ipynb" - echo "$hash" > ".notebook-cache/${name}.sha256" echo " Seeded: ${name}" fi done @@ -55,7 +53,7 @@ jobs: else echo "Could not download artifact from run $LAST_RUN_ID, proceeding without cache" fi - rm -rf "$TMPDIR" + rm -rf "$SEED_TMPDIR" else echo "No previous successful run found, proceeding without cache" fi diff --git a/docs/scripts/build_notebooks_cached.sh b/docs/scripts/build_notebooks_cached.sh index 81093e94..929a5a95 100755 --- a/docs/scripts/build_notebooks_cached.sh +++ b/docs/scripts/build_notebooks_cached.sh @@ -12,6 +12,14 @@ set -euo pipefail +compute_sha256() { + if command -v sha256sum >/dev/null 2>&1; then + sha256sum "$1" | cut -d' ' -f1 + else + shasum -a 256 "$1" | cut -d' ' -f1 + fi +} + REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" SOURCE_DIR="$REPO_ROOT/docs/notebook_source" OUTPUT_DIR="$REPO_ROOT/docs/notebooks" @@ -27,7 +35,7 @@ needs_cleanup=false for src in "$SOURCE_DIR"/*.py; do name="$(basename "$src" .py)" - hash="$(shasum -a 256 "$src" | cut -d' ' -f1)" + hash="$(compute_sha256 "$src")" cached_hash_file="$CACHE_DIR/${name}.sha256" cached_notebook="$CACHE_DIR/${name}.ipynb" From ea0ed10d8e8d9cfafc201d286bb95dcb91bd7685 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:13:15 -0300 Subject: [PATCH 3/8] fix: only seed cache when truly empty, restore hash writing Skip artifact seeding when a partial cache was restored (it already has correct per-file hashes). Only seed + write current hashes when the cache dir is completely empty (true bootstrapping). Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index 81e17112..902797fb 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -35,7 +35,14 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - echo "Cache miss — seeding from last successful build artifact..." + # Only seed when the cache dir is truly empty (no partial restore). + # A partial restore already has correct per-file hashes — seeding + # over it would write current hashes for old notebooks, masking changes. + if [ -d .notebook-cache ] && [ -n "$(ls -A .notebook-cache 2>/dev/null)" ]; then + echo "Partial cache restored — skipping artifact seed" + exit 0 + fi + echo "Empty cache — seeding from last successful build artifact..." LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --limit 1 --json databaseId -q '.[0].databaseId' 2>/dev/null || true) if [ -n "$LAST_RUN_ID" ]; then SEED_TMPDIR=$(mktemp -d) @@ -45,7 +52,9 @@ jobs: name="$(basename "$src" .py)" nb="$SEED_TMPDIR/${name}.ipynb" if [ -f "$nb" ]; then + hash="$(sha256sum "$src" | cut -d' ' -f1)" cp "$nb" ".notebook-cache/${name}.ipynb" + echo "$hash" > ".notebook-cache/${name}.sha256" echo " Seeded: ${name}" fi done From 15027328f479fe266aa5ec33f3692d1ae535b227 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:19:58 -0300 Subject: [PATCH 4/8] fix: restrict artifact seed lookup to main branch Prevents seeding from feature branch runs that may have different notebook sources. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index 902797fb..dc8e9e56 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -43,7 +43,7 @@ jobs: exit 0 fi echo "Empty cache — seeding from last successful build artifact..." - LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --limit 1 --json databaseId -q '.[0].databaseId' 2>/dev/null || true) + LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --branch main --limit 1 --json databaseId -q '.[0].databaseId' 2>/dev/null || true) if [ -n "$LAST_RUN_ID" ]; then SEED_TMPDIR=$(mktemp -d) if gh run download "$LAST_RUN_ID" --name notebooks --dir "$SEED_TMPDIR" 2>/dev/null; then From 520c160edf3e39c110b1d839a837fee0593e13ca Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:30:17 -0300 Subject: [PATCH 5/8] fix: add actions:read permission for artifact seeding The seed step uses gh run list and gh run download which require actions:read. Without it, these calls silently fail and the cold-start cache bootstrapping never executes. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index dc8e9e56..0e19e602 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -9,6 +9,7 @@ jobs: build: runs-on: ubuntu-latest permissions: + actions: read contents: write env: NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }} From 632ecbd9b6df6807066e9f872f817bc300feaca7 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:47:50 -0300 Subject: [PATCH 6/8] fix: only use notebook cache when called from build-docs Scheduled Monday runs and manual workflow_dispatch should execute all notebooks to catch regressions (e.g. library changes that break a notebook). Caching is only used via workflow_call (from build-docs) where the goal is fast, resilient doc deployment. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index 0e19e602..a5e87498 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -24,6 +24,7 @@ jobs: - name: Set up Python run: uv python install 3.11 - name: Restore notebook cache + if: github.event_name == 'workflow_call' id: cache uses: actions/cache@v4 with: @@ -32,7 +33,7 @@ jobs: restore-keys: | notebooks- - name: Seed cache from last successful artifact - if: steps.cache.outputs.cache-hit != 'true' + if: github.event_name == 'workflow_call' && steps.cache.outputs.cache-hit != 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | @@ -67,8 +68,13 @@ jobs: else echo "No previous successful run found, proceeding without cache" fi - - name: Convert and execute notebooks (cached) - run: make convert-execute-notebooks USE_CACHE=1 + - name: Convert and execute notebooks + run: | + if [ "${{ github.event_name }}" = "workflow_call" ]; then + make convert-execute-notebooks USE_CACHE=1 + else + make convert-execute-notebooks + fi - name: Upload notebooks as artifacts uses: actions/upload-artifact@v4 with: From c8add3325309eb07cf8e591f173e245a82d53212 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 08:49:43 -0300 Subject: [PATCH 7/8] fix: use jq // empty to avoid "null" string on empty run list Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-notebooks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index a5e87498..03e84ca4 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -45,7 +45,7 @@ jobs: exit 0 fi echo "Empty cache — seeding from last successful build artifact..." - LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --branch main --limit 1 --json databaseId -q '.[0].databaseId' 2>/dev/null || true) + LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --branch main --limit 1 --json databaseId -q '.[0].databaseId // empty' 2>/dev/null || true) if [ -n "$LAST_RUN_ID" ]; then SEED_TMPDIR=$(mktemp -d) if gh run download "$LAST_RUN_ID" --name notebooks --dir "$SEED_TMPDIR" 2>/dev/null; then From 67101d6f6ab85c277b2ee05304ad57e13f0d914c Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Thu, 5 Mar 2026 09:09:20 -0300 Subject: [PATCH 8/8] feat: add use_cache input flag to notebook and docs workflows Replace event_name-based cache logic with an explicit use_cache boolean input. Defaults: - build-notebooks: workflow_call=true, dispatch=false, schedule=false - build-docs: dispatch=true (toggleable), release=false This gives full control over caching from the GitHub Actions UI. --- .github/workflows/build-docs.yml | 7 +++++++ .github/workflows/build-notebooks.yml | 20 ++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index a1a9b88c..7a6e6ca4 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -1,6 +1,11 @@ name: Build docs on: workflow_dispatch: + inputs: + use_cache: + description: "Use cached notebooks for unchanged sources" + type: boolean + default: true release: types: - published @@ -8,6 +13,8 @@ on: jobs: build-notebooks: uses: ./.github/workflows/build-notebooks.yml + with: + use_cache: ${{ github.event_name == 'workflow_dispatch' && inputs.use_cache || false }} secrets: inherit deploy: needs: build-notebooks diff --git a/.github/workflows/build-notebooks.yml b/.github/workflows/build-notebooks.yml index 03e84ca4..e110290b 100644 --- a/.github/workflows/build-notebooks.yml +++ b/.github/workflows/build-notebooks.yml @@ -1,7 +1,16 @@ name: Build notebooks on: workflow_call: + inputs: + use_cache: + type: boolean + default: true workflow_dispatch: + inputs: + use_cache: + description: "Use cached notebooks for unchanged sources" + type: boolean + default: false schedule: - cron: "0 12 * * MON" @@ -24,7 +33,7 @@ jobs: - name: Set up Python run: uv python install 3.11 - name: Restore notebook cache - if: github.event_name == 'workflow_call' + if: inputs.use_cache id: cache uses: actions/cache@v4 with: @@ -33,7 +42,7 @@ jobs: restore-keys: | notebooks- - name: Seed cache from last successful artifact - if: github.event_name == 'workflow_call' && steps.cache.outputs.cache-hit != 'true' + if: inputs.use_cache && steps.cache.outputs.cache-hit != 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | @@ -69,12 +78,7 @@ jobs: echo "No previous successful run found, proceeding without cache" fi - name: Convert and execute notebooks - run: | - if [ "${{ github.event_name }}" = "workflow_call" ]; then - make convert-execute-notebooks USE_CACHE=1 - else - make convert-execute-notebooks - fi + run: make convert-execute-notebooks ${{ inputs.use_cache && 'USE_CACHE=1' || '' }} - name: Upload notebooks as artifacts uses: actions/upload-artifact@v4 with: