Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
name: Build docs
on:
workflow_dispatch:
inputs:
use_cache:
description: "Use cached notebooks for unchanged sources"
type: boolean
default: true
release:
types:
- published

jobs:
build-notebooks:
uses: ./.github/workflows/build-notebooks.yml
with:
use_cache: ${{ github.event_name == 'workflow_dispatch' && inputs.use_cache || false }}
secrets: inherit
deploy:
needs: build-notebooks
Expand Down
57 changes: 56 additions & 1 deletion .github/workflows/build-notebooks.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
name: Build notebooks
on:
workflow_call:
inputs:
use_cache:
type: boolean
default: true
workflow_dispatch:
inputs:
use_cache:
description: "Use cached notebooks for unchanged sources"
type: boolean
default: false
schedule:
- cron: "0 12 * * MON"

jobs:
build:
runs-on: ubuntu-latest
permissions:
actions: read
contents: write
env:
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
Expand All @@ -22,8 +32,53 @@ jobs:
version: "0.9.5"
- name: Set up Python
run: uv python install 3.11
- name: Restore notebook cache
if: inputs.use_cache
id: cache
uses: actions/cache@v4
with:
path: .notebook-cache
key: notebooks-${{ hashFiles('docs/notebook_source/*.py') }}
restore-keys: |
notebooks-
- name: Seed cache from last successful artifact
if: inputs.use_cache && steps.cache.outputs.cache-hit != 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Only seed when the cache dir is truly empty (no partial restore).
# A partial restore already has correct per-file hashes — seeding
# over it would write current hashes for old notebooks, masking changes.
if [ -d .notebook-cache ] && [ -n "$(ls -A .notebook-cache 2>/dev/null)" ]; then
echo "Partial cache restored — skipping artifact seed"
exit 0
fi
echo "Empty cache — seeding from last successful build artifact..."
LAST_RUN_ID=$(gh run list --workflow build-notebooks.yml --status success --branch main --limit 1 --json databaseId -q '.[0].databaseId // empty' 2>/dev/null || true)
if [ -n "$LAST_RUN_ID" ]; then
SEED_TMPDIR=$(mktemp -d)
if gh run download "$LAST_RUN_ID" --name notebooks --dir "$SEED_TMPDIR" 2>/dev/null; then
mkdir -p .notebook-cache
for src in docs/notebook_source/*.py; do
name="$(basename "$src" .py)"
nb="$SEED_TMPDIR/${name}.ipynb"
if [ -f "$nb" ]; then
hash="$(sha256sum "$src" | cut -d' ' -f1)"
cp "$nb" ".notebook-cache/${name}.ipynb"
echo "$hash" > ".notebook-cache/${name}.sha256"
echo " Seeded: ${name}"
fi
Comment thread
andreatgretel marked this conversation as resolved.
Comment thread
andreatgretel marked this conversation as resolved.
done
echo "Cache seeded from run $LAST_RUN_ID"
else
echo "Could not download artifact from run $LAST_RUN_ID, proceeding without cache"
fi
rm -rf "$SEED_TMPDIR"
else
echo "No previous successful run found, proceeding without cache"
fi
- name: Convert and execute notebooks
run: make convert-execute-notebooks
run: make convert-execute-notebooks ${{ inputs.use_cache && 'USE_CACHE=1' || '' }}
- name: Upload notebooks as artifacts
uses: actions/upload-artifact@v4
with:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ NOTEPAD.md
# Build-time copy of README for data-designer package (copied from top-level during build)
packages/data-designer/README.md

# Notebook build cache
.notebook-cache/

# Cerebro knowledge base
.cerebro/
.cursor/rules/cerebro.mdc
Expand Down
10 changes: 8 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ help:
@echo " clean-dist - Remove dist directories from all packages"
@echo " verify-imports - Verify all package imports work"
@echo " show-versions - Show versions of all packages"
@echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext"
@echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext (USE_CACHE=1 to skip unchanged)"
@echo " generate-colab-notebooks - Generate Colab-compatible notebooks"
@echo " serve-docs-locally - Serve documentation locally"
@echo " check-license-headers - Check if all files have license headers"
Expand Down Expand Up @@ -461,6 +461,10 @@ serve-docs-locally:
uv run mkdocs serve --livereload

convert-execute-notebooks:
ifeq ($(USE_CACHE),1)
@echo "📓 Converting Python tutorials to notebooks (with caching)..."
@bash docs/scripts/build_notebooks_cached.sh
else
@echo "📓 Converting Python tutorials to notebooks and executing..."
@mkdir -p docs/notebooks
cp docs/notebook_source/_README.md docs/notebooks/README.md
Expand All @@ -470,6 +474,7 @@ convert-execute-notebooks:
rm -r docs/notebook_source/artifacts
rm docs/notebook_source/*.csv
@echo "✅ Notebooks created in docs/notebooks/"
endif

generate-colab-notebooks:
@echo "📓 Generating Colab-compatible notebooks..."
Expand Down Expand Up @@ -624,7 +629,8 @@ clean-test-coverage:
check-all check-all-fix check-config check-engine check-interface \
check-license-headers \
clean clean-dist clean-notebooks clean-pycache clean-test-coverage \
convert-execute-notebooks coverage coverage-config coverage-engine coverage-interface \
convert-execute-notebooks \
coverage coverage-config coverage-engine coverage-interface \
format format-check format-check-config format-check-engine format-check-interface \
format-config format-engine format-interface \
generate-colab-notebooks help \
Expand Down
63 changes: 63 additions & 0 deletions docs/scripts/build_notebooks_cached.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Build notebooks with per-file caching. Only re-executes notebooks whose
# source .py file changed since the last cached build.
#
# Usage:
# ./docs/scripts/build_notebooks_cached.sh [CACHE_DIR]
#
# CACHE_DIR defaults to <repo-root>/.notebook-cache (the default is resolved
# against the repository root, not the caller's working directory; a relative
# CACHE_DIR argument, however, is resolved against the current directory).

# Abort on any failed command, unset variable, or failure inside a pipeline.
set -euo pipefail

# Print the SHA-256 hex digest (64 chars) of the file given as $1.
# Prefers GNU coreutils sha256sum (Linux) and falls back to BSD shasum
# (macOS); both print "<digest>  <path>", so only the first field is kept.
compute_sha256() {
  local tool
  if command -v sha256sum >/dev/null 2>&1; then
    tool="sha256sum"
  else
    tool="shasum -a 256"
  fi
  $tool "$1" | awk '{print $1}'
}

# Resolve all paths relative to the repository root so the script works
# regardless of the caller's current directory.
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
SOURCE_DIR="$REPO_ROOT/docs/notebook_source"
OUTPUT_DIR="$REPO_ROOT/docs/notebooks"
# Optional first argument overrides the cache location.
CACHE_DIR="${1:-$REPO_ROOT/.notebook-cache}"

mkdir -p "$OUTPUT_DIR" "$CACHE_DIR"

# Copy static files
cp "$SOURCE_DIR/_README.md" "$OUTPUT_DIR/README.md"
cp "$SOURCE_DIR/_pyproject.toml" "$OUTPUT_DIR/pyproject.toml"

# Tracks whether at least one notebook was actually re-executed; execution
# can leave artifacts/ and .csv files behind in SOURCE_DIR that must be
# cleaned up afterwards.
needs_cleanup=false

for src in "$SOURCE_DIR"/*.py; do
  # Guard against an empty glob: without nullglob, a source dir with no .py
  # files yields the literal pattern, and hashing it would abort the build.
  [ -e "$src" ] || continue

  name="$(basename "$src" .py)"
  hash="$(compute_sha256 "$src")"
  cached_hash_file="$CACHE_DIR/${name}.sha256"
  cached_notebook="$CACHE_DIR/${name}.ipynb"

  # Cache hit: stored digest and cached notebook both exist AND the digest
  # matches the current source — reuse the cached .ipynb without executing.
  if [ -f "$cached_hash_file" ] && [ -f "$cached_notebook" ] && [ "$(cat "$cached_hash_file")" = "$hash" ]; then
    echo " ✅ $name.ipynb — cached (unchanged)"
    cp "$cached_notebook" "$OUTPUT_DIR/${name}.ipynb"
  else
    # Cache miss: convert and execute, then record notebook + digest so the
    # next run can skip this source if it is unchanged.
    echo " 🔄 $name.ipynb — executing..."
    uv run --all-packages --group notebooks --group docs jupytext --to ipynb --execute "$src"
    mv "$SOURCE_DIR/${name}.ipynb" "$OUTPUT_DIR/${name}.ipynb"
    needs_cleanup=true

    # Update cache
    cp "$OUTPUT_DIR/${name}.ipynb" "$cached_notebook"
    echo "$hash" > "$cached_hash_file"
  fi
done

if [ "$needs_cleanup" = true ]; then
  # Clean up artifacts from executed notebooks.
  # NOTE: a bare `[ -d ... ] && rm -rf ...` would kill the script under
  # `set -e` when the directory is absent (the failed AND-list returns
  # non-zero), so an explicit `if` is required here.
  if [ -d "$SOURCE_DIR/artifacts" ]; then
    rm -rf "$SOURCE_DIR/artifacts"
  fi
  find "$SOURCE_DIR" -name '*.csv' -delete 2>/dev/null || true
fi

echo "✅ Notebooks ready in $OUTPUT_DIR"