diff --git a/docs/worktrees.md b/docs/worktrees.md index 38b4c895..2c1cf2f5 100644 --- a/docs/worktrees.md +++ b/docs/worktrees.md @@ -67,6 +67,42 @@ whichever worktree it already pointed at — `PYTHONPATH` from `.envrc` does the per-worktree routing. (Running `pip install -e` again rewrites that record and will pin "default" Python to the new worktree.) +## TOGSim binary is shared + +`TOGSim/build/bin/Simulator` is a standalone C++ binary whose source rarely +changes alongside Python frontend work, so `setup_worktree.sh` symlinks it +from the worktree the script was invoked in. If you do modify TOGSim C++ in +a particular worktree, delete the symlink and run `cd TOGSim/build && make` +locally — `Simulator/simulator.py` resolves the binary path relative to +`TORCHSIM_DIR`, so each worktree has its own resolution. + +If no worktree has the binary yet, build it once (any worktree) per +the CLAUDE.md "Build" section. + +## Iterating on codegen inside a worktree + +`.envrc` gives each worktree its own `$TORCHSIM_DUMP_PATH=$_self/outputs`, +so parallel worktrees do not share caches. But within a worktree, after +editing anything that affects emitted MLIR or wrapper code +(`PyTorchSimFrontend/mlir/*`, lowering rules, codegen backend), the next +`torch.compile` will replay the previously cached compile from +`outputs/<hash>/` and your change silently does not take effect. Run: + +```bash +scripts/clear_codegen_cache.sh +``` + +between iterations. It wipes `outputs/.torchinductor` (Inductor's compile +cache, set via `TORCHINDUCTOR_CACHE_DIR` in `extension_config.py:139`) and +the per-source-hash dirs (`outputs/<11-char-hash>/`, keyed by +`extension_codecache.hash_prefix`). `togsim_results/` (run logs) is left +alone. 
+ +Diagnostic for the other common gotcha: if a traceback mentions a path +under `/workspace/PyTorchSim/...` while you are editing in a different +worktree, you forgot to `source .envrc` in that shell — Python imported the +canonical worktree's `PyTorchSimFrontend` instead of yours. + ## What the env looks like Worktree-scoped (auto-set by `.envrc`): diff --git a/scripts/clear_codegen_cache.sh b/scripts/clear_codegen_cache.sh new file mode 100755 index 00000000..a7a2b550 --- /dev/null +++ b/scripts/clear_codegen_cache.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Clear PyTorchSim's codegen caches so the next torch.compile run regenerates +# the wrapper Python and the per-kernel MLIR. Run this whenever you edit +# anything that affects emitted MLIR (PyTorchSimFrontend/mlir/*, lowering +# rules, codegen backend, etc.) -- otherwise the previous compile is replayed +# byte-for-byte from $TORCHSIM_DUMP_PATH and your change appears not to take. +# +# Wipes: +# $TORCHSIM_DUMP_PATH/.torchinductor (Inductor compile cache, points +# here via TORCHINDUCTOR_CACHE_DIR +# set in extension_config.py) +# $TORCHSIM_DUMP_PATH/<11-char-hash>/ (per-source MLIR/wrapper dirs, +# keyed by hash_prefix(src) in +# extension_codecache.py) +# +# Does NOT touch: +# $TORCHSIM_LOG_PATH (togsim_results/, just simulation logs) +# Anything outside $TORCHSIM_DUMP_PATH +# +# Usage: +# scripts/clear_codegen_cache.sh +set -euo pipefail + +DUMP_PATH="${TORCHSIM_DUMP_PATH:-${TORCHSIM_DIR:-/workspace/PyTorchSim}/outputs}" + +if [[ ! -d "$DUMP_PATH" ]]; then + echo "No cache at $DUMP_PATH; nothing to clear." + exit 0 +fi + +echo "Clearing $DUMP_PATH/.torchinductor and per-source-hash dirs" +rm -rf "$DUMP_PATH/.torchinductor" + +# Per-source-hash dirs are an 11-char alphanumeric prefix +# (extension_codecache.hash_prefix). Match by length+charset so we don't +# touch anything else a developer may have parked under outputs/. 
+find "$DUMP_PATH" -mindepth 1 -maxdepth 1 -type d \ + -regextype posix-egrep -regex '.*/[a-z0-9]{11}$' \ + -exec rm -rf {} + + +echo "Done." diff --git a/scripts/setup_worktree.sh b/scripts/setup_worktree.sh index 3c4c5f95..1323a49c 100755 --- a/scripts/setup_worktree.sh +++ b/scripts/setup_worktree.sh @@ -61,6 +61,24 @@ git -C "$REPO_ROOT" worktree add "$WT_DIR" -b "$BRANCH" "$BASE_REF" # pushes to its own name on first `git push -u origin `. git -C "$WT_DIR" branch --unset-upstream || true +# Share the TOGSim binary from the worktree this script was run from. TOGSim +# is a standalone C++ simulator that rarely changes alongside Python frontend +# work, so symlinking saves a ~10-minute rebuild per worktree. If you do +# modify TOGSim C++ in the new worktree, delete the symlink itself (not the +# shared binary it points at) and run `cd TOGSim/build && make` so the local +# build output takes its place. +TOGSIM_BIN_SRC="$REPO_ROOT/TOGSim/build/bin/Simulator" +TOGSIM_BIN_DST="$WT_DIR/TOGSim/build/bin/Simulator" +if [[ -x "$TOGSIM_BIN_SRC" ]]; then + # Resolve so we point at the real binary, not a chain of worktree symlinks. + TOGSIM_BIN_REAL="$(readlink -f "$TOGSIM_BIN_SRC")" + mkdir -p "$(dirname "$TOGSIM_BIN_DST")" + ln -sfn "$TOGSIM_BIN_REAL" "$TOGSIM_BIN_DST" + TOGSIM_LINK_MSG="Symlinked TOGSim binary from $TOGSIM_BIN_REAL" +else + TOGSIM_LINK_MSG="TOGSim binary not found at $TOGSIM_BIN_SRC; build it once with 'cd TOGSim/build && conan install .. --build=missing && cmake .. && make -j' or symlink from another worktree." +fi + # Per-worktree env. Container-dedicated paths for shared binaries. 
cat > "$WT_DIR/.envrc" <<'ENVRC' #!/usr/bin/env bash @@ -92,8 +110,13 @@ ENVRC echo echo "Created worktree: $WT_DIR" echo "Branch: $BRANCH (base: $BASE_REF)" +echo "$TOGSIM_LINK_MSG" echo echo "Next:" echo " cd $WT_DIR" echo " source .envrc" echo " (cd PyTorchSimDevice && python setup.py build_ext --inplace) # build the .so once" +echo +echo "When iterating on PyTorchSimFrontend/mlir/* (or any codegen) in this worktree," +echo "run scripts/clear_codegen_cache.sh between runs so the cached compile does not" +echo "shadow your changes."