NVIDIA · arjkesh · Jan 5, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/.github/workflows/tilegym-ci.yml b/.github/workflows/tilegym-ci.yml
@@ -288,11 +288,12 @@ jobs:
       (needs.build.result == 'success' || needs.build.result == 'skipped')
     runs-on: linux-amd64-gpu-rtxpro6000-latest-1
     steps:
-      - name: Checkout code (sparse - only need formatting script)
+      - name: Checkout code (sparse - need scripts and benchmarks)
         uses: actions/checkout@v4
         with:
           sparse-checkout: |
             .github/scripts/format_benchmark_summary.py
+            tests/benchmark
           sparse-checkout-cone-mode: false
 
       - name: Create test results directory
@@ -314,6 +315,7 @@ jobs:
           docker pull ${IMAGE}
           docker run --rm \
             --gpus all \
+            -v ${{ github.workspace }}/tests/benchmark:/workspace/tilegym/tests/benchmark \
             -v ${{ github.workspace }}/test-results:/test-results \
             -w /workspace/tilegym/tests/benchmark \
             ${IMAGE} \

diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh
@@ -5,14 +5,34 @@
 # SPDX-License-Identifier: MIT
 
 # Run all Python benchmark files and save results
-# Usage: ./run_all.sh [OUTPUT_DIR]
+# Usage: ./run_all.sh [OUTPUT_DIR] [--json]
+
+# Enable pipefail so a failing benchmark is not masked by tee's exit status
+set -o pipefail
 
 cd "$(dirname "$0")"
 
 OUTPUT_DIR="${1:-.}"
+FORMAT="txt"
+
+# Parse arguments: --json may appear in any position; the first other
+# non-empty argument is the output directory. OUTPUT_DIR was seeded from $1
+# above, which may be the flag itself, so re-derive it from scratch here.
+OUTPUT_DIR="."
+OUTPUT_DIR_SET=0
+for arg in "$@"; do
+    if [[ "$arg" == "--json" ]]; then
+        FORMAT="json"
+    elif [[ "$OUTPUT_DIR_SET" -eq 0 && -n "$arg" ]]; then
+        OUTPUT_DIR="$arg"
+        OUTPUT_DIR_SET=1
+    fi
+done
+
 mkdir -p "$OUTPUT_DIR"
 
 echo "Running benchmarks sequentially (parallel execution disabled to ensure accurate results)..."
+echo "Output format: $FORMAT"
 echo "Results will be saved to: $OUTPUT_DIR"
 echo "Current directory: $(pwd)"
 echo "Benchmark files found: $(ls bench_*.py 2>/dev/null | wc -l)"
@@ -24,7 +44,27 @@ if [[ ! -w "$OUTPUT_DIR" ]]; then
     exit 1
 fi
 
-# Run each benchmark and capture output
+# JSON mode: hand the whole run over to run_all_json.py and exit from here,
+# so the text runner further down is never reached when --json was given
+if [[ "$FORMAT" == "json" ]]; then
+    echo "Using JSON output format..."
+    if ! python3 run_all_json.py "$OUTPUT_DIR"; then
+        echo "Benchmark execution failed" >&2
+        exit 1
+    fi
+    echo ""
+    echo "=========================================="
+    echo "All benchmarks complete!"
+    echo "Results directory: $OUTPUT_DIR"
+    echo "Files created:"
+    ls -lh "$OUTPUT_DIR"/*.json 2>/dev/null || echo "  No result files found"
+    echo "=========================================="
+    exit 0
+fi
+
+# Original text format runner
+FAILED_BENCHMARKS=()
+
 for file in bench_*.py; do
     if [[ ! -f "$file" ]]; then
         echo "Warning: No benchmark files matching bench_*.py found" >&2
@@ -38,23 +78,43 @@ for file in bench_*.py; do
     echo "Running $file..."
     echo "=========================================="
 
-    # Ensure output file is created even if benchmark produces no output
-    touch "$output_file"
-
-    if python "$file" 2>&1 | tee "$output_file"; then
+    # Run the benchmark; stdout and stderr are both piped through tee
+    # Note: tee writes $output_file; with pipefail set, a python3 failure fails the pipeline
+    if python3 "$file" 2>&1 | tee "$output_file"; then
+        # Success - make the results file world-readable (best effort)
+        chmod 644 "$output_file" 2>/dev/null || true
         echo "✓ PASSED: $file"
         echo "  Results saved to: $output_file"
     else
+        # Failure - prepend a marker to the output tee already captured;
+        # if that cannot be done, fall back to a file holding only the marker
+        (echo "BENCHMARK FAILED"; echo ""; cat "$output_file") > "$output_file.new" 2>/dev/null && \
+            mv "$output_file.new" "$output_file" 2>/dev/null || \
+            echo "BENCHMARK FAILED" > "$output_file"
+        chmod 644 "$output_file" 2>/dev/null || true
         echo "✗ FAILED: $file"
-        echo "FAILED" > "$output_file"
-        exit 1  # Exit with error if any benchmark fails
+        echo "  Error details saved to: $output_file"
+        FAILED_BENCHMARKS+=("$file")
     fi
     echo ""
 done
 
 echo "=========================================="
-echo "All benchmarks complete!"
+# Summarize the run: list each failed benchmark by file name, or confirm
+# that everything passed when FAILED_BENCHMARKS is empty
+if (( ${#FAILED_BENCHMARKS[@]} > 0 )); then
+    echo "Benchmarks complete with failures! ✗"
+    echo "Failed benchmarks:"
+    printf '  - %s\n' "${FAILED_BENCHMARKS[@]}"
+else
+    echo "All benchmarks complete! ✓"
+fi
 echo "Results directory: $OUTPUT_DIR"
 echo "Files created:"
 ls -lh "$OUTPUT_DIR"/*_results.txt 2>/dev/null || echo "  No result files found"
 echo "=========================================="
+
+# Exit with a non-zero status when at least one benchmark failed, so the
+# failure is visible in the script's exit code
+(( ${#FAILED_BENCHMARKS[@]} == 0 )) ||
+    exit 1