Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .recursive/agents/brain.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ Rules for delegation:
1. Give the sub-agent a SPECIFIC task. Include the task number, file path, and acceptance criteria.
2. **Match agent to zone.** Before delegating, read the task and identify which files it touches. If the task modifies `.recursive/` framework files (engine, prompts, agents, lib, operators, ops, scripts, templates, tests, skills) or root docs (CLAUDE.md, AGENTS.md), delegate to `evolve` (framework zone) — NOT `build`. The `build` agent is for `nightshift/` project code ONLY. Tasks with `target: recursive` in frontmatter are always framework-zone.
- **Security-to-framework path**: If the task queue contains pending tasks with `source: pentest` AND `target: recursive`, delegate them to `evolve`, not `build` and not `security`. The `security` agent produces findings (read-only); the `evolve` agent applies fixes to framework files. This is the only compliant path for confirmed security vulnerabilities in `.recursive/` code. The `pick-role.py` advisory will boost `evolve` when such tasks exist (signal: `pentest_framework_tasks >= 1`).
- **Eval cadence rule**: If `eval_staleness` appears in the dashboard Alerts (i.e., `sessions_since_eval >= 5`), the brain SHOULD include a Phractal E2E eval run as one of its delegations in that session. Delegate to the `build` agent with the eval task (task #0243 or the lowest-numbered pending eval task). The build agent should run `nightshift test --agent claude --cycles 2 --cycle-minutes 5 --repo-dir /tmp/nightshift-eval-NNNN` and write results to `.recursive/evaluations/NNNN.md`. Do not defer the eval indefinitely -- the build-measure-build feedback loop depends on regular measurement against Phractal.
3. Never delegate vague instructions like "improve the codebase".
4. One sub-agent per task. Do not ask one agent to do two unrelated things.
5. For build/review/oversee/achieve/strategize/security/evolve/audit-agent: always use `isolation: "worktree"`.
Expand Down
18 changes: 18 additions & 0 deletions .recursive/engine/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
read_healer_status,
read_latest_autonomy_score,
read_latest_eval_score,
sessions_since_eval,
)

# All roles tracked by sessions_since
Expand Down Expand Up @@ -162,6 +163,9 @@ def collect_signals(recursive_dir: Path) -> dict[str, object]:
signals["task_composition"] = task_composition
signals["human_tasks"] = human_tasks

# Eval staleness -- dedicated scalar signal for alert threshold
signals["sessions_since_eval"] = sessions_since_eval(evaluations_dir, index_rows)

# Decision-consequence signals (self-awareness)
signals["queue_trend"] = compute_queue_trend(decisions_path)
signals["agent_diversity"] = compute_agent_diversity(delegations)
Expand Down Expand Up @@ -189,6 +193,13 @@ def format_dashboard(signals: dict[str, object]) -> str:
eval_note = " (default)" if signals.get("_eval_is_default") else ""
auto_note = " (default)" if signals.get("_autonomy_is_default") else ""
lines.append(f"Eval score: {signals['eval_score']}/100{eval_note}")
eval_staleness = signals.get("sessions_since_eval", 0)
if isinstance(eval_staleness, int) and eval_staleness >= 5:
lines.append(f"Eval staleness: {eval_staleness} sessions [STALE -- rerun recommended]")
elif isinstance(eval_staleness, int) and eval_staleness > 0:
lines.append(f"Eval staleness: {eval_staleness} sessions")
else:
lines.append("Eval staleness: 0 sessions (up to date)")
lines.append(f"Autonomy score: {signals['autonomy_score']}/100{auto_note}")
lines.append(f"Healer status: {signals['healer_status']}")
nh_note = " (may be inaccurate if gh unavailable)" if signals["needs_human_issues"] == 0 else ""
Expand Down Expand Up @@ -272,6 +283,13 @@ def format_dashboard(signals: dict[str, object]) -> str:
lines.append("")
lines.append("Alerts:")
alerts: list[str] = []
# Eval staleness alert (threshold: 5 sessions)
eval_since = signals.get("sessions_since_eval", 0)
if isinstance(eval_since, int) and eval_since >= 5:
alerts.append(
f" eval_staleness: {eval_since} sessions since last Phractal eval"
" -- delegate build agent to run eval (task #0243 or equivalent)"
)
audit_since = signals.get("sessions_since_audit", 0)
if isinstance(audit_since, int) and audit_since >= 25:
alerts.append(f" Framework audit overdue ({audit_since} sessions since last)")
Expand Down
39 changes: 34 additions & 5 deletions .recursive/engine/lib-agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -626,23 +626,52 @@ PY

# cleanup_worktrees
# Prunes stale git worktrees left by sub-agent sessions.
# Removes worktrees marked 'prunable' by git.
# Removes ALL .claude/worktrees/agent-* worktrees (active sub-agent dirs),
# plus any worktrees marked 'prunable' by git, then runs git worktree prune.
# Safe to call from the daemon main loop: the daemon runs in REPO_DIR, not
# inside an agent-* worktree, so no currently-executing agent is skipped.
# If called from inside an agent worktree (e.g. evolve), the current dir is
# detected and skipped to avoid self-removal.
cleanup_worktrees() {
    local count=0
    local current_wt
    # Worktree this shell is currently executing in (empty if not in a repo).
    # Used below so an agent calling this (e.g. evolve) never removes itself.
    current_wt="$(git rev-parse --show-toplevel 2>/dev/null || echo "")"

    # Pass 1: remove ALL .claude/worktrees/agent-* worktrees by path.
    # Uses porcelain format to get one path per stanza reliably.
    while IFS= read -r wt_path; do
        # Skip empty lines
        [ -z "$wt_path" ] && continue
        # Skip the main worktree
        [ "$wt_path" = "$REPO_DIR" ] && continue
        # Skip the worktree we are currently executing inside (safety guard)
        [ "$wt_path" = "$current_wt" ] && continue
        # Only target agent worktrees in .claude/worktrees/
        case "$wt_path" in
            */.claude/worktrees/agent-*)
                git -C "$REPO_DIR" worktree remove "$wt_path" --force 2>/dev/null || true
                count=$((count + 1))
                ;;
        esac
    done < <(git -C "$REPO_DIR" worktree list --porcelain 2>/dev/null | grep "^worktree " | sed 's/^worktree //')

    # Pass 2: remove any remaining worktrees marked prunable by git.
    # NOTE(review): the path is taken as the first whitespace-delimited field,
    # so worktree paths containing spaces are not handled -- confirm acceptable.
    while IFS= read -r wt_line; do
        local wt_path
        wt_path=$(echo "$wt_line" | awk '{print $1}')
        # Skip the main worktree and the one we are executing inside
        [ "$wt_path" = "$REPO_DIR" ] && continue
        [ "$wt_path" = "$current_wt" ] && continue
        if echo "$wt_line" | grep -q "prunable" 2>/dev/null; then
            git -C "$REPO_DIR" worktree remove "$wt_path" --force 2>/dev/null || true
            count=$((count + 1))
        fi
    done < <(git -C "$REPO_DIR" worktree list 2>/dev/null)

    # Prune git metadata for any worktrees whose directories no longer exist.
    git -C "$REPO_DIR" worktree prune 2>/dev/null || true

    if [ "$count" -gt 0 ]; then
        echo " Cleaned up $count agent worktree(s)"
    fi
}

Expand Down
45 changes: 45 additions & 0 deletions .recursive/engine/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,51 @@ def compute_agent_diversity(delegations: list[set[str]], window: int = 10) -> di
return dict(sorted(counts.items(), key=lambda x: -x[1]))


def sessions_since_eval(evaluations_dir: Path, sessions_index: list[dict[str, str]]) -> int:
    """Count sessions since the last evaluation report.

    Reads the latest eval file from evaluations_dir (highest-numbered NNNN.md),
    extracts its ``**Date**: YYYY-MM-DD`` line, then counts how many rows in
    sessions_index have a timestamp after that date.

    The report date is the authoritative baseline: file mtime is only used to
    refine the comparison to minute precision when the mtime falls on the same
    day as the report date. Relying on mtime alone is wrong after a fresh
    clone/reset, because Git sets file mtimes to checkout time, which would
    make a stale eval look current and suppress the eval_staleness alert.

    Returns 0 if no eval files exist or the eval is up-to-date.
    Returns len(sessions_index) if the eval date cannot be determined.

    This is the primary eval-freshness signal used by the dashboard alert.
    compute_eval_staleness() provides the full (sessions, files_changed) tuple
    for the decision-patterns section; sessions_since_eval() is the scalar
    signal consumed by the Alerts section.
    """
    if not evaluations_dir.is_dir():
        return 0
    evals = sorted(evaluations_dir.glob("[0-9]*.md"))
    if not evals:
        return 0
    latest = evals[-1]
    try:
        text = latest.read_text(encoding="utf-8")
    except OSError:
        # Unreadable report: treat every session as post-eval (maximally stale).
        return len(sessions_index)
    dm = re.search(r"\*?\*?[Dd]ate\*?\*?:\s*(\d{4}-\d{2}-\d{2})", text)
    if not dm:
        return len(sessions_index)
    eval_date = dm.group(1)

    # Baseline is the report date. A bare "YYYY-MM-DD" compares lexicographically
    # below any "YYYY-MM-DD HH:MM" with the same date, so same-day sessions count
    # as after the eval (conservative: staleness fires sooner, not later).
    compare_ts = eval_date
    try:
        from datetime import datetime as _dt

        mtime = _dt.fromtimestamp(latest.stat().st_mtime)
        # Only trust mtime for intra-day precision when it agrees with the
        # report date; a checkout-time mtime (fresh clone) will not match.
        if mtime.strftime("%Y-%m-%d") == eval_date:
            compare_ts = mtime.strftime("%Y-%m-%d %H:%M")
    except OSError:
        pass

    # Count trailing sessions newer than the baseline. Assumes sessions_index
    # is in chronological order, so we can stop at the first older row.
    count = 0
    for row in reversed(sessions_index):
        if row.get("timestamp", "") > compare_ts:
            count += 1
        else:
            break
    return count


def compute_eval_staleness(evaluations_dir: Path, sessions_index: list[dict[str, str]]) -> tuple[int, int]:
"""How stale is the eval? Returns (sessions_since_eval, files_changed).

Expand Down
47 changes: 47 additions & 0 deletions .recursive/tests/test_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,50 @@ def test_classifies_safe_vs_specific(self, tmp_path: Path) -> None:
)
result = compute_commitment_quality(log)
assert "2/2 MET" in result


class TestEvalStalenessAlert:
    """Dashboard alert fires when sessions_since_eval >= 5 (task #0242)."""

    def _render(self, tmp_path: Path, staleness: int) -> str:
        """Render the dashboard with sessions_since_eval forced to *staleness*."""
        sig = collect_signals(tmp_path)
        sig["sessions_since_eval"] = staleness
        return format_dashboard(sig)

    def test_alert_fires_at_threshold(self, tmp_path: Path) -> None:
        rendered = self._render(tmp_path, 5)
        assert "eval_staleness" in rendered
        assert "STALE" in rendered  # top-level staleness indicator
        assert "5 sessions since last Phractal eval" in rendered  # alert text

    def test_alert_fires_above_threshold(self, tmp_path: Path) -> None:
        rendered = self._render(tmp_path, 14)
        assert "eval_staleness" in rendered
        assert "14 sessions since last Phractal eval" in rendered

    def test_no_alert_below_threshold(self, tmp_path: Path) -> None:
        # Alert text should not appear when below threshold
        rendered = self._render(tmp_path, 3)
        assert "sessions since last Phractal eval" not in rendered

    def test_staleness_shown_in_health_section(self, tmp_path: Path) -> None:
        """Eval staleness appears next to Eval score in the Health section."""
        rendered = self._render(tmp_path, 7)
        assert "Eval staleness:" in rendered
        assert "7 sessions" in rendered
        assert "STALE" in rendered

    def test_up_to_date_message_when_zero(self, tmp_path: Path) -> None:
        assert "up to date" in self._render(tmp_path, 0)

    def test_sessions_since_eval_in_collect_signals(self, tmp_path: Path) -> None:
        """collect_signals includes sessions_since_eval key."""
        sig = collect_signals(tmp_path)
        assert "sessions_since_eval" in sig
        assert isinstance(sig["sessions_since_eval"], int)
159 changes: 159 additions & 0 deletions .recursive/tests/test_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,162 @@ def test_date_cutoff_boundary(self, tmp_path: Path) -> None:
f"status: done\nsource: pentest\ncompleted: {before}\n",
)
assert signals.count_recent_pentest_tasks(tmp_path, days=3) == 1


# ---------------------------------------------------------------------------
# sessions_since_eval tests (task #0242)
# ---------------------------------------------------------------------------

# NOTE(review): module-level fixture mirroring the content _make_eval writes;
# it appears unused by the visible tests (each builds its own file through
# _make_eval) -- confirm no other module imports it before removing.
_VALID_EVAL_CONTENT = (
    "**Date**: 2026-04-08\n"
    "| Startup | 9/10 | OK |\n"
    "| Discovery | 8/10 | OK |\n"
    "| Fix quality | 8/10 | OK |\n"
    "| **Total** | **86/100** | |\n"
)


class TestSessionsSinceEval:
    """Unit tests for the sessions_since_eval signal."""

    @staticmethod
    def _set_mtime(path: Path, stamp: str) -> None:
        """Pin *path*'s atime/mtime to a local 'YYYY-MM-DD HH:MM' timestamp."""
        import os
        import time

        epoch = time.mktime(time.strptime(stamp, "%Y-%m-%d %H:%M"))
        os.utime(str(path), (epoch, epoch))

    def _make_eval(self, evals_dir: Path, name: str, date: str) -> Path:
        """Write a minimal valid eval file with the given date."""
        report = evals_dir / name
        report.write_text(
            f"**Date**: {date}\n"
            "| Startup | 9/10 | OK |\n"
            "| Discovery | 8/10 | OK |\n"
            "| Fix quality | 8/10 | OK |\n"
            "| **Total** | **86/100** | |\n"
        )
        return report

    def _make_index_rows(self, timestamps: list[str]) -> list[dict[str, str]]:
        """Build session index rows with the given timestamp strings."""
        return [{"timestamp": stamp, "role": "build"} for stamp in timestamps]

    def test_no_eval_dir_returns_zero(self, tmp_path: Path) -> None:
        missing_dir = tmp_path / "evaluations"
        rows = self._make_index_rows(["2026-04-09 01:00"])
        assert signals.sessions_since_eval(missing_dir, rows) == 0

    def test_empty_eval_dir_returns_zero(self, tmp_path: Path) -> None:
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        rows = self._make_index_rows(["2026-04-09 01:00"])
        assert signals.sessions_since_eval(evals_dir, rows) == 0

    def test_all_sessions_after_eval(self, tmp_path: Path) -> None:
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        report = self._make_eval(evals_dir, "0001.md", "2026-04-01")
        # Force the file mtime to an earlier date via a fixed timestamp
        self._set_mtime(report, "2026-04-01 12:00")
        rows = self._make_index_rows(
            ["2026-04-02 01:00", "2026-04-03 01:00", "2026-04-04 01:00"]
        )
        assert signals.sessions_since_eval(evals_dir, rows) == 3

    def test_some_sessions_after_eval(self, tmp_path: Path) -> None:
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        report = self._make_eval(evals_dir, "0001.md", "2026-04-05")
        self._set_mtime(report, "2026-04-05 12:00")
        rows = self._make_index_rows(
            [
                "2026-04-04 01:00",  # before eval
                "2026-04-05 10:00",  # before eval (mtime is 12:00)
                "2026-04-06 01:00",  # after eval
                "2026-04-07 01:00",  # after eval
            ]
        )
        assert signals.sessions_since_eval(evals_dir, rows) == 2

    def test_no_sessions_after_eval_returns_zero(self, tmp_path: Path) -> None:
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        report = self._make_eval(evals_dir, "0001.md", "2026-04-09")
        self._set_mtime(report, "2026-04-09 23:00")
        rows = self._make_index_rows(
            [
                "2026-04-09 01:00",  # before eval mtime
                "2026-04-09 10:00",  # before eval mtime
            ]
        )
        assert signals.sessions_since_eval(evals_dir, rows) == 0

    def test_uses_latest_eval_file(self, tmp_path: Path) -> None:
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        # Older eval (0001.md) with old date
        older = self._make_eval(evals_dir, "0001.md", "2026-04-01")
        self._set_mtime(older, "2026-04-01 12:00")
        # Newer eval (0002.md) with recent date -- function should use this one
        newer = self._make_eval(evals_dir, "0002.md", "2026-04-08")
        self._set_mtime(newer, "2026-04-08 12:00")
        rows = self._make_index_rows(
            [
                "2026-04-02 01:00",  # after 0001, before 0002
                "2026-04-09 01:00",  # after both evals
            ]
        )
        # Should use 0002.md (newest), so only 1 session after
        assert signals.sessions_since_eval(evals_dir, rows) == 1

    def test_empty_sessions_list_returns_zero(self, tmp_path: Path) -> None:
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        self._make_eval(evals_dir, "0001.md", "2026-04-01")
        assert signals.sessions_since_eval(evals_dir, []) == 0

    def test_five_or_more_sessions_triggers_alert_threshold(self, tmp_path: Path) -> None:
        """Confirm the alert threshold value: >= 5 should trigger."""
        evals_dir = tmp_path / "evaluations"
        evals_dir.mkdir()
        report = self._make_eval(evals_dir, "0001.md", "2026-04-01")
        self._set_mtime(report, "2026-04-01 12:00")
        rows = self._make_index_rows(
            [f"2026-04-0{day} 01:00" for day in range(2, 7)]
        )
        result = signals.sessions_since_eval(evals_dir, rows)
        assert result == 5
        assert result >= 5  # alert threshold met
Loading