diff --git a/packages/claude-code-plugin/hooks/lib/hook_timer.py b/packages/claude-code-plugin/hooks/lib/hook_timer.py new file mode 100644 index 00000000..0d28c0c9 --- /dev/null +++ b/packages/claude-code-plugin/hooks/lib/hook_timer.py @@ -0,0 +1,86 @@ +"""Adaptive hook timeout tracking (#945). + +Tracks hook execution time and provides statistics and warnings +for hooks approaching their timeout threshold. +""" +import time +from typing import Dict, List + + +class HookTimer: + """Track hook execution times and provide performance statistics.""" + + def __init__(self) -> None: + self._active: Dict[str, float] = {} + self._timings: Dict[str, List[float]] = {} + + def start(self, hook_name: str) -> None: + """Start timing a hook execution. + + Args: + hook_name: Name of the hook being timed. + """ + self._active[hook_name] = time.monotonic() + + def stop(self, hook_name: str) -> float: + """Stop timing a hook and record the duration. + + Args: + hook_name: Name of the hook to stop timing. + + Returns: + Elapsed time in milliseconds. + + Raises: + ValueError: If no active timer exists for the hook. + """ + if hook_name not in self._active: + raise ValueError(f"No active timer for hook: {hook_name}") + elapsed_ms = (time.monotonic() - self._active.pop(hook_name)) * 1000 + if hook_name not in self._timings: + self._timings[hook_name] = [] + self._timings[hook_name].append(elapsed_ms) + return elapsed_ms + + def get_stats(self) -> Dict[str, Dict[str, float]]: + """Compute statistics for all tracked hooks. + + Returns: + Dict mapping hook_name to {count, avg_ms, p95_ms, max_ms}. + """ + result: Dict[str, Dict[str, float]] = {} + for hook_name, timings in self._timings.items(): + sorted_t = sorted(timings) + count = len(sorted_t) + avg_ms = sum(sorted_t) / count + p95_idx = int(count * 0.95) + if p95_idx >= count: + p95_idx = count - 1 + result[hook_name] = { + "count": count, + "avg_ms": round(avg_ms, 2), + "p95_ms": round(sorted_t[p95_idx], 2), + "max_ms": round(sorted_t[-1], 2), + } + return result + + def get_warnings(self, timeout_ms: int = 10000) -> List[str]: + """Return warnings for hooks using >=80% of their timeout. + + Args: + timeout_ms: Hook timeout in milliseconds (default 10000). + + Returns: + List of warning strings for slow hooks. + """ + threshold = timeout_ms * 0.8 + warnings: List[str] = [] + for hook_name, timings in self._timings.items(): + max_time = max(timings) + if max_time >= threshold: + pct = max_time / timeout_ms * 100 + warnings.append( + f"Hook '{hook_name}' used {max_time:.0f}ms " + f"of {timeout_ms}ms timeout ({pct:.0f}%)" + ) + return warnings diff --git a/packages/claude-code-plugin/hooks/lib/stats.py b/packages/claude-code-plugin/hooks/lib/stats.py index 199c96d3..30717dbe 100644 --- a/packages/claude-code-plugin/hooks/lib/stats.py +++ b/packages/claude-code-plugin/hooks/lib/stats.py @@ -6,7 +6,7 @@ import json import os import time -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional try: import fcntl @@ -52,12 +52,26 @@ def __init__(self, session_id: str, data_dir: Optional[str] = None, flush_interv "tool_count": 0, "error_count": 0, "tool_names": {}, + "hook_timings": {}, }) # In-memory accumulator — deltas since last flush self._mem_tool_count = 0 self._mem_error_count = 0 self._mem_tool_names: Dict[str, int] = {} + self._mem_hook_timings: Dict[str, List[float]] = {} + + def record_hook_timing(self, hook_name: str, elapsed_ms: float) -> None: + """Record a hook execution timing in memory. + + Args: + hook_name: Name of the hook (e.g. 'PreToolUse'). + elapsed_ms: Elapsed time in milliseconds. + """ + if hook_name not in self._mem_hook_timings: + self._mem_hook_timings[hook_name] = [] + self._mem_hook_timings[hook_name].append(elapsed_ms) + self._pending_count += 1 def record_tool_call(self, tool_name: str, success: bool = True) -> None: """Record a tool call in memory. Flushes to disk every flush_interval calls. @@ -86,11 +100,19 @@ def flush(self) -> None: for name, count in self._mem_tool_names.items(): tool_names[name] = tool_names.get(name, 0) + count data["tool_names"] = tool_names + # Merge hook timings + hook_timings = data.get("hook_timings", {}) + for name, times in self._mem_hook_timings.items(): + if name not in hook_timings: + hook_timings[name] = [] + hook_timings[name].extend(times) + data["hook_timings"] = hook_timings self._locked_write(data) # Reset in-memory accumulators self._mem_tool_count = 0 self._mem_error_count = 0 self._mem_tool_names = {} + self._mem_hook_timings = {} self._pending_count = 0 def _merged_data(self) -> Dict[str, Any]: @@ -102,6 +124,13 @@ def _merged_data(self) -> Dict[str, Any]: for name, count in self._mem_tool_names.items(): tool_names[name] = tool_names.get(name, 0) + count data["tool_names"] = tool_names + # Merge hook timings + hook_timings = dict(data.get("hook_timings", {})) + for name, times in self._mem_hook_timings.items(): + if name not in hook_timings: + hook_timings[name] = [] + hook_timings[name] = hook_timings[name] + times + data["hook_timings"] = hook_timings return data def format_summary(self) -> str: @@ -123,7 +152,18 @@ def format_summary(self) -> str: sorted_tools = sorted(tool_names.items(), key=lambda x: x[1], reverse=True) tools_str = " ".join(f"{name}:{count}" for name, count in sorted_tools[:5]) - return f"[CB] {minutes}m | {tool_count} tools | {error_count} {error_word} | {tools_str}" + summary = f"[CB] {minutes}m | {tool_count} tools | {error_count} {error_word} | {tools_str}" + + # Append hook timing report if any timings exist + hook_timings = data.get("hook_timings", {}) + if hook_timings: + timing_parts = [] + for name, timings in sorted(hook_timings.items()): + avg = sum(timings) / len(timings) + timing_parts.append(f"{name}:{avg:.0f}ms") + summary += f" | \u23f1 {' '.join(timing_parts)}" + + return summary def finalize(self) -> Dict[str, Any]: """Finalize session stats, return data, and cleanup file. @@ -136,6 +176,23 @@ def finalize(self) -> Dict[str, Any]: duration = time.time() - data.get("started_at", time.time()) data["duration_seconds"] = duration + # Compute hook timing statistics + hook_timings = data.get("hook_timings", {}) + if hook_timings: + hook_timing_stats: Dict[str, Any] = {} + for name, timings in hook_timings.items(): + sorted_t = sorted(timings) + count = len(sorted_t) + avg_ms = sum(sorted_t) / count + p95_idx = min(int(count * 0.95), count - 1) + hook_timing_stats[name] = { + "count": count, + "avg_ms": round(avg_ms, 2), + "p95_ms": round(sorted_t[p95_idx], 2), + "max_ms": round(sorted_t[-1], 2), + } + data["hook_timing_stats"] = hook_timing_stats + # Cleanup stats file try: os.remove(self.stats_file) @@ -190,6 +247,7 @@ def _locked_read(self) -> Dict[str, Any]: "tool_count": 0, "error_count": 0, "tool_names": {}, + "hook_timings": {}, } def _locked_write(self, data: Dict[str, Any]) -> None: diff --git a/packages/claude-code-plugin/tests/test_hook_timer.py b/packages/claude-code-plugin/tests/test_hook_timer.py new file mode 100644 index 00000000..a0440889 --- /dev/null +++ b/packages/claude-code-plugin/tests/test_hook_timer.py @@ -0,0 +1,117 @@ +"""Tests for HookTimer — adaptive hook timeout tracking (#945).""" +import os +import sys +import time + +import pytest + +# Ensure hooks/lib is on path +_tests_dir = os.path.dirname(os.path.abspath(__file__)) +_lib_dir = os.path.join(os.path.dirname(_tests_dir), "hooks", "lib") +if _lib_dir not in sys.path: + sys.path.insert(0, _lib_dir) + +from hook_timer import HookTimer + + +@pytest.fixture +def timer(): + return HookTimer() + + +class TestStartStop: + """Test basic start/stop timing functionality.""" + + def test_start_stop_returns_elapsed_ms(self, timer): + """stop() should return elapsed time in milliseconds.""" + timer.start("my_hook") + time.sleep(0.05) + elapsed = timer.stop("my_hook") + assert elapsed >= 40 # at least ~40ms (sleep 50ms with tolerance) + assert elapsed < 200 # not unreasonably long + + def test_stop_without_start_raises(self, timer): + """stop() without matching start() should raise ValueError.""" + with pytest.raises(ValueError, match="No active timer"): + timer.stop("nonexistent_hook") + + def test_multiple_hooks_tracked_independently(self, timer): + """Different hooks should track independently.""" + timer.start("hook_a") + time.sleep(0.02) + timer.start("hook_b") + time.sleep(0.02) + elapsed_a = timer.stop("hook_a") + elapsed_b = timer.stop("hook_b") + # hook_a ran longer than hook_b + assert elapsed_a > elapsed_b + + def test_same_hook_multiple_times(self, timer): + """Same hook can be started/stopped multiple times, recording each.""" + timer.start("hook_a") + timer.stop("hook_a") + timer.start("hook_a") + timer.stop("hook_a") + stats = timer.get_stats() + assert stats["hook_a"]["count"] == 2 + + +class TestGetStats: + """Test statistics calculation.""" + + def test_empty_stats(self, timer): + """get_stats() on fresh timer returns empty dict.""" + assert timer.get_stats() == {} + + def test_stats_count_and_avg(self, timer): + """get_stats() should compute count and avg_ms correctly.""" + # Inject known timings for deterministic tests + timer._timings["hook_a"] = [100.0, 200.0, 300.0] + stats = timer.get_stats() + assert stats["hook_a"]["count"] == 3 + assert stats["hook_a"]["avg_ms"] == pytest.approx(200.0, abs=0.01) + + def test_stats_p95(self, timer): + """get_stats() should compute p95_ms correctly.""" + # 20 values: 10, 20, ..., 200 + timer._timings["hook_a"] = [float(i * 10) for i in range(1, 21)] + stats = timer.get_stats() + # p95 of 20 items: index 19 (0.95*20=19) -> value 200 + assert stats["hook_a"]["p95_ms"] == pytest.approx(200.0, abs=0.01) + + def test_stats_max(self, timer): + """get_stats() should compute max_ms correctly.""" + timer._timings["hook_a"] = [10.0, 50.0, 30.0, 999.0, 20.0] + stats = timer.get_stats() + assert stats["hook_a"]["max_ms"] == pytest.approx(999.0, abs=0.01) + + +class TestGetWarnings: + """Test timeout warning detection.""" + + def test_no_warnings_when_fast(self, timer): + """Hooks well under timeout should produce no warnings.""" + timer._timings["fast_hook"] = [100.0, 200.0, 300.0] + warnings = timer.get_warnings(timeout_ms=10000) + assert warnings == [] + + def test_warning_at_80_percent(self, timer): + """Hook using >=80% of timeout should produce a warning.""" + # 80% of 10000 = 8000 + timer._timings["slow_hook"] = [8000.0] + warnings = timer.get_warnings(timeout_ms=10000) + assert len(warnings) == 1 + assert "slow_hook" in warnings[0] + + def test_warning_with_custom_timeout(self, timer): + """get_warnings() should respect custom timeout_ms.""" + timer._timings["hook_a"] = [500.0] + # 80% of 600 = 480, so 500 >= 480 -> warning + warnings = timer.get_warnings(timeout_ms=600) + assert len(warnings) == 1 + + def test_no_warning_just_below_threshold(self, timer): + """Hook just below 80% threshold should NOT warn.""" + timer._timings["hook_a"] = [7999.0] + warnings = timer.get_warnings(timeout_ms=10000) + assert warnings == [] diff --git a/packages/claude-code-plugin/tests/test_stats.py b/packages/claude-code-plugin/tests/test_stats.py index 925350af..40756240 100644 --- a/packages/claude-code-plugin/tests/test_stats.py +++ b/packages/claude-code-plugin/tests/test_stats.py @@ -195,6 +195,49 @@ def test_format_summary_uses_memory_data(self, data_dir): assert "Bash:2" in summary +class TestHookTimingIntegration: + """Tests for hook timing integration in SessionStats (#945).""" + + def test_hook_timings_field_initialized(self, stats): + """SessionStats should have hook_timings field in stored data.""" + data = stats._locked_read() + assert "hook_timings" in data + assert data["hook_timings"] == {} + + def test_record_hook_timing_stores_data(self, stats): + """record_hook_timing() should store timing in hook_timings.""" + stats.record_hook_timing("PreToolUse", 150.0) + stats.record_hook_timing("PreToolUse", 200.0) + stats.record_hook_timing("PostToolUse", 50.0) + stats.flush() + data = stats._locked_read() + assert data["hook_timings"]["PreToolUse"] == [150.0, 200.0] + assert data["hook_timings"]["PostToolUse"] == [50.0] + + def test_finalize_includes_hook_timing_stats(self, stats): + """finalize() should include computed hook timing statistics.""" + stats.record_hook_timing("PreToolUse", 100.0) + stats.record_hook_timing("PreToolUse", 200.0) + stats.record_hook_timing("PreToolUse", 300.0) + result = stats.finalize() + assert "hook_timing_stats" in result + ht = result["hook_timing_stats"]["PreToolUse"] + assert ht["count"] == 3 + assert ht["avg_ms"] == pytest.approx(200.0, abs=0.01) + assert ht["max_ms"] == pytest.approx(300.0, abs=0.01) + + def test_format_summary_includes_timing_report(self, stats): + """format_summary() should include hook timing info when present.""" + stats.record_hook_timing("PreToolUse", 8500.0) + summary = stats.format_summary() + assert "PreToolUse" in summary + + def test_format_summary_no_timing_when_empty(self, stats): + """format_summary() should not include timing section when no timings.""" + summary = stats.format_summary() + assert "⏱" not in summary + + class TestCleanup: def test_cleanup_stale_removes_old_files(self, data_dir): # Create a stale file