Add stuck loop detection and failure tracking for features

gmccrackin · claude · gmccrackin · commit 1f74b885a057 · 2026-01-09T20:01:16.000-05:00
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md
@@ -189,7 +189,31 @@ Use browser automation tools:
 - [ ] Loading states appeared during API calls
 - [ ] Error states handle failures gracefully
 
-### STEP 6.6: MOCK DATA DETECTION SWEEP
+### STEP 6.6: HANDLING TOOL FAILURES
+
+#### Playwright "Not connected" or Timeout Errors
+
+If browser tools repeatedly fail with "Not connected" or timeout errors:
+
+1. **Do NOT retry more than 3 times** - Repeated failures indicate the MCP server may have disconnected
+2. **Record the failure** using `feature_record_failure` tool with the feature ID and error message
+3. **Update progress notes** in `claude-progress.txt` documenting the issue
+4. **Commit your progress** so work isn't lost
+5. **Let the session end** - The system will detect the stuck loop and restart with fresh MCP connections
+
+The session will automatically restart and resume the in-progress feature with working browser tools.
+
+#### General Error Recovery
+
+If ANY tool fails repeatedly (3+ times with the same error):
+1. Stop retrying - the issue likely requires a fresh session
+2. Call `feature_record_failure` with the feature ID and error message
+3. Commit any progress made
+4. Let the session end naturally
+
+**Never retry the same failing operation more than 3 times in a row.**
+
+### STEP 6.7: MOCK DATA DETECTION SWEEP (OPTIONAL)
 
 **Run this sweep AFTER EVERY FEATURE before marking it as passing:**
 
@@ -359,6 +383,9 @@ feature_mark_passing with feature_id={id}
 
 # 5. Skip a feature (moves to end of queue) - ONLY when blocked by dependency
 feature_skip with feature_id={id}
+
+# 6. Record a failure (when tools repeatedly fail) - increments failure count
+feature_record_failure with feature_id={id} and error_message="description"
 ```
 
 ### RULES:
diff --git a/agent.py b/agent.py
@@ -25,6 +25,9 @@
 AUTO_CONTINUE_DELAY_SECONDS = 3
 STOP_FILE_NAME = ".stop_requested"
 
+# Stuck loop detection: identical consecutive errors needed to end a session
+MAX_CONSECUTIVE_SAME_ERRORS = 5
+
 
 def check_stop_requested(project_dir: Path) -> bool:
     """
@@ -52,6 +55,34 @@ def request_stop(project_dir: Path) -> None:
     print("The agent will stop after completing the current feature.")
 
 
+def _normalize_error_for_comparison(error_str: str) -> str:
+    """
+    Normalize an error string for stuck loop detection.
+
+    Keeps only the first 100 characters, stripped and lower-cased, so errors
+    that differ only in case or beyond that prefix compare as equal.
+    """
+    # Slicing runs before strip(), so leading whitespace counts toward the 100
+    normalized = error_str[:100].strip().lower()
+    return normalized
+
+
+def _check_stuck_loop(error_history: list[str]) -> bool:
+    """
+    Check if the error history indicates a stuck loop.
+
+    Returns True only if the last MAX_CONSECUTIVE_SAME_ERRORS entries normalize equal.
+    """
+    if len(error_history) < MAX_CONSECUTIVE_SAME_ERRORS:
+        return False
+
+    recent = error_history[-MAX_CONSECUTIVE_SAME_ERRORS:]
+    normalized = [_normalize_error_for_comparison(e) for e in recent]
+
+    # One distinct normalized value means every recent error matched
+    return len(set(normalized)) == 1
+
+
 async def run_agent_session(
     client: ClaudeSDKClient,
     message: str,
@@ -69,9 +100,13 @@ async def run_agent_session(
         (status, response_text) where status is:
         - "continue" if agent should continue working
         - "error" if an error occurred
+        - "stuck" if stuck loop detected (repeated identical errors)
     """
     print("Sending prompt to Claude Agent SDK...\n")
 
+    # Track consecutive errors for stuck loop detection
+    error_history: list[str] = []
+
     try:
         # Send the query
         await client.query(message)
@@ -114,8 +149,24 @@ async def run_agent_session(
                             # Show errors (truncated)
                             error_str = str(result_content)[:500]
                             print(f"   [Error] {error_str}", flush=True)
+
+                            # Track error for stuck loop detection
+                            error_history.append(error_str)
+
+                            # Check for stuck loop
+                            if _check_stuck_loop(error_history):
+                                print("\n" + "=" * 70)
+                                print("  STUCK LOOP DETECTED")
+                                print("=" * 70)
+                                print(f"\nSame error repeated {MAX_CONSECUTIVE_SAME_ERRORS} times:")
+                                print(f"  {error_history[-1][:100]}...")
+                                print("\nEnding session for clean restart.")
+                                print("The MCP servers will reconnect in the next session.")
+                                print("-" * 70 + "\n")
+                                return "stuck", f"Stuck loop detected: {error_history[-1][:200]}"
                         else:
-                            # Tool succeeded - just show brief confirmation
+                            # Tool succeeded - reset error history
+                            error_history.clear()
                             print("   [Done]", flush=True)
 
         print("\n" + "-" * 70 + "\n")
@@ -218,6 +269,12 @@ async def run_autonomous_agent(
             print_progress_summary(project_dir)
             await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
 
+        elif status == "stuck":
+            print("\nSession ended due to stuck loop detection")
+            print("Starting fresh session with reconnected MCP servers...")
+            print_progress_summary(project_dir)
+            await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
+
         elif status == "error":
             print("\nSession encountered an error")
             print("Will retry with a fresh session...")
diff --git a/api/database.py b/api/database.py
@@ -29,6 +29,9 @@ class Feature(Base):
     steps = Column(JSON, nullable=False)  # Stored as JSON array
     passes = Column(Boolean, default=False, index=True)
     in_progress = Column(Boolean, default=False, index=True)
+    # Failure tracking for stuck loop detection
+    failure_count = Column(Integer, default=0)
+    last_error = Column(Text, nullable=True)
 
     def to_dict(self) -> dict:
         """Convert feature to dictionary for JSON serialization."""
@@ -41,6 +44,8 @@ def to_dict(self) -> dict:
             "steps": self.steps,
             "passes": self.passes,
             "in_progress": self.in_progress,
+            "failure_count": self.failure_count or 0,
+            "last_error": self.last_error,
         }
 
 
diff --git a/api/migration.py b/api/migration.py
@@ -198,3 +198,60 @@ def migrate_add_in_progress_column(
         return False
     finally:
         session.close()
+
+
+def migrate_add_failure_tracking_columns(
+    project_dir: Path,
+    session_maker: sessionmaker,
+) -> bool:
+    """
+    Add failure_count and last_error columns to existing databases.
+
+    This migration adds columns for tracking consecutive failures on features:
+    - failure_count: Number of consecutive session failures
+    - last_error: Last error message encountered
+
+    These enable stuck loop detection and auto-skipping of problematic features.
+
+    Args:
+        project_dir: Directory containing the project (not read by this function)
+        session_maker: SQLAlchemy session maker
+
+    Returns:
+        True if at least one column was added, False if both exist or on error
+    """
+    session: Session = session_maker()
+    try:
+        # PRAGMA table_info yields one row per column; index 1 is read as its name
+        result = session.execute(text("PRAGMA table_info(features)"))
+        columns = [row[1] for row in result.fetchall()]
+
+        added_any = False
+
+        # Add failure_count column if missing
+        if "failure_count" not in columns:
+            session.execute(
+                text("ALTER TABLE features ADD COLUMN failure_count INTEGER DEFAULT 0")
+            )
+            print("Added failure_count column to features table")
+            added_any = True
+
+        # Add last_error column if missing
+        if "last_error" not in columns:
+            session.execute(
+                text("ALTER TABLE features ADD COLUMN last_error TEXT")
+            )
+            print("Added last_error column to features table")
+            added_any = True
+
+        if added_any:
+            session.commit()
+
+        return added_any
+
+    except Exception as e:
+        session.rollback()
+        print(f"Error adding failure tracking columns: {e}")
+        return False
+    finally:
+        session.close()
diff --git a/client.py b/client.py
@@ -24,6 +24,7 @@
     "mcp__features__feature_mark_passing",
     "mcp__features__feature_skip",
     "mcp__features__feature_create_bulk",
+    "mcp__features__feature_record_failure",  # For stuck loop recovery
 ]
 
 # Playwright MCP tools for browser automation
@@ -144,7 +145,8 @@ def create_client(project_dir: Path, model: str):
             ],
             mcp_servers={
                 "playwright": {"command": "npx", "args": ["@playwright/mcp@latest", "--viewport-size", "1280x720"]},
-                "features": {
+                # "playwright": {"command": "npx", "args": ["@playwright/mcp@latest", "--headless"]},
+		"features": {
                     "command": sys.executable,  # Use the same Python that's running this script
                     "args": ["-m", "mcp_server.feature_mcp"],
                     "env": {
diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py