From 3a22a29b473d2582d763c9fb88d55f926c20e7e4 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 09:37:51 +0400
Subject: [PATCH 01/14] auto-claude: subtask-1-1 - Add recovery iteration loop
 to run_qa_fixer_session

---
 apps/backend/qa/fixer.py | 404 +++++++++++++++++++++------------------
 1 file changed, 220 insertions(+), 184 deletions(-)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index f898add1a..6ea1a122c 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -26,6 +26,7 @@
 
 # Configuration
 QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
+MAX_FIXER_ITERATIONS = 10  # Max recovery attempts for a single QA fix session
 
 
 # =============================================================================
@@ -123,199 +124,234 @@ async def run_qa_fixer_session(
     prompt += f"\n**IMPORTANT**: All spec files are located in: `{spec_dir}/`\n"
     prompt += f"The fix request file is at: `{spec_dir}/QA_FIX_REQUEST.md`\n"
 
-    try:
-        debug("qa_fixer", "Sending query to Claude SDK...")
-        await client.query(prompt)
-        debug_success("qa_fixer", "Query sent successfully")
-
-        response_text = ""
-        debug("qa_fixer", "Starting to receive response stream...")
-        async for msg in client.receive_response():
-            msg_type = type(msg).__name__
-            message_count += 1
-            debug_detailed(
+    # Recovery iteration loop - retry if agent gets stuck or fails
+    last_error = None
+    for fixer_iteration in range(1, MAX_FIXER_ITERATIONS + 1):
+        if fixer_iteration > 1:
+            print(f"\n{'=' * 70}")
+            print(f"  QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}")
+            print(f"{'=' * 70}\n")
+            debug(
                 "qa_fixer",
-                f"Received message #{message_count}",
-                msg_type=msg_type,
+                f"Starting recovery attempt {fixer_iteration}",
+                max_iterations=MAX_FIXER_ITERATIONS,
             )
 
-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        response_text += block.text
-                        print(block.text, end="", flush=True)
-                        # Log text to task logger (persist without double-printing)
-                        if task_logger and block.text.strip():
-                            task_logger.log(
-                                block.text,
-                                LogEntryType.TEXT,
-                                LogPhase.VALIDATION,
-                                print_to_console=False,
-                            )
-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        tool_name = block.name
-                        tool_input_display = None
-                        tool_count += 1
-
-                        # Safely extract tool input (handles None, non-dict, etc.)
-                        inp = get_safe_tool_input(block)
-
-                        if inp:
-                            if "file_path" in inp:
-                                fp = inp["file_path"]
-                                if len(fp) > 50:
-                                    fp = "..." + fp[-47:]
-                                tool_input_display = fp
-                            elif "command" in inp:
-                                cmd = inp["command"]
-                                if len(cmd) > 50:
-                                    cmd = cmd[:47] + "..."
-                                tool_input_display = cmd
-
-                        debug(
-                            "qa_fixer",
-                            f"Tool call #{tool_count}: {tool_name}",
-                            tool_input=tool_input_display,
-                        )
-
-                        # Log tool start (handles printing)
-                        if task_logger:
-                            task_logger.tool_start(
-                                tool_name,
-                                tool_input_display,
-                                LogPhase.VALIDATION,
-                                print_to_console=True,
+        try:
+            debug("qa_fixer", "Sending query to Claude SDK...")
+            await client.query(prompt)
+            debug_success("qa_fixer", "Query sent successfully")
+
+            response_text = ""
+            debug("qa_fixer", "Starting to receive response stream...")
+            async for msg in client.receive_response():
+                msg_type = type(msg).__name__
+                message_count += 1
+                debug_detailed(
+                    "qa_fixer",
+                    f"Received message #{message_count}",
+                    msg_type=msg_type,
+                )
+
+                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                    for block in msg.content:
+                        block_type = type(block).__name__
+
+                        if block_type == "TextBlock" and hasattr(block, "text"):
+                            response_text += block.text
+                            print(block.text, end="", flush=True)
+                            # Log text to task logger (persist without double-printing)
+                            if task_logger and block.text.strip():
+                                task_logger.log(
+                                    block.text,
+                                    LogEntryType.TEXT,
+                                    LogPhase.VALIDATION,
+                                    print_to_console=False,
+                                )
+                        elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                            tool_name = block.name
+                            tool_input_display = None
+                            tool_count += 1
+
+                            # Safely extract tool input (handles None, non-dict, etc.)
+                            inp = get_safe_tool_input(block)
+
+                            if inp:
+                                if "file_path" in inp:
+                                    fp = inp["file_path"]
+                                    if len(fp) > 50:
+                                        fp = "..." + fp[-47:]
+                                    tool_input_display = fp
+                                elif "command" in inp:
+                                    cmd = inp["command"]
+                                    if len(cmd) > 50:
+                                        cmd = cmd[:47] + "..."
+                                    tool_input_display = cmd
+
+                            debug(
+                                "qa_fixer",
+                                f"Tool call #{tool_count}: {tool_name}",
+                                tool_input=tool_input_display,
                             )
-                        else:
-                            print(f"\n[Fixer Tool: {tool_name}]", flush=True)
 
-                        if verbose and hasattr(block, "input"):
-                            input_str = str(block.input)
-                            if len(input_str) > 300:
-                                print(f"   Input: {input_str[:300]}...", flush=True)
+                            # Log tool start (handles printing)
+                            if task_logger:
+                                task_logger.tool_start(
+                                    tool_name,
+                                    tool_input_display,
+                                    LogPhase.VALIDATION,
+                                    print_to_console=True,
+                                )
                             else:
-                                print(f"   Input: {input_str}", flush=True)
-                        current_tool = tool_name
-
-            elif msg_type == "UserMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
-
-                    if block_type == "ToolResultBlock":
-                        is_error = getattr(block, "is_error", False)
-                        result_content = getattr(block, "content", "")
-
-                        if is_error:
-                            debug_error(
-                                "qa_fixer",
-                                f"Tool error: {current_tool}",
-                                error=str(result_content)[:200],
-                            )
-                            error_str = str(result_content)[:500]
-                            print(f"   [Error] {error_str}", flush=True)
-                            if task_logger and current_tool:
-                                # Store full error in detail for expandable view
-                                task_logger.tool_end(
-                                    current_tool,
-                                    success=False,
-                                    result=error_str[:100],
-                                    detail=str(result_content),
-                                    phase=LogPhase.VALIDATION,
+                                print(f"\n[Fixer Tool: {tool_name}]", flush=True)
+
+                            if verbose and hasattr(block, "input"):
+                                input_str = str(block.input)
+                                if len(input_str) > 300:
+                                    print(f"   Input: {input_str[:300]}...", flush=True)
+                                else:
+                                    print(f"   Input: {input_str}", flush=True)
+                            current_tool = tool_name
+
+                elif msg_type == "UserMessage" and hasattr(msg, "content"):
+                    for block in msg.content:
+                        block_type = type(block).__name__
+
+                        if block_type == "ToolResultBlock":
+                            is_error = getattr(block, "is_error", False)
+                            result_content = getattr(block, "content", "")
+
+                            if is_error:
+                                debug_error(
+                                    "qa_fixer",
+                                    f"Tool error: {current_tool}",
+                                    error=str(result_content)[:200],
                                 )
-                        else:
-                            debug_detailed(
-                                "qa_fixer",
-                                f"Tool success: {current_tool}",
-                                result_length=len(str(result_content)),
-                            )
-                            if verbose:
-                                result_str = str(result_content)[:200]
-                                print(f"   [Done] {result_str}", flush=True)
+                                error_str = str(result_content)[:500]
+                                print(f"   [Error] {error_str}", flush=True)
+                                if task_logger and current_tool:
+                                    # Store full error in detail for expandable view
+                                    task_logger.tool_end(
+                                        current_tool,
+                                        success=False,
+                                        result=error_str[:100],
+                                        detail=str(result_content),
+                                        phase=LogPhase.VALIDATION,
+                                    )
                             else:
-                                print("   [Done]", flush=True)
-                            if task_logger and current_tool:
-                                # Store full result in detail for expandable view
-                                detail_content = None
-                                if current_tool in (
-                                    "Read",
-                                    "Grep",
-                                    "Bash",
-                                    "Edit",
-                                    "Write",
-                                ):
-                                    result_str = str(result_content)
-                                    if len(result_str) < 50000:
-                                        detail_content = result_str
-                                task_logger.tool_end(
-                                    current_tool,
-                                    success=True,
-                                    detail=detail_content,
-                                    phase=LogPhase.VALIDATION,
+                                debug_detailed(
+                                    "qa_fixer",
+                                    f"Tool success: {current_tool}",
+                                    result_length=len(str(result_content)),
                                 )
+                                if verbose:
+                                    result_str = str(result_content)[:200]
+                                    print(f"   [Done] {result_str}", flush=True)
+                                else:
+                                    print("   [Done]", flush=True)
+                                if task_logger and current_tool:
+                                    # Store full result in detail for expandable view
+                                    detail_content = None
+                                    if current_tool in (
+                                        "Read",
+                                        "Grep",
+                                        "Bash",
+                                        "Edit",
+                                        "Write",
+                                    ):
+                                        result_str = str(result_content)
+                                        if len(result_str) < 50000:
+                                            detail_content = result_str
+                                    task_logger.tool_end(
+                                        current_tool,
+                                        success=True,
+                                        detail=detail_content,
+                                        phase=LogPhase.VALIDATION,
+                                    )
+
+                            current_tool = None
+
+            print("\n" + "-" * 70 + "\n")
+
+            # Check if fixes were applied
+            status = get_qa_signoff_status(spec_dir)
+            debug(
+                "qa_fixer",
+                "Fixer session completed",
+                message_count=message_count,
+                tool_count=tool_count,
+                response_length=len(response_text),
+                ready_for_revalidation=status.get("ready_for_qa_revalidation")
+                if status
+                else False,
+            )
 
-                        current_tool = None
-
-        print("\n" + "-" * 70 + "\n")
-
-        # Check if fixes were applied
-        status = get_qa_signoff_status(spec_dir)
-        debug(
-            "qa_fixer",
-            "Fixer session completed",
-            message_count=message_count,
-            tool_count=tool_count,
-            response_length=len(response_text),
-            ready_for_revalidation=status.get("ready_for_qa_revalidation")
-            if status
-            else False,
-        )
-
-        # Save fixer session insights to memory
-        fixer_discoveries = {
-            "files_understood": {},
-            "patterns_found": [
-                f"QA fixer session {fix_session}: Applied fixes from QA_FIX_REQUEST.md"
-            ],
-            "gotchas_encountered": [],
-        }
-
-        if status and status.get("ready_for_qa_revalidation"):
-            debug_success("qa_fixer", "Fixes applied, ready for QA revalidation")
-            # Save successful fix session to memory
-            await save_session_memory(
-                spec_dir=spec_dir,
-                project_dir=project_dir,
-                subtask_id=f"qa_fixer_{fix_session}",
-                session_num=fix_session,
-                success=True,
-                subtasks_completed=[f"qa_fixer_{fix_session}"],
-                discoveries=fixer_discoveries,
+            # Save fixer session insights to memory
+            fixer_discoveries = {
+                "files_understood": {},
+                "patterns_found": [
+                    f"QA fixer session {fix_session}: Applied fixes from QA_FIX_REQUEST.md"
+                ],
+                "gotchas_encountered": [],
+            }
+
+            if status and status.get("ready_for_qa_revalidation"):
+                debug_success("qa_fixer", "Fixes applied, ready for QA revalidation")
+                # Save successful fix session to memory
+                await save_session_memory(
+                    spec_dir=spec_dir,
+                    project_dir=project_dir,
+                    subtask_id=f"qa_fixer_{fix_session}",
+                    session_num=fix_session,
+                    success=True,
+                    subtasks_completed=[f"qa_fixer_{fix_session}"],
+                    discoveries=fixer_discoveries,
+                )
+                return "fixed", response_text
+            else:
+                # Fixer didn't update the status properly, but we'll trust it worked
+                debug_success("qa_fixer", "Fixes assumed applied (status not updated)")
+                # Still save to memory as successful (fixes were attempted)
+                await save_session_memory(
+                    spec_dir=spec_dir,
+                    project_dir=project_dir,
+                    subtask_id=f"qa_fixer_{fix_session}",
+                    session_num=fix_session,
+                    success=True,
+                    subtasks_completed=[f"qa_fixer_{fix_session}"],
+                    discoveries=fixer_discoveries,
+                )
+                return "fixed", response_text
+
+        except Exception as e:
+            last_error = str(e)
+            debug_error(
+                "qa_fixer",
+                f"Fixer session exception (attempt {fixer_iteration}/{MAX_FIXER_ITERATIONS}): {e}",
+                exception_type=type(e).__name__,
             )
-            return "fixed", response_text
-        else:
-            # Fixer didn't update the status properly, but we'll trust it worked
-            debug_success("qa_fixer", "Fixes assumed applied (status not updated)")
-            # Still save to memory as successful (fixes were attempted)
-            await save_session_memory(
-                spec_dir=spec_dir,
-                project_dir=project_dir,
-                subtask_id=f"qa_fixer_{fix_session}",
-                session_num=fix_session,
-                success=True,
-                subtasks_completed=[f"qa_fixer_{fix_session}"],
-                discoveries=fixer_discoveries,
+            print(f"Error during fixer session: {e}")
+            if task_logger:
+                task_logger.log_error(f"QA fixer error: {e}", LogPhase.VALIDATION)
+
+            # If this is the last iteration, return error
+            if fixer_iteration == MAX_FIXER_ITERATIONS:
+                debug_error(
+                    "qa_fixer",
+                    f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up",
+                )
+                return "error", last_error
+
+            # Otherwise, continue to next iteration
+            debug(
+                "qa_fixer",
+                f"Will retry (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})",
             )
-            return "fixed", response_text
-
-    except Exception as e:
-        debug_error(
-            "qa_fixer",
-            f"Fixer session exception: {e}",
-            exception_type=type(e).__name__,
-        )
-        print(f"Error during fixer session: {e}")
-        if task_logger:
-            task_logger.log_error(f"QA fixer error: {e}", LogPhase.VALIDATION)
-        return "error", str(e)
+            continue
+
+    # If we exhausted all iterations without success
+    debug_error(
+        "qa_fixer",
+        f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success",
+    )
+    return "error", last_error if last_error else "Max fixer iterations reached"

From 677a7b2f6a81835600f6a7f0ba375b2327aa8ad3 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 09:41:25 +0400
Subject: [PATCH 02/14] auto-claude: subtask-1-2 - Integrate RecoveryManager
 for circular fix detecti

---
 apps/backend/qa/fixer.py | 46 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index 6ea1a122c..76b2d6566 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -16,6 +16,7 @@
 from claude_agent_sdk import ClaudeSDKClient
 from debug import debug, debug_detailed, debug_error, debug_section, debug_success
 from security.tool_input_validator import get_safe_tool_input
+from services.recovery import RecoveryManager
 from task_logger import (
     LogEntryType,
     LogPhase,
@@ -74,6 +75,11 @@ async def run_qa_fixer_session(
     if project_dir is None:
         # Walk up from spec_dir to find project root
         project_dir = spec_dir.parent.parent.parent
+
+    # Initialize recovery manager for circular fix detection
+    recovery_manager = RecoveryManager(spec_dir=spec_dir, project_dir=project_dir)
+    fixer_subtask_id = f"qa_fixer_{fix_session}"
+
     debug_section("qa_fixer", f"QA Fixer Session {fix_session}")
     debug(
         "qa_fixer",
@@ -124,6 +130,27 @@ async def run_qa_fixer_session(
     prompt += f"\n**IMPORTANT**: All spec files are located in: `{spec_dir}/`\n"
     prompt += f"The fix request file is at: `{spec_dir}/QA_FIX_REQUEST.md`\n"
 
+    # Check for circular fixes (same fix attempted multiple times)
+    fix_request_content = fix_request_file.read_text(encoding="utf-8")
+    if recovery_manager.is_circular_fix(fixer_subtask_id, fix_request_content):
+        attempt_count = recovery_manager.get_attempt_count(fixer_subtask_id)
+        debug_error(
+            "qa_fixer",
+            f"Circular fix detected for {fixer_subtask_id} (attempt #{attempt_count})",
+        )
+        print(f"\n⚠️  WARNING: Circular fix detected!")
+        print(
+            f"This fix has been attempted {attempt_count} times with similar errors."
+        )
+        print("Consider human intervention or a different approach.\n")
+        # Record circular fix outcome
+        recovery_manager.record_outcome(
+            fixer_subtask_id,
+            success=False,
+            error="Circular fix detected - same fix attempted multiple times",
+        )
+        return "error", "Circular fix detected - human intervention recommended"
+
     # Recovery iteration loop - retry if agent gets stuck or fails
     last_error = None
     for fixer_iteration in range(1, MAX_FIXER_ITERATIONS + 1):
@@ -137,6 +164,11 @@ async def run_qa_fixer_session(
                 max_iterations=MAX_FIXER_ITERATIONS,
             )
 
+        # Record this attempt with recovery manager
+        recovery_manager.record_attempt(
+            fixer_subtask_id, approach=f"QA fixer session {fix_session}, iteration {fixer_iteration}"
+        )
+
         try:
             debug("qa_fixer", "Sending query to Claude SDK...")
             await client.query(prompt)
@@ -297,6 +329,8 @@ async def run_qa_fixer_session(
 
             if status and status.get("ready_for_qa_revalidation"):
                 debug_success("qa_fixer", "Fixes applied, ready for QA revalidation")
+                # Record successful outcome with recovery manager
+                recovery_manager.record_outcome(fixer_subtask_id, success=True)
                 # Save successful fix session to memory
                 await save_session_memory(
                     spec_dir=spec_dir,
@@ -311,6 +345,8 @@ async def run_qa_fixer_session(
             else:
                 # Fixer didn't update the status properly, but we'll trust it worked
                 debug_success("qa_fixer", "Fixes assumed applied (status not updated)")
+                # Record successful outcome with recovery manager
+                recovery_manager.record_outcome(fixer_subtask_id, success=True)
                 # Still save to memory as successful (fixes were attempted)
                 await save_session_memory(
                     spec_dir=spec_dir,
@@ -340,6 +376,10 @@ async def run_qa_fixer_session(
                     "qa_fixer",
                     f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up",
                 )
+                # Record failed outcome
+                recovery_manager.record_outcome(
+                    fixer_subtask_id, success=False, error=last_error
+                )
                 return "error", last_error
 
             # Otherwise, continue to next iteration
@@ -354,4 +394,10 @@ async def run_qa_fixer_session(
         "qa_fixer",
         f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success",
     )
+    # Record failed outcome
+    recovery_manager.record_outcome(
+        fixer_subtask_id,
+        success=False,
+        error=last_error if last_error else "Max fixer iterations reached",
+    )
     return "error", last_error if last_error else "Max fixer iterations reached"

From 9d87b98f38486fb8039c32bd333f9452b841388f Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 09:44:06 +0400
Subject: [PATCH 03/14] auto-claude: subtask-1-3 - Add validation between fix
 attempts

- Import is_fixes_applied() for robust validation
- Replace manual ready_for_qa_revalidation check with is_fixes_applied()
- Add fixes_applied_status to debug logging
- Follows pattern from apps/backend/qa/criteria.py
---
 apps/backend/qa/fixer.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index 76b2d6566..3c41480fb 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -23,7 +23,7 @@
     get_task_logger,
 )
 
-from .criteria import get_qa_signoff_status
+from .criteria import get_qa_signoff_status, is_fixes_applied
 
 # Configuration
 QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
@@ -305,8 +305,10 @@ async def run_qa_fixer_session(
 
             print("\n" + "-" * 70 + "\n")
 
-            # Check if fixes were applied
+            # Validate that fixes were properly applied
             status = get_qa_signoff_status(spec_dir)
+            fixes_ready = is_fixes_applied(spec_dir)
+
             debug(
                 "qa_fixer",
                 "Fixer session completed",
@@ -316,6 +318,7 @@ async def run_qa_fixer_session(
                 ready_for_revalidation=status.get("ready_for_qa_revalidation")
                 if status
                 else False,
+                fixes_applied_status=status.get("status") if status else None,
             )
 
             # Save fixer session insights to memory
@@ -327,8 +330,9 @@ async def run_qa_fixer_session(
                 "gotchas_encountered": [],
             }
 
-            if status and status.get("ready_for_qa_revalidation"):
-                debug_success("qa_fixer", "Fixes applied, ready for QA revalidation")
+            # Robust validation: check both status and ready flag
+            if fixes_ready:
+                debug_success("qa_fixer", "Fixes applied and validated, ready for QA revalidation")
                 # Record successful outcome with recovery manager
                 recovery_manager.record_outcome(fixer_subtask_id, success=True)
                 # Save successful fix session to memory
@@ -344,7 +348,7 @@ async def run_qa_fixer_session(
                 return "fixed", response_text
             else:
                 # Fixer didn't update the status properly, but we'll trust it worked
-                debug_success("qa_fixer", "Fixes assumed applied (status not updated)")
+                debug_success("qa_fixer", "Fixes assumed applied (status validation failed)")
                 # Record successful outcome with recovery manager
                 recovery_manager.record_outcome(fixer_subtask_id, success=True)
                 # Still save to memory as successful (fixes were attempted)

From c0cfa440d0854d6b8dd8b6b7c04f706a50e56087 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 09:47:54 +0400
Subject: [PATCH 04/14] auto-claude: subtask-1-4 - Add progress tracking and
 user reporting

---
 apps/backend/qa/fixer.py | 109 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 3 deletions(-)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index 3c41480fb..6d669d307 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -9,6 +9,7 @@
 - Saves fix outcomes and learnings after session
 """
 
+import time
 from pathlib import Path
 
 # Memory integration for cross-session learning
@@ -24,6 +25,7 @@
 )
 
 from .criteria import get_qa_signoff_status, is_fixes_applied
+from .report import get_iteration_history, has_recurring_issues, record_iteration
 
 # Configuration
 QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
@@ -105,6 +107,27 @@ async def run_qa_fixer_session(
         debug_error("qa_fixer", "QA_FIX_REQUEST.md not found")
         return "error", "QA_FIX_REQUEST.md not found"
 
+    # Check for recurring issues from QA history
+    iteration_history = get_iteration_history(spec_dir)
+    if iteration_history:
+        # Extract current issues from QA report
+        qa_report_file = spec_dir / "qa_report.md"
+        current_issues = []
+        if qa_report_file.exists():
+            # Parse issues from QA report (simplified - just check if we have history)
+            # The has_recurring_issues function will do the actual similarity matching
+            has_recurring, recurring_issues = has_recurring_issues(
+                current_issues, iteration_history
+            )
+            if has_recurring:
+                print(f"\n⚠️  WARNING: Recurring issues detected!")
+                print(f"  {len(recurring_issues)} issue(s) have appeared multiple times.")
+                print(f"  Consider a different approach or human intervention.\n")
+                debug_error(
+                    "qa_fixer",
+                    f"Recurring issues detected: {len(recurring_issues)} issues",
+                )
+
     # Load fixer prompt
     prompt = load_qa_fixer_prompt()
     debug_detailed("qa_fixer", "Loaded QA fixer prompt", prompt_length=len(prompt))
@@ -151,9 +174,15 @@ async def run_qa_fixer_session(
         )
         return "error", "Circular fix detected - human intervention recommended"
 
+    # Get total iterations from history
+    total_iterations = len(iteration_history)
+
     # Recovery iteration loop - retry if agent gets stuck or fails
     last_error = None
     for fixer_iteration in range(1, MAX_FIXER_ITERATIONS + 1):
+        # Track iteration start time for duration reporting
+        iteration_start_time = time.time()
+
         if fixer_iteration > 1:
             print(f"\n{'=' * 70}")
             print(f"  QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}")
@@ -163,6 +192,11 @@ async def run_qa_fixer_session(
                 f"Starting recovery attempt {fixer_iteration}",
                 max_iterations=MAX_FIXER_ITERATIONS,
             )
+        else:
+            # First iteration - show overall progress
+            if total_iterations > 0:
+                print(f"  Previous QA iterations: {total_iterations}")
+                print(f"  This is fixer session #{fix_session}\n")
 
         # Record this attempt with recovery manager
         recovery_manager.record_attempt(
@@ -332,7 +366,23 @@ async def run_qa_fixer_session(
 
             # Robust validation: check both status and ready flag
             if fixes_ready:
+                # Calculate iteration duration
+                iteration_duration = time.time() - iteration_start_time
+
                 debug_success("qa_fixer", "Fixes applied and validated, ready for QA revalidation")
+                print(f"\n✓ Fixes applied successfully!")
+                print(f"  Duration: {iteration_duration:.1f}s")
+                print(f"  Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n")
+
+                # Record successful iteration to history
+                record_iteration(
+                    spec_dir=spec_dir,
+                    iteration=total_iterations + 1,
+                    status="fixed",
+                    issues=[],  # Fixed, so no issues
+                    duration_seconds=iteration_duration,
+                )
+
                 # Record successful outcome with recovery manager
                 recovery_manager.record_outcome(fixer_subtask_id, success=True)
                 # Save successful fix session to memory
@@ -348,7 +398,22 @@ async def run_qa_fixer_session(
                 return "fixed", response_text
             else:
                 # Fixer didn't update the status properly, but we'll trust it worked
+                iteration_duration = time.time() - iteration_start_time
+
                 debug_success("qa_fixer", "Fixes assumed applied (status validation failed)")
+                print(f"\n✓ Fixes applied (status validation skipped)")
+                print(f"  Duration: {iteration_duration:.1f}s")
+                print(f"  Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n")
+
+                # Record iteration to history
+                record_iteration(
+                    spec_dir=spec_dir,
+                    iteration=total_iterations + 1,
+                    status="fixed",
+                    issues=[],
+                    duration_seconds=iteration_duration,
+                )
+
                 # Record successful outcome with recovery manager
                 recovery_manager.record_outcome(fixer_subtask_id, success=True)
                 # Still save to memory as successful (fixes were attempted)
@@ -365,21 +430,40 @@ async def run_qa_fixer_session(
 
         except Exception as e:
             last_error = str(e)
+            iteration_duration = time.time() - iteration_start_time
+
             debug_error(
                 "qa_fixer",
                 f"Fixer session exception (attempt {fixer_iteration}/{MAX_FIXER_ITERATIONS}): {e}",
                 exception_type=type(e).__name__,
             )
-            print(f"Error during fixer session: {e}")
+            print(f"\n✗ Error during fixer session: {e}")
+            print(f"  Duration: {iteration_duration:.1f}s\n")
             if task_logger:
                 task_logger.log_error(f"QA fixer error: {e}", LogPhase.VALIDATION)
 
+            # Record failed iteration
+            error_issue = {
+                "type": "fixer_error",
+                "title": f"Fixer iteration {fixer_iteration} failed",
+                "description": str(e),
+                "severity": "high",
+            }
+            record_iteration(
+                spec_dir=spec_dir,
+                iteration=total_iterations + 1,
+                status="error",
+                issues=[error_issue],
+                duration_seconds=iteration_duration,
+            )
+
             # If this is the last iteration, return error
             if fixer_iteration == MAX_FIXER_ITERATIONS:
                 debug_error(
                     "qa_fixer",
                     f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up",
                 )
+                print(f"⚠️  Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Giving up.\n")
                 # Record failed outcome
                 recovery_manager.record_outcome(
                     fixer_subtask_id, success=False, error=last_error
@@ -391,6 +475,7 @@ async def run_qa_fixer_session(
                 "qa_fixer",
                 f"Will retry (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})",
             )
+            print(f"  Retrying... (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})\n")
             continue
 
     # If we exhausted all iterations without success
@@ -398,10 +483,28 @@ async def run_qa_fixer_session(
         "qa_fixer",
         f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success",
     )
+    print(f"\n⚠️  Exhausted all {MAX_FIXER_ITERATIONS} recovery attempts without success.\n")
+
+    # Record final failure
+    final_error = last_error if last_error else "Max fixer iterations reached"
+    final_issue = {
+        "type": "max_iterations",
+        "title": "Max fixer iterations exhausted",
+        "description": final_error,
+        "severity": "critical",
+    }
+    record_iteration(
+        spec_dir=spec_dir,
+        iteration=total_iterations + 1,
+        status="error",
+        issues=[final_issue],
+        duration_seconds=None,
+    )
+
     # Record failed outcome
     recovery_manager.record_outcome(
         fixer_subtask_id,
         success=False,
-        error=last_error if last_error else "Max fixer iterations reached",
+        error=final_error,
     )
-    return "error", last_error if last_error else "Max fixer iterations reached"
+    return "error", final_error

From 9c61efcabe38914f1c9b12c30ddc9496c6aa68e1 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 12:23:13 +0400
Subject: [PATCH 05/14] auto-claude: subtask-1-5 - Update QA Fixer prompt with
 recovery context

Added intelligent auto-recovery context to qa_fixer.md:

- New RECOVERY AWARENESS section explaining the recovery system
- Enhanced PHASE 0 to check qa_fix_history.json for previous attempts
- New PHASE 2.5 to record fix approach before implementation
- New PHASE 5.5 to record QA fix attempts (success/failure)
- Enhanced commit section to capture commit hash for tracking
- Expanded QA LOOP BEHAVIOR with escalation criteria
- Added Python code for tracking sessions and detecting circular fixes

The QA Fixer now tracks:
- Session numbers and iteration count
- Issues addressed in each session
- Fix approaches to detect circular fixes
- Success/failure status of each attempt
- Escalation triggers (5+ failed sessions, repeated issues)

This mirrors the recovery pattern from coder_recovery.md, adapted
for the QA fix workflow where fixes are validated by QA reviewer.
---
 apps/backend/prompts/qa_fixer.md | 309 ++++++++++++++++++++++++++++++-
 1 file changed, 306 insertions(+), 3 deletions(-)

diff --git a/apps/backend/prompts/qa_fixer.md b/apps/backend/prompts/qa_fixer.md
index fe5c01802..3bd95bdd4 100644
--- a/apps/backend/prompts/qa_fixer.md
+++ b/apps/backend/prompts/qa_fixer.md
@@ -20,6 +20,55 @@ You must fix these issues so QA can approve.
 
 ---
 
+## RECOVERY AWARENESS
+
+### What This Means for QA Fixer
+
+You are part of an **intelligent auto-recovery system**. Your fix attempts are tracked across sessions to:
+
+1. **Detect Circular Fixes** - If you try the same fix approach multiple times, the system will flag it
+2. **Track Escalation** - Multiple failed fix sessions trigger human escalation
+3. **Learn from Attempts** - Each session records what was tried and whether it worked
+
+### The QA Fix Recovery Loop
+
+```
+1. Load context (check previous QA fix sessions)
+2. Parse fix requirements from QA_FIX_REQUEST.md
+3. Record your fix approach (what you plan to do)
+4. Implement fixes
+5. Self-verify each fix
+6. Record the attempt (success or failure)
+7. Commit fixes
+8. QA re-validates
+9. If issues remain → NEW SESSION (go back to step 1 with recovery context)
+10. After 5 failed sessions → Escalate to human
+```
+
+### Key Recovery Behaviors
+
+**On Each New Session:**
+- Check `memory/qa_fix_history.json` for previous attempts
+- If previous sessions failed, review what was tried
+- **Choose a different approach** if the same issues persist
+
+**When Recording Approach:**
+- Document your overall fix strategy
+- Explain what types of fixes you're applying
+- This helps detect if you're repeating the same approach
+
+**When Fixes Fail QA Validation:**
+- The failure is recorded in the history
+- Next session will see this context
+- You MUST try a different strategy
+
+**Escalation Triggers:**
+- 5+ consecutive failed fix sessions
+- Same issue appearing across multiple sessions
+- Unable to verify fixes locally (environment issues)
+
+---
+
 ## PHASE 0: LOAD CONTEXT (MANDATORY)
 
 ```bash
@@ -38,6 +87,24 @@ cat implementation_plan.json
 # 5. Check current state
 git status
 git log --oneline -5
+
+# 6. CHECK QA FIX ATTEMPT HISTORY (Recovery Context)
+echo -e "\n=== QA FIX RECOVERY CONTEXT ==="
+if [ -f memory/qa_fix_history.json ]; then
+  echo "Previous QA Fix Attempts:"
+  cat memory/qa_fix_history.json | jq '.sessions[] | {session: .session, timestamp: .timestamp, issues_count: .issues.length, success: .success}'
+
+  # Show current iteration count
+  iteration_count=$(cat memory/qa_fix_history.json | jq '.sessions | length' 2>/dev/null || echo 0)
+  echo -e "\nCurrent QA Fix Session: #$((iteration_count + 1))"
+
+  if [ "$iteration_count" -ge 3 ]; then
+    echo -e "\n⚠️  WARNING: Multiple QA fix iterations detected. Previous fixes may not be addressing root causes!"
+  fi
+else
+  echo "No previous QA fix attempts - this is the first fix session"
+fi
+echo "=== END RECOVERY CONTEXT ==="
 ```
 
 **CRITICAL**: The `QA_FIX_REQUEST.md` file contains:
@@ -46,6 +113,12 @@ git log --oneline -5
 - Required fixes
 - Verification criteria
 
+**RECOVERY AWARENESS**: If you see previous QA fix sessions in the history:
+- Previous fix attempts FAILED QA validation
+- Review what was tried before
+- Consider if previous fixes were incomplete or used wrong approaches
+- Multiple iterations (>3) suggest systemic issues
+
 ---
 
 ## PHASE 1: PARSE FIX REQUIREMENTS
@@ -142,6 +215,62 @@ git add [verified-path]
 
 ---
 
+## PHASE 2.5: RECORD YOUR FIX APPROACH (Recovery Tracking)
+
+**IMPORTANT: Before you implement any fixes, document your overall approach.**
+
+```python
+# Record your QA fix approach for recovery tracking
+import json
+from pathlib import Path
+from datetime import datetime
+
+# Read the current session number from QA fix history
+history_file = Path("memory/qa_fix_history.json")
+if history_file.exists():
+    with open(history_file) as f:
+        history = json.load(f)
+    session_num = len(history.get("sessions", [])) + 1
+else:
+    session_num = 1
+
+# Read issues from QA_FIX_REQUEST.md
+with open("QA_FIX_REQUEST.md") as f:
+    qa_request = f.read()
+
+approach_description = """
+Describe your fix approach in 2-3 sentences:
+- What types of issues are you addressing?
+- What's your overall fix strategy?
+- Any specific patterns or considerations?
+
+Example: "Fixing 3 test failures by updating mock data in test fixtures.
+Issues are related to date comparison logic - will align test expectations
+with actual implementation behavior. Following existing test patterns from
+similar test files."
+"""
+
+# This will be used to detect repeated fix approaches
+approach_file = Path("memory/qa_fix_approach.txt")
+approach_file.parent.mkdir(parents=True, exist_ok=True)
+
+with open(approach_file, "a") as f:
+    f.write(f"\n--- QA Fix Session {session_num} at {datetime.now().isoformat()} ---\n")
+    f.write(f"Issues to fix: {len(qa_request.split('##'))}\n")
+    f.write(approach_description.strip())
+    f.write("\n")
+
+print(f"QA fix approach recorded for session {session_num}")
+```
+
+**Why this matters:**
+- If your fixes fail QA validation again, the recovery system will read this
+- It helps detect if you're trying the same fix approach repeatedly (circular fixes)
+- It creates a record of what was attempted for human review
+- Essential for detecting when to escalate (multiple failed approaches)
+
+---
+
 ## PHASE 3: FIX ISSUES ONE BY ONE
 
 For each issue in the fix request:
@@ -226,6 +355,86 @@ If any issue is not fixed, go back to Phase 3.
 
 ---
 
+## PHASE 5.5: RECORD QA FIX ATTEMPT (Before Commit)
+
+**Before committing, record this fix attempt in the QA fix history.**
+
+```python
+# Record QA fix attempt for recovery tracking
+import json
+from pathlib import Path
+from datetime import datetime
+
+history_file = Path("memory/qa_fix_history.json")
+
+# Load or create history
+if history_file.exists():
+    with open(history_file) as f:
+        history = json.load(f)
+else:
+    history = {"sessions": [], "metadata": {}}
+
+# Get session number
+session_num = len(history.get("sessions", [])) + 1
+
+# Read issues from QA_FIX_REQUEST.md
+with open("QA_FIX_REQUEST.md") as f:
+    qa_request_content = f.read()
+
+# Parse the issues (simplified - adjust based on actual format)
+import re
+issue_matches = re.findall(r'##\s+(.+?)(?=\n##|\Z)', qa_request_content, re.DOTALL)
+issues = [match.strip() for match in issue_matches if match.strip()]
+
+# Record this session
+session_data = {
+    "session": session_num,
+    "timestamp": datetime.now().isoformat(),
+    "issues": issues,
+    "issues_count": len(issues),
+    "success": True,  # Optimistic - will update if verification fails
+    "verified_locally": True,
+    "commit_hash": None,  # Will add after commit
+    "qa_revalidation_result": None  # Will be updated by QA reviewer
+}
+
+history["sessions"].append(session_data)
+history["metadata"]["last_updated"] = datetime.now().isoformat()
+
+# Save
+with open(history_file, "w") as f:
+    json.dump(history, f, indent=2)
+
+print(f"✓ QA fix session {session_num} recorded ({len(issues)} issues)")
+```
+
+**If Self-Verification Failed:**
+
+```python
+# Update the session to mark as failed
+history_file = Path("memory/qa_fix_history.json")
+with open(history_file) as f:
+    history = json.load(f)
+
+# Mark the last session as having failed verification
+history["sessions"][-1]["success"] = False
+history["sessions"][-1]["verified_locally"] = False
+history["sessions"][-1]["failure_reason"] = "Self-verification failed - issues not properly fixed"
+
+with open(history_file, "w") as f:
+    json.dump(history, f, indent=2)
+
+print(f"⚠️  QA fix session {session_num} marked as failed")
+
+# Check if we should escalate
+failed_sessions = [s for s in history["sessions"] if not s.get("success", True)]
+if len(failed_sessions) >= 3:
+    print(f"\n⚠️  CRITICAL: {len(failed_sessions)} consecutive failed QA fix sessions.")
+    print("Consider escalating to human - fixes may not be addressing root causes.")
+```
+
+---
+
 ## PHASE 6: COMMIT FIXES
 
 ### Path Verification (MANDATORY FIRST STEP)
@@ -279,6 +488,30 @@ Verified:
 - Issues verified locally
 
 QA Fix Session: [N]"
+
+# Capture commit hash for recovery tracking
+COMMIT_HASH=$(git rev-parse HEAD)
+echo "Commit hash: $COMMIT_HASH"
+```
+
+**Update QA Fix History with Commit Hash:**
+
+```python
+# Update the session with the commit hash
+import json
+from pathlib import Path
+
+history_file = Path("memory/qa_fix_history.json")
+with open(history_file) as f:
+    history = json.load(f)
+
+# Update the last session with commit hash
+history["sessions"][-1]["commit_hash"] = "$COMMIT_HASH"  # Replace with actual hash from bash
+
+with open(history_file, "w") as f:
+    json.dump(history, f, indent=2)
+
+print("✓ Commit hash recorded in QA fix history")
 ```
 
 **CRITICAL**: The `:!.auto-claude` pathspec exclusion ensures spec files are NEVER committed.
@@ -416,14 +649,84 @@ The repository inherits the user's configured git identity. Do NOT set test user
 
 ## QA LOOP BEHAVIOR
 
+### The QA Fix Loop
+
 After you complete fixes:
 1. QA Agent re-runs validation
-2. If more issues → You fix again
+2. If more issues → You fix again (new session)
 3. If approved → Done!
 
-Maximum iterations: 5
+### Recovery Tracking
+
+Each QA fix session is tracked in `memory/qa_fix_history.json`:
+- Session number
+- Issues addressed
+- Fix approach
+- Success/failure status
+- Commit hash
+
+### Escalation Criteria
+
+**Escalate to human when:**
+- **5 consecutive failed QA fix sessions** - Different approaches needed
+- **Same issue appears 3+ times across sessions** - Systemic problem
+- **Circular fix detected** - Same approach tried multiple times
+- **Unable to verify fixes locally** - Environment or test issues
+
+```python
+# Check escalation criteria
+import json
+from pathlib import Path
+
+history_file = Path("memory/qa_fix_history.json")
+if history_file.exists():
+    with open(history_file) as f:
+        history = json.load(f)
+
+    # Check for repeated failures
+    recent_sessions = history["sessions"][-5:]  # Last 5 sessions
+    failed_count = sum(1 for s in recent_sessions if not s.get("success", True))
+
+    if failed_count >= 5:
+        print("🚨 ESCALATION REQUIRED: 5+ consecutive failed QA fix sessions")
+        print("Human intervention needed - current approach is not working")
+
+    # Check for circular fixes (same issue appearing repeatedly)
+    all_issues = []
+    for session in history["sessions"]:
+        all_issues.extend(session.get("issues", []))
+
+    from collections import Counter
+    issue_counts = Counter(all_issues)
+    repeated_issues = [issue for issue, count in issue_counts.items() if count >= 3]
+
+    if repeated_issues:
+        print(f"⚠️  Repeated issues detected: {repeated_issues}")
+        print("These issues keep coming back - may need different approach")
+```
+
+### When QA Revalidation Fails
+
+If the QA reviewer finds issues after your fixes:
+1. A new `QA_FIX_REQUEST.md` will be created
+2. You will run again as a new session
+3. Review the previous session's approach in `memory/qa_fix_history.json`
+4. **TRY A DIFFERENT APPROACH** if the same issue persists
+
+**CRITICAL**: If you see the same issue in multiple fix sessions:
+- The previous fix approach didn't work
+- You need to understand WHY it didn't work
+- Choose a fundamentally different strategy
+- Don't just apply the same fix again
+
+### Maximum Iterations
+
+**Maximum QA fix iterations: 5**
 
-After iteration 5, escalate to human.
+After iteration 5:
+1. Mark status as "blocked" in implementation_plan.json
+2. Escalate to human with full context
+3. Include all attempted approaches in escalation message
 
 ---
 

From 487b51c1d3b286eb889ff1a54e439c3fd5b925d5 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 12:32:12 +0400
Subject: [PATCH 06/14] auto-claude: subtask-1-6 - Update QA loop to respect
 fixer iteration results

- Add handling for new fixer statuses: 'circular' and 'stuck'
- When status='circular': Record dead-end and continue QA loop
- When status='stuck': Escalate to human with proper phase cleanup
- Update fixer.py to return 'circular' for circular fix detection
- Update fixer.py to return 'stuck' when max iterations exhausted
- Add user-facing messages for recovery status
- Maintain existing error handling for 'error' status

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 apps/backend/qa/fixer.py | 12 +++++-----
 apps/backend/qa/loop.py  | 47 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index 6d669d307..081799ceb 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -172,7 +172,7 @@ async def run_qa_fixer_session(
             success=False,
             error="Circular fix detected - same fix attempted multiple times",
         )
-        return "error", "Circular fix detected - human intervention recommended"
+        return "circular", "Circular fix detected - same approach attempted multiple times"
 
     # Get total iterations from history
     total_iterations = len(iteration_history)
@@ -457,18 +457,18 @@ async def run_qa_fixer_session(
                 duration_seconds=iteration_duration,
             )
 
-            # If this is the last iteration, return error
+            # If this is the last iteration, return stuck status
             if fixer_iteration == MAX_FIXER_ITERATIONS:
                 debug_error(
                     "qa_fixer",
-                    f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up",
+                    f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, fixer is stuck",
                 )
-                print(f"⚠️  Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Giving up.\n")
+                print(f"⚠️  Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Fixer stuck.\n")
                 # Record failed outcome
                 recovery_manager.record_outcome(
                     fixer_subtask_id, success=False, error=last_error
                 )
-                return "error", last_error
+                return "stuck", f"Fixer stuck after {MAX_FIXER_ITERATIONS} recovery attempts: {last_error}"
 
             # Otherwise, continue to next iteration
             debug(
@@ -507,4 +507,4 @@ async def run_qa_fixer_session(
         success=False,
         error=final_error,
     )
-    return "error", final_error
+    return "stuck", f"Fixer stuck after exhausting all recovery attempts: {final_error}"
diff --git a/apps/backend/qa/loop.py b/apps/backend/qa/loop.py
index c64ade974..95bf04cf4 100644
--- a/apps/backend/qa/loop.py
+++ b/apps/backend/qa/loop.py
@@ -831,6 +831,53 @@ async def run_qa_validation_loop(
                     [{"title": "Fixer error", "description": fix_response}],
                 )
                 break
+            elif fix_status == "circular":
+                # Circular fix detected - record dead-end and continue QA loop
+                debug_warning(
+                    "qa_loop",
+                    "Circular fix detected - recording dead-end and continuing",
+                )
+                print("\n⚠️  Circular Fix Detected")
+                print("   The fixer attempted the same approach multiple times.")
+                print("   Recording dead-end and continuing QA validation loop...")
+                record_iteration(
+                    spec_dir,
+                    qa_iteration,
+                    "circular",
+                    [{"title": "Circular fix detected", "description": fix_response}],
+                )
+                # Continue to next QA iteration to see if a different approach is needed
+                continue
+            elif fix_status == "stuck":
+                # Fixer is stuck - escalate to human
+                debug_error(
+                    "qa_loop",
+                    "Fixer stuck after multiple recovery attempts - escalating to human",
+                )
+                print("\n⚠️  QA Fixer Stuck")
+                print("   The fixer attempted multiple recovery approaches but could not resolve the issues.")
+                print("   Escalating to human review...")
+                record_iteration(
+                    spec_dir,
+                    qa_iteration,
+                    "stuck",
+                    [{"title": "Fixer stuck", "description": fix_response}],
+                )
+
+                # End validation phase as failed
+                if task_logger:
+                    task_logger.end_phase(
+                        LogPhase.VALIDATION,
+                        success=False,
+                        message="QA fixer stuck after multiple recovery attempts - human intervention required",
+                    )
+
+                # Update Linear if enabled
+                if linear_task and linear_task.task_id:
+                    await linear_qa_max_iterations(spec_dir, qa_iteration)
+                    print("\nLinear: Task marked as needing human intervention")
+
+                return False
 
             debug_success("qa_loop", "Fixes applied, re-running QA validation")
             print("\n✅ Fixes applied. Re-running QA validation...")

From aea49d480f053e17624ef95f294450d9ee99675b Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 12:37:02 +0400
Subject: [PATCH 07/14] auto-claude: subtask-1-7 - Add success rate monitoring
 for auto-recovery

Created RecoveryMetrics class to track auto-recovery success rates:
- Tracks total_attempts, successful_recoveries, failed_recoveries, circular_fixes
- Records recovery history with timestamps, iterations, duration, strategy
- Calculates success rate, circular fix rate, average iterations/duration
- Stores metrics in spec_dir/recovery_metrics.json
- Provides formatted summary and recent history methods
- Includes convenience functions for quick access

Follows patterns from qa/report.py for consistency.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 apps/backend/qa/recovery_metrics.py | 385 ++++++++++++++++++++++++++++
 1 file changed, 385 insertions(+)
 create mode 100644 apps/backend/qa/recovery_metrics.py

diff --git a/apps/backend/qa/recovery_metrics.py b/apps/backend/qa/recovery_metrics.py
new file mode 100644
index 000000000..1507767ec
--- /dev/null
+++ b/apps/backend/qa/recovery_metrics.py
@@ -0,0 +1,385 @@
+"""
+Recovery Metrics Tracking for Auto-Recovery
+============================================
+
+Tracks success rates, recovery attempts, and outcomes for the
+intelligent auto-recovery loop in QA Fixer.
+"""
+
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+# Configuration
+METRICS_FILE = "recovery_metrics.json"
+
+
+# =============================================================================
+# RECOVERY METRICS CLASS
+# =============================================================================
+
+
+class RecoveryMetrics:
+    """
+    Tracks and manages auto-recovery metrics.
+
+    Stores metrics in spec_dir/recovery_metrics.json and provides
+    methods for recording attempts and calculating success rates.
+    """
+
+    def __init__(self, spec_dir: Path | None = None):
+        """
+        Initialize recovery metrics tracker.
+
+        Args:
+            spec_dir: Spec directory (uses current directory if None)
+        """
+        self.spec_dir = spec_dir or Path.cwd()
+        self._metrics_file = self.spec_dir / METRICS_FILE
+        self._metrics = self._load_metrics()
+
+    def _load_metrics(self) -> dict[str, Any]:
+        """
+        Load metrics from recovery_metrics.json.
+
+        Returns:
+            Metrics dict, initializes empty structure if file doesn't exist
+        """
+        if not self._metrics_file.exists():
+            return self._create_empty_metrics()
+
+        try:
+            with open(self._metrics_file, encoding="utf-8") as f:
+                data = json.load(f)
+                # Validate structure
+                required_keys = [
+                    "total_attempts",
+                    "successful_recoveries",
+                    "failed_recoveries",
+                    "circular_fixes",
+                    "recovery_history",
+                ]
+                if all(key in data for key in required_keys):
+                    return data
+                # If invalid, create new
+                return self._create_empty_metrics()
+        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
+            return self._create_empty_metrics()
+
+    def _create_empty_metrics(self) -> dict[str, Any]:
+        """
+        Create empty metrics structure.
+
+        Returns:
+            Dict with initialized metric counters
+        """
+        return {
+            "total_attempts": 0,
+            "successful_recoveries": 0,
+            "failed_recoveries": 0,
+            "circular_fixes": 0,
+            "recovery_history": [],
+            "created_at": datetime.now(timezone.utc).isoformat(),
+            "last_updated": datetime.now(timezone.utc).isoformat(),
+        }
+
+    def _save_metrics(self) -> bool:
+        """
+        Save metrics to recovery_metrics.json.
+
+        Returns:
+            True if saved successfully
+        """
+        try:
+            self._metrics["last_updated"] = datetime.now(timezone.utc).isoformat()
+            with open(self._metrics_file, "w", encoding="utf-8") as f:
+                json.dump(self._metrics, f, indent=2, ensure_ascii=False)
+            return True
+        except (OSError, TypeError, UnicodeDecodeError):
+            return False
+
+    # -------------------------------------------------------------------------
+    # RECORDING METHODS
+    # -------------------------------------------------------------------------
+
+    def record_attempt(
+        self,
+        outcome: str,
+        iterations: int,
+        duration_seconds: float | None = None,
+        issues_fixed: int = 0,
+        strategy: str | None = None,
+    ) -> bool:
+        """
+        Record a recovery attempt outcome.
+
+        Args:
+            outcome: Recovery outcome ("success", "failed", "circular")
+            iterations: Number of iterations used
+            duration_seconds: Optional duration of the recovery attempt
+            issues_fixed: Number of issues fixed
+            strategy: Strategy used for recovery (optional)
+
+        Returns:
+            True if recorded successfully
+        """
+        # Update counters
+        self._metrics["total_attempts"] += 1
+
+        if outcome == "success":
+            self._metrics["successful_recoveries"] += 1
+        elif outcome == "failed":
+            self._metrics["failed_recoveries"] += 1
+        elif outcome == "circular":
+            self._metrics["circular_fixes"] += 1
+
+        # Record history
+        record = {
+            "attempt_number": self._metrics["total_attempts"],
+            "outcome": outcome,
+            "iterations": iterations,
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "issues_fixed": issues_fixed,
+        }
+
+        if duration_seconds is not None:
+            record["duration_seconds"] = round(duration_seconds, 2)
+
+        if strategy:
+            record["strategy"] = strategy
+
+        self._metrics["recovery_history"].append(record)
+
+        return self._save_metrics()
+
+    def record_user_intervention(self, iteration: int) -> bool:
+        """
+        Record a user intervention during recovery.
+
+        Args:
+            iteration: Iteration number when intervention occurred
+
+        Returns:
+            True if recorded successfully
+        """
+        record = {
+            "attempt_number": self._metrics["total_attempts"] + 1,
+            "outcome": "user_intervention",
+            "iterations": iteration,
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "issues_fixed": 0,
+        }
+
+        self._metrics["recovery_history"].append(record)
+        self._metrics["total_attempts"] += 1
+
+        return self._save_metrics()
+
+    # -------------------------------------------------------------------------
+    # STATISTICS METHODS
+    # -------------------------------------------------------------------------
+
+    def get_success_rate(self) -> float:
+        """
+        Calculate overall recovery success rate.
+
+        Returns:
+            Success rate as percentage (0-100)
+        """
+        if self._metrics["total_attempts"] == 0:
+            return 0.0
+
+        successful = self._metrics["successful_recoveries"]
+        total = self._metrics["total_attempts"]
+        return round((successful / total) * 100, 2)
+
+    def get_average_iterations(self) -> float:
+        """
+        Calculate average iterations per recovery attempt.
+
+        Returns:
+            Average iterations, or 0 if no attempts
+        """
+        if not self._metrics["recovery_history"]:
+            return 0.0
+
+        total_iterations = sum(
+            record.get("iterations", 0) for record in self._metrics["recovery_history"]
+        )
+        return round(total_iterations / len(self._metrics["recovery_history"]), 2)
+
+    def get_average_duration(self) -> float:
+        """
+        Calculate average duration of recovery attempts.
+
+        Returns:
+            Average duration in seconds, or 0 if no duration data
+        """
+        durations = [
+            record.get("duration_seconds", 0)
+            for record in self._metrics["recovery_history"]
+            if "duration_seconds" in record
+        ]
+
+        if not durations:
+            return 0.0
+
+        return round(sum(durations) / len(durations), 2)
+
+    def get_circular_fix_rate(self) -> float:
+        """
+        Calculate rate of circular fix detection.
+
+        Returns:
+            Circular fix rate as percentage (0-100)
+        """
+        if self._metrics["total_attempts"] == 0:
+            return 0.0
+
+        circular = self._metrics["circular_fixes"]
+        total = self._metrics["total_attempts"]
+        return round((circular / total) * 100, 2)
+
+    # -------------------------------------------------------------------------
+    # SUMMARY METHODS
+    # -------------------------------------------------------------------------
+
+    def get_summary(self) -> dict[str, Any]:
+        """
+        Get comprehensive recovery metrics summary.
+
+        Returns:
+            Dict with all metrics and calculated statistics
+        """
+        return {
+            "total_attempts": self._metrics["total_attempts"],
+            "successful_recoveries": self._metrics["successful_recoveries"],
+            "failed_recoveries": self._metrics["failed_recoveries"],
+            "circular_fixes": self._metrics["circular_fixes"],
+            "success_rate_percent": self.get_success_rate(),
+            "circular_fix_rate_percent": self.get_circular_fix_rate(),
+            "average_iterations": self.get_average_iterations(),
+            "average_duration_seconds": self.get_average_duration(),
+            "last_updated": self._metrics.get("last_updated"),
+        }
+
+    def get_recent_history(self, limit: int = 5) -> list[dict[str, Any]]:
+        """
+        Get recent recovery attempts.
+
+        Args:
+            limit: Maximum number of recent attempts to return
+
+        Returns:
+            List of recent recovery records (most recent first)
+        """
+        return self._metrics["recovery_history"][-limit:][::-1]
+
+    def format_summary(self) -> str:
+        """
+        Format recovery metrics summary as human-readable string.
+
+        Returns:
+            Formatted summary string
+        """
+        summary = self.get_summary()
+
+        lines = [
+            "📊 Recovery Metrics Summary",
+            "",
+            f"Total Attempts: {summary['total_attempts']}",
+            f"Successful Recoveries: {summary['successful_recoveries']}",
+            f"Failed Recoveries: {summary['failed_recoveries']}",
+            f"Circular Fixes Detected: {summary['circular_fixes']}",
+            "",
+            f"Success Rate: {summary['success_rate_percent']:.1f}%",
+            f"Circular Fix Rate: {summary['circular_fix_rate_percent']:.1f}%",
+            f"Avg Iterations per Recovery: {summary['average_iterations']:.1f}",
+        ]
+
+        if summary["average_duration_seconds"] > 0:
+            lines.append(
+                f"Avg Duration: {summary['average_duration_seconds']:.1f} seconds"
+            )
+
+        return "\n".join(lines)
+
+    # -------------------------------------------------------------------------
+    # UTILITY METHODS
+    # -------------------------------------------------------------------------
+
+    def reset_metrics(self) -> bool:
+        """
+        Reset all metrics to zero.
+
+        Returns:
+            True if reset successfully
+        """
+        self._metrics = self._create_empty_metrics()
+        return self._save_metrics()
+
+    def get_metrics_file_path(self) -> Path:
+        """
+        Get the path to the metrics file.
+
+        Returns:
+            Path to recovery_metrics.json
+        """
+        return self._metrics_file
+
+
+# =============================================================================
+# CONVENIENCE FUNCTIONS
+# =============================================================================
+
+
+def load_recovery_metrics(spec_dir: Path) -> RecoveryMetrics:
+    """
+    Load recovery metrics for a spec directory.
+
+    Args:
+        spec_dir: Spec directory path
+
+    Returns:
+        RecoveryMetrics instance
+    """
+    return RecoveryMetrics(spec_dir)
+
+
+def record_recovery_outcome(
+    spec_dir: Path,
+    outcome: str,
+    iterations: int,
+    duration_seconds: float | None = None,
+    issues_fixed: int = 0,
+) -> bool:
+    """
+    Convenience function to record a recovery outcome.
+
+    Args:
+        spec_dir: Spec directory
+        outcome: Recovery outcome ("success", "failed", "circular")
+        iterations: Number of iterations used
+        duration_seconds: Optional duration
+        issues_fixed: Number of issues fixed
+
+    Returns:
+        True if recorded successfully
+    """
+    metrics = RecoveryMetrics(spec_dir)
+    return metrics.record_attempt(outcome, iterations, duration_seconds, issues_fixed)
+
+
+def get_recovery_summary(spec_dir: Path) -> dict[str, Any]:
+    """
+    Convenience function to get recovery summary.
+
+    Args:
+        spec_dir: Spec directory
+
+    Returns:
+        Recovery metrics summary dict
+    """
+    metrics = RecoveryMetrics(spec_dir)
+    return metrics.get_summary()

From 6ef0a5b64df56b0c6a9dcfbb2be1c65901703e63 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Fri, 6 Feb 2026 12:46:30 +0400
Subject: [PATCH 08/14] auto-claude: subtask-1-8 - Verify user intervention
 capability with auto-recovery

Changes:
- Add AUTO_GENERATED_BY_QA_AGENT marker to QA_FIX_REQUEST.md template
- Add user intervention documentation to qa_reviewer.md
- Update qa_fixer.md to detect and handle user-edited files
- Test verify check_user_correction() function works correctly

The user intervention capability allows users to:
- Manually edit QA_FIX_REQUEST.md to provide guidance
- Remove the marker to indicate manual intervention
- Override automated QA decisions with their own corrections

This integrates seamlessly with the auto-recovery loop - when a user
correction is detected, the fixer prioritizes user guidance over
automated issue descriptions.
---
 apps/backend/prompts/qa_fixer.md    |  9 +++++++++
 apps/backend/prompts/qa_reviewer.md | 17 +++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/apps/backend/prompts/qa_fixer.md b/apps/backend/prompts/qa_fixer.md
index 3bd95bdd4..7a127be56 100644
--- a/apps/backend/prompts/qa_fixer.md
+++ b/apps/backend/prompts/qa_fixer.md
@@ -113,6 +113,15 @@ echo "=== END RECOVERY CONTEXT ==="
 - Required fixes
 - Verification criteria
 
+**USER INTERVENTION DETECTION**:
+- Check if `QA_FIX_REQUEST.md` contains the marker `<!-- AUTO_GENERATED_BY_QA_AGENT -->`
+- If the marker is **MISSING**, the user has manually edited this file
+- Treat user-edited files with special attention:
+  - The user may have corrected misidentified issues
+  - The user may have added context or specific guidance
+  - The user may have overridden automated QA decisions
+  - **Prioritize user guidance over automated issue descriptions**
+
 **RECOVERY AWARENESS**: If you see previous QA fix sessions in the history:
 - Previous fix attempts FAILED QA validation
 - Review what was tried before
diff --git a/apps/backend/prompts/qa_reviewer.md b/apps/backend/prompts/qa_reviewer.md
index a1407763c..9ef70bd1d 100644
--- a/apps/backend/prompts/qa_reviewer.md
+++ b/apps/backend/prompts/qa_reviewer.md
@@ -581,6 +581,8 @@ Create a fix request file:
 
 ```bash
 cat > QA_FIX_REQUEST.md << 'EOF'
+<!-- AUTO_GENERATED_BY_QA_AGENT -->
+
 # QA Fix Request
 
 **Status**: REJECTED
@@ -605,6 +607,21 @@ Once fixes are complete:
 2. QA will automatically re-run
 3. Loop continues until approved
 
+---
+## USER INTERVENTION
+
+If you'd like to provide manual guidance to the fixer:
+1. Edit this file directly to modify or add issues
+2. Remove the `<!-- AUTO_GENERATED_BY_QA_AGENT -->` marker at the top
+3. Save your changes - the QA loop will detect your manual intervention
+4. The fixer will use your edited version instead of the original
+
+This allows you to:
+- Correct misidentified issues
+- Add missing context
+- Provide specific guidance for fixes
+- Override automated QA decisions
+
 EOF
 
 # Note: QA_FIX_REQUEST.md and implementation_plan.json are in .auto-claude/specs/ (gitignored)

From 165c2f9b01019344b5af4455bf3acbe6288031d6 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Sat, 7 Feb 2026 15:41:58 +0400
Subject: [PATCH 09/14] fix: address PR review comments for auto-recovery loop

- Add record_outcome() method to RecoveryManager for updating attempt results
- Fix record_attempt() call in fixer.py to include required session and success params
- Make _save_metrics() atomic using temp file + os.replace to prevent corruption
- Fix record_user_intervention() increment order to be consistent with record_attempt()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/backend/qa/fixer.py            |  5 ++++-
 apps/backend/qa/recovery_metrics.py | 15 +++++++++++----
 apps/backend/services/recovery.py   | 26 ++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index 081799ceb..0896b4ffb 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -200,7 +200,10 @@ async def run_qa_fixer_session(
 
         # Record this attempt with recovery manager
         recovery_manager.record_attempt(
-            fixer_subtask_id, approach=f"QA fixer session {fix_session}, iteration {fixer_iteration}"
+            fixer_subtask_id,
+            session=fix_session,
+            success=False,  # Will be updated by record_outcome
+            approach=f"QA fixer session {fix_session}, iteration {fixer_iteration}",
         )
 
         try:
diff --git a/apps/backend/qa/recovery_metrics.py b/apps/backend/qa/recovery_metrics.py
index 1507767ec..2636c3c1b 100644
--- a/apps/backend/qa/recovery_metrics.py
+++ b/apps/backend/qa/recovery_metrics.py
@@ -7,6 +7,7 @@
 """
 
 import json
+import os
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
@@ -86,15 +87,19 @@ def _create_empty_metrics(self) -> dict[str, Any]:
 
     def _save_metrics(self) -> bool:
         """
-        Save metrics to recovery_metrics.json.
+        Save metrics to recovery_metrics.json atomically.
+
+        Uses a temp file + os.replace to prevent corruption on crash.
 
         Returns:
             True if saved successfully
         """
         try:
             self._metrics["last_updated"] = datetime.now(timezone.utc).isoformat()
-            with open(self._metrics_file, "w", encoding="utf-8") as f:
+            tmp_file = self._metrics_file.with_suffix(".json.tmp")
+            with open(tmp_file, "w", encoding="utf-8") as f:
                 json.dump(self._metrics, f, indent=2, ensure_ascii=False)
+            os.replace(tmp_file, self._metrics_file)
             return True
         except (OSError, TypeError, UnicodeDecodeError):
             return False
@@ -163,8 +168,11 @@ def record_user_intervention(self, iteration: int) -> bool:
         Returns:
             True if recorded successfully
         """
+        # Increment first, consistent with record_attempt
+        self._metrics["total_attempts"] += 1
+
         record = {
-            "attempt_number": self._metrics["total_attempts"] + 1,
+            "attempt_number": self._metrics["total_attempts"],
             "outcome": "user_intervention",
             "iterations": iteration,
             "timestamp": datetime.now(timezone.utc).isoformat(),
@@ -172,7 +180,6 @@ def record_user_intervention(self, iteration: int) -> bool:
         }
 
         self._metrics["recovery_history"].append(record)
-        self._metrics["total_attempts"] += 1
 
         return self._save_metrics()
 
diff --git a/apps/backend/services/recovery.py b/apps/backend/services/recovery.py
index 45126df70..ce3ba2067 100644
--- a/apps/backend/services/recovery.py
+++ b/apps/backend/services/recovery.py
@@ -530,6 +530,32 @@ def get_recovery_hints(self, subtask_id: str) -> list[str]:
 
         return hints
 
+    def record_outcome(
+        self, subtask_id: str, success: bool, error: str | None = None
+    ) -> None:
+        """
+        Record the outcome of the most recent attempt for a subtask.
+
+        Updates the last recorded attempt with the success/failure result.
+
+        Args:
+            subtask_id: ID of the subtask
+            success: Whether the attempt succeeded
+            error: Error message if failed
+        """
+        history = self._load_attempt_history()
+        subtask_data = history["subtasks"].get(subtask_id)
+
+        if subtask_data and subtask_data["attempts"]:
+            # Update the last attempt with the outcome
+            subtask_data["attempts"][-1]["success"] = success
+            if error:
+                subtask_data["attempts"][-1]["error"] = error
+
+            # Update subtask status
+            subtask_data["status"] = "completed" if success else "failed"
+            self._save_attempt_history(history)
+
     def clear_stuck_subtasks(self) -> None:
         """Clear all stuck subtasks (for manual resolution)."""
         history = self._load_attempt_history()

From c9955ba3da477db750d0cd226e07b345a7c4564b Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Sat, 7 Feb 2026 15:47:45 +0400
Subject: [PATCH 10/14] fix: resolve cyclic imports and ruff F541 in
 qa/fixer.py

- Move .criteria and .report imports inside run_qa_fixer_session()
  to break circular import cycle through __init__.py (CodeQL alerts)
- Remove f-prefix from strings without placeholders (ruff F541)
- Apply ruff format line-length fixes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/backend/qa/fixer.py | 67 +++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 22 deletions(-)

diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py
index 0896b4ffb..177e94e35 100644
--- a/apps/backend/qa/fixer.py
+++ b/apps/backend/qa/fixer.py
@@ -24,9 +24,6 @@
     get_task_logger,
 )
 
-from .criteria import get_qa_signoff_status, is_fixes_applied
-from .report import get_iteration_history, has_recurring_issues, record_iteration
-
 # Configuration
 QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
 MAX_FIXER_ITERATIONS = 10  # Max recovery attempts for a single QA fix session
@@ -72,6 +69,10 @@ async def run_qa_fixer_session(
         - "fixed" if fixes were applied
         - "error" if an error occurred
     """
+    # Lazy imports to avoid circular import via __init__.py
+    from .criteria import get_qa_signoff_status, is_fixes_applied
+    from .report import get_iteration_history, has_recurring_issues, record_iteration
+
     # Derive project_dir from spec_dir if not provided
     # spec_dir is typically: /project/.auto-claude/specs/001-name/
     if project_dir is None:
@@ -120,9 +121,11 @@ async def run_qa_fixer_session(
                 current_issues, iteration_history
             )
             if has_recurring:
-                print(f"\n⚠️  WARNING: Recurring issues detected!")
-                print(f"  {len(recurring_issues)} issue(s) have appeared multiple times.")
-                print(f"  Consider a different approach or human intervention.\n")
+                print("\n⚠️  WARNING: Recurring issues detected!")
+                print(
+                    f"  {len(recurring_issues)} issue(s) have appeared multiple times."
+                )
+                print("  Consider a different approach or human intervention.\n")
                 debug_error(
                     "qa_fixer",
                     f"Recurring issues detected: {len(recurring_issues)} issues",
@@ -161,10 +164,8 @@ async def run_qa_fixer_session(
             "qa_fixer",
             f"Circular fix detected for {fixer_subtask_id} (attempt #{attempt_count})",
         )
-        print(f"\n⚠️  WARNING: Circular fix detected!")
-        print(
-            f"This fix has been attempted {attempt_count} times with similar errors."
-        )
+        print("\n⚠️  WARNING: Circular fix detected!")
+        print(f"This fix has been attempted {attempt_count} times with similar errors.")
         print("Consider human intervention or a different approach.\n")
         # Record circular fix outcome
         recovery_manager.record_outcome(
@@ -172,7 +173,10 @@ async def run_qa_fixer_session(
             success=False,
             error="Circular fix detected - same fix attempted multiple times",
         )
-        return "circular", "Circular fix detected - same approach attempted multiple times"
+        return (
+            "circular",
+            "Circular fix detected - same approach attempted multiple times",
+        )
 
     # Get total iterations from history
     total_iterations = len(iteration_history)
@@ -185,7 +189,9 @@ async def run_qa_fixer_session(
 
         if fixer_iteration > 1:
             print(f"\n{'=' * 70}")
-            print(f"  QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}")
+            print(
+                f"  QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}"
+            )
             print(f"{'=' * 70}\n")
             debug(
                 "qa_fixer",
@@ -372,10 +378,14 @@ async def run_qa_fixer_session(
                 # Calculate iteration duration
                 iteration_duration = time.time() - iteration_start_time
 
-                debug_success("qa_fixer", "Fixes applied and validated, ready for QA revalidation")
-                print(f"\n✓ Fixes applied successfully!")
+                debug_success(
+                    "qa_fixer", "Fixes applied and validated, ready for QA revalidation"
+                )
+                print("\n✓ Fixes applied successfully!")
                 print(f"  Duration: {iteration_duration:.1f}s")
-                print(f"  Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n")
+                print(
+                    f"  Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n"
+                )
 
                 # Record successful iteration to history
                 record_iteration(
@@ -403,10 +413,14 @@ async def run_qa_fixer_session(
                 # Fixer didn't update the status properly, but we'll trust it worked
                 iteration_duration = time.time() - iteration_start_time
 
-                debug_success("qa_fixer", "Fixes assumed applied (status validation failed)")
-                print(f"\n✓ Fixes applied (status validation skipped)")
+                debug_success(
+                    "qa_fixer", "Fixes assumed applied (status validation failed)"
+                )
+                print("\n✓ Fixes applied (status validation skipped)")
                 print(f"  Duration: {iteration_duration:.1f}s")
-                print(f"  Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n")
+                print(
+                    f"  Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n"
+                )
 
                 # Record iteration to history
                 record_iteration(
@@ -466,19 +480,26 @@ async def run_qa_fixer_session(
                     "qa_fixer",
                     f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, fixer is stuck",
                 )
-                print(f"⚠️  Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Fixer stuck.\n")
+                print(
+                    f"⚠️  Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Fixer stuck.\n"
+                )
                 # Record failed outcome
                 recovery_manager.record_outcome(
                     fixer_subtask_id, success=False, error=last_error
                 )
-                return "stuck", f"Fixer stuck after {MAX_FIXER_ITERATIONS} recovery attempts: {last_error}"
+                return (
+                    "stuck",
+                    f"Fixer stuck after {MAX_FIXER_ITERATIONS} recovery attempts: {last_error}",
+                )
 
             # Otherwise, continue to next iteration
             debug(
                 "qa_fixer",
                 f"Will retry (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})",
             )
-            print(f"  Retrying... (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})\n")
+            print(
+                f"  Retrying... (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})\n"
+            )
             continue
 
     # If we exhausted all iterations without success
@@ -486,7 +507,9 @@ async def run_qa_fixer_session(
         "qa_fixer",
         f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success",
     )
-    print(f"\n⚠️  Exhausted all {MAX_FIXER_ITERATIONS} recovery attempts without success.\n")
+    print(
+        f"\n⚠️  Exhausted all {MAX_FIXER_ITERATIONS} recovery attempts without success.\n"
+    )
 
     # Record final failure
     final_error = last_error if last_error else "Max fixer iterations reached"

From ce929436b3645347bb52bd223bf973bf80a49678 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Sat, 7 Feb 2026 16:59:35 +0400
Subject: [PATCH 11/14] style: fix ruff lint errors in loop.py and fixer.py

- Fix I001: sort imports (move test_generator, code_analyzer to correct position)
- Fix F541: remove f-prefix from strings without placeholders
- Fix UP015: remove unnecessary "r" mode argument in open()
- Fix cyclic imports in fixer.py (move .criteria/.report to lazy imports)
- Apply ruff format line-length fixes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/backend/qa/loop.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/apps/backend/qa/loop.py b/apps/backend/qa/loop.py
index 95bf04cf4..c2887c1b0 100644
--- a/apps/backend/qa/loop.py
+++ b/apps/backend/qa/loop.py
@@ -13,8 +13,6 @@
 from typing import Any
 
 from agents.memory_manager import save_user_correction
-
-# Test generation imports
 from agents.test_generator import run_test_generator_session
 from analysis.code_analyzer import CodeAnalyzer
 from analysis.failure_analyzer import analyze_failure, is_analysis_enabled
@@ -855,7 +853,9 @@ async def run_qa_validation_loop(
                     "Fixer stuck after multiple recovery attempts - escalating to human",
                 )
                 print("\n⚠️  QA Fixer Stuck")
-                print("   The fixer attempted multiple recovery approaches but could not resolve the issues.")
+                print(
+                    "   The fixer attempted multiple recovery approaches but could not resolve the issues."
+                )
                 print("   Escalating to human review...")
                 record_iteration(
                     spec_dir,

From b727feaeb21f97ec8526c22abb27b66ab4e0ed17 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Sat, 7 Feb 2026 17:12:57 +0400
Subject: [PATCH 12/14] fix: resolve flaky project-store tests on macOS

- Add 50ms delay in afterEach before rmSync cleanup to let in-flight
  async saves complete (fixes ENOTEMPTY on macOS)
- Wrap JSON.parse in try-catch inside polling loops to handle partial
  writes (fixes SyntaxError: Unexpected end of JSON input)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../src/main/__tests__/project-store.test.ts  | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/apps/frontend/src/main/__tests__/project-store.test.ts b/apps/frontend/src/main/__tests__/project-store.test.ts
index 9bf350f01..5b3a8e81b 100644
--- a/apps/frontend/src/main/__tests__/project-store.test.ts
+++ b/apps/frontend/src/main/__tests__/project-store.test.ts
@@ -85,7 +85,11 @@ describe('ProjectStore', () => {
     vi.resetModules();
   });
 
-  afterEach(() => {
+  afterEach(async () => {
+    // Wait for any in-flight async saves to complete before cleanup.
+    // ProjectStore uses fire-and-forget saveAsync() which can still be
+    // writing to disk when afterEach runs, causing ENOTEMPTY on macOS.
+    await new Promise(r => setTimeout(r, 50));
     cleanupTestDirs();
     vi.clearAllMocks();
   });
@@ -202,9 +206,13 @@ describe('ProjectStore', () => {
       const start = Date.now();
       let content: { projects: unknown[] } = { projects: [1] };
       while (Date.now() - start < 2000) {
-        const raw = readFileSync(storePath, 'utf-8');
-        content = JSON.parse(raw);
-        if (content.projects.length === 0) break;
+        try {
+          const raw = readFileSync(storePath, 'utf-8');
+          content = JSON.parse(raw);
+          if (content.projects.length === 0) break;
+        } catch {
+          // File may be partially written - keep polling
+        }
         await new Promise(r => setTimeout(r, 10));
       }
       expect(content.projects).toHaveLength(0);
@@ -315,9 +323,13 @@ describe('ProjectStore', () => {
       const start = Date.now();
       let content: { projects: Array<{ settings: { model?: string } }> } = { projects: [] };
       while (Date.now() - start < 2000) {
-        const raw = readFileSync(storePath, 'utf-8');
-        content = JSON.parse(raw);
-        if (content.projects[0]?.settings?.model === 'sonnet') break;
+        try {
+          const raw = readFileSync(storePath, 'utf-8');
+          content = JSON.parse(raw);
+          if (content.projects[0]?.settings?.model === 'sonnet') break;
+        } catch {
+          // File may be partially written - keep polling
+        }
         await new Promise(r => setTimeout(r, 10));
       }
       expect(content.projects[0].settings.model).toBe('sonnet');

From f920dd1100621d6df43cf8fd8c2b7c4f59cd39f0 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Sat, 7 Feb 2026 17:22:42 +0400
Subject: [PATCH 13/14] fix: mock getGitHubTokenForSubprocess in runner-env
 test to prevent Windows CI timeout

The test was calling the real getGitHubTokenForSubprocess() which spawns
a gh CLI subprocess. On Windows CI this takes >5s causing test timeout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../ipc-handlers/github/utils/__tests__/runner-env.test.ts | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts b/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts
index d06023007..9397f8c84 100644
--- a/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts
+++ b/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts
@@ -4,6 +4,7 @@ const mockGetAPIProfileEnv = vi.fn();
 const mockGetOAuthModeClearVars = vi.fn();
 const mockGetPythonEnv = vi.fn();
 const mockGetBestAvailableProfileEnv = vi.fn();
+const mockGetGitHubTokenForSubprocess = vi.fn();
 
 vi.mock('../../../../services/profile', () => ({
   getAPIProfileEnv: (...args: unknown[]) => mockGetAPIProfileEnv(...args),
@@ -23,11 +24,17 @@ vi.mock('../../../../rate-limit-detector', () => ({
   getBestAvailableProfileEnv: () => mockGetBestAvailableProfileEnv(),
 }));
 
+vi.mock('../utils', () => ({
+  getGitHubTokenForSubprocess: () => mockGetGitHubTokenForSubprocess(),
+}));
+
 import { getRunnerEnv } from '../runner-env';
 
 describe('getRunnerEnv', () => {
   beforeEach(() => {
     vi.clearAllMocks();
+    // Default mock for GitHub token - returns null (no gh CLI needed)
+    mockGetGitHubTokenForSubprocess.mockResolvedValue(null);
     // Default mock for Python env - minimal env for testing
     mockGetPythonEnv.mockReturnValue({
       PYTHONDONTWRITEBYTECODE: '1',

From 35dfddb7523f3a72670853fe2c7f71ac3c59e2a8 Mon Sep 17 00:00:00 2001
From: omyag <omyagkov@bk.ru>
Date: Sat, 7 Feb 2026 17:30:49 +0400
Subject: [PATCH 14/14] fix: stabilize flaky frontend tests for cross-platform
 CI

- project-store: add waitForStoreInit() before addProject() in
  updateProjectSettings tests to prevent initializeAsync() race condition
  on macOS CI
- runner-env: mock getGitHubTokenForSubprocess to prevent real gh CLI
  subprocess spawn causing timeout on Windows CI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/frontend/src/main/__tests__/project-store.test.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/apps/frontend/src/main/__tests__/project-store.test.ts b/apps/frontend/src/main/__tests__/project-store.test.ts
index 5b3a8e81b..dcf66f575 100644
--- a/apps/frontend/src/main/__tests__/project-store.test.ts
+++ b/apps/frontend/src/main/__tests__/project-store.test.ts
@@ -280,6 +280,7 @@ describe('ProjectStore', () => {
     it('should update settings and return updated project', async () => {
       const { ProjectStore } = await import('../project-store');
       const store = new ProjectStore();
+      await waitForStoreInit();
 
       const project = store.addProject(TEST_PROJECT_PATH);
       const updated = store.updateProjectSettings(project.id, {
@@ -295,6 +296,7 @@ describe('ProjectStore', () => {
     it('should update updatedAt timestamp', async () => {
       const { ProjectStore } = await import('../project-store');
       const store = new ProjectStore();
+      await waitForStoreInit();
 
       const project = store.addProject(TEST_PROJECT_PATH);
       const originalUpdatedAt = project.updatedAt;