From 3a22a29b473d2582d763c9fb88d55f926c20e7e4 Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 09:37:51 +0400 Subject: [PATCH 01/14] auto-claude: subtask-1-1 - Add recovery iteration loop to run_qa_fixer_session --- apps/backend/qa/fixer.py | 404 +++++++++++++++++++++------------------ 1 file changed, 220 insertions(+), 184 deletions(-) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index f898add1a..6ea1a122c 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -26,6 +26,7 @@ # Configuration QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts" +MAX_FIXER_ITERATIONS = 10 # Max recovery attempts for a single QA fix session # ============================================================================= @@ -123,199 +124,234 @@ async def run_qa_fixer_session( prompt += f"\n**IMPORTANT**: All spec files are located in: `{spec_dir}/`\n" prompt += f"The fix request file is at: `{spec_dir}/QA_FIX_REQUEST.md`\n" - try: - debug("qa_fixer", "Sending query to Claude SDK...") - await client.query(prompt) - debug_success("qa_fixer", "Query sent successfully") - - response_text = "" - debug("qa_fixer", "Starting to receive response stream...") - async for msg in client.receive_response(): - msg_type = type(msg).__name__ - message_count += 1 - debug_detailed( + # Recovery iteration loop - retry if agent gets stuck or fails + last_error = None + for fixer_iteration in range(1, MAX_FIXER_ITERATIONS + 1): + if fixer_iteration > 1: + print(f"\n{'=' * 70}") + print(f" QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}") + print(f"{'=' * 70}\n") + debug( "qa_fixer", - f"Received message #{message_count}", - msg_type=msg_type, + f"Starting recovery attempt {fixer_iteration}", + max_iterations=MAX_FIXER_ITERATIONS, ) - if msg_type == "AssistantMessage" and hasattr(msg, "content"): - for block in msg.content: - block_type = type(block).__name__ - - if block_type == "TextBlock" and hasattr(block, "text"): - response_text += block.text - print(block.text, end="", flush=True) - # Log text to task logger (persist without double-printing) - if task_logger and block.text.strip(): - task_logger.log( - block.text, - LogEntryType.TEXT, - LogPhase.VALIDATION, - print_to_console=False, - ) - elif block_type == "ToolUseBlock" and hasattr(block, "name"): - tool_name = block.name - tool_input_display = None - tool_count += 1 - - # Safely extract tool input (handles None, non-dict, etc.) - inp = get_safe_tool_input(block) - - if inp: - if "file_path" in inp: - fp = inp["file_path"] - if len(fp) > 50: - fp = "..." + fp[-47:] - tool_input_display = fp - elif "command" in inp: - cmd = inp["command"] - if len(cmd) > 50: - cmd = cmd[:47] + "..." - tool_input_display = cmd - - debug( - "qa_fixer", - f"Tool call #{tool_count}: {tool_name}", - tool_input=tool_input_display, - ) - - # Log tool start (handles printing) - if task_logger: - task_logger.tool_start( - tool_name, - tool_input_display, - LogPhase.VALIDATION, - print_to_console=True, + try: + debug("qa_fixer", "Sending query to Claude SDK...") + await client.query(prompt) + debug_success("qa_fixer", "Query sent successfully") + + response_text = "" + debug("qa_fixer", "Starting to receive response stream...") + async for msg in client.receive_response(): + msg_type = type(msg).__name__ + message_count += 1 + debug_detailed( + "qa_fixer", + f"Received message #{message_count}", + msg_type=msg_type, + ) + + if msg_type == "AssistantMessage" and hasattr(msg, "content"): + for block in msg.content: + block_type = type(block).__name__ + + if block_type == "TextBlock" and hasattr(block, "text"): + response_text += block.text + print(block.text, end="", flush=True) + # Log text to task logger (persist without double-printing) + if task_logger and block.text.strip(): + task_logger.log( + block.text, + LogEntryType.TEXT, + LogPhase.VALIDATION, + print_to_console=False, + ) + elif block_type == "ToolUseBlock" and hasattr(block, "name"): + tool_name = block.name + tool_input_display = None + tool_count += 1 + + # Safely extract tool input (handles None, non-dict, etc.) + inp = get_safe_tool_input(block) + + if inp: + if "file_path" in inp: + fp = inp["file_path"] + if len(fp) > 50: + fp = "..." + fp[-47:] + tool_input_display = fp + elif "command" in inp: + cmd = inp["command"] + if len(cmd) > 50: + cmd = cmd[:47] + "..." + tool_input_display = cmd + + debug( + "qa_fixer", + f"Tool call #{tool_count}: {tool_name}", + tool_input=tool_input_display, ) - else: - print(f"\n[Fixer Tool: {tool_name}]", flush=True) - if verbose and hasattr(block, "input"): - input_str = str(block.input) - if len(input_str) > 300: - print(f" Input: {input_str[:300]}...", flush=True) + # Log tool start (handles printing) + if task_logger: + task_logger.tool_start( + tool_name, + tool_input_display, + LogPhase.VALIDATION, + print_to_console=True, + ) else: - print(f" Input: {input_str}", flush=True) - current_tool = tool_name - - elif msg_type == "UserMessage" and hasattr(msg, "content"): - for block in msg.content: - block_type = type(block).__name__ - - if block_type == "ToolResultBlock": - is_error = getattr(block, "is_error", False) - result_content = getattr(block, "content", "") - - if is_error: - debug_error( - "qa_fixer", - f"Tool error: {current_tool}", - error=str(result_content)[:200], - ) - error_str = str(result_content)[:500] - print(f" [Error] {error_str}", flush=True) - if task_logger and current_tool: - # Store full error in detail for expandable view - task_logger.tool_end( - current_tool, - success=False, - result=error_str[:100], - detail=str(result_content), - phase=LogPhase.VALIDATION, + print(f"\n[Fixer Tool: {tool_name}]", flush=True) + + if verbose and hasattr(block, "input"): + input_str = str(block.input) + if len(input_str) > 300: + print(f" Input: {input_str[:300]}...", flush=True) + else: + print(f" Input: {input_str}", flush=True) + current_tool = tool_name + + elif msg_type == "UserMessage" and hasattr(msg, "content"): + for block in msg.content: + block_type = type(block).__name__ + + if block_type == "ToolResultBlock": + is_error = getattr(block, "is_error", False) + result_content = getattr(block, "content", "") + + if is_error: + debug_error( + "qa_fixer", + f"Tool error: {current_tool}", + error=str(result_content)[:200], ) - else: - debug_detailed( - "qa_fixer", - f"Tool success: {current_tool}", - result_length=len(str(result_content)), - ) - if verbose: - result_str = str(result_content)[:200] - print(f" [Done] {result_str}", flush=True) + error_str = str(result_content)[:500] + print(f" [Error] {error_str}", flush=True) + if task_logger and current_tool: + # Store full error in detail for expandable view + task_logger.tool_end( + current_tool, + success=False, + result=error_str[:100], + detail=str(result_content), + phase=LogPhase.VALIDATION, + ) else: - print(" [Done]", flush=True) - if task_logger and current_tool: - # Store full result in detail for expandable view - detail_content = None - if current_tool in ( - "Read", - "Grep", - "Bash", - "Edit", - "Write", - ): - result_str = str(result_content) - if len(result_str) < 50000: - detail_content = result_str - task_logger.tool_end( - current_tool, - success=True, - detail=detail_content, - phase=LogPhase.VALIDATION, + debug_detailed( + "qa_fixer", + f"Tool success: {current_tool}", + result_length=len(str(result_content)), ) + if verbose: + result_str = str(result_content)[:200] + print(f" [Done] {result_str}", flush=True) + else: + print(" [Done]", flush=True) + if task_logger and current_tool: + # Store full result in detail for expandable view + detail_content = None + if current_tool in ( + "Read", + "Grep", + "Bash", + "Edit", + "Write", + ): + result_str = str(result_content) + if len(result_str) < 50000: + detail_content = result_str + task_logger.tool_end( + current_tool, + success=True, + detail=detail_content, + phase=LogPhase.VALIDATION, + ) + + current_tool = None + + print("\n" + "-" * 70 + "\n") + + # Check if fixes were applied + status = get_qa_signoff_status(spec_dir) + debug( + "qa_fixer", + "Fixer session completed", + message_count=message_count, + tool_count=tool_count, + response_length=len(response_text), + ready_for_revalidation=status.get("ready_for_qa_revalidation") + if status + else False, + ) - current_tool = None - - print("\n" + "-" * 70 + "\n") - - # Check if fixes were applied - status = get_qa_signoff_status(spec_dir) - debug( - "qa_fixer", - "Fixer session completed", - message_count=message_count, - tool_count=tool_count, - response_length=len(response_text), - ready_for_revalidation=status.get("ready_for_qa_revalidation") - if status - else False, - ) - - # Save fixer session insights to memory - fixer_discoveries = { - "files_understood": {}, - "patterns_found": [ - f"QA fixer session {fix_session}: Applied fixes from QA_FIX_REQUEST.md" - ], - "gotchas_encountered": [], - } - - if status and status.get("ready_for_qa_revalidation"): - debug_success("qa_fixer", "Fixes applied, ready for QA revalidation") - # Save successful fix session to memory - await save_session_memory( - spec_dir=spec_dir, - project_dir=project_dir, - subtask_id=f"qa_fixer_{fix_session}", - session_num=fix_session, - success=True, - subtasks_completed=[f"qa_fixer_{fix_session}"], - discoveries=fixer_discoveries, + # Save fixer session insights to memory + fixer_discoveries = { + "files_understood": {}, + "patterns_found": [ + f"QA fixer session {fix_session}: Applied fixes from QA_FIX_REQUEST.md" + ], + "gotchas_encountered": [], + } + + if status and status.get("ready_for_qa_revalidation"): + debug_success("qa_fixer", "Fixes applied, ready for QA revalidation") + # Save successful fix session to memory + await save_session_memory( + spec_dir=spec_dir, + project_dir=project_dir, + subtask_id=f"qa_fixer_{fix_session}", + session_num=fix_session, + success=True, + subtasks_completed=[f"qa_fixer_{fix_session}"], + discoveries=fixer_discoveries, + ) + return "fixed", response_text + else: + # Fixer didn't update the status properly, but we'll trust it worked + debug_success("qa_fixer", "Fixes assumed applied (status not updated)") + # Still save to memory as successful (fixes were attempted) + await save_session_memory( + spec_dir=spec_dir, + project_dir=project_dir, + subtask_id=f"qa_fixer_{fix_session}", + session_num=fix_session, + success=True, + subtasks_completed=[f"qa_fixer_{fix_session}"], + discoveries=fixer_discoveries, + ) + return "fixed", response_text + + except Exception as e: + last_error = str(e) + debug_error( + "qa_fixer", + f"Fixer session exception (attempt {fixer_iteration}/{MAX_FIXER_ITERATIONS}): {e}", + exception_type=type(e).__name__, ) - return "fixed", response_text - else: - # Fixer didn't update the status properly, but we'll trust it worked - debug_success("qa_fixer", "Fixes assumed applied (status not updated)") - # Still save to memory as successful (fixes were attempted) - await save_session_memory( - spec_dir=spec_dir, - project_dir=project_dir, - subtask_id=f"qa_fixer_{fix_session}", - session_num=fix_session, - success=True, - subtasks_completed=[f"qa_fixer_{fix_session}"], - discoveries=fixer_discoveries, + print(f"Error during fixer session: {e}") + if task_logger: + task_logger.log_error(f"QA fixer error: {e}", LogPhase.VALIDATION) + + # If this is the last iteration, return error + if fixer_iteration == MAX_FIXER_ITERATIONS: + debug_error( + "qa_fixer", + f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up", + ) + return "error", last_error + + # Otherwise, continue to next iteration + debug( + "qa_fixer", + f"Will retry (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})", ) - return "fixed", response_text - - except Exception as e: - debug_error( - "qa_fixer", - f"Fixer session exception: {e}", - exception_type=type(e).__name__, - ) - print(f"Error during fixer session: {e}") - if task_logger: - task_logger.log_error(f"QA fixer error: {e}", LogPhase.VALIDATION) - return "error", str(e) + continue + + # If we exhausted all iterations without success + debug_error( + "qa_fixer", + f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success", + ) + return "error", last_error if last_error else "Max fixer iterations reached" From 677a7b2f6a81835600f6a7f0ba375b2327aa8ad3 Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 09:41:25 +0400 Subject: [PATCH 02/14] auto-claude: subtask-1-2 - Integrate RecoveryManager for circular fix detecti --- apps/backend/qa/fixer.py | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 6ea1a122c..76b2d6566 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -16,6 +16,7 @@ from claude_agent_sdk import ClaudeSDKClient from debug import debug, debug_detailed, debug_error, debug_section, debug_success from security.tool_input_validator import get_safe_tool_input +from services.recovery import RecoveryManager from task_logger import ( LogEntryType, LogPhase, @@ -74,6 +75,11 @@ async def run_qa_fixer_session( if project_dir is None: # Walk up from spec_dir to find project root project_dir = spec_dir.parent.parent.parent + + # Initialize recovery manager for circular fix detection + recovery_manager = RecoveryManager(spec_dir=spec_dir, project_dir=project_dir) + fixer_subtask_id = f"qa_fixer_{fix_session}" + debug_section("qa_fixer", f"QA Fixer Session {fix_session}") debug( "qa_fixer", @@ -124,6 +130,27 @@ async def run_qa_fixer_session( prompt += f"\n**IMPORTANT**: All spec files are located in: `{spec_dir}/`\n" prompt += f"The fix request file is at: `{spec_dir}/QA_FIX_REQUEST.md`\n" + # Check for circular fixes (same fix attempted multiple times) + fix_request_content = fix_request_file.read_text(encoding="utf-8") + if recovery_manager.is_circular_fix(fixer_subtask_id, fix_request_content): + attempt_count = recovery_manager.get_attempt_count(fixer_subtask_id) + debug_error( + "qa_fixer", + f"Circular fix detected for {fixer_subtask_id} (attempt #{attempt_count})", + ) + print(f"\n⚠️ WARNING: Circular fix detected!") + print( + f"This fix has been attempted {attempt_count} times with similar errors." + ) + print("Consider human intervention or a different approach.\n") + # Record circular fix outcome + recovery_manager.record_outcome( + fixer_subtask_id, + success=False, + error="Circular fix detected - same fix attempted multiple times", + ) + return "error", "Circular fix detected - human intervention recommended" + # Recovery iteration loop - retry if agent gets stuck or fails last_error = None for fixer_iteration in range(1, MAX_FIXER_ITERATIONS + 1): @@ -137,6 +164,11 @@ async def run_qa_fixer_session( max_iterations=MAX_FIXER_ITERATIONS, ) + # Record this attempt with recovery manager + recovery_manager.record_attempt( + fixer_subtask_id, approach=f"QA fixer session {fix_session}, iteration {fixer_iteration}" + ) + try: debug("qa_fixer", "Sending query to Claude SDK...") await client.query(prompt) @@ -297,6 +329,8 @@ async def run_qa_fixer_session( if status and status.get("ready_for_qa_revalidation"): debug_success("qa_fixer", "Fixes applied, ready for QA revalidation") + # Record successful outcome with recovery manager + recovery_manager.record_outcome(fixer_subtask_id, success=True) # Save successful fix session to memory await save_session_memory( spec_dir=spec_dir, @@ -311,6 +345,8 @@ async def run_qa_fixer_session( else: # Fixer didn't update the status properly, but we'll trust it worked debug_success("qa_fixer", "Fixes assumed applied (status not updated)") + # Record successful outcome with recovery manager + recovery_manager.record_outcome(fixer_subtask_id, success=True) # Still save to memory as successful (fixes were attempted) await save_session_memory( spec_dir=spec_dir, @@ -340,6 +376,10 @@ async def run_qa_fixer_session( "qa_fixer", f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up", ) + # Record failed outcome + recovery_manager.record_outcome( + fixer_subtask_id, success=False, error=last_error + ) return "error", last_error # Otherwise, continue to next iteration @@ -354,4 +394,10 @@ async def run_qa_fixer_session( "qa_fixer", f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success", ) + # Record failed outcome + recovery_manager.record_outcome( + fixer_subtask_id, + success=False, + error=last_error if last_error else "Max fixer iterations reached", + ) return "error", last_error if last_error else "Max fixer iterations reached" From 9d87b98f38486fb8039c32bd333f9452b841388f Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 09:44:06 +0400 Subject: [PATCH 03/14] auto-claude: subtask-1-3 - Add validation between fix attempts - Import is_fixes_applied() for robust validation - Replace manual ready_for_qa_revalidation check with is_fixes_applied() - Add fixes_applied_status to debug logging - Follows pattern from apps/backend/qa/criteria.py --- apps/backend/qa/fixer.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 76b2d6566..3c41480fb 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -23,7 +23,7 @@ get_task_logger, ) -from .criteria import get_qa_signoff_status +from .criteria import get_qa_signoff_status, is_fixes_applied # Configuration QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts" @@ -305,8 +305,10 @@ async def run_qa_fixer_session( print("\n" + "-" * 70 + "\n") - # Check if fixes were applied + # Validate that fixes were properly applied status = get_qa_signoff_status(spec_dir) + fixes_ready = is_fixes_applied(spec_dir) + debug( "qa_fixer", "Fixer session completed", @@ -316,6 +318,7 @@ async def run_qa_fixer_session( ready_for_revalidation=status.get("ready_for_qa_revalidation") if status else False, + fixes_applied_status=status.get("status") if status else None, ) # Save fixer session insights to memory @@ -327,8 +330,9 @@ async def run_qa_fixer_session( "gotchas_encountered": [], } - if status and status.get("ready_for_qa_revalidation"): - debug_success("qa_fixer", "Fixes applied, ready for QA revalidation") + # Robust validation: check both status and ready flag + if fixes_ready: + debug_success("qa_fixer", "Fixes applied and validated, ready for QA revalidation") # Record successful outcome with recovery manager recovery_manager.record_outcome(fixer_subtask_id, success=True) # Save successful fix session to memory @@ -344,7 +348,7 @@ async def run_qa_fixer_session( return "fixed", response_text else: # Fixer didn't update the status properly, but we'll trust it worked - debug_success("qa_fixer", "Fixes assumed applied (status not updated)") + debug_success("qa_fixer", "Fixes assumed applied (status validation failed)") # Record successful outcome with recovery manager recovery_manager.record_outcome(fixer_subtask_id, success=True) # Still save to memory as successful (fixes were attempted) From c0cfa440d0854d6b8dd8b6b7c04f706a50e56087 Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 09:47:54 +0400 Subject: [PATCH 04/14] auto-claude: subtask-1-4 - Add progress tracking and user reporting --- apps/backend/qa/fixer.py | 109 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 3c41480fb..6d669d307 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -9,6 +9,7 @@ - Saves fix outcomes and learnings after session """ +import time from pathlib import Path # Memory integration for cross-session learning @@ -24,6 +25,7 @@ ) from .criteria import get_qa_signoff_status, is_fixes_applied +from .report import get_iteration_history, has_recurring_issues, record_iteration # Configuration QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts" @@ -105,6 +107,27 @@ async def run_qa_fixer_session( debug_error("qa_fixer", "QA_FIX_REQUEST.md not found") return "error", "QA_FIX_REQUEST.md not found" + # Check for recurring issues from QA history + iteration_history = get_iteration_history(spec_dir) + if iteration_history: + # Extract current issues from QA report + qa_report_file = spec_dir / "qa_report.md" + current_issues = [] + if qa_report_file.exists(): + # Parse issues from QA report (simplified - just check if we have history) + # The has_recurring_issues function will do the actual similarity matching + has_recurring, recurring_issues = has_recurring_issues( + current_issues, iteration_history + ) + if has_recurring: + print(f"\n⚠️ WARNING: Recurring issues detected!") + print(f" {len(recurring_issues)} issue(s) have appeared multiple times.") + print(f" Consider a different approach or human intervention.\n") + debug_error( + "qa_fixer", + f"Recurring issues detected: {len(recurring_issues)} issues", + ) + # Load fixer prompt prompt = load_qa_fixer_prompt() debug_detailed("qa_fixer", "Loaded QA fixer prompt", prompt_length=len(prompt)) @@ -151,9 +174,15 @@ async def run_qa_fixer_session( ) return "error", "Circular fix detected - human intervention recommended" + # Get total iterations from history + total_iterations = len(iteration_history) + # Recovery iteration loop - retry if agent gets stuck or fails last_error = None for fixer_iteration in range(1, MAX_FIXER_ITERATIONS + 1): + # Track iteration start time for duration reporting + iteration_start_time = time.time() + if fixer_iteration > 1: print(f"\n{'=' * 70}") print(f" QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}") @@ -163,6 +192,11 @@ async def run_qa_fixer_session( f"Starting recovery attempt {fixer_iteration}", max_iterations=MAX_FIXER_ITERATIONS, ) + else: + # First iteration - show overall progress + if total_iterations > 0: + print(f" Previous QA iterations: {total_iterations}") + print(f" This is fixer session #{fix_session}\n") # Record this attempt with recovery manager recovery_manager.record_attempt( @@ -332,7 +366,23 @@ async def run_qa_fixer_session( # Robust validation: check both status and ready flag if fixes_ready: + # Calculate iteration duration + iteration_duration = time.time() - iteration_start_time + debug_success("qa_fixer", "Fixes applied and validated, ready for QA revalidation") + print(f"\n✓ Fixes applied successfully!") + print(f" Duration: {iteration_duration:.1f}s") + print(f" Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n") + + # Record successful iteration to history + record_iteration( + spec_dir=spec_dir, + iteration=total_iterations + 1, + status="fixed", + issues=[], # Fixed, so no issues + duration_seconds=iteration_duration, + ) + # Record successful outcome with recovery manager recovery_manager.record_outcome(fixer_subtask_id, success=True) # Save successful fix session to memory @@ -348,7 +398,22 @@ async def run_qa_fixer_session( return "fixed", response_text else: # Fixer didn't update the status properly, but we'll trust it worked + iteration_duration = time.time() - iteration_start_time + debug_success("qa_fixer", "Fixes assumed applied (status validation failed)") + print(f"\n✓ Fixes applied (status validation skipped)") + print(f" Duration: {iteration_duration:.1f}s") + print(f" Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n") + + # Record iteration to history + record_iteration( + spec_dir=spec_dir, + iteration=total_iterations + 1, + status="fixed", + issues=[], + duration_seconds=iteration_duration, + ) + # Record successful outcome with recovery manager recovery_manager.record_outcome(fixer_subtask_id, success=True) # Still save to memory as successful (fixes were attempted) @@ -365,21 +430,40 @@ async def run_qa_fixer_session( except Exception as e: last_error = str(e) + iteration_duration = time.time() - iteration_start_time + debug_error( "qa_fixer", f"Fixer session exception (attempt {fixer_iteration}/{MAX_FIXER_ITERATIONS}): {e}", exception_type=type(e).__name__, ) - print(f"Error during fixer session: {e}") + print(f"\n✗ Error during fixer session: {e}") + print(f" Duration: {iteration_duration:.1f}s\n") if task_logger: task_logger.log_error(f"QA fixer error: {e}", LogPhase.VALIDATION) + # Record failed iteration + error_issue = { + "type": "fixer_error", + "title": f"Fixer iteration {fixer_iteration} failed", + "description": str(e), + "severity": "high", + } + record_iteration( + spec_dir=spec_dir, + iteration=total_iterations + 1, + status="error", + issues=[error_issue], + duration_seconds=iteration_duration, + ) + # If this is the last iteration, return error if fixer_iteration == MAX_FIXER_ITERATIONS: debug_error( "qa_fixer", f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up", ) + print(f"⚠️ Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Giving up.\n") # Record failed outcome recovery_manager.record_outcome( fixer_subtask_id, success=False, error=last_error @@ -391,6 +475,7 @@ async def run_qa_fixer_session( "qa_fixer", f"Will retry (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})", ) + print(f" Retrying... (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})\n") continue # If we exhausted all iterations without success @@ -398,10 +483,28 @@ async def run_qa_fixer_session( "qa_fixer", f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success", ) + print(f"\n⚠️ Exhausted all {MAX_FIXER_ITERATIONS} recovery attempts without success.\n") + + # Record final failure + final_error = last_error if last_error else "Max fixer iterations reached" + final_issue = { + "type": "max_iterations", + "title": "Max fixer iterations exhausted", + "description": final_error, + "severity": "critical", + } + record_iteration( + spec_dir=spec_dir, + iteration=total_iterations + 1, + status="error", + issues=[final_issue], + duration_seconds=None, + ) + # Record failed outcome recovery_manager.record_outcome( fixer_subtask_id, success=False, - error=last_error if last_error else "Max fixer iterations reached", + error=final_error, ) - return "error", last_error if last_error else "Max fixer iterations reached" + return "error", final_error From 9c61efcabe38914f1c9b12c30ddc9496c6aa68e1 Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 12:23:13 +0400 Subject: [PATCH 05/14] auto-claude: subtask-1-5 - Update QA Fixer prompt with recovery context Added intelligent auto-recovery context to qa_fixer.md: - New RECOVERY AWARENESS section explaining the recovery system - Enhanced PHASE 0 to check qa_fix_history.json for previous attempts - New PHASE 2.5 to record fix approach before implementation - New PHASE 5.5 to record QA fix attempts (success/failure) - Enhanced commit section to capture commit hash for tracking - Expanded QA LOOP BEHAVIOR with escalation criteria - Added Python code for tracking sessions and detecting circular fixes The QA Fixer now tracks: - Session numbers and iteration count - Issues addressed in each session - Fix approaches to detect circular fixes - Success/failure status of each attempt - Escalation triggers (5+ failed sessions, repeated issues) This mirrors the recovery pattern from coder_recovery.md, adapted for the QA fix workflow where fixes are validated by QA reviewer. --- apps/backend/prompts/qa_fixer.md | 309 ++++++++++++++++++++++++++++++- 1 file changed, 306 insertions(+), 3 deletions(-) diff --git a/apps/backend/prompts/qa_fixer.md b/apps/backend/prompts/qa_fixer.md index fe5c01802..3bd95bdd4 100644 --- a/apps/backend/prompts/qa_fixer.md +++ b/apps/backend/prompts/qa_fixer.md @@ -20,6 +20,55 @@ You must fix these issues so QA can approve. --- +## RECOVERY AWARENESS + +### What This Means for QA Fixer + +You are part of an **intelligent auto-recovery system**. Your fix attempts are tracked across sessions to: + +1. **Detect Circular Fixes** - If you try the same fix approach multiple times, the system will flag it +2. **Track Escalation** - Multiple failed fix sessions trigger human escalation +3. **Learn from Attempts** - Each session records what was tried and whether it worked + +### The QA Fix Recovery Loop + +``` +1. Load context (check previous QA fix sessions) +2. Parse fix requirements from QA_FIX_REQUEST.md +3. Record your fix approach (what you plan to do) +4. Implement fixes +5. Self-verify each fix +6. Record the attempt (success or failure) +7. Commit fixes +8. QA re-validates +9. If issues remain → NEW SESSION (go back to step 1 with recovery context) +10. After 5 failed sessions → Escalate to human +``` + +### Key Recovery Behaviors + +**On Each New Session:** +- Check `memory/qa_fix_history.json` for previous attempts +- If previous sessions failed, review what was tried +- **Choose a different approach** if the same issues persist + +**When Recording Approach:** +- Document your overall fix strategy +- Explain what types of fixes you're applying +- This helps detect if you're repeating the same approach + +**When Fixes Fail QA Validation:** +- The failure is recorded in the history +- Next session will see this context +- You MUST try a different strategy + +**Escalation Triggers:** +- 5+ consecutive failed fix sessions +- Same issue appearing across multiple sessions +- Unable to verify fixes locally (environment issues) + +--- + ## PHASE 0: LOAD CONTEXT (MANDATORY) ```bash @@ -38,6 +87,24 @@ cat implementation_plan.json # 5. Check current state git status git log --oneline -5 + +# 6. CHECK QA FIX ATTEMPT HISTORY (Recovery Context) +echo -e "\n=== QA FIX RECOVERY CONTEXT ===" +if [ -f memory/qa_fix_history.json ]; then + echo "Previous QA Fix Attempts:" + cat memory/qa_fix_history.json | jq '.sessions[] | {session: .session, timestamp: .timestamp, issues_count: .issues.length, success: .success}' + + # Show current iteration count + iteration_count=$(cat memory/qa_fix_history.json | jq '.sessions | length' 2>/dev/null || echo 0) + echo -e "\nCurrent QA Fix Session: #$((iteration_count + 1))" + + if [ "$iteration_count" -ge 3 ]; then + echo -e "\n⚠️ WARNING: Multiple QA fix iterations detected. Previous fixes may not be addressing root causes!" + fi +else + echo "No previous QA fix attempts - this is the first fix session" +fi +echo "=== END RECOVERY CONTEXT ===" ``` **CRITICAL**: The `QA_FIX_REQUEST.md` file contains: @@ -46,6 +113,12 @@ git log --oneline -5 - Required fixes - Verification criteria +**RECOVERY AWARENESS**: If you see previous QA fix sessions in the history: +- Previous fix attempts FAILED QA validation +- Review what was tried before +- Consider if previous fixes were incomplete or used wrong approaches +- Multiple iterations (>3) suggest systemic issues + --- ## PHASE 1: PARSE FIX REQUIREMENTS @@ -142,6 +215,62 @@ git add [verified-path] --- +## PHASE 2.5: RECORD YOUR FIX APPROACH (Recovery Tracking) + +**IMPORTANT: Before you implement any fixes, document your overall approach.** + +```python +# Record your QA fix approach for recovery tracking +import json +from pathlib import Path +from datetime import datetime + +# Read the current session number from QA fix history +history_file = Path("memory/qa_fix_history.json") +if history_file.exists(): + with open(history_file) as f: + history = json.load(f) + session_num = len(history.get("sessions", [])) + 1 +else: + session_num = 1 + +# Read issues from QA_FIX_REQUEST.md +with open("QA_FIX_REQUEST.md") as f: + qa_request = f.read() + +approach_description = """ +Describe your fix approach in 2-3 sentences: +- What types of issues are you addressing? +- What's your overall fix strategy? +- Any specific patterns or considerations? + +Example: "Fixing 3 test failures by updating mock data in test fixtures. +Issues are related to date comparison logic - will align test expectations +with actual implementation behavior. Following existing test patterns from +similar test files." +""" + +# This will be used to detect repeated fix approaches +approach_file = Path("memory/qa_fix_approach.txt") +approach_file.parent.mkdir(parents=True, exist_ok=True) + +with open(approach_file, "a") as f: + f.write(f"\n--- QA Fix Session {session_num} at {datetime.now().isoformat()} ---\n") + f.write(f"Issues to fix: {len(qa_request.split('##'))}\n") + f.write(approach_description.strip()) + f.write("\n") + +print(f"QA fix approach recorded for session {session_num}") +``` + +**Why this matters:** +- If your fixes fail QA validation again, the recovery system will read this +- It helps detect if you're trying the same fix approach repeatedly (circular fixes) +- It creates a record of what was attempted for human review +- Essential for detecting when to escalate (multiple failed approaches) + +--- + ## PHASE 3: FIX ISSUES ONE BY ONE For each issue in the fix request: @@ -226,6 +355,86 @@ If any issue is not fixed, go back to Phase 3. --- +## PHASE 5.5: RECORD QA FIX ATTEMPT (Before Commit) + +**Before committing, record this fix attempt in the QA fix history.** + +```python +# Record QA fix attempt for recovery tracking +import json +from pathlib import Path +from datetime import datetime + +history_file = Path("memory/qa_fix_history.json") + +# Load or create history +if history_file.exists(): + with open(history_file) as f: + history = json.load(f) +else: + history = {"sessions": [], "metadata": {}} + +# Get session number +session_num = len(history.get("sessions", [])) + 1 + +# Read issues from QA_FIX_REQUEST.md +with open("QA_FIX_REQUEST.md") as f: + qa_request_content = f.read() + +# Parse the issues (simplified - adjust based on actual format) +import re +issue_matches = re.findall(r'##\s+(.+?)(?=\n##|\Z)', qa_request_content, re.DOTALL) +issues = [match.strip() for match in issue_matches if match.strip()] + +# Record this session +session_data = { + "session": session_num, + "timestamp": datetime.now().isoformat(), + "issues": issues, + "issues_count": len(issues), + "success": True, # Optimistic - will update if verification fails + "verified_locally": True, + "commit_hash": None, # Will add after commit + "qa_revalidation_result": None # Will be updated by QA reviewer +} + +history["sessions"].append(session_data) +history["metadata"]["last_updated"] = datetime.now().isoformat() + +# Save +with open(history_file, "w") as f: + json.dump(history, f, indent=2) + +print(f"✓ QA fix session {session_num} recorded ({len(issues)} issues)") +``` + +**If Self-Verification Failed:** + +```python +# Update the session to mark as failed +history_file = Path("memory/qa_fix_history.json") +with open(history_file) as f: + history = json.load(f) + +# Mark the last session as having failed verification +history["sessions"][-1]["success"] = False +history["sessions"][-1]["verified_locally"] = False +history["sessions"][-1]["failure_reason"] = "Self-verification failed - issues not properly fixed" + +with open(history_file, "w") as f: + json.dump(history, f, indent=2) + +print(f"⚠️ QA fix session {session_num} marked as failed") + +# Check if we should escalate +failed_sessions = [s for s in history["sessions"] if not s.get("success", True)] +if len(failed_sessions) >= 3: + print(f"\n⚠️ CRITICAL: {len(failed_sessions)} consecutive failed QA fix sessions.") + print("Consider escalating to human - fixes may not be addressing root causes.") +``` + +--- + ## PHASE 6: COMMIT FIXES ### Path Verification (MANDATORY FIRST STEP) @@ -279,6 +488,30 @@ Verified: - Issues verified locally QA Fix Session: [N]" + +# Capture commit hash for recovery tracking +COMMIT_HASH=$(git rev-parse HEAD) +echo "Commit hash: $COMMIT_HASH" +``` + +**Update QA Fix History with Commit Hash:** + +```python +# Update the session with the commit hash +import json +from pathlib import Path + +history_file = Path("memory/qa_fix_history.json") +with open(history_file) as f: + history = json.load(f) + +# Update the last session with commit hash +history["sessions"][-1]["commit_hash"] = "$COMMIT_HASH" # Replace with actual hash from bash + +with open(history_file, "w") as f: + json.dump(history, f, indent=2) + +print("✓ Commit hash recorded in QA fix history") ``` **CRITICAL**: The `:!.auto-claude` pathspec exclusion ensures spec files are NEVER committed. @@ -416,14 +649,84 @@ The repository inherits the user's configured git identity. Do NOT set test user ## QA LOOP BEHAVIOR +### The QA Fix Loop + After you complete fixes: 1. QA Agent re-runs validation -2. If more issues → You fix again +2. If more issues → You fix again (new session) 3. If approved → Done! -Maximum iterations: 5 +### Recovery Tracking + +Each QA fix session is tracked in `memory/qa_fix_history.json`: +- Session number +- Issues addressed +- Fix approach +- Success/failure status +- Commit hash + +### Escalation Criteria + +**Escalate to human when:** +- **5 consecutive failed QA fix sessions** - Different approaches needed +- **Same issue appears 3+ times across sessions** - Systemic problem +- **Circular fix detected** - Same approach tried multiple times +- **Unable to verify fixes locally** - Environment or test issues + +```python +# Check escalation criteria +import json +from pathlib import Path + +history_file = Path("memory/qa_fix_history.json") +if history_file.exists(): + with open(history_file) as f: + history = json.load(f) + + # Check for repeated failures + recent_sessions = history["sessions"][-5:] # Last 5 sessions + failed_count = sum(1 for s in recent_sessions if not s.get("success", True)) + + if failed_count >= 5: + print("🚨 ESCALATION REQUIRED: 5+ consecutive failed QA fix sessions") + print("Human intervention needed - current approach is not working") + + # Check for circular fixes (same issue appearing repeatedly) + all_issues = [] + for session in history["sessions"]: + all_issues.extend(session.get("issues", [])) + + from collections import Counter + issue_counts = Counter(all_issues) + repeated_issues = [issue for issue, count in issue_counts.items() if count >= 3] + + if repeated_issues: + print(f"⚠️ Repeated issues detected: {repeated_issues}") + print("These issues keep coming back - may need different approach") +``` + +### When QA Revalidation Fails + +If the QA reviewer finds issues after your fixes: +1. A new `QA_FIX_REQUEST.md` will be created +2. You will run again as a new session +3. Review the previous session's approach in `memory/qa_fix_history.json` +4. **TRY A DIFFERENT APPROACH** if the same issue persists + +**CRITICAL**: If you see the same issue in multiple fix sessions: +- The previous fix approach didn't work +- You need to understand WHY it didn't work +- Choose a fundamentally different strategy +- Don't just apply the same fix again + +### Maximum Iterations + +**Maximum QA fix iterations: 5** -After iteration 5, escalate to human. +After iteration 5: +1. Mark status as "blocked" in implementation_plan.json +2. Escalate to human with full context +3. Include all attempted approaches in escalation message --- From 487b51c1d3b286eb889ff1a54e439c3fd5b925d5 Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 12:32:12 +0400 Subject: [PATCH 06/14] auto-claude: subtask-1-6 - Update QA loop to respect fixer iteration results - Add handling for new fixer statuses: 'circular' and 'stuck' - When status='circular': Record dead-end and continue QA loop - When status='stuck': Escalate to human with proper phase cleanup - Update fixer.py to return 'circular' for circular fix detection - Update fixer.py to return 'stuck' when max iterations exhausted - Add user-facing messages for recovery status - Maintain existing error handling for 'error' status Co-Authored-By: Claude Sonnet 4.5 --- apps/backend/qa/fixer.py | 12 +++++----- apps/backend/qa/loop.py | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 6d669d307..081799ceb 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -172,7 +172,7 @@ async def run_qa_fixer_session( success=False, error="Circular fix detected - same fix attempted multiple times", ) - return "error", "Circular fix detected - human intervention recommended" + return "circular", "Circular fix detected - same approach attempted multiple times" # Get total iterations from history total_iterations = len(iteration_history) @@ -457,18 +457,18 @@ async def run_qa_fixer_session( duration_seconds=iteration_duration, ) - # If this is the last iteration, return error + # If this is the last iteration, return stuck status if fixer_iteration == MAX_FIXER_ITERATIONS: debug_error( "qa_fixer", - f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, giving up", + f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, fixer is stuck", ) - print(f"⚠️ Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Giving up.\n") + print(f"⚠️ Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Fixer stuck.\n") # Record failed outcome recovery_manager.record_outcome( fixer_subtask_id, success=False, error=last_error ) - return "error", last_error + return "stuck", f"Fixer stuck after {MAX_FIXER_ITERATIONS} recovery attempts: {last_error}" # Otherwise, continue to next iteration debug( @@ -507,4 +507,4 @@ async def run_qa_fixer_session( success=False, error=final_error, ) - return "error", final_error + return "stuck", f"Fixer stuck after exhausting all recovery attempts: {final_error}" diff --git a/apps/backend/qa/loop.py b/apps/backend/qa/loop.py index c64ade974..95bf04cf4 100644 --- a/apps/backend/qa/loop.py +++ b/apps/backend/qa/loop.py @@ -831,6 +831,53 @@ async def run_qa_validation_loop( [{"title": "Fixer error", "description": fix_response}], ) break + elif fix_status == "circular": + # Circular fix detected - record dead-end and continue QA loop + debug_warning( + "qa_loop", + "Circular fix detected - recording dead-end and continuing", + ) + print("\n⚠️ Circular Fix Detected") + print(" The fixer attempted the same approach multiple times.") + print(" Recording dead-end and continuing QA validation loop...") + record_iteration( + spec_dir, + qa_iteration, + "circular", + [{"title": "Circular fix detected", "description": fix_response}], + ) + # Continue to next QA iteration to see if a different approach is needed + continue + elif fix_status == "stuck": + # Fixer is stuck - escalate to human + debug_error( + "qa_loop", + "Fixer stuck after multiple recovery attempts - escalating to human", + ) + print("\n⚠️ QA Fixer Stuck") + print(" The fixer attempted multiple recovery approaches but could not resolve the issues.") + print(" Escalating to human review...") + record_iteration( + spec_dir, + qa_iteration, + "stuck", + [{"title": "Fixer stuck", "description": fix_response}], + ) + + # End validation phase as failed + if task_logger: + task_logger.end_phase( + LogPhase.VALIDATION, + success=False, + message="QA fixer stuck after multiple recovery attempts - human intervention required", + ) + + # Update Linear if enabled + if linear_task and linear_task.task_id: + await linear_qa_max_iterations(spec_dir, qa_iteration) + print("\nLinear: Task marked as needing human intervention") + + return False debug_success("qa_loop", "Fixes applied, re-running QA validation") print("\n✅ Fixes applied. Re-running QA validation...") From aea49d480f053e17624ef95f294450d9ee99675b Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 12:37:02 +0400 Subject: [PATCH 07/14] auto-claude: subtask-1-7 - Add success rate monitoring for auto-recovery Created RecoveryMetrics class to track auto-recovery success rates: - Tracks total_attempts, successful_recoveries, failed_recoveries, circular_fixes - Records recovery history with timestamps, iterations, duration, strategy - Calculates success rate, circular fix rate, average iterations/duration - Stores metrics in spec_dir/recovery_metrics.json - Provides formatted summary and recent history methods - Includes convenience functions for quick access Follows patterns from qa/report.py for consistency. Co-Authored-By: Claude Sonnet 4.5 --- apps/backend/qa/recovery_metrics.py | 385 ++++++++++++++++++++++++++++ 1 file changed, 385 insertions(+) create mode 100644 apps/backend/qa/recovery_metrics.py diff --git a/apps/backend/qa/recovery_metrics.py b/apps/backend/qa/recovery_metrics.py new file mode 100644 index 000000000..1507767ec --- /dev/null +++ b/apps/backend/qa/recovery_metrics.py @@ -0,0 +1,385 @@ +""" +Recovery Metrics Tracking for Auto-Recovery +============================================ + +Tracks success rates, recovery attempts, and outcomes for the +intelligent auto-recovery loop in QA Fixer. +""" + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +# Configuration +METRICS_FILE = "recovery_metrics.json" + + +# ============================================================================= +# RECOVERY METRICS CLASS +# ============================================================================= + + +class RecoveryMetrics: + """ + Tracks and manages auto-recovery metrics. + + Stores metrics in spec_dir/recovery_metrics.json and provides + methods for recording attempts and calculating success rates. + """ + + def __init__(self, spec_dir: Path | None = None): + """ + Initialize recovery metrics tracker. + + Args: + spec_dir: Spec directory (uses current directory if None) + """ + self.spec_dir = spec_dir or Path.cwd() + self._metrics_file = self.spec_dir / METRICS_FILE + self._metrics = self._load_metrics() + + def _load_metrics(self) -> dict[str, Any]: + """ + Load metrics from recovery_metrics.json. + + Returns: + Metrics dict, initializes empty structure if file doesn't exist + """ + if not self._metrics_file.exists(): + return self._create_empty_metrics() + + try: + with open(self._metrics_file, encoding="utf-8") as f: + data = json.load(f) + # Validate structure + required_keys = [ + "total_attempts", + "successful_recoveries", + "failed_recoveries", + "circular_fixes", + "recovery_history", + ] + if all(key in data for key in required_keys): + return data + # If invalid, create new + return self._create_empty_metrics() + except (OSError, json.JSONDecodeError, UnicodeDecodeError): + return self._create_empty_metrics() + + def _create_empty_metrics(self) -> dict[str, Any]: + """ + Create empty metrics structure. + + Returns: + Dict with initialized metric counters + """ + return { + "total_attempts": 0, + "successful_recoveries": 0, + "failed_recoveries": 0, + "circular_fixes": 0, + "recovery_history": [], + "created_at": datetime.now(timezone.utc).isoformat(), + "last_updated": datetime.now(timezone.utc).isoformat(), + } + + def _save_metrics(self) -> bool: + """ + Save metrics to recovery_metrics.json. + + Returns: + True if saved successfully + """ + try: + self._metrics["last_updated"] = datetime.now(timezone.utc).isoformat() + with open(self._metrics_file, "w", encoding="utf-8") as f: + json.dump(self._metrics, f, indent=2, ensure_ascii=False) + return True + except (OSError, TypeError, UnicodeDecodeError): + return False + + # ------------------------------------------------------------------------- + # RECORDING METHODS + # ------------------------------------------------------------------------- + + def record_attempt( + self, + outcome: str, + iterations: int, + duration_seconds: float | None = None, + issues_fixed: int = 0, + strategy: str | None = None, + ) -> bool: + """ + Record a recovery attempt outcome. + + Args: + outcome: Recovery outcome ("success", "failed", "circular") + iterations: Number of iterations used + duration_seconds: Optional duration of the recovery attempt + issues_fixed: Number of issues fixed + strategy: Strategy used for recovery (optional) + + Returns: + True if recorded successfully + """ + # Update counters + self._metrics["total_attempts"] += 1 + + if outcome == "success": + self._metrics["successful_recoveries"] += 1 + elif outcome == "failed": + self._metrics["failed_recoveries"] += 1 + elif outcome == "circular": + self._metrics["circular_fixes"] += 1 + + # Record history + record = { + "attempt_number": self._metrics["total_attempts"], + "outcome": outcome, + "iterations": iterations, + "timestamp": datetime.now(timezone.utc).isoformat(), + "issues_fixed": issues_fixed, + } + + if duration_seconds is not None: + record["duration_seconds"] = round(duration_seconds, 2) + + if strategy: + record["strategy"] = strategy + + self._metrics["recovery_history"].append(record) + + return self._save_metrics() + + def record_user_intervention(self, iteration: int) -> bool: + """ + Record a user intervention during recovery. + + Args: + iteration: Iteration number when intervention occurred + + Returns: + True if recorded successfully + """ + record = { + "attempt_number": self._metrics["total_attempts"] + 1, + "outcome": "user_intervention", + "iterations": iteration, + "timestamp": datetime.now(timezone.utc).isoformat(), + "issues_fixed": 0, + } + + self._metrics["recovery_history"].append(record) + self._metrics["total_attempts"] += 1 + + return self._save_metrics() + + # ------------------------------------------------------------------------- + # STATISTICS METHODS + # ------------------------------------------------------------------------- + + def get_success_rate(self) -> float: + """ + Calculate overall recovery success rate. + + Returns: + Success rate as percentage (0-100) + """ + if self._metrics["total_attempts"] == 0: + return 0.0 + + successful = self._metrics["successful_recoveries"] + total = self._metrics["total_attempts"] + return round((successful / total) * 100, 2) + + def get_average_iterations(self) -> float: + """ + Calculate average iterations per recovery attempt. + + Returns: + Average iterations, or 0 if no attempts + """ + if not self._metrics["recovery_history"]: + return 0.0 + + total_iterations = sum( + record.get("iterations", 0) for record in self._metrics["recovery_history"] + ) + return round(total_iterations / len(self._metrics["recovery_history"]), 2) + + def get_average_duration(self) -> float: + """ + Calculate average duration of recovery attempts. + + Returns: + Average duration in seconds, or 0 if no duration data + """ + durations = [ + record.get("duration_seconds", 0) + for record in self._metrics["recovery_history"] + if "duration_seconds" in record + ] + + if not durations: + return 0.0 + + return round(sum(durations) / len(durations), 2) + + def get_circular_fix_rate(self) -> float: + """ + Calculate rate of circular fix detection. + + Returns: + Circular fix rate as percentage (0-100) + """ + if self._metrics["total_attempts"] == 0: + return 0.0 + + circular = self._metrics["circular_fixes"] + total = self._metrics["total_attempts"] + return round((circular / total) * 100, 2) + + # ------------------------------------------------------------------------- + # SUMMARY METHODS + # ------------------------------------------------------------------------- + + def get_summary(self) -> dict[str, Any]: + """ + Get comprehensive recovery metrics summary. + + Returns: + Dict with all metrics and calculated statistics + """ + return { + "total_attempts": self._metrics["total_attempts"], + "successful_recoveries": self._metrics["successful_recoveries"], + "failed_recoveries": self._metrics["failed_recoveries"], + "circular_fixes": self._metrics["circular_fixes"], + "success_rate_percent": self.get_success_rate(), + "circular_fix_rate_percent": self.get_circular_fix_rate(), + "average_iterations": self.get_average_iterations(), + "average_duration_seconds": self.get_average_duration(), + "last_updated": self._metrics.get("last_updated"), + } + + def get_recent_history(self, limit: int = 5) -> list[dict[str, Any]]: + """ + Get recent recovery attempts. + + Args: + limit: Maximum number of recent attempts to return + + Returns: + List of recent recovery records (most recent first) + """ + return self._metrics["recovery_history"][-limit:][::-1] + + def format_summary(self) -> str: + """ + Format recovery metrics summary as human-readable string. + + Returns: + Formatted summary string + """ + summary = self.get_summary() + + lines = [ + "📊 Recovery Metrics Summary", + "", + f"Total Attempts: {summary['total_attempts']}", + f"Successful Recoveries: {summary['successful_recoveries']}", + f"Failed Recoveries: {summary['failed_recoveries']}", + f"Circular Fixes Detected: {summary['circular_fixes']}", + "", + f"Success Rate: {summary['success_rate_percent']:.1f}%", + f"Circular Fix Rate: {summary['circular_fix_rate_percent']:.1f}%", + f"Avg Iterations per Recovery: {summary['average_iterations']:.1f}", + ] + + if summary["average_duration_seconds"] > 0: + lines.append( + f"Avg Duration: {summary['average_duration_seconds']:.1f} seconds" + ) + + return "\n".join(lines) + + # ------------------------------------------------------------------------- + # UTILITY METHODS + # ------------------------------------------------------------------------- + + def reset_metrics(self) -> bool: + """ + Reset all metrics to zero. + + Returns: + True if reset successfully + """ + self._metrics = self._create_empty_metrics() + return self._save_metrics() + + def get_metrics_file_path(self) -> Path: + """ + Get the path to the metrics file. + + Returns: + Path to recovery_metrics.json + """ + return self._metrics_file + + +# ============================================================================= +# CONVENIENCE FUNCTIONS +# ============================================================================= + + +def load_recovery_metrics(spec_dir: Path) -> RecoveryMetrics: + """ + Load recovery metrics for a spec directory. + + Args: + spec_dir: Spec directory path + + Returns: + RecoveryMetrics instance + """ + return RecoveryMetrics(spec_dir) + + +def record_recovery_outcome( + spec_dir: Path, + outcome: str, + iterations: int, + duration_seconds: float | None = None, + issues_fixed: int = 0, +) -> bool: + """ + Convenience function to record a recovery outcome. + + Args: + spec_dir: Spec directory + outcome: Recovery outcome ("success", "failed", "circular") + iterations: Number of iterations used + duration_seconds: Optional duration + issues_fixed: Number of issues fixed + + Returns: + True if recorded successfully + """ + metrics = RecoveryMetrics(spec_dir) + return metrics.record_attempt(outcome, iterations, duration_seconds, issues_fixed) + + +def get_recovery_summary(spec_dir: Path) -> dict[str, Any]: + """ + Convenience function to get recovery summary. + + Args: + spec_dir: Spec directory + + Returns: + Recovery metrics summary dict + """ + metrics = RecoveryMetrics(spec_dir) + return metrics.get_summary() From 6ef0a5b64df56b0c6a9dcfbb2be1c65901703e63 Mon Sep 17 00:00:00 2001 From: omyag Date: Fri, 6 Feb 2026 12:46:30 +0400 Subject: [PATCH 08/14] auto-claude: subtask-1-8 - Verify user intervention capability with auto-recovery Changes: - Add AUTO_GENERATED_BY_QA_AGENT marker to QA_FIX_REQUEST.md template - Add user intervention documentation to qa_reviewer.md - Update qa_fixer.md to detect and handle user-edited files - Test verify check_user_correction() function works correctly The user intervention capability allows users to: - Manually edit QA_FIX_REQUEST.md to provide guidance - Remove the marker to indicate manual intervention - Override automated QA decisions with their own corrections This integrates seamlessly with the auto-recovery loop - when a user correction is detected, the fixer prioritizes user guidance over automated issue descriptions. --- apps/backend/prompts/qa_fixer.md | 9 +++++++++ apps/backend/prompts/qa_reviewer.md | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/apps/backend/prompts/qa_fixer.md b/apps/backend/prompts/qa_fixer.md index 3bd95bdd4..7a127be56 100644 --- a/apps/backend/prompts/qa_fixer.md +++ b/apps/backend/prompts/qa_fixer.md @@ -113,6 +113,15 @@ echo "=== END RECOVERY CONTEXT ===" - Required fixes - Verification criteria +**USER INTERVENTION DETECTION**: +- Check if `QA_FIX_REQUEST.md` contains the marker `` +- If the marker is **MISSING**, the user has manually edited this file +- Treat user-edited files with special attention: + - The user may have corrected misidentified issues + - The user may have added context or specific guidance + - The user may have overridden automated QA decisions + - **Prioritize user guidance over automated issue descriptions** + **RECOVERY AWARENESS**: If you see previous QA fix sessions in the history: - Previous fix attempts FAILED QA validation - Review what was tried before diff --git a/apps/backend/prompts/qa_reviewer.md b/apps/backend/prompts/qa_reviewer.md index a1407763c..9ef70bd1d 100644 --- a/apps/backend/prompts/qa_reviewer.md +++ b/apps/backend/prompts/qa_reviewer.md @@ -581,6 +581,8 @@ Create a fix request file: ```bash cat > QA_FIX_REQUEST.md << 'EOF' + + # QA Fix Request **Status**: REJECTED @@ -605,6 +607,21 @@ Once fixes are complete: 2. QA will automatically re-run 3. Loop continues until approved +--- +## USER INTERVENTION + +If you'd like to provide manual guidance to the fixer: +1. Edit this file directly to modify or add issues +2. Remove the `` marker at the top +3. Save your changes - the QA loop will detect your manual intervention +4. The fixer will use your edited version instead of the original + +This allows you to: +- Correct misidentified issues +- Add missing context +- Provide specific guidance for fixes +- Override automated QA decisions + EOF # Note: QA_FIX_REQUEST.md and implementation_plan.json are in .auto-claude/specs/ (gitignored) From 165c2f9b01019344b5af4455bf3acbe6288031d6 Mon Sep 17 00:00:00 2001 From: omyag Date: Sat, 7 Feb 2026 15:41:58 +0400 Subject: [PATCH 09/14] fix: address PR review comments for auto-recovery loop - Add record_outcome() method to RecoveryManager for updating attempt results - Fix record_attempt() call in fixer.py to include required session and success params - Make _save_metrics() atomic using temp file + os.replace to prevent corruption - Fix record_user_intervention() increment order to be consistent with record_attempt() Co-Authored-By: Claude Opus 4.6 --- apps/backend/qa/fixer.py | 5 ++++- apps/backend/qa/recovery_metrics.py | 15 +++++++++++---- apps/backend/services/recovery.py | 26 ++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 081799ceb..0896b4ffb 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -200,7 +200,10 @@ async def run_qa_fixer_session( # Record this attempt with recovery manager recovery_manager.record_attempt( - fixer_subtask_id, approach=f"QA fixer session {fix_session}, iteration {fixer_iteration}" + fixer_subtask_id, + session=fix_session, + success=False, # Will be updated by record_outcome + approach=f"QA fixer session {fix_session}, iteration {fixer_iteration}", ) try: diff --git a/apps/backend/qa/recovery_metrics.py b/apps/backend/qa/recovery_metrics.py index 1507767ec..2636c3c1b 100644 --- a/apps/backend/qa/recovery_metrics.py +++ b/apps/backend/qa/recovery_metrics.py @@ -7,6 +7,7 @@ """ import json +import os from datetime import datetime, timezone from pathlib import Path from typing import Any @@ -86,15 +87,19 @@ def _create_empty_metrics(self) -> dict[str, Any]: def _save_metrics(self) -> bool: """ - Save metrics to recovery_metrics.json. + Save metrics to recovery_metrics.json atomically. + + Uses a temp file + os.replace to prevent corruption on crash. Returns: True if saved successfully """ try: self._metrics["last_updated"] = datetime.now(timezone.utc).isoformat() - with open(self._metrics_file, "w", encoding="utf-8") as f: + tmp_file = self._metrics_file.with_suffix(".json.tmp") + with open(tmp_file, "w", encoding="utf-8") as f: json.dump(self._metrics, f, indent=2, ensure_ascii=False) + os.replace(tmp_file, self._metrics_file) return True except (OSError, TypeError, UnicodeDecodeError): return False @@ -163,8 +168,11 @@ def record_user_intervention(self, iteration: int) -> bool: Returns: True if recorded successfully """ + # Increment first, consistent with record_attempt + self._metrics["total_attempts"] += 1 + record = { - "attempt_number": self._metrics["total_attempts"] + 1, + "attempt_number": self._metrics["total_attempts"], "outcome": "user_intervention", "iterations": iteration, "timestamp": datetime.now(timezone.utc).isoformat(), @@ -172,7 +180,6 @@ def record_user_intervention(self, iteration: int) -> bool: } self._metrics["recovery_history"].append(record) - self._metrics["total_attempts"] += 1 return self._save_metrics() diff --git a/apps/backend/services/recovery.py b/apps/backend/services/recovery.py index 45126df70..ce3ba2067 100644 --- a/apps/backend/services/recovery.py +++ b/apps/backend/services/recovery.py @@ -530,6 +530,32 @@ def get_recovery_hints(self, subtask_id: str) -> list[str]: return hints + def record_outcome( + self, subtask_id: str, success: bool, error: str | None = None + ) -> None: + """ + Record the outcome of the most recent attempt for a subtask. + + Updates the last recorded attempt with the success/failure result. + + Args: + subtask_id: ID of the subtask + success: Whether the attempt succeeded + error: Error message if failed + """ + history = self._load_attempt_history() + subtask_data = history["subtasks"].get(subtask_id) + + if subtask_data and subtask_data["attempts"]: + # Update the last attempt with the outcome + subtask_data["attempts"][-1]["success"] = success + if error: + subtask_data["attempts"][-1]["error"] = error + + # Update subtask status + subtask_data["status"] = "completed" if success else "failed" + self._save_attempt_history(history) + def clear_stuck_subtasks(self) -> None: """Clear all stuck subtasks (for manual resolution).""" history = self._load_attempt_history() From c9955ba3da477db750d0cd226e07b345a7c4564b Mon Sep 17 00:00:00 2001 From: omyag Date: Sat, 7 Feb 2026 15:47:45 +0400 Subject: [PATCH 10/14] fix: resolve cyclic imports and ruff F541 in qa/fixer.py - Move .criteria and .report imports inside run_qa_fixer_session() to break circular import cycle through __init__.py (CodeQL alerts) - Remove f-prefix from strings without placeholders (ruff F541) - Apply ruff format line-length fixes Co-Authored-By: Claude Opus 4.6 --- apps/backend/qa/fixer.py | 67 +++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 0896b4ffb..177e94e35 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -24,9 +24,6 @@ get_task_logger, ) -from .criteria import get_qa_signoff_status, is_fixes_applied -from .report import get_iteration_history, has_recurring_issues, record_iteration - # Configuration QA_PROMPTS_DIR = Path(__file__).parent.parent / "prompts" MAX_FIXER_ITERATIONS = 10 # Max recovery attempts for a single QA fix session @@ -72,6 +69,10 @@ async def run_qa_fixer_session( - "fixed" if fixes were applied - "error" if an error occurred """ + # Lazy imports to avoid circular import via __init__.py + from .criteria import get_qa_signoff_status, is_fixes_applied + from .report import get_iteration_history, has_recurring_issues, record_iteration + # Derive project_dir from spec_dir if not provided # spec_dir is typically: /project/.auto-claude/specs/001-name/ if project_dir is None: @@ -120,9 +121,11 @@ async def run_qa_fixer_session( current_issues, iteration_history ) if has_recurring: - print(f"\n⚠️ WARNING: Recurring issues detected!") - print(f" {len(recurring_issues)} issue(s) have appeared multiple times.") - print(f" Consider a different approach or human intervention.\n") + print("\n⚠️ WARNING: Recurring issues detected!") + print( + f" {len(recurring_issues)} issue(s) have appeared multiple times." + ) + print(" Consider a different approach or human intervention.\n") debug_error( "qa_fixer", f"Recurring issues detected: {len(recurring_issues)} issues", @@ -161,10 +164,8 @@ async def run_qa_fixer_session( "qa_fixer", f"Circular fix detected for {fixer_subtask_id} (attempt #{attempt_count})", ) - print(f"\n⚠️ WARNING: Circular fix detected!") - print( - f"This fix has been attempted {attempt_count} times with similar errors." - ) + print("\n⚠️ WARNING: Circular fix detected!") + print(f"This fix has been attempted {attempt_count} times with similar errors.") print("Consider human intervention or a different approach.\n") # Record circular fix outcome recovery_manager.record_outcome( @@ -172,7 +173,10 @@ async def run_qa_fixer_session( success=False, error="Circular fix detected - same fix attempted multiple times", ) - return "circular", "Circular fix detected - same approach attempted multiple times" + return ( + "circular", + "Circular fix detected - same approach attempted multiple times", + ) # Get total iterations from history total_iterations = len(iteration_history) @@ -185,7 +189,9 @@ async def run_qa_fixer_session( if fixer_iteration > 1: print(f"\n{'=' * 70}") - print(f" QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}") + print( + f" QA FIXER RECOVERY ATTEMPT {fixer_iteration}/{MAX_FIXER_ITERATIONS}" + ) print(f"{'=' * 70}\n") debug( "qa_fixer", @@ -372,10 +378,14 @@ async def run_qa_fixer_session( # Calculate iteration duration iteration_duration = time.time() - iteration_start_time - debug_success("qa_fixer", "Fixes applied and validated, ready for QA revalidation") - print(f"\n✓ Fixes applied successfully!") + debug_success( + "qa_fixer", "Fixes applied and validated, ready for QA revalidation" + ) + print("\n✓ Fixes applied successfully!") print(f" Duration: {iteration_duration:.1f}s") - print(f" Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n") + print( + f" Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n" + ) # Record successful iteration to history record_iteration( @@ -403,10 +413,14 @@ async def run_qa_fixer_session( # Fixer didn't update the status properly, but we'll trust it worked iteration_duration = time.time() - iteration_start_time - debug_success("qa_fixer", "Fixes assumed applied (status validation failed)") - print(f"\n✓ Fixes applied (status validation skipped)") + debug_success( + "qa_fixer", "Fixes assumed applied (status validation failed)" + ) + print("\n✓ Fixes applied (status validation skipped)") print(f" Duration: {iteration_duration:.1f}s") - print(f" Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n") + print( + f" Recovery iterations: {fixer_iteration}/{MAX_FIXER_ITERATIONS}\n" + ) # Record iteration to history record_iteration( @@ -466,19 +480,26 @@ async def run_qa_fixer_session( "qa_fixer", f"Max fixer iterations ({MAX_FIXER_ITERATIONS}) reached, fixer is stuck", ) - print(f"⚠️ Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Fixer stuck.\n") + print( + f"⚠️ Max recovery attempts ({MAX_FIXER_ITERATIONS}) reached. Fixer stuck.\n" + ) # Record failed outcome recovery_manager.record_outcome( fixer_subtask_id, success=False, error=last_error ) - return "stuck", f"Fixer stuck after {MAX_FIXER_ITERATIONS} recovery attempts: {last_error}" + return ( + "stuck", + f"Fixer stuck after {MAX_FIXER_ITERATIONS} recovery attempts: {last_error}", + ) # Otherwise, continue to next iteration debug( "qa_fixer", f"Will retry (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})", ) - print(f" Retrying... (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})\n") + print( + f" Retrying... (attempt {fixer_iteration + 1}/{MAX_FIXER_ITERATIONS})\n" + ) continue # If we exhausted all iterations without success @@ -486,7 +507,9 @@ async def run_qa_fixer_session( "qa_fixer", f"Exhausted all {MAX_FIXER_ITERATIONS} fixer iterations without success", ) - print(f"\n⚠️ Exhausted all {MAX_FIXER_ITERATIONS} recovery attempts without success.\n") + print( + f"\n⚠️ Exhausted all {MAX_FIXER_ITERATIONS} recovery attempts without success.\n" + ) # Record final failure final_error = last_error if last_error else "Max fixer iterations reached" From ce929436b3645347bb52bd223bf973bf80a49678 Mon Sep 17 00:00:00 2001 From: omyag Date: Sat, 7 Feb 2026 16:59:35 +0400 Subject: [PATCH 11/14] style: fix ruff lint errors in loop.py and fixer.py - Fix I001: sort imports (move test_generator, code_analyzer to correct position) - Fix F541: remove f-prefix from strings without placeholders - Fix UP015: remove unnecessary "r" mode argument in open() - Fix cyclic imports in fixer.py (move .criteria/.report to lazy imports) - Apply ruff format line-length fixes Co-Authored-By: Claude Opus 4.6 --- apps/backend/qa/loop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/backend/qa/loop.py b/apps/backend/qa/loop.py index 95bf04cf4..c2887c1b0 100644 --- a/apps/backend/qa/loop.py +++ b/apps/backend/qa/loop.py @@ -13,8 +13,6 @@ from typing import Any from agents.memory_manager import save_user_correction - -# Test generation imports from agents.test_generator import run_test_generator_session from analysis.code_analyzer import CodeAnalyzer from analysis.failure_analyzer import analyze_failure, is_analysis_enabled @@ -855,7 +853,9 @@ async def run_qa_validation_loop( "Fixer stuck after multiple recovery attempts - escalating to human", ) print("\n⚠️ QA Fixer Stuck") - print(" The fixer attempted multiple recovery approaches but could not resolve the issues.") + print( + " The fixer attempted multiple recovery approaches but could not resolve the issues." + ) print(" Escalating to human review...") record_iteration( spec_dir, From b727feaeb21f97ec8526c22abb27b66ab4e0ed17 Mon Sep 17 00:00:00 2001 From: omyag Date: Sat, 7 Feb 2026 17:12:57 +0400 Subject: [PATCH 12/14] fix: resolve flaky project-store tests on macOS - Add 50ms delay in afterEach before rmSync cleanup to let in-flight async saves complete (fixes ENOTEMPTY on macOS) - Wrap JSON.parse in try-catch inside polling loops to handle partial writes (fixes SyntaxError: Unexpected end of JSON input) Co-Authored-By: Claude Opus 4.6 --- .../src/main/__tests__/project-store.test.ts | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/apps/frontend/src/main/__tests__/project-store.test.ts b/apps/frontend/src/main/__tests__/project-store.test.ts index 9bf350f01..5b3a8e81b 100644 --- a/apps/frontend/src/main/__tests__/project-store.test.ts +++ b/apps/frontend/src/main/__tests__/project-store.test.ts @@ -85,7 +85,11 @@ describe('ProjectStore', () => { vi.resetModules(); }); - afterEach(() => { + afterEach(async () => { + // Wait for any in-flight async saves to complete before cleanup. + // ProjectStore uses fire-and-forget saveAsync() which can still be + // writing to disk when afterEach runs, causing ENOTEMPTY on macOS. + await new Promise(r => setTimeout(r, 50)); cleanupTestDirs(); vi.clearAllMocks(); }); @@ -202,9 +206,13 @@ describe('ProjectStore', () => { const start = Date.now(); let content: { projects: unknown[] } = { projects: [1] }; while (Date.now() - start < 2000) { - const raw = readFileSync(storePath, 'utf-8'); - content = JSON.parse(raw); - if (content.projects.length === 0) break; + try { + const raw = readFileSync(storePath, 'utf-8'); + content = JSON.parse(raw); + if (content.projects.length === 0) break; + } catch { + // File may be partially written - keep polling + } await new Promise(r => setTimeout(r, 10)); } expect(content.projects).toHaveLength(0); @@ -315,9 +323,13 @@ describe('ProjectStore', () => { const start = Date.now(); let content: { projects: Array<{ settings: { model?: string } }> } = { projects: [] }; while (Date.now() - start < 2000) { - const raw = readFileSync(storePath, 'utf-8'); - content = JSON.parse(raw); - if (content.projects[0]?.settings?.model === 'sonnet') break; + try { + const raw = readFileSync(storePath, 'utf-8'); + content = JSON.parse(raw); + if (content.projects[0]?.settings?.model === 'sonnet') break; + } catch { + // File may be partially written - keep polling + } await new Promise(r => setTimeout(r, 10)); } expect(content.projects[0].settings.model).toBe('sonnet'); From f920dd1100621d6df43cf8fd8c2b7c4f59cd39f0 Mon Sep 17 00:00:00 2001 From: omyag Date: Sat, 7 Feb 2026 17:22:42 +0400 Subject: [PATCH 13/14] fix: mock getGitHubTokenForSubprocess in runner-env test to prevent Windows CI timeout The test was calling the real getGitHubTokenForSubprocess() which spawns a gh CLI subprocess. On Windows CI this takes >5s causing test timeout. Co-Authored-By: Claude Opus 4.6 --- .../ipc-handlers/github/utils/__tests__/runner-env.test.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts b/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts index d06023007..9397f8c84 100644 --- a/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts +++ b/apps/frontend/src/main/ipc-handlers/github/utils/__tests__/runner-env.test.ts @@ -4,6 +4,7 @@ const mockGetAPIProfileEnv = vi.fn(); const mockGetOAuthModeClearVars = vi.fn(); const mockGetPythonEnv = vi.fn(); const mockGetBestAvailableProfileEnv = vi.fn(); +const mockGetGitHubTokenForSubprocess = vi.fn(); vi.mock('../../../../services/profile', () => ({ getAPIProfileEnv: (...args: unknown[]) => mockGetAPIProfileEnv(...args), @@ -23,11 +24,17 @@ vi.mock('../../../../rate-limit-detector', () => ({ getBestAvailableProfileEnv: () => mockGetBestAvailableProfileEnv(), })); +vi.mock('../utils', () => ({ + getGitHubTokenForSubprocess: () => mockGetGitHubTokenForSubprocess(), +})); + import { getRunnerEnv } from '../runner-env'; describe('getRunnerEnv', () => { beforeEach(() => { vi.clearAllMocks(); + // Default mock for GitHub token - returns null (no gh CLI needed) + mockGetGitHubTokenForSubprocess.mockResolvedValue(null); // Default mock for Python env - minimal env for testing mockGetPythonEnv.mockReturnValue({ PYTHONDONTWRITEBYTECODE: '1', From 35dfddb7523f3a72670853fe2c7f71ac3c59e2a8 Mon Sep 17 00:00:00 2001 From: omyag Date: Sat, 7 Feb 2026 17:30:49 +0400 Subject: [PATCH 14/14] fix: stabilize flaky frontend tests for cross-platform CI - project-store: add waitForStoreInit() before addProject() in updateProjectSettings tests to prevent initializeAsync() race condition on macOS CI - runner-env: mock getGitHubTokenForSubprocess to prevent real gh CLI subprocess spawn causing timeout on Windows CI Co-Authored-By: Claude Opus 4.6 --- apps/frontend/src/main/__tests__/project-store.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/apps/frontend/src/main/__tests__/project-store.test.ts b/apps/frontend/src/main/__tests__/project-store.test.ts index 5b3a8e81b..dcf66f575 100644 --- a/apps/frontend/src/main/__tests__/project-store.test.ts +++ b/apps/frontend/src/main/__tests__/project-store.test.ts @@ -280,6 +280,7 @@ describe('ProjectStore', () => { it('should update settings and return updated project', async () => { const { ProjectStore } = await import('../project-store'); const store = new ProjectStore(); + await waitForStoreInit(); const project = store.addProject(TEST_PROJECT_PATH); const updated = store.updateProjectSettings(project.id, { @@ -295,6 +296,7 @@ describe('ProjectStore', () => { it('should update updatedAt timestamp', async () => { const { ProjectStore } = await import('../project-store'); const store = new ProjectStore(); + await waitForStoreInit(); const project = store.addProject(TEST_PROJECT_PATH); const originalUpdatedAt = project.updatedAt;