diff --git a/.claude/templates/auto_improve_prompt.template.md b/.claude/templates/auto_improve_prompt.template.md new file mode 100644 index 00000000..d9ac67b9 --- /dev/null +++ b/.claude/templates/auto_improve_prompt.template.md @@ -0,0 +1,160 @@ +## YOUR ROLE - AUTO-IMPROVE AGENT + +You are running in **auto-improve mode**. Your entire job this session is to make the application **meaningfully better** in exactly ONE way. The project is already finished — all existing features pass. You are here to polish, enhance, and evolve it. + +This is a FRESH context window. You have no memory of previous sessions. Previous auto-improve sessions may have already added improvements. Your job is to pick ONE new improvement, implement it, and commit it. + +### STEP 1: GET YOUR BEARINGS + +Start by orienting yourself: + +```bash +# Understand the project +pwd +ls -la +cat app_spec.txt 2>/dev/null || cat .autoforge/prompts/app_spec.txt 2>/dev/null + +# See what's been done recently (previous auto-improvements, other commits) +git log --oneline -20 + +# See recent progress notes if they exist +tail -200 claude-progress.txt 2>/dev/null || true +``` + +Then use MCP tools to check feature status: + +``` +Use the feature_get_stats tool +Use the feature_get_summary tool +``` + +You are looking at an app that someone is running in "autopilot polish" mode. Respect what is already there. Read some of the actual source to get a feel for the codebase. + +### STEP 2: CHOOSE ONE MEANINGFUL IMPROVEMENT + +Brainstorm silently, then pick exactly ONE improvement. 
Valid categories: + +- **Performance** — cache a hot path, remove an N+1, memoize an expensive component, debounce a noisy handler +- **UX / UI polish** — empty states, loading states, error states, keyboard shortcuts, micro-interactions, accessibility +- **Visual design** — spacing, typography, color hierarchy, alignment, iconography +- **Small new feature** — a natural next step that fits the app's purpose +- **Security hardening** — input validation, authorization checks, rate limits, secret handling +- **Refactor for clarity** — extract a confused function, rename a misleading variable, split a file that has outgrown itself +- **Accessibility** — focus rings, aria-labels, keyboard navigation, color contrast +- **Dependency / config** — bump a safe dep, tighten a lint rule that would catch a real class of bugs + +**Choose deliberately:** +- The improvement must be genuinely useful to an end user or to future developers. +- Prefer improvements that complement what's already there over inventing new scope. +- If the app has obvious rough edges, fix those first before inventing new features. +- Do NOT touch any feature on the Kanban that is currently `in_progress` — leave it alone. +- Avoid duplicating past improvements (read `git log` to see what's already been done). + +### STEP 3: ADD THE IMPROVEMENT AS A FEATURE + +Call the `feature_create` MCP tool with: + +- `category`: e.g., `"Performance"`, `"UX Polish"`, `"Security"`, `"Refactor"`, `"Accessibility"`, `"New Feature"` +- `name`: a short imperative title, e.g., `"Add empty state to project list"` +- `description`: 1-3 sentences explaining what the change is and why it matters +- `steps`: 3-5 concrete acceptance steps (what must be true when this is done) + +**Record the returned feature ID.** You will use it in later steps. Then mark it in progress: + +``` +Use the feature_mark_in_progress tool with feature_id={your_new_id} +``` + +### STEP 4: IMPLEMENT THE IMPROVEMENT + +Implement the change fully. 
Keep scope tight: + +- Edit only the files you need to change. +- Don't add speculative abstractions or "while I'm here" refactors. +- Don't add comments/docstrings to code you didn't touch. +- Don't rename things that don't need renaming. +- If you discover a bug that is NOT your chosen improvement, leave it alone (or note it in `claude-progress.txt` for a future session). + +If your improvement is a UI change, actually look at the result — take a screenshot with `playwright-cli` if the dev server is running, or at minimum open the relevant component and verify your edit makes sense. + +### STEP 5: VERIFY WITH LINT / TYPECHECK / BUILD + +**Mandatory.** Before committing, confirm the code still compiles cleanly. Pick the right commands based on the project type (check `package.json`, `pyproject.toml`, `Cargo.toml`, etc.). + +Typical command sets: + +- **Node / TypeScript / Vite / Next**: `npm run lint && npm run build` + (or `npm run typecheck` if it exists as a separate script) +- **Python**: `ruff check . && mypy .` (or whatever is configured in `pyproject.toml`) +- **Rust**: `cargo check && cargo clippy` +- **Go**: `go vet ./... && go build ./...` + +**Resolve any issues your change introduced.** If lint/typecheck/build was already failing before your change (unrelated breakage), do NOT "fix" the unrelated failures — that's scope creep. Revert your change and pick a different improvement if the codebase is in a broken baseline state. + +### STEP 6: MARK THE FEATURE PASSING + +Call the feature MCP tool: + +``` +Use the feature_mark_passing tool with feature_id={your_new_id} +``` + +### STEP 7: CREATE A COMMIT + +Stage your changes and commit with a **short, concise, TLDR-style message**. One line for the subject, optionally one or two more for the "why". No verbose bullet lists, no trailing summaries. 
+ +```bash +git status +git add -A +git commit -m "Add empty state to project list when no projects exist" +``` + +Good commit message examples: +- `"Cache project stats query to cut dashboard load time"` +- `"Add keyboard shortcut (Cmd+K) to open command palette"` +- `"Harden upload endpoint against oversized files"` +- `"Extract confused session handling into its own module"` + +Bad commit message examples: +- `"Various improvements"` (too vague) +- `"Made the app better by implementing several changes to improve UX including..."` (too long) + +### STEP 8: EXIT THIS SESSION + +When the commit is created successfully, your work for this session is done. Do NOT try to find a second improvement — one per session is the rule. Stop and let the next scheduled tick handle the next improvement. + +--- + +## GUARDRAILS (READ CAREFULLY) + +1. **One improvement per session.** If you finish early, don't start another. Exit cleanly. +2. **Never skip lint / typecheck / build.** If they fail, fix or revert. +3. **Never commit broken code.** A commit with failing lint/build is worse than no commit. +4. **Don't touch features other agents are working on** (anything with `in_progress=True`). +5. **Don't bypass the feature MCP tools.** Create a real Kanban feature for your change so it shows up in the UI. +6. **Keep commit messages under 72 characters for the subject line.** +7. **Don't add dependencies you don't need.** If the improvement needs a new package, be sure it's justified. +8. **Respect the existing architecture.** Don't rewrite patterns the project has already committed to. 
+ +--- + +## BROWSER AUTOMATION (OPTIONAL) + +If your improvement is visual and the dev server is running, you may use `playwright-cli` to verify it renders correctly: + +- Open: `playwright-cli open http://localhost:PORT` +- Screenshot: `playwright-cli screenshot` +- Read the screenshot file to verify visual appearance +- Close: `playwright-cli close` + +Browser verification is **optional** in auto-improve mode. Lint + typecheck + build is mandatory; visual verification is a bonus when relevant. + +--- + +## SUCCESS CRITERIA + +A successful auto-improve session ends with: +1. One new feature on the Kanban, marked passing. +2. A clean git commit with a short TLDR message. +3. No lint / typecheck / build errors introduced. +4. The agent exits cleanly without starting a second improvement. diff --git a/agent.py b/agent.py index c2244390..668391f7 100644 --- a/agent.py +++ b/agent.py @@ -31,6 +31,7 @@ ) from prompts import ( copy_spec_to_project, + get_auto_improve_prompt, get_batch_feature_prompt, get_coding_prompt, get_initializer_prompt, @@ -163,6 +164,7 @@ async def run_autonomous_agent( agent_type: Optional[str] = None, testing_feature_id: Optional[int] = None, testing_feature_ids: Optional[list[int]] = None, + auto_improve: bool = False, ) -> None: """ Run the autonomous agent loop. @@ -177,6 +179,9 @@ async def run_autonomous_agent( agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect) testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode) testing_feature_ids: For testing agents, list of feature IDs to batch test + auto_improve: If True, run in auto-improve mode (agent creates one + improvement feature, implements it, commits, and exits). Takes + precedence over other prompt selection branches. 
""" print("\n" + "=" * 70) print(" AUTONOMOUS CODING AGENT") @@ -185,6 +190,8 @@ async def run_autonomous_agent( print(f"Model: {model}") if agent_type: print(f"Agent type: {agent_type}") + if auto_improve: + print("Mode: AUTO-IMPROVE (one improvement + commit per session)") if yolo_mode: print("Mode: YOLO (testing agents disabled)") if feature_ids and len(feature_ids) > 1: @@ -240,7 +247,8 @@ async def run_autonomous_agent( # Check if all features are already complete (before starting a new session) # Skip this check if running as initializer (needs to create features first) - if not is_initializer and iteration == 1: + # or auto-improve mode (intentionally runs against finished projects) + if not is_initializer and not auto_improve and iteration == 1: passing, in_progress, total, _nhi = count_passing_tests(project_dir) if total > 0 and passing == total: print("\n" + "=" * 70) @@ -262,7 +270,11 @@ async def run_autonomous_agent( client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_type=agent_type) # Choose prompt based on agent type - if agent_type == "initializer": + # auto_improve takes precedence over other branches — it's a distinct + # mode where the agent creates its own feature before implementing it. 
def get_auto_improve_prompt(project_dir: Path | None = None, yolo_mode: bool = False) -> str:
    """Return the prompt text used for an auto-improve agent session.

    The template is looked up under the name ``"auto_improve_prompt"`` via
    ``load_prompt``, which receives ``project_dir`` so that a project-level
    copy can be preferred (resolution rules live in ``load_prompt``).

    Args:
        project_dir: Optional project directory forwarded to ``load_prompt``.
        yolo_mode: When True, the loaded text is passed through
            ``_strip_browser_testing_sections`` before being returned.

    Returns:
        The prompt text, with browser-testing sections stripped in YOLO mode.
    """
    template = load_prompt("auto_improve_prompt", project_dir)
    # Browser verification is optional in this mode, so YOLO only trims text.
    return _strip_browser_testing_sections(template) if yolo_mode else template
def _migrate_add_auto_improve(engine) -> None:
    """Add the auto-improve columns to ``projects`` when they are missing.

    Meant for databases created before these columns existed. Each absent
    column gets its own ``ALTER TABLE`` statement, which is committed and
    logged before the next column is considered.

    Args:
        engine: SQLAlchemy engine connected to the registry database.
    """
    # (column name, DDL statement, log message), applied in this order.
    pending = (
        (
            "auto_improve_enabled",
            "ALTER TABLE projects ADD COLUMN auto_improve_enabled INTEGER NOT NULL DEFAULT 0",
            "Migrated projects table: added auto_improve_enabled column",
        ),
        (
            "auto_improve_interval_minutes",
            "ALTER TABLE projects ADD COLUMN auto_improve_interval_minutes INTEGER NOT NULL DEFAULT 10",
            "Migrated projects table: added auto_improve_interval_minutes column",
        ),
    )

    with engine.connect() as conn:
        table_info = conn.execute(text("PRAGMA table_info(projects)"))
        # Index 1 of every table_info row holds the column name.
        existing = [row[1] for row in table_info.fetchall()]
        for column_name, ddl, log_message in pending:
            if column_name in existing:
                continue
            conn.execute(text(ddl))
            conn.commit()
            logger.info(log_message)
session.close() @@ -464,6 +494,71 @@ def set_project_concurrency(name: str, concurrency: int) -> bool: return True +def get_project_auto_improve(name: str) -> tuple[bool, int]: + """ + Get a project's auto-improve configuration. + + Args: + name: The project name. + + Returns: + Tuple of (enabled, interval_minutes). Defaults to (False, 10) if + the project is not found or the columns are missing. + """ + _, SessionLocal = _get_engine() + session = SessionLocal() + try: + project = session.query(Project).filter(Project.name == name).first() + if project is None: + return (False, 10) + enabled = bool(getattr(project, "auto_improve_enabled", False)) + interval = int(getattr(project, "auto_improve_interval_minutes", 10) or 10) + return (enabled, interval) + finally: + session.close() + + +def set_project_auto_improve( + name: str, + enabled: bool | None = None, + interval_minutes: int | None = None, +) -> bool: + """ + Update a project's auto-improve configuration. + + Either field can be updated independently by passing None for the other. + + Args: + name: The project name. + enabled: If provided, set the enabled flag. + interval_minutes: If provided, set the interval in minutes (1-1440). + + Returns: + True if updated, False if the project wasn't found. + + Raises: + ValueError: If interval_minutes is outside the 1-1440 range. 
+ """ + if interval_minutes is not None and (interval_minutes < 1 or interval_minutes > 1440): + raise ValueError("interval_minutes must be between 1 and 1440") + + with _get_session() as session: + project = session.query(Project).filter(Project.name == name).first() + if not project: + return False + + if enabled is not None: + project.auto_improve_enabled = bool(enabled) + if interval_minutes is not None: + project.auto_improve_interval_minutes = int(interval_minutes) + + logger.info( + "Set project '%s' auto_improve: enabled=%s, interval=%s", + name, enabled, interval_minutes, + ) + return True + + # ============================================================================= # Validation Functions # ============================================================================= diff --git a/server/routers/projects.py b/server/routers/projects.py index 7787ed74..ff0e38b8 100644 --- a/server/routers/projects.py +++ b/server/routers/projects.py @@ -66,10 +66,12 @@ def _get_registry_functions(): sys.path.insert(0, str(root)) from registry import ( + get_project_auto_improve, get_project_concurrency, get_project_path, list_registered_projects, register_project, + set_project_auto_improve, set_project_concurrency, unregister_project, validate_project_path, @@ -82,6 +84,8 @@ def _get_registry_functions(): validate_project_path, get_project_concurrency, set_project_concurrency, + get_project_auto_improve, + set_project_auto_improve, ) @@ -118,7 +122,7 @@ async def list_projects(): _init_imports() assert _check_spec_exists is not None # guaranteed by _init_imports() (_, _, _, list_registered_projects, validate_project_path, - get_project_concurrency, _) = _get_registry_functions() + get_project_concurrency, _, _, _) = _get_registry_functions() projects = list_registered_projects() result = [] @@ -140,6 +144,10 @@ async def list_projects(): has_spec=has_spec, stats=stats, default_concurrency=info.get("default_concurrency", 3), + 
auto_improve_enabled=bool(info.get("auto_improve_enabled", False)), + auto_improve_interval_minutes=int( + info.get("auto_improve_interval_minutes", 10) or 10 + ), )) return result @@ -151,7 +159,7 @@ async def create_project(project: ProjectCreate): _init_imports() assert _scaffold_project_prompts is not None # guaranteed by _init_imports() (register_project, _, get_project_path, list_registered_projects, - _, _, _) = _get_registry_functions() + _, _, _, _, _) = _get_registry_functions() name = validate_project_name(project.name) project_path = Path(project.path).resolve() @@ -232,7 +240,8 @@ async def get_project(name: str): _init_imports() assert _check_spec_exists is not None # guaranteed by _init_imports() assert _get_project_prompts_dir is not None # guaranteed by _init_imports() - (_, _, get_project_path, _, _, get_project_concurrency, _) = _get_registry_functions() + (_, _, get_project_path, _, _, get_project_concurrency, _, + get_project_auto_improve, _) = _get_registry_functions() name = validate_project_name(name) project_dir = get_project_path(name) @@ -246,6 +255,7 @@ async def get_project(name: str): has_spec = _check_spec_exists(project_dir) stats = get_project_stats(project_dir) prompts_dir = _get_project_prompts_dir(project_dir) + ai_enabled, ai_interval = get_project_auto_improve(name) return ProjectDetail( name=name, @@ -254,6 +264,8 @@ async def get_project(name: str): stats=stats, prompts_dir=str(prompts_dir), default_concurrency=get_project_concurrency(name), + auto_improve_enabled=ai_enabled, + auto_improve_interval_minutes=ai_interval, ) @@ -267,7 +279,7 @@ async def delete_project(name: str, delete_files: bool = False): delete_files: If True, also delete the project directory and files """ _init_imports() - (_, unregister_project, get_project_path, _, _, _, _) = _get_registry_functions() + (_, unregister_project, get_project_path, _, _, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) project_dir = 
get_project_path(name) @@ -304,7 +316,7 @@ async def get_project_prompts(name: str): """Get the content of project prompt files.""" _init_imports() assert _get_project_prompts_dir is not None # guaranteed by _init_imports() - (_, _, get_project_path, _, _, _, _) = _get_registry_functions() + (_, _, get_project_path, _, _, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) project_dir = get_project_path(name) @@ -338,7 +350,7 @@ async def update_project_prompts(name: str, prompts: ProjectPromptsUpdate): """Update project prompt files.""" _init_imports() assert _get_project_prompts_dir is not None # guaranteed by _init_imports() - (_, _, get_project_path, _, _, _, _) = _get_registry_functions() + (_, _, get_project_path, _, _, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) project_dir = get_project_path(name) @@ -368,7 +380,7 @@ def write_file(filename: str, content: str | None): async def get_project_stats_endpoint(name: str): """Get current progress statistics for a project.""" _init_imports() - (_, _, get_project_path, _, _, _, _) = _get_registry_functions() + (_, _, get_project_path, _, _, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) project_dir = get_project_path(name) @@ -395,7 +407,7 @@ async def reset_project(name: str, full_reset: bool = False): Dictionary with list of deleted files and reset type """ _init_imports() - (_, _, get_project_path, _, _, _, _) = _get_registry_functions() + (_, _, get_project_path, _, _, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) project_dir = get_project_path(name) @@ -487,12 +499,13 @@ async def reset_project(name: str, full_reset: bool = False): @router.patch("/{name}/settings", response_model=ProjectDetail) async def update_project_settings(name: str, settings: ProjectSettingsUpdate): - """Update project-level settings (concurrency, etc.).""" + """Update project-level settings (concurrency, auto-improve, etc.).""" 
_init_imports() assert _check_spec_exists is not None # guaranteed by _init_imports() assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, get_project_concurrency, - set_project_concurrency) = _get_registry_functions() + set_project_concurrency, get_project_auto_improve, + set_project_auto_improve) = _get_registry_functions() name = validate_project_name(name) project_dir = get_project_path(name) @@ -509,10 +522,43 @@ async def update_project_settings(name: str, settings: ProjectSettingsUpdate): if not success: raise HTTPException(status_code=500, detail="Failed to update concurrency") + # Update auto-improve config if either field was provided + auto_improve_touched = ( + settings.auto_improve_enabled is not None + or settings.auto_improve_interval_minutes is not None + ) + if auto_improve_touched: + try: + success = set_project_auto_improve( + name, + enabled=settings.auto_improve_enabled, + interval_minutes=settings.auto_improve_interval_minutes, + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + if not success: + raise HTTPException(status_code=500, detail="Failed to update auto-improve settings") + + # Sync the scheduler with the new state. 
+ from ..services.scheduler_service import get_scheduler + scheduler = get_scheduler() + ai_enabled, ai_interval = get_project_auto_improve(name) + if ai_enabled: + try: + await scheduler.register_auto_improve(name, project_dir, ai_interval) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to register auto-improve schedule: {e}", + ) + else: + scheduler.remove_auto_improve(name) + # Return updated project details has_spec = _check_spec_exists(project_dir) stats = get_project_stats(project_dir) prompts_dir = _get_project_prompts_dir(project_dir) + ai_enabled, ai_interval = get_project_auto_improve(name) return ProjectDetail( name=name, @@ -521,4 +567,6 @@ async def update_project_settings(name: str, settings: ProjectSettingsUpdate): stats=stats, prompts_dir=str(prompts_dir), default_concurrency=get_project_concurrency(name), + auto_improve_enabled=ai_enabled, + auto_improve_interval_minutes=ai_interval, ) diff --git a/server/schemas.py b/server/schemas.py index abe5bbcf..3619a480 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -46,6 +46,8 @@ class ProjectSummary(BaseModel): has_spec: bool stats: ProjectStats default_concurrency: int = 3 + auto_improve_enabled: bool = False + auto_improve_interval_minutes: int = 10 class ProjectDetail(BaseModel): @@ -56,6 +58,8 @@ class ProjectDetail(BaseModel): stats: ProjectStats prompts_dir: str default_concurrency: int = 3 + auto_improve_enabled: bool = False + auto_improve_interval_minutes: int = 10 class ProjectPrompts(BaseModel): @@ -75,6 +79,8 @@ class ProjectPromptsUpdate(BaseModel): class ProjectSettingsUpdate(BaseModel): """Request schema for updating project-level settings.""" default_concurrency: int | None = None + auto_improve_enabled: bool | None = None + auto_improve_interval_minutes: int | None = None @field_validator('default_concurrency') @classmethod @@ -83,6 +89,13 @@ def validate_concurrency(cls, v: int | None) -> int | None: raise ValueError("default_concurrency 
@field_validator('auto_improve_interval_minutes')
@classmethod
def validate_auto_improve_interval(cls, v: int | None) -> int | None:
    """Accept None (field not supplied) or an interval of 1-1440 minutes."""
    if v is None:
        return v
    if v < 1 or v > 1440:
        raise ValueError("auto_improve_interval_minutes must be between 1 and 1440")
    return v
+ if auto_improve: + max_concurrency = 1 + testing_agent_ratio = 0 + # Store for status queries self.yolo_mode = yolo_mode self.model = model self.parallel_mode = True # Always True now (unified orchestrator) self.max_concurrency = max_concurrency or 1 self.testing_agent_ratio = testing_agent_ratio + self.auto_improve = auto_improve # Build command - unified orchestrator with --concurrency cmd = [ @@ -432,6 +445,10 @@ async def start( if yolo_mode: cmd.append("--yolo") + # Add --auto-improve flag: bypasses the orchestrator for a one-shot run + if auto_improve: + cmd.append("--auto-improve") + # Add --concurrency flag (unified orchestrator always uses this) cmd.extend(["--concurrency", str(max_concurrency or 1)]) diff --git a/server/services/scheduler_service.py b/server/services/scheduler_service.py index 3e0576d6..f8e06857 100644 --- a/server/services/scheduler_service.py +++ b/server/services/scheduler_service.py @@ -15,6 +15,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.cron import CronTrigger +from apscheduler.triggers.interval import IntervalTrigger # Add parent directory for imports sys.path.insert(0, str(Path(__file__).parent.parent.parent)) @@ -73,19 +74,39 @@ async def stop(self): logger.info("Scheduler service stopped") async def _load_all_schedules(self): - """Load schedules for all registered projects.""" + """Load schedules and auto-improve jobs for all registered projects.""" from registry import list_registered_projects try: projects = list_registered_projects() total_loaded = 0 + total_auto_improve = 0 for project_name, info in projects.items(): project_path = Path(info.get("path", "")) - if project_path.exists(): - count = await self._load_project_schedules(project_name, project_path) - total_loaded += count + if not project_path.exists(): + continue + + # Windowed schedules (cron-based) + count = await self._load_project_schedules(project_name, project_path) + total_loaded += count + + # Auto-improve 
interval jobs (stored in the registry directly) + if info.get("auto_improve_enabled"): + interval = int(info.get("auto_improve_interval_minutes", 10) or 10) + try: + await self.register_auto_improve(project_name, project_path, interval) + total_auto_improve += 1 + except Exception as e: + logger.error( + f"Failed to register auto-improve for {project_name}: {e}" + ) + if total_loaded > 0: logger.info(f"Loaded {total_loaded} schedule(s) across all projects") + if total_auto_improve > 0: + logger.info( + f"Registered {total_auto_improve} auto-improve job(s) across all projects" + ) except Exception as e: logger.error(f"Error loading schedules: {e}") @@ -205,6 +226,135 @@ def remove_schedule(self, schedule_id: int): else: logger.warning(f"No jobs found to remove for schedule {schedule_id}") + # ------------------------------------------------------------------ + # Auto-improve interval jobs + # ------------------------------------------------------------------ + + @staticmethod + def _auto_improve_job_id(project_name: str) -> str: + return f"auto_improve_{project_name}" + + async def register_auto_improve( + self, + project_name: str, + project_dir: Path, + interval_minutes: int, + ): + """Register or replace the auto-improve interval job for a project. + + Uses APScheduler IntervalTrigger to fire every ``interval_minutes``. + On each tick, a single one-shot auto-improve agent session is started + (unless the project's agent is already running, in which case the tick + is silently skipped). 
+ """ + if interval_minutes < 1 or interval_minutes > 1440: + raise ValueError("interval_minutes must be between 1 and 1440") + + job_id = self._auto_improve_job_id(project_name) + trigger = IntervalTrigger(minutes=interval_minutes) + + self.scheduler.add_job( + self._handle_auto_improve_tick, + trigger, + id=job_id, + args=[project_name, str(project_dir)], + replace_existing=True, + misfire_grace_time=300, + max_instances=1, # Never overlap ticks + coalesce=True, # Collapse missed ticks during long runs + ) + + job = self.scheduler.get_job(job_id) + next_run = job.next_run_time if job else None + logger.info( + f"Registered auto-improve for {project_name}: " + f"every {interval_minutes} min (next: {next_run})" + ) + + def remove_auto_improve(self, project_name: str): + """Remove the auto-improve interval job for a project (no-op if missing).""" + job_id = self._auto_improve_job_id(project_name) + try: + self.scheduler.remove_job(job_id) + logger.info(f"Removed auto-improve job for {project_name}") + except Exception: + logger.debug(f"No auto-improve job to remove for {project_name}") + + async def _handle_auto_improve_tick( + self, + project_name: str, + project_dir_str: str, + ): + """Fire one auto-improve agent session for a project. + + Silently skips if the agent is already running (manual run, another + tick still executing, etc.). The scheduler's ``max_instances=1`` and + ``coalesce=True`` settings make sure ticks never stack up. 
+ """ + logger.info(f"Auto-improve tick for {project_name}") + project_dir = Path(project_dir_str) + + # Require an actual directory: a stray file at this path is not a project + if not project_dir.is_dir(): + logger.warning( + f"Auto-improve tick: project dir missing for {project_name}, skipping" + ) + return + + try: + from .process_manager import get_manager + + root_dir = Path(__file__).parent.parent.parent + manager = get_manager(project_name, project_dir, root_dir) + + if manager.status in ("running", "paused", "pausing", "paused_graceful"): + logger.info( + f"Auto-improve tick for {project_name}: agent already " + f"{manager.status}, skipping this tick" + ) + return + + # Resolve effective yolo/model from global settings, mirroring the + # agent router's _get_settings_defaults() pattern so auto-improve + # respects whatever the user has configured globally. + yolo_mode, model = self._resolve_agent_defaults() + + logger.info( + f"Starting auto-improve agent for {project_name} " + f"(yolo={yolo_mode}, model={model})" + ) + success, msg = await manager.start( + yolo_mode=yolo_mode, + model=model, + max_concurrency=1, + testing_agent_ratio=0, + playwright_headless=True, + auto_improve=True, + ) + + if success: + logger.info(f"Auto-improve agent started for {project_name}") + else: + logger.warning( + f"Auto-improve agent failed to start for {project_name}: {msg}" + ) + except Exception as e: + # Best-effort job: a tick must never raise into the scheduler, but + # keep the traceback so the failure can be diagnosed. + logger.exception(f"Error in auto-improve tick for {project_name}: {e}") + + @staticmethod + def _resolve_agent_defaults() -> tuple[bool, str]: + """Resolve (yolo_mode, model) from global settings. + + Kept separate from the agent router's helper so the scheduler never + has to import FastAPI routers. Mirrors the parsing behavior of + ``server/routers/agent.py::_get_settings_defaults``.
+ """ + from registry import DEFAULT_MODEL, get_all_settings + + settings = get_all_settings() + yolo_mode = (settings.get("yolo_mode") or "false").lower() == "true" + # Chain with `or` so a blank or None stored "model" value also falls + # back to DEFAULT_MODEL instead of being returned as the model name. + model = settings.get("api_model") or settings.get("model") or DEFAULT_MODEL + return yolo_mode, model + async def _handle_scheduled_start( self, project_name: str, schedule_id: int, project_dir_str: str ): diff --git a/ui/src/components/AgentControl.tsx b/ui/src/components/AgentControl.tsx index 7dc7e0a3..bbd1194d 100644 --- a/ui/src/components/AgentControl.tsx +++ b/ui/src/components/AgentControl.tsx @@ -1,5 +1,5 @@ import { useState, useEffect, useRef, useCallback } from 'react' -import { Play, Square, Loader2, GitBranch, Clock, Pause, PlayCircle } from 'lucide-react' +import { Play, Square, Loader2, GitBranch, Clock, Pause, PlayCircle, Sparkles } from 'lucide-react' import { useStartAgent, useStopAgent, @@ -7,10 +7,12 @@ import { useGracefulResumeAgent, useSettings, useUpdateProjectSettings, + useProject, } from '../hooks/useProjects' import { useNextScheduledRun } from '../hooks/useSchedules' import { formatNextRun, formatEndTime } from '../lib/timeUtils' import { ScheduleModal } from './ScheduleModal' +import { AutoImproveModal } from './AutoImproveModal' import type { AgentStatus } from '../lib/types' import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' @@ -65,8 +67,12 @@ export function AgentControl({ projectName, status, defaultConcurrency = 3 }: Ag const gracefulPause = useGracefulPauseAgent(projectName) const gracefulResume = useGracefulResumeAgent(projectName) const { data: nextRun } = useNextScheduledRun(projectName) + const { data: project } = useProject(projectName) + const autoImproveEnabled = Boolean(project?.auto_improve_enabled) + const autoImproveInterval = project?.auto_improve_interval_minutes ??
10 const [showScheduleModal, setShowScheduleModal] = useState(false) + const [showAutoImproveModal, setShowAutoImproveModal] = useState(false) const isLoading = startAgent.isPending || stopAgent.isPending || gracefulPause.isPending || gracefulResume.isPending const isRunning = status === 'running' || status === 'paused' || status === 'pausing' || status === 'paused_graceful' @@ -224,6 +230,20 @@ export function AgentControl({ projectName, status, defaultConcurrency = 3 }: Ag > + + {/* Sparkles button to open auto-improve modal */} + {/* Schedule Modal */} @@ -232,6 +252,13 @@ export function AgentControl({ projectName, status, defaultConcurrency = 3 }: Ag isOpen={showScheduleModal} onClose={() => setShowScheduleModal(false)} /> + + {/* Auto-Improve Modal */} + setShowAutoImproveModal(false)} + /> ) } diff --git a/ui/src/components/AutoImproveModal.tsx b/ui/src/components/AutoImproveModal.tsx new file mode 100644 index 00000000..16cd351c --- /dev/null +++ b/ui/src/components/AutoImproveModal.tsx @@ -0,0 +1,166 @@ +/** + * Auto-Improve Modal Component + * + * Configures per-project auto-improve mode: on an interval, the agent + * creates one improvement feature, implements it, verifies, and commits. + * Ticks are silently skipped while the agent is already running. 
+ */ + +import { useState, useEffect, useRef } from 'react' +import { Sparkles } from 'lucide-react' +import { useProject, useUpdateProjectSettings } from '../hooks/useProjects' +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogFooter, +} from '@/components/ui/dialog' +import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { Label } from '@/components/ui/label' +import { Switch } from '@/components/ui/switch' +import { Alert, AlertDescription } from '@/components/ui/alert' + +interface AutoImproveModalProps { + projectName: string + isOpen: boolean + onClose: () => void +} + +const PRESETS: { label: string; minutes: number }[] = [ + { label: '1 min', minutes: 1 }, + { label: '5 min', minutes: 5 }, + { label: '10 min', minutes: 10 }, + { label: '30 min', minutes: 30 }, + { label: '1 hour', minutes: 60 }, +] + +/** + * Modal for enabling auto-improve on one project and choosing its interval. + * Form state is local and is only sent to the server when handleSave runs. + */ +export function AutoImproveModal({ projectName, isOpen, onClose }: AutoImproveModalProps) { + const { data: project } = useProject(projectName) + const updateSettings = useUpdateProjectSettings(projectName) + + const [enabled, setEnabled] = useState(false) + const [intervalMinutes, setIntervalMinutes] = useState(10) + const [error, setError] = useState<string | null>(null) + // True once the form has been seeded for the current open session + const syncedRef = useRef(false) + + // Seed local form state from project settings once per open, so a later + // change of the `project` object does not overwrite in-progress edits + useEffect(() => { + if (!isOpen) { + syncedRef.current = false + return + } + if (project && !syncedRef.current) { + syncedRef.current = true + setEnabled(Boolean(project.auto_improve_enabled)) + setIntervalMinutes(project.auto_improve_interval_minutes || 10) + setError(null) + } + }, [isOpen, project]) + + const handleSave = async () => { + setError(null) + + // A plain range check lets fractional values and NaN through, so + // require a whole number before comparing against the bounds + if (!Number.isInteger(intervalMinutes) || intervalMinutes < 1 || intervalMinutes > 1440) { + setError('Interval must be a whole number between 1 and 1440 minutes') + return + } + + try { + await updateSettings.mutateAsync({ + auto_improve_enabled: enabled, + auto_improve_interval_minutes: intervalMinutes, + }) + onClose() + } catch (err) { + const message = err instanceof Error ?
err.message : 'Failed to save settings' + setError(message) + } + } + + const handlePreset = (minutes: number) => { + setIntervalMinutes(minutes) + } + + return ( + !open && onClose()}> + + + + + Auto-Improve + + + +
+ {/* Explanatory copy */} +

+ Auto-Improve runs the agent on a timer. Each tick it analyzes the + codebase, picks one meaningful improvement, adds it to the Kanban, + implements it, and commits with a short TLDR message. Ticks skip + silently while the agent is already running. +

+ + {/* Enable toggle */} +
+
+ +

+ When enabled, the agent will run every {intervalMinutes} minute + {intervalMinutes === 1 ? '' : 's'}. +

+
+ +
+ + {/* Interval input */} +
+ + setIntervalMinutes(Number(e.target.value))} + disabled={!enabled} + className="max-w-[140px]" + /> +
+ {PRESETS.map((preset) => ( + + ))} +
+
+ + {error && ( + + {error} + + )} +
+ + + + + +
+
+ ) +} diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index 7e5b9e59..ae961208 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -16,6 +16,8 @@ export interface ProjectSummary { has_spec: boolean stats: ProjectStats default_concurrency: number + auto_improve_enabled: boolean + auto_improve_interval_minutes: number } export interface ProjectDetail extends ProjectSummary { @@ -669,6 +671,8 @@ export interface SettingsUpdate { export interface ProjectSettingsUpdate { default_concurrency?: number + auto_improve_enabled?: boolean + auto_improve_interval_minutes?: number } // ============================================================================