From 76b1406b8f89e7610fab2e6f9064379919b06915 Mon Sep 17 00:00:00 2001
From: Connor Tyndall
Date: Sun, 11 Jan 2026 18:16:03 -0600
Subject: [PATCH] fix: Use least-tested-first selection for regression testing

Replaces random selection in feature_get_for_regression with a
least-tested-first approach to ensure even distribution of regression
testing across all features.

Changes:
- Add regression_count column to Feature model to track test frequency
- Add database migration for existing databases
- Update feature_get_for_regression to order by regression_count (ascending)
- Increment regression_count when features are selected for testing

This prevents the same features from being tested repeatedly while
others are never tested, reducing wasted tokens and ensuring
comprehensive regression coverage.

Closes leonvanzyl/autocoder#20

Co-Authored-By: Claude Opus 4.5
---
 api/database.py           | 20 +++++++++++++++++++-
 mcp_server/feature_mcp.py | 25 +++++++++++++++++++------
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/api/database.py b/api/database.py
index a74b857a..0ce1d4ba 100644
--- a/api/database.py
+++ b/api/database.py
@@ -29,6 +29,7 @@ class Feature(Base):
     steps = Column(JSON, nullable=False)  # Stored as JSON array
     passes = Column(Boolean, default=False, index=True)
     in_progress = Column(Boolean, default=False, index=True)
+    regression_count = Column(Integer, default=0, index=True)  # Track regression test frequency
 
     def to_dict(self) -> dict:
         """Convert feature to dictionary for JSON serialization."""
@@ -41,6 +42,7 @@ def to_dict(self) -> dict:
             "steps": self.steps,
             "passes": self.passes,
             "in_progress": self.in_progress,
+            "regression_count": self.regression_count or 0,
         }
 
 
@@ -73,6 +75,21 @@ def _migrate_add_in_progress_column(engine) -> None:
             conn.commit()
 
 
+def _migrate_add_regression_count_column(engine) -> None:
+    """Add regression_count column to existing databases that don't have it."""
+    from sqlalchemy import text
+
+    with engine.connect() as conn:
+        # Check if column exists
+        result = conn.execute(text("PRAGMA table_info(features)"))
+        columns = [row[1] for row in result.fetchall()]
+
+        if "regression_count" not in columns:
+            # Add the column with default value
+            conn.execute(text("ALTER TABLE features ADD COLUMN regression_count INTEGER DEFAULT 0"))
+            conn.commit()
+
+
 def create_database(project_dir: Path) -> tuple:
     """
     Create database and return engine + session maker.
@@ -87,8 +104,9 @@ def create_database(project_dir: Path) -> tuple:
     engine = create_engine(db_url, connect_args={"check_same_thread": False})
     Base.metadata.create_all(bind=engine)
 
-    # Migrate existing databases to add in_progress column
+    # Migrate existing databases to add new columns
     _migrate_add_in_progress_column(engine)
+    _migrate_add_regression_count_column(engine)
 
     SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
     return engine, SessionLocal
diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py
index 1534bc1b..56962e6b 100755
--- a/mcp_server/feature_mcp.py
+++ b/mcp_server/feature_mcp.py
@@ -28,7 +28,6 @@
 
 from mcp.server.fastmcp import FastMCP
 from pydantic import BaseModel, Field
-from sqlalchemy.sql.expression import func
 
 # Add parent directory to path so we can import from api module
 sys.path.insert(0, str(Path(__file__).parent.parent))
@@ -178,11 +177,14 @@ def feature_get_next() -> str:
 def feature_get_for_regression(
     limit: Annotated[int, Field(default=3, ge=1, le=10, description="Maximum number of passing features to return")] = 3
 ) -> str:
-    """Get random passing features for regression testing.
+    """Get passing features for regression testing, prioritizing least-tested features.
 
-    Returns a random selection of features that are currently passing.
-    Use this to verify that previously implemented features still work
-    after making changes.
+    Returns features that are currently passing, ordered by regression_count (ascending)
+    so that features tested fewer times are prioritized. This ensures even distribution
+    of regression testing across all features, avoiding duplicate testing of the same
+    features while others are never tested.
+
+    Each returned feature has its regression_count incremented to track testing frequency.
 
     Args:
         limit: Maximum number of features to return (1-10, default 3)
@@ -192,14 +194,25 @@ def feature_get_for_regression(
     """
     session = get_session()
     try:
+        # Select features with lowest regression_count first (least tested)
+        # Use id as secondary sort for deterministic ordering when counts are equal
         features = (
             session.query(Feature)
             .filter(Feature.passes == True)
-            .order_by(func.random())
+            .order_by(Feature.regression_count.asc(), Feature.id.asc())
             .limit(limit)
             .all()
         )
 
+        # Increment regression_count for selected features
+        for feature in features:
+            feature.regression_count = (feature.regression_count or 0) + 1
+        session.commit()
+
+        # Refresh to get updated counts
+        for feature in features:
+            session.refresh(feature)
+
         return json.dumps({
             "features": [f.to_dict() for f in features],
             "count": len(features)