SummerOneTwo · SummerOneTwo · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "autocode",
-  "version": "0.6.0",
+  "version": "0.7.0",
   "description": "Claude Code plugin for competitive programming problem-setting workflows.",
   "author": {
     "name": "SummerOneTwo",

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.7.0] - 2026-04-27
+
+### Features
+
+- **source_path 直接编译**: 当使用 `source_path` 参数时，直接以原始文件作为编译输入，而不是先复制到标准位置再从副本编译。标准位置仍会写入一份副本以供其他工具使用。所有构建工具返回 `canonical_path`（标准位置副本）和 `source_path`（实际编译源）。
+- **resolve_source() 公共函数**: 提取 5 个构建工具中的源码解析逻辑到 `mixins.py` 的 `resolve_source()` 函数和 `ResolvedSource` 数据类，消除约 100 行重复代码。
+- **name 参数**: `solution_build` 和 `solution_run` 新增 `name` 参数，支持自定义文件名（如 `name="brute_force"` 替代默认 `brute`）。
+- **sol_name / brute_name**: `stress_test_run` 新增 `sol_name` 和 `brute_name` 参数，支持查找自定义命名的解法二进制文件。
+- **output_dir 参数**: `problem_generate_tests` 新增 `output_dir` 参数，可指定测试数据输出目录（默认 `problem_dir/tests`）。
+- **extra_args 参数**: `stress_test_run`、`generator_run`、`problem_generate_tests` 的 `test_configs` 新增 `extra_args` 参数，支持传递自定义命令行参数给 generator。协议扩展为 `gen.exe <seed> <type> <n_min> <n_max> <t_min> <t_max> [extra_args...]`。
+- **types 参数**: `stress_test_run` 新增 `types` 参数，支持在对拍中循环使用多种生成策略（如 `["1","2","3","4"]`）。
+- **problem_verify_tests 工具**: 新增测试数据验证工具，检查文件配对、答案一致性（重新运行 sol）、validator 验证、无空文件等。
+- **stress_test_run 统计信息**: 对拍通过/失败时返回详细统计，包括 sol/brute 运行时间分布、N 值分布、最慢轮次等。
+- **构建结果透明度**: 所有构建工具返回 `binary_size` 和 `canonical_path`，`source_path` 返回实际编译源文件路径。
+
+### Improvements
+
+- **smart mode 文档**: `problem_generate_tests` 的 `constraints` 参数说明更明确，返回 `effective_test_configs` 展示实际使用的配置。
+- **workflow_guard 自定义命名**: `infer_state()` 支持自定义解法文件名（前缀匹配），新增 `tests_verified` 状态字段。
+- **工作流步骤更新**: 新增 `problem_verify_tests(passed)` 步骤，位于 `problem_generate_tests` 和 `problem_pack_polygon` 之间。
+
 ## [0.6.0] - 2026-04-25
 
 ### Features

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -66,6 +66,7 @@ AutoCode/
 | stress_test_run | 压力测试 |
 | problem_create | 初始化题目 |
 | problem_generate_tests | 生成测试数据 |
+| problem_verify_tests | 验证测试数据质量 |
 | problem_validate | 验证题面样例 |
 | problem_pack_polygon | 打包为 Polygon 格式 |
 
@@ -102,7 +103,8 @@ AutoCode/
 6. 运行压力测试 (`stress_test_run`, completed_rounds == total_rounds)
 7. 按需构建检查器 (`checker_build`, accuracy >= 0.9)
 8. 生成测试数据 (`problem_generate_tests`, generated_test_count > 0)
-9. 打包 Polygon (`problem_pack_polygon`)
+9. 验证测试数据 (`problem_verify_tests`, passed)
+10. 打包 Polygon (`problem_pack_polygon`)
 
 该顺序会被 [hooks/hooks.json](/c:/userProgram/program/AutoCode/hooks/hooks.json) 和 [scripts/workflow_guard.py](/c:/userProgram/program/AutoCode/scripts/workflow_guard.py) 实际强制执行。
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "autocode-mcp"
-version = "0.6.0"
+version = "0.7.0"
 description = "MCP Server for competitive programming problem creation, based on AutoCode paper"
 readme = "README.md"
 requires-python = ">=3.10"

diff --git a/scripts/workflow_guard.py b/scripts/workflow_guard.py
@@ -36,11 +36,12 @@ def state_file(problem_dir: str) -> Path:
 
 def infer_state(problem_dir: str) -> dict[str, Any]:
     root = Path(problem_dir)
+    solutions_dir = root / "solutions"
     return {
         "problem_dir": str(root),
         "created": root.exists() and (root / "files").exists() and (root / "solutions").exists(),
-        "sol_built": (root / "solutions" / "sol.cpp").exists() or any(root.glob("solutions/sol.*")),
-        "brute_built": (root / "solutions" / "brute.cpp").exists() or any(root.glob("solutions/brute.*")),
+        "sol_built": _has_solution(solutions_dir, "sol"),
+        "brute_built": _has_solution(solutions_dir, "brute"),
         "validator_ready": (root / "files" / "val.cpp").exists() or any(root.glob("files/val.*")),
         "validator_accuracy": None,
         "generator_built": (root / "files" / "gen.cpp").exists() or any(root.glob("files/gen.*")),
@@ -54,10 +55,25 @@ def infer_state(problem_dir: str) -> dict[str, Any]:
         "validation_passed": False,
         "tests_generated": any((root / "tests").glob("*.in")) if (root / "tests").exists() else False,
         "generated_test_count": len(list((root / "tests").glob("*.in"))) if (root / "tests").exists() else 0,
+        "tests_verified": False,
         "packaged": (root / "problem.xml").exists(),
     }
 
 
+def _has_solution(solutions_dir: Path, prefix: str) -> bool:
+    """Check whether solutions_dir holds a solution source file with the given prefix (custom names supported)."""
+    if not solutions_dir.exists():
+        return False
+    # Exact match (e.g. sol.cpp, brute.cpp)
+    if (solutions_dir / f"{prefix}.cpp").exists():
+        return True
+    # Prefix match (e.g. brute_force.cpp); any regular .cpp file whose stem starts with prefix counts
+    for f in solutions_dir.iterdir():
+        if f.is_file() and f.stem.startswith(prefix) and f.suffix == ".cpp":
+            return True
+    return False
+
+
 def load_state(problem_dir: str) -> dict[str, Any]:
     path = state_file(problem_dir)
     if path.exists():
@@ -120,7 +136,7 @@ def pre_tool(payload: dict[str, Any]) -> int:
         "checker_build": "必须先通过 stress_test_run（completed_rounds == total_rounds），再构建 checker。",
         "problem_validate": "必须先通过 stress_test_run（completed_rounds == total_rounds），再进行验证。",
         "problem_generate_tests": "必须先通过 problem_validate（验证通过），才能生成最终测试数据。",
-        "problem_pack_polygon": "必须先生成最终测试数据，并且生成数量 > 0，再进行打包。",
+        "problem_pack_polygon": "必须先生成最终测试数据并通过 problem_verify_tests(passed)，再进行打包。",
     }
 
     tool_input = payload.get("tool_input", {})
@@ -169,6 +185,7 @@ def pre_tool(payload: dict[str, Any]) -> int:
 
     if short_name == "problem_pack_polygon" and not (
         state["tests_generated"] and state.get("generated_test_count", 0) > 0
+        and state.get("tests_verified", False)
     ):
         deny(reasons["problem_pack_polygon"])
         return 0
@@ -194,6 +211,12 @@ def post_tool(payload: dict[str, Any]) -> int:
         save_state(problem_dir, state)
         return 0
 
+    if short_name == "problem_verify_tests" and not success:
+        state = load_state(problem_dir)
+        state["tests_verified"] = False
+        save_state(problem_dir, state)
+        return 0
+
     if not success:
         return 0
 
@@ -229,6 +252,9 @@ def post_tool(payload: dict[str, Any]) -> int:
         generated_tests = data.get("generated_tests", [])
         state["tests_generated"] = bool(generated_tests)
         state["generated_test_count"] = len(generated_tests)
+        state["tests_verified"] = False
+    elif short_name == "problem_verify_tests":
+        state["tests_verified"] = bool(data.get("passed", False))
     elif short_name == "problem_pack_polygon":
         state["packaged"] = True
 
@@ -244,7 +270,8 @@ def session_start() -> int:
         "stress_test_run(completed_rounds == total_rounds) -> "
         "checker_build if needed (accuracy >= 0.9) -> "
         "problem_validate(validation_passed) -> "
-        "problem_generate_tests(generated_test_count > 0) -> problem_pack_polygon. "
+        "problem_generate_tests(generated_test_count > 0) -> "
+        "problem_verify_tests(passed) -> problem_pack_polygon. "
         "If a hook blocks a step, complete the missing prerequisite instead of retrying blindly."
     )
     print(

diff --git a/src/autocode_mcp/__init__.py b/src/autocode_mcp/__init__.py
@@ -6,7 +6,7 @@
 """
 import os
 
-__version__ = "0.6.0"
+__version__ = "0.7.0"
 
 # 获取 templates 目录路径（包内目录）
 _PACKAGE_DIR = os.path.dirname(__file__)

diff --git a/src/autocode_mcp/server.py b/src/autocode_mcp/server.py
@@ -1,7 +1,7 @@
 """
 MCP Server 入口。
 
-提供 15 个原子工具，基于 AutoCode 论文框架。
+提供 17 个原子工具，基于 AutoCode 论文框架。
 """
 
 from __future__ import annotations
@@ -35,6 +35,7 @@
 from .tools.problem import ProblemCreateTool, ProblemGenerateTestsTool, ProblemPackPolygonTool
 from .tools.solution import SolutionBuildTool, SolutionRunTool
 from .tools.stress_test import StressTestRunTool
+from .tools.test_verify import ProblemVerifyTestsTool
 from .tools.validation import ProblemValidateTool
 from .tools.validator import ValidatorBuildTool, ValidatorSelectTool
 
@@ -67,6 +68,7 @@ def register_all_tools() -> None:
     # Problem 工具组
     register_tool(ProblemCreateTool())
     register_tool(ProblemGenerateTestsTool())
+    register_tool(ProblemVerifyTestsTool())
     register_tool(ProblemPackPolygonTool())
     register_tool(ProblemValidateTool())
 

diff --git a/src/autocode_mcp/tools/checker.py b/src/autocode_mcp/tools/checker.py
@@ -11,7 +11,7 @@
 from ..utils.compiler import run_binary_with_args
 from ..utils.platform import get_exe_extension
 from .base import Tool, ToolResult
-from .mixins import BuildToolMixin
+from .mixins import BuildToolMixin, resolve_source
 
 
 class CheckerBuildTool(Tool, BuildToolMixin):
@@ -91,58 +91,45 @@ async def execute(
         compiler: str = "g++",
     ) -> ToolResult:
         """执行 Checker 构建。"""
-        # 解析源代码：source_path 优先于 code
-        source_dir = None
-        if source_path:
-            if not os.path.isabs(source_path):
-                source_path = os.path.join(problem_dir, source_path)
-            if not os.path.exists(source_path):
-                return ToolResult.fail(f"Source file not found: {source_path}")
-            try:
-                with open(source_path, encoding="utf-8") as f:
-                    code = f.read()
-            except UnicodeDecodeError:
-                try:
-                    with open(source_path, encoding="latin-1") as f:
-                        code = f.read()
-                except Exception as e:
-                    return ToolResult.fail(f"Failed to read source file: {e}")
-            source_dir = os.path.dirname(os.path.abspath(source_path))
-        elif code is None:
-            return ToolResult.fail("Either 'code' or 'source_path' must be provided")
+        resolved, err = resolve_source(problem_dir, code, source_path)
+        if err is not None:
+            return err
+        assert resolved is not None
 
         os.makedirs(problem_dir, exist_ok=True)
-
-        # 保存到 files/ 子目录
         files_dir = os.path.join(problem_dir, "files")
         os.makedirs(files_dir, exist_ok=True)
 
-        # 保存代码
-        source_path = os.path.join(files_dir, "checker.cpp")
+        canonical_path = os.path.join(files_dir, "checker.cpp")
         try:
-            with open(source_path, "w", encoding="utf-8") as f:
-                f.write(code)
+            with open(canonical_path, "w", encoding="utf-8") as f:
+                f.write(resolved.code)
         except Exception as e:
             return ToolResult.fail(f"Failed to save code: {str(e)}")
 
-        # 编译
         binary_path = os.path.join(files_dir, f"checker{get_exe_extension()}")
 
-        include_dirs = [source_dir] if source_dir else None
-        compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs)
+        compile_source = resolved.original_source_path or canonical_path
+        include_dirs = [resolved.include_dir] if resolved.include_dir else None
+        compile_result = await self.build(compile_source, binary_path, compiler=compiler, include_dirs=include_dirs)
 
         if not compile_result.success:
             return ToolResult.fail(
                 f"Compilation failed: {compile_result.error}",
-                source_path=source_path,
+                source_path=compile_source,
+                canonical_path=canonical_path,
                 compile_log=compile_result.stderr,
             )
 
+        binary_size = os.path.getsize(binary_path) if os.path.exists(binary_path) else 0
+
         # 如果没有测试场景，直接返回成功
         if not test_scenarios:
             return ToolResult.ok(
-                source_path=source_path,
+                source_path=compile_source,
+                canonical_path=canonical_path,
                 binary_path=binary_path,
+                binary_size=binary_size,
                 compile_log=compile_result.stderr,
                 message="Checker built successfully (no test scenarios provided)",
             )
@@ -214,8 +201,10 @@ async def execute(
         accuracy = correct_count / total if total > 0 else 0
 
         return ToolResult.ok(
-            source_path=source_path,
+            source_path=compile_source,
+            canonical_path=canonical_path,
             binary_path=binary_path,
+            binary_size=binary_size,
             compile_log=compile_result.stderr,
             test_results=test_results,
             correct_count=correct_count,