Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude-plugin/plugin.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "autocode",
"version": "0.6.0",
"version": "0.7.0",
"description": "Claude Code plugin for competitive programming problem-setting workflows.",
"author": {
"name": "SummerOneTwo",
Expand Down
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.7.0] - 2026-04-27

### Features

- **source_path 直接编译**: 当使用 `source_path` 参数时,直接从原始文件编译,不再覆盖到标准位置。标准位置仍保留副本以供其他工具使用。所有构建工具返回 `canonical_path`(标准位置副本)和 `source_path`(实际编译源)。
- **resolve_source() 公共函数**: 提取 5 个构建工具中的源码解析逻辑到 `mixins.py` 的 `resolve_source()` 函数和 `ResolvedSource` 数据类,消除约 100 行重复代码。
- **name 参数**: `solution_build` 和 `solution_run` 新增 `name` 参数,支持自定义文件名(如 `name="brute_force"` 替代默认 `brute`)。
- **sol_name / brute_name**: `stress_test_run` 新增 `sol_name` 和 `brute_name` 参数,支持查找自定义命名的解法二进制文件。
- **output_dir 参数**: `problem_generate_tests` 新增 `output_dir` 参数,可指定测试数据输出目录(默认 `problem_dir/tests`)。
- **extra_args 参数**: `stress_test_run`、`generator_run` 以及 `problem_generate_tests` 的 `test_configs` 条目均新增 `extra_args` 参数,用于向 generator 传递自定义命令行参数。调用协议扩展为 `gen.exe <seed> <type> <n_min> <n_max> <t_min> <t_max> [extra_args...]`。
- **types 参数**: `stress_test_run` 新增 `types` 参数,支持在对拍中循环使用多种生成策略(如 `["1","2","3","4"]`)。
- **problem_verify_tests 工具**: 新增测试数据验证工具,检查文件配对、答案一致性(重新运行 sol)、validator 验证、无空文件等。
- **stress_test_run 统计信息**: 对拍通过/失败时返回详细统计,包括 sol/brute 运行时间分布、N 值分布、最慢轮次等。
- **构建结果透明度**: 所有构建工具返回 `binary_size` 和 `canonical_path`,`source_path` 返回实际编译源文件路径。

### Improvements

- **smart mode 文档**: `problem_generate_tests` 的 `constraints` 参数说明更明确,返回 `effective_test_configs` 展示实际使用的配置。
- **workflow_guard 自定义命名**: `infer_state()` 支持自定义解法文件名(前缀匹配),新增 `tests_verified` 状态字段。
- **工作流步骤更新**: 新增 `problem_verify_tests(passed)` 步骤,位于 `problem_generate_tests` 和 `problem_pack_polygon` 之间。

## [0.6.0] - 2026-04-25

### Features
Expand Down
4 changes: 3 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ AutoCode/
| stress_test_run | 压力测试 |
| problem_create | 初始化题目 |
| problem_generate_tests | 生成测试数据 |
| problem_verify_tests | 验证测试数据质量 |
| problem_validate | 验证题面样例 |
| problem_pack_polygon | 打包为 Polygon 格式 |

Expand Down Expand Up @@ -102,7 +103,8 @@ AutoCode/
6. 运行压力测试 (`stress_test_run`, completed_rounds == total_rounds)
7. 按需构建检查器 (`checker_build`, accuracy >= 0.9)
8. 生成测试数据 (`problem_generate_tests`, generated_test_count > 0)
9. 打包 Polygon (`problem_pack_polygon`)
9. 验证测试数据 (`problem_verify_tests`, passed)
10. 打包 Polygon (`problem_pack_polygon`)

该顺序会被 [hooks/hooks.json](hooks/hooks.json) 和 [scripts/workflow_guard.py](scripts/workflow_guard.py) 实际强制执行。

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "autocode-mcp"
version = "0.6.0"
version = "0.7.0"
description = "MCP Server for competitive programming problem creation, based on AutoCode paper"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
35 changes: 31 additions & 4 deletions scripts/workflow_guard.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ def state_file(problem_dir: str) -> Path:

def infer_state(problem_dir: str) -> dict[str, Any]:
root = Path(problem_dir)
solutions_dir = root / "solutions"
return {
"problem_dir": str(root),
"created": root.exists() and (root / "files").exists() and (root / "solutions").exists(),
"sol_built": (root / "solutions" / "sol.cpp").exists() or any(root.glob("solutions/sol.*")),
"brute_built": (root / "solutions" / "brute.cpp").exists() or any(root.glob("solutions/brute.*")),
"sol_built": _has_solution(solutions_dir, "sol"),
"brute_built": _has_solution(solutions_dir, "brute"),
"validator_ready": (root / "files" / "val.cpp").exists() or any(root.glob("files/val.*")),
"validator_accuracy": None,
"generator_built": (root / "files" / "gen.cpp").exists() or any(root.glob("files/gen.*")),
Expand All @@ -54,10 +55,25 @@ def infer_state(problem_dir: str) -> dict[str, Any]:
"validation_passed": False,
"tests_generated": any((root / "tests").glob("*.in")) if (root / "tests").exists() else False,
"generated_test_count": len(list((root / "tests").glob("*.in"))) if (root / "tests").exists() else 0,
"tests_verified": False,
"packaged": (root / "problem.xml").exists(),
}
Comment on lines 56 to 60
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

workflow_guard introduces state["tests_verified"] and advertises a required problem_verify_tests step, but the state is never updated in post_tool() and pre_tool() still allows problem_pack_polygon after only tests_generated. This makes the new verification step unenforced in the guard. Update post_tool() to set tests_verified based on problem_verify_tests output (e.g., data.get("passed")) and gate problem_pack_polygon on tests_verified.

Copilot uses AI. Check for mistakes.


def _has_solution(solutions_dir: Path, prefix: str) -> bool:
"""检查 solutions/ 下是否有指定前缀的解法文件(支持自定义命名)。"""
if not solutions_dir.exists():
return False
# 精确匹配(如 sol.cpp, brute.cpp)
if (solutions_dir / f"{prefix}.cpp").exists():
return True
# 前缀匹配(如 brute_force.cpp)
for f in solutions_dir.iterdir():
if f.is_file() and f.stem.startswith(prefix) and f.suffix == ".cpp":
return True
return False


def load_state(problem_dir: str) -> dict[str, Any]:
path = state_file(problem_dir)
if path.exists():
Expand Down Expand Up @@ -120,7 +136,7 @@ def pre_tool(payload: dict[str, Any]) -> int:
"checker_build": "必须先通过 stress_test_run(completed_rounds == total_rounds),再构建 checker。",
"problem_validate": "必须先通过 stress_test_run(completed_rounds == total_rounds),再进行验证。",
"problem_generate_tests": "必须先通过 problem_validate(验证通过),才能生成最终测试数据。",
"problem_pack_polygon": "必须先生成最终测试数据,并且生成数量 > 0,再进行打包。",
"problem_pack_polygon": "必须先生成最终测试数据并通过 problem_verify_tests(passed),再进行打包。",
}

tool_input = payload.get("tool_input", {})
Expand Down Expand Up @@ -169,6 +185,7 @@ def pre_tool(payload: dict[str, Any]) -> int:

if short_name == "problem_pack_polygon" and not (
state["tests_generated"] and state.get("generated_test_count", 0) > 0
and state.get("tests_verified", False)
):
deny(reasons["problem_pack_polygon"])
return 0
Expand All @@ -194,6 +211,12 @@ def post_tool(payload: dict[str, Any]) -> int:
save_state(problem_dir, state)
return 0

if short_name == "problem_verify_tests" and not success:
state = load_state(problem_dir)
state["tests_verified"] = False
save_state(problem_dir, state)
return 0

if not success:
return 0

Expand Down Expand Up @@ -229,6 +252,9 @@ def post_tool(payload: dict[str, Any]) -> int:
generated_tests = data.get("generated_tests", [])
state["tests_generated"] = bool(generated_tests)
state["generated_test_count"] = len(generated_tests)
state["tests_verified"] = False
elif short_name == "problem_verify_tests":
state["tests_verified"] = bool(data.get("passed", False))
elif short_name == "problem_pack_polygon":
state["packaged"] = True

Expand All @@ -244,7 +270,8 @@ def session_start() -> int:
"stress_test_run(completed_rounds == total_rounds) -> "
"checker_build if needed (accuracy >= 0.9) -> "
"problem_validate(validation_passed) -> "
"problem_generate_tests(generated_test_count > 0) -> problem_pack_polygon. "
"problem_generate_tests(generated_test_count > 0) -> "
"problem_verify_tests(passed) -> problem_pack_polygon. "
"If a hook blocks a step, complete the missing prerequisite instead of retrying blindly."
)
print(
Expand Down
2 changes: 1 addition & 1 deletion src/autocode_mcp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""
import os

__version__ = "0.6.0"
__version__ = "0.7.0"

# 获取 templates 目录路径(包内目录)
_PACKAGE_DIR = os.path.dirname(__file__)
Expand Down
4 changes: 3 additions & 1 deletion src/autocode_mcp/server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
MCP Server 入口。

提供 15 个原子工具,基于 AutoCode 论文框架。
提供 17 个原子工具,基于 AutoCode 论文框架。
"""

from __future__ import annotations
Expand Down Expand Up @@ -35,6 +35,7 @@
from .tools.problem import ProblemCreateTool, ProblemGenerateTestsTool, ProblemPackPolygonTool
from .tools.solution import SolutionBuildTool, SolutionRunTool
from .tools.stress_test import StressTestRunTool
from .tools.test_verify import ProblemVerifyTestsTool
from .tools.validation import ProblemValidateTool
from .tools.validator import ValidatorBuildTool, ValidatorSelectTool

Expand Down Expand Up @@ -67,6 +68,7 @@ def register_all_tools() -> None:
# Problem 工具组
register_tool(ProblemCreateTool())
register_tool(ProblemGenerateTestsTool())
register_tool(ProblemVerifyTestsTool())
register_tool(ProblemPackPolygonTool())
register_tool(ProblemValidateTool())

Expand Down
53 changes: 21 additions & 32 deletions src/autocode_mcp/tools/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..utils.compiler import run_binary_with_args
from ..utils.platform import get_exe_extension
from .base import Tool, ToolResult
from .mixins import BuildToolMixin
from .mixins import BuildToolMixin, resolve_source


class CheckerBuildTool(Tool, BuildToolMixin):
Expand Down Expand Up @@ -91,58 +91,45 @@ async def execute(
compiler: str = "g++",
) -> ToolResult:
"""执行 Checker 构建。"""
# 解析源代码:source_path 优先于 code
source_dir = None
if source_path:
if not os.path.isabs(source_path):
source_path = os.path.join(problem_dir, source_path)
if not os.path.exists(source_path):
return ToolResult.fail(f"Source file not found: {source_path}")
try:
with open(source_path, encoding="utf-8") as f:
code = f.read()
except UnicodeDecodeError:
try:
with open(source_path, encoding="latin-1") as f:
code = f.read()
except Exception as e:
return ToolResult.fail(f"Failed to read source file: {e}")
source_dir = os.path.dirname(os.path.abspath(source_path))
elif code is None:
return ToolResult.fail("Either 'code' or 'source_path' must be provided")
resolved, err = resolve_source(problem_dir, code, source_path)
if err is not None:
return err
assert resolved is not None

os.makedirs(problem_dir, exist_ok=True)

# 保存到 files/ 子目录
files_dir = os.path.join(problem_dir, "files")
os.makedirs(files_dir, exist_ok=True)

# 保存代码
source_path = os.path.join(files_dir, "checker.cpp")
canonical_path = os.path.join(files_dir, "checker.cpp")
try:
with open(source_path, "w", encoding="utf-8") as f:
f.write(code)
with open(canonical_path, "w", encoding="utf-8") as f:
f.write(resolved.code)
except Exception as e:
return ToolResult.fail(f"Failed to save code: {str(e)}")

# 编译
binary_path = os.path.join(files_dir, f"checker{get_exe_extension()}")

include_dirs = [source_dir] if source_dir else None
compile_result = await self.build(source_path, binary_path, compiler=compiler, include_dirs=include_dirs)
compile_source = resolved.original_source_path or canonical_path
include_dirs = [resolved.include_dir] if resolved.include_dir else None
compile_result = await self.build(compile_source, binary_path, compiler=compiler, include_dirs=include_dirs)

if not compile_result.success:
return ToolResult.fail(
f"Compilation failed: {compile_result.error}",
source_path=source_path,
source_path=compile_source,
canonical_path=canonical_path,
compile_log=compile_result.stderr,
)

binary_size = os.path.getsize(binary_path) if os.path.exists(binary_path) else 0

# 如果没有测试场景,直接返回成功
if not test_scenarios:
return ToolResult.ok(
source_path=source_path,
source_path=compile_source,
canonical_path=canonical_path,
binary_path=binary_path,
binary_size=binary_size,
compile_log=compile_result.stderr,
message="Checker built successfully (no test scenarios provided)",
)
Expand Down Expand Up @@ -214,8 +201,10 @@ async def execute(
accuracy = correct_count / total if total > 0 else 0

return ToolResult.ok(
source_path=source_path,
source_path=compile_source,
canonical_path=canonical_path,
binary_path=binary_path,
binary_size=binary_size,
compile_log=compile_result.stderr,
test_results=test_results,
correct_count=correct_count,
Expand Down
Loading
Loading