-
Notifications
You must be signed in to change notification settings - Fork 0
Add Pre-Commit Hook to Validate Sub-CLAUDE.md Completeness #1978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| #!/usr/bin/env python3 | ||
| """Validate sub-CLAUDE.md file tables cover all .py files in their directories. | ||
|
|
||
| Pre-commit hook (validate-only). Exits 1 with structured messages when a .py | ||
| file exists in a directory whose CLAUDE.md does not mention it. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import sys | ||
| from pathlib import Path | ||
|
|
||
| PROJECT_ROOT = Path(__file__).resolve().parent.parent | ||
| SRC_ROOT = PROJECT_ROOT / "src" / "autoskillit" | ||
| TESTS_ROOT = PROJECT_ROOT / "tests" | ||
|
|
||
| SRC_EXPECTED = [ | ||
| "core/types/CLAUDE.md", | ||
| "core/runtime/CLAUDE.md", | ||
| "execution/headless/CLAUDE.md", | ||
| "execution/process/CLAUDE.md", | ||
| "execution/session/CLAUDE.md", | ||
| "execution/merge_queue/CLAUDE.md", | ||
| "recipe/rules/CLAUDE.md", | ||
| "server/tools/CLAUDE.md", | ||
| "cli/doctor/CLAUDE.md", | ||
| "cli/fleet/CLAUDE.md", | ||
| "cli/session/CLAUDE.md", | ||
| "cli/ui/CLAUDE.md", | ||
| "cli/update/CLAUDE.md", | ||
| "hooks/guards/CLAUDE.md", | ||
| "hooks/formatters/CLAUDE.md", | ||
| "CLAUDE.md", | ||
| "core/CLAUDE.md", | ||
| "config/CLAUDE.md", | ||
| "pipeline/CLAUDE.md", | ||
| "execution/CLAUDE.md", | ||
| "workspace/CLAUDE.md", | ||
| "planner/CLAUDE.md", | ||
| "recipe/CLAUDE.md", | ||
| "migration/CLAUDE.md", | ||
| "fleet/CLAUDE.md", | ||
| "cli/CLAUDE.md", | ||
| "hooks/CLAUDE.md", | ||
| ] | ||
|
|
||
| TESTS_EXPECTED = [ | ||
| "arch/CLAUDE.md", | ||
| "assets/CLAUDE.md", | ||
| "cli/CLAUDE.md", | ||
| "config/CLAUDE.md", | ||
| "contracts/CLAUDE.md", | ||
| "core/CLAUDE.md", | ||
| "docs/CLAUDE.md", | ||
| "execution/CLAUDE.md", | ||
| "fleet/CLAUDE.md", | ||
| "hooks/CLAUDE.md", | ||
| "infra/CLAUDE.md", | ||
| "migration/CLAUDE.md", | ||
| "pipeline/CLAUDE.md", | ||
| "planner/CLAUDE.md", | ||
| "recipe/CLAUDE.md", | ||
| "server/CLAUDE.md", | ||
| "skills/CLAUDE.md", | ||
| "skills_extended/CLAUDE.md", | ||
| "workspace/CLAUDE.md", | ||
| ] | ||
|
|
||
|
|
||
| def check_coverage(root: Path, expected: list[str]) -> list[str]: | ||
| """Check that each CLAUDE.md in expected mentions all .py files in its directory. | ||
|
|
||
| Returns a list of failure messages (empty if all coverage is complete). | ||
| """ | ||
| failures: list[str] = [] | ||
| for rel_path in expected: | ||
| claude_md = root / rel_path | ||
| if not claude_md.exists(): | ||
|
Trecek marked this conversation as resolved.
|
||
| failures.append(f"{rel_path}: CLAUDE.md not found") | ||
| continue | ||
| content = claude_md.read_text(encoding="utf-8") | ||
| directory = claude_md.parent | ||
| for py_file in directory.glob("*.py"): | ||
| if py_file.name == "__init__.py": | ||
| if "`__init__.py`" not in content: | ||
|
Trecek marked this conversation as resolved.
|
||
| failures.append(f"{rel_path}: missing `__init__.py` in file table") | ||
| else: | ||
| if f"`{py_file.name}`" not in content: | ||
| failures.append(f"{rel_path}: missing {py_file.name}") | ||
| return failures | ||
|
|
||
|
|
||
| def main() -> int: | ||
| src_failures = check_coverage(SRC_ROOT, SRC_EXPECTED) | ||
| tests_failures = check_coverage(TESTS_ROOT, TESTS_EXPECTED) | ||
| all_failures = src_failures + tests_failures | ||
| if all_failures: | ||
| print("sub-CLAUDE.md file table gaps found:\n") | ||
| for f in all_failures: | ||
| print(f" {f}") | ||
| print(f"\nTotal: {len(all_failures)} gap(s)") | ||
| print("\nTo fix: add the missing file(s) to the CLAUDE.md file table in the") | ||
| print("directory where the .py file(s) were added.") | ||
| return 1 | ||
| print("All sub-CLAUDE.md file tables are complete.") | ||
| return 0 | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(main()) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| """Unit and integration tests for scripts/check_sub_claude_md.py.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import importlib.util | ||
| import sys | ||
| from pathlib import Path | ||
|
|
||
| import pytest | ||
|
|
||
| pytestmark = pytest.mark.medium | ||
|
|
||
| REPO_ROOT = Path(__file__).parent.parent.parent | ||
| _SCRIPT = REPO_ROOT / "scripts" / "check_sub_claude_md.py" | ||
|
|
||
|
|
||
| @pytest.fixture(scope="module") | ||
|
Trecek marked this conversation as resolved.
|
||
| def script_mod(): | ||
| spec = importlib.util.spec_from_file_location("check_sub_claude_md", _SCRIPT) | ||
| assert spec is not None and spec.loader is not None | ||
| mod = importlib.util.module_from_spec(spec) | ||
| sys.modules[spec.name] = mod | ||
| spec.loader.exec_module(mod) | ||
| yield mod | ||
| sys.modules.pop(spec.name, None) | ||
|
|
||
|
|
||
| class TestCheckCoverage: | ||
| def test_check_coverage_all_files_mentioned(self, script_mod, tmp_path): | ||
| """Returns empty list when CLAUDE.md mentions every .py file in the directory.""" | ||
| subdir = tmp_path / "mypackage" | ||
| subdir.mkdir() | ||
| (subdir / "CLAUDE.md").write_text( | ||
| "| File | Purpose |\n|------|----------|\n" | ||
| "| `__init__.py` | init |\n| `foo.py` | foo |\n", | ||
| encoding="utf-8", | ||
| ) | ||
| (subdir / "__init__.py").touch() | ||
| (subdir / "foo.py").touch() | ||
| result = script_mod.check_coverage(tmp_path, ["mypackage/CLAUDE.md"]) | ||
| assert result == [] | ||
|
|
||
| def test_check_coverage_missing_regular_py_file(self, script_mod, tmp_path): | ||
| """Returns failure string containing the missing filename.""" | ||
| subdir = tmp_path / "mypackage" | ||
| subdir.mkdir() | ||
| (subdir / "CLAUDE.md").write_text( | ||
| "| File | Purpose |\n|------|----------|\n| `__init__.py` | init |\n", | ||
| encoding="utf-8", | ||
| ) | ||
| (subdir / "__init__.py").touch() | ||
| (subdir / "bar.py").touch() | ||
| result = script_mod.check_coverage(tmp_path, ["mypackage/CLAUDE.md"]) | ||
| assert result == ["mypackage/CLAUDE.md: missing bar.py"] | ||
|
|
||
| def test_check_coverage_missing_init_py_backtick(self, script_mod, tmp_path): | ||
| """Returns failure when __init__.py exists but CLAUDE.md lacks backtick-wrapped mention.""" | ||
| subdir = tmp_path / "mypackage" | ||
| subdir.mkdir() | ||
| (subdir / "CLAUDE.md").write_text( | ||
| "| File | Purpose |\n|------|----------|\n| `foo.py` | foo |\n", | ||
| encoding="utf-8", | ||
| ) | ||
| (subdir / "__init__.py").touch() | ||
| (subdir / "foo.py").touch() | ||
| result = script_mod.check_coverage(tmp_path, ["mypackage/CLAUDE.md"]) | ||
| assert result == ["mypackage/CLAUDE.md: missing `__init__.py` in file table"] | ||
|
|
||
| def test_check_coverage_init_py_without_backticks_fails(self, script_mod, tmp_path): | ||
| """Returns failure when CLAUDE.md mentions __init__.py without backtick wrapping.""" | ||
| subdir = tmp_path / "mypackage" | ||
| subdir.mkdir() | ||
| (subdir / "CLAUDE.md").write_text( | ||
| "| File | Purpose |\n|------|----------|\n| __init__.py | init |\n", | ||
| encoding="utf-8", | ||
| ) | ||
| (subdir / "__init__.py").touch() | ||
| result = script_mod.check_coverage(tmp_path, ["mypackage/CLAUDE.md"]) | ||
| assert result == ["mypackage/CLAUDE.md: missing `__init__.py` in file table"] | ||
|
|
||
| def test_check_coverage_flags_missing_claude_md(self, script_mod, tmp_path): | ||
| """Returns failure when expected CLAUDE.md path does not exist on disk.""" | ||
| result = script_mod.check_coverage(tmp_path, ["nonexistent/CLAUDE.md"]) | ||
| assert result == ["nonexistent/CLAUDE.md: CLAUDE.md not found"] | ||
|
|
||
| def test_check_coverage_multiple_missing_files(self, script_mod, tmp_path): | ||
| """Returns one failure entry per missing file.""" | ||
| subdir = tmp_path / "mypackage" | ||
| subdir.mkdir() | ||
| (subdir / "CLAUDE.md").write_text( | ||
| "| File | Purpose |\n|------|----------|\n", | ||
| encoding="utf-8", | ||
| ) | ||
| (subdir / "__init__.py").touch() | ||
| (subdir / "a.py").touch() | ||
| (subdir / "b.py").touch() | ||
| result = sorted(script_mod.check_coverage(tmp_path, ["mypackage/CLAUDE.md"])) | ||
| assert result == [ | ||
| "mypackage/CLAUDE.md: missing `__init__.py` in file table", | ||
| "mypackage/CLAUDE.md: missing a.py", | ||
| "mypackage/CLAUDE.md: missing b.py", | ||
| ] | ||
|
|
||
|
|
||
| class TestExpectedListsSync: | ||
|
Trecek marked this conversation as resolved.
|
||
| def test_src_expected_matches_test_file(self, script_mod): | ||
| """SRC_EXPECTED matches EXPECTED_SUB_CLAUDE_MDS in test_sub_claude_md_completeness.""" | ||
| test_module_name = "test_sub_claude_md_completeness" | ||
| test_file = REPO_ROOT / "tests" / "docs" / f"{test_module_name}.py" | ||
| spec = importlib.util.spec_from_file_location(test_module_name, test_file) | ||
| assert spec is not None and spec.loader is not None | ||
| test_mod = importlib.util.module_from_spec(spec) | ||
| spec.loader.exec_module(test_mod) | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [warning] bugs: spec.loader.exec_module(test_mod) is called without guarding that spec.loader implements exec_module. The assertion on L110 only checks spec.loader is not None. A TypeError or AttributeError would surface as an opaque test failure rather than a clear message.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Valid observation — flagged for design decision. spec_from_file_location for a .py file always returns SourceFileLoader which implements exec_module — near-zero risk in practice. Adding a hasattr guard would improve the error message but requires a design decision on failure behavior. |
||
| assert sorted(script_mod.SRC_EXPECTED) == sorted(test_mod.EXPECTED_SUB_CLAUDE_MDS) | ||
|
|
||
| def test_tests_expected_matches_test_file(self, script_mod): | ||
| """TESTS_EXPECTED list matches that in test_tests_sub_claude_md_completeness.""" | ||
| test_module_name = "test_tests_sub_claude_md_completeness" | ||
| test_file = REPO_ROOT / "tests" / "docs" / f"{test_module_name}.py" | ||
| spec = importlib.util.spec_from_file_location(test_module_name, test_file) | ||
| assert spec is not None and spec.loader is not None | ||
| test_mod = importlib.util.module_from_spec(spec) | ||
| spec.loader.exec_module(test_mod) | ||
| assert sorted(script_mod.TESTS_EXPECTED) == sorted(test_mod.EXPECTED_SUB_CLAUDE_MDS) | ||
|
|
||
|
|
||
| class TestMain: | ||
| def test_main_returns_zero_on_live_repo(self, script_mod): | ||
|
Trecek marked this conversation as resolved.
|
||
| """main() returns 0 against the actual project (integration guard).""" | ||
| result = script_mod.main() | ||
| assert result == 0, "main() should return 0 on a clean repo" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[warning] arch: SRC_EXPECTED and TESTS_EXPECTED are static hardcoded lists. When a new package sub-directory with a CLAUDE.md is added, the script silently ignores it. The hook should dynamically discover all CLAUDE.md files under SRC_ROOT and TESTS_ROOT rather than relying on an opt-in list that must be manually maintained.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Valid observation — flagged for design decision. Dynamic discovery vs. hardcoded opt-in list is an architectural choice requiring human decision: dynamic discovery would make the lists in TestExpectedListsSync sync tests obsolete and change the hook's scope significantly.