Skip to content

Commit 72ab089

Browse files
committed
build: enforce explicit local cpp types
1 parent 3031c1e commit 72ab089

3 files changed

Lines changed: 214 additions & 0 deletions

File tree

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Wraps CMake for convenient day-to-day workflow
33

44
BUILD_DIR := build
5+
CPP_AUTO_AUDIT := python3 tools/cpp_auto_audit.py
56
CMAKE := cmake
67
NPROC := $(shell nproc 2>/dev/null || echo 4)
78
BENCH_ITERATIONS := 254
@@ -45,6 +46,8 @@ lint:
4546
echo "clang-format not found. Install clang-format to run lint."; \
4647
exit 1; \
4748
fi
49+
$(CPP_AUTO_AUDIT)
50+
4851

4952
# Format code in place
5053
format:

tools/cpp_auto_allowlist.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Repo-owned exceptions for the explicit local-type policy.
2+
# Format: relative/path.cpp|line snippet

tools/cpp_auto_audit.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#!/usr/bin/env python3
2+
"""Audit repo-owned C++ sources for disallowed local ``auto`` usage.
3+
4+
Policy:
5+
- allow ``auto`` for structured bindings
6+
- allow ``auto`` for lambda closure objects
7+
- allow ``auto`` for iterator-like values
8+
- allow explicit one-off exceptions marked with ``auto-ok``
9+
10+
Everything else is expected to spell out the type directly. Repos can
11+
carry a local allowlist while older files are being burned down. The
12+
allowlist format is one rule per line:
13+
14+
relative/path.cpp|line snippet
15+
16+
The snippet is matched as a substring against the stripped source line.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import subprocess
23+
import sys
24+
from dataclasses import dataclass
25+
from pathlib import Path
26+
import re
27+
28+
29+
# File suffixes that mark a tracked file as C++ source for the audit.
CPP_SUFFIXES = {".cpp", ".hpp", ".cc", ".cxx", ".hh", ".hxx"}
# Default allowlist location, joined onto the repository root by ``main``.
DEFAULT_ALLOWLIST = "tools/cpp_auto_allowlist.txt"
# Cheap pre-filter: ``auto`` as a whole word anywhere on the line.
WORD_AUTO = re.compile(r"\bauto\b")
# Structured bindings: ``auto [a, b]``, ``auto& [a, b]``, ``auto&& [a, b]``.
STRUCTURED_BINDING = re.compile(r"\bauto(?:\s*(?:\*|&&?))?\s*\[")
# A line that *starts* (optionally inside a ``for (`` header and after
# storage/cv qualifiers) with an ``auto`` declaration.
#   * ``auto\b`` keeps identifiers that merely begin with "auto" (for example
#     ``autosave(...)``) from being read as a declaration.
#   * ``&&?`` accepts both ``auto&`` and the forwarding form ``auto&&`` so the
#     latter cannot slip past the audit.
DECLARATION_AUTO = re.compile(
    r"^\s*(?:for\s*\(\s*)?(?:const\s+|constexpr\s+|static\s+|volatile\s+|mutable\s+)*"
    r"auto\b(?:\s*(?:\*|&&?))?\s*(?:\w+|\[)"
)

# Substrings whose presence on a line makes an ``auto`` declaration count as
# "iterator-like" (a plain substring heuristic, not a type check).
ITERATOR_HINTS = (
    ".begin(",
    ".end(",
    ".cbegin(",
    ".cend(",
    ".rbegin(",
    ".rend(",
    ".crbegin(",
    ".crend(",
    ".find(",
    ".lower_bound(",
    ".upper_bound(",
    ".equal_range(",
    "std::begin(",
    "std::end(",
    "std::cbegin(",
    "std::cend(",
    "std::find(",
    "std::lower_bound(",
    "std::upper_bound(",
    "std::equal_range(",
    "std::max_element(",
    "std::min_element(",
    "std::prev(",
    "std::next(",
)
64+
65+
66+
@dataclass(frozen=True, slots=True)
67+
class AllowRule:
68+
relative_path: str
69+
snippet: str
70+
71+
72+
@dataclass(frozen=True, slots=True)
73+
class Violation:
74+
relative_path: str
75+
line_number: int
76+
line: str
77+
78+
79+
def tracked_cpp_files(repo_root: Path) -> list[Path]:
    """Return the git-tracked C++ files under *repo_root* that should be audited.

    Files are listed with ``git ls-files -z`` (NUL-separated, so unusual file
    names are not quoted by git) and kept when their suffix is in
    ``CPP_SUFFIXES``. Files below a directory whose name starts with ``build``
    or equals ``_deps`` are skipped.

    The directory filters look only at the repository-relative directory
    components. Checking the absolute path would drop every file whenever the
    checkout itself lives under something like ``/home/ci/builds/``, and
    checking the file name would silently skip sources such as
    ``build_graph.cpp``.

    Args:
        repo_root: Root of the git work tree; used as ``cwd`` for git.

    Returns:
        Paths of the selected files, each joined onto *repo_root*.

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` exits non-zero.
    """
    proc = subprocess.run(
        ["git", "ls-files", "-z"],
        cwd=repo_root,
        check=True,
        capture_output=True,
        text=True,
    )
    files: list[Path] = []
    for raw in proc.stdout.split("\0"):
        if not raw:
            # The NUL-terminated listing ends with an empty trailing field.
            continue
        relative = Path(raw)
        if relative.suffix not in CPP_SUFFIXES:
            continue
        directories = relative.parts[:-1]
        if any(part.startswith("build") for part in directories):
            continue
        if "_deps" in directories:
            continue
        files.append(repo_root / relative)
    return files
98+
99+
100+
def read_allowlist(path: Path) -> list[AllowRule]:
    """Parse the allowlist file at *path* into a list of rules.

    Blank lines and lines starting with ``#`` are ignored. Every other line
    must look like ``relative/path.cpp|line snippet``; it is split on the first
    ``|`` and both halves are stripped of surrounding whitespace.

    The file is decoded as UTF-8 explicitly so that parsing does not depend on
    the locale of the machine running the audit.

    Args:
        path: Location of the allowlist file.

    Returns:
        The parsed rules in file order, or an empty list when the file does
        not exist.

    Raises:
        ValueError: If a non-comment line contains no ``|`` separator.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # A missing allowlist simply means "no exceptions".
        return []
    rules: list[AllowRule] = []
    for raw in text.splitlines():
        stripped = raw.strip()
        if not stripped or stripped.startswith("#"):
            continue
        relative_path, sep, snippet = stripped.partition("|")
        if not sep:
            raise ValueError(f"invalid allowlist rule: {raw!r}")
        rules.append(AllowRule(relative_path=relative_path.strip(), snippet=snippet.strip()))
    return rules
113+
114+
115+
def is_lambda_closure(line: str) -> bool:
    """Return True when *line* initialises a variable directly with a lambda.

    The text after the first ``=`` must begin (ignoring whitespace) with a
    lambda introducer ``[...]`` that is closed on the same line. Requiring the
    bracket to come first keeps subscript expressions such as
    ``auto v = values[i];`` and bracketed text in trailing comments from being
    mistaken for a lambda.

    Args:
        line: A single source line.

    Returns:
        True for lines shaped like ``auto f = [captures](...) {...}``;
        False otherwise, including lines without ``=``.
    """
    _, sep, initializer = line.partition("=")
    if not sep:
        return False
    initializer = initializer.lstrip()
    return initializer.startswith("[") and "]" in initializer[1:]
122+
123+
124+
def is_iterator_like(line: str) -> bool:
    """Return True when *line* contains any of the ``ITERATOR_HINTS`` substrings."""
    for hint in ITERATOR_HINTS:
        if hint in line:
            return True
    return False
126+
127+
128+
def is_allowed_auto(line: str) -> bool:
    """Decide whether the ``auto`` on *line* is acceptable under the policy.

    After stripping the line, the checks run in this order and the first hit
    wins: an ``auto-ok`` marker, the line not being an ``auto`` declaration at
    all, a structured binding, a lambda closure, an iterator-like initialiser.

    Returns:
        True when any of those conditions holds, False otherwise.
    """
    text = line.strip()
    return (
        "auto-ok" in text
        or DECLARATION_AUTO.search(text) is None
        or STRUCTURED_BINDING.search(text) is not None
        or is_lambda_closure(text)
        or is_iterator_like(text)
    )
141+
142+
143+
def matches_allowlist(relative_path: str, line: str, rules: list[AllowRule]) -> bool:
    """Return True when some rule covers *line* in the file *relative_path*.

    A rule covers the line when its path equals *relative_path* exactly and
    its snippet occurs as a substring of the stripped line.
    """
    text = line.strip()
    return any(
        candidate.relative_path == relative_path and candidate.snippet in text
        for candidate in rules
    )
149+
150+
151+
def collect_violations(repo_root: Path, allowlist: list[AllowRule]) -> list[Violation]:
    """Scan every audited C++ file and return the disallowed ``auto`` lines.

    A line is reported when it contains the word ``auto``, is not accepted by
    ``is_allowed_auto`` and is not covered by a rule in *allowlist*.

    Sources are decoded as UTF-8 with undecodable bytes replaced, so one file
    in a legacy encoding cannot abort the whole audit. Lines are taken from
    the text stream rather than ``str.splitlines()`` so that only real
    newlines advance the reported line number; ``splitlines()`` also splits on
    form feeds and similar characters.

    Args:
        repo_root: Root of the git work tree to audit.
        allowlist: Rules that exempt specific lines.

    Returns:
        Violations in file-listing order, then by line number. Each stored
        line has trailing whitespace removed.
    """
    violations: list[Violation] = []
    for path in tracked_cpp_files(repo_root):
        relative_path = path.relative_to(repo_root).as_posix()
        with path.open(encoding="utf-8", errors="replace") as handle:
            for line_number, line in enumerate(handle, start=1):
                if not WORD_AUTO.search(line):
                    continue
                if is_allowed_auto(line):
                    continue
                if matches_allowlist(relative_path, line, allowlist):
                    continue
                violations.append(
                    Violation(
                        relative_path=relative_path,
                        line_number=line_number,
                        line=line.rstrip(),
                    )
                )
    return violations
166+
167+
168+
def parse_args() -> argparse.Namespace:
    """Parse the command line (``--repo-root``, ``--allowlist``, ``--write-allowlist``)."""
    cli = argparse.ArgumentParser()
    # Registered in the same order as they should appear in ``--help``.
    options = (
        ("--repo-root", {"default": "."}),
        ("--allowlist", {"default": DEFAULT_ALLOWLIST}),
        ("--write-allowlist", {"action": "store_true"}),
    )
    for flag, settings in options:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
174+
175+
176+
def write_allowlist(path: Path, violations: list[Violation]) -> None:
    """Write an allowlist file that exempts every entry in *violations*.

    The file starts with the two-line comment header, followed by one
    ``relative/path.cpp|stripped line`` rule per violation. Identical rules
    (the same stripped line occurring more than once in a file) are written
    only once, keeping first-seen order. The file is encoded as UTF-8
    explicitly so its content does not depend on the locale.

    Args:
        path: Destination file; missing parent directories are created.
        violations: Violations to turn into rules.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    header = [
        "# Repo-owned exceptions for the explicit local-type policy.",
        "# Format: relative/path.cpp|line snippet",
    ]
    # dict.fromkeys removes duplicates while preserving insertion order.
    rules = dict.fromkeys(
        f"{violation.relative_path}|{violation.line.strip()}" for violation in violations
    )
    path.write_text("\n".join([*header, *rules]) + "\n", encoding="utf-8")
185+
186+
187+
def main() -> int:
    """Run the audit, or regenerate the allowlist, and return the exit status.

    With ``--write-allowlist`` the allowlist file is rewritten to cover every
    current violation and 0 is returned. Otherwise violations not covered by
    the allowlist are printed to stderr as ``path:line: text``.

    Returns:
        0 when nothing is reported (or the allowlist was written), 1 when at
        least one disallowed ``auto`` was found.
    """
    args = parse_args()
    repo_root = Path(args.repo_root).resolve()
    allowlist_path = repo_root / args.allowlist
    if args.write_allowlist:
        # Regenerate from an empty allowlist. Filtering through the existing
        # file first would leave out every line it already exempts, so a
        # second ``--write-allowlist`` run would wipe the entries that are
        # still needed.
        write_allowlist(allowlist_path, collect_violations(repo_root, []))
        return 0
    allowlist = read_allowlist(allowlist_path)
    violations = collect_violations(repo_root, allowlist)
    if not violations:
        print("cpp_auto_audit: no disallowed local auto usage found")
        return 0
    print("cpp_auto_audit: disallowed local auto usage found", file=sys.stderr)
    for violation in violations:
        print(
            f"{violation.relative_path}:{violation.line_number}: {violation.line.strip()}",
            file=sys.stderr,
        )
    return 1
206+
207+
208+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())

0 commit comments

Comments
 (0)