|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Audit repo-owned C++ sources for disallowed local ``auto`` usage. |
| 3 | +
|
| 4 | +Policy: |
| 5 | +- allow ``auto`` for structured bindings |
| 6 | +- allow ``auto`` for lambda closure objects |
| 7 | +- allow ``auto`` for iterator-like values |
| 8 | +- allow explicit one-off exceptions marked with ``auto-ok`` |
| 9 | +
|
| 10 | +Everything else is expected to spell out the type directly. Repos can |
| 11 | +carry a local allowlist while older files are being burned down. The |
| 12 | +allowlist format is one rule per line: |
| 13 | +
|
| 14 | + relative/path.cpp|line snippet |
| 15 | +
|
| 16 | +The snippet is matched as a substring against the stripped source line. |
| 17 | +""" |
| 18 | + |
| 19 | +from __future__ import annotations |
| 20 | + |
| 21 | +import argparse |
| 22 | +import subprocess |
| 23 | +import sys |
| 24 | +from dataclasses import dataclass |
| 25 | +from pathlib import Path |
| 26 | +import re |
| 27 | + |
| 28 | + |
# File suffixes that the audit treats as C++ sources or headers.
CPP_SUFFIXES = {".cpp", ".hpp", ".cc", ".cxx", ".hh", ".hxx"}
# Allowlist location, interpreted relative to the repository root.
DEFAULT_ALLOWLIST = "tools/cpp_auto_allowlist.txt"
# Cheap pre-filter: a standalone ``auto`` token anywhere on the line.
WORD_AUTO = re.compile(r"\bauto\b")
# ``auto [a, b]`` with an optional ``*``, ``&`` or ``&&`` between ``auto`` and
# the opening bracket.
STRUCTURED_BINDING = re.compile(r"\bauto(?:\s*(?:&&|[\*&]))?\s*\[")
# A declaration that starts the line (optionally inside a ``for (`` header and
# after cv/storage qualifiers).  ``&&`` is tried before the single-character
# declarators so forwarding references (``auto&& x``) are recognised, and the
# ``\b`` after ``auto`` keeps identifiers such as ``autoscale`` from matching.
DECLARATION_AUTO = re.compile(
    r"^\s*(?:for\s*\(\s*)?(?:const\s+|constexpr\s+|static\s+|volatile\s+|mutable\s+)*"
    r"auto\b(?:\s*(?:&&|[\*&]))?\s*(?:\w+|\[)"
)
| 37 | + |
# Member calls whose presence on a line marks the value as iterator-like.
_MEMBER_ITERATOR_CALLS = (
    ".begin(",
    ".end(",
    ".cbegin(",
    ".cend(",
    ".rbegin(",
    ".rend(",
    ".crbegin(",
    ".crend(",
    ".find(",
    ".lower_bound(",
    ".upper_bound(",
    ".equal_range(",
)

# Free functions in ``std::`` that are treated the same way.
_STD_ITERATOR_CALLS = (
    "std::begin(",
    "std::end(",
    "std::cbegin(",
    "std::cend(",
    "std::find(",
    "std::lower_bound(",
    "std::upper_bound(",
    "std::equal_range(",
    "std::max_element(",
    "std::min_element(",
    "std::prev(",
    "std::next(",
)

# Substrings matched verbatim against a source line; member calls first, then
# the ``std::`` free functions.
ITERATOR_HINTS = _MEMBER_ITERATOR_CALLS + _STD_ITERATOR_CALLS
| 64 | + |
| 65 | + |
| 66 | +@dataclass(frozen=True, slots=True) |
| 67 | +class AllowRule: |
| 68 | + relative_path: str |
| 69 | + snippet: str |
| 70 | + |
| 71 | + |
| 72 | +@dataclass(frozen=True, slots=True) |
| 73 | +class Violation: |
| 74 | + relative_path: str |
| 75 | + line_number: int |
| 76 | + line: str |
| 77 | + |
| 78 | + |
def tracked_cpp_files(repo_root: Path) -> list[Path]:
    """Return the git-tracked C++ files under *repo_root* that should be audited.

    Files are skipped when their suffix is not in ``CPP_SUFFIXES`` or when one
    of their directories *inside the repository* starts with ``build`` or is
    named ``_deps``.

    Args:
        repo_root: Directory in which ``git ls-files`` is executed.

    Returns:
        Paths of the selected files, each joined onto *repo_root*.

    Raises:
        subprocess.CalledProcessError: If ``git ls-files`` exits non-zero.
    """
    proc = subprocess.run(
        # -z yields NUL-terminated, unquoted names, so paths containing
        # unusual characters are not returned in git's C-quoted form.
        ["git", "ls-files", "-z"],
        cwd=repo_root,
        check=True,
        capture_output=True,
        text=True,
    )
    files: list[Path] = []
    for raw in proc.stdout.split("\0"):
        if not raw:
            # The output ends with a terminator, which leaves an empty item.
            continue
        relative = Path(raw)
        if relative.suffix not in CPP_SUFFIXES:
            continue
        # Only directories inside the repository take part in the exclusion
        # test: the checkout location and the file's own name must not cause
        # a file to be skipped.
        directories = relative.parts[:-1]
        if any(part.startswith("build") for part in directories):
            continue
        if "_deps" in directories:
            continue
        files.append(repo_root / relative)
    return files
| 98 | + |
| 99 | + |
def read_allowlist(path: Path) -> list[AllowRule]:
    """Parse the allowlist file at *path* into rules.

    Blank lines and lines starting with ``#`` are ignored.  Every other line
    must look like ``relative/path.cpp|line snippet``; the line is split at the
    first ``|`` and both halves are stripped.

    Args:
        path: Location of the allowlist file.

    Returns:
        The parsed rules in file order, or an empty list when *path* does not
        exist.

    Raises:
        ValueError: If a rule has no ``|`` separator, or if its path or its
            snippet is empty.
    """
    if not path.exists():
        return []
    rules: list[AllowRule] = []
    for raw in path.read_text().splitlines():
        stripped = raw.strip()
        if not stripped or stripped.startswith("#"):
            continue
        relative_path, sep, snippet = stripped.partition("|")
        relative_path = relative_path.strip()
        snippet = snippet.strip()
        # An empty snippet is a substring of every line, so such a rule would
        # silently allow the entire file; treat it as malformed instead.
        if not sep or not relative_path or not snippet:
            raise ValueError(f"invalid allowlist rule: {raw!r}")
        rules.append(AllowRule(relative_path=relative_path, snippet=snippet))
    return rules
| 113 | + |
| 114 | + |
def is_lambda_closure(line: str) -> bool:
    """Return True when *line* initialises a variable directly with a lambda.

    The text after the first ``=`` must begin with a lambda introducer, i.e. an
    opening ``[`` that is closed later on the same line.  Brackets that appear
    further into the initializer (``values[0]``, ``table["key"]``) are
    subscripts and do not count.

    Args:
        line: A single source line.

    Returns:
        True if the initializer starts with ``[ ... ]``, otherwise False.
    """
    _, assignment, initializer = line.partition("=")
    if not assignment:
        return False
    initializer = initializer.lstrip()
    # startswith("[") guarantees that any "]" found lies after the opener.
    return initializer.startswith("[") and "]" in initializer
| 122 | + |
| 123 | + |
def is_iterator_like(line: str) -> bool:
    """Report whether *line* contains any of the ``ITERATOR_HINTS`` substrings."""
    for hint in ITERATOR_HINTS:
        if hint in line:
            return True
    return False
| 126 | + |
| 127 | + |
def is_allowed_auto(line: str) -> bool:
    """Decide whether the ``auto`` on *line* is acceptable under the policy.

    A line passes when it carries the ``auto-ok`` marker, when it does not
    match ``DECLARATION_AUTO`` at all, or when the declaration is a structured
    binding, a lambda closure, or iterator-like.
    """
    text = line.strip()
    # Explicit opt-out marker, or no leading declaration to judge.
    if "auto-ok" in text or DECLARATION_AUTO.search(text) is None:
        return True
    # It is a declaration: accept only the permitted categories, checked in
    # the same order as the policy lists them.
    return bool(
        STRUCTURED_BINDING.search(text)
        or is_lambda_closure(text)
        or is_iterator_like(text)
    )
| 141 | + |
| 142 | + |
def matches_allowlist(relative_path: str, line: str, rules: list[AllowRule]) -> bool:
    """Return True if some rule names *relative_path* and its snippet occurs in *line*.

    The line is stripped of surrounding whitespace before the substring test.
    """
    candidate = line.strip()
    return any(
        entry.relative_path == relative_path and entry.snippet in candidate
        for entry in rules
    )
| 149 | + |
| 150 | + |
def collect_violations(repo_root: Path, allowlist: list[AllowRule]) -> list[Violation]:
    """Scan every tracked C++ file and gather the disallowed ``auto`` lines.

    A line is reported when it contains a standalone ``auto`` token, is not
    accepted by ``is_allowed_auto``, and is not covered by *allowlist*.

    Args:
        repo_root: Repository root; reported paths are relative to it.
        allowlist: Rules that exempt individual lines.

    Returns:
        Violations in file order, each with a 1-based line number and the
        source line with trailing whitespace removed.
    """
    violations: list[Violation] = []
    for path in tracked_cpp_files(repo_root):
        relative_path = path.relative_to(repo_root).as_posix()
        try:
            # Undecodable bytes (e.g. a legacy-encoded comment) are replaced
            # rather than aborting the audit of every other file.
            text = path.read_text(errors="replace")
        except FileNotFoundError:
            # Still tracked by git but no longer present in the working tree.
            continue
        for line_number, line in enumerate(text.splitlines(), start=1):
            if not WORD_AUTO.search(line):
                continue
            if is_allowed_auto(line):
                continue
            if matches_allowlist(relative_path, line, allowlist):
                continue
            violations.append(
                Violation(relative_path=relative_path, line_number=line_number, line=line.rstrip())
            )
    return violations
| 166 | + |
| 167 | + |
def parse_args() -> argparse.Namespace:
    """Parse the command line of the audit script.

    Returns:
        A namespace with ``repo_root`` (str), ``allowlist`` (str) and
        ``write_allowlist`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Audit tracked C++ sources for disallowed local 'auto' usage.",
    )
    parser.add_argument(
        "--repo-root",
        default=".",
        help="repository to audit (default: current directory)",
    )
    parser.add_argument(
        "--allowlist",
        default=DEFAULT_ALLOWLIST,
        help="allowlist file, relative to the repository root",
    )
    parser.add_argument(
        "--write-allowlist",
        action="store_true",
        help="rewrite the allowlist file from the audit results instead of reporting them",
    )
    return parser.parse_args()
| 174 | + |
| 175 | + |
def write_allowlist(path: Path, violations: list[Violation]) -> None:
    """Write *violations* to *path* in allowlist format, replacing any old file.

    The file starts with two comment lines, followed by one
    ``relative/path.cpp|stripped line`` rule per distinct violation.  Missing
    parent directories are created.

    Args:
        path: Destination file.
        violations: Violations to record; only their path and line text are used.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    header = [
        "# Repo-owned exceptions for the explicit local-type policy.",
        "# Format: relative/path.cpp|line snippet",
    ]
    # Identical lines within one file yield identical rules; dict.fromkeys
    # drops the repeats while keeping first-seen order.
    rules = dict.fromkeys(
        f"{violation.relative_path}|{violation.line.strip()}" for violation in violations
    )
    path.write_text("\n".join([*header, *rules]) + "\n")
| 185 | + |
| 186 | + |
def main() -> int:
    """Run the audit, or regenerate the allowlist when ``--write-allowlist`` is given.

    Returns:
        0 when the allowlist was written or no violations were found, 1 when
        violations were reported on stderr.
    """
    args = parse_args()
    repo_root = Path(args.repo_root).resolve()
    allowlist_path = repo_root / args.allowlist
    if args.write_allowlist:
        # Collect against an empty allowlist: the file is overwritten, so
        # filtering through the existing rules would drop every exception that
        # is already listed and leave only the newly found ones.
        write_allowlist(allowlist_path, collect_violations(repo_root, []))
        return 0
    violations = collect_violations(repo_root, read_allowlist(allowlist_path))
    if not violations:
        print("cpp_auto_audit: no disallowed local auto usage found")
        return 0
    print("cpp_auto_audit: disallowed local auto usage found", file=sys.stderr)
    for violation in violations:
        print(
            f"{violation.relative_path}:{violation.line_number}: {violation.line.strip()}",
            file=sys.stderr,
        )
    return 1


if __name__ == "__main__":
    raise SystemExit(main())
0 commit comments