Skip to content

Commit c5457bb

Browse files
committed
ENH: Remove all references and uses of PERL
1 parent a56aefd commit c5457bb

File tree

3 files changed

+100
-18
lines changed

3 files changed

+100
-18
lines changed

Latex/doubleWordCheck.pl

Lines changed: 0 additions & 17 deletions
This file was deleted.

Latex/doubleWordCheck.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Search for doubled words in text
4+
5+
Behavior:
6+
- Reads one or more files (or stdin if none).
7+
- Processes input in "records" delimited by ".\n" (Perl: $/ = ".\n").
8+
- In each record, highlights a repeated word (case-insensitive) where the two
9+
occurrences are separated by whitespace and/or simple HTML tags.
10+
- Removes any leading lines that contain no escape characters.
11+
- Prefixes each remaining line with "<filename>: ".
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import argparse
17+
import re
18+
import sys
19+
from pathlib import Path
20+
21+
22+
# ASCII escape character; introduces the ANSI reverse-video sequences below.
ESC = "\x1b"

# Perl: s/\b([a-z]+)((\s|<[^>]+>)+)(\1\b)/\e[7m$1\e[m$2\e[7m$4\e[m/ig
# group 1: first word, group 2: separating whitespace/tags, group 3: repeat.
DOUBLE_WORD_RE = re.compile(
    r"\b([a-z]+)((?:\s|<[^>]+>)+)(\1\b)",
    re.IGNORECASE,
)

# Perl: s/^([^\e]*\n)+//mg
# Each match is a run of consecutive complete lines that contain no ESC,
# i.e. lines in which nothing was highlighted.
LEADING_NO_ESC_LINES_RE = re.compile(r"^(?:[^\x1b]*\n)+", re.MULTILINE)


def highlight_double_words(record: str) -> str | None:
    """Highlight every doubled word in *record* and drop unmarked lines.

    Both substitutions are applied globally, as the ``/g`` flag on the Perl
    originals requires: every repeated word pair in the record is wrapped in
    reverse-video escape sequences, and every complete (newline-terminated)
    line left without an escape character is removed.

    Args:
        record: One chunk of input text, typically ending in ".\n".

    Returns:
        The transformed record, or None when the record contains no doubled
        word.
    """

    def repl(m: re.Match[str]) -> str:
        # The match is case-insensitive, so the two words may differ in
        # case; each is re-emitted exactly as it appeared in the input.
        first, gap, second = m.groups()
        return f"{ESC}[7m{first}{ESC}[m{gap}{ESC}[7m{second}{ESC}[m"

    highlighted, hits = DOUBLE_WORD_RE.subn(repl, record)
    if not hits:
        return None

    # A final line without a trailing newline is never matched by the
    # pattern and is therefore kept even when it carries no highlight.
    return LEADING_NO_ESC_LINES_RE.sub("", highlighted)
52+
53+
54+
def iter_records(text: str, sep: str = ".\n"):
    """Yield consecutive records of *text*, each ending with *sep*.

    Mirrors reading with Perl's ``$/`` set to *sep*: the separator stays
    attached to the record it terminates, and any text after the last
    separator is yielded as a final, unterminated record. Concatenating the
    yielded records reproduces *text* exactly.

    Args:
        text: The full input to split.
        sep: Exact (non-regex) record terminator; must be non-empty.

    Yields:
        Each record in order of appearance.

    Raises:
        ValueError: If *sep* is empty. Raised when iteration starts, since
            this is a generator.
    """
    if not sep:
        # str.find("") matches at every position without advancing, which
        # would yield empty records forever.
        raise ValueError("sep must be a non-empty string")

    sep_len = len(sep)
    start = 0
    while True:
        idx = text.find(sep, start)
        if idx == -1:
            break
        end = idx + sep_len
        yield text[start:end]
        start = end

    # Trailing text with no terminator still counts as a record.
    if start < len(text):
        yield text[start:]
68+
69+
70+
def process_stream(name: str, data: str, out) -> None:
    """Write every record of *data* that contains a doubled word to *out*.

    The input is split into ".\n"-terminated records; records without a
    doubled word are skipped, and each line of the remaining (highlighted)
    records is prefixed with "<name>: ".

    Args:
        name: Label placed in front of every output line (file name or
            "<stdin>").
        data: Complete text to scan.
        out: Destination object; only its ``write(str)`` method is used.
    """
    prefix = f"{name}: "

    for record in iter_records(data, sep=".\n"):
        transformed = highlight_double_words(record)
        if transformed is None:
            continue

        # Perl: s/^/$ARGV: /mg => prefix each line
        # Two differences from Perl have to be handled explicitly:
        #  * Python's multiline "^" also matches after a trailing newline at
        #    the very end of the string; "(?!\Z)" suppresses that match so no
        #    dangling prefix is emitted after the record.
        #  * A callable replacement inserts the prefix literally, so
        #    backslashes in *name* are not interpreted as regex escapes.
        prefixed = re.sub(
            r"^(?!\Z)", lambda _m: prefix, transformed, flags=re.MULTILINE
        )
        out.write(prefixed)
79+
80+
81+
def main() -> int:
    """Command-line entry point: scan the given files, or stdin, for doubled words.

    With no file arguments the whole of standard input is read and reported
    under the name "<stdin>". Otherwise each file is read as UTF-8 (invalid
    bytes are replaced) and scanned in the order given.

    Returns:
        0 on success, 1 if at least one file could not be read.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("files", nargs="*", help="Files to scan; if empty, read stdin.")
    args = ap.parse_args()

    if not args.files:
        process_stream("<stdin>", sys.stdin.read(), sys.stdout)
        return 0

    status = 0
    for f in args.files:
        try:
            data = Path(f).read_text(encoding="utf-8", errors="replace")
        except OSError as err:
            # One missing or unreadable file must not stop the remaining
            # files from being checked; report it and carry on.
            print(f"{ap.prog}: cannot read {f}: {err}", file=sys.stderr)
            status = 1
            continue
        process_stream(f, data, sys.stdout)

    return status
97+
98+
99+
# Run the checker only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())

TODO

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
Remove all references to perl PERL
21
Get output images only produced in BINARY_DIR.
32

43
-- (DONE) Have python code line extractor throw warning when lines are too long to print.

0 commit comments

Comments
 (0)