|
12 | 12 | # * regenerate_tx_config: recreate configuration for all resources.
|
13 | 13 |
|
14 | 14 | from argparse import ArgumentParser
|
15 |
| -from collections import Counter |
16 | 15 | import os
|
17 | 16 | from dataclasses import dataclass
|
| 17 | +from difflib import SequenceMatcher |
| 18 | +from itertools import combinations |
| 19 | +from pathlib import Path |
18 | 20 | from re import match
|
19 |
| -from subprocess import call, run |
| 21 | +from subprocess import call |
20 | 22 | import sys
|
21 | 23 | from typing import Self, Callable
|
22 | 24 | from urllib.parse import urlparse, parse_qs
|
| 25 | +from warnings import warn |
| 26 | + |
| 27 | +from polib import pofile |
23 | 28 |
|
24 | 29 | LANGUAGE = 'pl'
|
25 | 30 |
|
@@ -168,14 +173,34 @@ def progress_from_resources(resources: list[ResourceLanguageStatistics], filter_
|
168 | 173 |
|
169 | 174 |
|
def get_number_of_translators():
    """Return how many distinct translators are credited in the catalogs.

    The translator records are fetched first, known aliases are then removed
    from that set in place, and only afterwards is the remaining set checked
    for records that look like not-yet-registered aliases.
    """
    unique_translators = _fetch_translators()
    # Order matters: registered aliases must be gone before the similarity
    # check runs, otherwise they would be reported again.
    _remove_aliases(unique_translators)
    _check_for_new_aliases(unique_translators)
    return len(unique_translators)
| 180 | + |
| 181 | + |
def _fetch_translators() -> set[str]:
    """Collect the translator records found in the headers of all ``*.po`` files.

    Every ``*.po`` file below the current working directory is opened with
    ``polib`` and the lines of its header that follow the ``Translators:``
    line are read as ``<translator>, <year>`` records.

    Files whose header has no ``Translators:`` line are skipped, and so are
    lines that carry no ``", "`` separator. Records are split on the *last*
    ``", "`` so a translator name that itself contains a comma stays intact.

    Returns:
        The set of translator strings exactly as written in the headers
        (year removed).
    """
    translators = set()
    for file in Path().rglob('*.po'):
        header = pofile(file).header.splitlines()
        try:
            first_record = header.index('Translators:') + 1
        except ValueError:
            # This catalog credits no translators; nothing to collect.
            continue
        for translator_record in header[first_record:]:
            translator, separator, _year = translator_record.rpartition(', ')
            if not separator:
                # Blank or free-form header line, not a translator record.
                continue
            translators.add(translator)
    return translators
| 190 | + |
| 191 | + |
# Known duplicate identities as (alias record, canonical record) pairs.
_TRANSLATOR_ALIASES = (
    ("m_aciek <maciej.olko@gmail.com>", "Maciej Olko <maciej.olko@gmail.com>"),
)


def _remove_aliases(translators: set[str]) -> None:
    """Collapse known alias records into their canonical record, in place.

    For every registered alias that is present in ``translators`` the alias
    is removed and the canonical record is ensured to be present, so the
    person is counted exactly once. Aliases that do not occur in the set are
    ignored instead of raising ``KeyError``.

    Args:
        translators: Set of translator records; mutated in place.
    """
    for alias, main in _TRANSLATOR_ALIASES:
        if alias in translators:
            translators.discard(alias)
            # Keep the person counted even if only the alias was credited.
            translators.add(main)
| 196 | + |
| 197 | + |
def _check_for_new_aliases(translators: set[str], *, limit: float = 0.64) -> None:
    """Warn about pairs of translator records that look like the same person.

    Every unordered pair of records is compared with
    ``difflib.SequenceMatcher`` (treating ``<``, ``>`` and ``@`` as junk) and
    a warning is issued for each pair whose similarity ratio exceeds
    ``limit``.

    Args:
        translators: Translator records to compare; not modified.
        limit: Similarity ratio above which a pair is reported.
    """
    # Compare in sorted order: set iteration order is not stable between
    # runs and SequenceMatcher.ratio() may depend on the argument order, so
    # sorting makes both the reported pairs and their ratios reproducible.
    for pair in combinations(sorted(translators), 2):
        ratio = SequenceMatcher(lambda x: x in '<>@', *pair).ratio()
        if ratio > limit:
            warn(
                f"{pair} are similar ({ratio:.3f}). Please add them to aliases list or bump the limit."
            )
179 | 204 |
|
180 | 205 |
|
181 | 206 | def language_switcher(entry: ResourceLanguageStatistics) -> bool:
|
|
0 commit comments