✨Support wild cards as CLI pruefi args (#144)

* ✨Support wild cards as CLI pruefi args
* fix mypy
* Update src/kohlrahbi/__init__.py
* Extend docstring, explain why no regex (yet)
* Use fnmatch (#162)
* 🎨 Use fnmatch
* 🚸 Add example for ? wildcard
* 🎨 Fix wrong wildcard character + -> ?
* ✅ Add more unit test cases with new wildcard character `?`
* 💡 improve docstring mention unix wildcard
* 🚨 ignore linter

---------

Co-authored-by: kevin <kevin.krechan@hochfrequenz.de>
Co-authored-by: kevin <68426071+hf-krechan@users.noreply.github.com>
Hochfrequenz · Jul 24, 2023 · d395307 · d395307
1 parent c9143fd
commit d395307
Show file tree

Hide file tree

Showing 2 changed files with 168 additions and 9 deletions.
diff --git a/src/kohlrahbi/__init__.py b/src/kohlrahbi/__init__.py
@@ -1,11 +1,12 @@
 """
 kohlrahbi is a package to scrape AHBs (in docx format)
 """
+import fnmatch
 import gc
 import re
 import sys
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
 
 import click
 import docx  # type:ignore[import]
@@ -20,12 +21,23 @@
 _pruefi_pattern = re.compile(r"^[1-9]\d{4}$")
 
 
-def get_valid_pruefis(list_of_pruefis: list[str]) -> list[str]:
+def get_valid_pruefis(list_of_pruefis: list[str], all_known_pruefis: Optional[list[str]] = None) -> list[str]:
     """
-    This function returns a new list with only those pruefis which match the pruefi_pattern.
+    Return a sorted, duplicate-free list of those pruefis which match the pruefi_pattern "^[1-9]\\d{4}$".
+    It also supports unix wildcards like '*' and '?' iff a list of known pruefis is given (else they are dropped).
+    E.g. '11*' for all pruefis starting with '11' or '*01' for all pruefis ending with '01'.
     """
-    valid_pruefis: list[str] = [pruefi for pruefi in list_of_pruefis if _pruefi_pattern.match(pruefi)]
-    return valid_pruefis
+    result: set[str] = set()
+
+    for pruefi in list_of_pruefis:
+        if ("*" in pruefi or "?" in pruefi) and all_known_pruefis:
+            # expand the wildcard against the known pruefis; only entries of that list can be produced here
+            result.update(fnmatch.filter(all_known_pruefis, pruefi))
+        elif _pruefi_pattern.match(pruefi):
+            # wildcard entries without known pruefis also reach this check and are dropped by it
+            result.add(pruefi)
+
+    return sorted(result)
 
 
 def check_python_version():
@@ -95,7 +107,7 @@ def load_all_known_pruefis_from_file(
     "--pruefis",
     default=[],
     required=False,
-    help="Five digit number like 11042.",
+    help="Five digit number like 11042 or use wildcards like 110* or *042 or 11?42.",
     multiple=True,
 )
 @click.option(

diff --git a/unittests/test_input_checks.py b/unittests/test_input_checks.py
@@ -4,35 +4,182 @@
 
 
 @pytest.mark.parametrize(
-    "input_pruefis, expected_pruefis",
+    "input_pruefis, expected_pruefis, known_pruefis",
     [
         pytest.param(
             ["11042", "13007"],
             ["11042", "13007"],
+            None,
             id="only valid pruefis",
         ),
         pytest.param(
             ["01042", "13007"],
             ["13007"],
+            None,
             id="invalid pruefi: leading zero",
         ),
         pytest.param(
             ["1042", "13007"],
             ["13007"],
+            None,
             id="invalid pruefi: only four digits",
         ),
         pytest.param(
             ["abc", "13007"],
             ["13007"],
+            None,
             id="invalid pruefi: characters",
         ),
         pytest.param(
             ["abc"],
             [],
+            None,
             id="invalid pruefi: empty result",
         ),
+        pytest.param(
+            ["11*"],
+            ["11001", "11002", "11003"],
+            ["11001", "11002", "11003", "12001", "12002", "12003", "13001", "13002", "13003"],
+            id="wildcard `*` at end",
+        ),
+        pytest.param(
+            ["*1"],
+            ["11001", "12001", "13001"],
+            ["11001", "11002", "11003", "12001", "12002", "12003", "13001", "13002", "13003"],
+            id="wildcard `*` at begin",
+        ),
+        pytest.param(
+            ["11*1"],
+            ["11001"],
+            ["11001", "11002", "11003", "12001", "12002", "12003", "13001", "13002", "13003"],
+            id="wildcard `*` in the middle",  # unusual input; documents that an inner `*` is matched as well
+        ),
+        pytest.param(
+            ["?1001"],
+            ["11001", "21001", "31001"],
+            ["11001", "11002", "11003", "12002", "12003", "13003", "21001", "31001"],
+            id="wildcard `?` at begin",
+        ),
+        pytest.param(
+            ["11?42"],
+            ["11042", "11142"],
+            ["11001", "11002", "11003", "11042", "11142", "12001", "12002", "12003", "13001", "13002", "13003"],
+            id="wildcard `?` in the middle",
+        ),
+        pytest.param(
+            ["1100?"],
+            [
+                "11001",
+                "11002",
+                "11003",
+                "11004",
+                "11005",
+                "11006",
+                "11007",
+                "11008",
+                "11009",
+            ],
+            [
+                "11001",
+                "11002",
+                "11003",
+                "11004",
+                "11005",
+                "11006",
+                "11007",
+                "11008",
+                "11009",
+                "11042",
+                "11142",
+                "12001",
+                "12002",
+                "12003",
+                "13001",
+                "13002",
+                "13003",
+            ],
+            id="wildcard `?` at the end",
+        ),
+        pytest.param(
+            ["110??"],
+            [
+                "11001",
+                "11002",
+                "11003",
+                "11004",
+                "11005",
+                "11006",
+                "11007",
+                "11008",
+                "11009",
+                "11010",
+                "11042",
+            ],
+            [
+                "11001",
+                "11002",
+                "11003",
+                "11004",
+                "11005",
+                "11006",
+                "11007",
+                "11008",
+                "11009",
+                "11010",
+                "11042",
+                "11142",
+                "12001",
+                "12002",
+                "12003",
+                "13001",
+                "13002",
+                "13003",
+            ],
+            id="wildcard `??` at the end",
+        ),
+        pytest.param(
+            ["*00?"],
+            [
+                "11001",
+                "11002",
+                "11003",
+                "11004",
+                "11005",
+                "11006",
+                "11007",
+                "11008",
+                "11009",
+                "12001",
+                "12002",
+                "12003",
+                "13001",
+                "13002",
+                "13003",
+            ],
+            [
+                "11001",
+                "11002",
+                "11003",
+                "11004",
+                "11005",
+                "11006",
+                "11007",
+                "11008",
+                "11009",
+                "11010",
+                "11042",
+                "11142",
+                "12001",
+                "12002",
+                "12003",
+                "13001",
+                "13002",
+                "13003",
+            ],
+            id="wildcard combination `*` and `?`",
+        ),
     ],
 )
-def test_get_only_valid_pruefis(input_pruefis, expected_pruefis):
-    valid_pruefis = get_valid_pruefis(list_of_pruefis=input_pruefis)
+def test_get_only_valid_pruefis(input_pruefis: list[str], expected_pruefis: list[str], known_pruefis: list[str] | None):
+    valid_pruefis = get_valid_pruefis(list_of_pruefis=input_pruefis, all_known_pruefis=known_pruefis)
     assert valid_pruefis == expected_pruefis