Skip to content

Commit

Permalink
✨Support wild cards as CLI pruefi args (#144)
Browse files Browse the repository at this point in the history
* ✨Support wild cards as CLI pruefi args

* fix mypy

* Update src/kohlrahbi/__init__.py

* Extend docstring, explain why no regex (yet)

* Use fnmatch (#162)

* 🎨 Use fnmatch

* 🚸 Add example for ? wildcard

* 🎨 Fix wrong wildcard character + -> ?

* ✅ Add more unit test cases with new wildcard character `?`

* 💡 improve docstring

mention unix wildcard

* 🚨 ignore linter

---------

Co-authored-by: kevin <kevin.krechan@hochfrequenz.de>
Co-authored-by: kevin <68426071+hf-krechan@users.noreply.github.com>
  • Loading branch information
3 people committed Jul 24, 2023
1 parent c9143fd commit d395307
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 9 deletions.
24 changes: 18 additions & 6 deletions src/kohlrahbi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""
kohlrahbi is a package to scrape AHBs (in docx format)
"""
import fnmatch
import gc
import re
import sys
from pathlib import Path
from typing import Any
from typing import Any, Optional

import click
import docx # type:ignore[import]
Expand All @@ -20,12 +21,23 @@
_pruefi_pattern = re.compile(r"^[1-9]\d{4}$")


def get_valid_pruefis(list_of_pruefis: list[str], all_known_pruefis: Optional[list[str]] = None) -> list[str]:
    r"""
    Return the sorted, de-duplicated pruefis selected by ``list_of_pruefis``.

    Each entry of ``list_of_pruefis`` is handled in one of two ways:

    * If it contains a unix wildcard (``*`` or ``?``) and ``all_known_pruefis`` is a non-empty list,
      it is expanded (using ``fnmatch``) to every known pruefi that matches it.
      E.g. '11*' for all pruefis starting with '11', '*01' for all pruefis ending with '01'
      or '11?42' for '11042', '11142', ...
      The expanded values are taken from ``all_known_pruefis`` as they are.
    * Otherwise it is kept only if it matches the pruefi pattern ``^[1-9]\d{4}$`` completely,
      i.e. it is a five digit number without a leading zero.

    Entries that fit neither case are dropped silently. This includes wildcard entries when no
    (or an empty) list of known pruefis is given.

    :param list_of_pruefis: pruefis and/or wildcard patterns, e.g. as given on the command line
    :param all_known_pruefis: the pruefis against which wildcard patterns are expanded (optional)
    :return: a new, ascending sorted list without duplicates
    """
    result: set[str] = set()

    for pruefi in list_of_pruefis:
        if ("*" in pruefi or "?" in pruefi) and all_known_pruefis:
            result.update(fnmatch.filter(all_known_pruefis, pruefi))
        # fullmatch instead of match: '$' would also accept a trailing newline like in "11042\n"
        elif _pruefi_pattern.fullmatch(pruefi):
            result.add(pruefi)

    return sorted(result)


def check_python_version():
Expand Down Expand Up @@ -95,7 +107,7 @@ def load_all_known_pruefis_from_file(
"--pruefis",
default=[],
required=False,
help="Five digit number like 11042.",
help="Five digit number like 11042 or use wildcards like 110* or *042 or 11?42.",
multiple=True,
)
@click.option(
Expand Down
153 changes: 150 additions & 3 deletions unittests/test_input_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,182 @@


# Shared building blocks for the lists of known pruefis used in the wildcard test cases below.
_PRUEFIS_12_13 = ["12001", "12002", "12003", "13001", "13002", "13003"]
_PRUEFIS_1100X = [f"1100{digit}" for digit in range(1, 10)]  # "11001" up to "11009"
_KNOWN_SMALL = ["11001", "11002", "11003", *_PRUEFIS_12_13]
_KNOWN_WITH_11X42 = ["11001", "11002", "11003", "11042", "11142", *_PRUEFIS_12_13]
_KNOWN_LARGE = [*_PRUEFIS_1100X, "11042", "11142", *_PRUEFIS_12_13]
_KNOWN_LARGE_WITH_11010 = [*_PRUEFIS_1100X, "11010", "11042", "11142", *_PRUEFIS_12_13]


@pytest.mark.parametrize(
    "input_pruefis, expected_pruefis, known_pruefis",
    [
        # cases without known pruefis: only the five digit pattern is checked
        pytest.param(["11042", "13007"], ["11042", "13007"], None, id="only valid pruefis"),
        pytest.param(["01042", "13007"], ["13007"], None, id="invalid pruefi: leading zero"),
        pytest.param(["1042", "13007"], ["13007"], None, id="invalid pruefi: only four digits"),
        pytest.param(["abc", "13007"], ["13007"], None, id="invalid pruefi: characters"),
        pytest.param(["abc"], [], None, id="invalid pruefi: empty result"),
        # cases with the wildcard `*`
        pytest.param(["11*"], ["11001", "11002", "11003"], _KNOWN_SMALL, id="wildcard `*` at end"),
        pytest.param(["*1"], ["11001", "12001", "13001"], _KNOWN_SMALL, id="wildcard `*` at begin"),
        pytest.param(
            ["11*1"],
            ["11001"],
            _KNOWN_SMALL,
            id="wildcard `*` in the middle",  # who should seriously want this?
        ),
        # cases with the wildcard `?`
        pytest.param(
            ["?1001"],
            ["11001", "21001", "31001"],
            ["11001", "11002", "11003", "12002", "12003", "13003", "21001", "31001"],
            id="wildcard `?` at begin",
        ),
        pytest.param(["11?42"], ["11042", "11142"], _KNOWN_WITH_11X42, id="wildcard `?` in the middle"),
        pytest.param(["1100?"], [*_PRUEFIS_1100X], _KNOWN_LARGE, id="wildcard `?` at the end"),
        pytest.param(
            ["110??"],
            [*_PRUEFIS_1100X, "11010", "11042"],
            _KNOWN_LARGE_WITH_11010,
            id="wildcard `??` at the end",
        ),
        # both wildcards in one pattern
        pytest.param(
            ["*00?"],
            [*_PRUEFIS_1100X, *_PRUEFIS_12_13],
            _KNOWN_LARGE_WITH_11010,
            id="wildcard combination `*` and `?`",
        ),
    ],
)
def test_get_only_valid_pruefis(input_pruefis: list[str], expected_pruefis: list[str], known_pruefis: list[str] | None):
    """
    get_valid_pruefis returns exactly the expected pruefis for the given input and (optional) known pruefis.
    """
    assert get_valid_pruefis(list_of_pruefis=input_pruefis, all_known_pruefis=known_pruefis) == expected_pruefis

0 comments on commit d395307

Please sign in to comment.