From cf2e14e3dff040018078f51b53a6423528bbef88 Mon Sep 17 00:00:00 2001 From: Daniel <139119540+DeltaDaniel@users.noreply.github.com> Date: Thu, 11 Jul 2024 07:21:38 +0200 Subject: [PATCH] ignore ahb tables where no pruefi is provided (#371) --- src/kohlrahbi/ahb/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/kohlrahbi/ahb/__init__.py b/src/kohlrahbi/ahb/__init__.py index 5a4c6609..fd2c6c03 100644 --- a/src/kohlrahbi/ahb/__init__.py +++ b/src/kohlrahbi/ahb/__init__.py @@ -189,7 +189,9 @@ def extract_pruefis_from_table(table: Table) -> list[str]: def table_header_contains_text_pruefidentifikator(table: Table) -> bool: """Checks if the table header contains the text 'Prüfidentifikator'.""" - return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator") # type:ignore[no-any-return] + pattern = r"Prüfidentifikator(?:\t){0,10}\t\d+" + # matches "Prüfidentifikator" followed by 1 to 11 tabs and then a number; the upper limit of 11 is chosen arbitrarily + return bool(re.search(pattern, table.row_cells(0)[-1].text)) def get_pruefi_to_file_mapping(basic_input_path: Path, format_version: EdifactFormatVersion) -> dict[str, str]: