fix #1732: [DE] Support "Ref-behindthename" template, and others (#1747)

BoboTiG · Feb 3, 2023 · 87de4e4
1 parent 995529d
commit 87de4e4
Show file tree

Hide file tree

Showing 7 changed files with 92 additions and 9 deletions.
diff --git a/tests/test_2_render.py b/tests/test_2_render.py
@@ -86,3 +86,22 @@ def test_find_section_definitions_and_es_replace_defs_list_with_numbered_lists()
             "artículo de un diccionario, enciclopedia u obra de referencia.",
         ),
     ]
+
+
+@pytest.mark.parametrize(
+    "locale, code, expected",
+    [
+        (
+            "de",
+            "{{Bedeutungen}}\n:[1] \n\n{{Herkunft}}\n:[[Abkürzung]] von [[Sturmkanone]]",
+            "=== {{Bedeutungen}} ===\n# \n\n=== {{Herkunft}} ===\n:[[Abkürzung]] von [[Sturmkanone]]",
+        ),
+        (
+            "de",
+            "{{Bedeutungen}}\n:[1] {{K|Handwerk|Architektur|ft=[[defektives Verb{{!}}defektiv]]}}",
+            "=== {{Bedeutungen}} ===\n# {{K|Handwerk|Architektur|ft=[[defektives Verb|defektiv]]}}",
+        ),
+    ],
+)
+def test_adjust_wikicode(locale: str, code: str, expected: str) -> None:
+    assert render.adjust_wikicode(code, locale) == expected
diff --git a/tests/test_4_check_word.py b/tests/test_4_check_word.py
@@ -130,6 +130,12 @@ def test_no_definition_nor_etymology(craft_urls: Callable[[str, str], str]) -> N
             '<small class="noprint" title="Luther 2017 bei www.bibleserver.com"></small>',  # noqa
             "",
         ],
+        # DE - lang link in {{Üxx5}}
+        [
+            "de",
+            '<a href="/w/index.php?title=grc:%E1%BC%80%CE%BD%CE%AE%CF%81&amp;action=edit&amp;redlink=1" class="new" title="grc:ἀνήρ (Seite nicht vorhanden)"><sup>→&nbsp;grc</sup>',  # noqa
+            "",
+        ],
         # DE - other Wikis
         [
             "de",

diff --git a/tests/test_de.py b/tests/test_de.py
@@ -70,6 +70,18 @@ def test_parse_word(
             "{{MZ|0|2|3|4|5|6|7|8|9|10|11}}",
             "[0] 2<br/>3<br/>4<br/>5<br/>6<br/>7<br/>8<br/>9<br/>10<br/>11",
         ),
+        (
+            "{{Plainlink|1=http://de.wikipedia.org/wiki/Ludwig_XIV.|2=Ludwig XIV.}}",
+            "Ludwig XIV.",
+        ),
+        (
+            "{{Plainlink|1=http://de.wikipedia.org/wiki/Ludwig_XIV.|Ludwig XIV.}}",
+            "Ludwig XIV.",
+        ),
+        (
+            "{{Plainlink|http://de.wikipedia.org/wiki/Ludwig_XIV.|2=Ludwig XIV.}}",
+            "Ludwig XIV.",
+        ),
         ("{{Ü|pl|dzień}}", "dzień"),
         ("{{übertr.}}", "<i>übertragen</i>"),
         ("{{übertr.|:}}", "<i>übertragen:</i>"),

diff --git a/wikidict/check_word.py b/wikidict/check_word.py
@@ -106,6 +106,10 @@ def filter_html(html: str, locale: str) -> str:
         for a in bs.find_all("a", {"class": "external"}):
             if "archive.org" in a["href"]:
                 a.decompose()
+        # Lang link in {{Üxx5}}
+        for a in bs.find_all("a"):
+            if (sup := a.find("sup")) and sup.text.startswith("→"):
+                a.decompose()
         # Other Wikis
         for a in bs.find_all("a", {"class": "extiw"}):
             if (

diff --git a/wikidict/lang/de/__init__.py b/wikidict/lang/de/__init__.py
@@ -15,10 +15,10 @@
 etyl_section = ("{{Herkunft}}",)
 sections = (
     *etyl_section,
+    "{{Alte Schreibweise|",
     "{{Aussprache}",
     "{{Bedeutungen}",
     "{{Grundformverweis ",
-    "{{Alte Schreibweise|",
 )
 
 # Variants
@@ -37,17 +37,22 @@
     "Audio",
     "Bpur",
     "Fremdsprachige Beispiele",
+    "Herkunft fehlt",
     "Herkunft unbelegt",
     "Hintergrundfarbe",
     "Hörbeispiele",
     "IA",
     "IPA",
     "Lautschrift",
+    "QS Bedeutung",
     "QS Bedeutungen",
     "QS_Bedeutungen",
     "QS Herkunft",
     "QS_Herkunft",
+    "Ref-Adelung",
+    "Ref-Bibel",
     "Ref-Duden",
+    "Ref-DWDS",
     "Wikipedia",
 )
 
@@ -102,10 +107,19 @@
     "nf": "italic('n, f')",
     # {{noredlink|diminutiv}}
     "noredlink": "parts[-1]",
+    # {{Plainlink|1=http://de.wikipedia.org/wiki/Ludwig_XIV.|2=Ludwig XIV.}}
+    "Plainlink": "parts[-1].removeprefix('2=')",
     # {{Polytonisch|(το)}}
     "Polytonisch": "parts[-1]",
+    # {{Ref-behindthename|Alan}}
+    "Ref-behindthename": "f'behindthename.com „{word}“'",
+    "Ref-Grimm": "f'Jacob Grimm, Wilhelm Grimm: Deutsches Wörterbuch. 16 Bände in 32 Teilbänden. Leipzig 1854–1961 „{word}“'",  # noqa
     # {{Ü|pl|dzień}}
     "Ü": "parts[-1]",
+    # {{Unicode|kɔ}}
+    "Unicode": "parts[-1]",
+    # {{vergleiche}}
+    "vergleiche": "italic('vergleiche:')",
     # {{vgl.}}
     "vgl.": "italic('vergleiche:')",
     # {{W|Datenkompression|Datenkompressionen}}
@@ -115,16 +129,19 @@
 
 # Templates that will be completed/replaced using custom style.
 templates_other = {
+    "(R)": "®",
     "DMG": "'DMG:'",
     "Gen.": "Genitiv:",
     "İA": "'İA:'",
     "ISO 9": "ISO 9:",
     "NNBSP": "&nbsp;",
+    "Part.": "Partizip II: ",
     "Pl.": "Plural:",
     "Pl.1": "Plural 1:",
     "Pl.2": "Plural 2:",
     "Pl.3": "Plural 3:",
     "Pl.4": "Plural 4:",
+    "Prät.": "Präteritum: ",
 }
 
 templates_markierung = {

diff --git a/wikidict/lang/de/template_handlers.py b/wikidict/lang/de/template_handlers.py
@@ -1,7 +1,7 @@
 from collections import defaultdict  # noqa
 from typing import Dict, List, Tuple
 
-from ...user_functions import extract_keywords_from, italic
+from ...user_functions import extract_keywords_from, italic, strong
 from .abk import abk
 
 bibel_names = {
@@ -299,10 +299,14 @@ def render_Uxx4(tpl: str, parts: List[str], data: Dict[str, str]) -> str:
     'ܡܫܺܝܚܳܐ (ALA-LC: mšiḥāʾ) ‚Messias‘'
     >>> render_Uxx4("Üxx4", ["fr", "ܡܫܝܚܐ"], defaultdict(str, {"v":"ܡܫܺܝܚܳܐ", "d":"mšiḥāʾ", "b":"Messias"}))
     'ܡܫܺܝܚܳܐ (mšiḥāʾ) ‚Messias‘'
+    >>> render_Uxx4("Üxx4?", ["fr", "ܡܫܝܚܐ"], defaultdict(str, {"v":"ܡܫܺܝܚܳܐ", "d":"mšiḥāʾ", "b":"Messias"}))
+    '<b>?</b>&nbsp;ܡܫܺܝܚܳܐ (mšiḥāʾ) ‚Messias‘'
     """
     language = parts.pop(0)
     phrase = parts.pop(0) if parts else ""
     phrase = data.get("v", data.get("2", phrase))
+    if tpl == "Üxx4?":
+        phrase = f"{strong('?')}&nbsp;{phrase}"
     if "d" in data:
         if language in ("ar", "fa", "ha", "ota", "pnb"):
             phrase += f" (DMG: {data['d']})"
@@ -320,13 +324,23 @@ def render_Uxx4(tpl: str, parts: List[str], data: Dict[str, str]) -> str:
     return phrase
 
 
+def render_Uxx5(tpl: str, parts: List[str], data: Dict[str, str]) -> str:
+    """
+    >>> render_Uxx5("Üxx5", ["grc", "anḗr, andrós", "ἀνήρ, ἀνδρός", "ἀνήρ"], defaultdict(str))
+    'ἀνήρ, ἀνδρός (anḗr, andrós)'
+    """
+    return f"{parts[2]} ({parts[1]})"
+
+
 template_mapping = {
     "Bibel": render_bibel,
     "K": render_K,
     "Ref-dejure": render_ref_dejure,
     "Üt": render_Ut,
     "Üt?": render_Ut,
     "Üxx4": render_Uxx4,
+    "Üxx4?": render_Uxx4,
+    "Üxx5": render_Uxx5,
 }
 
 

diff --git a/wikidict/render.py b/wikidict/render.py
@@ -332,20 +332,21 @@ def add_potential_variant(
         variants.append(variant)
 
 
-def parse_word(word: str, code: str, locale: str, force: bool = False) -> Word:
-    """Parse *code* Wikicode to find word details.
-    *force* can be set to True to force the pronunciation and gender guessing.
-    It is disabled by default to speed-up the overall process, but enabled when
-    called from get_and_parse_word().
-    """
+def adjust_wikicode(code: str, locale: str) -> str:
+    """Sometimes we need to adapt the Wikicode."""
     code = re.sub(r"(<!--.*?-->)", "", code, flags=re.DOTALL)
 
     if locale == "de":
         # {{Bedeutungen}} -> === {{Bedeutungen}} ===
         code = re.sub(r"^\{\{(.+)\}\}", r"=== {{\1}} ===", code, flags=re.MULTILINE)
 
         # Definition lists are not well supported by the parser, replace them by numbered lists
-        code = re.sub(r":\[\d+\]\s*", "# ", code)
+        # Note: using `[ ]*` rather than `\s*` to bypass issues when a section above another one
+        #       contains an empty item.
+        code = re.sub(r":\[\d+\][ ]*", "# ", code)
+
+        # {{!}} -> "|"
+        code = code.replace("{{!}}", "|")
 
     elif locale == "es":
         # {{ES|xxx|núm=n}} -> == {{lengua|es}} ==
@@ -367,6 +368,16 @@ def parse_word(word: str, code: str, locale: str, force: bool = False) -> Word:
         # {{-avv-}} -> === {{avv}} ===
         code = re.sub(r"^\{\{-(\w+)-\}\}", r"=== {{\1}} ===", code, flags=re.MULTILINE)
 
+    return code
+
+
+def parse_word(word: str, code: str, locale: str, force: bool = False) -> Word:
+    """Parse *code* Wikicode to find word details.
+    *force* can be set to True to force the pronunciation and gender guessing.
+    It is disabled by default to speed-up the overall process, but enabled when
+    called from get_and_parse_word().
+    """
+    code = adjust_wikicode(code, locale)
     top_sections, parsed_sections = find_sections(code, locale)
     prons = []
     genders = []