In [None]:
import urllib.request
import requests
import json
from typing import Optional
from rich import print as rprint

# MediaWiki Action API endpoint of the German Wiktionary; used below with
# ``action=query`` to look up page ids by title.
# Reference: https://www.mediawiki.org/wiki/API:Query
SEARCH_URL = "https://de.wiktionary.org/w/api.php"


def find_word_page_id(word: str, timeout: float = 10.0) -> Optional[int]:
    """Look up the page id of the page titled ``word`` via ``SEARCH_URL``.

    Args:
        word: Page title to look up.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises a timeout error.

    Returns:
        The numeric page id of the first page in the response, or ``None``
        when the response lists no pages or that page does not exist.
    """
    params = {
        "action": "query",
        "format": "json",
        "titles": word,
    }
    response = requests.get(SEARCH_URL, params=params, timeout=timeout).json()
    # Be tolerant of responses that carry no "query"/"pages" section at all
    # (presumably what an empty title yields -- verify against the API).
    pages = response.get("query", {}).get("pages", {})
    if not pages:
        return None
    # Missing or invalid titles are reported under a negative placeholder id
    # (e.g. "-1"); that is not a real page and must not be returned.
    page_id = int(next(iter(pages)))
    return page_id if page_id > 0 else None

# Look up the page id for an example word; the bare expression on the last
# line makes the notebook display the result.
page_id = find_word_page_id("spenden")
page_id

In [None]:
# MediaWiki Action API endpoint (same URL as SEARCH_URL); used below with
# ``action=parse`` to fetch the wikitext of a page.
PAGE_URL = "https://de.wiktionary.org/w/api.php"


def get_page_wikitext(page_id: int, timeout: float = 10.0) -> str:
    """Fetch the raw wikitext of the page with id ``page_id`` via ``PAGE_URL``.

    Args:
        page_id: Numeric id of the page to fetch.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises a timeout error.

    Returns:
        The wikitext of the page as a single string.

    Raises:
        KeyError: If the response contains no parsed page, e.g. because the
            page id is unknown. The message includes the API's error text
            when one is provided.
    """
    params = {
        "action": "parse",
        "format": "json",
        "prop": "wikitext",
        "pageid": page_id,
    }
    response = requests.get(PAGE_URL, params=params, timeout=timeout).json()
    if "parse" not in response:
        # Failed requests come back with an "error" object instead of
        # "parse"; report its text rather than a bare KeyError('parse').
        # KeyError is kept so existing callers catching it still work.
        error = response.get("error", {})
        detail = error.get("info", "unexpected API response")
        raise KeyError(f"No wikitext for page id {page_id!r}: {detail}")
    return response["parse"]["wikitext"]["*"]


# Fetch the wikitext for the page id found above and print it for inspection.
wikitext = get_page_wikitext(page_id)
print(wikitext)

In [None]:
import re

# Matches one ``{{Audio|<file>|...}}`` template call in wikitext.
#   file -- the first positional parameter (the audio file name)
#   spr  -- the value of an optional ``spr=`` parameter, None when absent
# Parameters are separated by a literal "|", which has to be escaped: left
# unescaped it is regex alternation, which leaked "|" (and unrecognised
# ``spr=`` values) into the ``file`` group. Any further parameters are
# consumed but not captured.
AUDIO_RE = re.compile(
    r"\{\{Audio\|(?P<file>[^|{}]*)"
    r"(?:\|(?:spr=(?P<spr>[^|{}]*)|[^|{}]*))*"
    r"\}\}"
)
# MediaWiki Action API endpoint queried for file (imageinfo) metadata.
FILES_URL = "https://de.wiktionary.org/w/api.php"

def get_file_url(file_name: str, timeout: float = 10.0) -> Optional[str]:
    """Resolve the URL of the file ``File:<file_name>`` via ``FILES_URL``.

    Args:
        file_name: File name without the ``File:`` namespace prefix.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises a timeout error.

    Returns:
        The ``url`` from the first ``imageinfo`` entry of the first page in
        the response, or ``None`` when the response carries no such entry.
    """
    params = {
        "action": "query",
        "format": "json",
        "prop": "imageinfo",
        "iiprop": "url",
        "titles": f"File:{file_name}",
    }
    response = requests.get(FILES_URL, params=params, timeout=timeout).json()
    pages = response.get("query", {}).get("pages", {})
    # Only the first page is consulted. A page without "imageinfo" means no
    # file information is available; return None, as the annotation promises,
    # instead of failing with KeyError/IndexError.
    first_page = next(iter(pages.values()), {})
    imageinfo = first_page.get("imageinfo")
    if not imageinfo:
        return None
    return imageinfo[0].get("url")


def get_best_audio_match(matches) -> Optional[str]:
    """Pick the preferred audio file name from a sequence of regex matches.

    The matches are scanned from last to first. Matches whose ``spr`` group
    is set (a language was specified) are ignored; among the rest, the first
    ``file`` value starting with ``"De-"`` is returned. Per the original
    author's note, the latest such entry has the best audio quality.

    Returns ``None`` when no match qualifies.
    """
    unspecified_language_files = (
        candidate.group("file")
        for candidate in reversed(matches)
        if candidate.group("spr") is None
    )
    return next(
        (name for name in unspecified_language_files if name.startswith("De-")),
        None,
    )

def get_audio_url_from_wikitext(wikitext: str) -> Optional[str]:
    """Find the preferred audio file referenced in ``wikitext`` and resolve its URL.

    Args:
        wikitext: Wikitext to scan for ``AUDIO_RE`` matches.

    Returns:
        The URL of the file chosen by ``get_best_audio_match``, or ``None``
        when the wikitext has no audio matches, none of them qualifies, or
        no URL could be resolved for the chosen file.
    """
    matches = list(AUDIO_RE.finditer(wikitext))
    if not matches:
        return None

    audio_file_name = get_best_audio_match(matches)
    if audio_file_name is None:
        # Every match was rejected by get_best_audio_match; without this
        # guard the lookup below would query the bogus title "File:None".
        return None

    audio_file_url = get_file_url(audio_file_name)
    if not audio_file_url:
        print(f"Audio file URL was not found for file: {audio_file_name}")
        return None
    return audio_file_url

# Resolve the audio URL for the wikitext fetched earlier; as the last
# expression of the cell, the result is displayed by the notebook.
get_audio_url_from_wikitext(wikitext)