In [2]:
def detect_family_tree_templates(wikitext):
    """
    Return the distinct family tree-related template names used in wikitext.

    A name is the text that follows ``{{`` (optional whitespace allowed) and
    begins, case-insensitively, with one of: ahnentafel, family tree,
    tree chart, chart top, ancestry. The name runs until the first ``|``,
    brace or newline, so suffixed variants such as "ahnentafel top" are
    reported in full.

    Args:
        wikitext: Raw wikitext of an article.

    Returns:
        List of unique template names, stripped of surrounding whitespace,
        in order of first appearance and with the casing used in the text.
    """
    found = re.findall(
        # Braces are excluded from the name so that a parameterless call like
        # "{{ahnentafel top}}" does not leak its closing "}}" into the result.
        r"\{\{\s*((?:ahnentafel|family tree|tree chart|chart top|ancestry)[^|{}\n]*)",
        wikitext,
        re.IGNORECASE,
    )
    # dict.fromkeys drops duplicates while keeping a deterministic,
    # first-seen order (a plain set would return an arbitrary order).
    return list(dict.fromkeys(name.strip() for name in found))


In [3]:
def _find_template_end(text, start):
    """Return the index just past the ``}}`` closing the ``{{`` at start, or None."""
    depth = 0
    for brace in re.compile(r"\{\{|\}\}").finditer(text, start):
        depth += 1 if brace.group() == "{{" else -1
        if depth == 0:
            return brace.end()
    # Ran out of text before the opening braces were balanced.
    return None


def extract_template_blocks(wikitext):
    """
    Extract all full template blocks related to family trees.

    A block starts at ``{{`` followed (case-insensitively, optional
    whitespace allowed) by one of: ahnentafel, family tree, tree chart,
    chart top, ancestry. It ends at the matching ``}}``, found by counting
    nested ``{{`` / ``}}`` pairs, so templates nested inside a parameter do
    not cut the block short and single-line templates are captured too.

    Args:
        wikitext: Raw wikitext of an article.

    Returns:
        List of template source strings (from ``{{`` through the matching
        ``}}``) in document order. Templates whose braces never balance
        are skipped. Family tree templates nested inside an already
        extracted block are not returned separately.
    """
    opener = re.compile(
        r"\{\{\s*(?:ahnentafel|family tree|tree chart|chart top|ancestry)",
        re.IGNORECASE,
    )
    blocks = []
    pos = 0
    while True:
        start_match = opener.search(wikitext, pos)
        if start_match is None:
            break
        end = _find_template_end(wikitext, start_match.start())
        if end is None:
            # Unterminated template: skip this opener and keep scanning.
            pos = start_match.end()
            continue
        blocks.append(wikitext[start_match.start():end])
        # Resume after the block so nested matches are not reported twice.
        pos = end
    return blocks


In [4]:
def parse_template(template_text):
    """
    Route a template block to the parser that handles its type.

    Only blocks whose text contains "ahnentafel" (case-insensitive) are
    supported: their ahnentafel relationships are returned, followed by
    their spouse relationships. Any other block prints a notice and
    produces an empty list.
    """
    is_ahnentafel = "ahnentafel" in template_text.lower()
    if not is_ahnentafel:
        print("⚠️ Unsupported template for now.")
        return []

    lineage = extract_ahnentafel_relationships(template_text)
    spouses = extract_spouse_relationships(template_text)
    return lineage + spouses


In [5]:
def process_article(title):
    """
    Main function to process a Wikipedia article and extract relationships.

    Fetches the article's wikitext through the MediaWiki ``parse`` API,
    reports which family tree templates it contains, and runs
    ``parse_template`` on every extracted template block.

    Args:
        title: Title of the English Wikipedia page to process.

    Returns:
        List of all relationships returned by ``parse_template`` for the
        page's blocks, concatenated in block order. An empty list is
        returned when the request fails, when the response carries no
        wikitext, or when no template block is found.
    """
    params = {
        "action": "parse",
        "page": title,
        "prop": "wikitext",
        "format": "json",
        # Follow redirects; otherwise a redirecting title only returns the
        # "#REDIRECT [[...]]" stub instead of the target article's wikitext.
        "redirects": 1,
    }
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        http_response = requests.get(
            "https://en.wikipedia.org/w/api.php", params=params, timeout=10
        )
        http_response.raise_for_status()
        response = http_response.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        print(f"Request failed for {title!r}: {err}")
        return []

    if "parse" not in response or "wikitext" not in response["parse"]:
        print("No wikitext found.")
        return []

    wikitext = response["parse"]["wikitext"]["*"]
    templates = detect_family_tree_templates(wikitext)
    blocks = extract_template_blocks(wikitext)

    print(f"\n📄 Article: {title}")
    print(f"🧩 Detected templates: {templates}")
    if not blocks:
        print("❌ No usable family tree blocks found.")
        return []

    all_relationships = []
    for block in blocks:
        all_relationships.extend(parse_template(block))

    print(f"✅ Extracted {len(all_relationships)} relationships.\n")
    return all_relationships


In [6]:
# Articles to run through the extraction pipeline.
titles = [
    "Charles III",
    "Elizabeth II",
    "George VI of the United Kingdom",
    "Albert Einstein",
    "Queen Victoria",
]

# Print every extracted relationship as "[first, second, third]".
for title in titles:
    relationships = process_article(title)
    for r in relationships:
        print("[{}, {}, {}]".format(r[0], r[1], r[2]))



📄 Article: Charles III
🧩 Detected templates: ['ahnentafel']
✅ Extracted 14 relationships.

['''Charles III of the United Kingdom''', child of, Prince Philip, Duke of Edinburgh]
['''Charles III of the United Kingdom''', child of, Elizabeth II of the United Kingdom]
[Prince Philip, Duke of Edinburgh, child of, Prince Andrew of Greece and Denmark]
[Prince Philip, Duke of Edinburgh, child of, Princess Alice of Battenberg]
[Elizabeth II of the United Kingdom, child of, George VI of the United Kingdom]
[Elizabeth II of the United Kingdom, child of, Lady Elizabeth Bowes-Lyon]
[Prince Andrew of Greece and Denmark, child of, George I of Greece]
[Prince Andrew of Greece and Denmark, child of, Grand Duchess Olga Constantinovna of Russia]
[Princess Alice of Battenberg, child of, Prince Louis of Battenberg]
[Princess Alice of Battenberg, child of, Princess Victoria of Hesse and by Rhine]
[George VI of the United Kingdom, child of, George V of the United Kingdom]
[George VI of the United Kingdom, c