In [1]:
import requests
import re

# Step 1: Get raw wikitext
def get_family_tree_template(page_title: str, timeout: float = 10.0):
    """Fetch a Wikipedia page's wikitext and return its first ahnentafel template.

    Args:
        page_title: Title of the English Wikipedia page to query.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The raw text of the first ``{{ahnentafel ...}}`` template found in
        the page, or ``None`` (after printing a message) when the wikitext
        cannot be retrieved or contains no such template.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    params = {
        "action": "parse",
        "page": page_title,
        "prop": "wikitext",
        "format": "json",
    }
    response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params=params,
        timeout=timeout,
    ).json()

    if "parse" not in response or "wikitext" not in response["parse"]:
        print("Could not retrieve wikitext.")
        return None

    wikitext = response["parse"]["wikitext"]["*"]

    # Non-greedy match up to the first "}}" that starts a line, so nested
    # inline templates (e.g. {{sfn|...}}) do not end the match early.
    match = re.search(r"\{\{ahnentafel[\s\S]+?\n\}\}", wikitext, re.IGNORECASE)
    if match is None:
        print("No ahnentafel template found.")
        return None
    return match.group(0)


# Step 2: Extract entries from the ahnentafel template
def extract_ahnentafel_relationships(template_text):
    """Extract ``[child, "child of", parent]`` triples from an ahnentafel template.

    In ahnentafel numbering, person N has their father at slot 2N and their
    mother at slot 2N + 1. Every numbered slot whose parent slot is also
    present yields one triple, ordered child first.

    Args:
        template_text: Raw wikitext of an ``{{ahnentafel ...}}`` template.

    Returns:
        A list of ``[child_name, "child of", parent_name]`` lists, in the
        order the children appear in the template (father before mother).
        Names are stripped of leading "N. " numbering, wiki link brackets
        and bold/italic quote markup. Blank slots are ignored.
    """
    raw_entries = re.findall(
        r"\|\s*(\d+)\s*=\s*(?:\d+\.\s*)?(?:\[{2})?([^\|\]\n]+)",
        template_text,
    )

    entries = {}
    for num_str, raw_name in raw_entries:
        # Drop a leading "2. " style prefix if the pattern did not consume it.
        name = re.sub(r"^\s*\d+\.\s*", "", raw_name.strip())
        # Remove bold/italic markup ('' or ''') and any stray link opener
        # (e.g. from '''[[Name]]'''), keeping single apostrophes in names.
        name = re.sub(r"'{2,}|\[\[", "", name).strip()
        number = int(num_str)
        # Slot 0 is not a valid ahnentafel number (it would be its own
        # parent), and an empty name carries no information.
        if name and number >= 1:
            entries[number] = name

    relationships = []
    for number, child_name in entries.items():
        for parent_number in (2 * number, 2 * number + 1):
            parent_name = entries.get(parent_number)
            if parent_name is not None:
                relationships.append([child_name, "child of", parent_name])

    return relationships



# Step 3: Extract spouse if exists
def extract_spouse_relationships(template_text):
    """Extract ``[main_person, "spouse of", spouse]`` triples from a template.

    The main person is taken from slot ``1``. That slot may be written as a
    wiki link (``[[Name]]``), as bold text (``'''Name'''``) and/or with a
    leading ``1. `` prefix; all of these forms are accepted. Spouses are read
    from wiki-linked ``spouse``/``spouseN`` parameters.

    Args:
        template_text: Raw wikitext of the template.

    Returns:
        A list of ``[main_person, "spouse of", spouse_name]`` lists, or an
        empty list when no main person can be identified.
    """
    spouses = re.findall(r"\|\s*spouse\d*\s*=\s*\[{2}([^\|\]]+)", template_text)
    main = re.search(
        r"\|\s*1\s*=\s*(?:\d+\.\s*)?(?:\[{2})?([^\|\]\n]+)",
        template_text,
    )
    if main is None:
        return []

    # Strip bold/italic quote markup and any stray link opener so the name
    # is plain text.
    main_person = re.sub(r"'{2,}|\[\[", "", main.group(1)).strip()
    if not main_person:
        return []

    return [[main_person, "spouse of", spouse.strip()] for spouse in spouses]


# Step 4: Display everything
def build_tree_from_template(title):
    """Fetch the family tree template for *title* and print its relationships.

    Prints a preview of the raw template (first 1000 characters), then every
    parent/child and spouse triple found in it, followed by the total count.
    Prints a notice and returns early when no template is available.
    """
    template = get_family_tree_template(title)
    if not template:
        print("No family tree found.")
        return

    # Debug aid: show the start of the raw template.
    print("\n=== Raw Template Preview ===\n")
    preview = template[:1000]
    print(preview)

    parent_links = extract_ahnentafel_relationships(template)
    spouse_links = extract_spouse_relationships(template)
    all_rels = [*parent_links, *spouse_links]

    print(f"\nGenealogical relationships for {title}:")
    print("Format: [entity1, relationship, entity2]\n")
    for rel in all_rels:
        print(f"[{rel[0]}, {rel[1]}, {rel[2]}]")

    print(f"\nTotal relationships found: {len(all_rels)}")

# Run for Charles III. Guarded so that importing this module does not
# trigger a network request; running it as a script or notebook cell still does.
if __name__ == "__main__":
    build_tree_from_template("Charles III")



=== Raw Template Preview ===

{{ahnentafel
|collapsed=yes|align=center |ref={{sfn|Paget|1977}}
|boxstyle_1=background-color: #fcc;
|boxstyle_2=background-color: #fb9;
|boxstyle_3=background-color: #ffc;
|boxstyle_4=background-color: #bfc;
|1= 1. '''Charles III of the United Kingdom'''
|2= 2. [[Prince Philip, Duke of Edinburgh]]
|3= 3. [[Elizabeth II of the United Kingdom]]
|4= 4. [[Prince Andrew of Greece and Denmark]]
|5= 5. [[Princess Alice of Battenberg]]
|6= 6. [[George VI of the United Kingdom]]
|7= 7. [[Lady Elizabeth Bowes-Lyon]]
|8= 8. [[George I of Greece]]
|9= 9. [[Grand Duchess Olga Constantinovna of Russia]]
|10= 10. [[Prince Louis of Battenberg]]
|11= 11. [[Princess Victoria of Hesse and by Rhine]]
|12= 12. [[George V of the United Kingdom]]
|13= 13. [[Princess Victoria Mary of Teck]]
|14= 14. [[Claude Bowes-Lyon, 14th Earl of Strathmore and Kinghorne]]
|15= 15. [[Cecilia Cavendish-Bentinck]]
}}

Genealogical relationships for Charles III:
Format: [entity1, relationship, 