In [None]:
# Personenregister Hainhofer

from lxml import etree as ET
import pandas as pd

# Load the XML file and get its root element
file_path = 'hainhofer-lod.xml'
tree = ET.parse(file_path)
root = tree.getroot()

# Namespace prefixes used by the element lookups below
namespaces = {
    'tei': 'http://www.tei-c.org/ns/1.0',
    "schema": "http://schema.org/",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
}

# Only entries whose schema:mainEntityOfPage text starts with this prefix are kept
base_url = "https://hainhofer.hab.de/register/personen/"

# Result dictionary: rdfs:label text -> page URL with the base_url prefix removed.
# NOTE(review): two descriptions with the same label overwrite each other here;
# confirm labels are unique in the register if every entry must be exported.
person_to_psn = {}

# Loop through all rdf:Description elements
for personname in root.findall(".//rdf:Description", namespaces):
    person = personname.find("rdfs:label", namespaces)
    psn = personname.find("schema:mainEntityOfPage", namespaces)

    # Skip descriptions that lack either child element. The label must be
    # checked here (not the loop element, which is never None), otherwise
    # person.text below raises AttributeError.
    if person is None or psn is None:
        continue

    # An element without text content has .text == None; such entries cannot
    # be matched against the prefix or used as a meaningful key.
    if person.text is None or psn.text is None:
        continue

    # Keep only URLs that belong to the person register
    if not psn.text.startswith(base_url):
        continue

    # Remove the URL prefix (by its actual length, not a hard-coded offset)
    # and store the result
    url = psn.text[len(base_url):]
    person_to_psn[person.text] = url

print(person_to_psn)

# Save the result as a two-column table (person, psn) to Excel and CSV
df = pd.DataFrame(list(person_to_psn.items()), columns=['person', 'psn'])
df.to_excel('register_personen_psn.xlsx', index=False)
df.to_csv('register_personen_psn.csv', index=False)

{'Hans von Aachen': 'hans_von_aach', 'Aaron, biblische Person': 'aaron', 'Aba Sámuel, König (Ungarn)': 'aba_samuel_koenig_ungarn', 'Abed-Nego, biblische Person': 'abed-nego', 'Abel, biblische Person': 'abel', 'Johann Wilhelm Abel': 'abel_johann_wilhelm', 'Veit Abel': 'veit_abel', 'Niclas von Abensberg': 'abensperg_niclas', 'Alessandro Abondio': 'abondio_alessandro', 'Abiram, biblische Person': 'abiram_bibel', 'Abraham, Erzvater': 'abraham_ervater', 'Absalom, biblische Person': 'absalom_bibel', 'Hans Heinrich von Absberg': 'absberg_hans_heinrich', 'Walter Ach': 'ach_walter', 'Achates': 'achates', 'Acheloos, Gott': 'acheloos_gott', 'Achilleus': 'achilleus', 'Paulus zum Acker': 'acker_paulus', 'Ada, biblische Person': 'ada', 'Adalbert von Pommern, Bischof (Pommern)': 'adalbert_pommern_bischof', 'Adalbert III., Graf (Bogen)': 'adalbert_3_bogen_von', 'Adalbold II., Bischof (Utrecht)': 'adalbold_utrecht_bischof', 'Adalung, Bischof (Eichstätt)': 'adalung', 'Adam, biblische Person': 'adam', 'A