In [None]:
import xml.etree.ElementTree as ET
import duckdb
import os
from duckdb import ConstraintException
from pathlib import Path
from tqdm import tqdm

In [None]:
# Open a read-write connection to the 'german-parliament' DuckDB database.
con = duckdb.connect("german-parliament", read_only=False)

In [None]:
def parse_xml(file_path):
    """
    Read an XML file from disk and return the root element of its tree.

    The file is decoded as UTF-8; a leading byte-order mark is tolerated.
    Every failure handled here is reported with ``print`` and turned into a
    ``None`` return value, so a caller processing many files can skip a bad
    one instead of crashing.

    Args:
        file_path (str or os.PathLike): The path to the XML file.

    Returns:
        xml.etree.ElementTree.Element: The root element of the parsed XML tree,
                                         or None if the file is missing, is not
                                         valid UTF-8, or is not well-formed XML.
    """
    # Step 1: read the text. Kept separate from parsing so each failure mode
    # is attributed to the right stage.
    try:
        # 'utf-8-sig' consumes the three-byte BOM (EF BB BF) if it exists
        with open(file_path, encoding="utf-8-sig") as f:
            xml_text = f.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except UnicodeDecodeError as e:
        # Previously this escaped to the caller even though the contract is
        # "None on failure"; a non-UTF-8 file is treated like any other
        # unparsable input.
        print(f"Error decoding XML file {file_path}: {e}")
        return None

    # Step 2: build the element tree from the decoded text.
    try:
        return ET.fromstring(xml_text)
    except ET.ParseError as e:
        print(f"Error parsing XML file: {e}")
        return None

In [None]:
# Parse one sample plenary-minutes file and print its serialized XML.
path = "../data/plenary_minutes/wahlperiode_19/19_001_2017-10-24.xml"
root = parse_xml(path)
# parse_xml returns None (after printing the reason) when the file could not
# be read or parsed; ET.tostring(None) would raise AttributeError.
if root is not None:
    # encoding="unicode" makes tostring return a str, so the output is
    # readable text rather than a bytes repr with escaped non-ASCII characters.
    print(ET.tostring(root, encoding="unicode"))