In [14]:
import xml.etree.ElementTree as ET
from fpdf import FPDF
import html  # For decoding HTML entities

def _entry_text(entry, tag, placeholder):
    """Return the text of the first ``tag`` child of ``entry``, or ``placeholder``.

    ``findtext`` gives ``None`` when the child is missing and ``""`` when it is
    present but empty (e.g. ``<CONCEPT/>``); both cases fall back to
    ``placeholder`` so callers always receive a string.
    """
    return entry.findtext(tag) or placeholder


def _pdf_safe(text):
    """Decode HTML entities and make ``text`` encodable by the core PDF fonts.

    The built-in fonts (such as the "Arial" used here) are limited to Latin-1,
    so any character outside that range is replaced with "?" instead of
    aborting the whole document.
    """
    return html.unescape(text).encode("latin-1", "replace").decode("latin-1")


def convert_xml_to_pdf(input_file: str, output_file: str) -> None:
    """
    Convert an XML glossary file to a formatted PDF with entries sorted alphabetically.

    Entries are read from ``INFO/ENTRIES/ENTRY`` below the document root and
    ordered case-insensitively by their ``<CONCEPT>`` text (entries without a
    concept sort first). Each entry is written as a bold concept line followed
    by its definition. Missing or empty ``<CONCEPT>`` / ``<DEFINITION>``
    elements are rendered as ``[No Concept]`` / ``[No Definition]``.

    Args:
        input_file (str): Path to the XML file.
        output_file (str): Path to save the output PDF.

    Returns:
        None. Failures (unreadable or malformed XML, missing ``<ENTRIES>``,
        PDF write errors) are caught and reported on stdout, not raised.
    """
    try:
        # Parse the XML file
        root = ET.parse(input_file).getroot()

        # Find the <ENTRIES> tag under <INFO>
        entries = root.find("INFO/ENTRIES")
        if entries is None:
            raise ValueError("No <ENTRIES> found in the XML file.")

        # Sort entries by <CONCEPT> text; a missing/empty concept sorts as "".
        sorted_entries = sorted(
            entries.findall("ENTRY"),
            key=lambda entry: _entry_text(entry, "CONCEPT", "").lower(),
        )

        # Initialize PDF
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()

        # Add a title
        pdf.set_font("Arial", style="B", size=16)
        pdf.cell(0, 10, "Dune - Terminology of the Imperium", ln=True, align="C")
        pdf.ln(10)

        # Write sorted entries to the PDF
        for entry in sorted_entries:
            concept = _pdf_safe(_entry_text(entry, "CONCEPT", "[No Concept]"))
            definition = _pdf_safe(
                _entry_text(entry, "DEFINITION", "[No Definition]")
            )

            pdf.set_font("Arial", style="B", size=10)
            pdf.cell(0, 10, concept, ln=True)
            pdf.set_font("Arial", size=10)
            pdf.multi_cell(0, 10, definition)
            pdf.ln(5)

        # Save the PDF
        pdf.output(output_file)
        print(f"PDF successfully saved to {output_file}")

    except Exception as e:
        # Deliberate best-effort boundary: report the problem instead of raising.
        print(f"An error occurred: {e}")

# Example run: convert the exported glossary XML into a PDF on the desktop.
# Both paths are specific to the author's machine; adjust before running.
XMLFile = "C:/Users/marzi/Downloads/Terminology of the Imperium-Glossary of Terms in the book.xml"
PDFOut = "C:/Users/marzi/Desktop/output.pdf"
convert_xml_to_pdf(input_file=XMLFile, output_file=PDFOut)


PDF successfully saved to C:/Users/marzi/Desktop/output.pdf
