# In [None]:
import os
import shutil
import xml.etree.ElementTree as ET
from fpdf import FPDF
import tempfile

# Define utility functions

def parse_xml(xml_file):
    """Flatten an XML document into first-seen fields plus repeated-tag extras.

    Every element, the root included, is visited in document order. The
    first occurrence of each tag is stored in a dict keyed by tag name; each
    later occurrence of an already-seen tag is appended to a list as a
    single-entry ``{tag: text}`` dict.

    Element text is stripped of surrounding whitespace. Elements with no
    text, or with whitespace-only text (e.g. containers that hold only child
    elements), are recorded as ``None``.

    Args:
        xml_file: Source passed straight to ``ElementTree.parse``.

    Returns:
        A ``(data, additional_info)`` tuple, or ``(None, None)`` when the
        document cannot be read or parsed (the error is printed).
    """
    try:
        root = ET.parse(xml_file).getroot()
    except Exception as e:
        # Deliberately broad: callers treat any load failure as "skip this
        # file" so that one bad document does not abort a batch run.
        print(f"Error parsing XML: {e}")
        return None, None

    data = {}
    additional_info = []

    for elem in root.iter():
        # strip() turns whitespace-only text into ""; normalise that to None
        # so "no text" is always represented the same way.
        text = (elem.text or "").strip() or None
        if elem.tag not in data:
            data[elem.tag] = text
        else:
            additional_info.append({elem.tag: text})

    return data, additional_info

def generate_pdf(data, additional_info, output_file, logo_path=None):
    """Render parsed XML fields into a PDF report and write it to disk.

    The first page carries an optional logo, a fixed header line and a
    coloured title taken from ``data["APP_NAME"]``. Every non-empty entry of
    ``data`` is then printed as a bold ``key:`` followed by its value, and
    the entries of ``additional_info`` follow under their own heading, with
    empty values shown as "Not Provided". Each page gets a centred page
    number in the footer.

    Args:
        data: Mapping of tag name to text (or ``None``).
        additional_info: List of single-entry ``{tag: text}`` dicts; may be
            empty or ``None``.
        output_file: Destination path handed to ``FPDF.output``.
        logo_path: Optional image path. A path that does not point to an
            existing file is reported and skipped rather than aborting the
            report.
    """

    class PDF(FPDF):
        """FPDF subclass whose footer prints the current page number."""

        def footer(self):
            # A negative y is passed so the footer sits near the page bottom.
            self.set_y(-15)
            self.set_font("Arial", size=8)
            self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")

    def _latin1_safe(text):
        """Replace characters the built-in font cannot encode with '?'."""
        # The core "Arial" font is limited to Latin-1; without this, one
        # unexpected character in the XML makes PDF generation fail.
        return str(text).encode("latin-1", "replace").decode("latin-1")

    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Header: logo (when available) on the left, caption on the right.
    if logo_path:
        if os.path.isfile(logo_path):
            pdf.image(logo_path, 10, 8, 33)
        else:
            print(f"Logo not found at {logo_path}; generating PDF without it.")
    pdf.set_font("Arial", size=12)
    pdf.set_xy(50, 10)
    pdf.cell(0, 10, "XML to Readable PDF", ln=True, align="R")
    pdf.ln(10)

    # Title. `or` (not a .get default) so that a present-but-empty APP_NAME
    # also falls back to "Unknown" instead of passing None to the renderer.
    pdf.set_font("Arial", style="B", size=20)
    pdf.set_text_color(0, 102, 204)
    name = data.get("APP_NAME") or "Unknown"
    pdf.cell(0, 10, _latin1_safe(name), ln=True, align="C")
    pdf.ln(10)

    pdf.set_font("Arial", size=12)
    pdf.set_text_color(0, 0, 0)
    col_width_key = 100
    line_height = 8

    # Main section: bold key in a fixed-width column, value wrapped beside it.
    for key, value in data.items():
        if value:
            pdf.set_font("Arial", style="B", size=12)
            pdf.cell(col_width_key, line_height, _latin1_safe(f"{key}: "), border=0)
            pdf.set_font("Arial", size=12)
            pdf.multi_cell(0, line_height, _latin1_safe(value))
            pdf.ln(1)

    # Repeated tags that did not fit into `data`.
    if additional_info:
        pdf.ln(5)
        pdf.set_font("Arial", style="B", size=14)
        pdf.cell(0, 10, "Additional Information:", ln=True)
        pdf.set_font("Arial", size=12)
        for info in additional_info:
            for key, value in info.items():
                pdf.cell(col_width_key, line_height, _latin1_safe(f"{key}: "), border=0)
                pdf.multi_cell(
                    0, line_height, _latin1_safe(value) if value else "Not Provided"
                )
                pdf.ln(1)

    pdf.output(output_file)

# Main automation function

def _output_stem(data):
    """Return the ``<PAN>_<NAME>`` file-name stem shared by a record's outputs."""
    # `or` rather than a .get default: a tag that exists but is empty maps to
    # None, which would otherwise crash on .replace() or yield "None_...".
    pan = data.get("APP_PAN_NO") or "Unknown"
    name = (data.get("APP_NAME") or "Unknown").replace(" ", "_")
    stem = f"{pan}_{name}"
    # Path separators inside a field value would redirect the output path.
    for sep in ("/", "\\"):
        stem = stem.replace(sep, "_")
    return stem


def _write_text_report(txt_file_path, data, additional_info):
    """Write the non-empty fields and any repeated-tag extras as plain text."""
    with open(txt_file_path, "w", encoding="utf-8") as txt_file:
        for key, value in data.items():
            if value:
                txt_file.write(f"{key}: {value}\n")
        if additional_info:
            txt_file.write("\nAdditional Information:\n")
            for info in additional_info:
                for key, value in info.items():
                    txt_file.write(f"{key}: {value if value else 'Not Provided'}\n")


def process_master_folder(master_folder, output_base_folder, logo_path=None):
    """Convert every XML file one level below ``master_folder`` to TXT and PDF.

    Each immediate sub-directory of ``master_folder`` is mirrored under
    ``output_base_folder``. For every ``.xml`` file (extension matched
    case-insensitively) found directly inside a sub-directory, a text report
    and a PDF report named ``<APP_PAN_NO>_<APP_NAME>`` are written to the
    mirrored directory. Files that fail to parse are reported and skipped.

    Args:
        master_folder: Directory whose sub-directories contain the XML files.
        output_base_folder: Root directory for the generated reports; created
            if it does not exist.
        logo_path: Optional image path forwarded to ``generate_pdf``.
    """
    os.makedirs(output_base_folder, exist_ok=True)

    for subfolder in os.listdir(master_folder):
        subfolder_path = os.path.join(master_folder, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        output_subfolder_path = os.path.join(output_base_folder, subfolder)
        os.makedirs(output_subfolder_path, exist_ok=True)

        for file in os.listdir(subfolder_path):
            file_path = os.path.join(subfolder_path, file)
            if not (os.path.isfile(file_path) and file.lower().endswith(".xml")):
                continue

            print(f"Processing {file} in {subfolder}...")

            data, additional_info = parse_xml(file_path)
            if not data:
                print(f"Failed to parse {file}. Skipping.")
                continue

            # Both outputs share one stem so the TXT and PDF always pair up.
            stem = _output_stem(data)

            txt_file_path = os.path.join(output_subfolder_path, f"{stem}.txt")
            _write_text_report(txt_file_path, data, additional_info)

            pdf_file_path = os.path.join(output_subfolder_path, f"{stem}.pdf")
            generate_pdf(data, additional_info, pdf_file_path, logo_path)

    print("Processing complete.")

# Example usage

if __name__ == "__main__":
    # Branding image for the PDF header; the file must exist in the working
    # directory, or give its full path here.
    logo_path = "candor_investing_logo.png"

    # Where the generated reports go (replace with your output directory).
    output_base_folder = "/home/dhananjay-porwal/Downloads/Done-20241221T032632Z-001/FreshlyDone"

    # Folder whose sub-directories hold the XML files (replace with your path).
    master_folder = "/home/dhananjay-porwal/Downloads/Done-20241221T032632Z-001/Done"

    process_master_folder(
        master_folder,
        output_base_folder,
        logo_path=logo_path,
    )
