In [None]:
# You are a software engineer. Create python code that can embed xml into pdf (only save it as metadata) and then save the output file.

from PyPDF2 import PdfReader, PdfWriter

def embed_xml_to_pdf(input_pdf_path, output_pdf_path, xml_metadata):
    """Copy a PDF and store an XML string in the copy's document info.

    The pages of ``input_pdf_path`` are copied into a new file at
    ``output_pdf_path`` with PyPDF2. That file is then reopened with pikepdf
    and ``xml_metadata`` is stored under the custom ``/XMLMetadata`` key of
    its document info dictionary before being saved in place.

    :param input_pdf_path: Path of the PDF to read.
    :param output_pdf_path: Path of the PDF to write.
    :param xml_metadata: XML text to store as the ``/XMLMetadata`` entry.
    """
    # Imported locally so this function also works on a fresh kernel: the
    # notebook's only module-level pikepdf import lives in a later cell.
    import pikepdf

    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()

    # Copy every page of the source document, in order.
    for page in reader.pages:
        writer.add_page(page)

    # Write the page-only copy to disk so pikepdf can reopen it.
    with open(output_pdf_path, "wb") as output_pdf_file:
        writer.write(output_pdf_file)

    # allow_overwriting_input=True is needed because the file is saved back
    # to the very path it was opened from.
    with pikepdf.open(output_pdf_path, allow_overwriting_input=True) as pdf:
        pdf.docinfo['/XMLMetadata'] = xml_metadata
        pdf.save(output_pdf_path)

# Example usage: adjust the two paths below to point at your own files
input_pdf = "0.pdf"  # source PDF to read
output_pdf = "output_file.pdf"  # destination PDF to create
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<metadata>
    <title>Embedded XML Example</title>
    <author>Aurel</author>
    <description>This is example of embedding metadata</description>
</metadata>"""

# Write a copy of the input PDF that carries the XML in its document info
embed_xml_to_pdf(
    input_pdf_path=input_pdf,
    output_pdf_path=output_pdf,
    xml_metadata=xml_data,
)

print(f"PDF with embedded XML metadata saved to {output_pdf}")


In [None]:
import pikepdf

def read_xml_from_pdf(pdf_path):
    """Print the ``/XMLMetadata`` entry of a PDF's document info, if present.

    :param pdf_path: Path of the PDF to inspect.
    """
    with pikepdf.open(pdf_path) as document:
        info = document.docinfo

        # Guard clause: nothing to show when the custom key is absent.
        if '/XMLMetadata' not in info:
            print("No XML Metadata found in this PDF.")
            return

        print("XML Metadata found:\n")
        print(info['/XMLMetadata'])

# Example usage: point this at a PDF that contains the embedded XML
pdf_file = "output_file.pdf"

# Print the stored XML metadata, or a notice when the PDF has none
read_xml_from_pdf(pdf_path=pdf_file)

In [None]:
from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from io import BytesIO
from PIL import Image

# Step 1: Create a function to convert the PNG image into a PDF overlay (transparent background)
def create_image_overlay(image_path, page_width, page_height, x, y, width=200, height=200):
    """Render an image onto a one-page, in-memory PDF of the given page size.

    :param image_path: Path of the image file to draw.
    :param page_width: Width of the overlay page, in PDF points.
    :param page_height: Height of the overlay page, in PDF points.
    :param x: Horizontal position of the image's lower-left corner.
    :param y: Vertical position of the image's lower-left corner.
    :param width: Drawn width of the image (defaults to 200).
    :param height: Drawn height of the image (defaults to 200).
    :return: ``BytesIO`` rewound to offset 0 that holds the one-page PDF.
    """
    overlay_buffer = BytesIO()

    # Callers may pass Decimal-like page dimensions (depending on the PDF
    # library version); coerce to plain floats before handing them to reportlab.
    page_size = (float(page_width), float(page_height))
    can = canvas.Canvas(overlay_buffer, pagesize=page_size)

    # mask='auto' asks reportlab to honour the image's own transparency, so a
    # PNG with an alpha channel does not paint an opaque box over the page.
    can.drawImage(image_path, x, y, width=width, height=height, mask='auto')

    can.showPage()
    can.save()

    # Rewind so the caller can read the PDF from the start.
    overlay_buffer.seek(0)
    return overlay_buffer

# Step 2: Insert the image on top of an existing page (e.g., page 0)
def insert_image_on_page(input_pdf_path, image_path, output_pdf_path, page_number=0, x=0, y=0):
    """Stamp an image onto one page of a PDF and write the result to a new file.

    :param input_pdf_path: Path of the PDF to read.
    :param image_path: Path of the image to place on the page.
    :param output_pdf_path: Path of the PDF to write.
    :param page_number: Zero-based index of the page that receives the image.
    :param x: Horizontal position passed on to the overlay.
    :param y: Vertical position passed on to the overlay.
    """
    source = PdfReader(input_pdf_path)
    result = PdfWriter()

    # The overlay page is sized from the target page's media box.
    target_page = source.pages[page_number]
    media_box = target_page.mediabox
    overlay_stream = create_image_overlay(image_path, media_box.width, media_box.height, x, y)

    # Merge the single overlay page onto the target page.
    overlay_page = PdfReader(overlay_stream).pages[0]
    target_page.merge_page(overlay_page)

    # Copy every page (the modified one included) into the output document.
    for source_page in source.pages:
        result.add_page(source_page)

    with open(output_pdf_path, "wb") as destination:
        result.write(destination)

# Example usage: stamp signature.png onto the first page at (100, 50)
insert_image_on_page(
    "Laporan_Magang_Ivan (1).pdf-Internship Thesis-Aurelius-036-Aurelius-00000054769-signed.pdf",
    "signature.png",
    "output_with_signature.pdf",
    page_number=0,
    x=100,
    y=50,
)

In [None]:
import os
import fitz

def embed_files_in_existing_pdf(existing_pdf_path, output_pdf_path, files_to_embed):
    """
    Embed files into an existing PDF file.

    Each path in ``files_to_embed`` is handled on a best-effort basis: a path
    that is not a regular file, or a file that fails to embed, is reported
    with a printed message and the remaining files are still processed.

    :param existing_pdf_path: Path to the existing PDF file.
    :param output_pdf_path: Path to the output PDF file.
    :param files_to_embed: List of file paths to embed into the PDF.
    """
    doc = fitz.open(existing_pdf_path)
    try:
        for file_path in files_to_embed:
            try:
                if not os.path.isfile(file_path):
                    print(f"File not found: {file_path}")
                    continue

                with open(file_path, 'rb') as f:
                    file_content = f.read()

                # Name and payload are passed positionally: the payload
                # parameter appears as ``buffer`` in the PyMuPDF docs and as
                # ``buffer_`` in some releases, so no keyword is relied upon.
                doc.embfile_add(
                    os.path.basename(file_path),
                    file_content,
                    filename=file_path,
                )
                print(f"Successfully embedded {file_path}")
            except Exception as e:
                # Deliberate best-effort: report the failure and carry on.
                print(f"Could not embed file {file_path}\nError: {str(e)}")

        # Save the modified PDF
        doc.save(output_pdf_path)
    finally:
        # Always release the document, even when embedding or saving fails.
        doc.close()
    print(f"PDF saved as {output_pdf_path}")

# Usage: attach two data files to '0.pdf' and write the result to 'output.pdf'
files = ['data.json', 'data.xml']
embed_files_in_existing_pdf(
    existing_pdf_path='0.pdf',
    output_pdf_path='output.pdf',
    files_to_embed=files,
)
