In [1]:
import fitz  # PyMuPDF

# Path to the source image and the PDF to create from it
image_path = "fish.jpg"
pdf_path = "fish.pdf"

# Layout parameters, in PDF units
PAGE_SCALE = 1.5  # page is 50% wider and taller than the image
MARGIN = 50       # offset of the image from the top-left page corner

# Open the image only to read its dimensions. The context manager closes the
# handle afterwards (the previous version left this document open).
with fitz.open(image_path) as img_doc:
    pix = img_doc[0].get_pixmap()
    img_width, img_height = pix.width, pix.height

# Page size: PAGE_SCALE times the image, but never smaller than the image plus
# the margin on both sides. Without the lower bound, an image narrower or
# shorter than 2 * MARGIN would extend past the page edge.
page_width = max(int(img_width * PAGE_SCALE), img_width + 2 * MARGIN)
page_height = max(int(img_height * PAGE_SCALE), img_height + 2 * MARGIN)

# Build a one-page PDF; the document is closed even if insertion or saving fails
with fitz.open() as doc:
    page = doc.new_page(width=page_width, height=page_height)

    # Place the image near the top-left corner, leaving empty space around it
    rect = fitz.Rect(MARGIN, MARGIN, MARGIN + img_width, MARGIN + img_height)
    page.insert_image(rect, filename=image_path)

    doc.save(pdf_path)

print(f"PDF saved as {pdf_path}")


PDF saved as fish.pdf


In [2]:
import fitz  # PyMuPDF
import cv2
import numpy as np

def trim_pdf(input_pdf, output_pdf, dpi=200, white_threshold=240):
    """
    Create a cleaned PDF by cropping each page to the bounding box of its content.

    Every page is rendered to a raster image, pixels darker than or equal to
    ``white_threshold`` are treated as content, and the image is cropped to
    the box enclosing all content. The cropped image is embedded on a new page
    whose size (in PDF points) matches the physical size of the cropped
    region, so the output is not enlarged by the rendering resolution.

    Pages on which no content is detected are omitted from the output.

    Args:
        input_pdf (str): Path to input PDF.
        output_pdf (str): Path to save trimmed PDF.
        dpi (int): Resolution for rendering and output quality.
        white_threshold (int): Grayscale value (0-255) above which a pixel is
            considered background.

    Raises:
        ValueError: If no page of the input contains any detectable content,
            so there is nothing to save.
    """
    # Context managers guarantee both documents are closed on any exit path.
    with fitz.open(input_pdf) as doc, fitz.open() as new_doc:
        for page in doc:
            # Render page to image
            pix = page.get_pixmap(dpi=dpi)
            img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                pix.height, pix.width, pix.n
            )
            if pix.n == 4:  # RGBA -> drop alpha
                img = img[:, :, :3]

            # Build a content mask: anything not close to white becomes 255
            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            _, mask = cv2.threshold(gray, white_threshold, 255, cv2.THRESH_BINARY_INV)

            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                continue  # blank page: nothing to crop to

            # Union of all contour boxes; each box is computed only once
            boxes = [cv2.boundingRect(c) for c in contours]
            x_min = min(x for x, _, _, _ in boxes)
            y_min = min(y for _, y, _, _ in boxes)
            x_max = max(x + w for x, _, w, _ in boxes)
            y_max = max(y + h for _, y, _, h in boxes)

            cropped = img[y_min:y_max, x_min:x_max]

            # cv2.imencode expects BGR channel order
            cropped_bgr = cv2.cvtColor(cropped, cv2.COLOR_RGB2BGR)
            cropped_bytes = cv2.imencode(".png", cropped_bgr)[1].tobytes()

            # Convert the pixel extent back to PDF points using the actual
            # render scale. Using pixel counts directly as points would
            # enlarge the page by a factor of dpi / 72.
            pt_per_px_x = page.rect.width / pix.width
            pt_per_px_y = page.rect.height / pix.height
            rect = fitz.Rect(
                0,
                0,
                cropped.shape[1] * pt_per_px_x,
                cropped.shape[0] * pt_per_px_y,
            )

            # Insert into new PDF
            new_page = new_doc.new_page(width=rect.width, height=rect.height)
            new_page.insert_image(rect, stream=cropped_bytes)

        if new_doc.page_count == 0:
            # A PDF needs at least one page; fail with an actionable message
            raise ValueError(
                f"No content found on any page of {input_pdf}; nothing to save"
            )

        # Save new PDF
        new_doc.save(output_pdf)

    print(f"Trimmed PDF saved as {output_pdf}")


# Crop "fish.pdf" (the file written by the first cell) to its content and
# save the result as "clean_fish.pdf", using the default rendering dpi.
trim_pdf("fish.pdf", "clean_fish.pdf")


Trimmed PDF saved as clean_fish.pdf
