In [6]:
import os
import subprocess
import urllib.request
import zipfile

import fitz
from PIL import Image

In [7]:
# Open the source PDF and take its first page (index 0).
doc = fitz.open("table.pdf")
page = doc[0]
paths = page.get_drawings()  # extract existing drawings
# `paths` is a list of path dictionaries; each one carries an "items" list of
# draw commands, which the replay loop further down feeds back into a Shape.
# -------------------------------------------------------------------------

In [8]:
# Define an output document holding one page with the same dimensions as the
# source page. fitz.open() is called without a file name here, so the document
# only receives content from the calls below.
outpdf = fitz.open()
# Width and height are copied from the source page's rectangle.
outpage = outpdf.new_page(width=page.rect.width, height=page.rect.height)
shape = outpage.new_shape()  # make a drawing canvas for the output page

In [9]:
# ------------------------------------------------------------
# Replay every extracted path on the output page's Shape:
# first re-issue its draw commands, then finish the path with
# the path's own appearance properties.
# ------------------------------------------------------------
for path in paths:
    # ------------------------------------
    # draw each entry of the 'items' list
    # ------------------------------------
    for item in path["items"]:  # these are the draw commands
        if item[0] == "l":  # line
            shape.draw_line(item[1], item[2])
        elif item[0] == "re":  # rectangle
            shape.draw_rect(item[1])
        elif item[0] == "qu":  # quad
            shape.draw_quad(item[1])
        elif item[0] == "c":  # curve
            shape.draw_bezier(item[1], item[2], item[3], item[4])
        else:
            raise ValueError("unhandled drawing", item)
    # ------------------------------------------------------
    # all items are drawn, now apply the common properties
    # to finish the path
    # ------------------------------------------------------
    # Properties that do not apply to a path (e.g. stroke settings of a
    # fill-only path) may be reported as None; drop those so the defaults
    # below take over instead of passing None into finish().
    props = {key: value for key, value in path.items() if value is not None}

    # lineCap may be reported as a tuple of cap styles, while finish() takes a
    # single integer; use the largest value of the tuple in that case.
    line_cap = props.get("lineCap", 0)
    if isinstance(line_cap, (tuple, list)):
        line_cap = max(line_cap)

    # Without these arguments finish() falls back to its own defaults and the
    # replayed drawing loses its colours, line widths, dashing and opacity.
    shape.finish(
        fill=props.get("fill"),  # fill color
        color=props.get("color"),  # line (stroke) color
        dashes=props.get("dashes"),  # line dashing
        even_odd=props.get("even_odd", True),  # control color of overlaps
        closePath=props.get("closePath", False),  # connect last and first point
        lineJoin=props.get("lineJoin", 0),  # how line joins should look
        lineCap=line_cap,  # how line ends should look
        width=props.get("width", 1),  # line width
        stroke_opacity=props.get("stroke_opacity", 1),  # stroke transparency
        fill_opacity=props.get("fill_opacity", 1),  # fill transparency
    )
# all paths processed - the shape still has to be committed to its page

In [10]:
# Commit the shape built from the replayed paths to its page, then write the
# output document to disk. The same file name is assigned to `pdf_path` in a
# later cell.
shape.commit()
outpdf.save("test_file_ext.pdf")

In [11]:
import fitz
from pdf2image import convert_from_path

# File name of the PDF saved by the drawing-replay cells; it is also the input
# of the pdf2image conversion further down.
pdf_path = 'test_file_ext.pdf'
# NOTE(review): this rebinds `doc`, which previously referred to "table.pdf",
# and the new handle is not used by any cell visible here - confirm it is needed.
doc = fitz.open(pdf_path)

In [12]:
# Download Poppler for Windows and unpack it under Program Files.
# NOTE: os.environ['ProgramFiles'] is only defined on Windows, and writing to
# that folder normally requires an elevated (administrator) process.
poppler_version = '24.02.0'
# The former dl.bintray.com link cannot work any more (Bintray was shut down in
# 2021), so the archive is fetched from the GitHub releases of the Windows
# Poppler builds instead.
# NOTE(review): confirm the release tag / asset name for the chosen version.
poppler_url = (
    'https://github.com/oschwartz10612/poppler-windows/releases/download/'
    f'v{poppler_version}-0/Release-{poppler_version}-0.zip'
)
poppler_zip_path = os.path.join(os.environ['ProgramFiles'], f'poppler-{poppler_version}.zip')
# NOTE(review): assumes the archive unpacks into a "poppler-<version>" folder.
poppler_root = os.path.join(os.environ['ProgramFiles'], f'poppler-{poppler_version}')

# Only download and unpack when the install folder is missing, so re-running
# the notebook does not fetch the archive again.
if not os.path.isdir(poppler_root):
    # Standard-library download/extract: no dependency on external wget/unzip
    # executables, and any failure raises instead of being silently ignored.
    urllib.request.urlretrieve(poppler_url, poppler_zip_path)
    with zipfile.ZipFile(poppler_zip_path) as archive:
        archive.extractall(os.environ['ProgramFiles'])

# pdf2image expects `poppler_path` to be the folder that holds the Poppler
# executables, not the install root, so locate the folder containing pdftoppm.
poppler_path = next(
    (
        folder
        for folder, _subfolders, files in os.walk(poppler_root)
        if 'pdftoppm.exe' in files or 'pdftoppm' in files
    ),
    None,
)
if poppler_path is None:
    raise FileNotFoundError(
        f'No pdftoppm executable found below {poppler_root}; '
        'check the Poppler download and extraction.'
    )

In [13]:
# Convert PDF to images using pdf2image (one image per PDF page)
images = convert_from_path(pdf_path, poppler_path=poppler_path)

# Save each page under its own numbered JPEG file name. Writing every page to
# the same name would leave only the last page on disk; numbering starts at 1,
# so the first page is still written to 'extracted_image1.jpg'.
for page_number, image in enumerate(images, start=1):
    image.save(f'extracted_image{page_number}.jpg', 'JPEG')