## Line Extraction from a PDF file

In [1]:
import fitz
from PIL import Image

In [2]:
doc = fitz.open("table.pdf")
page = doc[0]  # first page of the document (index 0)
paths = page.get_drawings()  # extract the drawings that already exist on the page
# `paths` is a list of "paths"; each one can be drawn again directly using a Shape.
# -------------------------------------------------------------------------

In [3]:
# Create an empty output document and give it one page that has the same
# width and height as the source page.
outpdf = fitz.open()
source_rect = page.rect
outpage = outpdf.new_page(width=source_rect.width, height=source_rect.height)
# Drawing canvas bound to the output page; draw commands are collected on it.
shape = outpage.new_shape()

In [4]:
# ----------------------------------------------------------------
# Replay every extracted path on the output shape.
#
# Each entry of path["items"] is a draw command whose first element
# is an operator code; the table below maps each code to the Shape
# call that redraws it from the remaining elements.
# ----------------------------------------------------------------
draw_ops = {
    "l": lambda cmd: shape.draw_line(cmd[1], cmd[2]),  # line
    "re": lambda cmd: shape.draw_rect(cmd[1]),  # rectangle
    "qu": lambda cmd: shape.draw_quad(cmd[1]),  # quad
    "c": lambda cmd: shape.draw_bezier(cmd[1], cmd[2], cmd[3], cmd[4]),  # curve
}

for path in paths:
    for item in path["items"]:
        draw = draw_ops.get(item[0])
        if draw is None:
            # Unknown operator code: fail loudly rather than skip the command.
            raise ValueError("unhandled drawing", item)
        draw(item)
    # Close the path once all of its items are drawn. finish() is called
    # without arguments, so none of the source path's own properties are
    # passed on here.
    # NOTE(review): presumably intentional for outline-only extraction - confirm.
    shape.finish()

In [5]:
# All paths processed - commit the shape to its page,
# then save the output document to disk.
shape.commit()
outpdf.save("test_file_extracted2.pdf")

### Convert the PDF file to image files

In [6]:
from pdf2image import convert_from_path

# Path to the PDF to rasterise. This must be the file written by the
# extraction step above ("test_file_extracted2.pdf"); the previous value,
# 'test_file_extracted.pdf', is never produced by this notebook, so the
# cell failed (or converted a stale file) on a fresh Restart & Run All.
pdf_path = 'test_file_extracted2.pdf'

# Convert the PDF file to a list of images, one per page
images = convert_from_path(pdf_path)

# Save each page image as a JPEG file named page_1.jpg, page_2.jpg, ...
for i, image in enumerate(images, start=1):
    image.save(f'page_{i}.jpg', 'JPEG')