In [1]:
import platform
# Record the interpreter version so the run can be reproduced later
print(
    'Python version:',
    platform.python_version(),
)

Python version: 3.11.0


### Import Libraries

1. [Click here for documentation on convert_from_path](https://pdf2image.readthedocs.io/en/latest/reference.html#:~:text=pdf2image.pdf2image.convert_from_path)<br>
1. [Download Poppler for Windows from this link](https://github.com/oschwartz10612/poppler-windows/releases/tag/v23.01.0-0)<br>
Note: Download the latest release
1. Extract the folder from the downloaded Poppler zip file<br>
1. Copy the extracted folder to C:\Program Files\ or into your current working directory. In this project, it is saved in a folder called <i>Poppler</i> in the current working directory.

In [2]:
from pdf2image import convert_from_path
import os

### Inputs

In [3]:
# Folder holding the Poppler binaries that pdf2image calls.
# Alternative when Poppler was copied to Program Files instead:
# POPPLER_PATH = r'C:\Program Files\poppler-23.01.0\Library\bin'
POPPLER_PATH = "./Poppler/poppler-23.01.0/Library/bin/"

# Letter size paper in pixels: width = 8.5 × 300, height = 11 × 300
IMAGE_WIDTH, IMAGE_HEIGHT = 2550, 3300

# Root folder under which the extracted page images are written
IMAGES_FOLDER = "./Images/"

# PDF file to convert
pdf_file_path = "PDF reader test.pdf"

### Functions

In [4]:
# Save the page images of a PDF into "<destination_folder>/<pdf name>/",
# one JPEG per image, replacing any files left there by a previous run.
def save_images(pdf_file_path: str, images: list, destination_folder: str = IMAGES_FOLDER) -> None:
    '''
    Function to save images from a pdf file to a folder

    The images are written to a sub-folder of ``destination_folder`` named
    after the pdf file (without its extension). The sub-folder and any
    missing parent folders are created on demand. If the sub-folder already
    exists, the files it contains are deleted first so that stale pages from
    a previous run do not remain; sub-directories are left untouched.

    Parameters
    ----------
    pdf_file_path : str
        The path of the pdf file; only its base name is used to name the
        output sub-folder and the image files
    images : list
        A list of images; each item must provide a ``save(path)`` method
    destination_folder : str, optional
        The destination folder to save the images, by default IMAGES_FOLDER

    Returns
    -------
    None

    Raises
    ------
    OSError
        If the output folder cannot be created or a file cannot be removed
    '''
    # Get the file name from the path using basename
    file_name = os.path.basename(pdf_file_path)
    # Remove the file extension
    file_name_without_extension = os.path.splitext(file_name)[0]
    # Build the output folder under the requested destination
    # (not under the global IMAGES_FOLDER, so the argument is honoured)
    folder_name = os.path.join(destination_folder, file_name_without_extension)
    # Create the folder, including missing parents; no error if it exists
    os.makedirs(folder_name, exist_ok=True)
    # Delete leftover files; skip sub-directories, which os.remove cannot delete
    for file in os.listdir(folder_name):
        file_path = os.path.join(folder_name, file)
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.remove(file_path)
    # Save the images to the folder, numbered from 0 in list order
    for i, image in enumerate(images):
        image.save(os.path.join(folder_name, f"{file_name_without_extension}_{i}.jpg"))

### Get images from PDF and save them in a folder

In [5]:
# Convert the PDF into images at the configured pixel size
images = convert_from_path(
    pdf_file_path,
    size=(IMAGE_WIDTH, IMAGE_HEIGHT),
    poppler_path=POPPLER_PATH,
)

In [6]:
# Write the converted images to disk under the default images folder
save_images(pdf_file_path, images, destination_folder=IMAGES_FOLDER)