In [1]:
# Record the Python interpreter version this notebook was executed with
import platform
print('Python version:', platform.python_version())

Python version: 3.10.0rc2


### Import Libraries

1. [Click here for documentation on convert_from_path](https://pdf2image.readthedocs.io/en/latest/reference.html#:~:text=pdf2image.pdf2image.convert_from_path)<br>
1. [Download Poppler for Windows from this link](https://github.com/oschwartz10612/poppler-windows/releases/tag/v23.01.0-0)<br>
Note: Download the latest release; if its version is not 23.01.0, update the version in `POPPLER_PATH` (Inputs section) to match.
1. Extract the folder from the downloaded Poppler zip file<br>
1. Copy the extracted folder to C:\Program Files\ or into your current working directory. In this project, it is saved under a folder called <i>Poppler</i> in the current working directory.

In [2]:
from pdf2image import convert_from_path
import os
from PIL import Image

### Inputs

In [3]:
# Alternative location: a system-wide Poppler install
# POPPLER_PATH = r'C:\Program Files\poppler-23.01.0\Library\bin'
# Folder holding the Poppler binaries, relative to the current working directory
POPPLER_PATH = "./Poppler/poppler-23.01.0/Library/bin/"
# Letter size paper (8.5 in × 11 in) in pixels, at 300 pixels per inch
IMAGE_WIDTH = 2550 # 8.5 × 300
IMAGE_HEIGHT = 3300 # 11 × 300
# Parent folder under which save_images creates the per-PDF image folder
IMAGES_FOLDER = "./Images/"
# The PDF file to process
pdf_file_path = "PDF reader test 2.pdf"
# Task whose region is cropped from the page; must be a key of task_dict_coords
TEMP_TASK = "Wrap and Put away" # "Wrap and Put away" or "Pull mats"

### Functions

In [4]:
# Function to save images to a folder
# The function takes the argument of pdf_file_path, images,
# and destination_folder (default to IMAGES_FOLDER)
# It gets the file name from the path,
# creates a folder with the same name inside destination_folder,
# removes any files already in that folder,
# and saves the images there as JPEG files
def save_images(pdf_file_path: str, images: list, destination_folder: str = IMAGES_FOLDER) -> None:
    '''
    Function to save images from a pdf file to a folder

    The images are written to
    ``<destination_folder>/<pdf name>/<pdf name>_<index>.jpg`` where
    ``<pdf name>`` is the base name of ``pdf_file_path`` without its
    extension and ``<index>`` is the position of the image in ``images``.

    Parameters
    ----------
    pdf_file_path : str
        The path of the pdf file; only its base name is used
    images : list
        A list of images; each item must provide a ``save(path)`` method
    destination_folder : str, optional
        The parent folder in which the per-pdf folder is created,
        by default IMAGES_FOLDER

    Returns
    -------
    None
    '''
    # Get the file name from the path using basename
    file_name = os.path.basename(pdf_file_path)
    # Remove the file extension
    file_name_without_extension = os.path.splitext(file_name)[0]
    # Build the output folder inside the folder the caller asked for
    # (previously IMAGES_FOLDER was always used and the argument was ignored)
    folder_name = os.path.join(destination_folder, file_name_without_extension)
    # Create the folder, including destination_folder itself when it is missing
    os.makedirs(folder_name, exist_ok=True)
    # Delete files left over from a previous run; sub-folders are left alone
    # because os.remove cannot delete directories
    for entry in os.listdir(folder_name):
        entry_path = os.path.join(folder_name, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)
    # Save the images to the folder
    for i, image in enumerate(images):
        image.save(os.path.join(folder_name, f"{file_name_without_extension}_{i}.jpg"))

def inches_to_pixels(inches: float, dpi: int) -> int:
    """
    Converts inches to pixels based on the given DPI (dots per inch)

    The product is rounded to the nearest whole pixel instead of being
    truncated, so floating-point representation error cannot drop a pixel
    (0.29 in at 100 DPI evaluates to 28.999999999999996 and yields 29).

    Parameters
    ----------
    inches : float
        The length in inches
    dpi : int
        The resolution in dots (pixels) per inch

    Returns
    -------
    int
        The length in whole pixels
    """
    return int(round(inches * dpi))

def mm_to_pixels(mm: float, dpi: int) -> int:
    """
    Converts millimeters to pixels based on the given DPI (dots per inch)

    One inch is 25.4 mm. The result is rounded to the nearest whole pixel
    instead of being truncated, so a length is never systematically
    shortened by up to one pixel.

    Parameters
    ----------
    mm : float
        The length in millimeters
    dpi : int
        The resolution in dots (pixels) per inch

    Returns
    -------
    int
        The length in whole pixels
    """
    return int(round(mm * dpi / 25.4))

# Function to crop an image
def crop_image(image_path: str, x_start: int, y_start: int, width: int, height: int, save_path: str) -> None:
    '''
    Function to crop an image and save the cropped region to a new file

    The size and the DPI metadata of the source image are printed before
    cropping, and the save location is printed afterwards.

    Parameters
    ----------
    image_path : str
        The path of the image file
    x_start : int
        The x-coordinate of the top-left corner of the crop area
    y_start : int
        The y-coordinate of the top-left corner of the crop area
    width : int
        The width of the crop area
    height : int
        The height of the crop area
    save_path : str
        The path to save the cropped image

    Returns
    -------
    None
    '''
    # Crop box as (left, upper, right, lower)
    crop_area = (x_start, y_start, x_start + width, y_start + height)

    # Open the image in a context manager so the underlying file is closed
    # once the cropped copy has been written
    with Image.open(image_path) as image:
        # Check the size and DPI of the image
        print("Original image size:", image.size)
        print("Original image DPI:", image.info.get("dpi"))

        # Crop the image
        cropped_image = image.crop(crop_area)

        # Save the cropped image
        cropped_image.save(save_path)
    print("Cropped image saved at", save_path)

### Get images from PDF and save them in a folder

In [5]:
# Convert the PDF to images sized IMAGE_WIDTH x IMAGE_HEIGHT pixels,
# using the Poppler binaries located at POPPLER_PATH
images = convert_from_path(pdf_file_path, size = (IMAGE_WIDTH, IMAGE_HEIGHT), poppler_path = POPPLER_PATH)

In [6]:
# Write the images to a folder named after the PDF, using the default destination folder
save_images(pdf_file_path, images)

### Read image file from the folder

In [7]:
# Get the PDF file name from the path using basename, then drop the extension.
# save_images uses the same base name for the image folder and the file prefix.
file_name_with_ext = os.path.basename(pdf_file_path)
file_name = os.path.splitext(file_name_with_ext)[0]

# Folder that holds the images saved for this PDF
image_folder_path = os.path.join(IMAGES_FOLDER, file_name)

# Get the first image file name (index 0)
image_file_path = os.path.join(image_folder_path, f"{file_name}_0.jpg")

# Replace \ with / for Windows
image_file_path = image_file_path.replace("\\", "/")

print(image_file_path)

./Images/PDF reader test 2/PDF reader test 2_0.jpg


### Crop image

In [8]:
# Task dictionary coordinates in the format of
# {task_name: (x_start_in, y_start_in, width_in, height_in)}
# x_start_in / y_start_in locate the top-left corner of the crop area;
# the values are converted to pixels with inches_to_pixels in the next cell
task_dict_coords = {
    'Pull mats': (2.7, 1.87, 0.25, 0.19),
    'Wrap and Put away': (2.7, 1.71, 0.25, 0.19)
    }

# Select the crop area of the task chosen with TEMP_TASK
x_start_in, y_start_in, width_in, height_in = task_dict_coords[TEMP_TASK]


In [9]:
# Scale factor (pixels per coordinate unit) applied to the task coordinates.
# NOTE(review): the page is rendered at 300 pixels per inch
# (IMAGE_WIDTH x IMAGE_HEIGHT = 2550 x 3300), yet the coordinates are scaled
# by 500 - presumably they were calibrated against this factor; confirm
# before changing it.
CROP_DPI = 500

# Save the cropped image under IMAGES_FOLDER, next to the per-PDF image folder
save_cropped_image_path = os.path.join(IMAGES_FOLDER, f"{file_name}_cropped_image.jpg")

# Convert the selected task's crop area to pixels
x_start = inches_to_pixels(x_start_in, CROP_DPI)
y_start = inches_to_pixels(y_start_in, CROP_DPI)
width = inches_to_pixels(width_in, CROP_DPI)
height = inches_to_pixels(height_in, CROP_DPI)

crop_image(image_file_path, x_start, y_start, width, height, save_cropped_image_path)

Original image size: (2550, 3300)
Original image DPI: None
Cropped image saved at ./Images/PDF reader test 2_cropped_image.jpg


### Load PreTrained Model

In [10]:
# Requires Torch
# pip3 install torch torchvision torchaudio
# Use Python 3.10 for PyTorch: Python 3.11 still had issues as of 2023-01-23

In [11]:
# TrOCR processor and vision encoder-decoder model classes from transformers
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# NOTE(review): requests is not used in any visible cell - confirm it is still needed
import requests

# Load the processor and the model from the same pretrained checkpoint,
# "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

  from .autonotebook import tqdm as notebook_tqdm
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Predict text from image

In [12]:
# Load the cropped image written by crop_image.
# The path is used directly rather than being stored in file_name, so
# file_name keeps holding the PDF base name and the earlier cells that build
# paths from it can be re-run safely.
# convert("RGB") gives the processor a 3-channel image whatever mode the file
# was saved in; the context manager closes the file once the copy is made.
with Image.open(save_cropped_image_path) as cropped_file:
    image = cropped_file.convert("RGB")

# Preprocess the image into model input tensors and generate the token ids
pixel_values = processor(image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values)

# Decode the generated ids to text; the cell displays the first (only) result
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
generated_text



'5'