In [1]:
import json
from PIL import Image
import os

In [5]:
def _bounding_box(points):
    """Return the integer bounding box ``(x1, y1, x2, y2)`` enclosing ``points``.

    Coordinates are truncated with ``int()`` before the box is computed.

    Raises:
        ValueError: if fewer than two points are given, or if the resulting
            box has zero width or zero height.
    """
    if len(points) < 2:
        raise ValueError("A shape needs at least two points to define a crop box.")

    xs = [int(point[0]) for point in points]
    ys = [int(point[1]) for point in points]

    # Use the extent of *all* vertices. Sorting and taking only the first two
    # points breaks for shapes with more than two vertices: the two leftmost
    # points typically share an x coordinate, giving a zero-width box.
    x1, x2 = min(xs), max(xs)
    y1, y2 = min(ys), max(ys)

    if x1 >= x2 or y1 >= y2:
        raise ValueError("Invalid cropping coordinates. 'lower' must be less than 'upper'.")

    return x1, y1, x2, y2


def crop_and_save(image_path, json_data, output_folder):
    """Crop every annotated shape out of an image and save crop + label pairs.

    For the shape at position ``i`` (1-based) two files are written into
    ``output_folder``: ``<image stem>_<iii>.jpg`` holding the cropped region
    and ``<image stem>_<iii>.txt`` holding the shape's label as UTF-8 text,
    where ``<iii>`` is ``i`` zero-padded to three digits.

    Args:
        image_path: Path of the image to crop from.
        json_data: Mapping with a ``"shapes"`` list; each entry must provide a
            ``"label"`` string and a ``"points"`` sequence of ``(x, y)`` pairs.
            NOTE(review): this looks like a labelme-style annotation dict --
            confirm against the tool that produced the JSON.
        output_folder: Directory that receives the output files. It is created
            if it does not exist yet.

    Raises:
        ValueError: if a shape has fewer than two points or its bounding box
            is degenerate. Files for earlier shapes have already been written
            when this happens.
    """
    image_name = os.path.splitext(os.path.basename(image_path))[0]

    # The context manager guarantees the underlying file handle is released,
    # including when a shape fails validation part-way through.
    with Image.open(image_path) as image:
        os.makedirs(output_folder, exist_ok=True)

        for idx, shape in enumerate(json_data["shapes"], start=1):
            label = shape["label"]
            crop_box = _bounding_box(shape["points"])

            output_stem = f"{image_name}_{str(idx).zfill(3)}"

            word_image = image.crop(crop_box)
            word_image.save(os.path.join(output_folder, output_stem + ".jpg"))

            output_text_path = os.path.join(output_folder, output_stem + ".txt")
            with open(output_text_path, "w", encoding="utf-8") as text_file:
                text_file.write(label)
            
            
if __name__ == "__main__":
    # Every `<name>.jpg` in `input_folder` is expected to have a sibling
    # `<name>.json` annotation file; crops and labels go to `output_folder`.
    input_folder = "converted"
    output_folder = "wordlevel_data"

    # Create the destination once up front instead of on every iteration.
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that repeated runs process the files in the same order.
    for image_file in sorted(os.listdir(input_folder)):
        if not image_file.endswith(".jpg"):
            continue

        image_path = os.path.join(input_folder, image_file)

        # Swap only the extension: str.replace would also rewrite any other
        # ".jpg" occurring earlier in the file name.
        json_name = os.path.splitext(image_file)[0] + ".json"
        json_path = os.path.join(input_folder, json_name)

        with open(json_path, "r", encoding="utf-8") as json_file:
            json_data = json.load(json_file)

        crop_and_save(image_path, json_data, output_folder)

    print("Cropping and saving completed.")

Cropping and saving completed.


: 