In [1]:
import cv2
import numpy as np
import easyocr
import json
from tqdm import tqdm

# Build the EasyOCR reader: English only, CPU inference (gpu=False), using the
# 'best_norm_ED' recognition network. Both the user-network directory and the
# model-storage directory point at ../models.
reader = easyocr.Reader(
    ['en'],
    gpu=False,
    recog_network='best_norm_ED',
    user_network_directory='../models',
    model_storage_directory='../models',
)

# Image file names to run OCR on.
images = ['test.png']

# Accumulates one entry per processed image.
all_annotations = []

def _bbox_to_percent(bbox, width, height):
    """Convert a four-corner OCR box into a percentage-based rectangle.

    Args:
        bbox: Sequence of four (x, y) corner points in pixels.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        Tuple ``(x, y, box_width, box_height)`` expressed as percentages of
        the image size. The rectangle is the axis-aligned box enclosing all
        four corners, so skewed quadrilaterals are fully covered; for an
        axis-aligned box this equals the top-left / bottom-right rectangle.
    """
    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    left, top = min(xs), min(ys)
    # float() keeps the values plain Python floats regardless of whether the
    # OCR library returned NumPy scalars, so json.dump can always encode them.
    return (
        float(left / width * 100),
        float(top / height * 100),
        float((max(xs) - left) / width * 100),
        float((max(ys) - top) / height * 100),
    )


# Run OCR on every image and collect, per image, one rectangle region and one
# transcription region for each detected piece of text.
for image_name in tqdm(images):
    image_path = "../examples/" + image_name
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread does not raise on a missing/unreadable file; it returns
        # None, which would otherwise fail later inside cvtColor with an
        # opaque OpenCV error.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = reader.readtext(image=image)

    # Get image dimensions (rows, columns); any channel axis is ignored.
    height, width = image.shape[:2]

    # Create a dictionary to store data for the current image.
    # NOTE(review): region dicts are appended directly to "annotations" and the
    # rectangle/transcription pair use different ids. Label Studio's import
    # format appears to expect regions under annotations[i]["result"] with a
    # shared id linking the pair — confirm against the consumer of this file
    # before changing the layout.
    image_data = {
        "id": len(all_annotations) + 1,
        "annotations": [],
        "file_upload": image_name,
        "data": {"ocr": f"/data/upload/1/{image_name}"}
        # Add other fields as needed
    }

    # Each result is (corner points, recognized text, confidence); the
    # confidence is not used in the exported data.
    for idx, (bbox, text, _prob) in enumerate(results):
        x, y, bbox_width, bbox_height = _bbox_to_percent(bbox, width, height)

        # Rectangle region for the detected text area.
        bbox_annotation = {
            "original_width": width,
            "original_height": height,
            "image_rotation": 0,
            "value": {
                "x": x,
                "y": y,
                "width": bbox_width,
                "height": bbox_height,
                "rotation": 0
            },
            "id": f"bbox_{idx}",
            "from_name": "bbox",
            "to_name": "image",
            "type": "rectangle",
            "origin": "manual"
        }

        # Transcription region: same geometry plus the recognized text.
        text_annotation = {
            "original_width": width,
            "original_height": height,
            "image_rotation": 0,
            "value": {
                "x": x,
                "y": y,
                "width": bbox_width,
                "height": bbox_height,
                "rotation": 0,
                "text": [text]
            },
            "id": f"text_{idx}",
            "from_name": "transcription",
            "to_name": "image",
            "type": "textarea",
            "origin": "manual"
        }

        # Add both regions to the image data
        image_data["annotations"].extend([bbox_annotation, text_annotation])

    # Add the image data to the all annotations list
    all_annotations.append(image_data)

# Serialize everything collected in all_annotations to
# label_studio_annotations.json in the current working directory,
# pretty-printed with a 4-space indent.
with open('label_studio_annotations.json', mode='w') as json_file:
    json.dump(all_annotations, fp=json_file, indent=4)


Using CPU. Note: This module is much faster with a GPU.


/home/bma/EasyOCR_kz/test_model/../easyocr/__init__.py


100%|██████████| 1/1 [00:24<00:00, 24.54s/it]
