In [None]:
# Install PaddleOCR into the environment of the running kernel.
# `%pip` is used explicitly so the cell does not depend on IPython automagic.
%pip install paddleocr

In [None]:
# Install the OCR stack (PaddlePaddle, PaddleOCR, OpenCV) into the environment
# of the running kernel. `%pip` is used explicitly so the cell does not depend
# on IPython automagic.
%pip install paddlepaddle paddleocr opencv-python

In [None]:
# Pin protobuf to the 3.20 series in the running kernel's environment.
# The specifier is quoted so the shell cannot glob-expand the `*`.
%pip install "protobuf==3.20.*"

In [None]:
import os

# Select protobuf's pure-Python implementation through its environment switch.
# NOTE(review): this presumably has to run before anything imports protobuf
# (i.e. before the paddleocr import further down) to take effect - confirm.
os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})


In [None]:
import os
import cv2
import json
import numpy as np
from paddleocr import PaddleOCR

def _json_default(value):
    """Convert NumPy scalars/arrays to plain Python so ``json.dump`` accepts them."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def get_coords_and_text(image_path, output_json_path, raw_ocr_json_path):
    """Run PaddleOCR on one image and save the detections to disk.

    Three artifacts are written:

    * the raw OCR result, as JSON, to ``raw_ocr_json_path``;
    * a JSON list of ``{"name", "coords", "conf_value"}`` records to
      ``output_json_path``, where ``coords`` is the axis-aligned box
      ``[x_min, y_min, x_max, y_max]`` in integer pixels;
    * a copy of the image with every box drawn in green, saved as
      ``<image name without extension>_boxed.png`` in the directory of
      ``output_json_path``.

    Args:
        image_path: Path of the image to process.
        output_json_path: Destination of the processed JSON records.
        raw_ocr_json_path: Destination of the unprocessed OCR result.

    Returns:
        None. Any failure (unreadable image, no text detected, write errors)
        is caught and reported with ``print`` instead of being raised.
    """
    try:
        coor_txt_list = []

        # Initialize PaddleOCR
        ocr = PaddleOCR(
            use_angle_cls=True,
            lang='en',
            use_gpu=False
        )

        # Read image (only used as the canvas for drawing the boxes)
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Unable to read image from path: {image_path}")

        # Perform OCR using PaddleOCR
        result = ocr.ocr(image_path, cls=True)

        # Save the raw OCR result, creating its directory when needed
        raw_dir = os.path.dirname(raw_ocr_json_path)
        if raw_dir:
            os.makedirs(raw_dir, exist_ok=True)
        with open(raw_ocr_json_path, 'w') as raw_json_file:
            json.dump(result, raw_json_file, indent=4, default=_json_default)

        print("OCR Result saved to:", raw_ocr_json_path)  # Debugging output

        # Check if result is empty
        if not result or not result[0]:
            raise ValueError("No text detected in the image.")

        for line in result[0]:
            try:
                # Each entry is [quadrilateral, (text, confidence)]; the
                # quadrilateral is the first element, not a slice of the entry.
                quad, (text, conf) = line[0], line[1]
                print(f"Processing line: BBox={quad}, Text={text}, Conf={conf}")  # Debugging output

                # Collapse the four corner points into an axis-aligned box
                xs = [int(round(point[0])) for point in quad]
                ys = [int(round(point[1])) for point in quad]
                x_min, y_min, x_max, y_max = min(xs), min(ys), max(xs), max(ys)
                bbox_coords = [x_min, y_min, x_max, y_max]

                coor_txt_list.append({
                    "name": text,
                    "coords": bbox_coords,
                    # float() keeps the record JSON-serializable even if the
                    # confidence arrives as a NumPy scalar.
                    "conf_value": float(conf)
                })

                # Draw the same box that is stored in the JSON record
                cv2.rectangle(
                    image, (x_min, y_min), (x_max, y_max),
                    color=(0, 255, 0), thickness=2
                )

            except Exception as e:
                # Best effort: a malformed entry is reported and skipped
                print(f"Error processing line {line}: {e}")
                continue

        # Ensure output directory exists ('' means the current directory)
        output_dir = os.path.dirname(output_json_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save the extracted data as JSON
        with open(output_json_path, 'w') as fp:
            json.dump(coor_txt_list, fp, indent=4)

        # Debugging output
        print("Processed JSON data written successfully.")

        # Save the image with bounding boxes; splitext keeps dots that are
        # part of the file name (e.g. "scan.v2.png" -> "scan.v2_boxed.png").
        image_stem = os.path.splitext(os.path.basename(image_path))[0]
        output_image_path = os.path.join(output_dir, f"{image_stem}_boxed.png")
        if not cv2.imwrite(output_image_path, image):
            raise IOError(f"Failed to write image to {output_image_path}")
        print(f"Image with bounding boxes saved to {output_image_path}")

    except Exception as e:
        print(f"An error occurred: {e}")

# Example usage: OCR a single page and write the results to disk.
# NOTE: these are machine-specific absolute Windows paths; adjust before running.
image_path = r"C:\PaddleOCR_TextExtraction\page_7.png"
raw_ocr_json_path = r"C:\PaddleOCR_TextExtraction\page_7.json"
output_json_path = r"C:\Users\Aventior\Documents\Paddleocr_result\Paddleocr_result.json"

get_coords_and_text(
    image_path=image_path,
    output_json_path=output_json_path,
    raw_ocr_json_path=raw_ocr_json_path,
)


In [None]:
from paddleocr import __version__

# Report which PaddleOCR release is installed in this kernel.
print(f"PaddleOCR version: {__version__}")
