In [2]:
import cv2
from PIL import Image
from typing import List, Tuple
import matplotlib.pyplot as plt
from surya.recognition import OCRResult, RecognitionPredictor
from surya.detection import DetectionPredictor

class SuryaOCR:
    """Small convenience wrapper around Surya's detection and recognition predictors.

    The accessors and `predict` report only the FIRST text line found in the
    first image of a prediction batch. When nothing was detected they return
    neutral values (empty text, empty box, 0.0 confidence) instead of raising
    `IndexError`.
    """

    def __init__(self, langs: Tuple[str, ...] = ('es',))-> None:
        """Create the predictors and remember the recognition languages.

        Args:
            langs: Language codes passed to the recognition predictor for
                every image. Defaults to Spanish only (``('es',)``).
        """
        self.detection_predictor = DetectionPredictor()
        self.recognition_predictor = RecognitionPredictor()
        # Stored as a list so the attribute keeps the shape it always had.
        self.langs = list(langs)

    def get_predictions(self, image: Image.Image)-> List[OCRResult]:
        """Run detection + recognition on a single image.

        Args:
            image: The image to analyse.

        Returns:
            Whatever the recognition predictor returns for a one-image batch.
        """
        # The predictor works on batches: one image, one language list per image.
        return self.recognition_predictor(
            images=[image],
            langs=[self.langs],
            det_predictor=self.detection_predictor,
        )

    @staticmethod
    def _first_text_line(predictions: List[OCRResult]):
        """Return the first text line of the first result, or None if absent."""
        if not predictions:
            return None
        text_lines = predictions[0].text_lines
        return text_lines[0] if text_lines else None

    def get_text_from_predictions(self, predictions: List[OCRResult])-> str:
        """Return the text of the first detected line, or '' if none was found."""
        line = self._first_text_line(predictions)
        return line.text if line is not None else ''

    def get_bouding_boxes_from_predictions(self, predictions: List[OCRResult])-> List[float]:
        """Return the bbox of the first detected line, or [] if none was found.

        The (misspelled) method name is kept for backward compatibility.
        """
        line = self._first_text_line(predictions)
        return line.bbox if line is not None else []

    def get_confidence_from_predictions(self, predictions: List[OCRResult])-> float:
        """Return the confidence of the first detected line.

        Falls back to 0.0 when no line was detected or the line carries no
        confidence value.
        """
        line = self._first_text_line(predictions)
        if line is None or line.confidence is None:
            return 0.0
        return line.confidence

    def show_image_with_bounding_boxes(self, image_path: str, bounding_boxes: List[float], text: str = "Image with boundings")-> None:
        """Display the image with one rectangle drawn on it.

        Args:
            image_path: Path of the image file to load with OpenCV.
            bounding_boxes: A single box given as four numbers; they are
                truncated to ints and used as (x1, y1, x2, y2).
            text: Title shown above the plot.

        Raises:
            ValueError: If `image_path` or `bounding_boxes` is empty, if the
                image cannot be read, or if the box does not have exactly
                four values.
        """
        if not image_path:
            raise ValueError('Image is required')
        if not bounding_boxes:
            raise ValueError('Bounding boxes are required')

        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside cv2.rectangle.
        if image is None:
            raise ValueError(f'Could not read image: {image_path}')

        x1, y1, x2, y2 = map(int, bounding_boxes)

        cv2.rectangle(img=image, pt1=(x1, y1), pt2=(x2, y2), color=(0, 255, 0), thickness=2)

        # OpenCV loads images as BGR; matplotlib expects RGB.
        plt.imshow(X=cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.title(label=text)
        plt.show()

    def predict(self, image_path: str, showResult: bool = False)-> Tuple[str, List[float], float]:
        """Run OCR on an image file and return the first detected line.

        Args:
            image_path: Path of the image file to analyse.
            showResult: When True and a line was detected, display the image
                with the detected box drawn on it and the text as title.

        Returns:
            A tuple ``(text, bounding_box, confidence)`` for the first
            detected line; ``('', [], 0.0)`` when nothing was detected.

        Raises:
            ValueError: If `image_path` is empty.
        """
        if not image_path:
            raise ValueError('Image is required')

        # Context manager guarantees the underlying file handle is released.
        with Image.open(fp=image_path) as image:
            predictions: List[OCRResult] = self.get_predictions(image=image)

        extracted_text: str = self.get_text_from_predictions(predictions=predictions)
        bounding_boxes: List[float] = self.get_bouding_boxes_from_predictions(predictions=predictions)
        confidence: float = self.get_confidence_from_predictions(predictions=predictions)

        # Nothing to draw when no line was detected, so skip the display.
        if showResult and bounding_boxes:
            self.show_image_with_bounding_boxes(image_path=image_path, bounding_boxes=bounding_boxes, text=extracted_text)

        return extracted_text, bounding_boxes, confidence

In [4]:
# Build the shared OCR wrapper once for the notebook; the constructor creates
# both the detection and the recognition predictor.
suryaOCR: SuryaOCR = SuryaOCR()

Loaded detection model s3://text_detection/2025_02_28 on device cpu with dtype torch.float32
Loaded recognition model s3://text_recognition/2025_02_18 on device cpu with dtype torch.float32
