In [31]:
from PIL import Image, ImageDraw
import os

from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.processor import load_processor as load_rec_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.detection.model import load_model, load_processor
from surya.detection import batch_text_detection
from surya.layout import batch_layout_detection
from surya.settings import settings
from surya.ocr import run_ocr

**Home page:** https://github.com/VikParuchuri/surya

The model weights are hosted on HuggingFace:
- Text detection: https://huggingface.co/vikp/surya_det3/tree/main (154 MB)
- Text recognition (OCR): https://huggingface.co/vikp/surya_rec2/tree/main (941 MB)
- Layout analysis: https://huggingface.co/vikp/surya_layout3/tree/main (154 MB)
- Reading order: https://huggingface.co/vikp/surya_order/tree/main (550 MB)
- Table recognition: https://huggingface.co/vikp/surya_tablerec/tree/main (397 MB)

The models are downloaded automatically from HuggingFace on first use and cached in `C:/Users/Colin/.cache/huggingface/hub`.

#### Text detection and recognition (OCR)

In [57]:
# Folder that holds the sample images.
# NOTE(review): machine-specific absolute path — point this at your own image
# folder before running the notebook on another machine.
SAMPLE_IMAGE_DIR = os.path.join('C://', 'Users', 'Colin', 'Downloads')

# Sample images tried so far. To switch image, change SELECTED_SAMPLE_NAME
# instead of reordering assignments that overwrite one another.
SAMPLE_IMAGE_NAMES = ('Sans titre.png', 'bo2.png', 'bo1.png', 'manhwa1.png')
SELECTED_SAMPLE_NAME = 'manhwa1.png'

# Path of the image used by the cells below.
test_img_path = os.path.join(SAMPLE_IMAGE_DIR, SELECTED_SAMPLE_NAME)

In [7]:
# Run OCR on the selected sample image: text lines are detected first, then
# the text inside them is recognized.
image = Image.open(test_img_path)

# Languages present in the image. Optional, but recommended: replace with
# your own languages.
langs = ["en"]

# Load the detection and recognition models together with their processors.
det_processor = load_det_processor()
det_model = load_det_model()
rec_model = load_rec_model()
rec_processor = load_rec_processor()

# Images and language lists are passed as parallel lists (one entry per image);
# here the batch holds a single image.
predictions = run_ocr(
    [image],
    [langs],
    det_model,
    det_processor,
    rec_model,
    rec_processor,
)

Loaded detection model vikp/surya_det3 on device cuda with dtype torch.float16
Loaded recognition model vikp/surya_rec2 on device cuda with dtype torch.float16


Detecting bboxes: 100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
Recognizing Text: 100%|██████████| 1/1 [00:05<00:00,  5.54s/it]


#### Layout analysis

In [58]:
# Layout analysis consumes the text-line detections, so two model/processor
# pairs are loaded: one from the layout checkpoint and one with no explicit
# checkpoint (the loader's default), used for line detection.
image = Image.open(test_img_path)

layout_checkpoint = settings.LAYOUT_MODEL_CHECKPOINT
model = load_model(checkpoint=layout_checkpoint)
processor = load_processor(checkpoint=layout_checkpoint)
det_model = load_model()
det_processor = load_processor()

# Both calls take a batch (list) of images; here the batch holds one image.
images = [image]
line_predictions = batch_text_detection(images, det_model, det_processor)

# layout_predictions holds one result per input image. Each result exposes
# `.bboxes`, whose items carry `polygon`, `confidence`, `label` and `bbox`.
# `polygon` is in (x1, y1), (x2, y2), (x3, y3), (x4, y4) format, with the
# points in clockwise order from the top left.
layout_predictions = batch_layout_detection(images, model, processor, line_predictions)

Loaded detection model vikp/surya_layout3 on device cuda with dtype torch.float16
Loaded detection model vikp/surya_det3 on device cuda with dtype torch.float16


Detecting bboxes: 100%|██████████| 1/1 [00:04<00:00,  4.01s/it]
Detecting bboxes: 100%|██████████| 1/1 [00:08<00:00,  8.93s/it]


In [59]:
# Show the first layout box found on the first (and only) image of the batch.
first_image_layout = layout_predictions[0]
first_image_layout.bboxes[0]

LayoutBox(polygon=[[0, 1011], [799, 1011], [799, 2060], [0, 2060]], confidence=1.0, label='Figure', bbox=[0, 1011, 799, 2060])

In [None]:
# Outline every detected layout region in red. Drawing happens on a copy so
# the original `image` stays untouched for the other cells.
img_with_squares = image.copy()
draw = ImageDraw.Draw(img_with_squares)

for layout_box in layout_predictions[0].bboxes:
    # Each box already exposes `bbox` as [x0, y0, x1, y1], the flat form that
    # `rectangle` accepts. Concatenating polygon corners 0 and 2 only yields
    # that rectangle when the polygon is axis-aligned and ordered from the
    # top left.
    # NOTE(review): presumably `bbox` is the polygon's bounding extent —
    # confirm against the surya schema.
    draw.rectangle(layout_box.bbox, outline="red")

# Last expression of the cell: renders the annotated image.
img_with_squares