In [5]:
from transformers import FastImageProcessor, FastForSceneTextRecognition, AutoConfig, AutoBackbone
from transformers import TextNetConfig
from transformers import TextNetBackbone
from PIL import Image
import requests

url = "https://huggingface.co/datasets/Raghavan/fast_model_samples/resolve/main/img657.jpg"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
processor = FastImageProcessor.from_pretrained("jadechoghari/fast-tiny")


In [6]:
model = FastForSceneTextRecognition.from_pretrained("jadechoghari/fast-tiny")

In [7]:
import torch
inputs = processor(image, return_tensors="pt")
# forward pass
with torch.no_grad():
    outputs = model(pixel_values=inputs["pixel_values"])

In [14]:
target_sizes = [image.size]
threshold = 0.88
text_locations = processor.post_process_text_detection(outputs, target_sizes, threshold, bbox_type="poly")

image size (640, 864)


In [36]:
target_sizes

[(400, 300)]

In [42]:
text_locations

[{'bboxes': [[134,
    75,
    134,
    79,
    134,
    80,
    133,
    80,
    133,
    86,
    132,
    87,
    131,
    87,
    131,
    89,
    131,
    90,
    129,
    90,
    129,
    91,
    128,
    92,
    123,
    92,
    123,
    96,
    123,
    97,
    122,
    97,
    122,
    99,
    121,
    100,
    120,
    100,
    120,
    101,
    120,
    102,
    116,
    102,
    116,
    104,
    116,
    105,
    115,
    105,
    115,
    106,
    114,
    107,
    112,
    107,
    112,
    126,
    112,
    127,
    111,
    127,
    111,
    139,
    112,
    139,
    112,
    140,
    112,
    141,
    113,
    141,
    113,
    142,
    113,
    144,
    114,
    144,
    115,
    145,
    115,
    156,
    114,
    157,
    113,
    157,
    113,
    164,
    113,
    165,
    112,
    165,
    112,
    181,
    112,
    182,
    111,
    182,
    111,
    204,
    113,
    204,
    113,
    205,
    113,
    206,
    126,
    206,
    126,
    207,
    126,
    209,

In [17]:
import random
import numpy as np
import cv2
img = np.array(image)

# Iterate over the text_locations to process and visualize bounding boxes
for idx, location in enumerate(text_locations):
    print(f"Processing entry {idx + 1}:")
    bboxes = location['bboxes']
    
    for bbox_idx, bbox in enumerate(bboxes):
        # Convert flat bbox list to (x, y) coordinate pairs
        points = np.array([(bbox[i], bbox[i + 1]) for i in range(0, len(bbox), 2)], np.int32)
        print(f"  Bounding Box {bbox_idx + 1}: {points.tolist()}")

        # Random color for each bounding box
        color = (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))

        # Draw the polygon on the image
        cv2.polylines(img, [points], isClosed=True, color=color, thickness=2)

# Convert the image back to PIL format for display and saving
img_with_bboxes = Image.fromarray(img)
img_with_bboxes.show()
img_with_bboxes.save("final_bbox_visualization.png")

Processing entry 1:
  Bounding Box 1: [[134, 75], [134, 79], [134, 80], [133, 80], [133, 86], [132, 87], [131, 87], [131, 89], [131, 90], [129, 90], [129, 91], [128, 92], [123, 92], [123, 96], [123, 97], [122, 97], [122, 99], [121, 100], [120, 100], [120, 101], [120, 102], [116, 102], [116, 104], [116, 105], [115, 105], [115, 106], [114, 107], [112, 107], [112, 126], [112, 127], [111, 127], [111, 139], [112, 139], [112, 140], [112, 141], [113, 141], [113, 142], [113, 144], [114, 144], [115, 145], [115, 156], [114, 157], [113, 157], [113, 164], [113, 165], [112, 165], [112, 181], [112, 182], [111, 182], [111, 204], [113, 204], [113, 205], [113, 206], [126, 206], [126, 207], [126, 209], [130, 209], [130, 207], [130, 206], [135, 206], [135, 202], [136, 201], [138, 201], [138, 202], [138, 204], [144, 204], [144, 202], [144, 201], [145, 201], [145, 200], [145, 199], [146, 199], [146, 190], [147, 189], [151, 189], [151, 187], [151, 186], [156, 186], [156, 185], [156, 184], [157, 184], [157, 

display-im6.q16: unable to open X server `' @ error/display.c/DisplayImageCommand/412.


In [3]:
config_model_file_path = hf_hub_download(repo_id="jadechoghari/fast-configs", filename="fast_tiny_ic17mlt_640.json")

with open(config_model_file_path) as f:
        content_model = json.loads(f.read())
model_config = content_model["model"]
test_config = content_model.get("test_cfg", None)
data_config = content_model["data"]

fast_tiny_ic17mlt_640.json: 100%|██████████| 1.75k/1.75k [00:00<00:00, 6.70MB/s]
