In [1]:
# !mkdir examples
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Hello.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Hello_cursive.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Red.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/sentence.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/i_love_you.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/merrychristmas.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Rock.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Bob.png

In [2]:
import os
import numpy as np
import gradio as gr
from PIL import Image
from transformers import TrOCRProcessor, AutoModelForVision2Seq

In [3]:
import torch
print(torch.cuda.is_available())

False


# Section 1: Image Upload UI

In [4]:
# Create a simple image upload interface using Gradio
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    image = gr.Image(label="Handwritten image file")  # Image upload input
    output = gr.Textbox(label="Output Box")  # Textbox to show OCR result
    convert_btn = gr.Button("Convert")  # Button to trigger OCR conversion

In [5]:
app.launch(inline=False, share=True)  # Launch the app without inline display and share the link

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://cd672f778d9230b356.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




# Section 2: Implement Handwritten OCR with TrOCR Model

In [25]:
# Class to handle TrOCR-based OCR processing
class TrOCRInferencer:
    def __init__(self):
        print("[info] init TrOCR Inferencer")

        # Initialize the processor
        self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten", clean_up_tokenization_spaces=True)
        
        # Initialize the model using AutoModel
        self.model = AutoModelForVision2Seq.from_pretrained("microsoft/trocr-large-handwritten")
    
    # Method to process and infer text from an image
    def inference(self, image):
        pixel_values = self.processor(images=image, return_tensors='pt').pixel_values
        generated_ids = self.model.generate(pixel_values)
        generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        
        return generated_text

In [None]:
# Create an instance of TrOCRInferencer
inferencer = TrOCRInferencer()

[info] init TrOCR Inferencer


# Section 3: Image-to-Text Functionality

In [21]:
# Function to convert an uploaded image to text using the OCR model
def image_to_text(image):
    image = Image.fromarray(image).convert('RGB')  # Convert the image to RGB
    text = inferencer.inference(image)  # Get the predicted text
    return text

In [9]:
# Create a Gradio interface for image upload and OCR conversion
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")  # Add a title
    image = gr.Image(label="Handwritten image file")  # Image upload input
    output = gr.Textbox(label="Output Box")  # Textbox to display OCR result
    convert_btn = gr.Button("Convert")  # Convert button
    convert_btn.click(
        fn=image_to_text, inputs=image, outputs=output  # Link the button to the OCR function
    )

In [10]:
app.launch(inline=False, share=True)  # Launch the app

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://d4808841892014290f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




# Section 4: Canvas UI for Handwritten Drawing Recognition

In [11]:
# Create a sketchpad for users to draw their handwriting, then recognize it using OCR
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    sketchpad = gr.Sketchpad(
        label="Handwritten Sketchpad",
        height=300,
        width=600,
    )
    output = gr.Textbox(label="Output Box")  # Textbox for OCR result
    convert_btn = gr.Button("Convert")  # Button to trigger OCR on the drawing
    convert_btn.click(
        fn=image_to_text, inputs=sketchpad, outputs=output  # Link sketchpad input to OCR function
    )

In [12]:
app.launch(inline=False, share=True)  # Launch the app

Running on local URL:  http://127.0.0.1:7862
Running on public URL: https://a3c81ec42e7fd48252.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 2405, in run_sync_in_worker_thread


# Section 5: Final App with Tabs for Image Upload and Drawing

In [22]:
# Build the final app with tabs for both image upload and sketchpad
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")

    # First Tab: Image upload and OCR
    with gr.Tab("Image upload"):
        image = gr.Image(label="Handwritten image file")
        output = gr.Textbox(label="Output Box")
        convert_btn = gr.Button("Convert")
        convert_btn.click(
            fn=image_to_text, inputs=image, outputs=output  # Link the image input to OCR function
        )
        gr.Markdown("## Image Examples")
        # Provide example images for users to test
        gr.Examples(
            examples=[
                os.path.join(os.getcwd(), "examples/Hello.png"),
                os.path.join(os.getcwd(), "examples/Hello_cursive.png"),
                os.path.join(os.getcwd(), "examples/Red.png"),
                os.path.join(os.getcwd(), "examples/sentence.png"),
                os.path.join(os.getcwd(), "examples/i_love_you.png"),
                os.path.join(os.getcwd(), "examples/merrychristmas.png"),
                os.path.join(os.getcwd(), "examples/Rock.png"),
                os.path.join(os.getcwd(), "examples/Bob.png"),
            ],
            inputs=image,
            outputs=output,
            fn=image_to_text
        )

    # Second Tab: Drawing on a sketchpad and recognizing the text
    with gr.Tab("Drawing"):
        gr.Markdown("# Handwritten Image OCR")
        sketchpad = gr.Sketchpad(
            label="Handwritten Sketchpad",
            height=300,
            width=600,
        )
        output = gr.Textbox(label="Output Box")
        convert_btn = gr.Button("Convert")
        convert_btn.click(
            fn=image_to_text, inputs=sketchpad, outputs=output  # Link sketchpad input to OCR function
        )

In [23]:
app.launch(inline=False, share=True)  # Launch the app with sharing enabled

Running on local URL:  http://127.0.0.1:7864
Running on public URL: https://5083b7cdbaf827d208.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/gradio/blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/root/miniconda3/envs/my_env/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 2405, in run_sync_in_worker_thread
