In [None]:
# Optional setup: uncomment the lines below to download the example images into ./examples
# !mkdir examples
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Hello.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Hello_cursive.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Red.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/sentence.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/i_love_you.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/merrychristmas.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Rock.png
# !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Bob.png

In [None]:
import os
import numpy as np
import gradio as gr
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

In [None]:
import torch
# Print whether PyTorch reports a usable CUDA device (True/False).
# This is a diagnostic only: the result is printed, not stored, and nothing
# in this cell selects a device.
print(torch.cuda.is_available())

# Section 1: Image Upload UI

In [13]:
# Build the Gradio UI for the handwritten-OCR demo: a title, an image input,
# a textbox for the recognized text, and a button.
# NOTE: no event handler is attached to `convert_btn` in this cell; the
# components are only declared here.
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    image = gr.Image(label="Handwritten image file")  # Image upload input
    output = gr.Textbox(label="Output Box")  # Textbox that will show the OCR result
    convert_btn = gr.Button("Convert")  # Button intended to trigger the OCR conversion

In [14]:
# Start the Gradio server for `app`.
#   inline=False -> do not embed the UI in the notebook output.
#   share=True   -> also request a public share URL (the "Running on public URL"
#                   line in the output); anyone holding that link can reach the
#                   app until the link expires.
app.launch(inline=False, share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://d860293b2ee8d209b0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




# Section 2: Implement Handwritten OCR with TrOCR Model

In [16]:
# Class to handle TrOCR-based OCR processing
class TrOCRInferencer:
    """Recognize handwritten text in an image with a pretrained TrOCR model.

    The processor (image preprocessing + token decoding) and the
    encoder-decoder model are both loaded from the same checkpoint name.
    """

    # Checkpoint used when the caller does not specify one.
    DEFAULT_MODEL_NAME = "microsoft/trocr-large-handwritten"

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        """Load the TrOCR processor and model.

        Args:
            model_name: Hugging Face checkpoint identifier or local path.
                Defaults to the large handwritten TrOCR checkpoint.
        """
        print("[info] init TrOCR Inferencer")
        # Initialize the processor and model from the same pretrained checkpoint
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    @staticmethod
    def _to_three_channels(image):
        """Return `image` as a 3-channel image where that can be done safely.

        NumPy arrays are assumed to be channel-last (H, W) or (H, W, C):
        grayscale is replicated to three channels and an alpha channel is
        dropped. Objects exposing a PIL-style `mode`/`convert` API are
        converted to "RGB". Anything else is returned unchanged.
        """
        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                # (H, W) grayscale -> (H, W, 3)
                return np.stack([image] * 3, axis=-1)
            if image.ndim == 3 and image.shape[-1] == 1:
                # (H, W, 1) grayscale -> (H, W, 3)
                return np.repeat(image, 3, axis=-1)
            if image.ndim == 3 and image.shape[-1] == 4:
                # (H, W, 4) RGBA -> drop alpha
                return image[..., :3]
            return image

        # Duck-typed PIL image: avoids a hard dependency on the PIL name here.
        convert = getattr(image, "convert", None)
        if callable(convert) and getattr(image, "mode", "RGB") != "RGB":
            return convert("RGB")
        return image

    # Method to process and infer text from an image
    def inference(self, image):
        """Run OCR on a single image and return the recognized text.

        Args:
            image: A NumPy array or PIL image. `None` (e.g. the UI submitted
                no image) is accepted.

        Returns:
            The decoded text for the image, or an empty string when `image`
            is `None`.
        """
        if image is None:
            # Nothing to recognize; avoid failing inside the processor.
            return ""

        image = self._to_three_channels(image)
        pixel_values = self.processor(images=image, return_tensors='pt').pixel_values
        generated_ids = self.model.generate(pixel_values)
        # batch_decode returns one string per input image; only one was passed.
        generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return generated_text

In [None]:
# Create the shared TrOCRInferencer instance. Construction calls
# `from_pretrained` for both the processor and the model, so this cell is
# where the pretrained TrOCR weights get loaded.
inferencer = TrOCRInferencer()