# 손글씨 인식 Application
Colab 환경에서 손글씨 인식 애플리케이션을 만들어봅시다. 애플리케이션 사용자의 유스케이스는 아래와 같습니다.
- 사용자는 손글씨 이미지 파일을 업로드할 수 있다.
- 사용자는 캔버스에 손글씨를 쓸 수 있다.
- 사용자는 텍스트 결과를 확인할 수 있다.

## Colab 환경 설정
필요한 Python 패키지를 설치하고, 예제로 사용할 이미지도 다운로드합니다.

In [None]:
# Set this to False when running locally instead of on Colab; the setup cell
# only downloads and installs things when it is True.
using_colab = True

In [None]:
if using_colab:
    !wget https://raw.githubusercontent.com/mrsyee/dl_apps/main/ocr/requirements.txt
    !pip install -r requirements.txt

    !mkdir examples
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Hello.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Hello_cursive.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Red.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/sentence.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/i_love_you.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/merrychristmas.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Rock.png
    !cd examples && wget https://github.com/mrsyee/dl_apps/raw/main/ocr/examples/Bob.png

## Import dependency

In [None]:
import os

import gradio as gr
import numpy as np
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

## 이미지 업로드 UI

In [None]:
# Layout-only version of the upload UI: the components are declared, but no
# event handler is attached in this cell, so the button does nothing yet.
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    # Input component for the handwriting image.
    image = gr.Image(label="Handwritten image file")
    # Text box meant to display the recognized text.
    output = gr.Textbox(label="Output Box")
    convert_btn = gr.Button("Convert")

In [None]:
# Start serving the app. NOTE(review): per the Gradio docs, inline=False should
# skip embedding the UI in the notebook output and share=True should request a
# public share link -- confirm against the installed Gradio version.
app.launch(inline=False, share=True)

In [None]:
# Stop the running app before the next cell rebinds `app` to a new Blocks.
app.close()

## TrOCR 추론기 클래스
TrOCR 추론기 클래스는 TrOCR 모델 및 processor 초기화와 추론 작업을 수행하는 클래스입니다.

In [None]:
class TrOCRInferencer:
    """Handwriting recognizer built from a pretrained TrOCR processor and model.

    Args:
        model_name: Checkpoint name or path passed to both ``from_pretrained``
            calls. Defaults to ``"microsoft/trocr-base-handwritten"``.
    """

    def __init__(self, model_name: str = "microsoft/trocr-base-handwritten"):
        print("[INFO] Initialize TrOCR Inferencer.")
        # Processor and model are loaded from the same checkpoint name so the
        # two can never be configured inconsistently.
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    def inference(self, image: "Image.Image") -> str:
        """Recognize the text in a single image.

        Runs the three stages in order: preprocessing, generation, decoding.

        Args:
            image: The image handed to the processor.

        Returns:
            The first string produced by decoding the generated token ids.
        """
        # preprocess: turn the image into the tensor batch the model consumes
        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
        # inference: generate output token ids
        generated_ids = self.model.generate(pixel_values)
        # postprocess: decode ids to text; one image in, so keep only the
        # first entry of the decoded batch
        generated_text = self.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return generated_text


# Build one module-level inferencer up front; the event handler reads this
# global, so the processor and model are loaded once rather than per request.
inferencer = TrOCRInferencer()

## 추론 기능 구현

In [None]:
def image_to_text(image: "np.ndarray | None") -> str:
    """Convert a handwriting image array to text using the shared inferencer.

    Args:
        image: Pixel array supplied by the bound UI component, or ``None``
            when the handler is triggered without any image.

    Returns:
        The recognized text, or an empty string when ``image`` is ``None``.
    """
    if image is None:
        # Nothing to recognize; without this guard Image.fromarray would raise.
        return ""
    # The inferencer is always fed an RGB PIL image, whatever the array's mode.
    pil_image = Image.fromarray(image).convert("RGB")
    return inferencer.inference(pil_image)

In [None]:
# Upload UI with the recognition handler attached to the button.
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    image = gr.Image(label="Handwritten image file")
    output = gr.Textbox(label="Output Box")
    convert_btn = gr.Button("Convert")
    # On click, pass the image component's value to image_to_text and show the
    # returned string in the output text box.
    convert_btn.click(
        fn=image_to_text, inputs=image, outputs=output
    )

In [None]:
# Serve the upload UI. NOTE(review): inline=False / share=True are expected to
# mean "do not embed in the notebook" and "create a public share link" --
# confirm against the installed Gradio version.
app.launch(inline=False, share=True)

In [None]:
# Shut the upload UI down before building the canvas version of `app`.
app.close()

## 캔버스 UI

In [None]:
# Canvas UI: the user draws handwriting on a sketchpad instead of uploading.
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")
    # NOTE(review): shape / brush_radius / invert_colors look like Gradio 3.x
    # Sketchpad options (canvas size as (width, height), pen thickness, and
    # colour inversion of the drawing) -- confirm against the Gradio version
    # installed from requirements.txt.
    sketchpad = gr.Sketchpad(
        label="Handwritten Sketchpad",
        shape=(600, 192),
        brush_radius=2,
        invert_colors=False,
    )
    output = gr.Textbox(label="Output Box")
    convert_btn = gr.Button("Convert")
    # Same handler as the upload UI, fed from the sketchpad this time.
    convert_btn.click(
        fn=image_to_text, inputs=sketchpad, outputs=output
    )

In [None]:
# Serve the canvas UI. NOTE(review): inline=False / share=True are expected to
# disable notebook embedding and request a public share link -- confirm
# against the installed Gradio version.
app.launch(inline=False, share=True)

In [None]:
# Shut the canvas UI down before the final combined app is built.
app.close()

## 최종 App 구현

In [None]:
# Implement inferencer
class TrOCRInferencer:
    """Handwriting recognizer built from a pretrained TrOCR processor and model.

    Args:
        model_name: Checkpoint name or path passed to both ``from_pretrained``
            calls. Defaults to ``"microsoft/trocr-base-handwritten"``.
    """

    def __init__(self, model_name: str = "microsoft/trocr-base-handwritten"):
        print("[INFO] Initialize TrOCR Inferencer.")
        # Processor and model are loaded from the same checkpoint name so the
        # two can never be configured inconsistently.
        self.processor = TrOCRProcessor.from_pretrained(model_name)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_name)

    def inference(self, image: "Image.Image") -> str:
        """Recognize the text in a single image.

        Runs the three stages in order: preprocessing, generation, decoding.

        Args:
            image: The image handed to the processor.

        Returns:
            The first string produced by decoding the generated token ids.
        """
        # preprocess: turn the image into the tensor batch the model consumes
        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
        # inference: generate output token ids
        generated_ids = self.model.generate(pixel_values)
        # postprocess: decode ids to text; one image in, so keep only the
        # first entry of the decoded batch
        generated_text = self.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return generated_text

# Single shared instance read by image_to_text; constructing it here loads the
# processor and model once for the whole app.
inferencer = TrOCRInferencer()


# Implement event function
def image_to_text(image: "np.ndarray | None") -> str:
    """Convert a handwriting image array to text using the shared inferencer.

    Args:
        image: Pixel array supplied by the bound UI component, or ``None``
            when the handler is triggered without any image.

    Returns:
        The recognized text, or an empty string when ``image`` is ``None``.
    """
    if image is None:
        # Nothing to recognize; without this guard Image.fromarray would raise.
        return ""
    # The inferencer is always fed an RGB PIL image, whatever the array's mode.
    pil_image = Image.fromarray(image).convert("RGB")
    return inferencer.inference(pil_image)


# Implement app
# Final UI: one tab for uploaded image files (with clickable examples) and one
# tab for drawing on a canvas. Both tabs route through image_to_text.
with gr.Blocks() as app:
    gr.Markdown("# Handwritten Image OCR")

    # Tab 1: recognize text from an uploaded image.
    with gr.Tab("Image upload"):
        image = gr.Image(label="Handwritten image file")
        output = gr.Textbox(label="Output Box")
        convert_btn = gr.Button("Convert")
        convert_btn.click(fn=image_to_text, inputs=image, outputs=output)

        gr.Markdown("## Image Examples")
        gr.Examples(
            # Absolute paths to the sample images under ./examples.
            examples=[
                os.path.join(os.getcwd(), f"examples/{file_name}")
                for file_name in (
                    "Hello.png",
                    "Hello_cursive.png",
                    "Red.png",
                    "sentence.png",
                    "i_love_you.png",
                    "merrychristmas.png",
                    "Rock.png",
                    "Bob.png",
                )
            ],
            fn=image_to_text,
            inputs=image,
            outputs=output,
        )

    # Tab 2: recognize text drawn on the sketchpad. `output` and `convert_btn`
    # are rebound here to this tab's own components.
    with gr.Tab("Drawing"):
        sketchpad = gr.Sketchpad(
            label="Handwritten Sketchpad",
            shape=(600, 192),
            brush_radius=2,
            invert_colors=False,
        )
        output = gr.Textbox(label="Output Box")
        convert_btn = gr.Button("Convert")
        convert_btn.click(fn=image_to_text, inputs=sketchpad, outputs=output)

In [None]:
# Launch the app. NOTE(review): inline=False / share=True are expected to
# disable notebook embedding and request a public share link -- confirm
# against the installed Gradio version.
app.launch(inline=False, share=True)

In [None]:
# Shut the final app down when finished.
app.close()