In [1]:
# Demo: basic usage of the pytesseract library, served through a Gradio app that can be hosted on Hugging Face.

In [None]:
# import all the required packages
import re
import cv2 
import numpy as np
import pytesseract
from pytesseract import Output
import gradio as gr

In [2]:
#image preprocessing function 

#grayscale image
# simplifies the image and reduces computational complexity for further processing.
def get_grayscale(image):
    """Return a grayscale version of a colour image.

    Args:
        image: Colour image in OpenCV's BGR channel order (the conversion
            code used is ``cv2.COLOR_BGR2GRAY``).

    Returns:
        Whatever ``cv2.cvtColor`` produces for that conversion.
    """
    conversion = cv2.COLOR_BGR2GRAY
    gray = cv2.cvtColor(image, conversion)
    return gray

# noise removal
# helps in smoothing the image and reducing irregularities, which can improve the accuracy of OCR (Optical Character Recognition).
def remove_noise(image, ksize=5):
    """Smooth an image with a median filter.

    Args:
        image: Image to denoise; passed straight to ``cv2.medianBlur``.
        ksize: Side length of the square median window. OpenCV requires an
            odd value greater than 1. Defaults to 5, the value this function
            always used before the parameter existed.

    Returns:
        The blurred image returned by ``cv2.medianBlur``.

    Raises:
        ValueError: If ``ksize`` is even or smaller than 3.
    """
    # Fail early with a readable message instead of an opaque OpenCV assertion.
    if ksize < 3 or ksize % 2 == 0:
        raise ValueError(f"ksize must be an odd integer >= 3, got {ksize!r}")
    return cv2.medianBlur(image, ksize)

#thresholding
# makes the text stand out by converting all pixel values above a certain threshold to white (255) and below that threshold to black (0).
def thresholding(image):
    """Binarise an image using a binary threshold combined with the Otsu flag.

    Args:
        image: Image handed to ``cv2.threshold`` (typically grayscale).

    Returns:
        The second element of ``cv2.threshold``'s result, i.e. the
        thresholded image with a maximum value of 255.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    result = cv2.threshold(image, 0, 255, mode)
    # Index 0 is the threshold value OpenCV reports; index 1 is the image.
    return result[1]

#dilation
# helps connect broken parts of the text.
def dilate(image, kernel_size=5, iterations=1):
    """Dilate an image with a square all-ones structuring element.

    Args:
        image: Image passed to ``cv2.dilate``.
        kernel_size: Side length of the square ``uint8`` kernel. Defaults to
            5, matching the previously hard-coded 5x5 kernel.
        iterations: Number of times the dilation is applied. Defaults to 1.

    Returns:
        The dilated image returned by ``cv2.dilate``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.dilate(image, kernel, iterations=iterations)
    
#erosion
# helps remove small white noise and separate connected objects.
def erode(image, kernel_size=5, iterations=1):
    """Erode an image with a square all-ones structuring element.

    Args:
        image: Image passed to ``cv2.erode``.
        kernel_size: Side length of the square ``uint8`` kernel. Defaults to
            5, matching the previously hard-coded 5x5 kernel.
        iterations: Number of times the erosion is applied. Defaults to 1.

    Returns:
        The eroded image returned by ``cv2.erode``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.erode(image, kernel, iterations=iterations)

#opening - erosion followed by dilation
# useful for removing noise and small artifacts from the binary image.
def opening(image, kernel_size=5):
    """Apply a morphological opening (erosion followed by dilation).

    Args:
        image: Image passed to ``cv2.morphologyEx``.
        kernel_size: Side length of the square all-ones ``uint8`` kernel.
            Defaults to 5, matching the previously hard-coded 5x5 kernel.

    Returns:
        The opened image returned by ``cv2.morphologyEx`` with
        ``cv2.MORPH_OPEN``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

#canny edge detection
# useful for identifying the outlines of text and other significant regions.
def canny(image, low_threshold=100, high_threshold=200):
    """Run Canny edge detection on an image.

    Args:
        image: Image passed to ``cv2.Canny``.
        low_threshold: First threshold passed to ``cv2.Canny``. Defaults to
            100, the previously hard-coded value.
        high_threshold: Second threshold passed to ``cv2.Canny``. Defaults
            to 200, the previously hard-coded value.

    Returns:
        The edge map returned by ``cv2.Canny``.
    """
    return cv2.Canny(image, low_threshold, high_threshold)

#skew correction
# ensures that the text is properly aligned horizontally, which can improve OCR accuracy.
def deskew(image):
    """Rotate an image so that its non-zero content is axis-aligned.

    The skew is estimated from the minimum-area rectangle around every
    non-zero pixel, so the input is presumably a single-channel image in
    which text pixels are non-zero on a zero background (TODO confirm with
    callers; a 3-channel array would produce 3-column coordinates).

    Args:
        image: NumPy image array; only its first two dimensions are used as
            (height, width).

    Returns:
        A new array of the same width and height, rotated about the image
        centre. If the image has no non-zero pixels an unrotated copy is
        returned, because there is nothing to estimate a skew from.
    """
    # (row, col) position of every non-zero pixel, one point per row.
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]
    # Older OpenCV builds report the rectangle angle in [-90, 0); newer ones
    # (4.5.1 and later) report it in (0, 90]. Fold either range into
    # [-45, 45] so an upright page is never turned by a quarter rotation.
    # For the old range this yields exactly the same result as before.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90
    angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Replicate border pixels so the corners exposed by the rotation are
    # filled with neighbouring content rather than a constant colour.
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    return rotated

#template matching
# can be useful for detecting specific patterns or text areas within the image.
def match_template(image, template):
    """Compare a template against an image with OpenCV template matching.

    Args:
        image: Image to search in.
        template: Patch to look for.

    Returns:
        The result of ``cv2.matchTemplate`` using ``cv2.TM_CCOEFF_NORMED``.
    """
    method = cv2.TM_CCOEFF_NORMED
    scores = cv2.matchTemplate(image, template, method)
    return scores

In [4]:
# gradio app 
def extract_text_from_image(img):
    """Run OCR on an uploaded image and return the recognised text.

    Pipeline: channel swap -> grayscale -> median-blur denoise -> Tesseract.

    Args:
        img: Image array supplied by the Gradio ``Image`` input, assumed to
            be in RGB channel order (Gradio's documented default), or
            ``None`` when the user submits without uploading anything.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or an empty
        string when no image was provided.
    """
    # Submitting the form with no upload yields None; cv2 would raise on it.
    if img is None:
        return ""
    # Swap the channel order to BGR, which is what get_grayscale converts from.
    im_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    gray = get_grayscale(im_bgr)
    noise_remove = remove_noise(gray)
    text = pytesseract.image_to_string(noise_remove)
    return text

# Build the input and output widgets first, then wire them to the OCR callback.
image_input = gr.Image(label="upload image")
text_output = gr.Textbox()
app = gr.Interface(fn=extract_text_from_image, inputs=image_input, outputs=text_output)

# share=True requests a temporary public URL in addition to the local server.
app.launch(share=True)

IMPORTANT: You are using gradio version 3.34.0, however version 4.29.0 is available, please upgrade.
--------
Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://71d73c36700f0feeb3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


