In [2]:
import pytesseract
from pytesseract import Output
import cv2

In [15]:
# Preprocessing functions
# get grayscale image
import numpy as np


def get_grayscale(image):
    """Return a single-channel grayscale version of ``image``.

    Args:
        image: Image array. Multi-channel input is converted with
            ``cv2.COLOR_BGR2GRAY`` (i.e. it is treated as BGR, the channel
            order ``cv2.imread`` produces). A 2-D array is taken to be
            grayscale already.

    Returns:
        A new array holding the grayscale image; the input is never returned
        by reference.
    """
    # cvtColor with BGR2GRAY rejects single-channel input, so an image that
    # is already 2-D is passed through (as a copy) instead of raising.
    if np.ndim(image) == 2:
        return np.array(image)
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal
def remove_noise(image, kernel_size=5):
    """Smooth ``image`` with a median filter.

    Args:
        image: Image array to filter.
        kernel_size: Side length of the square median window. Must be an odd
            integer greater than 1. Defaults to 5, the value this function
            always used before the parameter existed.

    Returns:
        The filtered image as returned by ``cv2.medianBlur``.

    Raises:
        ValueError: If ``kernel_size`` is even or smaller than 3.
    """
    # Reject bad sizes up front so the caller gets a readable message rather
    # than an OpenCV assertion failure.
    if kernel_size < 3 or kernel_size % 2 == 0:
        raise ValueError(
            f"kernel_size must be an odd integer greater than 1, got {kernel_size}"
        )
    return cv2.medianBlur(image, kernel_size)
 
#thresholding
def thresholding(image):
    """Binarize ``image`` using Otsu's automatically selected threshold.

    Args:
        image: Image array handed straight to ``cv2.threshold``.

    Returns:
        The thresholded image (the second element of ``cv2.threshold``'s
        result); the threshold value that was chosen is discarded.
    """
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # The 0 threshold argument is a placeholder: the OTSU flag asks OpenCV to
    # pick the threshold itself. 255 is the value given to "on" pixels.
    _chosen_threshold, binary = cv2.threshold(image, 0, 255, otsu_binary)
    return binary

#dilation
def dilate(image, kernel_size=5, iterations=1):
    """Dilate ``image`` with a square structuring element of ones.

    Args:
        image: Image array to dilate.
        kernel_size: Side length of the square kernel. Defaults to 5, the
            size this function always used before the parameter existed.
        iterations: Number of times the dilation is applied. Defaults to 1.

    Returns:
        The dilated image as returned by ``cv2.dilate``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.dilate(image, kernel, iterations=iterations)
    
#erosion
def erode(image, kernel_size=5, iterations=1):
    """Erode ``image`` with a square structuring element of ones.

    Args:
        image: Image array to erode.
        kernel_size: Side length of the square kernel. Defaults to 5, the
            size this function always used before the parameter existed.
        iterations: Number of times the erosion is applied. Defaults to 1.

    Returns:
        The eroded image as returned by ``cv2.erode``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.erode(image, kernel, iterations=iterations)

#opening - erosion followed by dilation
def opening(image, kernel_size=5):
    """Apply a morphological opening (erosion followed by dilation).

    Args:
        image: Image array to process.
        kernel_size: Side length of the square kernel of ones. Defaults to 5,
            the size this function always used before the parameter existed.

    Returns:
        The opened image as returned by ``cv2.morphologyEx``.
    """
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

#canny edge detection
def canny(image, low_threshold=100, high_threshold=200):
    """Run Canny edge detection on ``image``.

    Args:
        image: Image array handed straight to ``cv2.Canny``.
        low_threshold: Lower hysteresis threshold. Defaults to 100.
        high_threshold: Upper hysteresis threshold. Defaults to 200.
            Both defaults are the values this function always used before
            the parameters existed.

    Returns:
        The edge map as returned by ``cv2.Canny``.
    """
    return cv2.Canny(image, low_threshold, high_threshold)

#skew correction
def deskew(image):
    """Rotate ``image`` so the bounding rectangle of its non-zero pixels is upright.

    The skew is estimated from the minimum-area rectangle around every pixel
    whose value is greater than zero, and the image is then rotated about its
    centre by the compensating angle.

    Args:
        image: Image array. Assumes a 2-D image in which the content of
            interest is non-zero on a zero background -- TODO confirm with
            callers.

    Returns:
        A rotated image of the same width and height. If the image has no
        non-zero pixels there is nothing to measure, and an unrotated copy is
        returned.
    """
    # np.where yields (row, col) pairs; the sign handling below is written for
    # that ordering, so the columns are deliberately not swapped to (x, y).
    coords = np.column_stack(np.where(image > 0))
    if coords.shape[0] == 0:
        return image.copy()

    angle = cv2.minAreaRect(coords)[-1]
    # minAreaRect's angle convention differs between OpenCV releases: older
    # ones report values in [-90, 0), newer ones (4.5.1 onward, as far as the
    # OpenCV docs/changelog indicate) report (0, 90]. Fold both into the
    # smallest corrective rotation, within [-45, 45].
    if angle < -45:
        angle = -(90 + angle)
    elif angle >= 45:
        # Newer convention: an upright rectangle reports 90, which must map to
        # "no rotation" rather than a quarter turn.
        angle = 90 - angle
    else:
        angle = -angle

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Replicating the border avoids introducing black wedges at the corners.
    rotated = cv2.warpAffine(
        image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )
    return rotated

#template matching
def match_template(image, template):
    """Score how well ``template`` matches at each position in ``image``.

    Args:
        image: Image array to search in.
        template: Patch to look for.

    Returns:
        The result map from ``cv2.matchTemplate`` using the
        ``cv2.TM_CCOEFF_NORMED`` comparison method.
    """
    method = cv2.TM_CCOEFF_NORMED
    scores = cv2.matchTemplate(image, template, method)
    return scores

#border removal
def remove_border(image):
    """Force every pixel outside the image's external contours to 255.

    Args:
        image: Image array passed to ``cv2.findContours``; assumes a
            single-channel 8-bit (binary) image -- TODO confirm with callers.

    Returns:
        ``image`` OR-ed with a mask that is 0 inside the filled external
        contours and 255 everywhere else.
    """
    found = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns either two or three values depending on the OpenCV
    # build; the contour list is first in the former case, second in the latter.
    contours = found[1] if len(found) != 2 else found[0]

    inside = np.zeros(image.shape, dtype=np.uint8)
    cv2.fillPoly(inside, contours, [255,255,255])

    # Invert so the region outside the contours is the part that is "on".
    outside = 255 - inside
    return cv2.bitwise_or(image, outside)

In [35]:
# Load the screenshot; cv2.imread yields BGR channel order
# image = cv2.imread("D:/Download/ocr_test.png")
image_path = "E:/Internship/Common_resources/Screenshots/Screenshot 2023-12-15 141717.png"
image = cv2.imread(image_path)
# cv2.imread reports a missing/unreadable file by returning None rather than
# raising, so fail here with a clear message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# RGB copy, kept for drawing the OCR bounding boxes later
rgbImage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Preprocessing: grayscale -> Otsu threshold -> whiten outside external contours
processed_img = get_grayscale(image)
# processed_img = remove_noise(processed_img)
# processed_img = dilate(processed_img)
# processed_img = erode(processed_img)
processed_img = thresholding(processed_img)
processed_img = remove_border(processed_img)
cv2.imshow('img', processed_img)
cv2.waitKey(0)
# Close the preview window once a key is pressed; otherwise it lingers
# (and can hang) while the notebook kernel keeps running.
cv2.destroyAllWindows()

-1

In [24]:
# Tell pytesseract where the Tesseract executable lives on this machine
pytesseract.pytesseract.tesseract_cmd = r'D:/Programs/Tesseract-OCR/tesseract.exe'
# Command-line flags forwarded to Tesseract: engine mode 3, page segmentation mode 12
custom_config = r'--oem 3 --psm 12'

# Run OCR on the preprocessed image; Output.DICT gives a dict of parallel
# lists (one entry per detected element) keyed by 'left', 'top', 'conf', 'text', ...
d = pytesseract.image_to_data(
    processed_img,
    output_type=Output.DICT,
    config=custom_config,
)
print(d)

{'level': [1, 2, 3, 4, 5, 2, 3, 4, 5], 'page_num': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'block_num': [0, 1, 1, 1, 1, 2, 2, 2, 2], 'par_num': [0, 0, 1, 1, 1, 0, 1, 1, 1], 'line_num': [0, 0, 0, 1, 1, 0, 0, 1, 1], 'word_num': [0, 0, 0, 0, 1, 0, 0, 0, 1], 'left': [0, 32, 32, 32, 32, 26, 26, 26, 26], 'top': [0, 9, 9, 9, 9, 41, 41, 41, 41], 'width': [96, 33, 33, 33, 33, 23, 23, 23, 23], 'height': [73, 25, 25, 25, 25, 25, 25, 25, 25], 'conf': [-1, -1, -1, -1, 53, -1, -1, -1, 93], 'text': ['', '', '', '', '19C', '', '', '', '50']}


In [30]:
# Display bounding boxes
n_boxes = len(d['level'])
for i in range(n_boxes):
    (l,t,w,h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    # Draw bounding boxes on words only (conf > -1).
    # The confidence can arrive as an int, a float, or a decimal string such
    # as '96.58' depending on the pytesseract/Tesseract versions; float()
    # accepts all of these, whereas int() raises on the decimal string.
    if float(d['conf'][i]) > -1:
        cv2.rectangle(rgbImage, (l,t), (l+w, t+h), (0, 255, 0), 2)

# rgbImage is in RGB order but cv2.imshow interprets arrays as BGR, so convert
# back for display to avoid swapped red/blue channels.
cv2.imshow('img', cv2.cvtColor(rgbImage, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
# Close the preview window once a key is pressed so it does not linger.
cv2.destroyAllWindows()

-1