# Workflow
![document_OCR](img_embed/w8_doc_scan.webp)

# Step 0: Setup
Make sure to install Tesseract OCR on your PC.

```
!pip install pytesseract

import pytesseract

pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"
```

In [1]:
import pytesseract

# Tell pytesseract where the Tesseract executable lives. This is a Windows
# install location; change it to match your own installation.
pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe"

In [2]:
import cv2 as cv
import numpy as np
from utils import display_image, display_images

# Switch on OpenCV's optimized code paths if they are not already enabled.
if not cv.useOptimized():
    cv.setUseOptimized(True)

# Echo the current setting as the cell output.
cv.useOptimized()

True

# Step 1: Load image and display image

In [3]:
# Read one sample receipt from disk and show it with the helper from utils.
img = cv.imread("images/w8/docs/receipt2.webp")
display_image("image", img, adjust=True)

# Description of the project workflow
1. Read image and visualize
2. Resize the image
3. Bilateral filtering
4. Convert from BGR to grayscale
5. Canny edge detector
6. Contour detection and post processing
7. Detect the rectangle with the Douglas–Peucker algorithm
8. Sort the points of the contours
9. Perspective transform
10. Apply OCR with tesseract.

In [4]:
from utils import resize_aspect_ratio, auto_canny

In [5]:
def filterFunc(x):
    """Predicate that keeps only sufficiently large contours.
    Arguments:
    ---
    x: a single contour (one element of the contour list returned by
       cv.findContours())

    Returns:
    ---
    Boolean: True when the contour's area is strictly greater than the
    module-level `min_area`, False otherwise."""
    return bool(cv.contourArea(x) > min_area)

Credit to the [PyImageSearch blog post](https://pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/) on the four-point perspective transform with OpenCV, and for its tips and tricks on using `pytesseract`, the Python wrapper for Google's Tesseract OCR engine.

In [6]:
from scipy.spatial.distance import euclidean

In [7]:
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Arguments:
    ---
    pts: array-like holding four (x, y) points. Both a flat (4, 2) layout and
         the (4, 1, 2) layout passed in by the caller in this notebook
         (a scaled cv.approxPolyDP result) are accepted.

    Returns:
    ---
    float32 array of shape (4, 2) in the order
    [top-left, top-right, bottom-right, bottom-left]."""
    # Flatten any singleton axis so each row is exactly one (x, y) pair.
    pts = np.asarray(pts).reshape(-1, 2)
    rect = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left corner and largest at the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right corner and largest at the bottom-left.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    return rect

def four_point_transform(image, pts):
    """Warp the quadrilateral described by `pts` into an upright rectangle.

    Arguments:
    ---
    image: source image handed to cv.warpPerspective
    pts: four corner points of the region to extract, in any order
         (they are ordered with order_points())

    Returns:
    ---
    The warped image. Its width is the longer of the top and bottom edges
    and its height the longer of the left and right edges, both truncated
    to whole pixels."""
    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Truncate the size to whole pixels once, so the destination corners and
    # the output canvas are built from the same integer dimensions.
    w = int(max(euclidean(tl, tr), euclidean(bl, br)))
    h = int(max(euclidean(tl, bl), euclidean(tr, br)))

    # Destination corners in the same tl, tr, br, bl order as `rect`.
    dst_points = np.float32([[0, 0], [w - 1, 0], [w - 1, h - 1], [0, h - 1]])

    M = cv.getPerspectiveTransform(rect, dst_points)
    warped = cv.warpPerspective(image, M, (w, h))

    return warped

In [8]:
# User-defined parameters
# Width handed to resize_aspect_ratio() before detection
new_width = 500
# Contours whose cv.contourArea() is not greater than this are discarded
min_area = 400
# Forwarded to auto_canny() as its `method` argument
canny_method = "otsu"
# Fraction of the contour perimeter used as the cv.approxPolyDP() tolerance
frac_peri = 0.02
# ratio = img.shape[1] / new_width

In [9]:
def detect_rectangle(img, new_width=new_width, frac_peri=frac_peri, min_area=min_area, canny_method=canny_method, debug=False):
    """Find the largest four-cornered contour in an image.

    Arguments:
    ---
    img: BGR image to search
    new_width: passed to resize_aspect_ratio() before detection
               (presumably the target width in pixels; confirm in utils)
    frac_peri: fraction of a contour's perimeter used as the tolerance of
               cv.approxPolyDP()
    min_area: contours whose area is not greater than this are ignored
    canny_method: forwarded to auto_canny() as `method`
    debug: when True, display the edge map and the detected contour

    Returns:
    ---
    The cv.approxPolyDP() result for the largest-area contour that
    simplifies to exactly four vertices, or None when there is none.
    Coordinates refer to the resized image, not to `img`."""
    # resize
    resized = resize_aspect_ratio(img, new_width, interpolation=cv.INTER_CUBIC)
    # bilateral filter
    resized = cv.bilateralFilter(resized, d=9, sigmaColor=25, sigmaSpace=25)
    # to grayscale
    gray = cv.cvtColor(resized, cv.COLOR_BGR2GRAY)
    # auto Canny
    edge = auto_canny(gray, method=canny_method)
    if debug:
        display_image("edge", edge)
    # contour
    contours, _ = cv.findContours(edge, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    # Filter on the `min_area` argument itself, so a value passed by the
    # caller is honoured, then try the biggest candidates first.
    contours_sorted = sorted(
        (c for c in contours if cv.contourArea(c) > min_area),
        key=cv.contourArea,
        reverse=True,
    )
    rect = None

    for c in contours_sorted:
        # approximate contour with a tolerance proportional to its perimeter
        peri = cv.arcLength(c, True)
        approx = cv.approxPolyDP(c, frac_peri * peri, True)

        if len(approx) == 4:
            rect = approx
            break

    if rect is not None and debug:
        # draw contour (only needed for the debug preview)
        cv.drawContours(resized, [rect], -1, (0, 255, 0), 2)
        display_image("contour", resized)

    return rect

In [13]:
# NOTE: `warped` is assigned inside the OCR loop cell (In [10]) below, so that
# cell must have been run before this one.
warped.shape

(458, 454, 3)

In [10]:
import os

# Run rectangle detection, perspective correction and OCR on every file in
# the document folder, collecting the recognised text per file name.
img_dir = "./images/w8/docs"
file_names = os.listdir(img_dir)
img_texts = {}

for file in file_names:
    filepath = os.path.join(img_dir, file)
    img = cv.imread(cv.samples.findFile(filepath))
    if img is None:
        # cv.imread signals an unreadable or non-image file by returning
        # None; skip it instead of failing on img.shape below.
        print(f"Skipping {file}: could not be read as an image")
        continue
    # Scale factor from the resized detection image back to the original.
    # Assumes resize_aspect_ratio() makes the width equal new_width.
    ratio = img.shape[1] / new_width
    # Pass new_width explicitly so detection and `ratio` use the same value.
    rect = detect_rectangle(img, new_width=new_width)
    if rect is None:
        # No four-cornered contour found; this file gets no entry in img_texts.
        continue
    warped = four_point_transform(img, rect * ratio)
    # image preprocessing before OCR
    # blur = cv.medianBlur(warped, 3)
    # gray = cv.cvtColor(warped, cv.COLOR_BGR2GRAY)
    # Otsu thresholding
    # th = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)[1]
    # display_image("th", th)
    display_image("warped", warped)
    text = pytesseract.image_to_string(
        cv.cvtColor(warped, cv.COLOR_BGR2GRAY),
        config="--psm 4 --oem 1"
    )
    print(text)
    img_texts[file] = text

|
NOT TESTED ON ANIMALS

pay

Main Street Restaurant
6332 Business Drive

Suite 528

Palo Alto california 94301

575-1628095

Fri 04/07/2017 11:36 AM

Merchant ID: 9hqjxvufdr
Terminal ID: 11111
Transaction ID: #e6d598eT
Type: CREDIT
PURCHASE
Number:  XXXXXXXXXXXX0041
Entry Mode: Swiped
Card Type: DISCOVER
Response: APPROVED
Approval Code: 819543
Sub Total USD$ 25.23
Viele eA
Total USD$ 29.01

Thanks for suppor ting
local business!

THANK YOU

No.
Received from

the sum of pesos

RECEIPT

Date —

HS 108

A

as payment for

p

Signature

TESCO

— ae ae ome

Metro
Pl TEL 0845 677921¢

F9ESH MILK 18
MJESLI 2129
DARK CHOCOLATE » F
26 £0.95 1.90

TOTAL 5. Of
MASTERCARD SALE 508

AID : A0000000041019

NJMBER 1 KKK ERHKKKKKKO IGE tcc

PAN SEQ NO : 02

AUTH CODE ; 036017

MERCHANT : 1833431

START : 10/10 EXPIRY : 11/13

Cardholder PIN Verified
CHANGE QUE

R_634004902428294x
POINTS THIS VISIT 5
TOTAL UP TO 22/04/12 717
TOTAL INCLUDES ;
T?F BONUS POINTS 360

. “

How did we do?
Visit www.tescoco

**Some important notes about the choice of psm and oem**:
1. `psm 4`: assume a single column of text of variable sizes. This suits receipts, where column data must be OCR'd and the text on each row concatenated row-wise.
2. `oem 1`: LSTM only. Refer to this [blog post](https://www.docsumo.com/blog/tesseract-ocr) for more info.

In [11]:
file_names

['doc_ocr4.jpg',
 'receipt.PNG',
 'receipt1.jpeg',
 'receipt2.webp',
 'receipt3.jpg']

In [12]:
img_texts

{'doc_ocr4.jpg': '|\nNOT TESTED ON ANIMALS\n\npay\n',
 'receipt1.jpeg': 'Main Street Restaurant\n6332 Business Drive\n\nSuite 528\n\nPalo Alto california 94301\n\n575-1628095\n\nFri 04/07/2017 11:36 AM\n\nMerchant ID: 9hqjxvufdr\nTerminal ID: 11111\nTransaction ID: #e6d598eT\nType: CREDIT\nPURCHASE\nNumber:  XXXXXXXXXXXX0041\nEntry Mode: Swiped\nCard Type: DISCOVER\nResponse: APPROVED\nApproval Code: 819543\nSub Total USD$ 25.23\nViele eA\nTotal USD$ 29.01\n\nThanks for suppor ting\nlocal business!\n\nTHANK YOU\n',
 'receipt2.webp': 'No.\nReceived from\n\nthe sum of pesos\n\nRECEIPT\n\nDate —\n\nHS 108\n\nA\n\nas payment for\n\np\n\nSignature\n',
 'receipt3.jpg': 'TESCO\n\n— ae ae ome\n\nMetro\nPl TEL 0845 677921¢\n\nF9ESH MILK 18\nMJESLI 2129\nDARK CHOCOLATE » F\n26 £0.95 1.90\n\nTOTAL 5. Of\nMASTERCARD SALE 508\n\nAID : A0000000041019\n\nNJMBER 1 KKK ERHKKKKKKO IGE tcc\n\nPAN SEQ NO : 02\n\nAUTH CODE ; 036017\n\nMERCHANT : 1833431\n\nSTART : 10/10 EXPIRY : 11/13\n\nCardholder PIN Ver

# Save the results in a text file

In [13]:
import os
os.makedirs("text_outputs", exist_ok=True)

# Write one "<file name>: <recognised text>" entry per image. UTF-8 is set
# explicitly because the OCR output contains non-ASCII characters and the
# platform default encoding may not be able to represent them.
with open("text_outputs/output.txt", "w", encoding="utf-8") as f:
    for k, v in img_texts.items():
        # Each entry is a single string, so write() is the right call here.
        f.write(f"{k}: \n {v} \n\n")

It is worth noting that the rectangle detection pipeline fails to detect the receipt in "receipt.PNG", so that file is skipped and has no entry in `img_texts`.