In [1]:
import os
import re
from pathlib import Path

import cv2 as cv
import imutils
import pytesseract
from imutils.perspective import four_point_transform

In [4]:
# Set paths
# Folder the input receipt photos are read from
raw_img_folder = Path('images/receipts')
# Folder reserved for processed results
proc_img_folder = Path('images/receipts_processed')

# Sample image to process. A single assignment replaces the previous pair,
# where 'receipt.jpg' was immediately overwritten by 'test1.jpg'.
test_img_file = 'test1.jpg'
test_img_filepath = raw_img_folder / test_img_file

In [5]:
# Get image
# cv.imread does not raise when the file is missing or unreadable; it
# returns None. Fail here with a clear message instead of letting the
# .copy() call below die with an AttributeError.
raw_img = cv.imread(str(test_img_filepath))
if raw_img is None:
    raise FileNotFoundError(f'Could not read image: {test_img_filepath}')
img = raw_img.copy()

# Resize maintaining aspect ratio
img = imutils.resize(img, width=500)
# Width ratio between the raw and the resized image; used later to map
# coordinates found on the resized image back onto raw_img.
ratio = raw_img.shape[1] / float(img.shape[1])

## Image processing

### Adjusting image

In [6]:
# Preprocessing chain for edge detection: grayscale -> blur -> Canny
grayed = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
blurred = cv.GaussianBlur(grayed, (5, 5), 0)  # 5x5 Gaussian kernel
edged = cv.Canny(blurred, 75, 200)            # hysteresis thresholds 75 / 200

In [7]:
# Preview the input next to its edge map. Add ('Grayed', grayed) or
# ('Blurred', blurred) to the tuple to inspect the intermediate stages too.
for window_title, frame in (('Input', img), ('Edged', edged)):
    cv.imshow(window_title, frame)

# Wait for a key press, then close every preview window
cv.waitKey(0)
cv.destroyAllWindows()

###  Detecting contours

In [8]:
# Extract the outer contours from the edge map and order them from the
# largest enclosed area to the smallest. imutils.grab_contours unwraps the
# contour list from whatever tuple layout cv.findContours returned.
contours = sorted(
    imutils.grab_contours(
        cv.findContours(edged.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    ),
    key=cv.contourArea,
    reverse=True,
)

Sorting is an important step, since it is assumed that the largest contour in the input image with four corners is the receipt.

Now we have to check if a contour with four vertices was found.

In [9]:
# Stays None unless a four-point contour is found below
receipt_contour = None

# Contours are already ordered by area (largest first), so the first one
# that simplifies to a quadrilateral is taken as the receipt outline.
for c in contours:
    # Simplify the contour, tolerating deviations up to 2% of its perimeter
    peri = cv.arcLength(c, True)
    approx = cv.approxPolyDP(c, 0.02 * peri, True)

    if len(approx) == 4:
        receipt_contour = approx
        break
else:
    # The loop ended without a break: no quadrilateral among the contours
    raise Exception('Could not find proper receipt contours. '
                    'Review the input image and try again.')

In [None]:
# Variant of the search that scans every contour and keeps the
# quadrilateral enclosing the largest area, rather than the first one found.
receipt_contour = None
max_contour_area = 0

for c in contours:
    # Approximate the contour by reducing the number of points.
    # The perimeter is measured as a CLOSED curve so that the tolerance is
    # consistent with the closed=True approximation on the next line.
    peri = cv.arcLength(c, True)
    approx = cv.approxPolyDP(c, 0.02 * peri, True)

    # Only four-point approximations are receipt candidates
    if len(approx) != 4:
        continue

    # Compute the area once and keep the largest candidate seen so far
    approx_area = cv.contourArea(approx)
    if approx_area > max_contour_area:
        max_contour_area = approx_area
        receipt_contour = approx

if receipt_contour is None:
    raise Exception('Could not find proper receipt contours. '
                    'Review the input image and try again.')


In [10]:
# Inspect the corner points of the detected receipt outline
receipt_contour

array([[[388,   6]],

       [[ 18,  24]],

       [[ 65, 640]],

       [[482, 579]]], dtype=int32)

In [11]:
# Draw the detected outline on a copy so that img itself stays untouched.
# contourIdx=-1 draws every contour in the list; the colour is green (BGR)
# with a 2 px line.
output = img.copy()
cv.drawContours(output, [receipt_contour], -1, (0, 255, 0), 2)
cv.imshow('Receipt outline', output)
cv.waitKey(0)
# Close the preview window after the key press so it does not linger
cv.destroyAllWindows()

-1

### Applying perspective transform

If the receipt contour is found, the perspective transform can be applied to the image.

In [None]:
# Apply a four-point perspective transform to the original image.
# The contour was detected on the resized image, so its four corners are
# scaled by `ratio` to raw_img coordinates before the transform.
receipt = four_point_transform(raw_img, receipt_contour.reshape(4, 2) * ratio)

# Show transformed image (downscaled for display only; `receipt` keeps its
# full resolution)
cv.imshow('Receipt transform', imutils.resize(receipt, width=500))
cv.waitKey(0)
# Close the preview window after the key press so it does not linger
cv.destroyAllWindows()

In [None]:
# Tesseract configuration string (page segmentation mode 4)
options = "--psm 4"

# OpenCV images are BGR; convert to RGB before handing the image to Tesseract
receipt_rgb = cv.cvtColor(receipt, cv.COLOR_BGR2RGB)
text = pytesseract.image_to_string(receipt_rgb, config=options)

In [None]:
# Print the text that the OCR step extracted from the receipt
print(text)