# Detect Invoice Number From Image

In [5]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf
import cv2
import sys

import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

import numpy as np
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore') 

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

In [75]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

###  Variables:
       - IMAGE_PATH : String       -> .jpg || .jpeg file
       - LABEL_MAP_PATH : String   -> .pbtxt file
       - SAVED_MODEL_PATH : String -> "reference to saved_model folder of exported model"

In [76]:
IMAGE_PATH          = "./images/sample2.jpg"
LABEL_MAP_PATH      = "./data/label_map.pbtxt"
SAVED_MODEL_PATH    = "./data/saved_model"

### 1. Load the Model

In [77]:
detect_fn = tf.saved_model.load(SAVED_MODEL_PATH)

### 2. Load label map data (for plotting)

In [78]:
category_index = label_map_util.create_category_index_from_labelmap(LABEL_MAP_PATH, use_display_name=True) 

### 3. Convert image into numpy_array

In [79]:
def load_image_into_numpy_array(path):
    return np.array(Image.open(path))

image_np = load_image_into_numpy_array(IMAGE_PATH) 

### 4. The input needs to be a tensor, convert it using `tf.convert_to_tensor`.

In [80]:
input_tensor = tf.convert_to_tensor(image_np)

### 5. The model expects a batch of images, so add an axis with `tf.newaxis`.

In [81]:
input_tensor = input_tensor[tf.newaxis, ...]

### 6. Run Detection

In [82]:
detections = detect_fn(input_tensor)

### 7. All outputs are batches tensors. 
**Convert to numpy arrays, and take index [0] to remove the batch dimension. We're only interested in the first num_detections.**

In [83]:
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}
detections['num_detections'] = num_detections

### 8. detection_classes should be ints.

In [84]:
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
image_np_with_detections = image_np.copy()

### *Detection Result Breakdown*

In [32]:
detections['detection_boxes'][0]

array([0.04715748, 0.15597913, 0.24276222, 0.24058676], dtype=float32)

In [33]:
np.where(detections['detection_scores'] == detections['detection_scores'].max())

(array([0], dtype=int64),)

In [34]:
image_np_with_detections.shape

(2048, 1536, 3)

# Draw Bounding Box

In [85]:
box = tuple(detections['detection_boxes'][0].tolist())
print(box)

(0.7322491407394409, 0.8059602379798889, 0.9526413679122925, 0.8902712464332581)


In [86]:
classes = detections['detection_classes']
class_name = category_index[classes[0]]['name']
print(class_name)

invoice-number


In [87]:
score = '{}%'.format(round(100*detections['detection_scores'][0]))
print(score)

59%


In [88]:
ymin, xmin, ymax, xmax = box

In [89]:
image_pil = Image.fromarray(np.uint8(image_np_with_detections)).convert('RGB')

In [90]:
draw = ImageDraw.Draw(image_pil)
im_width, im_height = image_pil.size
# using normalize_coordinate
(left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
# draw line
draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)],
              width=4,
              fill='red')


# Croping Invoice Number

In [94]:
im_crop = image_pil.crop((left, top, right, bottom))
im_crop.show()
im_crop.save("./images/cropped_img2.jpeg", "JPEG")

In [92]:
image_pil.size

(1536, 2048)

# Extract Invoice Number using Tesseract
*Preprocessing :*
1. Resize image 4x
2. Clear the noise
3. Sharpen

In [6]:
import pytesseract
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

In [95]:
# Filename
images_path = "images/"
img_cropped_filename = "cropped_img.jpeg"                      # 96 DPI

img_resized_filename = "cropped_img_resized.png"               # 300 DPI
img_cropped_edited_filename = "cropped_img_edited.jpeg"        # 96 DPI
img_resized_edited_filename = "cropped_img_resized_edited.png" # 300 DPI


In [51]:
def resize_image(filename):
    low_res_img_pil = Image.open(images_path + filename)
    h, w = low_res_img_pil.size
    resize = h*4, w*4
    im_resized = low_res_img_pil.resize(resize, Image.ANTIALIAS)
    resized_img_filename =  filename.split(".")[0] + "_resized.jpeg"
    im_resized.save(images_path + resized_img_filename, "JPEG")
    return resized_img_filename

def denoise_image(filename):
    noised_img_cv = cv2.imread(images_path + filename)
    denoised_img = cv2.fastNlMeansDenoising(noised_img_cv, None, 10)
    denoised_img_filename =  filename.split(".")[0] + "_denoised.jpeg"
    cv2.imwrite(images_path + denoised_img_filename, denoised_img)
    return denoised_img_filename
    
def sharpen_image(filename):
    unsharpen_img_cv = cv2.imread(images_path + filename)
    kernel = np.array([[0, -1, 0],
                       [-1, 5,-1],
                       [0, -1, 0]])
    sharpened_img = cv2.filter2D(src=unsharpen_img_cv, ddepth=-1, kernel=kernel)
    sharpened_img_filename =  filename.split(".")[0] + "_sharpened.jpeg"
    cv2.imwrite(images_path + sharpened_img_filename, sharpened_img)
    return sharpened_img_filename

def scale_contrast_image(filename):
    image = cv2.imread(images_path + filename)
    new_image = np.zeros(image.shape, image.dtype)
    alpha = 2.0 # Simple contrast control
    beta = 1    # Simple brightness control
    
    for y in range(image.shape[0]):
        for x in range(image.shape[1]):
            for c in range(image.shape[2]):
                new_image[y,x,c] = np.clip(alpha*image[y,x,c] + beta, 0, 255)
    new_filename = filename.split(".")[0] + "_contrast.jpeg"
    cv2.imwrite(images_path + new_filename, new_image)
    return new_filename

def adaptive_thresholding_image(filename):
    image = cv2.imread(images_path + filename)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     ret,thresh1 = cv2.threshold(image,200,255,cv2.THRESH_BINARY)
    new_image = cv2.adaptiveThreshold(image,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,31,3)
    new_filename = filename.split(".")[0] + "_at.jpeg"
    cv2.imwrite(images_path + new_filename, new_image)
    return new_filename

In [52]:
# # Image Preprocessing Pipepline
# image_choice = "sample2_cropped.jpeg"
# resized_image = resize_image(image_choice)
# sharpened_image = sharpen_image(resized_image)
# denoised_image = denoise_image(sharpened_image)
# denoised_image_2x = denoise_image(denoised_image)
# contrast_image = scale_contrast_image(denoised_image_2x)
at_image = adaptive_thresholding_image(contrast_image)

# Good to read

In [54]:
# Read Image (opencv)
images_path = "./images/"
# goal = images_path + img_resized_edited_filename
# test = images_path + bt_image
asd = images_path + "sample2_cropped_resized_sharpened_denoised_2x_contrast_at.jpeg"
choosedImage = asd
# choosedImage = img_resized_edited_filename
img_cv = cv2.imread(choosedImage)
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
img_cv = cv2.rotate(img_cv, cv2.ROTATE_90_CLOCKWISE)
# ret,img_cv = cv2.threshold(img_cv,100,255,cv2.THRESH_BINARY)
img_cv = cv2.adaptiveThreshold(img_cv,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,31,3)
print(choosedImage + " shape: " + str(img_cv.shape))
print(choosedImage + " max val: " + str(img_cv.max))
cv2.imshow(choosedImage,img_cv)
cv2.waitKey(0)
print(pytesseract.image_to_string(img_cv, config="outputbase digits"))

./images/sample2_cropped_resized_sharpened_denoised_2x_contrast_at.jpeg shape: (516, 1804)
./images/sample2_cropped_resized_sharpened_denoised_2x_contrast_at.jpeg max val: <built-in method max of numpy.ndarray object at 0x0000022CE352DA50>
86269

- -




In [95]:
ocr_result = []
for x in range(4):
    img_cv = cv2.rotate(img_cv, cv2.ROTATE_90_CLOCKWISE)
    _results = pytesseract.image_to_string(img_cv, config="outputbase digits")
    _results = _results.split('\n')
    for _result in _results:
        if len(_result) == 5:
            ocr_result.append(_result)
for res in ocr_result:
    print("count: ("+str(len(res))+") " + res + " ")

response = {
    "data" : ocr_result
}
    
print(response)

count: (5) 69798 
count: (5) 86269 
count: (5) 86269 
count: (5) 69798 
{'data': ['69798', '86269', '86269', '69798']}


In [97]:
import json

print(json.dumps(response))

{"data": ["69798", "86269", "86269", "69798"]}


In [104]:
import time

a = time.time()


In [118]:
b = time.time()
print("--- %s seconds ---" % (b-a))

--- 12.768976926803589 seconds ---
