# Detect Invoice Number From Image

In [19]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf
import cv2
import sys

import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

import numpy as np
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore') 

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

In [20]:
# Enable memory growth on every physical GPU TensorFlow reports.
# With no GPU present the list is empty and the loop does nothing.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

### Variables

- `IMAGE_PATH` (str): path to the input image (`.jpg` / `.jpeg`)
- `LABEL_MAP_PATH` (str): path to the label map (`.pbtxt`)
- `SAVED_MODEL_PATH` (str): path to the `saved_model` folder of the exported model

In [23]:
# Inputs for the detection part of the notebook; all paths are relative to the
# notebook's working directory.
IMAGE_PATH          = "./images/sample.jpg"
LABEL_MAP_PATH      = "./data/label_map.pbtxt"
SAVED_MODEL_PATH    = "./data/saved_model"

### 1. Load the Model

In [24]:
# The loaded object is called directly on a batched image tensor in step 6.
detect_fn = tf.saved_model.load(SAVED_MODEL_PATH)

### 2. Load label map data (for plotting)

In [25]:
# Indexed by class id later on; the 'name' entry of each item is used to label
# the top detection.
category_index = label_map_util.create_category_index_from_labelmap(LABEL_MAP_PATH, use_display_name=True) 

### 3. Convert image into numpy_array

In [26]:
def load_image_into_numpy_array(path):
    """Load an image file into a NumPy array suitable for the detector.

    Args:
        path: Path to the image file.

    Returns:
        uint8 array of shape (height, width, 3) in RGB channel order.
    """
    # The context manager closes the underlying file once the pixels are copied.
    # Converting to RGB guarantees three channels even for grayscale, palette
    # or RGBA inputs, which would otherwise produce a differently shaped array.
    with Image.open(path) as image:
        return np.array(image.convert('RGB'))

image_np = load_image_into_numpy_array(IMAGE_PATH)

### 4. The input needs to be a tensor, so convert it using `tf.convert_to_tensor`.

In [27]:
# Wrap the image array in a tensor; the batch axis is added in the next step.
input_tensor = tf.convert_to_tensor(image_np)

### 5. The model expects a batch of images, so add an axis with `tf.newaxis`.

In [28]:
# Build the batched tensor straight from `image_np` instead of from
# `input_tensor` itself, so re-running this cell always yields exactly one
# leading batch axis rather than stacking a new axis on every run.
input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]

### 6. Run Detection

In [29]:
# Run the detector; the result is consumed as a dict of batched tensors in the
# next step.
detections = detect_fn(input_tensor)

### 7. All outputs are batched tensors.
**Convert them to NumPy arrays and take index [0] to remove the batch dimension. We are only interested in the first `num_detections` entries.**

In [30]:
# `num_detections` is popped first so the comprehension below only sees the
# per-detection outputs; it is stored back as a plain int afterwards.
# NOTE: this cell rebinds `detections` to NumPy data, so it is not safe to
# re-run on its own -- re-run the detection cell (step 6) first.
num_detections = int(detections.pop('num_detections'))
detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}
detections['num_detections'] = num_detections

### 8. detection_classes should be ints.

In [31]:
# Integer class ids are needed because they are used as keys into
# `category_index` further down.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
# Work on a copy so `image_np` itself stays untouched.
image_np_with_detections = image_np.copy()

### *Detection Result Breakdown*

In [32]:
# Box of the first detection, ordered (ymin, xmin, ymax, xmax) and expressed as
# fractions of the image size (it is scaled by width/height before drawing).
detections['detection_boxes'][0]

array([0.04715748, 0.15597913, 0.24276222, 0.24058676], dtype=float32)

In [33]:
# Index (or indices, on ties) of the highest-scoring detection.
np.where(detections['detection_scores'] == detections['detection_scores'].max())

(array([0], dtype=int64),)

In [34]:
# Shape of the image array the detections refer to.
image_np_with_detections.shape

(2048, 1536, 3)

# Draw Bounding Box

In [35]:
# Plain Python tuple of the first detection's box, unpacked as
# (ymin, xmin, ymax, xmax) below.
box = tuple(detections['detection_boxes'][0].tolist())
print(box)

(0.04715748131275177, 0.15597912669181824, 0.2427622228860855, 0.2405867576599121)


In [36]:
# Look up the label of the first detection through the label map.
classes = detections['detection_classes']
class_name = category_index[classes[0]]['name']
print(class_name)

invoice-number


In [37]:
# Confidence of the first detection as a rounded percentage string.
score = f"{round(100*detections['detection_scores'][0])}%"
print(score)

83%


In [38]:
# Box coordinates are fractions of the image size; they are scaled to pixels
# when the outline is drawn.
ymin, xmin, ymax, xmax = box

In [39]:
# PIL copy of the image array; the bounding box is drawn onto this object.
image_pil = Image.fromarray(np.uint8(image_np_with_detections)).convert('RGB')

In [40]:
draw = ImageDraw.Draw(image_pil)
im_width, im_height = image_pil.size

# Scale the fractional box coordinates to pixel positions.
left = xmin * im_width
right = xmax * im_width
top = ymin * im_height
bottom = ymax * im_height

# Trace the box as one closed polyline: start at the top-left corner, go
# around the rectangle and return to the starting point.
draw.line(
    [(left, top), (left, bottom), (right, bottom), (right, top), (left, top)],
    width=4,
    fill='red',
)


# Cropping the Invoice Number

In [41]:
# Crop from an un-annotated copy of the image: `image_pil` already carries the
# 4 px red outline drawn along the box edges, which would otherwise end up
# inside the crop and disturb the OCR step.
image_clean_pil = Image.fromarray(np.uint8(image_np)).convert('RGB')
im_crop = image_clean_pil.crop((left, top, right, bottom))

# Save with a real file extension into the folder the preprocessing pipeline
# reads from ("images/" + "cropped_img.jpeg").
os.makedirs("images", exist_ok=True)
im_crop.save("images/cropped_img.jpeg", "JPEG")

# Last expression: render the crop inline instead of opening an external viewer.
im_crop

In [43]:
image_pil.size

(1536, 2048)

# Extract Invoice Number using Tesseract
*Preprocessing:*
1. Resize image 4x
2. Clear the noise
3. Sharpen

In [3]:
import os

import pytesseract

# Location of the tesseract executable. The default is the standard Windows
# install path; set the TESSERACT_CMD environment variable to point somewhere
# else without editing the notebook.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    'C:\\Program Files\\Tesseract-OCR\\tesseract.exe',
)

In [4]:
# File names used by the OCR experiments. The helper functions below prepend
# `images_path` to the name they are given.
images_path = "images/"
img_cropped_filename = "cropped_img.jpeg"                      # 96 DPI

img_resized_filename = "cropped_img_resized.png"               # 300 DPI
img_cropped_edited_filename = "cropped_img_edited.jpeg"        # 96 DPI
img_resized_edited_filename = "cropped_img_resized_edited.png" # 300 DPI


In [17]:
def resize_image(filename):
    """Upscale an image from `images_path` by 4x and save it as a JPEG.

    Args:
        filename: Name of the source image inside `images_path`.

    Returns:
        Name of the saved file (relative to `images_path`), built as
        "<name without extension>_resized.jpeg".
    """
    # The context manager closes the source file once the resized copy exists.
    with Image.open(images_path + filename) as low_res_img_pil:
        # PIL reports size as (width, height), the same order `resize` expects.
        width, height = low_res_img_pil.size
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under its current name.
        im_resized = low_res_img_pil.resize((width * 4, height * 4), Image.LANCZOS)
    # splitext strips only the final extension, so names that contain more
    # than one dot keep the rest of their stem.
    resized_img_filename = os.path.splitext(filename)[0] + "_resized.jpeg"
    im_resized.save(images_path + resized_img_filename, "JPEG")
    return resized_img_filename

def denoise_image(filename):
    """Denoise an image from `images_path` and save the result as a JPEG.

    Args:
        filename: Name of the source image inside `images_path`.

    Returns:
        Name of the saved file (relative to `images_path`), built as
        "<name without extension>_denoised.jpeg".

    Raises:
        FileNotFoundError: If OpenCV cannot read the source image.
    """
    source_path = images_path + filename
    noised_img_cv = cv2.imread(source_path)
    # cv2.imread signals failure by returning None rather than raising.
    if noised_img_cv is None:
        raise FileNotFoundError("Could not read image: " + source_path)
    # Third positional argument is the filter strength `h`.
    denoised_img = cv2.fastNlMeansDenoising(noised_img_cv, None, 10)
    # splitext strips only the final extension, so names that contain more
    # than one dot keep the rest of their stem.
    denoised_img_filename = os.path.splitext(filename)[0] + "_denoised.jpeg"
    cv2.imwrite(images_path + denoised_img_filename, denoised_img)
    return denoised_img_filename
    
def sharpen_image(filename):
    """Sharpen an image from `images_path` and save the result as a JPEG.

    Args:
        filename: Name of the source image inside `images_path`.

    Returns:
        Name of the saved file (relative to `images_path`), built as
        "<name without extension>_sharpened.jpeg".

    Raises:
        FileNotFoundError: If OpenCV cannot read the source image.
    """
    source_path = images_path + filename
    unsharpen_img_cv = cv2.imread(source_path)
    # cv2.imread signals failure by returning None rather than raising.
    if unsharpen_img_cv is None:
        raise FileNotFoundError("Could not read image: " + source_path)
    # 3x3 kernel: centre weight 5, the four direct neighbours -1 (sum is 1).
    kernel = np.array([[0, -1, 0],
                       [-1, 5,-1],
                       [0, -1, 0]])
    # ddepth=-1 keeps the output depth equal to the input depth.
    sharpened_img = cv2.filter2D(src=unsharpen_img_cv, ddepth=-1, kernel=kernel)
    # splitext strips only the final extension, so names that contain more
    # than one dot keep the rest of their stem.
    sharpened_img_filename = os.path.splitext(filename)[0] + "_sharpened.jpeg"
    cv2.imwrite(images_path + sharpened_img_filename, sharpened_img)
    return sharpened_img_filename

In [18]:
# Image preprocessing pipeline: each step writes a new file into `images_path`
# and returns its name, which is fed to the next step.
image_choice = img_cropped_filename
resized_image = resize_image(image_choice)
denoised_image = denoise_image(resized_image)
sharpened_image = sharpen_image(denoised_image)

In [68]:
# Binarise the chosen pipeline output before handing it to Tesseract.
current_image = denoised_image
img_cv = cv2.imread(images_path + current_image)
# cv2.imread signals failure by returning None rather than raising.
if img_cv is None:
    raise FileNotFoundError("Could not read image: " + images_path + current_image)
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
# Use the public constant: the `cv2.cv2` submodule is not exposed by newer
# opencv-python builds.
# NOTE(review): confirm the 90-degree rotation is really wanted for this crop.
img_cv = cv2.rotate(img_cv, cv2.ROTATE_90_CLOCKWISE)
img_cv = cv2.medianBlur(img_cv, 7)
ret, img_cv = cv2.threshold(img_cv, 127, 255, cv2.THRESH_BINARY)
# img_cv = cv2.adaptiveThreshold(img_cv, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,11,2)
# Report and title with `current_image` so the output stays correct when a
# different pipeline stage is selected above.
print(current_image + " shape: " + str(img_cv.shape))
cv2.imshow(current_image, img_cv)
cv2.waitKey(0)
# Close the preview window so it does not linger after the key press.
cv2.destroyAllWindows()

cropped_img_resized_denoised.jpeg shape: (520, 1600)


-1

In [64]:
# OCR the preprocessed image.
# NOTE(review): "outputbase" is handed to tesseract as an extra config token
# next to "digits" -- confirm that this is intended.
print(pytesseract.image_to_string(img_cv, config="outputbase digits"))




In [62]:
# Same OCR call on the "resized_edited" image, converted to grayscale first.
# ("foto_buatan" is Indonesian, roughly "artificial photo"; presumably a
# hand-edited version of the crop, judging by the file name -- TODO confirm.)
foto_buatan = cv2.imread(images_path + img_resized_edited_filename)
foto_buatan = cv2.cvtColor(foto_buatan, cv2.COLOR_BGR2GRAY)
print(pytesseract.image_to_string(foto_buatan, config="outputbase digits"))

697986




In [None]:
# Stand-alone 4x upscale of the crop, saved as PNG.
low_res_img_pil = Image.open("cropped_img.jpeg")
# PIL reports size as (width, height), the same order `resize` expects.
width, height = low_res_img_pil.size
resize = width * 4, height * 4
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
im_resized = low_res_img_pil.resize(resize, Image.LANCZOS)
im_resized.save("cropped_img_resized.png", "PNG")

In [20]:
# Read Image (opencv)
# NOTE(review): this reads the file from the working directory, while another
# cell reads the same file name from `images_path` -- confirm which is right.
choosedImage = img_resized_edited_filename
img_cv = cv2.imread(choosedImage)
# Converted to RGB channel order; this array is what the OCR cell receives.
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
print(choosedImage + " shape: " + str(img_cv.shape))
# NOTE(review): cv2.imshow expects BGR data, so after the conversion above the
# preview may show red and blue swapped.
cv2.imshow(choosedImage,img_cv)
cv2.waitKey(0)

cropped_img_resized_edited.png shape: (520, 1600, 3)


-1

In [19]:
# OCR whatever `img_cv` currently holds (it is rebound by several cells).
# NOTE(review): "outputbase" is handed to tesseract as an extra config token
# next to "digits" -- confirm that this is intended.
print(pytesseract.image_to_string(img_cv, config="outputbase digits"))

69798




In [99]:
# Up-Scale the image resolution
# low_res_img_pil = Image.open("cropped_img.jpeg")
# print("dpi: " + str(low_res_img_pil.info['dpi']))
# low_res_img_pil.save("cropped_img-600.png", dpi=(600,600))

dpi: (96, 96)


In [103]:
# low_res_img_pil.size
# h, w = low_res_img_pil.size
# resize = h*4, w*4
# im_resized = low_res_img_pil.resize(resize, Image.ANTIALIAS)
# im_resized.save("cropped_img_resized.png", "PNG")

In [110]:
# hi_res_image = Image.open("cropped_img_resized.png")
# print("new dpi: " + str(hi_res_image.shape))

AttributeError: shape

In [94]:
# Preview the re-saved 600 DPI image.
upscale_image = cv2.imread('cropped_img-600.png')
# cv2.imread signals failure by returning None rather than raising; fail with
# a clear message instead of an AttributeError on `.shape`.
if upscale_image is None:
    raise FileNotFoundError("Could not read image: cropped_img-600.png")
print(upscale_image.shape)
cv2.imshow('image', upscale_image)
cv2.waitKey(0)
# Close the preview window so it does not linger after the key press.
cv2.destroyAllWindows()

# Resize a separate image to a fixed 7016 x 4961 (width x height) canvas.
im = Image.open("my_image.png")
size = 7016, 4961
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter
# under its current name.
im_resized = im.resize(size, Image.LANCZOS)
im_resized.save("my_image_resized.png", "PNG")

(130, 400, 3)


-1