In [1]:
import numpy as np
import pandas as pd
from PIL import Image, ImageEnhance

In [2]:
from google.cloud import vision
import io

In [3]:
def detect_text(path, contrast=2):
    """Run Google Cloud Vision text detection on a contrast-enhanced image.

    The image at ``path`` is opened with PIL, its contrast is boosted, and the
    enhanced copy is encoded in memory and sent to the Vision API. The file on
    disk is never modified, so calling this function repeatedly on the same
    path always submits the same pixels.

    Args:
        path: Path of the image file to read.
        contrast: Factor handed to ``ImageEnhance.Contrast(...).enhance``.
            Defaults to 2.

    Returns:
        A ``(texts, words, poly_bounds)`` tuple:
          * ``texts`` - ``response.text_annotations`` as returned by the API.
          * ``words`` - the ``description`` string of every annotation.
          * ``poly_bounds`` - parallel to ``words``; each entry is a list of
            ``[x, y]`` pairs, one per vertex of the annotation's bounding
            polygon.

    Raises:
        Exception: If the response carries a non-empty ``error.message``.
    """
    client = vision.ImageAnnotatorClient()

    # Enhance inside the context manager so the file handle is released, and
    # remember the source format before it is lost (derived images have none).
    with Image.open(path) as img:
        image_format = img.format or 'PNG'
        enhanced = ImageEnhance.Contrast(img).enhance(contrast)

    # Encode to an in-memory buffer instead of saving back over `path`;
    # overwriting the input made every re-run compound the contrast boost.
    buffer = io.BytesIO()
    enhanced.save(buffer, format=image_format)
    content = buffer.getvalue()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)

    # Fail before touching the annotations if the API reported an error.
    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    texts = response.text_annotations

    print('Texts:')

    words = []
    poly_bounds = []
    for text in texts:
        print('\n"{}"'.format(text.description))
        words.append(text.description)
        poly_bounds.append(
            [[vertex.x, vertex.y] for vertex in text.bounding_poly.vertices])

    return texts, words, poly_bounds

In [4]:
# Run OCR on the sample bill. As the output below shows, the first annotation
# holds the whole detected text and the remaining ones are individual tokens.
texts, words, poly_bounds = detect_text('./bill4.jpeg')

Texts:

"Item Name
MRP
요ty
Price
GOWARDHAN GO CHEE
55.00
1.000
54.04
AMUL VANILA IC CR
20.00
1.000
19.58
APPY JUICE 400 ML
18.00
1.000
17.44
BT RAWRICE
48.00
1.000
41.14
96.00 0.606 95.00
22.00 1.000
ORANGE CITRUS
GOODLIFE PURE COW
21.75
SUGAR
40.00 1.000
38.76
BAMBINO ROASTED V
37.00 1.000
36.37
CUCUMBER HYBRID
19.00 0.300
18.00
18.74
45.00
DOUBLE HORSE IDIY
19.50
1.000
GINGER
46.00 0.076
46.00 0.076
1.000
45.00
85.73
GREEN CHILLY
TIDE JR 1 KG
88.00
PEPSODENT KDS FRT
49.00
1.000
47.10
"

"Item"

"Name"

"MRP"

"요"

"ty"

"Price"

"GOWARDHAN"

"GO"

"CHEE"

"55.00"

"1.000"

"54.04"

"AMUL"

"VANILA"

"IC"

"CR"

"20.00"

"1.000"

"19.58"

"APPY"

"JUICE"

"400"

"ML"

"18.00"

"1.000"

"17.44"

"BT"

"RAWRICE"

"48.00"

"1.000"

"41.14"

"96.00"

"0.606"

"95.00"

"22.00"

"1.000"

"ORANGE"

"CITRUS"

"GOODLIFE"

"PURE"

"COW"

"21.75"

"SUGAR"

"40.00"

"1.000"

"38.76"

"BAMBINO"

"ROASTED"

"V"

"37.00"

"1.000"

"36.37"

"CUCUMBER"

"HYBRID"

"19.00"

"0.300"

"18.00"

"18.74"

"4

In [5]:
# words[0] is the full-text annotation: every detected line in one string.
print(words[0])

Item Name
MRP
요ty
Price
GOWARDHAN GO CHEE
55.00
1.000
54.04
AMUL VANILA IC CR
20.00
1.000
19.58
APPY JUICE 400 ML
18.00
1.000
17.44
BT RAWRICE
48.00
1.000
41.14
96.00 0.606 95.00
22.00 1.000
ORANGE CITRUS
GOODLIFE PURE COW
21.75
SUGAR
40.00 1.000
38.76
BAMBINO ROASTED V
37.00 1.000
36.37
CUCUMBER HYBRID
19.00 0.300
18.00
18.74
45.00
DOUBLE HORSE IDIY
19.50
1.000
GINGER
46.00 0.076
46.00 0.076
1.000
45.00
85.73
GREEN CHILLY
TIDE JR 1 KG
88.00
PEPSODENT KDS FRT
49.00
1.000
47.10



In [14]:
# Start from an empty string; the following cell stores the
# layout-reconstructed receipt text in Final_str.
Final_str = ""

In [15]:
def reconstruct_layout(words, poly_bounds, line_height=20, column_gap=50,
                       word_gap=7):
    """Join OCR tokens into text using their bounding-polygon positions.

    Each token is compared with the token immediately before it:
      * start-y differs by more than ``line_height``  -> new line (``"\\n"``)
      * otherwise, horizontal gap > ``column_gap``    -> new column (``"\\t"``)
      * otherwise, horizontal gap > ``word_gap``      -> same cell (``"_"``)
      * otherwise the token is appended with no separator.

    Args:
        words: Token strings. Index 0 is skipped (in this notebook it is the
            full-text annotation rather than a single token).
        poly_bounds: Parallel to ``words``; each entry is a list of ``[x, y]``
            vertices. Vertex 0 supplies the token's start x/y and vertex 1 its
            end x (presumably top-left and top-right - confirm against the
            OCR response).
        line_height: Vertical distance above which two tokens are treated as
            being on different lines.
        column_gap: Horizontal distance above which a tab is inserted.
        word_gap: Horizontal distance above which an underscore is inserted.

    Returns:
        The reconstructed text; ``""`` when there are no tokens after index 0.
    """
    pieces = []
    prev_x_end = None
    prev_y_start = None

    for index, word in enumerate(words):
        if index == 0:
            # Skip the aggregate annotation; only per-token entries are laid out.
            continue

        bounds = poly_bounds[index]
        x_start = bounds[0][0]
        x_end = bounds[1][0]
        y_start = bounds[0][1]

        if prev_x_end is None:
            # First real token: nothing precedes it, so no separator.
            separator = ""
        elif abs(y_start - prev_y_start) / line_height > 1:
            separator = "\n"
        else:
            gap = abs(x_start - prev_x_end)
            if gap > column_gap:
                separator = "\t"
            elif gap > word_gap:
                separator = "_"
            else:
                separator = ""

        pieces.append(separator + word)
        prev_x_end = x_end
        prev_y_start = y_start

    return "".join(pieces)


# Rebuilt from scratch on every run, so re-executing this cell never appends
# to the result of a previous execution.
Final_str = reconstruct_layout(words, poly_bounds)

# Number of annotations iterated over, including the skipped full-text entry.
count_words = len(words)
    

In [16]:
# Show the reconstructed table, then display count_words (the number of
# annotations the loop iterated over) as the cell's value.
print(Final_str)
count_words

Item_Name	MRP	요ty	Price
GOWARDHAN_GO_CHEE	55.00	1.000	54.04
AMUL_VANILA_IC_CR	20.00	1.000	19.58
APPY_JUICE_400_ML	18.00	1.000	17.44
BT_RAWRICE	48.00	1.000	41.14
96.00	0.606	95.00
22.00	1.000
ORANGE_CITRUS
GOODLIFE_PURE_COW	21.75
SUGAR	40.00	1.000	38.76
BAMBINO_ROASTED_V	37.00	1.000	36.37
CUCUMBER_HYBRID	19.00	0.300	18.00
18.74
45.00
DOUBLE_HORSE_IDIY	19.50	1.000
GINGER	46.00	0.076
46.00	0.076
1.000
45.00
85.73
GREEN_CHILLY
TIDE_JR_1_KG	88.00
PEPSODENT_KDS_FRT	49.00	1.000	47.10


86