In [45]:
import pytesseract
from PIL import Image
import cv2 
import openai
import os
import getpass 
import tqdm 
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import ast
import torch
import json 



from pathlib import Path
from difflib import SequenceMatcher
from StringUtils import CorrectDigits, AssignNERTags, AssignLabel
from OCRUtils import apply_tesseract,DrawBoundBoxes,denormalize_box, ProcessImage

In [57]:
def read_bbox_and_words(path : Path):
  bbox_and_words_list = []

  with open(path, 'r', errors='ignore') as f:
    for line in f.read().splitlines():
      if len(line) == 0:
        continue
        
      split_lines = line.split(",")

      bbox = np.array(split_lines[0:8], dtype=np.int32)
      text = ",".join(split_lines[8:])

      # From the splited line we save (filename, [bounding box points], text line).
      # The filename will be useful in the future
      bbox_and_words_list.append([path.stem, *bbox, text])
    
  dataframe = pd.DataFrame(bbox_and_words_list, columns=['filename', 'x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'line'])
  dataframe = dataframe.drop(columns=['x1', 'y1', 'x3', 'y3'])

  return dataframe

sroie_folder_path = './SROIE2019/'
example_file = 'X00016469612.txt'
# Example usage
bbox_file_path = os.path.join(sroie_folder_path + 'train/box',example_file)
bbox_file_path = Path(bbox_file_path)
bbox = read_bbox_and_words(path=bbox_file_path)
print("\n== Dataframe ==")
bbox


== Dataframe ==


Unnamed: 0,filename,x0,y0,x2,y2,line
0,X00016469612,72,25,326,64,TAN WOON YANN
1,X00016469612,50,82,440,121,BOOK TA .K(TAMAN DAYA) SDN BND
2,X00016469612,205,121,285,139,789417-W
3,X00016469612,110,144,383,163,"NO.53 55,57 & 59, JALAN SAGU 18,"
4,X00016469612,192,169,299,187,"TAMAN DAYA,"
5,X00016469612,162,193,334,211,"81100 JOHOR BAHRU,"
6,X00016469612,217,216,275,233,JOHOR.
7,X00016469612,50,342,279,359,DOCUMENT NO : TD01167104
8,X00016469612,50,372,96,390,DATE:
9,X00016469612,165,372,342,389,25/12/2018 8:13:39 PM


In [47]:
def read_entities(path: Path):
  with open(path, 'r') as f:
    data = json.load(f)

  dataframe = pd.DataFrame([data])
  return dataframe


sroie_folder_path = './SROIE2019/'
example_file = 'X00016469612.txt'

# Example usage
entities_file_path = os.path.join(sroie_folder_path + 'train/entities',example_file)


entities = read_entities(path=entities_file_path)
print("\n\n== Dataframe ==")
entities



== Dataframe ==


Unnamed: 0,company,date,address,total
0,BOOK TA .K (TAMAN DAYA) SDN BHD,25/12/2018,"NO.53 55,57 & 59, JALAN SAGU 18, TAMAN DAYA, 8...",9.0


In [48]:
line = bbox.loc[1,"line"]
entitiesDict = entities.to_dict()
entitiesDict['company'] = entitiesDict['company'][0]
entitiesDict['date'] = entitiesDict['date'][0]
entitiesDict['total'] = entitiesDict['total'][0]
entitiesDict['address'] = entitiesDict['address'][0]

entitiesDict['COMPANY'] = entitiesDict.pop('company')
entitiesDict['DATE'] = entitiesDict.pop('date')
entitiesDict['TOTAL'] = entitiesDict.pop('total')
entitiesDict['LOCATION'] = entitiesDict.pop('address')
line = 'TAMAN'
label = AssignLabel(line, entitiesDict)

print("Line:", line)
print("Assigned label:", label)

Line: TAMAN
Assigned label: COMPANY


In [59]:
SROIE2019Train = pd.read_pickle('./SROIE2019Train')

In [58]:
sroie_train_path = os.path.join(sroie_folder_path,'train/box')

SROIETrain = []
for filename in tqdm(os.listdir(sroie_train_path)):
    file_path = os.path.join(sroie_train_path,filename)
    bbox = read_bbox_and_words(path=Path(file_path))
    words_column = []
    boxes_column = []
    lines = ""

    for indx,row in bbox.iterrows() :
        line = row['line']
        boxe = [row['x0'],row['y0'],row['x2'],row['y2']]
        x0, y0, x2, y2 = (row['x0'],row['y0'],row['x2'],row['y2'])
        lines = lines + " " + line


        bbox_width = boxe[2] - boxe[0]
        words_line = line.split(" ")


        for index, word in enumerate(words_line):
            x2 = x0 + int(bbox_width * len(word)/len(line))

            boxes_column.append([x0,y0,x2,y2])
            x0 = x2 + 5

    words = lines.split(" ")
    words = [word for word in words if len(word) >= 1]

    SROIETrain.append({'filename' : filename, 'words' : words, 'boxes' : boxes_column})
    
SROIETrain = pd.DataFrame(SROIETrain)
        

    

  0%|          | 0/626 [00:00<?, ?it/s]

 TAN WOON YANN
['', 'TAN', 'WOON', 'YANN']
 TAN WOON YANN BOOK TA .K(TAMAN DAYA) SDN BND
['', 'TAN', 'WOON', 'YANN', 'BOOK', 'TA', '.K(TAMAN', 'DAYA)', 'SDN', 'BND']
 TAN WOON YANN BOOK TA .K(TAMAN DAYA) SDN BND 789417-W
['', 'TAN', 'WOON', 'YANN', 'BOOK', 'TA', '.K(TAMAN', 'DAYA)', 'SDN', 'BND', '789417-W']
 TAN WOON YANN BOOK TA .K(TAMAN DAYA) SDN BND 789417-W NO.53 55,57 & 59, JALAN SAGU 18,
['', 'TAN', 'WOON', 'YANN', 'BOOK', 'TA', '.K(TAMAN', 'DAYA)', 'SDN', 'BND', '789417-W', 'NO.53', '55,57', '&', '59,', 'JALAN', 'SAGU', '18,']
 TAN WOON YANN BOOK TA .K(TAMAN DAYA) SDN BND 789417-W NO.53 55,57 & 59, JALAN SAGU 18, TAMAN DAYA,
['', 'TAN', 'WOON', 'YANN', 'BOOK', 'TA', '.K(TAMAN', 'DAYA)', 'SDN', 'BND', '789417-W', 'NO.53', '55,57', '&', '59,', 'JALAN', 'SAGU', '18,', 'TAMAN', 'DAYA,']
 TAN WOON YANN BOOK TA .K(TAMAN DAYA) SDN BND 789417-W NO.53 55,57 & 59, JALAN SAGU 18, TAMAN DAYA, 81100 JOHOR BAHRU,
['', 'TAN', 'WOON', 'YANN', 'BOOK', 'TA', '.K(TAMAN', 'DAYA)', 'SDN', 'BND', '7




In [84]:
SROIETrain

Unnamed: 0,filename,words,boxes
0,X00016469612.txt,"[TAN, WOON, YANN, BOOK, TA, .K(TAMAN, DAYA), S...","[[72, 25, 130, 64], [135, 25, 213, 64], [218, ..."
1,X00016469619.txt,"[TAN, WOON, YANN, INDAH, GIFT, &, HOME, DECO, ...","[[76, 50, 133, 84], [138, 50, 214, 84], [219, ..."
2,X00016469620.txt,"[TAN, WOON, YANN, MR, D.T.Y., (JOHOR), SDN, BH...","[[119, 47, 176, 80], [181, 47, 257, 80], [262,..."
3,X00016469622.txt,"[TAN, WOON, YANN, YONGFATT, ENTERPRISE, (JM051...","[[96, 41, 155, 79], [160, 41, 239, 79], [244, ..."
4,X00016469623.txt,"[TAN, WOON, YANN, MR, D.I.Y., (M), SDN, BHD, (...","[[83, 41, 140, 78], [145, 41, 221, 78], [226, ..."
...,...,...,...
621,X51008164997.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[86, 329, 131, 368], [136, 329, 181, 368], [1..."
622,X51008164998.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[87, 341, 132, 377], [137, 341, 182, 377], [1..."
623,X51008164999.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[85, 334, 130, 363], [135, 334, 180, 363], [1..."
624,X51009453801.txt,"[3180303, GL, HANDICRAFT, &, TAIL, ORING, 19,,...","[[16, 40, 131, 62], [104, 195, 203, 219], [208..."


In [85]:
SROIETrain.to_pickle('./SROIETrain')

In [61]:
sroie_folder_path = './SROIE2019/'
sroie_test_path = os.path.join(sroie_folder_path,'test/box')

SROIEtest = []
for filename in tqdm(os.listdir(sroie_test_path)):
    file_path = os.path.join(sroie_test_path,filename)
    bbox = read_bbox_and_words(path=Path(file_path))
    words_column = []
    boxes_column = []
    lines = ""

    for indx,row in bbox.iterrows() :
        line = row['line']
        boxe = [row['x0'],row['y0'],row['x2'],row['y2']]
        x0, y0, x2, y2 = (row['x0'],row['y0'],row['x2'],row['y2'])
        lines = lines + " " + line
        bbox_width = boxe[2] - boxe[0]
        words_line = line.split(" ")

        
        for index, word in enumerate(words_line):
            x2 = x0 + int(bbox_width * len(word)/len(line))

            boxes_column.append([x0,y0,x2,y2])
            x0 = x2 + 5

    words = lines.split(" ")
    words = [word for word in words if len(word) >= 1]

    SROIEtest.append({'filename' : filename, 'words' : words, 'boxes' : boxes_column})
        
SROIEtest = pd.DataFrame(SROIEtest)
        

    

100%|██████████| 347/347 [00:03<00:00, 112.67it/s]


In [62]:
SROIEtest = pd.DataFrame(SROIEtest)

In [63]:
SROIEtest

Unnamed: 0,filename,words,boxes
0,X00016469670.txt,"[TAN, CHAY, YEE, ***, COPY, ***, OJC, MARKETIN...","[[98, 26, 153, 66], [158, 26, 232, 66], [237, ..."
1,X00016469671.txt,"[TAN, CHAY, YEE, OJC, MARKETING, SDN, BHD, ROC...","[[114, 54, 167, 92], [172, 54, 242, 92], [247,..."
2,X51005200931.txt,"[PERNIAGAAN, ZHENG, HUI, JM0326955-V, NO.59, J...","[[380, 241, 498, 266], [503, 241, 562, 266], [..."
3,X51005230605.txt,"[PETRON, BKT, LANJAN, SB, ALSERKAM, ENTERPRISE...","[[153, 104, 239, 141], [244, 104, 287, 141], [..."
4,X51005230616.txt,"[190, GERBANG, ALAF, RESTAURANTS, SDN, BHD, (6...","[[244, 105, 327, 167], [37, 188, 133, 225], [1..."
...,...,...,...
342,X51008099100.txt,"[PAPPARICH, BMC, NO.19, &, 21, JALAN, TEMENGGU...","[[178, 149, 549, 188], [554, 149, 677, 188], [..."
343,X51009008095.txt,"[BOON, SENG, PAPER, SDN, BHD, (1248717-W), 12,...","[[97, 150, 159, 183], [164, 150, 226, 183], [2..."
344,X51009447842.txt,"[PASARAYA, BORONG, PINTAR, SDN, BHD, BR, NO.:,...","[[131, 184, 383, 232], [388, 184, 577, 232], [..."
345,X51009453729.txt,"[3180303, LIAN, HING, STATIONERY, SDN, BHD, (1...","[[10, 21, 126, 45], [152, 137, 197, 156], [202..."


In [52]:
SROIEtest.to_pickle('./SROIETest')

## Complete Data Preparation

In [53]:
DatasetSROIE2019 = pd.read_pickle('./DatasetSROIE2019V3')
SROIEtest = pd.read_pickle('./SROIETest')
SROIEtrain = pd.read_pickle('./SROIE2019Train')

In [54]:
SROIEtrain

Unnamed: 0,filename,words,boxes,entities,ner_tags
0,X00016469612.txt,"[TAN, WOON, YANN, BOOK, TA, .K(TAMAN, DAYA), S...","[[72, 25, 130, 64], [135, 25, 213, 64], [218, ...","{'COMPANY': 'BOOK TA .K (TAMAN DAYA) SDN BHD',...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ..."
1,X00016469619.txt,"[TAN, WOON, YANN, INDAH, GIFT, &, HOME, DECO, ...","[[76, 50, 133, 84], [138, 50, 214, 84], [219, ...","{'COMPANY': 'INDAH GIFT & HOME DECO', 'DATE': ...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ..."
2,X00016469620.txt,"[TAN, WOON, YANN, MR, D.T.Y., (JOHOR), SDN, BH...","[[119, 47, 176, 80], [181, 47, 257, 80], [262,...","{'COMPANY': 'MR D.I.Y. (JOHOR) SDN BHD', 'DATE...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ..."
3,X00016469622.txt,"[TAN, WOON, YANN, YONGFATT, ENTERPRISE, (JM051...","[[96, 41, 155, 79], [160, 41, 239, 79], [244, ...","{'COMPANY': 'YONGFATT ENTERPRISE', 'DATE': '25...","[O, O, O, COMPANY, COMPANY, O, LOCATION, LOCAT..."
4,X00016469623.txt,"[TAN, WOON, YANN, MR, D.I.Y., (M), SDN, BHD, (...","[[83, 41, 140, 78], [145, 41, 221, 78], [226, ...","{'COMPANY': 'MR D.I.Y. (M) SDN BHD', 'DATE': '...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ..."
...,...,...,...,...,...
621,X51008164997.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[86, 329, 131, 368], [136, 329, 181, 368], [1...",{'COMPANY': 'ONE ONE THREE SEAFOOD RESTAURANT ...,"[COMPANY, COMPANY, COMPANY, COMPANY, COMPANY, ..."
622,X51008164998.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[87, 341, 132, 377], [137, 341, 182, 377], [1...",{'COMPANY': 'ONE ONE THREE SEAFOOD RESTAURANT ...,"[COMPANY, COMPANY, COMPANY, COMPANY, COMPANY, ..."
623,X51008164999.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[85, 334, 130, 363], [135, 334, 180, 363], [1...",{'COMPANY': 'ONE ONE THREE SEAFOOD RESTAURANT ...,"[COMPANY, COMPANY, COMPANY, COMPANY, COMPANY, ..."
624,X51009453801.txt,"[3180303, GL, HANDICRAFT, &, TAIL, ORING, 19,,...","[[16, 40, 131, 62], [104, 195, 203, 219], [208...","{'COMPANY': 'GL HANDICRAFT & TAIL ORING', 'DAT...","[O, COMPANY, COMPANY, COMPANY, COMPANY, COMPAN..."


Here we prompt gpt to give us the corresponding values for tax and currency

In [55]:
message_prompt = """You are POS receipt data expert, parse, detect, recognize and convert following receipt OCR image result into structure receipt data object. 
Don't make up value not in the Input. Output must be a well-formed JSON object. give me ONLY the list of:
- date named entity (DD/MM/YYYY only format)
- ADDRESS named entity
- COMPANY named entity
- tax value entity
- Total price value entity
- Currency value entity (euro USD etc ...)
Format the output in json with the following keys:
- COMPANY for organization named entity
- ADDRESS for location named entity
- DATE for date named entity
- TAX for tax value entity
- TOTAL for total price
- CURRENCY for currency value


it is important to find CURRENCY and TAX, make sure to give to CURRENCY a value that is in the words list e.g do not give USD value if USD doesn't exist and instead $ is existant

And Make sure to not confuse COMPANY and ADDRESS as it is important

list of words below: """

entities_column = []
for indx, row in tqdm(SROIEtest.iterrows()):
    words = row['words']
    sentence = str(words)
    message = message_prompt + '\n' + sentence
    filename = row['filename']

    try:
        # GPT API Prompt
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": message
                }
            ],
            temperature=0.7,
            max_tokens=400,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )

        DictResponse = ast.literal_eval(response.choices[0].message.content)

        entities_column.append(DictResponse)
        
    except Exception as e:
        print(f"Error processing file {row['filename']}: {e}")
        entity_path = os.path.join()
        entities_column.append()

        continue
SROIEtest['entities'] = entities_column

SROIEtest.to_pickle('SROIE2019Test')

269it [11:44,  2.32s/it]

Error processing file X51006912998.txt: malformed node or string on line 5: <ast.Name object at 0x0000029D6282B400>


282it [12:22,  2.96s/it]

Error processing file X51007231276.txt: unterminated string literal (detected at line 3) (<unknown>, line 3)


347it [15:14,  2.64s/it]


ValueError: Length of values (345) does not match length of index (347)

In [19]:
folder_entities_path = './SROIE2019/train/entities'

for indx, row in SROIEtrain.iterrows():
    filename = row['filename']
    file_entity = os.path.join(folder_entities_path, filename)
    
    with open(file_entity, 'r') as file:
        DictEntities = json.load(file)

    if row['entities']['TAX'] == row['entities']['TOTAL']:
        row['entities']['TAX'] = '0'

    newDict = {
        'COMPANY': DictEntities.get('company', row['entities']['COMPANY']),
        'ADDRESS': DictEntities.get('address', row['entities']['ADDRESS']),
        'DATE': DictEntities.get('date', row['entities']['DATE']),
        'TOTAL': DictEntities.get('total', row['entities']['TOTAL']),
        'TAX': row['entities']['TAX'],
        'CURRENCY': row['entities']['CURRENCY']
    }
    
    row['entities'] = newDict


In [25]:
SROIEtrain.to_pickle('./SROIETrain')

In [27]:
ner_tags_SROIE2019 = []

for indx ,row in tqdm(SROIEtrain.iterrows()):
    words = row['words'] 
    boxes = row['boxes']
    entities = row['entities']

    ner_tags = AssignNERTags(words=words,entities=row['entities'],boxes=boxes)

    ner_tags_SROIE2019.append(ner_tags)
    
    filename = row['filename']


SROIEtrain['ner_tags'] = ner_tags_SROIE2019

0it [00:00, ?it/s]

626it [00:15, 40.29it/s]


In [29]:
SROIEtrain.to_pickle('./SROIE2019Train')

In [33]:
image_column = []

for indx ,row in tqdm(SROIEtrain.iterrows()):
    filename = row['filename']
    folder_path = './SROIE2019/train/img'
    image_path = os.path.join(folder_path, filename)
    base_name, extension = os.path.splitext(image_path)

    # Replace the extension with .jpg
    image_path = base_name + '.jpg'
    image = Image.open(image_path)

    image_column.append(image)

SROIEtrain['image'] = image_column

626it [00:00, 1509.96it/s]


In [37]:
indexes_to_drop = []
for indx ,row in tqdm(SROIEtrain.iterrows()):
    image = row['image']
    print(image.size)
    # image_array = np.array(image)
    # ndim = image_array.ndim
    # if ndim < 3 :
    #     print(indx, f'has only {ndim} dimensions ')
    #     indexes_to_drop.append(indx)

# SROIEtrain.drop(index = indexes_to_drop, inplace= True)


623it [00:00, 11268.20it/s]

(463, 1013)
(439, 1004)
(459, 949)
(461, 933)
(463, 1026)
(463, 605)
(457, 1170)
(463, 797)
(992, 1403)
(604, 1716)
(873, 1656)
(752, 2214)
(747, 1412)
(807, 1390)
(780, 2044)
(747, 1412)
(818, 1924)
(835, 2333)
(818, 1924)
(447, 915)
(623, 1255)
(993, 2481)
(953, 1629)
(2481, 3508)
(2481, 3508)
(2481, 3508)
(1654, 2339)
(1654, 2339)
(949, 1349)
(748, 1338)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(1080, 1519)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(591, 1711)
(619, 1131)
(619, 1203)
(627, 1235)
(627, 1535)
(1080, 1527)
(1080, 1528)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(1080, 1528)
(1080, 1528)
(1080, 1528)
(1080, 1528)
(1080, 1528)
(1080, 1528)
(1080, 1528)
(1080, 1527)
(1080, 1527)
(1080, 1527)
(789, 1772)
(792, 2748)
(753, 1305)
(664, 1664)
(668, 1598)
(936, 1766)
(932, 2160)
(932, 1987)
(932, 1771)
(888, 1571)
(888, 1718)
(928, 2130)
(932, 2683)
(615, 931)
(583, 1303)
(615, 931)
(619, 875)
(944, 2439)
(936, 1841)
(932, 2220)
(884, 1678)
(936, 




In [36]:
SROIEtrain = SROIEtrain.reset_index()

Unnamed: 0,index,filename,words,boxes,entities,ner_tags,image
0,0,X00016469612.txt,"[TAN, WOON, YANN, BOOK, TA, .K(TAMAN, DAYA), S...","[[72, 25, 130, 64], [135, 25, 213, 64], [218, ...","{'COMPANY': 'BOOK TA .K (TAMAN DAYA) SDN BHD',...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
1,1,X00016469619.txt,"[TAN, WOON, YANN, INDAH, GIFT, &, HOME, DECO, ...","[[76, 50, 133, 84], [138, 50, 214, 84], [219, ...","{'COMPANY': 'INDAH GIFT & HOME DECO', 'DATE': ...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
2,2,X00016469620.txt,"[TAN, WOON, YANN, MR, D.T.Y., (JOHOR), SDN, BH...","[[119, 47, 176, 80], [181, 47, 257, 80], [262,...","{'COMPANY': 'MR D.I.Y. (JOHOR) SDN BHD', 'DATE...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
3,3,X00016469622.txt,"[TAN, WOON, YANN, YONGFATT, ENTERPRISE, (JM051...","[[96, 41, 155, 79], [160, 41, 239, 79], [244, ...","{'COMPANY': 'YONGFATT ENTERPRISE', 'DATE': '25...","[O, O, O, COMPANY, COMPANY, O, LOCATION, LOCAT...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
4,4,X00016469623.txt,"[TAN, WOON, YANN, MR, D.I.Y., (M), SDN, BHD, (...","[[83, 41, 140, 78], [145, 41, 221, 78], [226, ...","{'COMPANY': 'MR D.I.Y. (M) SDN BHD', 'DATE': '...","[O, O, O, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
...,...,...,...,...,...,...,...
618,621,X51008164997.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[86, 329, 131, 368], [136, 329, 181, 368], [1...",{'COMPANY': 'ONE ONE THREE SEAFOOD RESTAURANT ...,"[COMPANY, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
619,622,X51008164998.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[87, 341, 132, 377], [137, 341, 182, 377], [1...",{'COMPANY': 'ONE ONE THREE SEAFOOD RESTAURANT ...,"[COMPANY, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
620,623,X51008164999.txt,"[ONE, ONE, THREE, SEAFOOD, RESTAURANT, SDN, BH...","[[85, 334, 130, 363], [135, 334, 180, 363], [1...",{'COMPANY': 'ONE ONE THREE SEAFOOD RESTAURANT ...,"[COMPANY, COMPANY, COMPANY, COMPANY, COMPANY, ...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
621,624,X51009453801.txt,"[3180303, GL, HANDICRAFT, &, TAIL, ORING, 19,,...","[[16, 40, 131, 62], [104, 195, 203, 219], [208...","{'COMPANY': 'GL HANDICRAFT & TAIL ORING', 'DAT...","[O, COMPANY, COMPANY, COMPANY, COMPANY, COMPAN...",<PIL.JpegImagePlugin.JpegImageFile image mode=...
