In [None]:
# ===============================================================================================================#
# Copyright 2022 Infosys Ltd.                                                                          #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

## 1. Preparation

In [None]:
from _internal_utils.demo_helper import DemoHelper
import pandas as pd
from IPython.display import Image

#### <span style='color:blue'>Enter image and OCR file paths</span>

In [None]:
# Input page images; each entry is paired by position with its OCR file below.
IMAGE_PATHS = [
    "./data/coi_1.jpg",
    "./data/coi_2.jpg",
]
# OCR output files, listed in the same order as IMAGE_PATHS.
IMAGE_OCR_PATHS = [
    './data/coi_1.jpg.hocr',
    './data/coi_2.jpg.hocr',
]

#### Visualize

In [None]:
# Build a tab toolbar from the image paths and show it, then render each
# image inside the tab child at the matching index.
tab_widget = DemoHelper.create_tab_toolbar(IMAGE_PATHS)
display(tab_widget)
for idx, image_path in enumerate(IMAGE_PATHS):
    tab_output = tab_widget.children[idx]
    with tab_output:
        page_image = Image(filename=image_path, width=1000, height=50)
        display(page_image)

## 2. Initialization

#### <span style='color:blue'>Create new instance with desired OCR service provider</span>

In [None]:
import json
import logging
import os

from infy_ocr_parser import ocr_parser
from infy_ocr_parser.providers.tesseract_ocr_data_service_provider import TesseractOcrDataServiceProvider
from infy_ocr_parser.providers.azure_read_ocr_data_service_provider import AzureReadOcrDataServiceProvider

# Resolve the OCR file paths to absolute paths before handing them to the parser.
ocr_file_list = [os.path.abspath(ocr_file) for ocr_file in IMAGE_OCR_PATHS]

# Create the log folder if it is missing; exist_ok=True makes this safe to
# re-run and avoids a separate check-then-create step.
os.makedirs("./logs", exist_ok=True)
# NOTE: logging.basicConfig() has no effect if the root logger already has
# handlers configured (e.g. when this cell is re-run in the same kernel).
logging.basicConfig(
    filename=("./logs" + "/app_log.log"),
    format="%(asctime)s- %(levelname)s- %(message)s",
    level=logging.INFO,
    datefmt="%d-%b-%y %H:%M:%S",
)
logger = logging.getLogger()

# Tesseract OCR service provider (active by default)
data_service_provider_obj = TesseractOcrDataServiceProvider()
# To use the Azure OCR Read service instead, comment out the line above
# and uncomment the line below
# data_service_provider_obj = AzureReadOcrDataServiceProvider()

# Parser instance used by all the API calls in the cells that follow.
ocr_parse_obj = ocr_parser.OcrParser(ocr_file_list=ocr_file_list,
                                     data_service_provider=data_service_provider_obj,
                                     logger=logger)

#### <span style='color:blue'>Enter Within Bounding Box Coordinates</span>

In [None]:
# Region of interest, given as [x, y, w, h]
my_within_bbox = [
    100,  # x
    590,  # y
    590,  # w
    130,  # h
]

#### Visualize

In [None]:
# Load the first image, outline the region of interest in blue, then reduce
# the image around that region before displaying it.
first_page = DemoHelper.read_image(IMAGE_PATHS[0])
outlined_page = DemoHelper.draw_bboxes_on_image(
    first_page,
    [my_within_bbox],
    border_thickness=4,
    border_color=DemoHelper.Constants.COLOR_BLUE,
)
img = DemoHelper.reduce_image_based_on_context(outlined_page, [my_within_bbox])

DemoHelper.show_image(img)

## 3. API - get_tokens_from_ocr()

In [None]:
# Values passed as `token_type_value` to get_tokens_from_ocr()
TOKEN_TYPE_WORD = 1
TOKEN_TYPE_LINE = 2
TOKEN_TYPE_PHRASE = 3

### 3.1 - Get `word` tokens for the given region and pages

In [None]:
# Request word tokens on page 1, restricted to the region of interest.
result = ocr_parse_obj.get_tokens_from_ocr(token_type_value=TOKEN_TYPE_WORD,
                                           within_bbox=my_within_bbox,
                                           pages=[1])

#### Output

In [None]:
# Flatten the returned token records into a table and report how many came back.
df = pd.json_normalize(result)
record_count = len(df)
print('No. of records found =', record_count)
df

#### Visualize

In [None]:
# Get all word tokens on page 1, i.e. without any bounding box restrictions.
# Uses the named constant rather than the bare literal 1 so the token type is
# explicit and consistent with the query that produced `result`.
all_tokens = ocr_parse_obj.get_tokens_from_ocr(
    token_type_value=TOKEN_TYPE_WORD,
    pages=[1]
)
all_tokens_text_bboxes = [token['bbox'] for token in all_tokens]

# Boxes of the tokens returned by the region-restricted query
text_bboxes = [token['bbox'] for token in result]
# Matched boxes plus the region itself; used to reduce the image around them
all_bboxes = text_bboxes + [my_within_bbox]

# Layers are drawn in this order: all tokens (gray), region (blue), matches (red)
img = DemoHelper.read_image(IMAGE_PATHS[0])
img = DemoHelper.draw_bboxes_on_image(img, all_tokens_text_bboxes, border_color=DemoHelper.Constants.COLOR_GRAY)
img = DemoHelper.draw_bboxes_on_image(img, [my_within_bbox], border_color=DemoHelper.Constants.COLOR_BLUE)
img = DemoHelper.draw_bboxes_on_image(img, text_bboxes, border_color=DemoHelper.Constants.COLOR_RED)
img = DemoHelper.reduce_image_based_on_context(img, all_bboxes)

DemoHelper.show_image(img)

### 3.2 - Get `phrase` tokens for the given region and pages

In [None]:
# Request phrase tokens on page 1, restricted to the region of interest.
result = ocr_parse_obj.get_tokens_from_ocr(token_type_value=TOKEN_TYPE_PHRASE,
                                           within_bbox=my_within_bbox,
                                           pages=[1])

#### Output

In [None]:
# Flatten the returned token records into a table and report how many came back.
df = pd.json_normalize(result)
record_count = len(df)
print('No. of records found =', record_count)
df

#### Visualize

In [None]:
# Get all phrase tokens on page 1, i.e. without any bounding box restrictions.
# Uses the named constant rather than the bare literal 3 so the token type is
# explicit and consistent with the query that produced `result`.
all_tokens = ocr_parse_obj.get_tokens_from_ocr(
    token_type_value=TOKEN_TYPE_PHRASE,
    pages=[1]
)
all_tokens_text_bboxes = [token['bbox'] for token in all_tokens]

# Boxes of the tokens returned by the region-restricted query
text_bboxes = [token['bbox'] for token in result]
# Matched boxes plus the region itself; used to reduce the image around them
all_bboxes = text_bboxes + [my_within_bbox]

# Layers are drawn in this order: all tokens (gray), region (blue), matches (red)
img = DemoHelper.read_image(IMAGE_PATHS[0])
img = DemoHelper.draw_bboxes_on_image(img, all_tokens_text_bboxes, border_color=DemoHelper.Constants.COLOR_GRAY)
img = DemoHelper.draw_bboxes_on_image(img, [my_within_bbox], border_color=DemoHelper.Constants.COLOR_BLUE)
img = DemoHelper.draw_bboxes_on_image(img, text_bboxes, border_color=DemoHelper.Constants.COLOR_RED)
img = DemoHelper.reduce_image_based_on_context(img, all_bboxes)

DemoHelper.show_image(img)

### 3.3 - Get `line` tokens for the given region and pages

In [None]:
# Request line tokens on page 1, restricted to the region of interest.
result = ocr_parse_obj.get_tokens_from_ocr(token_type_value=TOKEN_TYPE_LINE,
                                           within_bbox=my_within_bbox,
                                           pages=[1])

#### Output

In [None]:
# Flatten the returned token records into a table and report how many came back.
df = pd.json_normalize(result)
record_count = len(df)
print('No. of records found =', record_count)
# NOTE(review): only rows at positions 1-4 are displayed, so the first record
# is skipped, unlike the other output cells which show the whole frame.
# Confirm this is intentional.
df.iloc[1:5]

#### Visualize

In [None]:
# Get all line tokens on page 1, i.e. without any bounding box restrictions.
# Uses the named constant rather than the bare literal 2 so the token type is
# explicit and consistent with the query that produced `result`.
all_tokens = ocr_parse_obj.get_tokens_from_ocr(
    token_type_value=TOKEN_TYPE_LINE,
    pages=[1]
)
all_tokens_text_bboxes = [token['bbox'] for token in all_tokens]

# Boxes of the tokens returned by the region-restricted query
text_bboxes = [token['bbox'] for token in result]
# Matched boxes plus the region itself; used to reduce the image around them
all_bboxes = text_bboxes + [my_within_bbox]

# Layers are drawn in this order: all tokens (gray), region (blue), matches (red)
img = DemoHelper.read_image(IMAGE_PATHS[0])
img = DemoHelper.draw_bboxes_on_image(img, all_tokens_text_bboxes, border_color=DemoHelper.Constants.COLOR_GRAY)
img = DemoHelper.draw_bboxes_on_image(img, [my_within_bbox], border_color=DemoHelper.Constants.COLOR_BLUE)
img = DemoHelper.draw_bboxes_on_image(img, text_bboxes, border_color=DemoHelper.Constants.COLOR_RED)
img = DemoHelper.reduce_image_based_on_context(img, all_bboxes)

DemoHelper.show_image(img)