In [None]:
# ===============================================================================================================#
# Copyright 2023 Infosys Ltd.                                                                                    #
# Use of this source code is governed by Apache License Version 2.0 that can be found in the LICENSE file or at  #
# http://www.apache.org/licenses/                                                                                #
# ===============================================================================================================#

# Use case 05 - Extract Chunks From Image

<hr/>

## Sample Task: Document preprocessing (segmentation, chunking) using a sample Annual Report

<hr/>

<img src="./data/infy_annual/infy_annual1.jpg" style="border-style: dotted;" width=1000 height=700 />

## Installation of Required Libraries
#### Uncomment the lines below and run the cell

In [None]:
# Optional one-time setup: uncomment the lines below to install the bundled wheels from ./lib.
# `%pip` (rather than a bare `pip`) installs into the environment of the running kernel.
# %pip install ./lib/infy_dpp_sdk-0.0.5-py3-none-any.whl
# %pip install ./lib/infy_dpp_core-0.0.1-py3-none-any.whl
# %pip install ./lib/infy_dpp_segmentation-0.0.1-py3-none-any.whl

## 1. Importing Necessary Libraries

In [1]:
import os
import json
import shutil
import pandas as pd
import infy_dpp_core
import infy_dpp_segmentation
import infy_dpp_sdk
from IPython.display import display, HTML

No module named 'torch'


## 2a. Loading Your Input and Pipeline Config

In [6]:
# Absolute location of the sample data folder; the other paths in this notebook are built from it.
DATA_FOLDER_PATH = os.path.abspath('./data')
# Pipeline definition file. The parts are joined with a literal '/' (not os.path.join)
# so the resulting string is the same on every platform.
PROCESSOR_INPUT_CONFIG_PATH = '/'.join(
    (DATA_FOLDER_PATH, 'config', 'dpp_pipeline1_input_config.json'))

In [7]:
# Stage the sample image in the input folder under the data directory.
input_path = f'{DATA_FOLDER_PATH}/input'
# makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it is safe to re-run,
# has no check-then-create race, and also creates missing parent folders.
os.makedirs(input_path, exist_ok=True)
shutil.copy(f'{DATA_FOLDER_PATH}/annual_report/annual_report1.jpg',
            f'{input_path}/annual_report1.jpg')
print('File copied to input folder')

File copied to input folder


## 2b. Configuration

In [8]:
# Client-side settings handed to each library's ClientConfigData.
CLIENT_CONFIG_DATA_DICT = {
    # Object store settings; here the store is the local data folder (file:// URI).
    "storage_data": {
        "storage_uri": f"file://{DATA_FOLDER_PATH}",
        # Left unset for local storage.
        # NOTE(review): presumably only needed for a remote object store - confirm.
        "storage_server_url": None,
        "storage_access_key": None,
        "storage_secret_key": None,
        # NOTE(review): 40 looks like Python's logging.ERROR level - confirm.
        "logging_level": 40
    },
    # Local root path used by the container.
    "container_data": {
        "container_root_path": f"{DATA_FOLDER_PATH}",
    }
}

##### \* Storage data (`storage_data`) is for object store configuration
##### \* Container data (`container_data`) is for the container's local path

## 3. Initializing Config Data

In [9]:
# Load the same client settings into the ConfigurationManager of each library, core first.
for dpp_module in (infy_dpp_core, infy_dpp_segmentation):
    client_config_data = dpp_module.ClientConfigData(**CLIENT_CONFIG_DATA_DICT)
    dpp_module.ConfigurationManager().load(client_config_data)

# Read the pipeline definition that the orchestrator will execute.
config_data_json = infy_dpp_core.common.FileUtil.load_json(PROCESSOR_INPUT_CONFIG_PATH)

## 4. Execute Pipeline (Batch Mode)

In [None]:
# Declare the orchestrator variable against the IOrchestratorNative interface,
# starting from a None placeholder.
orchestrator_native_obj: infy_dpp_sdk.interface.i_orchestrator_native.IOrchestratorNative = None
# Instantiate the concrete orchestrator with the pipeline config loaded earlier.
orchestrator_native_obj = infy_dpp_sdk.orchestrator.controller.OrchestratorNativeBasic(config_data_json)
# Run the pipeline in batch mode; later cells index the result as a list with one entry per response.
# NOTE(review): presumably one response per input document - confirm against the SDK.
response_data_list = orchestrator_native_obj.run_batch()

## 5. Output File Path

In [11]:
# Take the first response of the batch and display the 'request_closer' entry of its
# context data (left as the last expression so the notebook renders it).
first_response = response_data_list[0]
first_response.context_data.get('request_closer')

{'output_file_path': '/output/D-0e803f24-f5bf-4603-a76d-2b6f3e9011f7'}

## 6. Review Output

In [12]:
# Round-trip the first response through the SDK's JSON encoder so it can be
# navigated as plain dicts and lists in the cells below.
encoded_response = infy_dpp_sdk.common.InfyJSONEncoder().encode(response_data_list[0])
response_data_json = json.loads(encoded_response)

### 6a. Page Wise Data 

In [13]:
# Walk down to the per-page segment data produced by the chunk parser and print page '1'.
page_segment_data = response_data_json['context_data']['chunk_data_parser']['page_segment_data']
print(page_segment_data['1'])

Business highlights
Performance overview
28
₹1,46,767cr (par value of ₹ 5 each)
20.7% growth Y-o-Y
25.6% CC growth Y-o-Y
Digital revenues
(as a % of total revenue)
15.4% CC growth Y-o-Y
Revenues
62.2%
Infosys
0
H2)
9.7% growth Y-o-Y
Basic earnings per share
57.63
Robust operating margin
Operating margin
21.0%
Consolidated cash and
investments(2)
liquidity position
Continue to main strong
₹ 31,286cr
Dividend per share (in ₹)
^34.0
9.7% growth Y-o-Y
10
O
leading revenue growth of
value for all our stakeholders.
2023. Our ESG Vision 2030 and
margin of 21.0% for fiscal
ambitions continue to drive
Infosys achieved industry-
15.4% with healthy operating
Free cash(1)
Number of US$ 50 million + clients
₹ 20,443cr
Strong client metrics with increase
of 11 clients Y-o-Y
75
FCF conversion at 84.8% of net profit
Buyback completed
₹ 9,300cr
at an average price of ₹ 1,539.06
$9.8b
Sustained momentum in large
Return on equity
the last fiscal
Large deal TCV
deal wins continues
31.2%
Improved by 2.1% o