/
client.py
119 lines (89 loc) · 3.33 KB
/
client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Purpose
Test code for running the Amazon Textract Lambda
function example code.
"""
import argparse
import logging
import base64
import json
import io
import boto3
from botocore.exceptions import ClientError
from PIL import Image, ImageDraw
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def analyze_image(function_name, image):
    """Analyzes a document with an AWS Lambda function.

    :param function_name: The name of the AWS Lambda function to invoke.
    :param image: The document that you want to analyze. Either an Amazon S3
        URI of the form ``s3://bucket/key`` or the path of a local file.
    :return: The function's response payload, decoded from JSON.
    :raises ValueError: If an S3 URI does not contain both a bucket and a key.
    """
    s3_prefix = 's3://'

    # Build the payload before creating the client so that a bad argument
    # fails fast, without touching AWS.
    if image.startswith(s3_prefix):
        # Strip only the leading scheme; str.replace would also remove any
        # later "s3://" that happens to occur inside the object key.
        bucket, _, key = image[len(s3_prefix):].partition('/')
        if not bucket or not key:
            raise ValueError(
                "Expected an S3 URI of the form s3://bucket/key, got: "
                + image)
        logger.info("Analyzing document from S3 bucket: %s", image)
        lambda_payload = {"S3Object": {'Bucket': bucket, 'Name': key}}
    else:
        with open(image, 'rb') as image_file:
            logger.info("Analyzing local document: %s ", image)
            image_bytes = image_file.read()
        # JSON cannot carry raw bytes, so the file is sent base64-encoded.
        data = base64.b64encode(image_bytes).decode("utf8")
        lambda_payload = {"image": data}

    # Call the lambda function with the document.
    lambda_client = boto3.client('lambda')
    response = lambda_client.invoke(FunctionName=function_name,
                                    Payload=json.dumps(lambda_payload))
    return json.loads(response['Payload'].read().decode())
def add_arguments(parser):
    """
    Registers this script's positional command line arguments.

    :param parser: The command line parser to add the arguments to.
    """
    # (argument name, help text) pairs, in the order they appear on the
    # command line.
    positionals = (
        ("function",
         "The name of the AWS Lambda function that you want "
         "to use to analyze the document."),
        ("image", "The document that you want to analyze."),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, help=help_text)
def main():
    """
    Entrypoint for script.

    Parses the command line, sends the document to the Lambda function, and
    prints either the detected blocks or the error that the function
    returned. A botocore ClientError is logged and printed rather than
    propagated.
    """
    try:
        logging.basicConfig(level=logging.INFO,
                            format="%(levelname)s: %(message)s")

        # Get command line arguments.
        parser = argparse.ArgumentParser(usage=argparse.SUPPRESS)
        add_arguments(parser)
        args = parser.parse_args()

        # Get analysis results.
        result = analyze_image(args.function, args.image)
        status = result['statusCode']

        if status != 200:
            # Report the body exactly as received. It is only parsed as JSON
            # on success, so a plain-text error message cannot make the
            # error report itself fail.
            print(f"Error: {status}")
            print(f"Message: {result['body']}")
            return

        blocks = json.loads(result['body'])

        for block in blocks:
            print('Type: ' + block['BlockType'])
            if block['BlockType'] != 'PAGE':
                # NOTE(review): some Textract block types may not carry a
                # 'Text' field; skip the line instead of raising KeyError.
                if 'Text' in block:
                    print('Detected: ' + block['Text'])
                print('Confidence: ' + "{:.2f}".format(block['Confidence']) + "%")

            print('Id: {}'.format(block['Id']))
            if 'Relationships' in block:
                print('Relationships: {}'.format(block['Relationships']))
            print('Bounding Box: {}'.format(block['Geometry']['BoundingBox']))
            print('Polygon: {}'.format(block['Geometry']['Polygon']))
            print()
        print("Blocks detected: " + str(len(blocks)))

    except ClientError as error:
        logging.error(error)
        print(error)


# Run the client only when this file is executed as a script.
if __name__ == "__main__":
    main()