In [1]:
import logging
import boto3
from botocore.exceptions import ClientError
from botocore.exceptions import NoCredentialsError
import os


In [55]:


def upload_file_to_s3(file_name, bucket_name, object_name=None):
    """
    Send the contents of a local file to an S3 bucket with ``put_object``.

    :param file_name: Path of the local file whose bytes are uploaded
    :param bucket_name: Name of the destination S3 bucket
    :param object_name: Key to store the object under. When omitted, the
        value of ``file_name`` (the path exactly as given) is used
    :return: The ``put_object`` response on success; ``None`` after printing
        a message when the local file is missing or AWS credentials are
        unavailable
    """
    # Without an explicit key, the local path doubles as the S3 key.
    key = file_name if object_name is None else object_name

    s3_client = boto3.client('s3')

    try:
        # Pass the open binary stream so the file's contents, not its path
        # string, become the object body.
        with open(file_name, "rb") as stream:
            result = s3_client.put_object(Body=stream, Bucket=bucket_name, Key=key)
        print(result)
        return result
    except FileNotFoundError:
        print(f"The file {file_name} was not found.")
    except NoCredentialsError:
        print("AWS credentials not available.")



In [56]:
# Use your own bucket and file name
bucket_name = "test-bucket-cnevares-2024"
file_name = "data/Supplies1.jpg"
# Key the object by the file's base name. basename() returns the final path
# component for any directory depth and also accepts a path with no directory.
object_name = os.path.basename(file_name)

s = upload_file_to_s3(file_name, bucket_name, object_name=object_name)

{'ResponseMetadata': {'RequestId': 'M8B731H3N9WJV9N5', 'HostId': '/3wpVWxjwlxMIFCb3oYEE/lTPkbo2CeodY+rlD9bTiG9/YHoISw4NvAQDZygKwQ5nL3KQnUqiC1+Q7S3fs1MSKzQVRc5J01e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': '/3wpVWxjwlxMIFCb3oYEE/lTPkbo2CeodY+rlD9bTiG9/YHoISw4NvAQDZygKwQ5nL3KQnUqiC1+Q7S3fs1MSKzQVRc5J01e', 'x-amz-request-id': 'M8B731H3N9WJV9N5', 'date': 'Tue, 24 Dec 2024 21:00:24 GMT', 'x-amz-server-side-encryption': 'AES256', 'etag': '"276a1a0737b038e64d9324f56ae0fdcc"', 'content-length': '0', 'server': 'AmazonS3'}, 'RetryAttempts': 0}, 'ETag': '"276a1a0737b038e64d9324f56ae0fdcc"', 'ServerSideEncryption': 'AES256'}


In [62]:
def analyze_receipt(bucket_name, object_name):
    """
    Run Textract expense analysis on a document stored in S3.

    :param bucket_name: Name of the S3 bucket holding the document
    :param object_name: S3 object name of the document
    :return: The ``analyze_expense`` response on success; ``None`` after
        printing a message when one of the handled errors occurs
    """
    client = boto3.client('textract')

    try:
        response = client.analyze_expense(
            Document={
                "S3Object": {
                    "Bucket": bucket_name,
                    "Name": object_name,
                }
            }
        )
    except FileNotFoundError:
        # Report this function's own argument; no `file_name` is defined in
        # this scope, so formatting it here would depend on a notebook global.
        # NOTE(review): a missing S3 object is presumably reported by the
        # service as a botocore ClientError rather than FileNotFoundError --
        # confirm. Such errors are not handled here and reach the caller.
        print(f"The file {object_name} was not found.")
        return None
    except NoCredentialsError:
        print("AWS credentials not available.")
        return None

    print("Success!")
    return response

In [63]:
# Run expense analysis on the S3 object named by the notebook-level
# bucket_name / object_name; `s` is rebound to the analysis result.
s = analyze_receipt(bucket_name=bucket_name, object_name=object_name)

Success!


In [98]:
# Retrieving the FIRST itemization: first expense field of the first line item
# in the first line-item group of the first expense document.
first_field = s['ExpenseDocuments'][0]['LineItemGroups'][0]['LineItems'][0]['LineItemExpenseFields'][0]
print(first_field['Type']['Text'])
print(first_field['ValueDetection']['Text'])

ITEM
1 of: Elgato Wave: 3 Premium Studio
Free Mixer Quality USB Condenser Microphone for Streaming, Podcast, Gaming and Home Office,
Sold by: Amazon.com Software, Sound Effect Plugins, Anti-Distortion, Plug 'n Play, for Mac, PC
Services, Inc
Supplied by: Other


In [99]:
## The Fields detected and their values
# Iterate over the summary fields directly instead of indexing by position.
for field in s['ExpenseDocuments'][0]['SummaryFields']:
    print(field['Type']['Text'] + ": " + field['ValueDetection']['Text'])
    print()
print()

ADDRESS: Christopher S, Nevares
17035 W VALLEY HWY
SEATTLE, WA 98188-5519
United States

STREET: 17035 W VALLEY HWY

CITY: SEATTLE,

STATE: WA

ZIP_CODE: 98188-5519

COUNTRY: United States

NAME: Christopher S, Nevares

ADDRESS_BLOCK: 17035 W VALLEY HWY
SEATTLE, WA 98188-5519
United States

ADDRESS: Christopher S Nevares
10812 9TH ST E
EDGEWOOD, WA 98372-6607
United States

STREET: 10812 9TH ST E

CITY: EDGEWOOD,

STATE: WA

ZIP_CODE: 98372-6607

COUNTRY: United States

NAME: Christopher S Nevares

ADDRESS_BLOCK: 10812 9TH ST E
EDGEWOOD, WA 98372-6607
United States

NAME: Amazon.com Inc.

INVOICE_RECEIPT_DATE: 8/13/24.

INVOICE_RECEIPT_ID: 112-9546U10-U268240

INVOICE_RECEIPT_ID: #112-9546010-0268240

INVOICE_RECEIPT_ID: 112-9546010-0268240

ORDER_DATE: August 13, 2024

RECEIVER_ADDRESS: Christopher S, Nevares
17035 W VALLEY HWY
SEATTLE, WA 98188-5519
United States

RECEIVER_ADDRESS: Christopher S Nevares
10812 9TH ST E
EDGEWOOD, WA 98372-6607
United States

RECEIVER_NAME: Christopher 

In [106]:
# Analyze a local file

def analyze_receipt_local(file_name):
    """
    Run Textract expense analysis on a document read from the local disk.

    :param file_name: Path of the local document; its raw bytes are sent
        inline in the request
    :return: The ``analyze_expense`` response on success; ``None`` after
        printing a message when the file is missing or AWS credentials are
        unavailable
    """
    textract = boto3.client('textract')

    try:
        with open(file_name, "rb") as source:
            # The whole file is read into memory and sent as the request body.
            payload = {'Bytes': source.read()}
            result = textract.analyze_expense(Document=payload)
        print("Success!")
        return result
    except FileNotFoundError:
        print(f"The file {file_name} was not found.")
    except NoCredentialsError:
        print("AWS credentials not available.")

In [None]:
# Path of the local receipt image; passed to the call below so the path is
# written in exactly one place.
file_name = 'data/Supplies1.jpg'
analyze_receipt_local(file_name)

In [108]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.
