## Summary

This step function takes a receipt placed in S3, runs the receipt through Textract, extracts the date and amount, and categorizes the receipt by querying Bedrock. It also saves receipt data into two relational tables deployed with AWS RDS. 
The following functions are the corresponding Lambda functions for the state machine/step function defined in the json and image <br>
<br>
<br>
WIP Updated 2/17/2025

## Outside of step function

### GetObjectNameTriggerStepFunction

This Lambda is not part of the step function. When invoked, it starts a step function execution, polls until the execution finishes, and returns the execution's output.

**Sample Input**:
{
  "key": "Airfare1.jpg",
  "user_id": "123456"
}

**Sample Output**:
{
  "bucket": "receipts-for-step",
  "key": "Airfare1.jpg",
  "user_id": "123456"
}

In [None]:
import json
import urllib.parse
import boto3
import time
import os

# Runs once when the module is imported, before any handler invocation.
print('Loading function')

# Module-level Step Functions client used by lambda_handler and
# get_execution_output in this cell.
stepfunction_client = boto3.client('stepfunctions')



def lambda_handler(event, context):
    """Start the receipt step function and return its final output.

    The state machine ARN and the S3 bucket name are read from the
    ``STEP_FUNCTION_ARN`` and ``BUCKET`` environment variables. The execution
    is started with ``{"bucket", "key", "user_id"}`` as input and this
    handler then blocks until the execution reaches a terminal state.

    Args:
        event: dict with ``key`` (receipt object name) and ``user_id``.
        context: Lambda context object (unused).

    Returns:
        The parsed JSON output of the step function execution.

    Raises:
        KeyError: if ``key`` or ``user_id`` is missing from ``event``.
        Exception: any error raised while starting or polling the execution
            is logged and re-raised unchanged.
    """
    step_function_arn = os.environ.get("STEP_FUNCTION_ARN")
    bucket = os.environ.get('BUCKET')
    key = event['key']
    user_id = event['user_id']

    try:
        print("in try loop")
        step_function_input = {
            "bucket": bucket,
            "key": key,
            "user_id": user_id
        }

        # Trigger the Step Function
        response = stepfunction_client.start_execution(
            stateMachineArn=step_function_arn,
            input=json.dumps(step_function_input)
        )
        print("returned from step function")
        print(response)

        # Get the executionArn
        execution_arn = response['executionArn']
        print(f"Execution ARN: {execution_arn}")

        # Poll for the Step Function execution result
        output = get_execution_output(execution_arn)
        print(output)
        return output

    except Exception as e:
        print(e)
        # This handler never reads the object itself, so describe what actually
        # failed: starting or waiting on the step function execution.
        print(f"Error running step function {step_function_arn} for object {key} in bucket {bucket}.")
        # Bare raise keeps the original traceback intact.
        raise

def get_execution_output(execution_arn, poll_interval=2):
    """Poll a step function execution until it finishes and return its output.

    Args:
        execution_arn: ARN of the execution to watch.
        poll_interval: seconds to sleep between ``describe_execution`` calls.

    Returns:
        The execution output, parsed from JSON, once the status is
        ``SUCCEEDED``.

    Raises:
        Exception: if the execution ends as ``FAILED``, ``TIMED_OUT`` or
            ``ABORTED``. The message includes the ``error`` and ``cause``
            fields of the response when they are present.

    Note:
        There is no upper bound on the wait; the loop only ends on one of the
        terminal statuses above.
    """
    while True:
        # Describe the execution
        execution_response = stepfunction_client.describe_execution(executionArn=execution_arn)

        status = execution_response['status']
        if status == 'SUCCEEDED':
            # Return the output if execution succeeded
            return json.loads(execution_response['output'])
        if status in ('FAILED', 'TIMED_OUT', 'ABORTED'):
            message = f"Step Function execution failed with status: {status}"
            error = execution_response.get('error')
            cause = execution_response.get('cause')
            if error or cause:
                # Surface the failure details instead of discarding them.
                message += f" (error: {error}, cause: {cause})"
            raise Exception(message)

        # Wait before polling again
        time.sleep(poll_interval)

## Inside step function

### RetrieveObjectFromS3

Start of step function. Simply parses input to retrieve bucket and object name passed into step function

**Sample Input**:
{
  "bucket": "receipts-for-step",
  "key": "Airfare1.jpg",
  "user_id": "123456"
}

**Sample Output**:
{
  "Bucket": "receipts-for-step",
  "Name": "Airfare1.jpg",
  "user_id": "123456"
}

In [3]:
import json
import urllib.parse
import boto3

# Runs once when the module is imported, before any handler invocation.
print('Loading function')

# NOTE(review): the handler in this cell never uses this S3 client; confirm
# whether it can be removed.
s3 = boto3.client('s3')


def lambda_handler(event, context):
    """Translate the step function input into the ``Bucket``/``Name`` shape.

    Args:
        event: dict with ``bucket`` and ``key``; ``user_id`` is optional.
        context: Lambda context object (unused).

    Returns:
        dict with ``Bucket`` and ``Name``, plus ``user_id`` when the incoming
        event carries one.

    Raises:
        KeyError: if ``bucket`` or ``key`` is missing from ``event``.
    """
    print("IN FUNCTION")

    print("Received event: " + json.dumps(event, indent=2))

    content = {
        'Bucket': event['bucket'],
        'Name': event['key'],
    }

    # Forward user_id so it is not lost at the first state: the documented
    # sample output includes it and CondenseTextractOutput reads it.
    if 'user_id' in event:
        content['user_id'] = event['user_id']

    return content
    

Loading function


### AnalyzeExpense

Analyzes the receipt placed in S3.

**Sample Input**: { "Bucket": "receipts-for-step", "Name": "Airfare1.jpg", "user_id": "123456" }

**Sample Output** (truncated):<br>
{
  "Bucket": "receipts-for-step",
  "Name": "Airfare1.jpg",
  "user_id": "123456",
  "textract_response": {
    "DocumentMetadata": {
      "Pages": 1
    },
    "ExpenseDocuments": [
      {
        "Blocks": [
          {
            "BlockType": "PAGE",
            "Geometry": {
              "BoundingBox": {
                "Height": 0.91024506,
                "Left": 0.03953533,
                "Top": 0.03506707,
                "Width": 0.93437904
              },
              "Polygon": [
                {
                  "X": 0.03953533,
                  "Y": 0.069427595
                },
                {
                  "X": 0.95922524,
                  "Y": 0.03506707
                },
                {
                  "X": 0.9739144,
                  "Y": 0.94531214
                },
                {
                  "X": 0.040106706,
                  "Y": 0.9411977
                }
              ]
            },
            "Id": "18718433-2648-45f1-8810-e86aa1cf6a74",

### CondenseTextractOutput

This function condenses the Textract output by removing confidence and location info

**Sample Input**:
{
  "Bucket": "receipts-for-step",
  "Name": "Airfare1.jpg",
  "user_id": "123456",
  "textract_response": {
    "DocumentMetadata": {
      "Pages": 1
    },
    "ExpenseDocuments": [
      {
        "Blocks": [
          {
            "BlockType": "PAGE",
            "Geometry": {
              "BoundingBox": {
                "Height": 0.91024506,
                "Left": 0.03953533,
                "Top": 0.03506707,
                "Width": 0.93437904
              },
              "Polygon": [
                {
                  "X": 0.03953533,
                  "Y": 0.069427595
                },
                {
                  "X": 0.95922524,
                  "Y": 0.03506707
                },
                {
                  "X": 0.9739144,
                  "Y": 0.94531214
                },
                {
                  "X": 0.040106706,
                  "Y": 0.9411977
                }
              ]
            },
            "Id": "18718433-2648-45f1-8810-e86aa1cf6a74",

**Sample Output**:
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  }
}

In [None]:
import json

def lambda_handler(event, context):
    """Condense a Textract expense response into a flat field -> text mapping.

    Only the first expense document is read. Each summary field contributes
    ``Type.Text -> ValueDetection.Text``; repeated field types are joined with
    a single space. When the first line-item group has line items, the texts
    of the first line item's expense fields are stored under ``items`` as
    ``item0``, ``item1``, ...

    Args:
        event: dict with ``Name``, ``user_id`` and ``textract_response``.
        context: Lambda context object (unused).

    Returns:
        ``{"key", "user_id", "condensed_extract"}``, or
        ``{"key", "user_id", "empty": "empty"}`` when no summary field was
        found (including a response without expense documents).

    Raises:
        KeyError: if ``Name``, ``user_id`` or ``textract_response`` is missing.
    """
    print("received event: ", event)

    name = event['Name']
    user_id = event['user_id']

    # Tolerate a response without documents instead of raising IndexError.
    documents = event['textract_response'].get('ExpenseDocuments') or []
    document = documents[0] if documents else {}

    condensed_extract = {}
    for field in document.get('SummaryFields') or []:
        key = field['Type']['Text']
        value = field['ValueDetection']['Text']
        if key in condensed_extract:
            condensed_extract[key] += " " + value
        else:
            condensed_extract[key] = value

    if not condensed_extract:
        return {
            'key': name,
            'user_id': user_id,
            'empty': 'empty'
        }

    # Line items are handled once, after the summary fields, rather than being
    # rebuilt on every loop iteration. An empty LineItemGroups list is valid
    # and simply yields no 'items' entry.
    groups = document.get('LineItemGroups') or []
    line_items = (groups[0].get('LineItems') or []) if groups else []
    if line_items:
        # As before, only the first line item of the first group is captured.
        condensed_extract['items'] = {
            'item' + str(j): expense_field['ValueDetection']['Text']
            for j, expense_field in enumerate(line_items[0]['LineItemExpenseFields'])
        }

    return {
        'key': name,
        'user_id': user_id,
        'condensed_extract': condensed_extract
    }



### Choice

If Textract returns nothing, the step function stops here and returns a JSON with default values from the function.

### GetAmountAndDate

This function parses the condensed Textract output to retrieve the receipt date and amount.

**Sample Input**: {
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  }
}

**Sample Output**: 
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  }
}

In [None]:
import json
from datetime import datetime
import re

def lambda_handler(event, context):
    """Derive the receipt date and total from the condensed Textract output.

    Date: taken from ``INVOICE_RECEIPT_DATE`` when that field yields a
    candidate date, otherwise searched for in the JSON dump of the whole
    extract. If no candidate is found, or the candidate cannot be parsed,
    the placeholder date 01/01/1899 is used.

    Amount: ``TOTAL`` when it holds a non-zero amount, else ``AMOUNT_PAID``,
    else ``SUBTOTAL`` plus ``TAX`` (tax counts as 0 when absent), else 0.
    The amount is always returned as a string with two decimals.

    Args:
        event: dict with ``key``, ``user_id`` and ``condensed_extract``.
        context: Lambda context object (unused).

    Returns:
        ``{"empty": "empty"}`` when ``condensed_extract`` is empty; otherwise
        a dict with ``key``, ``user_id``, ``predicted_date``,
        ``predicted_amount`` and the unchanged ``condensed_extract``.

    Raises:
        KeyError: if ``key``, ``user_id`` or ``condensed_extract`` is missing.
    """
    json_file = event['condensed_extract']
    key = event['key']
    user_id = event['user_id']

    if len(json_file) == 0:
        return {
            "empty": "empty"
        }

    # ---- date ----
    date = ""
    if 'INVOICE_RECEIPT_DATE' in json_file:
        cleaned = json_file['INVOICE_RECEIPT_DATE'].replace(',', ' ').replace('.', ' ').strip()
        date = extract_date_from_invoice_date_string(cleaned)
    if not date:
        # Field missing or unusable: attempt to extract from the full text.
        date = extract_date_from_full_string(json.dumps(json_file))

    reformatted_date = None
    if date:
        try:
            reformatted_date = reformat_date(date)
        except ValueError:
            # A matched but unparseable candidate falls back to the
            # placeholder instead of failing the whole execution.
            reformatted_date = None

    if reformatted_date is None:
        temp_date = datetime(1899, 1, 1)
        reformatted_date = {
            "full_date": temp_date.strftime('%m/%d/%Y'),
            "month": temp_date.strftime("%m"),
            "year": temp_date.strftime("%Y"),
            "day": temp_date.strftime("%d")
        }

    # ---- amount ----
    def amount_of(field):
        # extract_amt_from_string returns a formatted string; convert it so
        # comparisons and sums are numeric rather than textual.
        return float(extract_amt_from_string(json_file[field]))

    total = amount_of('TOTAL') if 'TOTAL' in json_file else 0.0
    if total == 0.0:
        if 'AMOUNT_PAID' in json_file:
            total = amount_of('AMOUNT_PAID')
        elif 'SUBTOTAL' in json_file:
            tax = amount_of('TAX') if 'TAX' in json_file else 0.0
            total = amount_of('SUBTOTAL') + tax

    extracted_total = f"{total:.2f}"

    content = {
        "key": key,
        "user_id": user_id,
        "predicted_date": reformatted_date,
        "predicted_amount": extracted_total,
        "condensed_extract": json_file
    }

    return content


# Function to extract an amount from a string input from Textract

def extract_amt_from_string(s):
    """Return the largest two-decimal amount found in ``s`` as a string.

    Amounts may use commas as thousands separators ("1,234.56"); the commas
    are dropped before comparison. Numbers without exactly two decimals
    directly after the point are ignored.

    Args:
        s: text to scan, e.g. a Textract field value.

    Returns:
        The largest amount formatted with two decimals (e.g. ``"45.00"``),
        or ``"0.00"`` when no amount is present.
    """
    # Grouped form first so "1,234.56" is read as one number rather than as
    # the trailing "234.56".
    regex = r'\d{1,3}(?:,\d{3})+\.\d{2}|\d+\.\d{2}'
    amounts = [float(found.replace(',', '')) for found in re.findall(regex, s)]
    amount = max(amounts) if amounts else 0

    return f"{amount:.2f}"


def extract_date_from_invoice_date_string(s):
    """Pick a date-looking substring out of an invoice-date field.

    The patterns are tried from most to least specific. The first pattern
    that matches anything decides the result, and its last match in ``s`` is
    returned with surrounding whitespace stripped.

    Args:
        s: text of the invoice date field.

    Returns:
        The matched date text, or ``""`` when no pattern matches.
    """
    prioritized_patterns = (
        r'\b\d{1,2}[A-Za-z]{3}\d{2}\b',             # 22Sep24
        r'\b\d{1,2}[- ][A-Za-z]{3}[- ]\d{4}\b',     # 14-Dec-2024
        r'\b[A-Za-z]+\s+\d{1,2}\s+\d{4}\b',         # September 4  2024
        r'\b[A-Za-z]{3}\s+\d{1,2},?\s+\d{4}\b',     # Sep 4, 2024
        r'\b[A-Za-z]{3}\s+\d{1,2}\b',               # Sep 4
        r'\b\d{4}-\d{1,2}-\d{1,2}\b',               # yyyy-mm-dd
        r'\b\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b',       # mm/dd/yy(yy), mm-dd-yy(yy)
        r'\b\d{1,2}-\d{1,2}\b',                     # mm-dd
        r'\b\d{1,2}/\d{1,2}\b',                     # mm/dd
    )

    # Lazily scan the patterns and stop at the first one with any hit.
    hits_per_pattern = (re.findall(candidate, s) for candidate in prioritized_patterns)
    winning_hits = next((hits for hits in hits_per_pattern if hits), None)

    if winning_hits is None:
        return ""
    return winning_hits[-1].strip()

def extract_date_from_full_string(s):
    """Find the last numeric date (mm/dd/yy or mm/dd/yyyy) in free text.

    Both ``/`` and ``-`` are accepted as the separator, but the same one must
    be used twice. Candidates whose month is outside 1-12 or whose day is
    outside 1-31 are skipped.

    Args:
        s: text to scan (the caller passes the JSON dump of the extract).

    Returns:
        The last plausible date found, or ``""`` when there is none.
    """
    # mm/dd/yy, mm/dd/yyyy, mm-dd-yy, mm-dd-yyyy.
    # The optional second pair of year digits needs its own group: a bare
    # "\d{2}?" is a lazy "exactly two" and would demand a four-digit year.
    regex = r'\b(\d{1,2})([-/])(\d{1,2})\2\d{2}(?:\d{2})?\b'

    plausible = [
        found.group(0)
        for found in re.finditer(regex, s)
        if 1 <= int(found.group(1)) <= 12 and 1 <= int(found.group(3)) <= 31
    ]

    # Return the last plausible match, or an empty string if there is none.
    return plausible[-1].strip() if plausible else ""

# We'll use this to convert whatever date Textract retrieved into a datetime object format m/d/yyyy.

def reformat_date(date_string, default_year=None):
    """Parse a receipt date in one of the known formats and split it up.

    Args:
        date_string: date text such as "09/22/2024", "22Sep24", "Sep 4 2024",
            "2024-09-22" or a year-less form such as "Sep 4" or "09/22".
        default_year: year to assume for year-less inputs. Defaults to the
            current year.

    Returns:
        dict with ``full_date`` (mm/dd/yyyy), ``month``, ``year`` and ``day``,
        all as zero-padded strings.

    Raises:
        ValueError: if ``date_string`` matches none of the known formats.
    """
    # Same formats, in the same order, as before, minus the "%-d"/"%-m"
    # variants (not valid strptime directives, so they could never match) and
    # one duplicate. strptime already accepts non-padded day/month for %d/%m.
    input_formats = (
        "%m/%d/%y", "%m/%d/%Y", "%B %d %Y", "%m-%d-%y", "%m-%d-%Y",
        "%b %d %Y", "%a %b %d", "%d%b%y", "%d-%b-%Y", "%m/%d",
        "%Y-%m-%d", "%m-%d", "%b %d", "%d %b %Y",
    )

    if default_year is None:
        default_year = datetime.now().year

    for fmt in input_formats:
        candidate, candidate_fmt = date_string, fmt
        if "%y" not in fmt.lower():
            # Year-less format: parse together with the assumed year so that
            # the day is validated against that year (e.g. "Feb 29").
            candidate = f"{date_string} {int(default_year):04d}"
            candidate_fmt = f"{fmt} %Y"
        try:
            date_object = datetime.strptime(candidate, candidate_fmt)
            break
        except ValueError:
            continue
    else:
        raise ValueError(f"Date format not recognized: {date_string}")

    return {
        "full_date": date_object.strftime("%m/%d/%Y"),
        "month": date_object.strftime("%m"),
        "year": date_object.strftime("%Y"),
        "day": date_object.strftime("%d"),
    }



### Attach Prompt

This function attaches a prompt to the condensed Textract output to prepare for querying the Bedrock model.

**Sample Input**: 
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  }
}

**Sample Output**:<br>
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  },
  "prompt": "<>[INST] <<SYS>>\n    You are an expert in receipt categorization. Categorize the following receipt into one of these categories: Meals, Supplies, Safety, Travel, Lodging, or Other. \n\n    These are the definitions of each category with examples:\n    Meals: Expenses for food and drinks (e.g., restaurant bills, coffee shop receipts).\n    Supplies: Purchases for office or work-related materials (e.g., stationery, printer ink, electronics).\n    Safety: Expenses related to safety equipment or services (e.g., gloves, helmets, fire extinguishers).\n    Travel: Expenses for transportation (e.g., airfare, train tickets, taxi fares, gas, car rentals).\n    Lodging: Accommodation expenses (e.g., hotel bills, Airbnb receipts).\n    Other: Any expense that does not fit the above categories.\n    \n    Do not include explanations, steps, or any additional text.\n    If you do not know, pick a category at random.\n    Respond strictly in the format: Category:<category>\n\n    <</SYS>>\n\n    Receipt:\n\n    NAME:DELTA\nINVOICE_RECEIPT_DATE:22Sep24\nINVOICE_RECEIPT_ID:0062265322160\nTOTAL:USD45.00 45.00\nUSD\nVENDOR_NAME:DELTA\nWhat category does this receipt belong to? [/INST]</s>"
}

In [2]:
import json

def lambda_handler(event, context):
    """Build the categorization prompt and attach it to the event.

    The prompt is a fixed instruction header, followed by one ``name:value``
    line per entry of ``condensed_extract``, followed by the closing
    question. The ``items`` entry is expanded into an ``items:`` line and one
    line per item, with newlines inside item values replaced by spaces.

    Args:
        event: dict carrying ``condensed_extract``.
        context: Lambda context object (unused).

    Returns:
        The same ``event`` object, with the text stored under ``prompt``.
    """
    extract_fields = event["condensed_extract"]

    # Header spelled out line by line so trailing and indentation whitespace
    # is visible in the source.
    pieces = [
        "<s>[INST] <<SYS>>\n"
        "    You are an expert in receipt categorization. Categorize the following receipt into one of these categories: Meals, Supplies, Safety, Travel, Lodging, or Other. \n"
        "\n"
        "    These are the definitions of each category with examples:\n"
        "    Meals: Expenses for food and drinks (e.g., restaurant bills, coffee shop receipts).\n"
        "    Supplies: Purchases for office or work-related materials (e.g., stationery, printer ink, electronics).\n"
        "    Safety: Expenses related to safety equipment or services (e.g., gloves, helmets, fire extinguishers).\n"
        "    Travel: Expenses for transportation (e.g., airfare, train tickets, taxi fares, gas, car rentals).\n"
        "    Lodging: Accommodation expenses (e.g., hotel bills, Airbnb receipts).\n"
        "    Other: Any expense that does not fit the above categories.\n"
        "    \n"
        "    Do not include explanations, steps, or any additional text.\n"
        "    If you do not know, pick a category at random.\n"
        "    Respond strictly in the format: Category:<category>\n"
        "\n"
        "    <</SYS>>\n"
        "\n"
        "    Receipt:\n"
        "\n"
        "    "
    ]

    for field_name, field_value in extract_fields.items():
        if field_name != 'items':
            pieces.append(field_name + ":" + field_value + "\n")
            continue
        pieces.append(field_name + ":\n")
        for item_name, item_text in field_value.items():
            pieces.append(item_name + ":" + item_text.replace('\n', ' ') + '\n')

    pieces.append("What category does this receipt belong to? [/INST]</s>")

    event['prompt'] = "".join(pieces)
    return event


### BedrockInvoke

Queries the defined Bedrock model to categorize the receipt.

**Sample Input**:
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  },
  "prompt": "<>[INST] <<SYS>>\n    You are an expert in receipt categorization. Categorize the following receipt into one of these categories: Meals, Supplies, Safety, Travel, Lodging, or Other. \n\n    These are the definitions of each category with examples:\n    Meals: Expenses for food and drinks (e.g., restaurant bills, coffee shop receipts).\n    Supplies: Purchases for office or work-related materials (e.g., stationery, printer ink, electronics).\n    Safety: Expenses related to safety equipment or services (e.g., gloves, helmets, fire extinguishers).\n    Travel: Expenses for transportation (e.g., airfare, train tickets, taxi fares, gas, car rentals).\n    Lodging: Accommodation expenses (e.g., hotel bills, Airbnb receipts).\n    Other: Any expense that does not fit the above categories.\n    \n    Do not include explanations, steps, or any additional text.\n    If you do not know, pick a category at random.\n    Respond strictly in the format: Category:<category>\n\n    <</SYS>>\n\n    Receipt:\n\n    NAME:DELTA\nINVOICE_RECEIPT_DATE:22Sep24\nINVOICE_RECEIPT_ID:0062265322160\nTOTAL:USD45.00 45.00\nUSD\nVENDOR_NAME:DELTA\nWhat category does this receipt belong to? [/INST]</s>"
}

**Sample Output**:
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  },
  "prompt": "<>[INST] <<SYS>>\n    You are an expert in receipt categorization. Categorize the following receipt into one of these categories: Meals, Supplies, Safety, Travel, Lodging, or Other. \n\n    These are the definitions of each category with examples:\n    Meals: Expenses for food and drinks (e.g., restaurant bills, coffee shop receipts).\n    Supplies: Purchases for office or work-related materials (e.g., stationery, printer ink, electronics).\n    Safety: Expenses related to safety equipment or services (e.g., gloves, helmets, fire extinguishers).\n    Travel: Expenses for transportation (e.g., airfare, train tickets, taxi fares, gas, car rentals).\n    Lodging: Accommodation expenses (e.g., hotel bills, Airbnb receipts).\n    Other: Any expense that does not fit the above categories.\n    \n    Do not include explanations, steps, or any additional text.\n    If you do not know, pick a category at random.\n    Respond strictly in the format: Category:<category>\n\n    <</SYS>>\n\n    Receipt:\n\n    NAME:DELTA\nINVOICE_RECEIPT_DATE:22Sep24\nINVOICE_RECEIPT_ID:0062265322160\nTOTAL:USD45.00 45.00\nUSD\nVENDOR_NAME:DELTA\nWhat category does this receipt belong to? [/INST]</s>",
  "bedrock_response": {
    "Body": {
      "generation": " \n\nCategory: Travel",
      "prompt_token_count": 283,
      "generation_token_count": 5,
      "stop_reason": "stop"
    },
    "ContentType": "application/json"
  }
}

### ParseBedrockOutput

Condenses Bedrock output to the final output.

**Sample Input**:
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00",
  "condensed_extract": {
    "NAME": "DELTA",
    "INVOICE_RECEIPT_DATE": "22Sep24",
    "INVOICE_RECEIPT_ID": "0062265322160",
    "TOTAL": "USD45.00 45.00\nUSD",
    "VENDOR_NAME": "DELTA"
  },
  "prompt": "<s>[INST] <<SYS>>\n    You are an expert in receipt categorization. Categorize the following receipt into one of these categories: Meals, Supplies, Safety, Travel, Lodging, or Other. \n\n    These are the definitions of each category with examples:\n    Meals: Expenses for food and drinks (e.g., restaurant bills, coffee shop receipts).\n    Supplies: Purchases for office or work-related materials (e.g., stationery, printer ink, electronics).\n    Safety: Expenses related to safety equipment or services (e.g., gloves, helmets, fire extinguishers).\n    Travel: Expenses for transportation (e.g., airfare, train tickets, taxi fares, gas, car rentals).\n    Lodging: Accommodation expenses (e.g., hotel bills, Airbnb receipts).\n    Other: Any expense that does not fit the above categories.\n    \n    Do not include explanations, steps, or any additional text.\n    If you do not know, pick a category at random.\n    Respond strictly in the format: Category:<category>\n\n    <</SYS>>\n\n    Receipt:\n\n    NAME:DELTA\nINVOICE_RECEIPT_DATE:22Sep24\nINVOICE_RECEIPT_ID:0062265322160\nTOTAL:USD45.00 45.00\nUSD\nVENDOR_NAME:DELTA\nWhat category does this receipt belong to? [/INST]</s>",
  "bedrock_response": {
    "Body": {
      "generation": " \n\nCategory:Travel",
      "prompt_token_count": 283,
      "generation_token_count": 5,
      "stop_reason": "stop"
    },
    "ContentType": "application/json"
  }
}

**Sample Output**:
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_category": "Travel",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00"
}

In [None]:
import json
import re

def lambda_handler(event, context):
    """Reduce the Bedrock step's payload to the final prediction record.

    Args:
        event: dict with ``key``, ``user_id``, ``predicted_date``,
            ``predicted_amount`` and ``bedrock_response``.
        context: Lambda context object (unused).

    Returns:
        dict with ``key``, ``user_id``, ``predicted_category``,
        ``predicted_date`` and ``predicted_amount``.
    """
    receipt_key, owner_id = event['key'], event['user_id']

    # Llama model: the generated text sits under Body.generation.
    model_text = event['bedrock_response']['Body']['generation']

    return {
        "key": receipt_key,
        "user_id": owner_id,
        "predicted_category": parse_llama_response(model_text),
        "predicted_date": event['predicted_date'],
        "predicted_amount": event['predicted_amount'],
    }




def parse_llama_response(parsed_body):
    """Extract the predicted category from the model's generated text.

    An answer in the requested ``Category:<category>`` form is preferred and
    matched case-insensitively, so "category: travel" yields "Travel". If no
    such answer exists, the first category name appearing anywhere in the
    text (case-sensitive) is used.

    Args:
        parsed_body: the generated text returned by the model.

    Returns:
        One of "Meals", "Supplies", "Safety", "Travel", "Lodging", "Other".
    """
    categories = r'(Meals|Supplies|Safety|Travel|Lodging|Other)'

    # Prefer the explicit answer so a response that echoes the category list
    # before answering does not resolve to the first echoed name.
    answer = re.search(r'Category\s*:\s*' + categories, parsed_body, re.IGNORECASE)
    if answer is not None:
        return answer.group(1).capitalize()

    match = re.search(categories, parsed_body)
    if match is None:
        # NOTE(review): "Meals" is the existing fallback; confirm it is
        # preferred over the catch-all "Other".
        return "Meals"
    return match.group()

### Parallel Action

The receipt item will be added into two RDS tables; one to hold predicted values (receipt_pred) and one to hold actual values (receipt_data). If a user changes values, the receipt_data table will be updated - Not yet implemented on the app.

### PutToRDS

This function adds receipt information to the receipt_pred table

In [None]:
import json
import psycopg2
import os
import datetime

def lambda_handler(event, context):
    """Insert the predicted receipt values into the ``receipt_pred`` table.

    Connection settings come from the ``DB_HOST``, ``DB_PORT`` (default
    5432), ``DB_USER``, ``DB_PASSWORD`` and ``DB_NAME`` environment
    variables. Database errors are printed and swallowed, so the event is
    returned whether or not the row was written.

    Args:
        event: dict with ``key``, ``user_id``, ``predicted_amount``,
            ``predicted_date`` (with ``full_date`` as mm/dd/yyyy) and
            ``predicted_category``.
        context: Lambda context object (unused).

    Returns:
        The unchanged ``event``.

    Raises:
        KeyError: if a required event field is missing.
        ValueError: if ``full_date`` is non-empty but not mm/dd/yyyy.
    """
    connect_kwargs = dict(
        host=os.getenv("DB_HOST"),
        port=os.getenv("DB_PORT", 5432),
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASSWORD"),
        database=os.getenv("DB_NAME"),
    )

    receipt_id = event['key']
    user_id = event['user_id']
    pred_amount = event["predicted_amount"]
    pred_date_str = event["predicted_date"]['full_date']
    pred_category = event['predicted_category']

    # An empty date string is stored as NULL.
    if pred_date_str:
        pred_date = datetime.datetime.strptime(pred_date_str, "%m/%d/%Y").date()
    else:
        pred_date = None

    insert_query = """
    INSERT INTO receipt_pred (receipt_id, user_id, pred_amount, pred_date, pred_category)
    VALUES (%s, %s, %s, %s, %s)
    """
    row_values = (receipt_id, user_id, pred_amount, pred_date, pred_category)

    connection = None
    try:
        # Open the PostgreSQL connection, run the parameterized INSERT, commit.
        connection = psycopg2.connect(**connect_kwargs)
        cursor = connection.cursor()
        cursor.execute(insert_query, row_values)
        connection.commit()

        print(f"Successfully inserted receipt {receipt_id} for user {user_id} into receipt_pred")

    except Exception as e:
        print(f"Error: {e}")

    finally:
        if connection:
            connection.close()

    return event


### PutToRDS-receipt_data

This function adds the item to the receipt_data table

In [None]:
import json
import psycopg2
import os
import datetime

def lambda_handler(event, context):
    """Insert the receipt's predicted values into an RDS table.

    NOTE(review): this section is described as writing to ``receipt_data``,
    but the query and the success message below both target ``receipt_pred``,
    and the body is the same as the PutToRDS handler. This looks like a
    copy-paste leftover; the ``receipt_data`` column names are not visible
    here, so confirm the intended table and columns before changing the query.

    Connection settings come from the ``DB_HOST``, ``DB_PORT`` (default
    5432), ``DB_USER``, ``DB_PASSWORD`` and ``DB_NAME`` environment
    variables. Database errors are printed and swallowed, so the event is
    returned whether or not the row was written.

    Args:
        event: dict with ``key``, ``user_id``, ``predicted_amount``,
            ``predicted_date`` (with ``full_date`` as mm/dd/yyyy) and
            ``predicted_category``.
        context: Lambda context object (unused).

    Returns:
        The unchanged ``event``.
    """
    
    db_host = os.getenv("DB_HOST")
    db_port = os.getenv("DB_PORT", 5432)
    db_user = os.getenv("DB_USER")
    db_password = os.getenv("DB_PASSWORD")
    db_name = os.getenv("DB_NAME")

    receipt_id = event['key']
    user_id = event['user_id']
    pred_amount = event["predicted_amount"]
    pred_date_str = event["predicted_date"]['full_date']
    pred_category = event['predicted_category']
    
    # An empty date string is stored as NULL; otherwise it must be mm/dd/yyyy.
    pred_date = datetime.datetime.strptime(pred_date_str, "%m/%d/%Y").date() if pred_date_str else None


    # NOTE(review): targets receipt_pred, not receipt_data (see docstring).
    insert_query = """
    INSERT INTO receipt_pred (receipt_id, user_id, pred_amount, pred_date, pred_category)
    VALUES (%s, %s, %s, %s, %s)
    """
    
    # Stays None if connect() fails, so the finally block can skip close().
    connection = None
    try:
        # Connect to PostgreSQL database
        connection = psycopg2.connect(
            host=db_host,
            port=db_port,
            user=db_user,
            password=db_password,
            database = db_name
        )
        
        cursor = connection.cursor()

        # Execute the INSERT query
        cursor.execute(insert_query, (receipt_id, user_id, pred_amount, pred_date, pred_category))

        connection.commit()

        print(f"Successfully inserted receipt {receipt_id} for user {user_id} into receipt_pred")

    except Exception as e:
        # Errors are only logged; the handler still returns the event below.
        print(f"Error: {e}")

    finally:
        if connection:
            connection.close()

    return event


### Step Function Return

The step function will return a json with the receipt name, user id, and predicted values

**Sample Output:**
{
  "key": "Airfare1.jpg",
  "user_id": "123456",
  "predicted_category": "Travel",
  "predicted_date": {
    "full_date": "09/22/2024",
    "month": "09",
    "year": "2024",
    "day": "22"
  },
  "predicted_amount": "45.00"
}