In [159]:
def process_document(project_id: str, location: str, file_path: str, processor_id: str, mime_type: str):
    """Send a local file to a Document AI processor and collect its entities.

    Args:
        project_id: Google Cloud project that owns the processor.
        location: Location segment of the processor resource name (e.g. "us").
        file_path: Path of the local file whose bytes are uploaded.
        processor_id: Identifier of the processor to invoke.
        mime_type: MIME type reported for the uploaded bytes.

    Returns:
        A dict mapping each entity type to its ``mention_text``. Entities of
        type ``line_item`` are stored under ``line_item_1``, ``line_item_2``,
        ... in the order they are returned. Any other entity type that occurs
        more than once keeps only its last value, since later entries
        overwrite earlier ones.
    """
    from google.cloud import documentai_v1beta3 as documentai

    # Which processor runs is decided entirely by `processor_id`.
    docai_client = documentai.DocumentProcessorServiceClient()

    # Fully qualified resource name of the processor.
    processor_name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    with open(file_path, "rb") as source_file:
        raw_bytes = source_file.read()

    # NOTE(review): the v1beta3 ProcessRequest appears to mark `document` as
    # deprecated in favour of `raw_document` -- confirm before upgrading the
    # client library.
    response = docai_client.process_document(
        request={
            "name": processor_name,
            "document": {"mime_type": mime_type, "content": raw_bytes},
        }
    )

    parsed_document = response.document
    print("Document processing complete.")

    # Flatten the predicted entities into {type: text}.
    extracted = {}
    line_items_seen = 0
    for entity in parsed_document.entities:
        key = entity.type_
        if key == 'line_item':
            # Number line items so that each one gets its own key.
            line_items_seen += 1
            key = f"{key}_{line_items_seen}"
        extracted[key] = entity.mention_text

    return extracted

def get_text(anchor, document):
    """
    Return the text referenced by a layout anchor.

    Each segment in ``anchor.segments`` supplies a ``start_index`` and an
    ``end_index`` into ``document.text``; the slices are concatenated in
    segment order and surrounding whitespace is stripped from the result.
    An anchor without segments yields an empty string.
    """
    pieces = (
        document.text[part.start_index:part.end_index]
        for part in anchor.segments
    )
    return ''.join(pieces).strip()

In [160]:
# Load the settings used below; the later call reads keys['project_id'] and
# keys['processor_id'] from this object.
# NOTE(review): get_keys is not defined in the visible cells -- presumably it
# comes from an earlier cell or import; verify it exists on a fresh kernel.
keys = get_keys()

In [161]:
# Run the processor on the local file recibo_test.png and keep the returned
# entity dictionary for inspection.
predicted_fields = process_document(
    project_id=keys['project_id'],
    location="us",
    processor_id=keys['processor_id'],
    file_path="recibo_test.png",
    mime_type="image/png",
)


Document processing complete.


In [162]:
# Show the dictionary returned by process_document (entity type -> mention text).
predicted_fields

{'due_date': '12 ABR 22',
 'invoice_date': '28 FEB 22 - 31 MAR 22',
 'total_amount': '1,821,864',
 'invoice_id': '999850102730',
 'supplier_registration': 'GDMTH',
 'invoice_type': '',
 'supplier_address': 'ANASTASIO BUSTAMANTE SN\nNTE Y ID DD10E09V ELSTER Y ID DD10E09V ELSTER\nCENTRO SAN NICOLAS\nSAN NICOLAS DE LOS G, N.L.',
 'line_item_1': 'kW punta 1,361',
 'line_item_2': 'kW intermedia 1,615',
 'line_item_3': 'kWh punta 94,590',
 'line_item_4': 'kWh intermedia 423,442',
 'line_item_5': 'kW base 1,230',
 'line_item_6': 'kWh base 163,738',
 'line_item_7': 'CARGA CONECTADA KW 3100 DEMANDA CONTRATADA KW 2991',
 'line_item_8': 'kWMax'}