# Convert AutoML Response to DocumentAI Format

* Author: docai-incubator@google.com

## Disclaimer

This tool is not supported by the Google engineering team or product team. It is provided and supported on a best-effort basis by the **DocAI Incubator Team**. No guarantees of performance are implied.


## Objective

This tool calls a Vertex AI image object detection endpoint through its API, converts the response to the DocumentAI format, and stores the post-processed results in GCS as JSON files.

## Prerequisites
* GCP Project ID
* DocumentAI OCR Processor ID
* Endpoint ID from VertexAI Model


## Step by Step procedure 

### 1.Importing Required Modules

In [None]:
!wget https://raw.githubusercontent.com/GoogleCloudPlatform/document-ai-samples/main/incubator-tools/best-practices/utilities/utilities.py

In [None]:
import base64
from io import BytesIO
from pathlib import Path
from typing import Dict

from google.api_core.client_options import ClientOptions
from google.cloud import aiplatform
from google.cloud import documentai_v1beta3 as documentai
from google.cloud import storage
from PIL import Image

from utilities import file_names, store_document_as_json

### 2.Setup the inputs
* `project_id` : Provide GCP project Number 
* `processor_location`: Provide DocumentAI processor location (“us” or “eu”)
* `processor_id` : Provide GCP DocumentAI processor id
* `processor_version` : Provide GCP DocumentAI processor version id
* `endpoint_id` : Provide prediction endpoint id
* `endpoint_location` : Provide prediction endpoint location
* `score_threshold` : Provide threshold value for prediction object confidences. Ranges from 0 - 1
* `gcs_input_folder` :  Provide GCS path of images folder
* `gcs_output_folder` :  Provide GCS path to store postprocessed results(DocumentAI JSON format results)


In [None]:
# Google Cloud project passed to both the Document AI call and the prediction call.
project_id = "<<project_id>>"
# Document AI processor location ("us" or "eu"); it is also used to build the API endpoint host.
processor_location = "<<location>>"
processor_id = "<<OCR_processor_id>>"  # OCR
# Document AI processor version ID used when processing each file.
processor_version = "pretrained-ocr-v1.0-2020-09-23"
# Prediction endpoint ID of the deployed object detection model, and the endpoint's location.
endpoint_id = "<<endpoint>>"
endpoint_location = "<<endpoint_location>>"
# Confidence cut-off in the range 0-1: detections scoring below it are skipped.
score_threshold = 0.1
# GCS folder holding the input files, and GCS folder that receives the Document AI JSON results.
gcs_input_folder = "gs://<<Bucket_name>>/<<input_files_sub_path>>/"
gcs_output_folder = "gs://<<Bucket_name>>/<<output_files_sub_path>>/"

### 3.Run the required functions

In [None]:
def process_document_sample(
    project_id: str,
    location: str,
    processor_id: str,
    pdf_bytes: bytes,
    processor_version: str,
    mime_type: str,
) -> "documentai.ProcessResponse":
    """
    Run one file through a specific Document AI processor version.

    Args:
        project_id (str): Google Cloud project that owns the processor.
        location (str): Processor location (e.g., 'us', 'eu'); also selects the API host.
        processor_id (str): ID of the Document AI processor.
        pdf_bytes (bytes): Raw content of the file to process (PDF or image).
        processor_version (str): ID of the processor version to invoke.
        mime_type (str): MIME type describing `pdf_bytes` (e.g., 'application/pdf').

    Returns:
        documentai.ProcessResponse: The response returned by `process_document`.

    Raises:
        google.api_core.exceptions.GoogleAPIError: If the Document AI request fails.
    """
    # The client is pointed at the location-specific host; this is required
    # for any location other than "us".
    endpoint_options = ClientOptions(
        api_endpoint=f"{location}-documentai.googleapis.com"
    )
    docai_client = documentai.DocumentProcessorServiceClient(
        client_options=endpoint_options
    )

    # Build the whole request in one expression: fully-qualified processor
    # version name plus the inline document content.
    process_request = documentai.ProcessRequest(
        name=docai_client.processor_version_path(
            project_id, location, processor_id, processor_version
        ),
        raw_document=documentai.RawDocument(content=pdf_bytes, mime_type=mime_type),
        skip_human_review=False,
    )

    return docai_client.process_document(request=process_request)


def predict_image_object_detection_sample(
    project: str,
    endpoint_id: str,
    payload: Dict[str, str],
    location: str = "us-central1",
) -> Dict:
    """
    Request a prediction for a single instance from a deployed endpoint.

    Args:
        project (str): Google Cloud project that owns the endpoint.
        endpoint_id (str): ID of the endpoint serving the object detection model.
        payload (Dict[str, str]): The single instance to predict on (e.g., a dict holding the base64 encoded image).
        location (str, optional): Region hosting the endpoint. Default is 'us-central1'.

    Returns:
        Dict: The first prediction of the response, converted to a plain dict.

    Raises:
        google.api_core.exceptions.GoogleAPIError: If the prediction request fails.
    """
    # Prediction requests must go to the regional API host.
    regional_host = f"{location}-aiplatform.googleapis.com"
    prediction_client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": regional_host}
    )

    endpoint_name = prediction_client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    prediction_response = prediction_client.predict(
        endpoint=endpoint_name, instances=[payload]
    )

    # Exactly one instance was sent, so only the first prediction is relevant.
    first_prediction = prediction_response.predictions[0]
    return dict(first_prediction)

### 4.Run the code

In [None]:
# MIME type sent to Document AI, keyed by lower-cased file extension.
# Anything not listed here is treated as a PDF.
_MIME_TYPES = {
    ".png": "image/png",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
}
_DEFAULT_MIME_TYPE = "application/pdf"


def _split_gcs_uri(gcs_uri):
    """Split 'gs://bucket/some/prefix/' into ('bucket', 'some/prefix')."""
    parts = gcs_uri.strip("/").split("/")
    return parts[2], "/".join(parts[3:])


def _rectangle_corners(xs, ys):
    """Return the 4 corners of the box spanned by xs/ys, clockwise from top-left."""
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return [(left, top), (right, top), (right, bottom), (left, bottom)]


def main():
    """
    Convert object detection predictions for every input file into Document AI JSON.

    Reads the module-level settings (`project_id`, `processor_location`,
    `processor_id`, `processor_version`, `endpoint_id`, `endpoint_location`,
    `score_threshold`, `gcs_input_folder`, `gcs_output_folder`). For each file
    found under `gcs_input_folder` it:

    1. downloads the file and processes it with Document AI,
    2. sends the same bytes to the object detection endpoint,
    3. adds every detection with confidence >= `score_threshold` to each page
       as an entity and as a visual element,
    4. stores the resulting document as `<file stem>.json` under
       `gcs_output_folder`.

    Files whose prediction request fails are reported and skipped. The
    prediction is made once per file, so for a multi-page document the same
    detections are attached to every page.
    """
    input_bucket, _ = _split_gcs_uri(gcs_input_folder)
    output_bucket, output_folder = _split_gcs_uri(gcs_output_folder)
    _, files_dict = file_names(gcs_input_folder)

    input_bucket_obj = storage.Client().get_bucket(input_bucket)
    for fn, fp in files_dict.items():
        print(f"Processing {fn}")
        print("\tCalling DocAI API...")
        pdf_bytes = input_bucket_obj.blob(fp).download_as_bytes()
        # Resolve the MIME type per file: a value carried over from the
        # previous file would mislabel a PDF that follows an image.
        mime_type = _MIME_TYPES.get(Path(fn).suffix.lower(), _DEFAULT_MIME_TYPE)
        doc = process_document_sample(
            project_id,
            processor_location,
            processor_id,
            pdf_bytes,
            processor_version,
            mime_type,
        ).document
        try:
            encoded_content = base64.b64encode(pdf_bytes).decode("utf-8")
            payload = {"content": encoded_content}
            # Pass the configured region explicitly; otherwise the helper
            # falls back to its "us-central1" default.
            res = predict_image_object_detection_sample(
                project_id, endpoint_id, payload, endpoint_location
            )
        except Exception as e:
            # Best effort: report the failing file and move on to the next one.
            print("Unable to process", fn, str(e))
            continue

        # Keep only detections that reach the confidence threshold.
        detections = [
            (display_name, float(confidence), bbox)
            for display_name, confidence, bbox in zip(
                res["displayNames"], res["confidences"], res["bboxes"]
            )
            if confidence >= score_threshold
        ]

        for page in doc.pages:
            # Pixel size of the page image, used to scale normalized boxes.
            width, height = Image.open(BytesIO(page.image.content)).size
            entities = []
            # Each bbox is unpacked in the order xmin, xmax, ymin, ymax.
            for display_name, confidence, (xmin, xmax, ymin, ymax) in detections:
                pixel_corners = _rectangle_corners(
                    (int(xmin * width), int(xmax * width)),
                    (int(ymin * height), int(ymax * height)),
                )
                normalized_corners = _rectangle_corners((xmin, xmax), (ymin, ymax))
                bounding_poly = documentai.BoundingPoly(
                    vertices=[documentai.Vertex(x=x, y=y) for x, y in pixel_corners],
                    normalized_vertices=[
                        documentai.NormalizedVertex(x=x, y=y)
                        for x, y in normalized_corners
                    ],
                )
                page_ref = documentai.Document.PageAnchor.PageRef(
                    # page_number is 1-based, page refs are 0-based.
                    page=page.page_number - 1,
                    bounding_poly=bounding_poly,
                    layout_type="LAYOUT_TYPE_UNSPECIFIED",
                )
                ent = documentai.Document.Entity(
                    type_=display_name,
                    mention_text="",
                    confidence=confidence,
                    page_anchor=documentai.Document.PageAnchor(page_refs=[page_ref]),
                    text_anchor=documentai.Document.TextAnchor(),
                )
                ent.normalized_value.boolean_value = True
                entities.append(ent)

                ve = documentai.Document.Page.VisualElement()
                ve.type_ = display_name.upper()
                ve.layout.bounding_poly = ent.page_anchor.page_refs[0].bounding_poly
                page.visual_elements.extend([ve])
            doc.entities.extend(entities)

        json_str = documentai.Document.to_json(
            doc, including_default_value_fields=False
        )
        # Path.stem drops only the final extension, so "scan.v2.png" becomes
        # "scan.v2.json" and names without a dot no longer raise.
        fn = Path(fn).stem + ".json"
        # Avoid a leading "/" in the object name when no output prefix is given.
        file_name = f"{output_folder}/{fn}" if output_folder else fn
        print(f"\t  Output gcs uri - gs://{output_bucket}/{file_name}")
        store_document_as_json(json_str, output_bucket, file_name)


main()

### 5.Output

Sample images showing the prediction results as DocumentAI visual elements and entities.

#### Visualization from DocumentAI UI
<img src="./Images/DocAI_UI_Visualization.png" width=800 height=400 ></img>
#### Visualization from JSON File 
<img src="./Images/JSON_Visualization.png" width=800 height=400 ></img>