In [2]:
import requests
import json

def process_document(api_key, project_id, location, processor_id, file_path,
                     mime_type="application/pdf"):
    """
    Processes a document using Google Document AI API.

    Reads the file at ``file_path``, submits it to the processor's
    synchronous ``:process`` REST method, and prints the extracted text
    followed by any entities present in the response. On a non-200 response
    the status code and response body are printed instead.

    :param api_key: Credential placed in the ``Authorization: Bearer`` header.
        Despite the name, this must be an OAuth 2.0 access token (for example
        the output of ``gcloud auth print-access-token``); the service answers
        401 ``ACCESS_TOKEN_TYPE_UNSUPPORTED`` when given a plain API key.
    :param project_id: Google Cloud Project ID
    :param location: Region where the processor is deployed (e.g., "us")
    :param processor_id: The ID of the Document AI processor
    :param file_path: Path to the file to process
    :param mime_type: MIME type of the file; defaults to "application/pdf"
    :return: None. Results are printed to standard output.
    :raises OSError: if ``file_path`` cannot be opened or read
    :raises requests.exceptions.RequestException: on connection failure or
        timeout
    """
    # Local import: the file's import block does not provide base64.
    import base64

    # Define the endpoint URL
    endpoint = (
        f"https://{location}-documentai.googleapis.com/v1/"
        f"projects/{project_id}/locations/{location}/"
        f"processors/{processor_id}:process"
    )

    # Read the file content
    with open(file_path, "rb") as file:
        file_content = file.read()

    # The :process method takes a JSON body, not the raw file bytes: the
    # document travels base64-encoded inside "rawDocument".
    payload = {
        "rawDocument": {
            "content": base64.b64encode(file_content).decode("ascii"),
            "mimeType": mime_type,
        }
    }

    # Passing json= makes requests serialize the body and set the
    # "Content-Type: application/json" header itself.
    headers = {
        "Authorization": f"Bearer {api_key}",
    }

    # Send the request. The (connect, read) timeout keeps a stalled
    # connection from blocking forever while leaving room for slow OCR.
    response = requests.post(
        endpoint,
        headers=headers,
        json=payload,
        timeout=(10, 300),
    )

    if response.status_code == 200:
        result = response.json()
        document = result.get("document", {})
        text = document.get("text", "")
        print("Document Text:")
        print(text)

        # Optionally print entities if they exist
        print("\nExtracted Entities:")
        entities = document.get("entities", [])
        for entity in entities:
            entity_type = entity.get("type", "Unknown")
            mention_text = entity.get("mentionText", "")
            print(f"{entity_type}: {mention_text}")
    else:
        print("Error:", response.status_code, response.text)

# --- Run configuration: fill these in before executing ---
API_KEY = ""  # sent by process_document as the "Authorization: Bearer" value
PROJECT_ID = "ml-harvard-herbaria"
LOCATION = "us"  # Adjust as needed
PROCESSOR_ID = ""
FILE_PATH = "Example 2.pdf"

# Submit the configured file to the configured processor
process_document(
    api_key=API_KEY,
    project_id=PROJECT_ID,
    location=LOCATION,
    processor_id=PROCESSOR_ID,
    file_path=FILE_PATH,
)


Error: 401 {
  "error": {
    "code": 401,
    "message": "Request had invalid authentication credentials. Expected OAuth 2 access token, login cookie or other valid authentication credential. See https://developers.google.com/identity/sign-in/web/devconsole-project.",
    "status": "UNAUTHENTICATED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "ACCESS_TOKEN_TYPE_UNSUPPORTED",
        "metadata": {
          "service": "documentai.googleapis.com",
          "method": "google.cloud.documentai.v1.DocumentProcessorService.ProcessDocument"
        }
      }
    ]
  }
}

