Let's test our Document Intelligence API to make sure we are able to extract data.

In [1]:
import os, getpass
from langchain_openai import AzureChatOpenAI

from dotenv import load_dotenv
import os, getpass
from pathlib import Path

def _set_env(var: str):
    """Ensure the environment variable `var` has a non-empty value.

    If `var` is already set to a non-empty string nothing happens.
    Otherwise the user is prompted (input hidden via getpass) and the
    entered text is stored in os.environ under `var`.
    """
    if os.environ.get(var):
        # Already configured; do not prompt again
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Locate the .env file in the parent of the current working directory
root_dir = Path().absolute().parent
env_path = root_dir / '.env'

# Load .env from root, and only claim success when the file actually exists
if env_path.is_file():
    load_dotenv(dotenv_path=env_path)
    print(f"Loaded .env from {env_path}")
else:
    print(f"No .env file found at {env_path}; relying on existing environment variables")

# Access variables (each is None when the variable is not defined)
api_key = os.getenv('AZURE_OPENAI_API_KEY')
debug = os.getenv('DEBUG')
more_research = os.getenv('MORE_RESEARCH')

# Show only a masked preview of the key. Guard against a missing/empty key:
# slicing None would raise TypeError and abort the cell with an unhelpful error.
if api_key:
    print(f"API Key: {  api_key[:4] + '*' * 28 + api_key[-4:] }")
else:
    print("API Key: <not set> (AZURE_OPENAI_API_KEY is missing from the environment)")

Loaded .env from c:\Users\rickcau\source\repos\vendor-contracts-gen-ai\.env
API Key: 8PVz****************************Isv1


In [2]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.storage.blob import BlobServiceClient
from azure.storage.blob import generate_blob_sas
from azure.storage.blob import BlobSasPermissions
from datetime import datetime, timedelta, UTC  # Added UTC

# Document Intelligence service: endpoint URL and access key from the environment
endpoint = os.environ.get('FORM_RECOGNIZER_ENDPOINT')
key = os.environ.get('FORM_RECOGNIZER_KEY')

# Blob Storage account that holds the documents to analyze
storage_account_name = os.environ.get('STORAGE_ACCOUNT_NAME')
storage_account_key = os.environ.get('STORAGE_ACCOUNT_KEY')
# Container the source PDFs are read from
container_name = "source"


def generate_sas_url(blob_name: str) -> str:
    """
    Generate a full URL with a read-only SAS token for a specific blob.

    Args:
        blob_name: Name (path) of the blob inside the `container_name`
            container, given as stored (not URL-encoded).

    Returns:
        The https URL of the blob with the SAS token appended as the query
        string. The token grants read permission only and expires one hour
        after this call.
    """
    # Function-scope import so this definition does not depend on edits
    # to the import cell
    from urllib.parse import quote

    # Read-only permission, valid for one hour (timezone-aware expiry)
    sas_token = generate_blob_sas(
        account_name=storage_account_name,
        account_key=storage_account_key,
        container_name=container_name,
        blob_name=blob_name,  # raw name goes to the SAS generator
        permission=BlobSasPermissions(read=True),
        expiry=datetime.now(UTC) + timedelta(hours=1),
    )

    # Percent-encode the blob name for use in the URL path so names with
    # spaces, '#', '?' or '%' still yield a valid URL; '/' is kept because
    # it separates virtual directories in the blob path.
    encoded_blob_name = quote(blob_name, safe="/")

    # Construct the full URL including the SAS token
    return (
        f"https://{storage_account_name}.blob.core.windows.net/"
        f"{container_name}/{encoded_blob_name}?{sas_token}"
    )

def read_pdf(input_file: str) -> str:
    """
    Extract the text content of a PDF blob with Azure Document Intelligence.

    A SAS URL for `input_file` is obtained from generate_sas_url and sent to
    the "prebuilt-read" model; the call blocks until the analysis finishes.

    Args:
        input_file: Blob name of the PDF to analyze.

    Returns:
        The `content` attribute of the analysis result.

    Raises:
        Exception: any error from URL generation or the service call is
            printed and then re-raised unchanged.
    """
    try:
        # URL (with SAS token) that the service will fetch the document from
        source_url = generate_sas_url(input_file)

        print(f"Starting document analysis...")

        # Client built from the module-level endpoint and key
        client = DocumentIntelligenceClient(endpoint, AzureKeyCredential(key))

        # Start the analysis and wait for the poller to deliver the result
        analysis = client.begin_analyze_document(
            model_id="prebuilt-read",
            analyze_request={"urlSource": source_url},
        ).result()

        print("Successfully analyzed document")
        return analysis.content

    except Exception as err:
        # Report, then let the caller decide how to handle the failure
        print(f"Error analyzing document: {str(err)}")
        raise

# Example usage: analyze one sample contract and show the extracted text
if __name__ == "__main__":
    try:
        content = read_pdf("VendorAgreement-Fabrikam-5004432.pdf")
        # Header (preceded by a blank line) followed by the extracted text
        print("\nExtracted content:", content, sep="\n")
    except Exception as err:
        print(f"Failed to process document: {str(err)}")

Starting document analysis...
Successfully analyzed document

Extracted content:
Vendor Contractor Agreement
Contract ID: 5004432
This Vendor Contractor Agreement ("Agreement") is entered into as of 12/7/2024 by and between:
. [Contoso Elite] (the "Company"), with its principal office located at [Address], and
· [Fabrikam Services] (the "Vendor"), with its principal office located at [Address].
1. Services
The Vendor agrees to provide the following services:
. 200 Hours of Developer Support for GenAl Contracts Project
The services must be completed by [Feb 20, 2024], as outlined in Exhibit A (Scope of Work).
2. Compensation
The Company will pay the Vendor [$20,000 US] upon completion of services. Payments will be made within [10] business days after receiving an invoice from the Vendor.
3. Term
This Agreement begins on [12/14/2024] and ends on [Feb 20, 2024].
4. Confidentiality
The Vendor agrees to maintain the confidentiality of any proprietary information provided by the Company and 