In [None]:
from dotenv import load_dotenv
import os

# Load settings via python-dotenv before reading them from the environment.
load_dotenv(override=True)

# Azure AI Services settings, read from the process environment.
AZURE_AI_SERVICES_ENDPOINT = os.environ.get("AZURE_AI_SERVICES_ENDPOINT")
AZURE_AI_SERVICES_API_VERSION = os.environ.get("AZURE_AI_SERVICES_API_VERSION")
# The API key is optional: it is None when the variable is not set.
AZURE_AI_SERVICES_API_KEY = os.environ.get("AZURE_AI_SERVICES_API_KEY")

# The endpoint and API version are mandatory; stop here if either is missing or empty.
assert AZURE_AI_SERVICES_ENDPOINT, "AZURE_AI_SERVICES_ENDPOINT must be set"
assert AZURE_AI_SERVICES_API_VERSION, "AZURE_AI_SERVICES_API_VERSION must be set"


Create a custom analyzer using the ISDA agreement schema

In [48]:
import json
import sys
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from pathlib import Path

# Document to analyze: a sample ISDA Credit Support Annex PDF under ./data.
file = Path("./data/ISDA-2.pdf")


# Add the parent directory to the import path to use shared modules.
# Guarded so that re-running this cell does not append the same entry again.
parent_dir = Path.cwd().parent
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))
from utility.content_understanding_client import AzureContentUnderstandingClient

# Get token provider once since it will be used for several resources. If you prefer to only use API keys, you can comment out this line.
token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")

# Absolute path of the analyzer template, resolved against the current directory.
# NOTE: parent_dir is deliberately rebound here; from this point on it is the
# current working directory, not its parent.
parent_dir = Path.cwd()
ANALYZER_TEMPLATE_PATH = "analyzer_templates/isda_extraction.json"
path_to_sample_template = os.path.abspath(os.path.join(parent_dir, ANALYZER_TEMPLATE_PATH))
print(path_to_sample_template)

# Create the client. Use the API key when one is configured; otherwise request
# a bearer token from the token provider.
ANALYZER_ID = "isda_agreement"
if AZURE_AI_SERVICES_API_KEY:
    auth_kwargs = {"subscription_key": AZURE_AI_SERVICES_API_KEY}
else:
    auth_kwargs = {"api_token": token_provider()}
content_understanding_client = AzureContentUnderstandingClient(
    endpoint=AZURE_AI_SERVICES_ENDPOINT,
    api_version=AZURE_AI_SERVICES_API_VERSION,
    **auth_kwargs,
)

# Create the analyzer from the template and wait for the operation result.
# Errors are reported rather than raised so the notebook can continue with an
# analyzer that already exists.
try:
    response = content_understanding_client.begin_create_analyzer(ANALYZER_ID, analyzer_schema_path=path_to_sample_template)
    result = content_understanding_client.poll_result(response)
    if isinstance(result, dict) and result.get("status") == "Succeeded":
        print(f'Analyzer details for {result["result"]["analyzerId"]}:')
        print(json.dumps(result, indent=2))
    else:
        # Surface a non-successful result instead of finishing silently.
        print("Analyzer creation did not report success. Returned result:")
        print(json.dumps(result, indent=2, default=str))
except Exception as e:
    print(e)
    print("Error in creating analyzer. Please double-check your analysis settings.\nIf there is a conflict, you can delete the analyzer and then recreate it, or move to the next cell and use the existing analyzer.")

/Users/danilodiaz/Source/DocExtraction-CU/analyzer_templates/isda_extraction.json
Analyzer details for isda_agreement:
{
  "id": "209a50b5-a734-48a7-8edb-8123da7637e5",
  "status": "Succeeded",
  "result": {
    "analyzerId": "isda_agreement",
    "description": "Extract parties and collateral information from ISDA agreement",
    "createdAt": "2025-02-14T00:48:24Z",
    "lastModifiedAt": "2025-02-14T00:48:29Z",
    "config": {
      "returnDetails": false,
      "enableOcr": true,
      "enableLayout": true,
      "enableBarcode": false,
      "enableFormula": false,
      "disableContentFiltering": false
    },
    "fieldSchema": {
      "fields": {
        "PartyA": {
          "type": "string",
          "method": "extract",
          "description": "Party A"
        },
        "PartyB": {
          "type": "string",
          "method": "extract",
          "description": "Party A"
        },
        "Collateral": {
          "type": "array",
          "method": "extract",
        

In [49]:
# Submit the document to the analyzer identified by ANALYZER_ID.
content_understanding_response = content_understanding_client.begin_analyze(
    ANALYZER_ID,
    file,
)

# Poll for the analysis result, with a timeout of 1000 seconds.
content_understanding_result = content_understanding_client.poll_result(
    content_understanding_response,
    timeout_seconds=1000,
)

# Display the complete result as indented JSON.
formatted_result = json.dumps(content_understanding_result, indent=2)
print(formatted_result)
           

{
  "id": "a9be8da9-e194-48ad-b016-74f093484afe",
  "status": "Succeeded",
  "result": {
    "analyzerId": "isda_agreement",
    "apiVersion": "2024-12-01-preview",
    "createdAt": "2025-02-14T00:48:41Z",
    "contents": [
      {
        "markdown": "<!-- PageHeader=\"2/11/25, 5:14 PM\" -->\n<!-- PageHeader=\"Credit Support Annex\" -->\n<!-- PageHeader=\"EX-10.11 13 a8821_ex10-11.htm CREDIT SUPPORT ANNEX\" -->\n\n\n# (Bilateral Form)\n\nExhibit 10.11\n(ISDA Agreements Subject to New York Law Only)\n\nISDA\u00ae\n\nInternational Swaps and Derivatives Association, Inc.\n\nCREDIT SUPPORT ANNEX\nto the Schedule to the\n\n\n## ISDA MASTER AGREEMENT dated as of November 16, 2004 between\n\n\n### WACHOVIA BANK, NATIONAL ASSOCIATION (\"Party A\") and\n\n\n#### GOLD BANK (\"Party B\")\n\nThis Annex supplements, forms part of, and is subject to, the ISDA Master Agreement referred to above (this\n\"Agreement\"), is part of its Schedule and is a Credit Support Document under this Agreement with 