# Azure AI Content Understanding

Azure AI Content Understanding is a new generative AI-based Azure AI Service that analyzes files of any modality (documents, images, videos, and audio) and extracts structured output in user-defined field formats.

Requirements:
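1. An Azure AI Services resource in a supported region. Its endpoint and key must be available in the `AZURE_AI_SERVICES_ENDPOINT` and `AZURE_AI_SERVICES_KEY` environment variables.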

Note: Check supported regions: https://learn.microsoft.com/en-us/azure/ai-services/content-understanding/language-region-support?tabs=document#region-support

## Load Azure Configuration

In [None]:
import os

# Endpoint URL and subscription key used by every request in this notebook.
# Both variables are required: indexing os.environ raises KeyError if one is unset.
azure_ai_services_endpoint = os.environ["AZURE_AI_SERVICES_ENDPOINT"]
azure_ai_services_key = os.environ["AZURE_AI_SERVICES_KEY"]

## Create a Custom Analyzer

In [None]:
import requests
import json

def create_analyzer(analyzer_id, request_body, api_version="2024-12-01-preview"):
    """Create a custom Content Understanding analyzer.

    Sends a PUT request to
    ``{endpoint}/contentunderstanding/analyzers/{analyzer_id}`` using the
    module-level ``azure_ai_services_endpoint`` and ``azure_ai_services_key``.

    Args:
        analyzer_id: Identifier of the analyzer to create.
        request_body: JSON-serializable analyzer definition.
        api_version: Value of the ``api-version`` query parameter.

    Returns:
        The ``Operation-Location`` response header, or ``None`` if the
        response does not carry one.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
    """
    # Tolerate an endpoint configured with a trailing slash so the URL does
    # not end up containing "//contentunderstanding".
    endpoint = azure_ai_services_endpoint.rstrip("/")
    url = f"{endpoint}/contentunderstanding/analyzers/{analyzer_id}?api-version={api_version}"
    headers = {
        "Ocp-Apim-Subscription-Key": azure_ai_services_key,
        "Content-Type": "application/json",
    }

    response = requests.put(url, headers=headers, data=json.dumps(request_body))
    # Surface HTTP errors here instead of silently returning None, matching
    # the error handling of the other helpers in this notebook.
    response.raise_for_status()

    # The 201 (Created) response includes an Operation-Location header
    # containing a URL that can be used to track the status of this
    # asynchronous creation operation.
    return response.headers.get("Operation-Location")

In [None]:
# Identifier under which the image analyzer is created, used and later deleted.
ANALYZER_NAME = "ImageAnalyzer"

# Analyzer definition: one plain string field, one generated description and
# one classification field restricted to three categories.
request_body = {
    "description": "Sample Image analyzer",
    "scenario": "image",
    "fieldSchema": {
        "fields": {
            "title": {"type": "string"},
            "imagedescription": {"type": "string", "method": "generate"},
            "imagecategory": {
                "type": "string",
                "method": "classify",
                "enum": ["car", "bus", "bicycle"],
            },
        },
    },
}

# Submit the definition and keep the URL used to track the creation operation.
custom_analyzer_operation_location = create_analyzer(ANALYZER_NAME, request_body)

print(custom_analyzer_operation_location)

## Track Status of Asynchronous Create Operation

In [None]:
import requests

def get_analyzer_operation_status(url):
    """Fetch and print the status of an asynchronous analyzer operation.

    Args:
        url: Operation URL to query, e.g. the ``Operation-Location`` value
            returned by ``create_analyzer``.

    Returns:
        The decoded JSON body when the service answers 200, otherwise
        ``None`` (the failure is printed, not raised).
    """
    headers = {"Ocp-Apim-Subscription-Key": azure_ai_services_key}

    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"Failed to retrieve operation status. Status code: {response.status_code}, Response: {response.text}")
        return None

    print("Operation status retrieved successfully.")
    # Decode once, then both print and return the payload so callers that
    # assign the result actually receive the operation status.
    operation = response.json()
    print(operation)
    return operation

In [None]:
# Query the operation URL returned when the image analyzer was created;
# the helper prints the outcome.
response = get_analyzer_operation_status(custom_analyzer_operation_location)

## Analyze a file

In [None]:
import requests

def analyze_file(analyzer_id, file_url, api_version="2024-12-01-preview"):
    """Submit a file, referenced by URL, to an analyzer for asynchronous analysis.

    Sends a POST request to
    ``{endpoint}/contentunderstanding/analyzers/{analyzer_id}:analyze`` using
    the module-level ``azure_ai_services_endpoint`` and
    ``azure_ai_services_key``.

    Args:
        analyzer_id: Identifier of the analyzer to run.
        file_url: URL of the file to analyze; sent as the ``url`` field of
            the JSON request body.
        api_version: Value of the ``api-version`` query parameter.

    Returns:
        The ``Operation-Location`` response header, or ``None`` if the
        response does not carry one.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
    """
    # Tolerate an endpoint configured with a trailing slash so the URL does
    # not end up containing "//contentunderstanding".
    endpoint = azure_ai_services_endpoint.rstrip("/")
    url = f"{endpoint}/contentunderstanding/analyzers/{analyzer_id}:analyze?api-version={api_version}"
    headers = {
        "Ocp-Apim-Subscription-Key": azure_ai_services_key,
        "Content-Type": "application/json",
    }

    response = requests.post(url, headers=headers, json={"url": file_url})
    # Surface HTTP errors here instead of silently returning None, matching
    # the error handling of the other helpers in this notebook.
    response.raise_for_status()

    return response.headers.get("Operation-Location")


In [None]:
# Submit the sample bus image to the analyzer and show the operation URL
# that is later passed to analyze_file_track_status.
analyze_file_operation_location = analyze_file(ANALYZER_NAME, "https://ziggystorage01.blob.core.windows.net/images/bus1.jpeg")
print(analyze_file_operation_location)

## Track Status of Analysis

In [None]:
import requests

def analyze_file_track_status(url):
    """Query an analysis operation once and report its state.

    Args:
        url: Operation URL to query.

    Returns:
        The full decoded JSON body when its ``status`` is ``"Succeeded"``;
        otherwise just the ``status`` value. ``None`` is returned for a
        non-200 response that ``raise_for_status`` does not treat as an error.

    Raises:
        requests.HTTPError: Via ``raise_for_status`` for a non-200 response
            with an error status.
    """
    response = requests.get(
        url,
        headers={
            "Ocp-Apim-Subscription-Key": azure_ai_services_key,
            "Content-Type": "application/json",
        },
    )

    # Anything other than 200 is handed to requests' own error handling.
    if response.status_code != 200:
        response.raise_for_status()
        return None

    payload = response.json()
    state = payload.get("status")
    return payload if state == "Succeeded" else state

In [None]:
result = analyze_file_track_status(analyze_file_operation_location)

# Anything other than a successful result dict (e.g. a bare status string)
# is printed as-is.
if not isinstance(result, dict) or result.get("status") != "Succeeded":
    print(result)
else:
    contents = result.get("result", {}).get("contents", [])
    if contents:
        # Only the first content entry is inspected.
        fields = contents[0].get("fields", {})
        image_description = fields.get("imagedescription", {}).get("valueString")
        image_category = fields.get("imagecategory", {}).get("valueString")
        title = fields.get("title", {}).get("valueString")

        print(f"Title: {title}")
        print(f"Image Description: {image_description}")
        print(f"Image Category: {image_category}")

In [None]:
# Submit the sample car image to the same analyzer; the returned operation
# URL replaces the one from the previous analysis.
analyze_file_operation_location = analyze_file(ANALYZER_NAME, "https://ziggystorage01.blob.core.windows.net/images/car1.jpeg")

In [None]:
result = analyze_file_track_status(analyze_file_operation_location)

# Anything other than a successful result dict (e.g. a bare status string)
# is printed as-is.
if not isinstance(result, dict) or result.get("status") != "Succeeded":
    print(result)
else:
    contents = result.get("result", {}).get("contents", [])
    if contents:
        # Only the first content entry is inspected.
        fields = contents[0].get("fields", {})
        image_description = fields.get("imagedescription", {}).get("valueString")
        image_category = fields.get("imagecategory", {}).get("valueString")
        title = fields.get("title", {}).get("valueString")

        print(f"Title: {title}")
        print(f"Image Description: {image_description}")
        print(f"Image Category: {image_category}")

## Delete Analyzer

In [None]:
import requests

def delete_analyzer(analyzer_id):
    """Delete a custom analyzer by its identifier.

    Args:
        analyzer_id: Identifier of the analyzer to delete.

    Returns:
        A confirmation message when the service answers 204. ``None`` is
        returned for any other status that ``raise_for_status`` does not
        treat as an error.

    Raises:
        requests.HTTPError: Via ``raise_for_status`` for a non-204 response
            with an error status.
    """
    version = "2024-12-01-preview"
    target = (
        f"{azure_ai_services_endpoint}/contentunderstanding/analyzers/"
        f"{analyzer_id}?api-version={version}"
    )

    response = requests.delete(
        target,
        headers={
            "Ocp-Apim-Subscription-Key": azure_ai_services_key,
            "Content-Type": "application/json",
        },
    )

    # Anything other than 204 is handed to requests' own error handling.
    if response.status_code != 204:
        response.raise_for_status()
        return None
    return "Analyzer deleted successfully."

In [None]:
# Remove the image analyzer and print whatever the helper returns.
result = delete_analyzer(ANALYZER_NAME)
print(result)

## Video Analysis Example

## Create Custom Analyzer

In [None]:
# Identifier under which the video analyzer is created, used and later deleted.
ANALYZER_NAME = "VideoAnalyzerZZ"

# Analyzer definition: a plain title field plus four generated string fields,
# each guided by its own description.
request_body = {
    "description": "Sample Video analyzer",
    "scenario": "videoShot",
    "fieldSchema": {
        "fields": {
            "title": {"type": "string"},
            "videodescription": {
                "type": "string",
                "method": "generate",
                "description": "Detailed summary of the video segment",
            },
            "company": {
                "type": "string",
                "method": "generate",
                "description": "The company that this video segment is about",
            },
            "azure": {
                "type": "string",
                "method": "generate",
                "description": "The Microsoft Azure service used by the company in this video",
            },
            "azurepurpose": {
                "type": "string",
                "method": "generate",
                "description": "The purpose of why the Microsoft Azure service is being used by the company",
            },
        },
    },
}

# Submit the definition and keep the URL used to track the creation operation.
custom_analyzer_operation_location = create_analyzer(ANALYZER_NAME, request_body)

print(custom_analyzer_operation_location)

## Track Status of Custom Analyzer

In [None]:
# Query the operation URL returned when the video analyzer was created;
# the helper prints the outcome.
response = get_analyzer_operation_status(custom_analyzer_operation_location)

## Analyze the file

In [None]:
# Submit the sample video to the video analyzer and show the operation URL
# that is later passed to analyze_file_track_status.
analyze_file_operation_location = analyze_file(ANALYZER_NAME, "https://ziggystorage01.blob.core.windows.net/videos/mercedes.mp4")
print(analyze_file_operation_location)

## Track Status of File Analysis

In [None]:
result = analyze_file_track_status(analyze_file_operation_location)

# Anything other than a successful result dict (e.g. a bare status string)
# is printed as-is.
if not isinstance(result, dict) or result.get("status") != "Succeeded":
    print(result)
else:
    contents = result.get("result", {}).get("contents", [])
    if contents:
        # Only the first content entry is inspected.
        fields = contents[0].get("fields", {})
        title = fields.get("title", {}).get("valueString")
        company = fields.get("company", {}).get("valueString")
        video_description = fields.get("videodescription", {}).get("valueString")
        azure = fields.get("azure", {}).get("valueString")
        azurepurpose = fields.get("azurepurpose", {}).get("valueString")

        print(f"Title: {title}")
        print(f"Company: {company}")
        print(f"Video Description: {video_description}")
        print(f"Azure Product: {azure}")
        print(f"Azure Purpose: {azurepurpose}")

In [None]:
# Remove the video analyzer and print whatever the helper returns.
result = delete_analyzer(ANALYZER_NAME)
print(result)