# Analyze Images using Azure OpenAI

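Prerequisites:
1. Create an Azure OpenAI resource.
2. Deploy a GPT-4-class model that accepts image input (this notebook uses a deployment named `gpt-4o`).
3. Set the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `AZURE_OPENAI_API_VERSION` environment variables.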

## Load Azure Configuration

In [5]:
import os

# Pull the Azure OpenAI connection settings from environment variables.
# A variable that is not set yields None.
azure_openai_endpoint, azure_openai_key, azure_openai_api_version = (
    os.environ.get(setting_name)
    for setting_name in (
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_API_VERSION",
    )
)

## Get and Prepare the Image

In [6]:
import base64
from pathlib import Path

# Location of the image to analyze (a relative path, so it is resolved against
# the current working directory)
image_path = Path("images/generated_image.jpg")

# Read the raw bytes and base64-encode them. b64encode returns bytes, so decode
# to get a str that can be embedded in a data URL.
# Path.read_bytes() opens, reads, and closes the file in a single call.
base64_image = base64.b64encode(image_path.read_bytes()).decode("utf-8")

# Image content parts for the chat request: a single "image_url" entry that
# carries the image inline as a base64 data URL. The MIME type is fixed to
# image/jpeg to match the .jpg file above.
content_images = [
    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
]

## Create a Client

In [7]:
from openai import AsyncAzureOpenAI

# Deployment used for image analysis; passed as `model` on each request below
vision_deployment_name = "gpt-4o"

# Asynchronous Azure OpenAI client built from the settings loaded earlier
vision_client = AsyncAzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    api_version=azure_openai_api_version,
)

## Analyze the Image

In [8]:
# Define the user prompt for the image description
user_prompt = "Describe this image in detail."

# Send a request to the Azure OpenAI service to analyze the image and generate a description
response = await vision_client.chat.completions.create(
    model=vision_deployment_name,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_prompt,
                },
                *content_images,  # Include the image content in the request
            ],
        }
    ],
    max_tokens=1000,  # Set the maximum number of tokens for the response
)

# Print the generated description of the image
print("Response: " + response.choices[0].message.content)


Response: This is a photograph of a domestic cat walking casually down a quiet residential street during what appears to be early morning or late afternoon due to the soft, golden lighting. The cat is mostly white with dark gray and black tabby markings on its head, back, and tail. It has striking green eyes and a focused expression, giving the impression that it is on a mission or exploration.

The foreground showcases the textured surface of the asphalt road. In the background, there is a residential neighborhood with houses featuring pitched roofs. A metal fence and gate, a parked car, and a blue trash bin are visible on the right side of the image. On the left, further down the street, there are colorful shrubs with red flowers and a palm tree in the distance, all slightly blurred due to the shallow depth of field. The image captures a tranquil, serene moment in what seems like a suburban environment.


## Getting results similar to Image Analysis

In [9]:
# Define the user prompt for the image description
user_prompt = "Provide me 10 captions for this image."

# Send a request to the Azure OpenAI service to analyze the image and generate a description
response = await vision_client.chat.completions.create(
    model=vision_deployment_name,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_prompt,
                },
                *content_images,  # Include the image content in the request
            ],
        }
    ],
    max_tokens=1000,  # Set the maximum number of tokens for the response
)

# Print the generated description of the image
print("Response: " + response.choices[0].message.content)


Response: 1. "On patrol: the neighborhood guardian is on the move! 🐾"  
2. "Strutting into Monday like I own the block. 😼"  
3. "Sunrise strolls and paws on the pavement. ☀️🐾"  
4. "Keeping an eye on the street—security at its finest. 🐈‍⬛"  
5. "The queen of this quiet corner takes her morning walk. 👑"  
6. "Determined and unfazed, as only a cat can be. 😏"  
7. "Today's forecast: partly curious with a chance of mischief. 😺"  
8. "Every street’s runway needs its own supermodel. 🐾✨"  
9. "When you’re the unspoken ruler of the entire neighborhood. 🏡"  
10. "Step aside, humans. This road clearly belongs to me. 🚶‍♀️🐈"  


In [10]:
# Define the user prompt for the image description
user_prompt = """Analyze this image.
    Provide response in sample JSON format.
    {
        "description": "Describe the image in less than 50 words",
        "category": cat, dog, or mouse
    }

"""

# Send a request to the Azure OpenAI service to analyze the image and generate a description
response = await vision_client.chat.completions.create(
    model=vision_deployment_name,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_prompt,
                },
                *content_images,  # Include the image content in the request
            ],
        }
    ],
    max_tokens=1000,  # Set the maximum number of tokens for the response
)

# Print the generated description of the image
print("Response: " + response.choices[0].message.content)

Response: ```json
{
    "description": "A gray-and-white cat is walking confidently on a quiet suburban street during a sunny day.",
    "category": "cat"
}
```
