## Use Generative AI in Images Solution

## Install Library

In [None]:
%pip install openai
%pip install requests
%pip install pillow

## Load Azure Configuration

In [None]:
import os

# --- Azure OpenAI settings -------------------------------------------------
# These lookups are lenient: a missing variable yields None instead of raising.
azure_openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.environ.get("AZURE_OPENAI_API_KEY")
# Read for completeness; not referenced by the other cells visible in this notebook.
azure_openai_deployment = os.environ.get("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
azure_openai_api_version = os.environ.get("AZURE_OPENAI_API_VERSION")

# --- Azure Computer Vision settings ----------------------------------------
# These lookups are strict: a missing variable raises KeyError immediately.
azure_computer_vision_endpoint = os.environ["AZURE_COMPUTER_VISION_ENDPOINT"]
azure_computer_vision_key = os.environ["AZURE_COMPUTER_VISION_KEY"]

# --- Azure AI Services settings (strict lookups as well) -------------------
azure_ai_services_endpoint = os.environ["AZURE_AI_SERVICES_ENDPOINT"]
azure_ai_services_key = os.environ["AZURE_AI_SERVICES_KEY"]

## Create Clients

In [None]:
from openai import AsyncAzureOpenAI

# The asynchronous client is used so that the API calls in later cells can be
# awaited rather than blocking while the service responds.
# Both clients target the same Azure OpenAI resource, so they share one set of
# connection settings; they stay separate objects, one per model deployment.
azure_openai_client_settings = {
    "api_key": azure_openai_key,
    "api_version": azure_openai_api_version,
    "azure_endpoint": azure_openai_endpoint,
}

# Client and deployment name used for image generation
dalle_client = AsyncAzureOpenAI(**azure_openai_client_settings)
deployment_name = "dall-e-3"

# Client and deployment name used for image analysis (vision)
vision_client = AsyncAzureOpenAI(**azure_openai_client_settings)
vision_deployment_name = "gpt-4o"

## Generate, Show and Save the Images

In [None]:
from PIL import Image
import requests
import matplotlib.pyplot as plt

user_prompt = """Create an image of a playful bulldog puppy sitting in a grassy park, with a colorful ball next to it. 
The bulldog should have a happy expression and its tongue sticking out."""

# generate an image using the DALL-E 3 model
result = await dalle_client.images.generate(
    model=deployment_name, # the name of your DALL-E 3 deployment
    prompt=user_prompt,
    size="1024x1024", 
    style="natural",
    quality="standard",
    n=1
)

# Retrieve the image URL from the response (assuming response structure)
image_url = result.data[0].url

# Open the image from the URL
im = Image.open(requests.get(image_url, stream=True).raw)

# Save the image to a file
im.save("images/dog1.jpg")

# Display the image with matplotlib
plt.imshow(im)
plt.axis("off")  # Turn off axis labels
plt.show()

In [None]:
from PIL import Image
import requests
import matplotlib.pyplot as plt

user_prompt = """Generate an image of a regal-looking adult bulldog wearing a small crown, sitting on a red velvet cushion. 
The background should be a luxurious room with elegant decor."""

# generate an image using the DALL-E 3 model
result = await dalle_client.images.generate(
    model=deployment_name, # the name of your DALL-E 3 deployment
    prompt=user_prompt,
    size="1024x1024", 
    style="natural",
    quality="standard",
    n=1
)

# Retrieve the image URL from the response (assuming response structure)
image_url = result.data[0].url

# Open the image from the URL
im = Image.open(requests.get(image_url, stream=True).raw)

# Save the image to a file
im.save("images/dog2.jpg")

# Display the image with matplotlib
plt.imshow(im)
plt.axis("off")  # Turn off axis labels
plt.show()

In [None]:
from PIL import Image
import requests
import matplotlib.pyplot as plt

user_prompt = """Create an image of a poodle dressed in a cute outfit, walking down a city street with a leash in its mouth."""

# generate an image using the DALL-E 3 model
result = await dalle_client.images.generate(
    model=deployment_name, # the name of your DALL-E 3 deployment
    prompt=user_prompt,
    size="1024x1024", 
    style="natural",
    quality="standard",
    n=1
)

# Retrieve the image URL from the response (assuming response structure)
image_url = result.data[0].url

# Open the image from the URL
im = Image.open(requests.get(image_url, stream=True).raw)

# Save the image to a file
im.save("images/dog3.jpg")

# Display the image with matplotlib
plt.imshow(im)
plt.axis("off")  # Turn off axis labels
plt.show()

## Get the Image and Analyze it

In [None]:
import base64
from pathlib import Path

# Create a Path object for the image file
image_path = Path("images/dog1.jpg")

# Using a context manager to open the file with Path.open()
with image_path.open("rb") as image_file:
    base64_image = base64.b64encode(image_file.read()).decode("utf-8")

# Prepare the image content in the required format for the Azure OpenAI service
content_images = [
    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
    for base64_image in [base64_image]
]

# Define the user prompt for the image description
user_prompt = "Describe this image in detail."

# Send a request to the Azure OpenAI service to analyze the image and generate a description
response = await vision_client.chat.completions.create(
    model=vision_deployment_name,
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": user_prompt,
                },
                *content_images,  # Include the image content in the request
            ],
        }
    ],
    max_tokens=1000,  # Set the maximum number of tokens for the response
)

# Print the generated description of the image
print("Response: " + response.choices[0].message.content)

## Calculate Vector Similarity

In [None]:
import numpy as np

def cosine_similarity(vector1, vector2):
    """Return the cosine of the angle between two vectors.

    Computed as the dot product of the inputs divided by the product of
    their Euclidean norms.

    Args:
        vector1: First vector (any sequence or array NumPy can consume).
        vector2: Second vector, the same length as ``vector1``.

    Returns:
        The similarity score as a NumPy scalar.
    """
    numerator = np.dot(vector1, vector2)
    denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    return numerator / denominator

## Vectorize Image API

In [None]:
import requests

# Function to vectorize an image
def vectorize_image(image_source, is_url=True, timeout=30):
    """Request an embedding for an image from the Computer Vision vectorizeImage API.

    Args:
        image_source: Publicly reachable image URL when ``is_url`` is True,
            otherwise the path of a local image file.
        is_url: Selects between sending the URL as JSON (True) and uploading
            the file's bytes as ``application/octet-stream`` (False).
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON response (callers in this notebook read its
        ``"vector"`` entry), or ``None`` when the HTTP request fails; the
        error is printed in that case.

    Raises:
        OSError: If ``is_url`` is False and the local file cannot be read
            (only request errors are caught here).
    """
    # Drop any trailing slash from the configured endpoint so the joined URL
    # never contains "//computervision".
    base_url = azure_computer_vision_endpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeImage?api-version=2024-02-01&model-version=2023-04-15"

    headers = {
        "Ocp-Apim-Subscription-Key": azure_computer_vision_key
    }

    try:
        if is_url:
            # Remote image: send its URL in a JSON body
            headers["Content-Type"] = "application/json"
            data = {
                "url": image_source
            }
            response = requests.post(url, headers=headers, json=data, timeout=timeout)
        else:
            # Local image: upload the raw bytes
            with open(image_source, "rb") as image_file:
                image_data = image_file.read()

            headers["Content-Type"] = "application/octet-stream"
            response = requests.post(url, headers=headers, data=image_data, timeout=timeout)

        response.raise_for_status()  # Raise an exception for HTTP errors

        return response.json()

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses
        print(f"Error: {e}")
        return None

## Calculate Image Embeddings

In [None]:
# Vectorize each saved image from its local file (is_url=False) and print the
# embedding that belongs to that same image.
dog1_result = vectorize_image("images/dog1.jpg", False)
print("Dog 1: ", dog1_result["vector"])

dog2_result = vectorize_image("images/dog2.jpg", False)
print("Dog 2: ", dog2_result["vector"])

dog3_result = vectorize_image("images/dog3.jpg", False)
print("Dog 3: ", dog3_result["vector"])


## Calculate Image Similarity

In [None]:
# Compare every pair of image embeddings and print each score under its label
image_pairs = [
    ("Dog 1 vs Dog 2", dog1_result, dog2_result),
    ("Dog 1 vs Dog 3", dog1_result, dog3_result),
    ("Dog 2 vs Dog 3", dog2_result, dog3_result),
]

for pair_label, first_result, second_result in image_pairs:
    print(pair_label)
    print(cosine_similarity(first_result["vector"], second_result["vector"]))

# Dog 1 and Dog 2 are more similar than Dog 1 and Dog 3 or Dog 2 and Dog 3

## Vectorize Text API

In [None]:
import requests

def vectorize_text(text, timeout=30):
    """Request an embedding for a text string from the Computer Vision vectorizeText API.

    Args:
        text: The text to vectorize.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON response (callers in this notebook read its
        ``"vector"`` entry), or ``None`` when the HTTP request fails; the
        error is printed in that case.
    """
    # Drop any trailing slash from the configured endpoint so the joined URL
    # never contains "//computervision".
    base_url = azure_computer_vision_endpoint.rstrip("/")
    url = f"{base_url}/computervision/retrieval:vectorizeText?api-version=2024-02-01&model-version=2023-04-15"

    # Set headers
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": azure_computer_vision_key
    }

    # Set the data payload
    data = {
        "text": text
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors

        return response.json()

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses
        print(f"Error: {e}")
        return None


## Calculate Text Embedding

In [None]:
# Embed the word "bulldog" and show the resulting vector
user_input = "bulldog"
text_bulldog_vector = vectorize_text(user_input)
bulldog_embedding = text_bulldog_vector["vector"]
print("Bulldog: ", bulldog_embedding)

# Embed the word "poodle" and show the resulting vector
user_input = "poodle"
text_poodle_vector = vectorize_text(user_input)
poodle_embedding = text_poodle_vector["vector"]
print("Poodle: ", poodle_embedding)


## Calculate Similarity between Text Input and Images

In [None]:
# Image results in the order dog1, dog2, dog3; each text embedding is scored
# against all three, one score per line.
image_results = (dog1_result, dog2_result, dog3_result)

print("Bulldog Text vs Images")
for image_result in image_results:
    print(cosine_similarity(text_bulldog_vector["vector"], image_result["vector"]))

print("Poodle Text vs Images")
for image_result in image_results:
    print(cosine_similarity(text_poodle_vector["vector"], image_result["vector"]))

# Bulldog text is more similar to the first and second image vs the third image
# Poodle text is more similar to the third image vs the first and second image