# REST API Grounding Enhancement Samples


## Objective
Apply grounding techniques to image inputs in GPT-4V.

## Time

You should expect to spend 5-10 minutes running this sample.

## Before you begin

#### Installation

In [None]:
%pip install -r ../requirements.txt

### Parameters
You need to set a series of configuration values: GPT-4V_DEPLOYMENT_NAME, OPENAI_API_BASE, OPENAI_API_VERSION, and VISION_API_ENDPOINT.

Create two environment variables named "OPENAI_API_KEY" and "VISION_API_KEY", and set their values to \<Your API Key Value\> and \<Your VISION Key Value\> respectively. Set them before starting Jupyter so that the notebook process can read them.
 <br>
      
      WINDOWS Users: 
         setx OPENAI_API_KEY "REPLACE_WITH_YOUR_KEY_VALUE_HERE"
         setx VISION_API_KEY "REPLACE_WITH_YOUR_KEY_VALUE_HERE"

      MACOS/LINUX Users: 
         export OPENAI_API_KEY="REPLACE_WITH_YOUR_KEY_VALUE_HERE"
         export VISION_API_KEY="REPLACE_WITH_YOUR_KEY_VALUE_HERE"


In [None]:
# Name of the GPT-4V deployment to call; replace the placeholder with your own.
deployment_name: str = "<your GPT-4V deployment name>"
# The base URL for your Azure OpenAI resource, e.g. "https://<your resource name>.openai.azure.com"
openai_api_base: str = "<your resource base URL>"
# The API version to request. Version strings follow the YYYY-MM-DD date
# structure (for example 2022-12-01).
# NOTE(review): 2022-12-01 is carried over from the original comment as a format
# example only; confirm which version your GPT-4V deployment actually requires.
openai_api_version: str = "<your OpenAI API version>"

# The base URL for your vision resource endpoint, e.g. "https://<your-resource-name>.cognitiveservices.azure.com"
vision_api_endpoint: str = "<your vision resource endpoint>"

# Set to True to run the clean-up cell at the end of this notebook.
should_cleanup: bool = False

## Connect to your project
To start with, let us create a config file with your project details. This file can be used in this sample or in other samples to connect to your workspace.

In [None]:
import json
from pathlib import Path

# Bundle the connection parameters into a single JSON-serializable mapping.
config = {
    "GPT-4V_DEPLOYMENT_NAME": deployment_name,
    "OPENAI_API_BASE": openai_api_base,
    "OPENAI_API_VERSION": openai_api_version,
    "VISION_API_ENDPOINT": vision_api_endpoint,
}

# Persist the settings one directory above the current working directory,
# overwriting any existing config.json.
p = Path("../config.json")
with p.open("w") as file:
    json.dump(config, file)

## Run this Example

In [None]:
import base64
import sys
import os
import re
import matplotlib.pyplot as plt
import textwrap
import random
from PIL import Image, ImageDraw, ImageFont
from typing import Tuple

# Add the parent of the current working directory to the module search path
# before importing `shared_functions` (presumably the module lives there —
# verify against the repository layout). The order matters: the path must be
# appended before the import statement runs.
parent_dir = Path(Path.cwd()).parent
sys.path.append(str(parent_dir))
from shared_functions import call_GPT4V_image

# Read the vision resource key from the environment; the value is None when
# VISION_API_KEY is not set.
vision_api_key = os.environ.get("VISION_API_KEY")


def random_color() -> Tuple[int, int, int]:
    """Return a random RGB triple with each channel drawn from 0 to 255 inclusive."""
    red, green, blue = (random.randint(0, 255) for _ in range(3))
    return (red, green, blue)


def draw_groundings(image_path: str, groundings: object) -> None:
    """Display an image with a bounding box drawn around every grounded text span.

    The image is copied onto a white canvas that is 200 pixels wider than the
    original. Each grounding's polygon is reduced to its axis-aligned bounding
    box and outlined on the image. The grounded texts are then listed in the
    extra strip on the right, each in the colour of its box. The result is
    shown with matplotlib; nothing is written to disk.

    Args:
        image_path: Path of the image file to annotate.
        groundings: Iterable of mappings, each providing a ``"text"`` string
            and a ``"polygon"`` sequence of points of the form
            ``{"x": ..., "y": ...}``. Coordinates are multiplied by the image
            width and height, i.e. they are treated as fractions of the image
            size. Groundings with an empty polygon are skipped.
    """
    # Copy the pixels onto the wider canvas while the file is open, then let
    # the context manager release the underlying file handle.
    with Image.open(image_path) as original_image:
        width, height = original_image.size
        # 200 extra pixels on the right make room for the text legend.
        image = Image.new("RGB", (width + 200, height), "white")
        image.paste(original_image, (0, 0))

    # Scale the outline thickness and the legend font with the image size.
    box_width = max(2, width // 200)
    font_size = max(10, height // 30)

    draw = ImageDraw.Draw(image)

    # Named colours are handed out first; random colours are used afterwards.
    palette = iter(["red", "green", "blue", "purple", "orange", "pink", "cyan"])
    text_color_map = {}  # Maps each distinct text to the colour of its boxes

    for grounding in groundings:
        text = grounding["text"]
        polygon = grounding["polygon"]
        if not polygon:
            # Nothing to outline; min()/max() below would fail on no points.
            continue

        # Reuse the colour when the same text is grounded more than once.
        if text not in text_color_map:
            named_color = next(palette, None)
            text_color_map[text] = named_color if named_color is not None else random_color()
        color = text_color_map[text]

        # Convert the relative coordinates to pixels and take the bounding box.
        xs = [point["x"] * width for point in polygon]
        ys = [point["y"] * height for point in polygon]
        draw.rectangle([min(xs), min(ys), max(xs), max(ys)], outline=color, width=box_width)

    # Load the legend font once; fall back to Pillow's built-in font when
    # DejaVuSans is not available on this machine.
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", font_size)
    except OSError:
        font = ImageFont.load_default()

    text_x = width + 20  # Start just right of the original image
    text_y = 20  # Initial vertical position
    for text, color in text_color_map.items():
        # NOTE(review): a 40-character line may be wider than the 200px strip
        # with larger fonts — confirm, and derive the wrap width from the font
        # metrics if clipping is observed.
        for line in textwrap.fill(text, width=40).split("\n"):
            if text_y + font_size >= height:
                break  # No vertical space left for further lines
            draw.text((text_x, text_y), line, fill=color, font=font)
            text_y += font_size + 5  # Advance to the next line

    # Show the annotated image
    plt.figure(figsize=(15, 10))
    plt.imshow(image)
    plt.axis("off")
    plt.show()


# Image Description Assistant
image_file_path = "ImageDescriptionAssistant.jpg"  # Update with your image path
sys_message = "You are an AI assistant that helps people craft a clear and detailed sentence that describes the content depicted in an image."
user_prompt = "Describe image"

# Encode the image in base64 so it can be embedded in the request as a data URL
with Path(image_file_path).open("rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

messages = [
    {"role": "system", "content": [{"type": "text", "text": sys_message}]},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": user_prompt},  # Prompt for the user
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},  # Image to be processed
            },
        ],
    },
]

vision_api_config = {"endpoint": vision_api_endpoint, "key": vision_api_key}

# Send the request. Failures of the call itself are reported separately from
# failures while reading or drawing the response, so the message printed
# points at the stage that actually went wrong.
try:
    response_content = call_GPT4V_image(messages, grounding=True, vision_api=vision_api_config)
except Exception as e:
    print(f"Failed to call GPT-4V API. Error: {e}")
else:
    try:
        choice = response_content["choices"][0]
        text = choice["message"]["content"]
        # Split after "." or "?" followed by whitespace, but not after
        # abbreviations shaped like "e.g." or "Mr.".
        sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", text)
        for sentence in sentences:  # Print the content of the response
            print(sentence)
        # NOTE(review): only the spans of the first grounding line are drawn —
        # confirm whether later lines should be included as well.
        draw_groundings(image_file_path, choice["enhancements"]["grounding"]["lines"][0]["spans"])
    except Exception as e:
        # repr() keeps the exception type visible, e.g. KeyError('enhancements')
        print(f"Failed to process GPT-4V response. Error: {e!r}")

## Cleaning up

To clean up all Azure resources used in this example, you can delete the individual resources you created in this tutorial.

If you made a resource group specifically to run this example, you could instead [delete the resource group](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/delete-resource-group).

In [None]:
# Placeholder: no automated clean-up is implemented yet, so this cell does
# nothing even when should_cleanup is True. Delete the resources manually as
# described above.
if should_cleanup:
    # {{TODO: Add resource cleanup}}
    pass