Import libraries

In [None]:
import os
import base64
from pdf2image import convert_from_path
from openai import AzureOpenAI
from PIL import Image, ImageOps
import io
import numpy as np

Add the API key, API version, API base URL, and deployment name.

In [None]:
# Connection settings for the Azure OpenAI resource.
# Values are read from environment variables so that secrets do not have to be
# written into the notebook; each falls back to an empty string, which can be
# replaced with a literal value for quick local experiments.
api_key = os.getenv("AZURE_OPENAI_API_KEY", "")
api_version = os.getenv("OPENAI_API_VERSION", "")
api_base = os.getenv("AZURE_OPENAI_ENDPOINT", "")
deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "")

Initialize the AzureOpenAI client

In [None]:
# Build a client whose base URL targets one specific deployment, so every
# request made through it goes to `deployment_name`.
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    # Strip a trailing "/" from the endpoint so the joined URL does not
    # contain "//" before "openai/deployments".
    base_url=f"{api_base.rstrip('/')}/openai/deployments/{deployment_name}"
)

Helper functions for converting a PDF page or an image file to a Base64-encoded JPEG.

In [None]:
def pdf_to_base64(pdf_path, page=1):
    """
    Render a single page of a PDF as a JPEG and return it Base64-encoded.

    Args:
        pdf_path (str): Path to the PDF file.
        page (int): 1-based number of the page to render (default is 1, the first page).

    Returns:
        str: Base64-encoded string of the JPEG rendering of the page.

    Raises:
        ValueError: If page is smaller than 1, or if no image was produced
            for the requested page.
    """
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page}")

    # Restrict the conversion to the one requested page.
    images = convert_from_path(pdf_path, first_page=page, last_page=page)
    if not images:
        # Fail with a clear message instead of an IndexError on images[0].
        raise ValueError(f"No image was rendered for page {page} of {pdf_path!r}")
    image = images[0]

    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='JPEG')

    # getvalue() returns the whole buffer regardless of the stream position.
    base64_image = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
    return base64_image

def jpg_to_base64(image_path, percentage=100, target_size=None, normalize=False):
    """
    Load an image, optionally resize it, and return it as a Base64-encoded JPEG.

    The image is always converted to RGB and re-encoded as JPEG (quality 85).

    Args:
        image_path (str): Path to the image file.
        percentage (int | float): Resize percentage; must be greater than 0
            (default is 100, no resizing). Ignored when target_size is given.
        target_size (tuple): Optional target size (width, height). When given,
            the image is scaled and center-cropped to exactly this size
            (default is None).
        normalize (bool): Whether to pass the pixels through a [0, 1] float
            representation (default is False). The values are scaled straight
            back to 0-255 before encoding, so this effectively leaves the
            encoded image unchanged.

    Returns:
        str: Base64-encoded string of the preprocessed image.

    Raises:
        ValueError: If percentage is used for resizing and is not greater than 0.
    """
    with Image.open(image_path) as img:
        # JPEG cannot store alpha or palette data, so force plain RGB first.
        img = img.convert("RGB")

        if target_size:
            img = ImageOps.fit(img, target_size, method=Image.Resampling.LANCZOS, centering=(0.5, 0.5))
        elif percentage != 100:
            if percentage <= 0:
                raise ValueError(f"percentage must be greater than 0, got {percentage}")
            scale = percentage / 100
            width, height = img.size
            # Clamp to at least one pixel: int() truncation would otherwise
            # produce a zero-sized dimension for very small percentages.
            new_width = max(1, int(width * scale))
            new_height = max(1, int(height * scale))
            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        if normalize:
            # Scale to [0, 1] and immediately back to 8-bit, because the image
            # has to be stored as 8-bit data for JPEG encoding.
            img_array = np.array(img) / 255.0
            img = Image.fromarray((img_array * 255).astype(np.uint8))

        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG', quality=85)

        # getvalue() returns the whole buffer regardless of the stream position.
        base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
    return base64_image

Load the input images (either JPG or PDF) as Base64 strings.

In [None]:
# Base64-encode the map image with the helper's default settings
# (no resizing, no normalization).
map_image = jpg_to_base64(image_path="200.jpg")

# Base64-encode the map explanation, which is supplied as a PDF.
map_explanation = pdf_to_base64(pdf_path="200_tegnforklaring.pdf")

Experiment with different prompts and AI contexts (system messages).

In [None]:
# System message for analysing the map image itself: asks the model to find
# every region, match it to the legend, and report bounding boxes, colors and
# labels in a structured form. Used as the "system" role content in the chat
# completion request.
context1 = """
You are a highly knowledgeable and precise expert in map analysis, specializing in identifying and extracting regions, features, and relevant details from maps. Your expertise includes understanding color-coded areas, boundaries, and labels, as well as accurately interpreting legends and visual information. Analyze the provided map image in detail, leveraging your advanced skills to identify all distinct regions based on the map's color scheme and boundaries.

1. Carefully examine the entire map, ensuring no region is overlooked, including small, irregular, or partially obscured areas.
2. Match the identified regions to their corresponding descriptions in the map's legend, providing an exact match for each color or feature.
3. For each region, provide:
   - Its bounding box coordinates (x_min, y_min, x_max, y_max) based on the map's resolution.
   - The color or pattern associated with the region.
   - Any text or labels present within the region.
4. Consider subtle variations in colors or patterns and include all distinguishable areas, regardless of size or prominence.
5. If the map contains overlapping or adjacent regions, clearly delineate them and avoid duplication.
6. Take your time to ensure precision and accuracy in your analysis, ensuring no relevant detail is missed.

Present your findings systematically, making it easy to parse and interpret programmatically or manually. Use structured formats where appropriate to ensure clarity and completeness.
"""

# Alternative system message focused on the map explanation (legend): asks the
# model to list each area name together with its exact color or pattern.
# NOTE(review): not referenced by the request in the visible code; presumably
# meant to be swapped in for experiments.
context2 = """
You are a highly skilled expert in reading and interpreting map explanations, including legends, labels, and associated color codes. Your task is to thoroughly analyze the provided map's explanation (legend) to identify the exact names of all areas and their corresponding colors. Leverage your expertise to ensure precision and accuracy in extracting this information.

1. Carefully read the map explanation and interpret all details, including subtle variations in color shades or patterns.
2. For each area listed in the explanation, provide:
   - The exact name of the area as stated in the legend.
   - The precise color code, shade, or pattern associated with the area.
3. If the explanation includes unique patterns (e.g., striped or dotted regions), describe them clearly and associate them with their respective areas.
4. Focus on clarity and avoid ambiguities; ensure each area name and its associated color are explicitly matched.
5. Take your time to cross-check the information to ensure no area or color is omitted, regardless of prominence.

Present your results in a structured and easy-to-interpret format, ensuring that each area name is clearly associated with its exact color or pattern as described in the map explanation.
"""



# User prompt asking for bounding boxes (x_min, y_min, x_max, y_max) of every
# orange area in the image. Sent as the text part of the user message in the
# chat completion request.
prompt1 = """
Analyze the provided image thoroughly. Focus specifically on identifying all orange areas in the image.
Ensure that you capture every distinct orange region, including small, irregular, or partially obscured areas.
Provide detailed coordinates for each orange area as bounding boxes (x_min, y_min, x_max, y_max).
Take your time to carefully analyze and ensure no orange area is missed.
"""

# Follow-up style user prompt asking for the orange areas to be listed
# systematically in a machine-parsable structure.
# NOTE(review): not referenced by the request in the visible code; presumably
# meant to be swapped in for experiments.
prompt2 = """
Using the analysis results from the image, ensure each orange area's bounding box is clearly defined.
If there are overlapping or adjacent areas, distinguish them clearly.
List all orange areas systematically with their coordinates and size descriptions.
Provide the results in a structured format that can be parsed programmatically.
"""


Create a chat completion request with the Base64 images and the desired prompt.

In [None]:
# Send one chat completion request: the system message sets the analysis
# context, and the user message carries the text prompt followed by the two
# Base64 images (map first, then map explanation) as JPEG data URLs.
response = client.chat.completions.create(
    model=deployment_name,
    max_tokens=4096,
    messages=[
        dict(role="system", content=context1),
        dict(
            role="user",
            content=[
                dict(type="text", text=prompt1),
                # One image part per encoded picture, in the order listed.
                *(
                    dict(
                        type="image_url",
                        image_url=dict(url=f"data:image/jpeg;base64,{encoded}"),
                    )
                    for encoded in (map_image, map_explanation)
                ),
            ],
        ),
    ],
)