In [None]:
import os
import base64
from pdf2image import convert_from_path
from openai import AzureOpenAI
from PIL import Image
import io
import fitz 
import cv2
import numpy as np

In [None]:
# Azure OpenAI connection settings.
# The endpoint and key are read from the environment so that no secret has to
# be pasted into the notebook. When a variable is unset the value falls back to
# the empty placeholder and must be filled in before the client can be used.
api_base = os.environ.get("AZURE_OPENAI_ENDPOINT", "").rstrip("/")  # Base URL (no trailing slash)
api_key = os.environ.get("AZURE_OPENAI_API_KEY", "")
api_version = "2024-08-01-preview"
deployment_name = "gpt-4o"  # Deployment name from your endpoint

# Initialize the AzureOpenAI client.
# base_url points directly at the deployment, so every request made through
# this client is routed to `deployment_name`.
client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    base_url=f"{api_base}/openai/deployments/{deployment_name}",
)

In [None]:
def extract_image_from_pdf_base64(pdf_path, page_number=0, image_index=0):
    """
    Pull one embedded image out of a PDF page and return it as Base64 text.

    The raw image bytes are decoded with OpenCV, converted from BGR to RGB,
    re-encoded as JPEG and finally Base64-encoded. Progress and failures are
    reported with print(); any exception is caught, printed and turned into a
    None return value instead of being propagated.

    Args:
        pdf_path (str): Path to the PDF file.
        page_number (int): Page number (0-indexed) to extract the image from.
        image_index (int): Index of the image on the page (default is the first image).

    Returns:
        str: Base64-encoded JPEG data of the extracted image, or None if the
        page or image does not exist, the image cannot be decoded, or an
        exception occurs.
    """
    # Sentinel so the cleanup step knows whether the document was ever opened.
    pdf_document = None
    try:
        print(f"Opening PDF: {pdf_path}")
        pdf_document = fitz.open(pdf_path)
        page_count = len(pdf_document)
        print(f"PDF successfully opened. Number of pages: {page_count}")

        if page_number >= page_count:
            print(f"Error: Page number {page_number} is out of range. Total pages: {page_count}")
            return None

        # Messages show the page 1-based while the lookup itself is 0-based.
        display_page = page_number + 1
        print(f"Loading page {display_page}")
        page_images = pdf_document.load_page(page_number).get_images(full=True)
        image_count = len(page_images)
        print(f"Number of images found on page {display_page}: {image_count}")

        target_available = image_count > 0 and image_index < image_count
        if not target_available:
            print(f"No image found on page {display_page} at index {image_index}.")
            return None

        print(f"Extracting image at index {image_index} on page {display_page}")
        xref = page_images[image_index][0]  # first field of the entry is the image XREF
        raw_bytes = pdf_document.extract_image(xref)["image"]
        print(f"Image extracted successfully. Image size: {len(raw_bytes)} bytes")

        print("Decoding image bytes into NumPy array")
        bgr_pixels = cv2.imdecode(np.frombuffer(raw_bytes, np.uint8), cv2.IMREAD_COLOR)
        if bgr_pixels is None:
            print("Error: OpenCV failed to decode the image.")
            return None

        print("Image decoded successfully. Encoding to Base64.")
        rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
        jpeg_buffer = io.BytesIO()
        Image.fromarray(rgb_pixels).save(jpeg_buffer, format="JPEG")
        return base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")

    except Exception as exc:
        print(f"An error occurred: {exc}")
        return None

    finally:
        # Runs on every exit path, including the early returns above.
        if pdf_document is not None:
            pdf_document.close()
            print("PDF document closed.")


def numpy_to_binary_stream(image_array):
    """
    Converts a NumPy image array to a binary stream.

    JPEG cannot store an alpha channel, so an array that Pillow interprets as
    "RGBA" or "LA" is converted to "RGB" / "L" before saving (the alpha channel
    is dropped, not composited) instead of failing at save time.

    Args:
        image_array (np.ndarray): Image in NumPy array format. The array is
            handed to Pillow as-is, so no channel reordering is performed here.
    Returns:
        io.BytesIO: Binary stream of the image in JPEG format, positioned at
        the start of the data.
    """
    pil_image = Image.fromarray(image_array)

    # Map alpha-carrying modes to their alpha-free counterpart; other modes
    # are saved exactly as before.
    alpha_free_mode = {"RGBA": "RGB", "LA": "L"}.get(pil_image.mode)
    if alpha_free_mode is not None:
        pil_image = pil_image.convert(alpha_free_mode)

    binary_stream = io.BytesIO()
    pil_image.save(binary_stream, format="JPEG")
    binary_stream.seek(0)  # Move to the start of the stream
    return binary_stream

# Convert a local image to Base64 (for the second image)
def image_to_base64(image_path, percentage=100):
    """
    Resize an image by a percentage and convert it to a Base64 JPEG string.

    Images whose mode JPEG cannot store (alpha or palette modes, as commonly
    found in PNG/GIF files) are converted to RGB first; the alpha channel is
    dropped, not composited.

    Args:
        image_path (str): Path to the image file.
        percentage (int): Resize percentage (default is 100, no resizing).
            Must be greater than 0.

    Returns:
        str: Base64-encoded string of the (optionally resized) image, encoded
        as JPEG.

    Raises:
        ValueError: If percentage is not greater than 0.
    """
    if percentage <= 0:
        raise ValueError(f"percentage must be greater than 0, got {percentage}")

    with Image.open(image_path) as img:
        # Convert before resizing so palette images are resampled on real
        # colour values rather than on palette indices.
        if img.mode in ("RGBA", "LA", "P", "PA"):
            img = img.convert("RGB")

        # Calculate new dimensions based on percentage
        if percentage != 100:
            width, height = img.size
            scale = percentage / 100
            # Clamp each side to 1 px: a tiny percentage would otherwise
            # truncate to 0, which Pillow rejects.
            new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
            img = img.resize(new_size, Image.Resampling.LANCZOS)

        # Encode as JPEG in memory, then as Base64 text
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='JPEG')
        base64_image = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
    return base64_image

In [None]:
# Extract the first image on the first page of the PDF as Base64 JPEG text.
# The result is None when extraction fails.
image = extract_image_from_pdf_base64(
    pdf_path="dataset/aalesund/FOKUS/1504200/200_tegnforklaring.pdf",
    page_number=0,
    image_index=0,
)

In [None]:
# extract_image_from_pdf_base64 returns None on failure. Without this guard the
# request would be sent with the literal URL "data:image/jpeg;base64,None".
if image is None:
    raise RuntimeError("No image was extracted from the PDF; nothing to send to the model.")

# Create the chat completion request with the Base64 encoded image
response = client.chat.completions.create(
    model=deployment_name,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": [
            {"type": "text", "text": "Write the areas and their corresponding colors. Be accurate when identifying which color belongs to each area name."},
            # The image is passed inline as a data URL rather than as a hosted link.
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}},
        ]}
    ],
    max_tokens=4096
)

# Extract the message content of the first choice
message_content = response.choices[0].message.content

# Print only the message content
print(message_content)