In [20]:
import os
from dotenv import load_dotenv
from pprint import pprint
import base64
import json
import mimetypes
import os
import requests


In [10]:
## Load the OpenAI API key from a local api.env file
 
# The file is looked up in the current working directory; this is the
# notebook's folder only if the kernel was started there.
notebook_directory = os.getcwd()

# Construct the absolute path to the api.env file
env_file_path = os.path.join(notebook_directory, "api.env")

# Load environment variables from the api.env file
load_dotenv(env_file_path)

# Read the key back from the environment. os.getenv returns None when the
# variable is not set, so api_key may be None after this cell.
api_key = os.getenv("OPENAI_API_KEY")

In [19]:
# Disabled shell alternative (Windows `setx`), kept for reference:
# !setx OPENAI_API_KEY api_key

# os.environ only accepts str values, so a missing key (api_key is None) would
# fail on the assignment below with an opaque "str expected, not NoneType"
# TypeError. Fail first with a message that says how to fix it.
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in api.env or in the environment."
    )

# Export the key to the process environment for code that reads it from there.
os.environ['OPENAI_API_KEY'] = api_key

In [21]:
def encode_image(image_path: str):
    """Return the file at ``image_path`` as a base64 ``data:`` URL.

    The MIME type is guessed from the file name before the file is opened.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        ValueError: If no MIME type can be guessed from ``image_path``.
    """
    detected_type = mimetypes.guess_type(image_path)[0]
    if detected_type is not None:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        b64_text = base64.b64encode(raw_bytes).decode('utf-8')
        return f"data:{detected_type};base64,{b64_text}"

    # The file name gave no usable hint, so the data URL cannot be labelled.
    raise ValueError(f"Cannot determine MIME type for {image_path}")


def create_payload(images: list[str], prompt: str, model="gpt-4-vision-preview", max_tokens=100, detail="high"):
    """Build the JSON body for a chat-completions request with images.

    Args:
        images: Paths of the image files to attach; each one is converted
            with ``encode_image``.
        prompt: Text placed first in the user message.
        model: Value stored under ``"model"``.
        max_tokens: Value stored under ``"max_tokens"``.
        detail: Value stored under ``"detail"`` for every attached image.

    Returns:
        A dict with the keys ``"model"``, ``"messages"`` and ``"max_tokens"``.
        ``"messages"`` holds a single user message whose content is the text
        part followed by one ``image_url`` part per entry in ``images``.
    """
    text_part = {"type": "text", "text": prompt}

    # One image_url part per path, in the order the paths were given.
    image_parts = [
        {
            "type": "image_url",
            "image_url": {"url": encode_image(path), "detail": detail},
        }
        for path in images
    ]

    user_message = {"role": "user", "content": [text_part, *image_parts]}
    return {
        "model": model,
        "messages": [user_message],
        "max_tokens": max_tokens,
    }


def query_openai(payload, timeout=60):
    """Send a chat-completions request to the OpenAI API and return the decoded JSON.

    The bearer token is read from the notebook-level ``api_key`` variable.

    Args:
        payload: JSON-serialisable request body, e.g. the dict returned by
            ``create_payload``.
        timeout: Timeout in seconds handed to ``requests.post``. Without one,
            requests waits indefinitely on a stalled connection, which would
            hang the notebook kernel. Pass ``None`` to wait without a limit.

    Returns:
        The response body parsed from JSON. The HTTP status code is not
        checked, so an error body sent by the API is returned like any other.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.RequestException: On other transport failures,
            including a response body that is not valid JSON.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()

In [22]:
# Example run: ask the model for a one-sentence description of one local image.
# NOTE(review): this is an absolute, machine-specific path; point it at an
# image that exists on your machine before running the cell.
image_paths = ["C:/Users/Esra/Desktop/Deep_Learning/Image_Classification/Fashion/Classify_ThreadUp_Images/data/StanfordJpegs/item57221724.jpg"]
prompt = "describe the fashion item with one sentence"
# Build the request body with create_payload's default model, max_tokens and detail.
payload = create_payload(image_paths, prompt)
# Send the request; query_openai returns the response body decoded from JSON.
response = query_openai(payload)
print(response)

{'id': 'chatcmpl-8VT7PLzLBrIr50rW8QGk24tWLbuTB', 'object': 'chat.completion', 'created': 1702510791, 'model': 'gpt-4-1106-vision-preview', 'usage': {'prompt_tokens': 779, 'completion_tokens': 38, 'total_tokens': 817}, 'choices': [{'message': {'role': 'assistant', 'content': 'The item is a heathered grey, sleeveless tank top with the Eiffel Tower and the words "Paris France" and "A&Fitch" printed on the front.'}, 'finish_details': {'type': 'stop', 'stop': '<|fim_suffix|>'}, 'index': 0}]}
