## Generating image descriptions using multimodal LLMs

In [6]:
#!pip install openai

In [20]:
#!pip show pydantic

In [21]:
#!pip install pydantic==1.10.7

In [3]:
import boto3
import json
import requests

In [12]:
#import openai
from openai import OpenAI
import os
import pandas as pd

In [30]:
#Image related
from PIL import Image
import io
import base64

In [43]:
# Getting openai key
from botocore.exceptions import ClientError


# Function to get the OpenAI API key from AWS Secrets Manager
def get_secret(secret_name="openAI_key_warsztaty_PW_2024", region_name="us-east-1"):
    """
    Load the OpenAI credentials from AWS Secrets Manager, with an environment fallback.

    Parameters:
    - secret_name: Id of the secret to read from AWS Secrets Manager.
    - region_name: AWS region in which the Secrets Manager client is created.

    Returns:
    - dict: the JSON-decoded `SecretString` of the secret when the lookup succeeds.
      When anything fails, the error is printed and a dict with the single key
      "openAI_key_warsztaty_PW_2024" is returned instead; its value is the
      OPENAI_API_KEY environment variable, or None when that variable is unset.
    """
    try:
        # Create a Secrets Manager client
        session = boto3.session.Session()
        client = session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
        return json.loads(get_secret_value_response['SecretString'])
    except Exception as e:
        # Deliberately broad: any failure (credentials, network, malformed
        # secret payload) should degrade to the environment-variable fallback.
        print(f"Error fetching secret: {e}")
        fallback_key = os.environ.get("OPENAI_API_KEY")
        if fallback_key is None:
            # Make the otherwise silent "no key at all" situation visible.
            print("OPENAI_API_KEY is not set either; no OpenAI API key is available.")
        return {
            # The key name is fixed because callers index the result with it.
            "openAI_key_warsztaty_PW_2024": fallback_key,
        }



In [44]:
# Load the secrets dict (from AWS Secrets Manager, or the environment fallback)
secrets = get_secret()

# Pick the OpenAI API key out of the returned mapping
openai_key = secrets["openAI_key_warsztaty_PW_2024"]

In [45]:
# OpenAI API client shared by the cells below
client = OpenAI(api_key=openai_key)

### Using OpenAI models to analyze an image and generate its description

In [46]:
def _guess_image_mime(image_data):
    """Guess the MIME type of raw image bytes from their leading magic numbers."""
    if image_data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if image_data.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    if image_data[:4] == b"RIFF" and image_data[8:12] == b"WEBP":
        return "image/webp"
    # PNG, and anything unrecognized, keeps the label this function always used.
    return "image/png"


def get_image_description(client, image_source, prompt, from_url=False,
                          model="gpt-4o-mini", max_tokens=300, timeout=30):
    """
    Generate a description for an image using an OpenAI vision-capable chat model.

    Parameters:
    - client: OpenAI API client.
    - image_source: Either a file-like object (e.g., from Streamlit file uploader) or a URL string.
    - prompt: Text prompt to guide the description.
    - from_url: Boolean indicating if `image_source` is a URL. If False, `image_source` is assumed to be a file-like object.
    - model: Name of the chat model to call (defaults to "gpt-4o-mini").
    - max_tokens: Upper bound on the number of tokens in the generated description.
    - timeout: Seconds to wait for the image download when `from_url` is True.

    Returns:
    - Description text generated by the model.

    Raises:
    - ValueError: if the image source yields no data.
    - requests.HTTPError: if downloading the image from the URL fails.
    """

    if from_url:
        # Download the image; the timeout keeps a dead host from hanging the kernel
        download = requests.get(image_source, timeout=timeout)
        download.raise_for_status()
        image_data = download.content
    else:
        # Read image data directly from file-like object
        image_data = image_source.read()

    if not image_data:
        # An already-consumed file object or an empty download would otherwise
        # be sent to the API as an empty image.
        raise ValueError("image_source yielded no image data")

    # Label the payload with its real type instead of always claiming PNG
    mime_type = _guess_image_mime(image_data)

    # Encode the image data in base64
    encoded_image = base64.b64encode(image_data).decode('utf-8')

    # Send the prompt and the image (as a base64 data URL) in one user message
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )

    # Extract and return the description
    return completion.choices[0].message.content

In [47]:
# Instruction sent together with the image; asks for a short description in Polish
prompt_message = (
    "What’s in this image? "
    "Provide concise but detailed description in Polish up to 500 characters."
)


In [48]:
# Sample image (hosted on olxcdn.com) that is described in the demo call
image_url = "https://ireland.apollo.olxcdn.com/v1/files/3nkwoqc9iowj2-PL/image;s=1000x700"

In [49]:
# Download the sample image and ask the model to describe it; the result is the cell output
get_image_description(
    client,
    image_source=image_url,
    prompt=prompt_message,
    from_url=True,
)

'Na zdjęciu znajduje się srebrny samochód marki Opel, stojący na kostce brukowej przed budynkiem. Auto ma nowoczesny wygląd z wyraźnymi liniami nadwozia. Przednie światła są lekko okrągłe, a zderzak ma sportowy wygląd z wlotem powietrza. Na tablicy rejestracyjnej widnieje napis "ERICAR". W tle widać dobrze utrzymany ogród z krzewami oraz fasadę domu w ciepłych kolorach, z kamiennymi elementami architektonicznymi. Całość sprawia wrażenie eleganckiego miejsca.'

In [None]:
#Bedrock

In [27]:
import boto3
import json
import time

# Initialize the bedrock-runtime client (region and credentials come from the default AWS configuration)
bedrock_runtime_client = boto3.client('bedrock-runtime')

# Define the model ID and input prompt.
# Claude 3 models on Bedrock take the Anthropic Messages request schema
# (`anthropic_version`, `max_tokens`, `messages`), not the legacy
# text-completions schema (`prompt`, `max_tokens_to_sample`).
model_id = 'anthropic.claude-3-sonnet-20240229-v1:0'
input_prompt = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 100,  # Upper bound on the number of generated tokens
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe the purpose of a 'hello world' program in one line.",
                }
            ],
        }
    ],
}

body = json.dumps(input_prompt).encode('utf-8')

max_retries = 5
wait_time = 1  # Start with a 1-second delay

for attempt in range(max_retries):
    try:
        response = bedrock_runtime_client.invoke_model(
            modelId=model_id,
            body=body,
            contentType='application/json',
            accept='application/json'
        )
        # Process response if successful
        response_body = response['body'].read().decode('utf-8')
        output = json.loads(response_body)
        print(output)
        break  # Exit the loop if successful

    except bedrock_runtime_client.exceptions.ThrottlingException:
        if attempt == max_retries - 1:
            # Out of attempts: report it instead of sleeping once more for nothing
            print(f"ThrottlingException: Attempt {attempt + 1} of {max_retries}. Giving up.")
            break
        print(f"ThrottlingException: Attempt {attempt + 1} of {max_retries}. Retrying in {wait_time} seconds...")
        time.sleep(wait_time)
        wait_time *= 2  # Exponential backoff

    except Exception as e:
        print(f"Error: {e}")
        break  # Break on other exceptions


ThrottlingException: Attempt 1 of 5. Retrying in 1 seconds...


KeyboardInterrupt: 