In [2]:
import base64
import json
import mimetypes
import os
import re

import boto3
from dotenv import load_dotenv


In [3]:
# Read the .env file so the variables below are visible through os.getenv
load_dotenv()

# Profile ARN; passed as modelId on every invoke_model call
model_id = os.getenv("ARN")

# AWS credentials (either may be None if the variable is not set)
aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# Bedrock runtime client shared by the helper functions in this notebook
bedrock = boto3.client(
    "bedrock-runtime",
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
    region_name="us-east-1",
)


In [4]:

def encode_image_to_base64(image_path):
    """
    Read a file in binary mode and return its contents as base64 text.

    Args:
        image_path (str): Path of the file to read

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8 text
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

def _guess_image_media_type(image_path):
    """Infer the image MIME type from the file extension, defaulting to JPEG."""
    guessed_type, _ = mimetypes.guess_type(str(image_path))
    if guessed_type and guessed_type.startswith("image/"):
        return guessed_type
    # Unknown extension: keep the value this function has always sent
    return "image/jpeg"


def _extract_response_text(response_body):
    """Return the concatenated text of every text block in a parsed response."""
    content = response_body.get("content")
    if content is None:
        return "No response received"
    if isinstance(content, list):
        # A reply may be split over several text blocks; keep all of them
        return "".join(
            block.get("text") or ""
            for block in content
            if isinstance(block, dict) and block.get("type") == "text"
        )
    # Fallback for older response format
    return content


def _split_thinking(response_text):
    """Split a reply into (thinking_part, final_answer) using <thinking> tags."""
    # The \Z alternative also captures a section whose closing tag is missing,
    # which happens when the reply is cut off by the max_tokens limit.
    thinking_pattern = r"<thinking>(.*?)(?:</thinking>|\Z)"
    thinking_match = re.search(thinking_pattern, response_text, re.DOTALL)

    if thinking_match:
        thinking_part = thinking_match.group(1).strip()
    else:
        thinking_part = "No explicit thinking section found"

    final_answer = re.sub(thinking_pattern, "", response_text, flags=re.DOTALL).strip()
    return thinking_part, final_answer


def invoke_claude_with_image(prompt, image_path=None, enable_thinking=False, max_tokens=1024):
    """
    Invoke Claude through the module-level ``bedrock`` client, optionally
    attaching an image and optionally asking for a tagged reasoning section.

    "Thinking mode" here is prompt-driven: a system prompt asks the model to
    wrap its reasoning in <thinking>...</thinking> tags, and the reply is then
    split on those tags.

    Args:
        prompt (str): The text prompt to send
        image_path (str, optional): Path to an image file to include. Its
            media type is inferred from the file extension (JPEG if unknown).
        enable_thinking (bool): Whether to request and extract a <thinking> section
        max_tokens (int): Upper bound on generated tokens. Raise it when the
            reasoning plus the answer does not fit in the default.

    Returns:
        tuple: (full_response, thinking_part, final_answer) if thinking is enabled
               or just the response text if thinking is disabled
    """
    if image_path:
        user_message = {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        # Must describe the actual file, e.g. image/png for a .png
                        "media_type": _guess_image_media_type(image_path),
                        "data": encode_image_to_base64(image_path),
                    },
                },
            ],
        }
    else:
        # Text-only message
        user_message = {"role": "user", "content": prompt}

    request_body = {
        "anthropic_version": "bedrock-2023-05-31",
        "messages": [user_message],
        "max_tokens": max_tokens,
    }

    # Enable thinking mode if requested
    if enable_thinking:
        request_body["system"] = "Use your reasoning abilities to think step-by-step before answering. Begin with \"<thinking>\" followed by your detailed thought process. Then end your thinking with \"</thinking>\" and provide your final answer."

    response = bedrock.invoke_model(
        body=json.dumps(request_body),
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
    )

    response_body = json.loads(response.get("body").read())
    response_text = _extract_response_text(response_body)

    if enable_thinking:
        thinking_part, final_answer = _split_thinking(response_text)
        return response_text, thinking_part, final_answer

    return response_text


In [9]:

# Example usage: describe a local image, optionally showing the model's reasoning
if __name__ == "__main__":
    # Flip this flag to switch between the tuple result and the plain-text result
    use_thinking_mode = True

    # Request an image description (with a thinking section when the flag is set)
    result = invoke_claude_with_image(
        prompt="Describe this image in detail. Think step by step about what you see.",
        image_path="images/sd.png",
        enable_thinking=use_thinking_mode,
    )

    if not use_thinking_mode:
        # Plain mode: the helper returned only the response text
        print("Image description response:")
        print(result)
    else:
        # Thinking mode: the helper returned (full text, thinking, final answer)
        full_response, thinking, final_answer = result

        print("=== FULL RESPONSE ===", full_response, "\n" + "="*50 + "\n", sep="\n")

        print("=== THINKING PROCESS ===", thinking, "\n" + "="*50 + "\n", sep="\n")

        print("=== FINAL ANSWER ===", final_answer, sep="\n")

=== FULL RESPONSE ===
<thinking>
This image is a screenshot from the popular video game "Stardew Valley." Let me break down what I'm seeing:

1. The setting is a farm - there's a wooden farmhouse with a red roof at the top of the image.

2. In front of the farmhouse is a porch with what appears to be a potted plant.

3. The land is divided into different sections:
   - There's tilled soil with crops at various stages of growth organized in neat rows
   - Some crops have red fruits/vegetables (likely strawberries or tomatoes)
   - Other crops are green and appear to be in early growing stages

4. There are two character sprites visible:
   - Both are wearing straw hats and blue overalls (typical farmer attire in the game)
   - One is in the center-right of the farm area, and another is standing among the crops

5. The farm has infrastructure:
   - A stone well on the right side
   - Wooden fences surrounding much of the farmable area
   - What looks like a mailbox near the house
   - So