In [8]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Access your OpenAI API key
openai_api_key = os.getenv("OPENAI_API_KEY")

In [17]:
import json
import base64
import requests
from PIL import Image
from io import BytesIO

def json_to_image(image_path: str) -> Image.Image:
    """
    Converts an image file to a PIL Image object.

    Parameters:
    - image_path (str): Path to the image file.

    Returns:
    - PIL.Image.Image: Image as a PIL Image object.
    """
    return Image.open(image_path)

def read_img(json_path: str) -> dict:
    """
    Reads an image path from a JSON file, converts both a constant image and the image from JSON to JPEG, and sends them to the OpenAI API.

    Parameters:
    - json_path (str): Path to the JSON file containing the image path.
    - openai_api_key (str): OpenAI API key for authentication.

    Returns:
    - dict: Response from the OpenAI API.
    """
    # Constant image path
    constant_image_path = "/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/_preview.png"

    # Load JSON data from file
    with open(json_path, 'r') as json_file:
        json_data = json.load(json_file)
    
    # Extract the image path from the JSON data
    image_path = json_data['output_path']

    # Convert image files to PIL Images
    constant_image = json_to_image(constant_image_path).convert("RGB")
    image = json_to_image(image_path).convert("RGB")

    # Convert images to base64 strings in JPEG format
    buffered1 = BytesIO()
    constant_image.save(buffered1, format="JPEG")
    base64_constant_image = base64.b64encode(buffered1.getvalue()).decode('utf-8')

    buffered2 = BytesIO()
    image.save(buffered2, format="JPEG")
    base64_image = base64.b64encode(buffered2.getvalue()).decode('utf-8')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}"
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "compare the two image object positions. and suggest what to change in image 2 to make it like image 1"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_constant_image}"
                        }
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    response_data = response.json()
    return response_data['choices'][0]['message']['content']


In [18]:
json_path = "./output/output.json"
read_img(json_path)


'The objects in both images are arranged differently. Here are some key differences and suggestions to make Image 2 more similar to Image 1:\n\n1. **Product Placement**:\n    - **Image 1**: The products, branded tea packs, are laid out in a scattered manner along with additional decorative elements (sunglasses, tea cup, and a tea bag).\n    - **Image 2**: The image mostly focuses on a tea kettle and a single glass with tea.\n\n    **Suggestion**: Include multiple tea packs and accessories scattered around the tea kettle and glass to match the variety and layout found in Image 1.\n\n2. **Text Position**:\n    - **Image 1**: There\'s a prominent call to action with text "Subscribe to Taster Pack Combo today and get 50% OFF!" in the middle of the image, and another "SHOP NOW" button at the bottom center.\n    - **Image 2**: The text "Enjoy tea delivered to your door" is centered at the bottom.\n\n    **Suggestion**: Add a call to action text at the middle of the image similar to Image 1. 

In [45]:
import numpy as np
from PIL import Image, ImageEnhance
import os
import json

def get_position_from_constant(position_name, larger_image, smaller_image):
    """
    Get the (x, y) coordinates for a given position name based on the size of the larger and smaller images.

    Parameters:
    - position_name (str): Name of the position (e.g., "top right").
    - larger_image (PIL.Image.Image): The larger image.
    - smaller_image (PIL.Image.Image): The smaller image.

    Returns:
    - tuple: (x, y) coordinates for the given position.
    """
    larger_w, larger_h = larger_image.size
    smaller_w, smaller_h = smaller_image.size

    positions = {
        "top left": (0, 0),
        "top center": ((larger_w - smaller_w) // 2, 0),
        "top right": (larger_w - smaller_w, 0),
        "center left": (0, (larger_h - smaller_h) // 2),
        "center center": ((larger_w - smaller_w) // 2, (larger_h - smaller_h) // 2),
        "center right": (larger_w - smaller_w, (larger_h - smaller_h) // 2),
        "bottom left": (0, larger_h - smaller_h),
        "bottom center": ((larger_w - smaller_w) // 2, larger_h - smaller_h),
        "bottom right": (larger_w - smaller_w, larger_h - smaller_h),
    }

    return positions.get(position_name, (0, 0))

def blend_images(position_names, alpha: float = 0.5, output_dir: str = './output') -> str:
    """
    Blends multiple small images by placing them on top of a larger image at specified positions without overlapping and saves the blended image.

    Parameters:
    - position_names (list of str): List of position names for each small image, e.g., ["top left", "top right", "center center"].
    - alpha (float): Blending factor for transparency (0.0 to 1.0). Default is 0.5.
    - output_dir (str): Directory where the blended image will be saved. Default is './output'.

    Returns:
    - str: Path to the JSON file containing the output image information.
    """
    # Define paths to small images (example paths)
    small_images_paths = [
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/cta.jpg',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/discover.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_3.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_animation_1.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_instruction_1.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/landing_endframe.jpg'
    ]

    if len(small_images_paths) != len(position_names):
        raise ValueError("The number of small images must match the number of position names.")

    # Load larger image from file path (example path)
    larger_image_path = '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_1.jpg'
    larger_image = Image.open(larger_image_path)
    larger_image = larger_image.convert("RGBA")  # Ensure the larger image has an alpha channel

    # Create a blank image with the same size as the larger image
    blended_image = Image.new("RGBA", larger_image.size)

    # Paste the larger image onto the blank image
    blended_image.paste(larger_image, (0, 0))

    # Blend each small image at its respective position
    for i, small_image_path in enumerate(small_images_paths):
        # Load small image from file path
        smaller_image = Image.open(small_image_path)
        smaller_image = smaller_image.convert("RGBA")  # Ensure the smaller image has an alpha channel

        # Resize smaller image if necessary
        smaller_w, smaller_h = smaller_image.size
        larger_w, larger_h = larger_image.size
        if smaller_h > larger_h or smaller_w > larger_w:
            aspect_ratio = smaller_w / smaller_h
            if smaller_h > larger_h:
                smaller_h = larger_h
                smaller_w = int(smaller_h * aspect_ratio)
            if smaller_w > larger_w:
                smaller_w = larger_w
                smaller_h = int(smaller_w / aspect_ratio)
            smaller_image = smaller_image.resize((smaller_w, smaller_h))

        # Make the smaller image semi-transparent
        enhancer = ImageEnhance.Brightness(smaller_image)
        smaller_image = enhancer.enhance(alpha)

        # Get position for the smaller image
        x_offset, y_offset = get_position_from_constant(position_names[i], larger_image, smaller_image)

        # Paste the smaller image onto the blended image at the calculated position
        blended_image.paste(smaller_image, (x_offset, y_offset), smaller_image)

    # Save blended image to output directory
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, 'blended_image.png')
    blended_image.save(output_file)

    # Create JSON response
    json_data = {
        'output_path': output_file,
        'positions': position_names,
        'alpha': alpha
    }

    # Write JSON to file
    json_output_file = os.path.join(output_dir, 'output.json')
    with open(json_output_file, 'w') as f:
        json.dump(json_data, f, indent=4)

    return json_output_file


In [46]:
positions_list = ["top left", "top right", "center center", "bottom left", "bottom center", "bottom right"]

# Call the blend_images function
blend_images(positions_list, alpha=0.5, output_dir='./output')

'./output/output.json'

In [20]:
llm_config2 = {"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
code_execution_config = {"use_docker": False}

# Initialize the assistant agent with the given configurations
config_list = [
    {"model": "gpt-4", "api_key": openai_api_key, "api_type": "openai"},
]

In [39]:
llm_config={
        "temperature": 0,
        "timeout": 600,
        "cache_seed": 42,
        "config_list": config_list,
        "functions": [
             {
                        "name": "blend_images",
                        "description": "use this function to blend the images",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "position_names": {
                                    "type": "string",
                                    "description": "This is where you will position the blending"
                                },
                            },
                            "required": ["positions_str"]
                        }
                        },
             {
                        "name": "read_img",
                        "description": "use this to read the image blended from blended image",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "output_json": {
                                    "type": "object",
                                    "description": "This is the blended image"
                                },
                            },
                            "required": ["position"]
                        }
                        }
                        ],
}


In [52]:
import os
import autogen

from autogen import ConversableAgent

# Let's first define the assistant agent that suggests tool calls.
img_blend_assistant = autogen.AssistantAgent(
    name="image_blending_assistant",
    code_execution_config=False,
    system_message=
    
"""You are a helpful AI assistant. 
The main problems you will be solving include:
- suggest diffrent "positions" to make a good advertising 
    use this as example: "List of position names for each small image, e.g., ["top left", "top right", "center center", "bottom left", "bottom center", "bottom right"]"
    make sure you are giving only 6 positions
    make sure that the images will not overlap
- This are the discription of the pictures:
    1. the first position is for a picture that have a text saying 'shop now '
    2. the second position is for a picture that have a text saying 'Discover 12 unique tea flavours delivered to your door'
    3. the third position is for a picture that have a text saying 'Enjoy tea delivered to your home'
    4. the fourth position is for a picture that shows a hand pointing
    5. the fifth position is for a picture that have a text saying 'tap to get letter box delivery of tea'
    6. the sixth position is for a picture that have a text saying 'off black generation picture'

- your task:
    - considering the above discription for each picture find a way to position each picture to give a good advertaizing 
    based on the recomendation you got from 'img_critic_assistant'
    - Then modifay the position after the comment from 'img_critic_assistant'
    - 'TERMINATE' when the image you blend looks like image 1.
    """
    ,
    llm_config=llm_config,
    function_map={
        "blend_images": blend_images,
        # "read_img": read_img
    }
        
)

img_critic_assistant = autogen.AssistantAgent(
    name="image_blending_assistant",
    code_execution_config=False,
    system_message="You are a advertizing image critic AI assistant. "
    """You task is to critic the 'output.json' from 'img_blend_assistant'
    critic the following part 
  
                            - the first position is for a picture that have a text saying 'shop now '
                            - the second position is for a picture that have a text saying 'Discover 12 unique tea flavours delivered to your door'
                            - the third position is for a picture that have a text saying 'Enjoy tea delivered to your home'
                            - the fourth position is for a picture that shows a hand pointing
                            - the fifth position is for a picture that have a text saying 'tap to get letter box delivery of tea'
                            - the sixth position is for a picture that have a text saying 'off black generation picture'
    recomend 'img_blend_assistant' for a better advertising by comparing to image 1 which is a good advertisement.
    "Return 'TERMINATE' when the task is done.""",
    llm_config=llm_config,
    function_map={
        # "blend_images": blend_images,
        "read_img": read_img
    }
        
)




# The user proxy agent is used for interacting with the assistant agent
# and executes tool calls.
def termination_msg(x):
    return isinstance(x, dict) and "TERMINATE" == str(x.get("content", ""))[-9:].upper()
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    code_execution_config=False
    # # is_termination_msg=lambda x: "content" in x and x["content"] is not None and x["content"].rstrip().endswith("TERMINATE"),
    # # code_execution_config={"work_dir": "planning"},
    # function_map={"blend_images": blend_images},
    # code_execution_config=False
)



groupchat = autogen.GroupChat(
    agents=[user_proxy, img_blend_assistant, img_critic_assistant],
 
    messages=[],  # The initial messages in the chat
    max_round=15,  # Maximum rounds of conversation
    select_speaker_message_template = 
    """ This is the flow of the converstion:
    1. 'user_proxy'
    2. 'img_blend_assistant'
    3. 'img_critic_assistant'
    4. 'img_blend_assistant'

"""
)

manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config2
)



In [54]:
message = user_proxy.initiate_chat(
    manager, message="blend the images at diffrent positions")

[33muser_proxy[0m (to chat_manager):

blend the images at diffrent positions

--------------------------------------------------------------------------------
[32m
Next speaker: image_blending_assistant
[0m
[33mimage_blending_assistant[0m (to chat_manager):

[32m***** Suggested function call: blend_images *****[0m
Arguments: 
{
  "position_names": ["top left", "top right", "center center", "bottom left", "bottom center", "bottom right"]
}
[32m*************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: image_blending_assistant
[0m
[35m
>>>>>>>> EXECUTING FUNCTION blend_images...[0m
[33mimage_blending_assistant[0m (to chat_manager):

[32m***** Response from calling function (blend_images) *****[0m
./output/output.json
[32m*********************************************************[0m

--------------------------------------------------------------------------------
[32m
Next sp

In [58]:
chat = message.chat_history
chat[-1]['Response from calling function (blend_images)']

KeyError: 'Response from calling function (blend_images)'

In [59]:
message

ChatResult(chat_id=None, chat_history=[{'content': 'blend the images at diffrent positions', 'role': 'assistant'}, {'content': '', 'function_call': {'arguments': '{\n  "position_names": ["top left", "top right", "center center", "bottom left", "bottom center", "bottom right"]\n}', 'name': 'blend_images'}, 'name': 'image_blending_assistant', 'role': 'assistant'}, {'content': './output/output.json', 'name': 'blend_images', 'role': 'function'}, {'content': "The images have been blended successfully. Now, let's read the blended image to provide a critique.", 'function_call': {'arguments': '{}', 'name': 'read_img'}, 'name': 'image_blending_assistant', 'role': 'assistant'}, {'content': "Error: read_img() missing 1 required positional argument: 'json_path'", 'name': 'read_img', 'role': 'function'}, {'content': "Apologies for the confusion. Let's try reading the image again.", 'function_call': {'arguments': '{\n  "json_path": "./output/output.json"\n}', 'name': 'read_img'}, 'name': 'image_blen