In [81]:
%pip install ultralytics
import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.2.54 ðŸš€ Python-3.8.10 torch-2.3.1 CPU (aarch64)
Setup complete âœ… (16 CPUs, 30.8 GB RAM, 52.0/193.6 GB disk)


In [83]:
import torch
from PIL import Image
import os

# Load the YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

def describe_objects_in_images(image_paths):
    descriptions = {}
    
    for img_path in image_paths:
        # Ensure the image path is valid
        if not os.path.isfile(img_path):
            descriptions[img_path] = "Invalid image path."
            continue

        # Load image
        img = Image.open(img_path)

        # Perform inference
        results = model(img)

        # Extract predictions
        predictions = results.pred[0]  # Assuming we are only interested in the first image if batch

        # Convert predictions to descriptions
        objects = []
        for *box, conf, cls in predictions:
            label = results.names[int(cls)]
            objects.append(f"{label} ({conf:.2f})")

        # Save descriptions
        descriptions[img_path] = objects
    
    return descriptions


Using cache found in /home/jabez_kassa/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 ðŸš€ 2024-7-12 Python-3.8.10 torch-2.3.1 CPU

Fusing layers... 


YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [10]:
import json
import base64
from PIL import Image
from io import BytesIO
import requests

def read_img(json_path: str) -> dict:
    """
    Reads image paths from a JSON file, converts the images to JPEG, and sends them to the OpenAI API for description.

    Parameters:
    - json_path (str): Path to the JSON file containing the image paths.
    - openai_api_key (str): OpenAI API key for authentication.

    Returns:
    - dict: Response from the OpenAI API.
    """
    # Load JSON data from file
    with open(json_path, 'r') as json_file:
        json_data = json.load(json_file)
    
    # Extract the image paths from the JSON data
    image_paths = json_data['output_paths']
    
    # Process each image
    images_base64 = []
    for image_path in image_paths:
        image = Image.open(image_path)
        
        # Convert image mode if necessary
        if image.mode != 'RGB':
            image = image.convert('RGB')
        
        # Convert image to base64 string in JPEG format
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
        images_base64.append(base64_image)
    
    # Construct the payload with multiple images
    messages = []
    for idx, base64_image in enumerate(images_base64):
        order = ["first", "second", "third", "fourth", "fifth", "sixth"]
        message_content = [
            {
                "role": "user",
                "content": f"Please describe the {order[idx]} image."
            },
            {
                "role": "user",
                "content": {
                    "type": "image",
                    "image": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                }
            }
        ]
        messages.extend(message_content)
    
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}"
    }

    payload = {
        "model": "gpt-4o",
        "messages": messages,
        "max_tokens": 300
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    
    # Check for API response errors
    if response.status_code != 200:
        raise Exception(f"OpenAI API error: {response.status_code} {response.text}")
    
    response_data = response.json()
    
    # Check if 'choices' key is present in the response
    if 'choices' not in response_data:
        raise KeyError("'choices' key not found in OpenAI API response")
    
    # Extract and return responses
    descriptions = [resp['message']['content'] for resp in response_data['choices']]
    return descriptions

In [11]:
json_path='/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/notebooks/img_path.json'
descriptions = read_img(json_path)
for i, description in enumerate(descriptions):
    print(f"Description for image {i+1}: {description}")

Exception: OpenAI API error: 400 {
  "error": {
    "message": "Invalid type for 'messages[1].content': expected one of a string or array of objects, but got an object instead.",
    "type": "invalid_request_error",
    "param": "messages[1].content",
    "code": "invalid_type"
  }
}

In [86]:
small_images_paths = [
'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/cta.jpg',
'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/discover.png',
'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_3.png',
'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_animation_1.png',
'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_instruction_1.png',
'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/landing_endframe.jpg'
]

In [87]:
describe_objects_in_images(small_images_paths)

{'/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/cta.jpg': [],
 '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/discover.png': ['keyboard (0.36)'],
 '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_3.png': [],
 '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_animation_1.png': [],
 '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_instruction_1.png': [],
 '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/landing_endframe.jpg': []}

In [1]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Access your OpenAI API key
openai_api_key = os.getenv("OPENAI_API_KEY")

In [2]:
import json
import base64
import requests
from PIL import Image
from io import BytesIO

def encode_image(image_path: str) -> str:
    """
    Encodes an image to a base64 string.

    Parameters:
    - image_path (str): Path to the input image.

    Returns:
    - str: Base64-encoded image data.
    """
    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()
    return base64.b64encode(image_data).decode('utf-8')

def json_to_image(image_path: str) -> Image.Image:
    """
    Converts an image file to a PIL Image object.

    Parameters:
    - image_path (str): Path to the image file.

    Returns:
    - PIL.Image.Image: Image as a PIL Image object.
    """
    # Open the image file
    image = Image.open(image_path)
    
    return image

def read_img(json_path: str) -> dict:
    """
    Reads an image path from a JSON file, converts the image to JPEG, and sends it to the OpenAI API.

    Parameters:
    - json_path (str): Path to the JSON file containing the image path.
    - openai_api_key (str): OpenAI API key for authentication.

    Returns:
    - dict: Response from the OpenAI API.
    """
    # Load JSON data from file
    with open(json_path, 'r') as json_file:
        json_data = json.load(json_file)
    
    # Extract the image path from the JSON data
    image_path = json_data['output_path']
    
    # Convert image file to PIL Image
    image = json_to_image(image_path)
    
    # Convert image to base64 string in JPEG format
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
    
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}"
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "describe the image"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    response_data =  response.json()
    return response_data['choices'][0]['message']['content']

In [3]:
json_path = "./output/output.json"
read_img(json_path)


In [28]:
# import numpy as np
# from PIL import Image
# import os
# import json

# def blend_images(positions, alpha: float = 0.5, output_dir: str = './output') -> str:
#     # Example usage

#     small_images_paths = [
#     '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/cta.jpg',
#     '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/discover.png',
#     '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_3.png',
#     '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_animation_1.png',
#     '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_instruction_1.png',
#     '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/landing_endframe.jpg'
# ]
#     """
#     Blends multiple small images by placing them on top of a larger image at specified positions without overlapping and saves the blended image.

#     Parameters:
#     - small_images_paths (list of str): List of paths to the small images.
#     - positions (list of tuples): List of (x, y) positions where each small image will be placed on the larger image.
#     - alpha (float): Blending factor for transparency (0.0 to 1.0). Default is 0.5.
#     - output_dir (str): Directory where the blended image will be saved. Default is './output'.

#     Returns:
#     - str: Path to the JSON file containing the output image information.
#     """
#     if len(small_images_paths) != len(positions):
#         raise ValueError("The number of small images must match the number of positions.")

#     larger_image_path = '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_1.jpg'
    
#     # Load larger image from file path
#     larger_image = Image.open(larger_image_path)
#     larger_image_np = np.array(larger_image)
    
#     # Get dimensions of the larger image
#     larger_h, larger_w = larger_image_np.shape[:2]

#     blended_image_np = larger_image_np.copy()

#     for i, small_image_path in enumerate(small_images_paths):
#         # Load small image from file path
#         smaller_image = Image.open(small_image_path)
#         smaller_image_np = np.array(smaller_image)

#         # Get dimensions of the smaller image
#         smaller_h, smaller_w = smaller_image_np.shape[:2]

#         # Resize smaller image if necessary
#         if smaller_h > larger_h or smaller_w > larger_w:
#             aspect_ratio = smaller_w / smaller_h
#             if smaller_h > larger_h:
#                 smaller_h = larger_h
#                 smaller_w = int(smaller_h * aspect_ratio)
#             if smaller_w > larger_w:
#                 smaller_w = larger_w
#                 smaller_h = int(smaller_w / aspect_ratio)
#             smaller_image = smaller_image.resize((smaller_w, smaller_h))
#             smaller_image_np = np.array(smaller_image)

#         x_offset, y_offset = positions[i]

#         # Ensure the small image does not go out of bounds
#         if x_offset + smaller_w > larger_w or y_offset + smaller_h > larger_h:
#             raise ValueError(f"Small image at position {positions[i]} exceeds the bounds of the larger image.")

#         # Blend images
#         if smaller_image_np.ndim == 2:  # Grayscale image
#             smaller_image_np = np.stack([smaller_image_np] * 3, axis=-1)  # Convert to 3-channel image
#         for c in range(3):
#             blended_image_np[y_offset:y_offset+smaller_h, x_offset:x_offset+smaller_w, c] = (
#                 alpha * smaller_image_np[:, :, c] +
#                 (1 - alpha) * larger_image_np[y_offset:y_offset+smaller_h, x_offset:x_offset+smaller_w, c]
#             )

#     # Convert blended image back to PIL Image
#     blended_image = Image.fromarray(blended_image_np.astype('uint8'))

#     # Save blended image to output directory
#     os.makedirs(output_dir, exist_ok=True)
#     output_file = os.path.join(output_dir, 'blended_image.png')
#     blended_image.save(output_file)

#     # Create JSON response
#     json_data = {
#         'output_path': output_file,
#         'positions': positions,
#         'alpha': alpha
#     }

#     # Write JSON to file
#     json_output_file = os.path.join(output_dir, 'output.json')
#     with open(json_output_file, 'w') as f:
#         json.dump(json_data, f, indent=4)

#     return json_output_file



In [3]:
import numpy as np
from PIL import Image
import os
import json

def parse_positions(positions_str):
    """
    Parses a string of positions into a list of tuples.

    Parameters:
    - positions_str (str): String representation of positions, e.g., "[(10, 10), (100, 50), (200, 100)]"

    Returns:
    - list of tuples: List of (x, y) positions.
    """
    positions = []
    positions_str = positions_str.strip()[1:-1]  # Remove the outer brackets
    for pos in positions_str.split('),'):
        pos = pos.replace('(', '').replace(')', '').strip()
        x, y = map(int, pos.split(','))
        positions.append((x, y))
    return positions

def blend_images(positions_str: str, alpha: float = 0.5, output_dir: str = './output') -> str:
    """
    Blends multiple small images by placing them on top of a larger image at specified positions without overlapping and saves the blended image.

    Parameters:
    - positions_str (str): String representation of positions, e.g., "[(20, 20), (120, 60), (220, 120), (320, 180), (420, 240), (520, 300)]"
    - alpha (float): Blending factor for transparency (0.0 to 1.0). Default is 0.5.
    - output_dir (str): Directory where the blended image will be saved. Default is './output'.

    Returns:
    - str: Path to the JSON file containing the output image information.
    """
    # Define paths to small images (example paths)
    small_images_paths = [
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/cta.jpg',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/discover.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_3.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_animation_1.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/engagement_instruction_1.png',
        '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/landing_endframe.jpg'
    ]

    # Parse positions string
    positions = parse_positions(positions_str)

    if len(small_images_paths) != len(positions):
        raise ValueError("The number of small images must match the number of positions.")

    # Load larger image from file path (example path)
    larger_image_path = '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_1.jpg'
    larger_image = Image.open(larger_image_path)
    larger_image_np = np.array(larger_image)
    
    # Get dimensions of the larger image
    larger_h, larger_w = larger_image_np.shape[:2]

    blended_image_np = larger_image_np.copy()

    # Blend each small image at its respective position
    for i, small_image_path in enumerate(small_images_paths):
        # Load small image from file path
        smaller_image = Image.open(small_image_path)
        smaller_image_np = np.array(smaller_image)

        # Get dimensions of the smaller image
        smaller_h, smaller_w = smaller_image_np.shape[:2]

        # Resize smaller image if necessary
        if smaller_h > larger_h or smaller_w > larger_w:
            aspect_ratio = smaller_w / smaller_h
            if smaller_h > larger_h:
                smaller_h = larger_h
                smaller_w = int(smaller_h * aspect_ratio)
            if smaller_w > larger_w:
                smaller_w = larger_w
                smaller_h = int(smaller_w / aspect_ratio)
            smaller_image = smaller_image.resize((smaller_w, smaller_h))
            smaller_image_np = np.array(smaller_image)

        x_offset, y_offset = positions[i]

        # Ensure the small image does not go out of bounds
        if x_offset + smaller_w > larger_w:
            x_offset = larger_w - smaller_w
        if y_offset + smaller_h > larger_h:
            y_offset = larger_h - smaller_h

        # Blend images
        if smaller_image_np.ndim == 2:  # Grayscale image
            smaller_image_np = np.stack([smaller_image_np] * 3, axis=-1)  # Convert to 3-channel image
        for c in range(3):
            blended_image_np[y_offset:y_offset+smaller_h, x_offset:x_offset+smaller_w, c] = (
                alpha * smaller_image_np[:, :, c] +
                (1 - alpha) * larger_image_np[y_offset:y_offset+smaller_h, x_offset:x_offset+smaller_w, c]
            )

    # Convert blended image back to PIL Image
    blended_image = Image.fromarray(blended_image_np.astype('uint8'))

    # Save blended image to output directory
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, 'blended_image.png')
    blended_image.save(output_file)

    # Create JSON response
    json_data = {
        'output_path': output_file,
        'positions': positions,
        'alpha': alpha
    }

    # Write JSON to file
    json_output_file = os.path.join(output_dir, 'output.json')
    with open(json_output_file, 'w') as f:
        json.dump(json_data, f, indent=4)

    return json_output_file


In [25]:
positions = '[(20, 20), (120, 60), (220, 120), (320, 180), (420, 240), (520, 300)]'
blend_images(positions)

'./output/output.json'

In [22]:
# import numpy as np
# from PIL import Image
# import os
# import json

# def blend_images(position: str = 'center', alpha: float = 0.5, output_dir: str = './output') -> str:
#     """
#     Blends two images by placing the smaller image on top of the larger image at a specified position and saves the blended image.

#     Parameters:
#     - position (str): Position where the smaller image will be placed on the larger image.
#                       Options are 'center', 'top-left', 'top-right', 'bottom-left', 'bottom-right'. Default is 'center'.
#     - alpha (float): Blending factor for transparency (0.0 to 1.0). Default is 0.5.
#     - output_dir (str): Directory where the blended image will be saved. Default is './output'.

#     Returns:
#     - str: Path to the JSON file containing the output image information.
#     """
#     larger_image_path = '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/endframe_1.jpg'
#     smaller_image_path = '/home/jabez_kassa/week_12_updated/Semantic-Image-and-Text-Alignment/data/Assets/015efcdd8de3698ffc4dad6dabd6664a/cta.jpg'

#     # Load images from file paths
#     larger_image = Image.open(larger_image_path)
#     smaller_image = Image.open(smaller_image_path)

#     # Convert images to NumPy arrays
#     larger_image_np = np.array(larger_image)
#     smaller_image_np = np.array(smaller_image)

#     # Get dimensions
#     larger_h, larger_w = larger_image_np.shape[:2]
#     smaller_h, smaller_w = smaller_image_np.shape[:2]

#     # Resize smaller image if necessary
#     if smaller_h > larger_h or smaller_w > larger_w:
#         aspect_ratio = smaller_w / smaller_h
#         if smaller_h > larger_h:
#             smaller_h = larger_h
#             smaller_w = int(smaller_h * aspect_ratio)
#         if smaller_w > larger_w:
#             smaller_w = larger_w
#             smaller_h = int(smaller_w / aspect_ratio)
#         smaller_image = smaller_image.resize((smaller_w, smaller_h))
#         smaller_image_np = np.array(smaller_image)

#     # Determine position
#     positions = {
#         'center': ((larger_w - smaller_w) // 2, (larger_h - smaller_h) // 2),
#         'top-left': (0, 0),
#         'top-right': (larger_w - smaller_w, 0),
#         'bottom-left': (0, larger_h - smaller_h),
#         'bottom-right': (larger_w - smaller_w, larger_h - smaller_h)
#     }

#     if position not in positions:
#         raise ValueError("Invalid position argument. Choose from 'center', 'top-left', 'top-right', 'bottom-left', 'bottom-right'")
    
#     x_offset, y_offset = positions[position]

#     # Blend images
#     blended_image_np = larger_image_np.copy()
#     for c in range(3):
#         blended_image_np[y_offset:y_offset+smaller_h, x_offset:x_offset+smaller_w, c] = (
#             alpha * smaller_image_np[:, :, c] +
#             (1 - alpha) * larger_image_np[y_offset:y_offset+smaller_h, x_offset:x_offset+smaller_w, c]
#         )

#     # Convert blended image back to PIL Image
#     blended_image = Image.fromarray(blended_image_np.astype('uint8'))

#     # Save blended image to output directory
#     os.makedirs(output_dir, exist_ok=True)
#     output_file = os.path.join(output_dir, 'blended_image.png')
#     blended_image.save(output_file)

#     # Create JSON response
#     json_data = {
#         'output_path': output_file,
#         'position': position,
#         'alpha': alpha
#     }

#     # Write JSON to file
#     json_output_file = os.path.join(output_dir, 'output.json')
#     with open(json_output_file, 'w') as f:
#         json.dump(json_data, f, indent=4)

#     return json_output_file


In [4]:
llm_config2 = {"config_list": [{"model": "gpt-4", "api_key": openai_api_key}]}
code_execution_config = {"use_docker": False}

# Initialize the assistant agent with the given configurations
config_list = [
    {"model": "gpt-4", "api_key": openai_api_key, "api_type": "openai"},
]

In [5]:
llm_config={
        "temperature": 0,
        "timeout": 600,
        "cache_seed": 42,
        "config_list": config_list,
        "functions": [
             {
                        "name": "blend_images",
                        "description": "use this function to blend the images",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "positions_str": {
                                    "type": "string",
                                    "description": "This is where you will position the blending"
                                },
                            },
                            "required": ["positions_str"]
                        }
                        },
             {
                        "name": "read_img",
                        "description": "use this to read the image blended from blended image",
                        "parameters": {
                            "type": "object",
                            "properties": {
                                "output_json": {
                                    "type": "object",
                                    "description": "This is the blended image"
                                },
                            },
                            "required": ["position"]
                        }
                        }
                        ],
}


In [6]:
import os
import autogen

from autogen import ConversableAgent

# Let's first define the assistant agent that suggests tool calls.
img_blend_assistant = autogen.AssistantAgent(
    name="image_blending_assistant",
    code_execution_config=False,
    system_message=
    
"""You are a helpful AI assistant. 
The main problems you will be solving include:
- suggest diffrent "positions" to make a good advertising 
    use this as example: "positions_str = [(10, 10), (100, 50), (200, 100), (300, 150), (400, 200), (500, 250)]"
    make sure you are giving only 6 positions
    make sure that the images will not overlap
- This are the discription of the pictures:
    1. the first position is for a picture that have a text saying 'shop now '
    2. the second position is for a picture that have a text saying 'Discover 12 unique tea flavours delivered to your door'
    3. the third position is for a picture that have a text saying 'Enjoy tea delivered to your home'
    4. the fourth position is for a picture that shows a hand pointing
    5. the fifth position is for a picture that have a text saying 'tap to get letter box delivery of tea'
    6. the sixth position is for a picture that have a text saying 'off black generation picture'

- your task:
    - considering the above discription for each picture find a way to position each picture to give a good advertaizing
    - Then modifay the position after the comment from 'img_critic_assistant'
    """
    ,
    llm_config=llm_config,
    function_map={
        "blend_images": blend_images,
        "read_img": read_img
    }
        
)

img_critic_assistant = autogen.AssistantAgent(
    name="image_blending_assistant",
    code_execution_config=False,
    system_message="You are a advertizing image critic AI assistant. "
    """You task is to critic the 'output.json' from 'img_blend_assistant'
    critic the following part 
  
                            - the first position is for a picture that have a text saying 'shop now '
                            - the second position is for a picture that have a text saying 'Discover 12 unique tea flavours delivered to your door'
                            - the third position is for a picture that have a text saying 'Enjoy tea delivered to your home'
                            - the fourth position is for a picture that shows a hand pointing
                            - the fifth position is for a picture that have a text saying 'tap to get letter box delivery of tea'
                            - the sixth position is for a picture that have a text saying 'off black generation picture'
    recomend 'img_blend_assistant' for a better advertising.
    "Return 'TERMINATE' when the task is done.""",
    llm_config=llm_config,
    function_map={
        "blend_images": blend_images,
        "read_img": read_img
    }
        
)




# The user proxy agent is used for interacting with the assistant agent
# and executes tool calls.
def termination_msg(x):
    return isinstance(x, dict) and "TERMINATE" == str(x.get("content", ""))[-9:].upper()
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    code_execution_config=False
    # # is_termination_msg=lambda x: "content" in x and x["content"] is not None and x["content"].rstrip().endswith("TERMINATE"),
    # # code_execution_config={"work_dir": "planning"},
    # function_map={"blend_images": blend_images},
    # code_execution_config=False
)



groupchat = autogen.GroupChat(
    agents=[user_proxy, img_blend_assistant, img_critic_assistant],
 
    messages=[],  # The initial messages in the chat
    max_round=10,  # Maximum rounds of conversation
    select_speaker_message_template = 
    """ This is the flow of the converstion:
    1. 'user_proxy'
    2. 'img_blend_assistant'
    3. 'img_critic_assistant'
    4. 'img_blend_assistant'

"""
)

manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config2
)



In [7]:
user_proxy.initiate_chat(
    manager, message="blend the images at diffrent positions")

[33muser_proxy[0m (to chat_manager):

blend the images at diffrent positions

--------------------------------------------------------------------------------
[32m
Next speaker: image_blending_assistant
[0m
[33mimage_blending_assistant[0m (to chat_manager):

[32m***** Suggested function call: blend_images *****[0m
Arguments: 
{
"positions_str": "[(10, 10), (100, 50), (200, 100), (300, 150), (400, 200), (500, 250)]"
}
[32m*************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: image_blending_assistant
[0m
[35m
>>>>>>>> EXECUTING FUNCTION blend_images...[0m
[33mimage_blending_assistant[0m (to chat_manager):

[32m***** Response from calling function (blend_images) *****[0m
./output/output.json
[32m*********************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: image_blending_

ChatResult(chat_id=None, chat_history=[{'content': 'blend the images at diffrent positions', 'role': 'assistant'}, {'content': '', 'function_call': {'arguments': '{\n"positions_str": "[(10, 10), (100, 50), (200, 100), (300, 150), (400, 200), (500, 250)]"\n}', 'name': 'blend_images'}, 'name': 'image_blending_assistant', 'role': 'assistant'}, {'content': './output/output.json', 'name': 'blend_images', 'role': 'function'}, {'content': "The images have been blended at the specified positions. Now, let's read the blended image to evaluate the result.", 'function_call': {'arguments': '{}', 'name': 'read_img'}, 'name': 'image_blending_assistant', 'role': 'assistant'}, {'content': "Error: read_img() missing 1 required positional argument: 'json_path'", 'name': 'read_img', 'role': 'function'}, {'content': "I apologize for the mistake. Let's try reading the image again with the correct parameters.", 'function_call': {'arguments': '{\n"json_path": "./output/output.json"\n}', 'name': 'read_img'}, 