In [10]:
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
import autogen
import replicate
import requests
from datetime import datetime
import os

# Credentials are read from the environment so that no secret is ever stored in
# the notebook itself. Export OPENAI_API_KEY (and REPLICATE_API_TOKEN, if the
# Replicate client is used) before starting the kernel.
# setdefault keeps a token that is already exported instead of blanking it out.
os.environ.setdefault("REPLICATE_API_TOKEN", "")

# Model endpoints offered to the agents; the key falls back to an empty string
# when OPENAI_API_KEY is not set.
config_list = [
    {
        "model": "gpt-4o",
        "api_key": os.environ.get("OPENAI_API_KEY", ""),
        "tags": ["gpt-4o", "tool"]
    }
]

# Create a configuration dictionary for the Language Model (LLM)
llm_config = {
    "config_list": config_list,
    "timeout": 120
}


In [11]:
import openai
import requests
from datetime import datetime

# Set the API key on the `openai` module from the environment rather than
# hardcoding it; os.getenv yields None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [12]:
def img_review(image_path, prompt):
    """Ask GPT-4o to describe an image and rate how well it matches a prompt.

    The image is sent as a base64 ``data:`` URL inside an ``image_url`` content
    part so the model receives it as an image rather than as a wall of text.

    Args:
        image_path: Path of the image file to review.
        prompt: The text description the image is supposed to depict.

    Returns:
        The text content of the model's first reply choice.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    # Imported locally: neither module is imported by the visible notebook cells.
    import base64
    import mimetypes

    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

    # Derive the MIME type from the file extension; fall back to PNG because
    # that is the format the generation step in this notebook writes.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"

    # openai>=1.0 module-level client (the pre-1.0 ChatCompletion API was removed).
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are an AI image critic."},
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Please provide a description of the image and then rate, on a scale of 1 to 10, how closely the image aligns with the provided description. {prompt}?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
                    },
                ],
            },
        ],
    )

    return response.choices[0].message.content

In [13]:
def text_to_image_generation(prompt):
    """Generate an image for ``prompt`` and save it as a PNG under data/images.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        A status string: ``"Image saved as '<path>'"`` on success, otherwise a
        message saying that generation or the download failed.
    """
    # Imported locally so the function does not depend on other cells for them.
    import os
    import re

    # The Images API needs an image model; "gpt-4o" is a chat model.
    # openai>=1.0 module-level client (the pre-1.0 Image API was removed).
    result = openai.images.generate(
        model="dall-e-3",
        prompt=prompt,
        n=1,
        size="1024x1024",  # Define the size of the generated image
    )

    images = getattr(result, "data", None)
    if not images:
        return "The image generation process was unsuccessful."

    image_url = images[0].url
    print(f"Generated image for '{prompt}': {image_url}")

    # Make sure the target directory exists before writing into it.
    output_dir = "data/images"
    os.makedirs(output_dir, exist_ok=True)

    current_time = datetime.now().strftime("%Y%m%d%H%M%S")
    # Prompts are free text: replace characters that are path separators or
    # invalid in file names so the prompt cannot redirect or break the path.
    shortened_prompt = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", prompt[:50])
    filename = f"{output_dir}/{shortened_prompt}_{current_time}.png"

    # A timeout keeps a stalled download from hanging the agent conversation.
    download = requests.get(image_url, timeout=60)
    if download.status_code != 200:
        return "The image could not be successfully downloaded and saved."

    with open(filename, "wb") as file:
        file.write(download.content)
    return f"Image saved as '{filename}'"

In [14]:
# LLM configuration shared by both assistants: the two callable function
# schemas plus the same model list and timeout used by the plain llm_config.
llm_config_assistants = dict(
    functions=[
        # Schema for the image generation function.
        dict(
            name="text_to_image_generation",
            description="Utilize the most recent AI model to create an image using a given prompt and provide the file path to the generated image.",
            parameters=dict(
                type="object",
                properties=dict(
                    prompt=dict(
                        type="string",
                        description="A detailed textual prompt that provides a description of the image to be generated.",
                    ),
                ),
                required=["prompt"],
            ),
        ),
        # Schema for the image review function.
        dict(
            name="image_review",
            description="Examine and assess the image created by AI according to the initial prompt, offering feedback and recommendations for enhancement.",
            parameters=dict(
                type="object",
                properties=dict(
                    prompt=dict(
                        type="string",
                        description="The original input text that served as the prompt for generating the image.",
                    ),
                    image_path=dict(
                        type="string",
                        description="The complete file path for the image, including both the directory path and the file extension.",
                    ),
                ),
                required=["prompt", "image_path"],
            ),
        ),
    ],
    config_list=config_list,
    timeout=120,
)

# Prompt engineer: calls the generation function and refines the prompt.
img_gen_assistant = AssistantAgent(
    name="text_to_img_prompt_expert",
    llm_config=llm_config_assistants,
    function_map=dict(
        image_review=img_review,
        text_to_image_generation=text_to_image_generation,
    ),
    system_message=(
        "As an expert in text-to-image AI models, you will utilize the "
        "'text_to_image_generation' function to create an image based on the given "
        "prompt and iterate on the prompt, incorporating feedback until it achieves "
        "a perfect rating of 10/10."
    ),
)

# Critic: calls the review function and suggests prompt improvements.
img_critic_assistant = AssistantAgent(
    name="img_critic",
    llm_config=llm_config_assistants,
    function_map=dict(
        image_review=img_review,
        text_to_image_generation=text_to_image_generation,
    ),
    system_message=(
        "In the role of an AI image critic, your task is to employ the "
        "'image_review' function to evaluate the image generated by the "
        "'text_to_img_prompt_expert' using the original prompt. You will then offer "
        "feedback on how to enhance the prompt for better image generation."
    ),
)

# Human stand-in: asks for input on every turn, with Docker disabled for code execution.
user_proxy = UserProxyAgent(
    name="user_proxy",
    code_execution_config=dict(use_docker=False),
    human_input_mode="ALWAYS",
)


In [15]:
# Put the human proxy and both assistants into a single shared conversation.
groupchat = autogen.GroupChat(
    max_round=10,  # Upper bound on conversation rounds
    messages=[],  # Begin with an empty message history
    agents=[user_proxy, img_gen_assistant, img_critic_assistant],
)

# The manager runs the group chat using the plain (function-free) LLM config.
manager = autogen.GroupChatManager(llm_config=llm_config, groupchat=groupchat)

# Start the conversation with the image request; left as the cell's last
# expression so the returned chat result is displayed.
user_proxy.initiate_chat(
    manager,
    message="Generate a photorealistic image of a corgi riding a skateboard.",
)


[33muser_proxy[0m (to chat_manager):

Generate a photorealistic image of a corgi riding a skateboard.

--------------------------------------------------------------------------------
[32m
Next speaker: text_to_img_prompt_expert
[0m
[33mtext_to_img_prompt_expert[0m (to chat_manager):

[32m***** Suggested function call: text_to_image_generation *****[0m
Arguments: 
{"prompt":"A photorealistic image of a corgi riding a skateboard"}
[32m*************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: img_critic
[0m
[35m
>>>>>>>> EXECUTING FUNCTION text_to_image_generation...[0m
[33mimg_critic[0m (to chat_manager):

[32m***** Response from calling function (text_to_image_generation) *****[0m
Error: ReplicateError Details:
title: Free time limit reached
status: 402
detail: You have reached the free time limit. To continue using Replicate, set up billing at https://replicate

ChatResult(chat_id=None, chat_history=[{'content': 'Generate a photorealistic image of a corgi riding a skateboard.', 'role': 'assistant'}, {'content': '', 'function_call': {'arguments': '{"prompt":"A photorealistic image of a corgi riding a skateboard"}', 'name': 'text_to_image_generation'}, 'name': 'text_to_img_prompt_expert', 'role': 'assistant'}, {'content': 'Error: ReplicateError Details:\ntitle: Free time limit reached\nstatus: 402\ndetail: You have reached the free time limit. To continue using Replicate, set up billing at https://replicate.com/account/billing#billing.', 'name': 'text_to_image_generation', 'role': 'function'}, {'content': '', 'role': 'assistant'}], summary='', cost={'usage_including_cached_inference': {'total_cost': 0}, 'usage_excluding_cached_inference': {'total_cost': 0}}, human_input=['', 'exit'])