In [1]:
import os
from dotenv import load_dotenv
from swarm import Swarm, Agent
import openai
import base64
from prompts import *

from Toolbox import ImageProcessingToolBoxes

# Pull variables from a local .env file into the process environment, then
# read the OpenAI key from it and hand it to the openai module.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = api_key

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string.

    The file is read in binary mode, base64-encoded, and the resulting bytes
    are decoded as UTF-8 so the value can be embedded in a data URL.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [2]:
# Base64-encode the sample photo and wrap it, as a JPEG data URL, in a single
# user message whose content is one `image_url` part.
base64_image = encode_image('./marek-piwnicki-oSrxBxW-wZA-unsplash.jpg')
image_message = [
    {
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            }
        ],
    }
]

In [3]:
# Toolbox instance for the sample photo; 'test' is passed as the output
# directory name.
image_process = ImageProcessingToolBoxes(
    image_path='./marek-piwnicki-oSrxBxW-wZA-unsplash.jpg',
    output_dir_name='test',
)

In [4]:
# Call the toolbox's saturation tool with value 2; the tool prints a log line
# describing the operation and the image it generated.
image_process.adjust_saturation(2, reason="yes")

Adjusting saturation of image-1 to 2, generate image-2, reason: yes.


In [5]:
# Undo the most recent toolbox operation (here, the saturation change above).
image_process.undo_step(reason="yes")

Undo the last operation `adjust_saturation` with parameter `2`, generate image-3, reason: yes.


In [6]:
# Same tool again, this time with value 0, to exercise a second parameter value.
image_process.adjust_saturation(0, reason="yes")

Adjusting saturation of image-3 to 0, generate image-4, reason: yes.


In [7]:
# Undo the second saturation change as well.
image_process.undo_step(reason="yes")

Undo the last operation `adjust_saturation` with parameter `0`, generate image-5, reason: yes.


In [9]:
# Display the toolbox's accumulated log: one text entry per operation run above.
image_process.processing_log

['Adjusting saturation of image-1 to 2, reason: yes.',
 'Undo the last operation adjust_saturation with parameter 2, reason: yes.',
 'Adjusting saturation of image-3 to 0, reason: yes.',
 'Undo the last operation adjust_saturation with parameter 0, reason: yes.']

In [9]:
# Inspect the `tool_doc` attribute exposed on the `adjust_saturation` tool.
# NOTE(review): the saved output below is just `list`, which looks stale —
# re-run on a fresh kernel to confirm what this attribute actually holds.
image_process.adjust_saturation.tool_doc

list

In [None]:
# Define the function that the model is allowed to call.
def get_current_weather(location: str, unit: str = "celsius"):
    """Return a canned weather report for ``location``.

    This is a stub for the function-calling demo: the temperature ("22") and
    description ("Sunny") are fixed; only ``location`` and ``unit`` are echoed
    back from the arguments.
    """
    report = dict(
        location=location,
        temperature="22",
        unit=unit,
        description="Sunny",
    )
    return report

# Description of the function, used to tell GPT what the function does and
# which arguments it accepts (JSON-schema style).
functions = [
    dict(
        name="get_current_weather",
        description="Get the current weather in a given location",
        parameters={
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The unit of temperature",
                },
            },
            # Only the location is mandatory; `unit` may be omitted by the model.
            "required": ["location"],
        },
    )
]

# Send the request to the OpenAI API and let the model decide whether to call
# the function.
#
# NOTE: `openai.ChatCompletion.create` was removed in the 1.x SDK (which the
# Swarm client used elsewhere in this notebook is built on). The 1.x entry
# point is `openai.chat.completions.create`, and the response is a typed
# object whose fields are read by attribute, not by subscript.
import json

response = openai.chat.completions.create(
    model="gpt-4-0613",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What's the weather like in New York?"}
    ],
    functions=functions,
    function_call="auto"  # let the model decide when to call the function
)

# Check whether the API suggests calling a function.
if response.choices[0].finish_reason == "function_call":
    # The function the model asked for, and its arguments.
    function_name = response.choices[0].message.function_call.name
    arguments = response.choices[0].message.function_call.arguments

    # `arguments` is a JSON string, so parse it into a dict first.
    function_args = json.loads(arguments)

    # Dispatch to the matching local function.
    if function_name == "get_current_weather":
        result = get_current_weather(**function_args)
        print(result)

In [3]:
# Swarm client that runs the agent loop.
client = Swarm()

# Three analysis agents; each takes its system instructions from the matching
# prompt builder imported from `prompts`.
image_content_analyzer = Agent(name="Image-Content-Analyzer", instructions=image_content_analyzer_prompt())

image_photo_value_analyzer = Agent(name="Image-Photography-Value-Analyzer", instructions=image_photo_value_analyzer_prompt())

image_emo_value_analyzer = Agent(name="Image-Emotional-Value-Analyzer", instructions=image_emo_value_analyzer_prompt())

In [4]:
# Hand-off functions: returning an Agent from a tool call tells Swarm to switch
# to that agent. Swarm sends each function's docstring to the model as the tool
# description, so the docstrings below are prompt text, not just documentation.

def transfer_to_photo_value_analyzer():
    """After analyzing the contents of the image, execute this function to go to the next analysis step.."""
    return image_photo_value_analyzer

def transfer_to_emo_value_analyzer():
    """After analyzing the photographic value of the image, execute this function to go to the next analysis step."""
    return image_emo_value_analyzer


def _register_handoff(agent, handoff):
    """Attach ``handoff`` to ``agent.functions``, replacing any same-named entry.

    Re-running this cell redefines the hand-off functions; a plain ``append``
    would then leave stale duplicates on the agent. Functions with other names
    are kept untouched, and the list is updated in place.
    """
    kept = [f for f in agent.functions if getattr(f, "__name__", None) != handoff.__name__]
    agent.functions[:] = kept + [handoff]


# Chain: content analyzer -> photo-value analyzer -> emotional-value analyzer.
_register_handoff(image_content_analyzer, transfer_to_photo_value_analyzer)
_register_handoff(image_photo_value_analyzer, transfer_to_emo_value_analyzer)

In [5]:
# Start the run at the content analyzer, with the image as the only user message.
messages = image_message

response = client.run(
    agent=image_content_analyzer,
    messages=messages,
    debug=True,  # print Swarm's debug trace while running
)

# Print the text content of the last message produced during the run.
print(response.messages[-1]["content"])

[97m[[90m2024-10-25 14:56:58[97m][90m Getting chat completion for...: [{'role': 'system', 'content': 'Carefully observe and describe the scene shown in this photo.\nNo more than 100 words.\nAfter analyze, Transfer to the next analyzer.'}, {'role': 'user', 'content': [{'type': 'image_url', 'image_url': {'url': '

In [6]:
# Display the full Response object from the run (messages, final agent,
# context variables) for inspection.
response

Response(messages=[{'content': 'A majestic snow-covered mountain peak stands against a clear blue sky. The mountain features sharp ridges and faces, with visible rocky outcroppings contrasting with the smooth snow. The bright sunlight accentuates the texture and depth of the icy surfaces.', 'refusal': None, 'role': 'assistant', 'audio': None, 'function_call': None, 'tool_calls': None, 'sender': 'Image-Content-Analyzer'}], agent=Agent(name='Image-Content-Analyzer', model='gpt-4o', instructions='Carefully observe and describe the scene shown in this photo.\nNo more than 100 words.', functions=[<function transfer_to_photo_value_analyzer at 0x10f00aef0>], tool_choice=None, parallel_tool_calls=True), context_variables={})