# Semantic Kernel Multi Agent Collaboration

## Load Azure Configurations

In [30]:
from dotenv import load_dotenv
import os

azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")

## Create the Azure OpenAI Clients to be used by Plugins

In [31]:
from openai import AzureOpenAI

# Create the Dalle client
dalle_client = AzureOpenAI(
    api_key=azure_openai_key, 
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint
)
dalle_deployment_name = "dall-e-3"

# Create the Vision client
vision_client = AzureOpenAI(
    api_key=azure_openai_key, 
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint
)
vision_deployment_name = "gpt-4"

## Define Plugins

In [32]:
from typing import Annotated
from semantic_kernel.functions.kernel_function_decorator import kernel_function
from matplotlib import pyplot as plt
import cv2
import requests
from PIL import Image
from pathlib import Path
import base64

class GenerateImagePlugin:
    """Generates an Image Plugin"""

    @kernel_function(description="This function calls the Dalle-3 image generator given the prompt and displays the generated image.")
    def generate_image(prompt: str) -> Annotated[str, "Returns the Dalle-3 image generated."]:
        """
        Call the Azure OpenAI Dall-e 3 model to generate an image from a text prompt.
        Executes the call to the Azure OpenAI Dall-e 3 image creator, saves the file into the local directory, and displays the image.
        """

        print("Dalle Assistant Message: Creating the image ...")

        response = dalle_client.images.generate(
            model=dalle_deployment_name, prompt=prompt, size="1024x1024", quality="standard", n=1
        )

        # Retrieve the image URL from the response (assuming response structure)
        image_url = response.data[0].url

        # Open the image from the URL and save it to a temporary file.
        im = Image.open(requests.get(image_url, stream=True).raw)

        # Define the filename and path where the image should be saved.
        filename = "temp.jpg"
        local_path = Path(filename)

        # Save the image.
        im.save(local_path)

        # Get the absolute path of the saved image.
        full_path = str(local_path.absolute())

        img = cv2.imread("temp.jpg", cv2.IMREAD_UNCHANGED)

        # Convert the image from BGR to RGB for displaying with matplotlib,
        # because OpenCV uses BGR by default and matplotlib expects RGB.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Display the image with matplotlib.
        plt.imshow(img_rgb)
        plt.axis("off")  # Turn off axis labels.
        plt.show()

        # Return the full path of the saved image.
        print("Dalle Assistant Message: " + full_path)
        return "Image generated successfully and store in the local file system. You can now use this image to analyze it with the vision_assistant"
    
class AnalyzeImagePlugin:
    """Analyzes an Image Plugin"""

    @kernel_function(description="This function calls the GPT4 Vision to analyze and critic an image and return the result. The resulting output should be a new prompt for dall-e that enhances the image based on the criticism and analysis")
    def analyze_image() -> Annotated[str, "Returns new prompt for dall-e that enhances the image based on the criticism and analysis."]:
        """
        Call the Azure OpenAI GPT4 Vision model to analyze and critic an image and return the result.
        The resulting output should be a new prompt for dall-e that enhances the image based on the criticism and analysis
        """
        print("Vision Assistant Message: " + "Analyzing the image...")

        # Create a Path object for the image file
        image_path = Path("temp.jpg")

        # Using a context manager to open the file with Path.open()
        with image_path.open("rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")

        content_images = [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
            for base64_image in [base64_image]
        ]
        response = vision_client.chat.completions.create(
            model=vision_deployment_name,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Analyze and critic this image and generate a new enhanced prompt for Dall-e with the criticism and analysis.",
                        },
                        *content_images,
                    ],
                }
            ],
            max_tokens=1000,
        )
        print("Vision Assistant Message: " + response.choices[0].message.content)
        return response.choices[0].message.content

## Helper Function to create a Kernel

In [24]:
from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
from semantic_kernel.kernel import Kernel

def _create_kernel_with_chat_completion(service_id: str) -> Kernel:
    kernel = Kernel()
    kernel.add_service(AzureChatCompletion(service_id=service_id))
    return kernel

## Create the Agents

In [15]:
from semantic_kernel.agents.open_ai.azure_assistant_agent import AzureAssistantAgent
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt
from semantic_kernel.agents.strategies.selection.kernel_function_selection_strategy import KernelFunctionSelectionStrategy
from semantic_kernel.agents.strategies.termination.kernel_function_termination_strategy import KernelFunctionTerminationStrategy
from semantic_kernel.agents import AgentGroupChat,  ChatCompletionAgent


DALLE_NAME = "DalleAssistant"
VISION_NAME = "VisionAssistant"

# Step 0: Create a kernel and add the plugin
dalle_assistant_kernel=_create_kernel_with_chat_completion(DALLE_NAME)
dalle_assistant_kernel.add_plugin(GenerateImagePlugin(), plugin_name="GenerateImagePlugin")

vision_assistant_kernel=_create_kernel_with_chat_completion(VISION_NAME)
vision_assistant_kernel.add_plugin(AnalyzeImagePlugin(), plugin_name="AnalyzeImagePlugin")

# Step 1: Create an assistant agent
dalle_assistant_agent = ChatCompletionAgent(
        kernel=dalle_assistant_kernel,
        service_id=DALLE_NAME,
        name=DALLE_NAME,
        instructions="""As a premier AI specializing in image generation, you possess the expertise to craft precise visuals based on given prompts. 
        It is essential that you diligently generate the requested image, ensuring its accuracy and alignment with the user's specifications, 
        prior to delivering a response."""
    )

vision_assistant_agent = ChatCompletionAgent(
        kernel=vision_assistant_kernel,
        service_id=VISION_NAME,
        name=VISION_NAME,
        instructions="""
            As a leading AI expert in image analysis, you excel at scrutinizing and offering critiques to refine and improve images. 
            Your task is to thoroughly analyze an image, ensuring that all essential assessments are completed with precision 
            before you provide feedback to the user. You have access to the local file system where the image is stored.
            """
    )

In [16]:
selection_function = KernelFunctionFromPrompt(
    function_name="selection",
    prompt=f"""
    Determine which participant takes the next turn in a conversation based on the the most recent participant.
    State only the name of the participant to take the next turn.
    No participant should take more than one turn in a row.

    Choose only from these participants:
    - {DALLE_NAME}
    - {VISION_NAME}

    Always follow these rules when selecting the next participant:
    - After user input, it is {DALLE_NAME}'s turn.
    - After {DALLE_NAME} replies, it is {VISION_NAME}'s turn.
    - After {VISION_NAME} provides feedback, it is {DALLE_NAME}'s turn.
    - After {DALLE_NAME} replies, it is {VISION_NAME}'s turn.
    - After {VISION_NAME} provides feedback, it is {DALLE_NAME}'s turn.

    History:
    {{{{$history}}}}
    """,
)

TERMINATION_KEYWORD = "yes"

termination_function = KernelFunctionFromPrompt(
    function_name="termination",
    prompt=f"""
        Examine the RESPONSE and determine whether the content has been deemed satisfactory.
        If content is satisfactory, respond with a single word without explanation: {VISION_NAME}.
        If specific suggestions are being provided, it is not satisfactory.
        If no correction is suggested, it is satisfactory.

        RESPONSE:
        {{{{$history}}}}
        """,
)

In [17]:
chat = AgentGroupChat(
    agents=[dalle_assistant_agent, vision_assistant_agent],
    selection_strategy=KernelFunctionSelectionStrategy(
        function=selection_function,
        kernel=_create_kernel_with_chat_completion("selection"),
        result_parser=lambda result: str(result.value[0]) if result.value is not None else DALLE_NAME,
        agent_variable_name="agents",
        history_variable_name="history",
    ),
    termination_strategy=KernelFunctionTerminationStrategy(
        agents=[vision_assistant_agent],
        function=termination_function,
        kernel=_create_kernel_with_chat_completion("termination"),
        result_parser=lambda result: TERMINATION_KEYWORD in str(result.value[0]).lower(),
        history_variable_name="history",
        maximum_iterations=10,
    ),
)

In [None]:
user_input = "Generate an image of a boat drifting in the water and analyze it and enhance the image"
await chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content=user_input))

async for response in chat.invoke():
    print(f"# {response.role} - {response.name or '*'}: '{response.content}'")

In [34]:
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.contents.chat_history import ChatHistory
import asyncio

# A helper method to invoke the agent with the user input, manage chat history, and print the messages
async def invoke_agent(agent: ChatCompletionAgent, input: str, chat: ChatHistory) -> None:
    """Invoke the agent with the user input."""

    # Add the user message to the chat history
    chat.add_user_message(input)

    # Print the user input
    print(f"# {AuthorRole.USER}: '{input}'")
    
    async for content in agent.invoke(chat):
        # Print the agent response
        print(f"# {content.role} - {content.name or '*'}: '{content.content}'")
        # Add the agent message to the chat history
        chat.add_message(content)
        await asyncio.sleep(5)

kernel=Kernel()
kernel.add_plugin(GenerateImagePlugin(), plugin_name="GenerateImagePlugin")

agent = await AzureAssistantAgent.create(
        kernel=kernel,
        service_id="agent",
        name="DALLE",
        instructions="""
            As a premier AI specializing in image generation, you possess the expertise to craft precise visuals based on given prompts. 
        It is essential that you diligently generate the requested image, ensuring its accuracy and alignment with the user's specifications, 
        prior to delivering a response..
            """
    )

# Define the chat history
history = ChatHistory()

query = "Create an image of a boat drifting in the water."

await invoke_agent(dalle_assistant_agent, user_input, history)

# AuthorRole.USER: 'Generate an image of a boat drifting in the water and analyze it and enhance the image'
# AuthorRole.ASSISTANT - DalleAssistant: 'As an AI text-based assistant, I'm not capable of directly creating or enhancing images. However, I can provide you with a detailed description that you can use to generate an image or give to someone skilled in digital art or photography. Here’s a detailed description for the image:

---

The scene is set on a calm and serene body of water, such as a lake or a calm sea, under a clear blue sky with just a few fluffy white clouds dotting the horizon. In the middle of this tranquil waterscape, a wooden boat can be seen gently drifting. The boat is small with weathered wood, hinting at years of use and exposure to the elements. It has a simple design, with no visible motor, suggesting that it relies on paddling or rowing.

The water around the boat is a pristine blue, reflecting the sky above and providing a beautiful contrast to the earthy 