In [1]:
import json
import re
import pandas as pd

from IPython import get_ipython
from typing_extensions import Annotated

import autogen
from autogen import Agent, GroupChat
from autogen.cache import Cache
from dragonTextEnv import DragonTextEnv

# Hosted-model endpoints are read from the OAI_CONFIG_LIST file next to the notebook.
# Only entries tagged "tool" are kept; drop `filter_dict` to load every entry.
config_list = autogen.config_list_from_json(
    "./OAI_CONFIG_LIST",
    filter_dict=dict(tags=["tool"]),
)


# Alternative configuration that targets a local LiteLLM proxy instead of a hosted API.
local_llm_config = dict(
    config_list=[
        dict(
            model="NotRequired",  # the model is chosen by the LiteLLM command
            api_key="NotRequired",  # no key is needed for the local proxy
            base_url="http://0.0.0.0:4000",  # LiteLLM URL
            price=[0, 0],  # price per 1K tokens [prompt, response]; free
        )
    ],
    cache_seed=None,  # disables caching, useful when comparing models
)

In [2]:
# Show the loaded entries with credentials masked, so API keys never end up in
# the notebook's saved output.
[
    {**entry, "api_key": "<redacted>"} if "api_key" in entry else entry
    for entry in config_list
]

[{'model': 'gpt-4-turbo',
  'api_key': '<redacted>',
  'tags': ['gpt-4-turbo', 'tool']},
 {'model': 'gpt-4o-2024-08-06',
  'api_key': '<redacted>',
  'tags': ['gpt-4o-snapshot', 'tool']},
 {'model': 'gpt-4o-mini',
  'api_key': '<redacted>',
  'tags': ['gpt-4o-mini', 'tool']}]

In [3]:
# Instantiate the DragonTextEnv game environment.
# NOTE(review): the keyword meanings below are inferred from their names only;
# confirm against dragonTextEnv. `env` is not referenced again in the cells
# visible in this notebook.
env = DragonTextEnv(
    seed=42,  # presumably seeds the environment's randomness -- TODO confirm
    include_agent_action=True,
    allow_comm=True,  # presumably enables messages between agents -- TODO confirm
    act_and_comm=True,
    tool_per_agent=2,  # presumably tools per agent; the agent prompt gives each player two
)

In [4]:
# System prompt template for the operative agents; `{agent_id}` is filled in with
# the agent's codename via str.format when each agent is created.
#
# The sections are adjacent string literals, each ending with an explicit space.
# The previous backslash continuations joined some sections with no separator
# at all (e.g. "{agent_id}The Map:"), which garbled the call sign and ran
# sentences together.
background_prompt_agent = (
    "Welcome to our interactive text game! In this game, you'll assume the role of a specialist on a search and rescue team. Alongside two other players, you'll navigate a five-room environment with a mission to defuse five hidden bombs. Your call sign is {agent_id}. "
    "The Map: Imagine a network of rooms represented by a connected graph where each node corresponds to a room, and the edges between nodes depict hallways. The rooms are numbered 0, 3, 6, 5, and 8. Room 0 is connected to all other rooms. Room 5 shares a hallway with room 6. Room 3 is linked to room 8. And room 8 is also connected with room 6. You can only travel to adjacent, directly connected rooms at each turn. "
    "The Challenge: Scattered among these rooms are five bombs, each coded with different phases represented by colors. To defuse them, you'll need to use the correct wire-cutting tools in the correct sequence. There are one-phase, two-phase, and three-phase bombs, needing 1, 2, or 3 color-coded tool applications in sequence to disarm. For instance, a bomb with a red-green phase sequence requires the red tool first, then the green one. Points are awarded based on the number of tools used for defusing a bomb, with each tool use worth 10 points. Your task is to maximize the team score as soon as possible. The challenge is that the bomb locations and sequences are unknown to players at the start. "
    "Tools: Each player is equipped with two color-coded wire cutters. Player Alpha has red and green tools, player Bravo wields green and blue, and player Charlie possesses blue and red. "
    "Actions: Each round, you can opt to do one of the following: 1) Move to an adjacent room, 2) Inspect a bomb's phase sequence in your current room, or 3) Apply your wire cutters to a bomb in the current room. "
    "Communications: In addition to selecting an action to take from the above list, you can also send communication message texts to both of your teammates in each round. The message text you sent will be shared with both of your teammates in their observation in the next round. "
    "Observation: While you can only see what's in your current room and read text messages from teammates. You'll also be informed of the current round number, team score and the current location of your teammates. Your teammates have the same observability as you. They will not be able to know your action and its consequences unless you explicitly communicate. "
    "You will be playing as Player {agent_id}. To facilitate our interaction, reply your action selection and communication messages in this fixed format: Action selection: Your action. Message to Team: “Your Message”. To move to an adjacent room, say: 'Move to Room X'. To inspect the sequence of a bomb in your current room, say: 'Inspect Bomb'. To apply a wire cutter tool, say: 'Apply X Tool'. Remember, your replies must adhere strictly to these rules. Feel free to ask clarifying questions if needed. I'll supply the necessary information as we progress. Are you ready to take on this explosive challenge?"
)

In [5]:
# System prompt for the manager agent. It is passed unchanged as `system_message`
# when `agent_manager` is created. The "Round X Summary" wording matters: the
# custom speaker-selection function matches that phrase to hand the turn to the
# formatter.
background_prompt_manager = """Welcome to the Search and Rescue Operations Command Center! As the Group Manager, your main responsibility is to provide strategic oversight with concise communication at the beginning and end of each round.

Mission Brief: Guide the team to locate and defuse five hidden bombs that are scattered in unknown rooms. Provide 'Round X Starting' and 'Round X Summary' messages.

Bomb Defusal: The exact positions of the bombs are unknown and must be discovered through inspection and room exploration. There are one-phase, two-phase, and three-phase bombs needing specific tool sequences. Alpha has red and green tools, Bravo has green and blue tools, Charlie has blue and red tools.

Responsibilities:
1. Start each round with a brief 'Round X Starting' message outlining the current situation and any key updates.
2. After agents select their actions, send a 'Round X Summary' recalling agents' choices and summarizing progress.

Action Protocol: Facilitate communication among agents to ensure they coordinate effectively on room inspections, bomb identification, and tool application.

Scoring: Aim to maximize the team score by efficiently defusing bombs with minimal tool use.

Readiness Check: Ensure agents understand the strategy and objectives before starting."""

In [6]:
# background_prompt_manager = """Welcome to the Search and Rescue Operations Command Center! As the Group Manager, your main responsibility is to provide strategic oversight with concise communication at the beginning and end of each round.

# Mission Brief: Guide the team to locate and defuse five hidden bombs that are scattered in unknown rooms. Provide 'Round X Starting' and 'Round X Summary' messages.

# Bomb Defusal: The exact positions of the bombs are unknown and must be discovered through inspection and room exploration. There are one-phase, two-phase, and three-phase bombs needing specific tool sequences. Alpha has red and green tools, Bravo has green and blue tools, Charlie has blue and red tools.

# Responsibilities:
# 1. Start each round with a brief 'Round X Starting' message outlining the current situation and any key updates.
# 2. After agents select their actions, send a 'Round X Summary' recalling agents' choices and summarizing progress. Conclude with 'TERMINATE' without any other symbols.

# Action Protocol: Facilitate communication among agents to ensure they coordinate effectively on room inspections, bomb identification, and tool application.

# Scoring: Aim to maximize the team score by efficiently defusing bombs with minimal tool use.

# Game Information: Use round updates, team score, player positions, and agents' actions to inform your messages.

# Readiness Check: Ensure agents understand the strategy and objectives before starting."""

In [7]:
# Start AutoGen runtime logging into the "logs.db" database and keep the returned
# session id; the cost cell later filters the log on it.
logging_session_id = autogen.runtime_logging.start(config=dict(dbname="logs.db"))
print(f"Logging session ID: {logging_session_id!s}")

Logging session ID: 118c5dd0-821a-4b5e-bade-653c6023c463


In [8]:
from pydantic import BaseModel


class DragonExpExtraction(BaseModel):
    """Schema for the per-round report requested from the formatter agent.

    The three fields mirror the keys listed in the formatter's system message.
    In the cells visible here the model is only referenced from a commented-out
    ``response_format`` entry (structured outputs are still a TODO).
    """

    # Names of the reporting agents.
    agents_names: list[str]
    # One action string per agent.
    agents_actions: list[str]
    # One communication message per agent.
    agents_comm: list[str]

In [9]:
from openai import OpenAI
import inspect

# Collect the keyword-only parameter names of the OpenAI client constructor.
openai_init_spec = inspect.getfullargspec(OpenAI.__init__)
openai_kwargs = set(openai_init_spec.kwonlyargs)
print(openai_kwargs)

{'base_url', 'api_key', 'timeout', 'http_client', 'default_query', 'default_headers', '_strict_response_validation', 'max_retries', 'organization', 'project'}


In [10]:
# LLM settings shared by the operative agents, the manager and the group chat manager.
llm_config = dict(temperature=0, timeout=120, config_list=config_list)

# The formatter currently uses the same settings; it has its own dict so that
# structured output can be switched on for it alone.
formatter_config = dict(
    temperature=0,
    timeout=120,
    config_list=config_list,
    # "response_format": DragonExpExtraction,  # TODO for structured outputs
)

# Number of operative agents to create, taken from the head of `codenames`.
nb_agents = 3
codenames = "alpha bravo charlie delta echo foxtrot golf hotel india juliett".split()


def terminate_message(x):
    """Decide whether a chat message should end the conversation.

    Args:
        x: Message dict; only its ``"content"`` entry is read.

    Returns:
        bool: True when the content, ignoring trailing whitespace, ends with
        ``"TERMINATE"`` or with ``"Round <digits> Summary"``. False otherwise,
        including when the content is missing, None or empty.
    """
    content = x.get("content", "")
    if not content:
        return False
    stripped = content.rstrip()
    # The `is not None` comparison used to sit inside the re.search() call, so a
    # bool was passed as the subject string and every message that did not end
    # with TERMINATE raised TypeError.
    return (
        stripped.endswith("TERMINATE")
        or re.search(r"Round \d+ Summary$", stripped) is not None
    )


# Create one operative agent per codename (the first `nb_agents` entries).
# Each agent gets the shared LLM settings and the player prompt rendered with
# its own codename.
agents = []
# Iterate the codenames directly: the previous `for _, codename in enumerate(...)`
# never used the index and bound `_` at module level, which stops IPython from
# updating its last-output `_` variables.
for codename in codenames[:nb_agents]:
    name = f"agent_{codename}"
    print(name)
    operative = autogen.AssistantAgent(
        name=name,
        llm_config=llm_config,
        system_message=background_prompt_agent.format(agent_id=codename),
        description=f"Agent {codename} is a specialist on a search and defuse team.",
    )
    # Also publish the agent as a module-level variable named after it
    # (agent_alpha, agent_bravo, ...), as before.
    globals()[name] = operative
    agents.append(operative)

# The manager agent, driven by the manager prompt; it shares `llm_config` with
# the operative agents. The custom speaker-selection function gives it the first
# turn of the group chat.
agent_manager = autogen.AssistantAgent(
    name="agent_manager",
    llm_config=llm_config,
    system_message=background_prompt_manager,
    description="The Manager of the Search and Defuse Operations Command Center",
)
agents.append(agent_manager)

# Create the UserProxyAgent named "user_proxy". It never asks for human input
# and is the agent that later initiates the chat with the group chat manager.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    # A message is treated as terminating when it has content and that content,
    # ignoring trailing whitespace, ends with "TERMINATE".
    is_termination_msg=lambda x: x.get("content", "")
    and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config={"work_dir": "coding", "use_docker": True},
)

agents.append(user_proxy)

# The formatter agent condenses a round into a machine-readable report of the
# operative agents' actions and messages. Its prompt asks it to finish with
# "TERMINATE", which the termination checks on user_proxy and the group chat
# manager look for; downstream cells strip that token before json.loads.
formatter = autogen.AssistantAgent(
    name="formatter",
    llm_config=formatter_config,
    description="Formatter to extract experiment data",
    system_message="""You are a formatter that should report the actions and communications of the operative agents only, agent_manager not being one. The actions must be reported in the format :\
        {agents_names: list[str],
            agents_actions: list[str],
            agents_comm: list[str]}
        agents_names should be in lowercase with no spaces.
        agents_actions should be one of the following: 'go_to_node_X', 'inspect_bomb', 'apply_X_tool'.
        Answer "TERMINATE" after your task is done.""",
)

agents.append(formatter)

agent_alpha
agent_bravo
agent_charlie


In [11]:
# Module-level view of the group chat history. It is rebound by
# custom_speaker_selection_func on every call and read by a later cell.
messages = []


def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):
    """Choose the next speaker of the group chat.

    Transitions:
      * at most one message so far      -> ``agent_manager`` opens the round;
      * last message contains "Round <word> Summary" -> ``formatter``;
      * the formatter has just spoken   -> ``user_proxy``;
      * anything else                   -> ``"auto"`` (default selection).

    Side effect: rebinds the module-level ``messages`` to ``groupchat.messages``.

    Args:
        last_speaker: The agent that produced the most recent message.
        groupchat: The group chat whose ``messages`` history is inspected.

    Returns:
        An `Agent` instance, or the string ``"auto"`` to fall back to the
        default selection method.
    """
    global messages
    messages = groupchat.messages
    if len(messages) <= 1:
        print("Selecting agent_manager")
        return agent_manager

    # `.get("content", "")` still yields None when the key is present with a None
    # value, and `.rstrip()` would then raise AttributeError; normalise to "".
    # NOTE(review): whether None content can occur in this chat depends on the
    # agents' configuration -- the guard is defensive.
    last_content = (messages[-1].get("content") or "").rstrip()
    if re.search(r"Round \w+ Summary:?", last_content) is not None:
        return formatter

    if last_speaker is formatter:
        return user_proxy

    return "auto"

In [12]:
# Assemble the group chat from every agent created above. The chat starts with
# an empty history, is capped at 10 rounds, and uses the custom function to pick
# the next speaker.
groupchat = autogen.GroupChat(
    agents=agents,
    messages=[],
    max_round=10,
    speaker_selection_method=custom_speaker_selection_func,  # TODO needs tuning to properly give the manager the first turn and formatter the last
)
# The manager that runs the group chat. It uses the same termination test as
# user_proxy: non-empty content that ends with "TERMINATE" once trailing
# whitespace is removed.
gc_manager = autogen.GroupChatManager(
    name="groupchat_manager",
    groupchat=groupchat,
    llm_config=llm_config,
    is_termination_msg=lambda x: x.get("content", "")
    and x.get("content", "").rstrip().endswith("TERMINATE"),
)

In [13]:
# Run the game session with the disk cache opened here.
# The cache object must be handed to initiate_chat explicitly; previously it was
# created but never passed, so the `with` block had no effect on the chat.
with Cache.disk() as cache:
    user_proxy.initiate_chat(
        gc_manager,
        message="Agent_manager initiates a new game session and agents procede to play the game.",
        cache=cache,
    )

[33muser_proxy[0m (to groupchat_manager):

Agent_manager initiates a new game session and agents procede to play the game.

--------------------------------------------------------------------------------
Selecting agent_manager
[32m
Next speaker: agent_manager
[0m
[33magent_manager[0m (to groupchat_manager):

Round 1 Starting:
Welcome, team. Our mission is to locate and defuse five hidden bombs scattered across various rooms. Each agent has specific tools, so coordination is crucial. Let's start by exploring and inspecting rooms to identify the bombs. Stay alert and communicate effectively. Good luck, everyone. Let's begin the operation.

--------------------------------------------------------------------------------
[32m
Next speaker: agent_alpha
[0m
[33magent_alpha[0m (to groupchat_manager):

Action selection: Move to Room 3.
Message to Team: “Heading to Room 3 to check for bombs. Let's spread out and inspect different rooms. Report any bomb sequences you find.”

--------

In [14]:
# Stop the runtime logging session started earlier, before the log database is read.
autogen.runtime_logging.stop()

### Log processing

In [15]:
def get_log(dbname="logs.db", table="chat_completions"):
    """Read every row of a table from a SQLite database.

    Args:
        dbname: Path of the SQLite database file.
        table: Name of the table to read. It is interpolated into the SQL text
            (identifiers cannot be bound as parameters), so only pass trusted
            names.

    Returns:
        list[dict]: One dict per row, keyed by column name, in query order.

    Raises:
        sqlite3.Error: Propagated from sqlite3, e.g. when the table does not exist.
    """
    import sqlite3
    from contextlib import closing

    # closing() releases the connection even when the query raises; previously
    # an exception skipped con.close() and leaked the handle.
    with closing(sqlite3.connect(dbname)) as con:
        cursor = con.execute(f"SELECT * from {table}")
        column_names = [description[0] for description in cursor.description]
        return [dict(zip(column_names, row)) for row in cursor.fetchall()]

In [16]:
def str_to_dict(s):
    """Decode the JSON document held in ``s`` with ``json.loads`` and return the result."""
    return json.loads(s)


def _row_total_tokens(row):
    """Total token count stored in the JSON response of one log row."""
    return str_to_dict(row["response"])["usage"]["total_tokens"]


def _row_first_request_message(row):
    """Content of the first message in the JSON request of one log row."""
    return str_to_dict(row["request"])["messages"][0]["content"]


def _row_first_choice_text(row):
    """Content of the first choice in the JSON response of one log row."""
    return str_to_dict(row["response"])["choices"][0]["message"]["content"]


log_data = get_log()
log_data_df = pd.DataFrame(log_data)

# Order matters: the token count is read from the raw JSON response, so it must
# be derived before "response" is overwritten with plain text below.
log_data_df["total_tokens"] = log_data_df.apply(_row_total_tokens, axis=1)

# Replace the raw JSON payloads with the text that is useful for reading the log.
log_data_df["request"] = log_data_df.apply(_row_first_request_message, axis=1)
log_data_df["response"] = log_data_df.apply(_row_first_choice_text, axis=1)

log_data_df

Unnamed: 0,id,invocation_id,client_id,wrapper_id,session_id,source_name,request,response,is_cached,cost,start_time,end_time,total_tokens
0,1,27ce45ea-077c-41d5-a80f-307f45c7c5b6,126558296521120,126558296813376,7dde265c-0b6b-465c-a2c5-b757f7f166f3,agent_manager,Welcome to the Search and Rescue Operations Co...,"Round 1 Starting:\nWelcome, team. Our mission ...",1,0.00469,2024-08-21 07:13:24.293604,2024-08-21 07:13:24.293902,335
1,2,aea63ab3-4b3e-4a8b-bdff-eb7d0dd113b1,126558296030464,126558295915392,7dde265c-0b6b-465c-a2c5-b757f7f166f3,speaker_selection_agent,You are in a role play game. The following rol...,agent_alpha,1,0.00335,2024-08-21 07:13:24.351304,2024-08-21 07:13:24.351541,331
2,3,ae8e0f9b-1b5b-4881-815a-77ae7ca25f99,126558297086224,126558297100688,7dde265c-0b6b-465c-a2c5-b757f7f166f3,agent_alpha,Welcome to our interactive text game! In this ...,Action selection: Move to Room 3.\nMessage to ...,1,0.00874,2024-08-21 07:13:24.365694,2024-08-21 07:13:24.366049,794
3,4,98b5771f-2cbc-4ed9-ae5e-ecb8970e13c0,126558295662848,126558295586848,7dde265c-0b6b-465c-a2c5-b757f7f166f3,speaker_selection_agent,You are in a role play game. The following rol...,agent_bravo,1,0.00385,2024-08-21 07:13:24.415190,2024-08-21 07:13:24.415366,379
4,5,2de280cc-0b84-4409-bb22-c076a1ad4348,126558296628816,126558297100832,7dde265c-0b6b-465c-a2c5-b757f7f166f3,agent_bravo,Welcome to our interactive text game! In this ...,Action selection: Move to Room 5.\nMessage to ...,1,0.00917,2024-08-21 07:13:24.428711,2024-08-21 07:13:24.428889,841
5,6,343ab784-644a-438a-b025-41d22ce4ebad,126558295789680,126558295721872,7dde265c-0b6b-465c-a2c5-b757f7f166f3,speaker_selection_agent,You are in a role play game. The following rol...,agent_charlie,1,0.00431,2024-08-21 07:13:24.479043,2024-08-21 07:13:24.479201,425
6,7,4bed783e-ca69-4dd3-9aef-42b10f1ec306,126558296398864,126563592232816,7dde265c-0b6b-465c-a2c5-b757f7f166f3,agent_charlie,Welcome to our interactive text game! In this ...,Action selection: Move to Room 6.\nMessage to ...,1,0.00957,2024-08-21 07:13:24.503811,2024-08-21 07:13:24.504294,885
7,8,7ac4cb5b-2c5b-4acd-8141-625fddbab491,126558295792272,126558295312512,7dde265c-0b6b-465c-a2c5-b757f7f166f3,speaker_selection_agent,You are in a role play game. The following rol...,agent_manager,1,0.00472,2024-08-21 07:13:24.552549,2024-08-21 07:13:24.552690,468
8,9,3a39badf-07bb-49f4-92b1-0ba4838b45ae,126558296521120,126558296813376,7dde265c-0b6b-465c-a2c5-b757f7f166f3,agent_manager,Welcome to the Search and Rescue Operations Co...,Round 1 Summary:\n- Agent Alpha moved to Room ...,1,0.00752,2024-08-21 07:13:24.565662,2024-08-21 07:13:24.565824,568
9,10,51109fec-f604-4b08-9086-c0dd22cd8f20,126558296275792,126558297100736,7dde265c-0b6b-465c-a2c5-b757f7f166f3,formatter,You are a formatter that should report the act...,"{\n agents_names: [""Agent_manager"", ""Agent_...",0,0.01104,2024-08-21 07:13:24.588047,2024-08-21 07:13:32.376801,636


### Computing cost

In [17]:
# Token totals: over every logged session, and over the current session only.
# A missing column makes both totals fall back to "unknown".
try:
    total_tokens = log_data_df["total_tokens"].sum()
    session_tokens = log_data_df.loc[
        log_data_df["session_id"] == logging_session_id, "total_tokens"
    ].sum()
except KeyError:
    total_tokens = "unknown"
    session_tokens = "unknown"

# Cost totals, computed the same way (not guarded against missing columns).
total_cost = log_data_df["cost"].sum()
session_cost = log_data_df.loc[
    log_data_df["session_id"] == logging_session_id, "cost"
].sum()

print(
    f"Total tokens for all sessions: {total_tokens!s}"
    f", total cost: {round(total_cost, 4)!s}$"
)
print(
    f"Total tokens for session {logging_session_id!s}: {session_tokens!s}"
    f", cost: {round(session_cost, 4)!s}$"
)

Total tokens for all sessions: 33799, total cost: 0.394$
Total tokens for session 118c5dd0-821a-4b5e-bade-653c6023c463: 5606, cost: 0.0644$


In [18]:
import json

# The last logged completion is taken to be the formatter's report: drop the
# "TERMINATE" marker it appends, then decode the remaining JSON.
last_logged_response = log_data_df.iloc[-1]["response"]
report_json = last_logged_response.replace("TERMINATE", "")
data = json.loads(report_json)
data

{'agents_names': ['agent_alpha', 'agent_bravo', 'agent_charlie'],
 'agents_actions': ['go_to_node_3', 'go_to_node_5', 'go_to_node_6'],
 'agents_comm': ["Heading to Room 3 to check for bombs. Let's spread out and inspect different rooms. Report any bomb sequences you find.",
  "Moving to Room 5 to search for bombs. Will update if I find anything. Let's keep the communication open.",
  "I'm going to Room 6 to look for bombs. Let's coordinate our findings and tool uses efficiently."]}

In [25]:
# Decode the report from the live chat history instead of the log database.
# `messages` is the module-level name that custom_speaker_selection_func rebinds
# to groupchat.messages, so this cell only works after the chat has run.
# NOTE(review): assumes the last chat message is the formatter's JSON report
# followed by "TERMINATE" -- confirm if the speaker order changes.
actions = messages[-1].get("content", "").replace("TERMINATE", "")
json.loads(actions)

{'agents_names': ['agent_alpha', 'agent_bravo', 'agent_charlie'],
 'agents_actions': ['go_to_node_3', 'go_to_node_5', 'go_to_node_6'],
 'agents_comm': ["Heading to Room 3 to check for bombs. Let's spread out and inspect different rooms. Report any bomb sequences you find.",
  "Moving to Room 5 to search for bombs. Will update if I find anything. Let's keep the communication open.",
  "I'm going to Room 6 to look for bombs. Let's coordinate our findings and tool uses efficiently."]}