In [1]:
import os
import json
import re
import numpy as np
import pandas as pd

from IPython import get_ipython
from typing_extensions import Annotated

import autogen
import openai
from autogen import Agent, GroupChat
from autogen.cache import Cache
from dragonTextEnv import DragonTextEnv

config_list = autogen.config_list_from_json(
    "./OAI_CONFIG_LIST",
    filter_dict={"tags": ["tool"]},  # comment out to get all
)

local_llm_config = {
    "config_list": [
        {
            "model": "NotRequired",  # Loaded with LiteLLM command
            "api_key": "NotRequired",  # Not needed
            "base_url": "http://0.0.0.0:4000",  # Your LiteLLM URL
            "price": [0, 0],  # Put in price per 1K tokens [prompt, response] as free!
        }
    ],
    "cache_seed": None,  # Turns off caching, useful for testing different models
}

In [2]:
config_list

[{'model': 'gpt-4-turbo',
  'api_key': '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',
  'tags': ['gpt-4-turbo', 'tool']},
 {'model': 'gpt-4o-2024-08-06',
  'api_key': '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',
  'tags': ['gpt-4o-snapshot', 'tool']},
 {'model': 'gpt-4o-mini',
  'api_key': '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',
  'tags': ['gpt-4o-mini', 'tool']}]

In [3]:
env = DragonTextEnv(
    seed=42,
    include_agent_action=True,
    allow_comm=True,
    act_and_comm=True,
    tool_per_agent=2,
)

initial_node = env.env.agents["alpha"].node.id

In [4]:
# background_prompt_agent = (
#     "Welcome to our interactive text game! In this game, you'll assume the role of a specialist on a search and rescue team.\n"
#     "Alongside two other players, you'll navigate a five-room environment with a mission to defuse five hidden bombs.\n"
#     "Your call sign is {agent_id}.\n"
#     "The Map: Imagine a network of rooms represented by a connected graph where each node corresponds to a room, and the edges between nodes depict hallways.\n"
#     "The rooms are numbered 0, 3, 6, 5, and 8. Room 0 is connected to all other rooms. Room 5 shares a hallway with room 6.\n"
#     "Room 3 is linked to room 8. And room 8 is also connected with room 6. You can only travel to adjacent, directly connected rooms at each turn.\n"
#     "The Challenge: Scattered among these rooms are five bombs, each coded with different phases represented by colors.\n"
#     "To defuse them, you'll need to use the correct wire-cutting tools in the correct sequence.\n"
#     "There are one-phase, two-phase, and three-phase bombs, needing 1, 2, or 3 color-coded tool applications in sequence to disarm.\n"
#     "For instance, a bomb with a red-green phase sequence requires the red tool first, then the green one.\n"
#     "Points are awarded based on the number of tools used for defusing a bomb, with each tool use worth 10 points.\n"
#     "Your task is to maximize the team score as soon as possible. The challenge is that the bomb locations and sequences are unknown to players at the start.\n"
#     "Tools: Each player is equipped with two color-coded wire cutters. Player Alpha has red and green tools, player Bravo wields green and blue, and player Charlie possesses blue and red.\n"
#     "Actions: Each round, you can opt to do one of the following: 1) Move to an adjacent room, 2) Inspect a bomb's phase sequence in your current room, or 3) Apply your wire cutters to a bomb in the current room.\n"
#     "Communications: In addition to selecting an action to take from the above list, you can also send communication message texts to both of your teammates in each round.\n"
#     "The message text you sent will be shared with both of your teammates in their observation in the next round.\n"
#     "Observation: While you can only see what's in your current room and read text messages from teammates, you'll also be informed of the current round number, team score and the current location of your teammates.\n"
#     "Your teammates have the same observability as you. They will not be able to know your action and its consequences unless you explicitly communicate.\n"
#     "You will be playing as Player {agent_id}. To facilitate our interaction, reply your action selection and communication messages in this fixed format:\n"
#     "Action selection: Your action.\n"
#     # "Message to Team: 'Your Message'.\n"
#     "To move to an adjacent room, say: 'Move to Room X'.\n"
#     "To inspect the sequence of a bomb in your current room, say: 'Inspect Bomb'.\n"
#     "To apply a wire cutter tool, say: 'Apply X Tool'.\n"
#     "Remember, your replies must adhere strictly to these rules.\n"
# )

In [5]:
background_prompt_agent = (
    "Welcome to our interactive text game! In this game, you'll assume the role of a specialist on a search and rescue team.\n"
    "Alongside two other players, you'll navigate a five-room environment with a mission to defuse five hidden bombs.\n"
    "Your call sign is {agent_id}.\n"
    "The Map: Picture a network of rooms connected like a graph. Each room is a node, and hallways are the edges connecting them.\n"
    "The rooms are numbered 0, 3, 6, 5, and 8. Room 0 connects to all other rooms. Room 5 is connected to room 6.\n"
    "Room 3 is linked to room 8, and room 8 is also connected to room 6. You can only move to directly connected rooms each turn.\n"
    "The Challenge: Five bombs are hidden in these rooms, each with phases shown by colors.\n"
    "To defuse a bomb, you need to use the right tools in the correct order.\n"
    "There are one-phase, two-phase, and three-phase bombs, needing 1, 2, or 3 color-coded tool applications in sequence to disarm.\n"
    "For example, a bomb with a red-green phase sequence needs the red tool first, then the green one.\n"
    "Points are awarded based on the number of tools used to defuse a bomb, with each tool use worth 10 points.\n"
    "Your goal is to maximize the team score as quickly as possible. The challenge is that the bomb locations and sequences are unknown to players at the start.\n"
    "Tools: Each player has two color-coded wire cutters. Player Alpha has red and green tools, Player Bravo has green and blue, and Player Charlie has blue and red.\n"
    "Actions: Each round, you MUST choose to do one of the following: 1) Move to an adjacent room, 2) Inspect a bomb's phase sequence if you find a bomb in your current room, or 3) Apply your wire cutters to a bomb in the current room.\n"
    "Note: Moving to a room and inspecting a bomb are two different actions that must be done on different turns.\n"
    "Communications: You can also send a message to your teammates each round.\n"
    "Communicating on the tools you own can help the team perform better.\n"
    "Observation: You can only see what's in your current room and read messages from your teammates. You'll also know the current round number, team score, and the current location of your teammates.\n"
    "You are Player {agent_id}. To interact, reply with your action selection and communication messages in this format:\n"
    "Action selection: Your action.\n"
    "To move to an adjacent room, say: 'Move to Room X'.\n"
    "To inspect the sequence of a bomb in your current room, say: 'Inspect Bomb'.\n"
    "To apply a wire cutter tool, say: 'Apply X Tool'.\n"
    "Remember, your replies must follow these rules exactly.\n"
    "Do NOT send messages to your teammates.\n"
)

In [6]:
background_prompt_manager = """Welcome to the Search and Rescue Operations Command Center! You are the Group Manager, and your main job is to guide the team by giving clear messages at the start and end of each round.

Mission Brief: The team must find and defuse five bombs hidden in different rooms. These bombs can be one-phase, two-phase, or three-phase, each needing specific tools. Alpha has red and green tools, Bravo has green and blue tools, and Charlie has blue and red tools.

Responsibilities:
1. At the start of each round, give a simple 'Round X Starting' message that explains the situation and any important updates.
2. After the agents choose their actions, send a 'Round X Summary' to remind them of their choices and sum up the progress.

Bomb Defusal: The bombs are hidden and must be found by inspecting rooms. When moving to a room, you may find a bomb. If you find one, you need to inspect it to get the defusing sequence. Each bomb type requires a different tool sequence to defuse. For example, a one-phase bomb needs one tool, while a three-phase bomb requires all three tools in a specific order.

Action Protocol: Moving to a room and inspecting a room are two different actions that must be done on different turns. Agents should communicate clearly, focusing on the facts of what they see and do, without instructing others on what actions to take. Cooperation is key, but each agent should decide on their own actions based on shared information.

Scoring: The goal is to get the highest team score by defusing bombs quickly and using the fewest tools. The score depends on how many bombs are defused, how efficiently the tools are used, and how fast the team works.

Readiness Check: Before starting, make sure the agents understand the plan, their roles, and what they need to do. You might ask them questions or confirm they know which tools they have and what their tasks are."""

In [7]:
# background_prompt_manager = """Welcome to the Search and Rescue Operations Command Center! As the Group Manager, your main responsibility is to provide strategic oversight with concise communication at the beginning and end of each round.

# Mission Brief: Guide the team to locate and defuse five hidden bombs that are scattered in unknown rooms. Provide 'Round X Starting' and 'Round X Summary' messages.

# Bomb Defusal: The exact positions of the bombs are unknown and must be discovered through inspection and room exploration. There are one-phase, two-phase, and three-phase bombs needing specific tool sequences. Alpha has red and green tools, Bravo has green and blue tools, Charlie has blue and red tools.

# Responsibilities:
# 1. Start each round with a brief 'Round X Starting' message outlining the current situation and any key updates.
# 2. After agents select their actions, send a 'Round X Summary' recalling agents' choices and summarizing progress. Conclude with 'TERMINATE' without any other symbols.

# Action Protocol: Facilitate communication among agents to ensure they coordinate effectively on room inspections, bomb identification, and tool application.

# Scoring: Aim to maximize the team score by efficiently defusing bombs with minimal tool use.

# Game Information: Use round updates, team score, player positions, and agents' actions to inform your messages.

# Readiness Check: Ensure agents understand the strategy and objectives before starting."""

In [8]:
DB_NAME = os.path.join(os.getcwd(), "dragonexp_autogen_data/logs.db")

logging_session_id = autogen.runtime_logging.start(
    config={
        "dbname": DB_NAME,
    }
)
print("Logging session ID: " + str(logging_session_id))

Logging session ID: db4bb8f1-b80f-47f9-9382-7bebf0b17494


In [9]:
llm_config = {
    "temperature": 0,
    "timeout": 120,
    "config_list": config_list,
}

formatter_config = {
    "temperature": 0,
    "timeout": 120,
    "config_list": config_list,
    # "response_format": DragonExpExtraction,  # TODO for structured outputs
}

codenames = [
    "alpha",
    "bravo",
    "charlie",
    "delta",
    "echo",
    "foxtrot",
    "golf",
    "hotel",
    "india",
    "juliett",
]

In [10]:
import logging

# logging.getLogger("autogen.oai.client").setLevel(logging.ERROR)

In [11]:
nb_agents = 3
agents = []
chat_agents = {}
for _, codename in enumerate(codenames[:nb_agents]):
    name = f"agent_{codename}"
    globals()[name] = autogen.AssistantAgent(
        name=name,
        llm_config=llm_config,
        system_message=background_prompt_agent.format(agent_id=codename),
        description=f"Agent {codename} is a specialist on a search and defuse team.",
    )
    agents.append(globals()[name])
    chat_agents[codename] = globals()[name]


agent_manager = autogen.AssistantAgent(
    name="manager",
    llm_config=llm_config,
    system_message=background_prompt_manager,
    description="The Manager of the Search and Defuse Operations Command Center",
)
agents.append(agent_manager)


# create a UserProxyAgent instance named "user_proxy"
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    is_termination_msg=lambda x: x.get("content", "")
    and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config={"work_dir": "coding", "use_docker": True},
)

agents.append(user_proxy)

formatter = autogen.AssistantAgent(
    name="formatter",
    llm_config=formatter_config,
    description="Formatter to extract experiment data",
    system_message="""You are a formatter that should report the actions and communications of the operative agents only, agent_manager not being one. The actions must be reported in the format :\
        {agents_names: list[str],
            agents_actions: list[str],
            agents_comm: list[str]}
        agents_names should be in lowercase with no spaces.
        agents_actions should be one of the following: 'go_to_node_X', 'inspect_bomb', 'apply_X_tool'.
        Answer "TERMINATE" after your task is done.""",
)

agents.append(formatter)



In [12]:
messages = []


def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):
    """Define a customized speaker selection function.
    A recommended way is to define a transition for each speaker in the groupchat.

    Returns:
        Return an `Agent` class or a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use.
    """
    globals()["messages"] = groupchat.messages
    if last_speaker is agent_manager:
        if (
            re.search(r"Round \w+ Summary:?", messages[-1].get("content", "").rstrip())
            is not None
        ):
            print("TOTAL NUMBER OF MESSAGES : " + str(len(messages)))
            return formatter
        else:
            return agent_alpha

    elif last_speaker is user_proxy:
        return agent_manager

    elif last_speaker is formatter:
        return user_proxy

    else:
        return "auto"

In [13]:
def format_messages(messages, codenames):
    # Variables to hold combined data
    round_number = None
    team_score = None
    room_info = {}
    actions = {}
    results = {}
    communication_messages = set()

    # Parsing the messages
    for i, message in enumerate(messages):
        parts = message.split(". ")

        # Extract round number and team score
        if round_number is None:
            round_number = parts[0].split(": ")[-1]
            team_score = parts[1].split(": ")[-1]

        # Extract specific information for each agent
        room_content = parts[4].replace("Room contents: ", "")
        action_taken = parts[3]
        result_info = parts[2].replace("Results: ", "")

        # Capture information specific to the agent
        room_info[codenames[i]] = room_content
        actions[codenames[i]] = action_taken
        results[codenames[i]] = result_info

        # Extract communication messages
        comm_message = (
            parts[-1]
            .replace("Communication messages sent by your teammates: ", "")
            .strip()
        )
        if comm_message:
            communication_messages.add(comm_message)

    # Construct the combined message
    combined_message = (
        f"Round: {round_number} ended. Total team score: {team_score}.\n\n"
    )

    for codename in codenames[: len(messages)]:
        combined_message += f"{codename}:\n"
        combined_message += (
            f"- Room Content: {room_info.get(codename, 'No relevant information.')}\n"
        )
        combined_message += (
            f"- Action: {actions.get(codename, 'No action recorded.')}\n"
        )
        combined_message += (
            f"- Results: {results.get(codename, 'No results recorded.')}\n\n"
        )

    if communication_messages:
        combined_message += "Communication messages sent by your teammates:\n"
        for comm_message in communication_messages:
            combined_message += f"- {comm_message}\n"
    else:
        combined_message += ""

    combined_message += f"Round {int(round_number) +1} will start."
    return combined_message

In [14]:
def generate_summary(feedback_list):
    summary = []
    round_info = ""
    total_score = ""

    for feedback in feedback_list:
        try:
            # Extract round and score information
            if not round_info and not total_score:
                round_part = feedback.split("Round: ")[1].split(".")[0]
                total_score_part = feedback.split("Total team score: ")[1].split(".")[0]
                round_info = round_part
                total_score = total_score_part

            # Extract player-specific information
            agent = (
                feedback.split("Contents of this room include ")[1]
                .split(",")[0]
                .strip()
                .lower()
            )
            room_info_part = feedback.split("Room contents: ")[1].split(".")[0].strip()
            bomb_info_part = feedback.split("Results: ")[1].split(".")[0].strip()
            action_info = (
                bomb_info_part.split("This bomb is a ")[1].split("-stage")[0]
                + "-stage bomb"
                if "This bomb is a" in bomb_info_part
                else "No bomb inspected"
            )
            remaining_sequence = (
                bomb_info_part.split("sequence is ")[1]
                if "sequence is" in bomb_info_part
                else ""
            )
            bomb_results = bomb_info_part.split(".")[0]

            summary.append(
                f"{agent}:\n- Room Content: {room_info_part}\n- Action: {action_info} and its remaining sequence is {remaining_sequence}\n- Results: {bomb_results}"
            )

        except (IndexError, ValueError):
            summary.append("There was an issue parsing one of the feedback entries.")

    # Final summary
    communication_info = "Communication messages sent by your teammates: (None)"
    final_summary = (
        f"Round: {round_info}. Total team score: {total_score}.\n\n"
        + "\n\n".join(summary)
        + f"\n\n{communication_info}"
    )

    return final_summary

In [15]:
def create_unique_filename(filename):
    """
    Check if a file already exists. If it exists, add _1, _2, etc., until a unique name is found.
    Returns the unique filename.
    """
    base, extension = os.path.splitext(filename)
    counter = 1
    unique_filename = filename

    while os.path.exists(unique_filename):
        unique_filename = f"{base}_{counter}{extension}"
        counter += 1

    return unique_filename

In [16]:
def initialize_groupchat(
    who_initiates: Agent = user_proxy,
    initiation_message: str = f"Agent_manager initiates a new game session and agents procede to play the game. All agents are starting on node {initial_node}.",
    messages_list: list = [],
):
    groupchat = autogen.GroupChat(
        agents=agents,
        messages=messages_list,
        max_round=10,
        speaker_selection_method=custom_speaker_selection_func,  # TODO needs tuning to properly give the manager the first turn and formatter the last
    )
    gc_manager = autogen.GroupChatManager(
        name="groupchat_manager",
        groupchat=groupchat,
        llm_config=llm_config,
        is_termination_msg=lambda x: x.get("content", "")
        and x.get("content", "").rstrip().endswith("TERMINATE"),
    )

    with Cache.disk() as cache:
        who_initiates.initiate_chat(
            gc_manager,
            message=initiation_message,
        )

In [17]:
Action = env.env.action_enum


def decode_actions(actions_dict):
    from gym_dragon.core import Tool

    chat_agents = {}
    agents_names = actions_dict.get("agents_names", [])
    actions = actions_dict.get("agents_actions", [])
    for agent_name, action in zip(agents_names, actions):
        agent_name = agent_name.split("_")[-1]
        if action.startswith("go_to_node_"):
            room = int(action.split("_")[-1])
            chat_agents[agent_name] = Action.go_to(room)
        elif action == "inspect_bomb":
            chat_agents[agent_name] = Action.inspect_bomb
        elif action.startswith("apply"):
            tool = action.split("_")[-2]
            if tool == "red":
                chat_agents[agent_name] = Action.use_tool(Tool.red)
            elif tool == "green":
                chat_agents[agent_name] = Action.use_tool(Tool.green)
            elif tool == "blue":
                chat_agents[agent_name] = Action.use_tool(Tool.blue)

    return chat_agents

In [64]:
DATA_PATH = "./dragonexp_autogen_data/"
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)

RECORD_PATH = create_unique_filename(DATA_PATH + "record.json")
SUMMARY_PATH = create_unique_filename(DATA_PATH + "summary.csv")

done = {"__all__": False}
round = 1
max_step = 15


belief = False
ToM = False
save_record = False
save_summary = True

chat_output = {}
actions = {}
communications = {}

init_actions = {
    "alpha": Action.go_to(initial_node),
    "bravo": Action.go_to(initial_node),
    "charlie": Action.go_to(initial_node),
}
feedbacks = []
while not done["__all__"] and round <= max_step:
    # Pick actions for each agent
    if round == 1 and not belief:
        for agent_id in chat_agents.keys():
            _, reward, done, info, obs_text = env.step(
                agent_id, 0, init_actions, communications
            )

        initialize_groupchat()
    else:
        # if re.search(r"Round \w+ Summary:?", messages[-1].get("content", "").rstrip()):
        #     print("\n\n\n\n ROUND SUMMARY DETECTED \n\n\n\n\n\n")
        summary_message = format_messages(feedbacks, codenames)
        # response = openai.chat.completions.create(
        #     model="gpt-4-turbo-preview",
        #     temperature=0.0,
        #     messages=[
        #         {
        #             "role": "system",
        #             "content": "You will be sent feedback strings, generate a single string that will sum up the informations, with a part for each agent. Do not use latex formatting. Go straight to the point.",
        #         },
        #         {
        #             "role": "user",
        #             "content": "[" + ",".join(feedbacks) + "]",
        #         },
        #     ],
        # )
        # summary_message = response.choices[0].message.content
        feedbacks = []
        # messages.append(summary_message)

        # init_message = "Round {round} Starting, update your beliefs according to recent observations and pick your actions".format(
        #     round=round
        # )

        initialize_groupchat(
            user_proxy,
            initiation_message=summary_message,  # messages_list=messages
        )

    raw_actions = json.loads(messages[-1].get("content", "").replace("TERMINATE", ""))
    actions = decode_actions(raw_actions)

    for agent_id in chat_agents.keys():
        chat_agent = chat_agents[agent_id]

        _, reward, done, info, obs_text = env.step(
            agent_id, round, actions, communications
        )

        feedbacks.append(obs_text)

        # new_belief = chat_agent.update_history(obs_text)

        agent = env.env.agents[agent_id]

        ground_truth = None
        ToM1st = None
        ToM2nd = None
        ToM3rd = None

        if ToM:
            target_id = np.random.choice(
                [x for x in chat_agents.keys() if x != agent_id]
            )
            if actions[agent_id].node() is not None:
                if agent.node.id != actions[agent_id].node().id:
                    ground_truth = False
                else:
                    ground_truth = True

                # first-order ToM / introspective
                ToM1st = chat_agent.ask(
                    obs_text
                    + "Do you know the current contents of room {room_id}?".format(
                        player_id=target_id, room_id=actions[agent_id].node().id
                    )
                )
                # second-order ToM
                ToM2nd = chat_agent.ask(
                    obs_text
                    + "Does player {player_id} know the current contents of room {room_id}?".format(
                        player_id=target_id, room_id=actions[agent_id].node().id
                    )
                )
                # third-order ToM
                ToM3rd = chat_agent.ask(
                    obs_text
                    + "Based on the observation and previous history, is player {player_id} aware of the fact that you know the current contents of room {room_id}?".format(
                        player_id=target_id, room_id=actions[agent_id].node().id
                    )
                )
            elif actions[agent_id].tool() is not None:
                ground_truth = False
                if agent.bomb:
                    bomb_id = agent.bomb.id

                    # first-order ToM / introspective
                    ToM1st = chat_agent.ask(
                        obs_text
                        + "Do you know the state and remaining sequence of bomb {bomb_id} has been changed?".format(
                            player_id=target_id, bomb_id=bomb_id
                        )
                    )

                    # second-order ToM
                    ToM2nd = chat_agent.ask(
                        obs_text
                        + "Does player {player_id} know the state and remaining sequence of bomb {bomb_id} has been changed?".format(
                            player_id=target_id, bomb_id=bomb_id
                        )
                    )
                    # third-order ToM
                    ToM3rd = chat_agent.ask(
                        obs_text
                        + "Based on the observation and previous history, is player {player_id} aware of the fact that you have changed the state and remaining sequence of bomb {bomb_id}?".format(
                            player_id=target_id, bomb_id=bomb_id
                        )
                    )
                elif isinstance(reward, int):
                    if reward >= 0:
                        ToM1st = chat_agent.ask(
                            obs_text + "Do you know a bomb phase has just been defused?"
                        )

                        # second-order ToM
                        ToM2nd = chat_agent.ask(
                            obs_text
                            + "Does player {player_id} know a bomb phase has just been defused?".format(
                                player_id=target_id
                            )
                        )
                        # third-order ToM
                        ToM3rd = chat_agent.ask(
                            obs_text
                            + "Based on the observation and previous history, is player {player_id} aware of the fact that you know a bomb phase has just been defused?".format(
                                player_id=target_id
                            )
                        )

            elif actions[agent_id] == Action.inspect_bomb:
                if agent.bomb:
                    bomb_id = agent.bomb.id
                    who_has_inspected_what[agent_id].add(bomb_id)
                    ground_truth = bomb_id in who_has_inspected_what[target_id]
                    # first-order ToM / introspective
                    ToM1st = chat_agent.ask(
                        obs_text
                        + "Do you know the sequence of bomb {bomb_id}?".format(
                            bomb_id=bomb_id
                        )
                    )
                    # second-order ToM
                    ToM2nd = chat_agent.ask(
                        obs_text
                        + "Does player {player_id} know the sequence of bomb {bomb_id}?".format(
                            player_id=target_id, bomb_id=bomb_id
                        )
                    )
                    # third-order ToM
                    ToM3rd = chat_agent.ask(
                        obs_text
                        + "Based on the observation and previous history, is player {player_id} aware of the fact that you know the sequence of bomb {bomb_id}?".format(
                            player_id=target_id, bomb_id=bomb_id
                        )
                    )
                else:
                    ground_truth = None
                    ToM1st = None
                    ToM2nd = None
                    ToM3rd = None
            else:
                ground_truth = None
                ToM1st = None
                ToM2nd = None
                ToM3rd = None

        if save_record:
            record = {
                "round": round,
                "agent_id": agent_id,
                "chat_output": chat_output[agent_id],
                "action": actions[agent_id].name.replace("_", " "),
                "comm": communications[agent_id],
                "obs_text": obs_text,
            }
            with open(RECORD_PATH, "a+", encoding="utf-8") as f:
                json.dump(record, f)

        if save_summary:
            summary = {
                "round": round,
                "agent_id": agent_id,
                # "chat_output": chat_output[agent_id],
                "action": actions[agent_id].name.replace("_", " "),
                # "comm": communications[agent_id],
                "obs_text": obs_text,
                # "new_belief": new_belief,
                # "ToM1st": ToM1st,
                # "ToM2nd": ToM2nd,
                # "ToM3rd": ToM3rd,
            }
            with open(SUMMARY_PATH, "a+", encoding="utf-8") as f:
                for k, v in summary.items():
                    f.write(str(v).replace(",", ";").replace("\n", ""))
                    f.write(",")
                f.write("\n")

    round += 1

[33muser_proxy[0m (to groupchat_manager):

Agent_manager initiates a new game session and agents procede to play the game. All agents are starting on node 5.

--------------------------------------------------------------------------------


[32m
Next speaker: agent_manager
[0m
[33magent_manager[0m (to groupchat_manager):

**Round 1 Starting**

Welcome, team! All agents are currently positioned at node 5. Your primary objectives are to explore the surrounding rooms to locate and identify any bombs. Remember, moving to a room and inspecting it are separate actions. Communicate your findings clearly and decide your moves wisely based on the shared information.

Let's start the search efficiently and stay alert for any suspicious devices. Good luck, team!

--------------------------------------------------------------------------------
[32m
Next speaker: agent_alpha
[0m
[33magent_alpha[0m (to groupchat_manager):

Action selection: Move to Room 6.

--------------------------------------------------------------------------------
[32m
Next speaker: agent_bravo
[0m
[33magent_bravo[0m (to groupchat_manager):

Action selection: Move to Room 0.

----------------------------------------------------------------------------

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
autogen.runtime_logging.stop()

### Logging processing

In [None]:
def get_log(dbname=DB_NAME, table="chat_completions"):
    import sqlite3

    con = sqlite3.connect(dbname)
    query = f"SELECT * from {table}"
    cursor = con.execute(query)
    rows = cursor.fetchall()
    column_names = [description[0] for description in cursor.description]
    data = [dict(zip(column_names, row)) for row in rows]
    con.close()
    return data

In [None]:
def str_to_dict(s):
    return json.loads(s)


log_data = get_log()
log_data_df = pd.DataFrame(log_data)

log_data_df["total_tokens"] = log_data_df.apply(
    lambda row: str_to_dict(row["response"])["usage"]["total_tokens"], axis=1
)

log_data_df["request"] = log_data_df.apply(
    lambda row: str_to_dict(row["request"])["messages"][0]["content"], axis=1
)

log_data_df["response"] = log_data_df.apply(
    lambda row: str_to_dict(row["response"])["choices"][0]["message"]["content"], axis=1
)

log_data_df

Unnamed: 0,id,invocation_id,client_id,wrapper_id,session_id,source_name,request,response,is_cached,cost,start_time,end_time,total_tokens
0,1,bc65d748-a9a6-4c74-904c-efce8c533dd5,127066449239440,127066449456480,9588dc8d-ba67-4ecd-86b0-0aeeaf8bae24,agent_manager,Welcome to the Search and Rescue Operations Co...,"**Round 1 Starting**\n\nWelcome, team! All age...",1,0.00692,2024-08-28 09:38:42.405215,2024-08-28 09:38:42.405424,522
1,2,53354551-d4e1-4492-b55b-ec2c6269eb29,127066450398416,127066449772704,9588dc8d-ba67-4ecd-86b0-0aeeaf8bae24,agent_alpha,Welcome to our interactive text game! In this ...,Action selection: Move to Room 6,1,0.00705,2024-08-28 09:38:42.429141,2024-08-28 09:38:42.429519,689
2,3,643e02be-6b18-409d-86b8-aa6c0931585b,127066252639296,127066252653472,9588dc8d-ba67-4ecd-86b0-0aeeaf8bae24,speaker_selection_agent,You are in a role play game. The following rol...,agent_bravo,1,0.00383,2024-08-28 09:38:42.479448,2024-08-28 09:38:42.479598,377
3,4,1bd7969f-2dab-40de-83aa-82ca55e57361,127066449559264,127066449772848,9588dc8d-ba67-4ecd-86b0-0aeeaf8bae24,agent_bravo,Welcome to our interactive text game! In this ...,Action selection: Move to Room 0,1,0.00722,2024-08-28 09:38:42.496395,2024-08-28 09:38:42.496818,706
4,5,acb33ba4-2161-47a8-b502-62add78c3e0f,127066252301696,127066252247728,9588dc8d-ba67-4ecd-86b0-0aeeaf8bae24,speaker_selection_agent,You are in a role play game. The following rol...,agent_charlie,1,0.00399,2024-08-28 09:38:42.547980,2024-08-28 09:38:42.548163,393
...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,110,eda95b11-7a8b-4d26-8305-79d9a3102849,129537971854688,129537972618096,843b1b70-0f0c-411c-894a-dfae8ccc77ec,speaker_selection_agent,You are in a role play game. The following rol...,agent_charlie,0,0.00679,2024-08-28 09:56:55.846612,2024-08-28 09:56:56.614264,673
110,111,059d6d93-3858-4440-ac9f-adba5312ecb8,129538243395152,129538192381504,843b1b70-0f0c-411c-894a-dfae8ccc77ec,agent_charlie,Welcome to our interactive text game! In this ...,Action selection: Move to Room 0,0,0.01047,2024-08-28 09:56:56.629447,2024-08-28 09:56:57.634693,1031
111,112,8b6b5890-2ff5-4513-af6d-9691122cba08,129538083757216,129537972224304,843b1b70-0f0c-411c-894a-dfae8ccc77ec,speaker_selection_agent,You are in a role play game. The following rol...,agent_manager,0,0.00692,2024-08-28 09:56:57.686612,2024-08-28 09:56:58.615213,688
112,113,42430e88-8837-4da1-9ec0-4106847af779,129538084345168,129538440332816,843b1b70-0f0c-411c-894a-dfae8ccc77ec,agent_manager,Welcome to the Search and Rescue Operations Co...,Round 5 Summary:\n\n- Alpha moved to Room 6 to...,0,0.01177,2024-08-28 09:56:58.628804,2024-08-28 09:57:06.877838,963


### Computing cost

In [None]:
# Sum totoal tokens for all sessions
try:
    total_tokens = log_data_df["total_tokens"].sum()
    # Total tokens for specific session
    session_tokens = log_data_df[log_data_df["session_id"] == logging_session_id][
        "total_tokens"
    ].sum()
except KeyError:
    total_tokens = "unknown"
    session_tokens = "unknown"

# Sum total cost for all sessions
total_cost = log_data_df["cost"].sum()


session_cost = log_data_df[log_data_df["session_id"] == logging_session_id][
    "cost"
].sum()
print(
    "Total tokens for all sessions: "
    + str(total_tokens)
    + ", total cost: "
    + str(np.round(total_cost, 4))
    + "$"
)

print(
    "Total tokens for session "
    + str(logging_session_id)
    + ": "
    + str(session_tokens)
    + ", cost: "
    + str(np.round(session_cost, 4))
    + "$"
)

non_cached = log_data_df[log_data_df["is_cached"] == 0]
non_cached_cost = non_cached["cost"].sum()

print(
    "Total non-cached tokens : "
    + str(non_cached["total_tokens"].sum())
    + ", total cost: "
    + str(np.round(non_cached_cost, 4))
    + "$"
)

Total tokens for all sessions: 76518, total cost: 0.8302$
Total tokens for session 843b1b70-0f0c-411c-894a-dfae8ccc77ec: 37485, cost: 0.4069$
Total non-cached tokens : 60086, total cost: 0.6419$
