In [35]:
from dotenv import load_dotenv
import os
from collections import defaultdict
import json
import pandas as pd
import time

load_dotenv()

if os.getenv("GROQ_API_KEY"):
    print('Groq API Key loaded successfully')
else:
    print('Groq API Key loading failed, please make sure the .env file exists and the spelling is correct')

if os.getenv("TAVILY_API_KEY"):
    print('Tavily API Key loaded successfully')
else:
    print('Tavily API Key loading failed, please make sure the .env file exists and the spelling is correct')

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5


Python-dotenv could not parse statement starting at line 4


Groq API Key loaded successfully
Tavily API Key loading failed, please make sure the .env file exists and the spelling is correct


In [14]:
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_groq import ChatGroq
import random

from IPython.display import Image, display
from langchain_core.runnables.graph import MermaidDrawMethod

load_dotenv()


Python-dotenv could not parse statement starting at line 4


True

## Class for Game Logic

In [15]:
class Game:
    def __init__(self, board_id):
        self.board = self.initialize_board(board_id)
        self.red_words = [key for key, value in self.board.items() if value == "red"]
        self.blue_words = [key for key, value in self.board.items() if value == "blue"]
        self.black_words = [key for key, value in self.board.items() if value == "black"]
        self.neutral_word = [key for key, value in self.board.items() if value == "neutral"]
        self.has_turn = "RED"
        self.winner = None
        self.last_spymaster_answer = None
        self.rounds = 0
        self.guessed_words = set()
        self.max_guesses = 1 # number of words to guess. Updated at every new spymaster clue
        self.current_guess_counter = 0
        self.right_guesses = 0
        self.opponent_guesses = 0
        self.neutral_guesses = 0
        self.invalid_guesses = 0
        self.black_guesses = 0
        self.export = {'red':[], 'blue':[]}
    
        self.current_turn = None

    def initialize_board(self, board_id):
        if board_id == "baseline":
            # hardcoded board for baseline
            board = {'OIL': 'red', 
                     'IVORY': 'red', 
                     'STADIUM': 'blue', 
                     'BOOM': 'neutral', 
                     'TAIL': 'blue', 
                     'BOMB': 'black', 
                     'GROUND': 'neutral', 
                     'COMPOUND': 'blue', 
                     'KID': 'red', 
                     'CLOAK': 'blue', 
                     'SKYSCRAPER': 'blue', 
                     'BLOCK': 'blue', 
                     'GENIUS': 'red', 
                     'IRON': 'red', 
                     'RULER': 'neutral', 
                     'WORM': 'red', 
                     'SHARK': 'neutral', 
                     'FLY': 'red', 
                     'PILOT': 'blue', 
                     'OLYMPUS': 'neutral', 
                     'KETCHUP': 'blue', 
                     'THIEF': 'neutral', 
                     'ICE CREAM': 'red', 
                     'CAR': 'neutral', 
                     'LAWYER': 'red'}
        else:
            word_list = self.load_words()
            random.seed(int(board_id))
            board_words = random.sample(word_list, 25)
            colors = ["red"] * 9 + ["blue"] * 8 + ["neutral"] * 7 + ["black"] * 1
            random.shuffle(colors)
            board = dict(zip(board_words, colors))
        return board
    
    def load_words(self):
        with open('wordlist-eng.txt', 'r') as file:
            words = [line.strip() for line in file]
            return words


## Class for Game History

In [16]:
class History():
    def __init__(self,board): 
        self.board = board
        self.history = []
        self.winner = None
        
    def add_round(self, team, clue, number):
        self.history.append(
            {
            "team" : team,
            "spymaster_clue" : clue,
            "number" : number,
            "actions" : []
            }
        )

    def add_guesser_action(self, teammember, tool_call, tool_args, tool_return, outcome=None):     
        # add action
        action = {
            "teammember" : teammember,
            "tool_call" : tool_call,
            "tool_args" : tool_args,
            "tool_return" : tool_return
        }
        if outcome is not None:
            action["outcome"] = outcome
        self.history[-1]["actions"].append(action)

    def set_winner(self, winner):
        self.winner = winner

    def save(self, filename):
        output = {
            "board" : self.board,
            "history" : self.history,
            "winner" : self.winner
        }
        
        with open(filename, 'w') as file:
            json.dump(output, file)

    # def print_stats(self):
    #     print(" STATS ".center(50, "-"))
    #     print("Guesses per category:")
    #     print(self._getNumberOfGuessesPerCategory())

    # def _getNumberOfGuessesPerCategory(self):
    #     guesses = {
    #         "RED":{},
    #         "BLUE":{}
    #     }
    #     for round in self.history:
    #         for turn in round['guesser_turns']:
    #             if turn['decision'] == 'guess':
    #                 if turn['outcome'] not in guesses[round["team"]].keys():
    #                     guesses[round["team"]][turn['outcome']] = 0
    #                 guesses[round["team"]][turn['outcome']] += 1
    #     return guesses



## Tool Preparation

In [17]:
from typing import Optional, Type

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import BaseModel, Field, PrivateAttr
from langchain.tools import BaseTool, StructuredTool, tool


class GuessInput(BaseModel):
    query: str = Field(description="should be a single word from the game-board")

class GuessTool(BaseTool):
    name: str = "codenames_guess_tool"
    description: str = "essential to guess words from the game-board"
    args_schema: Type[BaseModel] = GuessInput
    game: Game = PrivateAttr()

    def __init__(self, game):
        super().__init__()
        # Store the game object as an instance variable
        self.game = game

    def __call__(self, query: str) -> str:
        """Allows the tool to be callable directly."""
        return self._run(query)

    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool with context from the game object."""
        query = query.upper()
        if query in self.game.board.keys() and query not in self.game.guessed_words:
            self.game.guessed_words.add(query)
            return self.game.board[query]
        else:
            return f"WRONG: {query}"

    async def _arun(
        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool asynchronously with context from the game object."""
        raise NotImplementedError("custom_game_tool does not support async")
    

class DiscussInput(BaseModel):
    query: str = Field(description="should be the message for the teammembers")

class DiscussTool(BaseTool):
    name: str = "codenames_discuss_tool"
    description: str = "essential to communicate with your teammembers"
    args_schema: Type[BaseModel] = DiscussInput

    def __init__(self):
        super().__init__()

    def __call__(self, query: str) -> str:
        """Allows the tool to be callable directly."""
        return self._run(query)

    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool to communicate with your teammembers, the query is the message."""
        return query    

    async def _arun(
        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool asynchronously with context from the game object."""
        raise NotImplementedError("custom_game_tool does not support async")
    

#class RefrainTool(BaseTool):
    #name: str = "codenames_refrain_tool"
    #description: str = "essential to refrain from interacting to let your teammembers interact"
    #args_schema: Type[BaseModel] = DiscussInput

    #def __init__(self):
     #   super().__init__()

    #def __call__(self, query="") -> str:
    #    """Allows the tool to be callable directly."""
    #    return self._run(query)

    #def _run(
    #    self, query, run_manager: Optional[CallbackManagerForToolRun] = None
    #) -> str:
      #  """Use the tool to refrain from interaction, to let your teammembers interact."""
     #   if query:
       #     return query
      #  else:
      #      return "Has refrained from interaction."    

    async def _arun(
        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool asynchronously with context from the game object."""
        raise NotImplementedError("custom_game_tool does not support async")
    

class EndTurnTool(BaseTool):
    name: str = "codenames_end_turn_tool"
    description: str = "essential to end turn before spending all available guesses, for high uncertainty and risk mitigation"
    args_schema: Type[BaseModel] = None

    def __init__(self):
        super().__init__()

    def __call__(self, query="") -> str:
        """Allows the tool to be callable directly."""
        return self._run(query)

    def _run(
        self, query, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool to end your turn if you have guessed all words with a obvious relation to the clue"""
        if query:
            return query
        else:
            return "Has ended turn preemptively."    

    async def _arun(
        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool asynchronously with context from the game object."""
        raise NotImplementedError("custom_game_tool does not support async")

## Agent Class

In [18]:
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], operator.add]

class Agent:
    
    def __init__(self, models: dict[str, dict[str, ChatGroq]], game: Game, api_key:str, tools=None, max_rounds=1):
        self.game = game
        self.models = models
        self.last_message_id = None
        self.max_rounds = max_rounds
        self.end_turn = False
        self.current_guesser_red = 1
        self.current_guesser_blue = 1
        self.invalid_tool_call = False
        self.last_teammember_prompt = None
        self.last_teammember_output = None
        self.history = History(self.game.board)

        graph = StateGraph(AgentState)

        # Setting up the nodes for the state graph
        graph.add_node("spymaster_red", self.call_red_spymaster)
        graph.add_node("spymaster_blue", self.call_blue_spymaster)
        graph.add_node("teammember_red", self.call_red_guesser)
        graph.add_node("teammember_blue", self.call_blue_guesser)
        graph.add_node("action_red", self.take_action)
        graph.add_node("action_blue", self.take_action)

        # Conditional edges based on game state
        graph.add_conditional_edges(
             "action_blue",
             self.define_next_player,
             {0: END, 1: "spymaster_red", 2: "teammember_blue"}
        )
        graph.add_conditional_edges(
             "action_red",
             self.define_next_player,
             {0: END, 1: "spymaster_blue", 2: "teammember_red"}
        )

        # Add edges for turn transitions
        graph.add_edge("spymaster_red", "teammember_red")
        graph.add_edge("spymaster_blue", "teammember_blue")
        graph.set_entry_point("spymaster_red")

        # Conditional edges that check for presence of tool call otherwise invoke next team member (see this as a fail safe)
        graph.add_conditional_edges(
            "teammember_red",
            self.exists_tool_call,
            {True: "action_red", False: "teammember_red"}
        )
        graph.add_conditional_edges(
            "teammember_blue",
            self.exists_tool_call,
            {True: "action_blue", False: "teammember_blue"}
        )
        
        # Compile graph to LangChain Runnable
        self.graph = graph.compile()

        # add LLM to models
        for player_type, player_list in self.models.items():
            if "spymaster" in player_type:
                for spymaster in player_list:
                    spymaster["model"] = ChatGroq(
                        model="llama-3.1-70b-versatile",
                        temperature=0.0,
                        max_tokens=20,
                        timeout=None,
                        max_retries=2,
                        api_key=api_key
                        )

            elif "teammember" in player_type:
                for teammember in player_list:
                    teammember["model"] = ChatGroq(
                        model="llama-3.1-70b-versatile",
                        temperature=0.0,
                        max_tokens=None,
                        timeout=None,
                        max_retries=2,
                        api_key=api_key
                        )
                
        # Bind tools to team member models
        self.tools = {t.name: t for t in tools}
        for player_type, player_list in self.models.items():
            if "teammember" in player_type:
                for teammember in player_list:
                    teammember["model"] = teammember["model"].bind_tools(tools)


    def define_next_player(self, state: AgentState):
        exists_guess = self.exists_guess(state)
        game_end = self.check_game_end(state)
        if game_end:
            return 0
        elif exists_guess and (self.game.current_guess_counter >= self.game.max_guesses or self.end_turn):
            return 1
        else:
            return 2

    def exists_guess(self, state: AgentState):
        last_tool_messages = []
        messages = state['messages']
        index = len(messages) - 1

        while index >= 0:
            message = messages[index]
            if isinstance(message, ToolMessage):
                last_tool_messages.append(message)
            else:
                # Stop if we encounter a non-ToolMessage type
                break
            index -= 1
        was_guessed = any(m.name == "codenames_guess_tool" for m in last_tool_messages)
        return was_guessed
    
    def exists_tool_call(self, state: AgentState):
        result = state['messages'][-1]
        print(f"Test:{result}")
        exists_tool_call = len(result.tool_calls) > 0
        if not exists_tool_call:
            self.invalid_tool_call = True
        return exists_tool_call

            
    def call_red_spymaster(self, state: AgentState):
        return self._call_spymaster("RED", state)

    def call_blue_spymaster(self, state: AgentState):
        return self._call_spymaster("BLUE", state)

    def call_red_guesser(self, state: AgentState):
        #number = random.randint(1,self.team_size)
        if not self.invalid_tool_call:
            # go to next player
            self.current_guesser_red = (self.current_guesser_red % len(self.models["teammembers_RED"]))+ 1

        return self._call_guesser("RED", self.current_guesser_red, state)

    def call_blue_guesser(self, state: AgentState):
        #number = random.randint(1,self.team_size)
        if not self.invalid_tool_call:
            # go to next player
            self.current_guesser_blue = (self.current_guesser_blue % len(self.models["teammembers_BLUE"]))+ 1

        return self._call_guesser("BLUE", self.current_guesser_blue, state)
    
    def check_game_end(self, state: AgentState):
        game_over = False
        if self.game.rounds > self.max_rounds:
            game_over = True
        game_ends_black = any(word in self.game.guessed_words for word in self.game.black_words)
        game_ends_red = all(word in self.game.guessed_words for word in self.game.red_words)
        game_ends_blue = all(word in self.game.guessed_words for word in self.game.blue_words)
        if game_ends_black or game_ends_red or game_ends_blue:
            game_over = True
        if game_ends_black:
            self.game.winner = "BLUE" if self.game.has_turn == "RED" else "RED"
        elif game_ends_red:
            self.game.winner = "RED"
        elif game_ends_blue:
            self.game.winner = "BLUE"

        

        
        if game_over:
            self.history.set_winner(self.game.winner)
            print(f"guessed_words: {', '.join(self.game.guessed_words)}")
            print(f"WINNER: {self.game.winner}")
            print(f"ROUNDS PLAYED: {self.game.rounds}")
            


            return True
        return False
    
    def take_action(self, state: AgentState):
        tool_calls = state.get('messages', [])[-1].tool_calls
        results = []
        all_calls_invalid = True        
        for t in tool_calls:
            if t["name"]=='codenames_guess_tool':
                all_calls_invalid = False
                self.game.current_guess_counter += 1
                if self.game.current_guess_counter >= self.game.max_guesses:
                    # end turn after this turn
                    # this guess can still be taken
                    self.end_turn = True
                
                result = self.tools[t['name']].invoke(t['args'])
                guessed_word_color = result.lower()
                results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result))) 

                # Check the result to determine if the turn should end
                if guessed_word_color == self.game.has_turn.lower():
                    # Correct guess, team can continue if max_guesses not reached
                    self.game.right_guesses  = self.game.right_guesses +1
                    print(f"Correct! {t['args']} belongs to team {guessed_word_color}.")
                    # for export
                    isCorrect = "correct"
                elif guessed_word_color == "black":
                    #End game, other team wins
                    self.game.black_guesses = self.game.black_guesses +1
                    print(f"Black word guessed! {self.game.has_turn} loses.")
                    self.end_turn = True
                    # for export
                    isCorrect = "incorrect - black"
                elif guessed_word_color == "neutral":
                    # Neutral word, end turn
                    self.game.neutral_guesses = self.game.neutral_guesses +1
                    self.end_turn = True
                    print(f"Incorrect! {t['args']} is a {guessed_word_color} word.")
                    # for export
                    isCorrect = "incorrect - neutral"
                elif guessed_word_color == 'red' or guessed_word_color == 'blue':
                    # Opponent's word, end turn
                    self.game.opponent_guesses = self.game.opponent_guesses +1
                    self.end_turn = True
                    print(f"Incorrect! {t['args']} is a {guessed_word_color} word.")
                    # for export
                    isCorrect = "incorrect - opponent"
                else:
                    isCorrect = "incorrect - invalid guess"
                    print(f"Incorrect! {t['args']} is a invalid word.")
                    self.game.invalid_guesses = self.game.invalid_guesses +1
                    self.end_turn = True

                # Export
                if len(self.game.export[self.game.has_turn.lower()]) == self.game.rounds:
                    self.game.export[self.game.has_turn.lower()][-1]['guesses'].append((t["args"]['query'],isCorrect))
                else:
                    self.game.export[self.game.has_turn.lower()].append(
                        {
                            'clue':self.game.last_spymaster_answer.content, 
                            'guesses':[(t["args"]['query'],isCorrect)]
                        }
                    )
                self.history.add_guesser_action(f"guesser_{self.game.has_turn}_{self.current_guesser_blue}", t['name'], t['args'], result, outcome=isCorrect)

                # End the turn if this was decided
                if self.end_turn:
                    break
                
            elif t["name"] == 'codenames_discuss_tool':
                all_calls_invalid = False
                result = self.tools[t['name']].invoke(t['args'])
                results.append(ToolMessage(id=self.last_message_id, tool_call_id=t['id'], name=t['name'], content=str(result)))
                self.history.add_guesser_action(f"guesser_{self.game.has_turn}_{self.current_guesser_blue}", t['name'], t['args'], result)
            elif t["name"]=='codenames_end_turn_tool':
                all_calls_invalid = False
                result = self.tools[t['name']].invoke(t['args'])
                results.append(ToolMessage(id=self.last_message_id, tool_call_id=t['id'], name=t['name'], content=str(result)))
                self.end_turn = True
                self.history.add_guesser_action(f"guesser_{self.game.has_turn}_{self.current_guesser_blue}", t['name'], t['args'], result)
            else:
                results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content="Invalid tool name, retry"))
                self.history.add_guesser_action(f"guesser_{self.game.has_turn}_{self.current_guesser_blue}", t['name'], t['args'], "Invalid tool name, retry")
        if all_calls_invalid:
            self.invalid_tool_call = True
        return {'messages': results}
    
    def _call_spymaster(self, team, state):
        print(' -'*30)
        # increase round counter only when red is spymaster (as red starts the game and therefore each round) 
        if team.lower() == 'red':
            self.game.rounds += 1
        self.game.current_guess_counter = 0
        self.end_turn = False
        self.game.has_turn = team.upper()
        round = self.game.rounds
        messages = state['messages']
        prompt = self.get_spymaster_prompt(team)
        model = self.models[f"spymaster_{team}"][0]["model"]
        message = model.invoke(prompt)
        message.id = f"{round}_spymaster_{team}"
        # Parse the hint number from the spymaster's message (assuming format "Word N")
        hint_text = message.content.split()
        self.game.max_guesses = int(hint_text[1]) if len(hint_text) > 1 and hint_text[1].isdigit() else 1
        #state['messages'].append(message)
        self.game.last_spymaster_answer = message
        self.game.current_turn = f"{round}_spymaster_{team}"
        print(f"{self.game.current_turn} hint: {self.game.last_spymaster_answer.content}")
        # Export
        self.history.add_round(team, hint_text[0], self.game.max_guesses)
        return {'messages': [message]}
    
    def _call_guesser(self, team, number, state):
        print(' -'*30)

        round = self.game.rounds
        messages = state['messages']
        prompt = self.get_guesser_prompt(team, number)[0].content
        new_prompt_content = ""
        filtered_messages = [m for m in messages if m.id is not None and m.id.startswith(f"{round}_teammember_{team}")]
        current_discussion_string = "\n".join(f"{m.id[2:]} said: '{m.content.strip()}'" for m in filtered_messages if m.id is not None)
        if current_discussion_string == "":
            current_discussion_string = "No discussion has taken place yet."

        if self.invalid_tool_call:
            invalid_tool_call = "WARNING: INVALID TOOL CALL WAS JUST MADE! TRY AGAIN AND USE THE PROVIDED TOOLS CORRECTLY"
            new_prompt_content = (
                invalid_tool_call
                + "\n"
                + "THE LAST PROMPT: "
                + self.last_teammember_prompt
                )
            self.invalid_tool_call = False
        else:
            # Retrieve spymaster's hint and prepare discussion string
            spymaster_message = self.get_message_by_id(messages, f"{round}_spymaster_{team}")
            spymaster_hint = f"Spymaster's hint: {spymaster_message if spymaster_message else 'No hint available'} \n"
            
            #Retrieve info who is in the lead
            words_left_red = sum(word not in self.game.guessed_words for word in self.game.red_words)
            words_left_blue = sum(word not in self.game.guessed_words for word in self.game.blue_words)


            if words_left_red > words_left_blue:
                leader_info = "Team Blue is in the lead" if team.lower() == 'red' else "Your Team is in the lead"
            elif words_left_red == words_left_blue:
                leader_info = "Both teams have guessed the same amount of correct words so far"
            else:
                leader_info = "Team Red is in the lead" if team.lower() == 'blue' else "Your Team is in the lead"

            leader_info += f". Team Red has {words_left_red} words left to guess, and Team Blue has {words_left_blue} words left to guess."

            team_members = [f"teammember_{team}_{i + 1}" for i in range(len(self.models[f"teammembers_{team}"]))]
            team_members.remove(f"teammember_{team}_{number}")
            team_members_string = f"Your team members are: {', '.join(team_members)}."
            you_string = f"You are teammember_{team}_{number}."
            self.last_message_id = f"{round}_teammember_{team}_{number}"

            guesses_this_round = self.game.export[team.lower()][-1]['guesses'] if len(self.game.export[team.lower()]) == round else []

            #Combine into new_prompt
            current_discussion_string = current_discussion_string.replace(f"teammember_{team}_{number} said:", "You said:")
            new_prompt_content = (
                prompt
                + "\n"
                + spymaster_hint
                + f"With this hint, these guesses were made: {guesses_this_round}\n"
                + leader_info
                + you_string + "\n"
                + team_members_string + "\n"
                + f"\nThis is the current discussion with your Teammembers:"
                + "\n"
                + current_discussion_string
            )
        self.last_teammember_prompt = new_prompt_content

        model = self.models[f"teammembers_{team}"][number-1]["model"]
        message = model.invoke([new_prompt_content])  # Pass as a single-item list containing the BaseMessage instance
        
        # Set message ID and append
        #message.id = f"{round}_teammember_{team}_{number}"
        #state['messages'].append(message)
        print(f"GuessID: {round}_teammember_{team}_{number}")
        print(f"Current Discussion: \n {current_discussion_string}")
        print(f"Answer: \n {message.content}")  # Display prompt content for verification
        self.last_teammember_output = message.content
        return {'messages': [message]}
        
    def get_spymaster_prompt(self, team):
        not_guessed_board = {k: v for k, v in self.game.board.items() if k not in self.game.guessed_words}
        relevant_words = [k for k, v in not_guessed_board.items() if v.lower() == team.lower()]
        bad_words = [k for k, v in not_guessed_board.items() if v.lower() != team.lower()]
        
        return [SystemMessage(content=self.models[f"spymaster_{team}"][0]["system"]
            + f"\nGood words: {', '.join(relevant_words)}\n"
            + f"Bad words: {', '.join(bad_words)}"
        )]
    
    def get_guesser_prompt(self, team, number):
        not_guessed_board = [k for k in self.game.board if k not in self.game.guessed_words]
        hint_word = self.game.last_spymaster_answer.content

        return [SystemMessage(content=self.models[f"teammembers_{team}"][number-1]["system"]
            + f"\nWord list: {', '.join(not_guessed_board)}\n"
        )]
    
    def get_message_by_id(self, messages, target_id):
        for message in messages:
            if message.id == target_id:
                return message.content
        return None  # Return None if no message with the given id is found

In [19]:
def load_prompt_from_file(filename):
    with open(filename, 'r') as file:
        prompt = file.read()  # Reads the entire content of the file as a string
    return prompt

## Execute Games in loop for evaluation

for i in range(10):
    count = i+10
    test = 'baseline'
    game = Game()  
    game_tool = GuessTool(game)
    discuss_tool = DiscussTool()
    #refrain_tool = RefrainTool()
    end_turn_tool = EndTurnTool()
    abot = Agent(models, team_size=2, game=game, tools=[game_tool, discuss_tool, end_turn_tool], max_rounds=15)
    messages = [HumanMessage(content="Play a game of Codenames.")]
    result = abot.graph.invoke({"messages": messages}, {"recursion_limit": 200})
    export = f"""WINNER: {game.winner} , 
            ROUNDS PLAYED: {game.rounds}
            BOARD:  {game.board} , 
            Guesses RED:{game.export['red']} , 
            Guesses BLUE:{game.export['blue']} , 
            Right Guesses {game.right_guesses}, 
            Wrong Guesses {game.opponent_guesses}
            Neutral Guesses  {game.neutral_guesses}
            Invalid guesses{game.invalid_guesses}
            Black Word guesses {game.black_guesses}"""
    with open("output_game_" + test + "_" + str(count) + ".txt", "w") as file:
                file.write(export)

## Series of Games

In [20]:
class Game_Series():
    def __init__(self, models, run_name, max_rounds=15, recovery_run=False, board_random_states=range(10), wordlist='wordlist-eng.txt', run_baseline_board=True, save_graph=True):
        """
        Parameters
        ----------
        models : dict
            Dictionary with the models for the agents.
        run_name : str
            Name of the run. Used to save the output files.
        max_rounds : int, optional
            Maximum number of rounds per game. 
        recovery_run : bool, optional
            Set to True if you want to continue a previous run (and the output folder already exists). 
        board_random_states : list, optional
            Provide an empty list if you want to run the baseline board only. Else range() with the number of boards you want to run.
        wordlist : str, optional
            Path to the wordlist file. 
        run_baseline_board : bool, optional
            If True, the baseline board will be evaluated.
        save_graph : bool, optional
            If True, the graph will be saved as a png file.
        """

        self.models = models
        self.run_name = run_name
        self.recovery_run = recovery_run
        # change all random states to strings for comparison
        board_random_states = [str(state) for state in board_random_states]
        if run_baseline_board:
            self.board_random_states = ["baseline"] + list(board_random_states)
        self.current_api_id = 0
        self.max_rounds = max_rounds

        # load api keys
        with open("api_keys.json", "r") as f:
            self.api_keys = json.load(f)
        
        # recovery run can be used if previous run was interrupted
        if self.recovery_run:
            # check if folder already exists
            if not os.path.exists(f"output_{self.run_name}"):
                raise Exception(f"Folder output_{self.run_name} does not exist. Please run a non-recovery run or change the run_name.")
            else:
                # get finished game numbers
                finished_games = [file.split("_")[-1].split(".")[0] for file in os.listdir(f"output_{self.run_name}")]
                # remove finished games from board_random_states
                self.board_random_states = [state for state in self.board_random_states if state not in finished_games]
        else:
            # check if folder already exists
            if os.path.exists(f"output_{self.run_name}"):
                raise Exception(f"Folder output_{self.run_name} already exists. Please run a recovery run if you want to continue from there.")
            else:
                os.mkdir(f"output_{self.run_name}")

        if save_graph:
            self.save_graph()


    def run_series(self):
        for board_id in self.board_random_states:
            # print new board id centered in 50 dashes
            print(f"{' BOARD ID: ' + str(board_id) + ' '}".center(50, "#"))
            self.run_1_round(board_id)
        self.combine_results()
        self.calculate_statistics()
            
    def run_1_round(self, board_id):
        for i in range(self.current_api_id, len(self.api_keys)):
            try:
                game = Game(board_id)
                abot = Agent(self.models, game, self.api_keys[i], tools=[GuessTool(game),DiscussTool(),EndTurnTool()], max_rounds=self.max_rounds)
                messages = [HumanMessage(content="Play a game of Codenames.")]
                result = abot.graph.invoke({"messages": messages}, {"recursion_limit": 200})
                self.current_api_id = i
                break
            except Exception as e:
                if i == len(self.api_keys)-1:
                    raise Exception(f"Last API Key failed with error {e}. Please check the error and try again.")
                # wait 10 seconds to prevent too many request
                time.sleep(10)
                print(f"API Key {i} failed with error: {e}. Switching to next API Key in 10 seconds.")
                continue

        # save game history
        abot.history.save(f"output_{self.run_name}/history_game_{board_id}.json")
            
    def combine_results(self):
        all_results = []
        # get all results
        for file in os.listdir(f"output_{self.run_name}"):
            if "graph" in file:
                continue
            if "all_results" in file:
                raise Exception("all_results.json already exists. Please delete it before running this function.")
            with open(f"output_{self.run_name}/{file}", 'r') as f:
                data = json.load(f)
                data['config'] = self.run_name
                data['board_id'] = file.split("_")[-1].split(".")[0]
                all_results.append(data)
        # save all results to one file
        with open(f"output_{self.run_name}/all_results.json", 'w') as f:
            json.dump(all_results, f)
        

    def calculate_statistics(self):
        with open(f"output_{self.run_name}/all_results.json", 'r') as f:
            all_results = json.load(f)

        result_df = pd.DataFrame(all_results)
        
        # extract guesses per team
        def extract_guesses_list(row, team):
            main_list =[]
            for round in row['history']:
                if round['team'].lower() == team:
                    clue = round['spymaster_clue']
                    guesses = []
                    for action in round['actions']:
                        if action['tool_call'] == 'codenames_guess_tool':
                            guess = action['tool_args']['query']
                            outcome = action['outcome']
                            guesses.append((guess, outcome))
                    main_list.append((clue, guesses))
            return main_list
                    

        # get guesses per category per team
        def extract_guesses_count(row, team, category):
            counter = 0
            for round in row['history']:
                if round['team'].lower() == team:
                    for action in round['actions']:
                        if action['tool_call'] == 'codenames_guess_tool':
                            if action['outcome'] == category:
                                counter += 1
            return counter

        # apply functions to get statistics
        result_df['guesses_red_list'] = result_df.apply(lambda row: extract_guesses_list(row, 'red'), axis=1)
        result_df['guesses_blue_list'] = result_df.apply(lambda row: extract_guesses_list(row, 'blue'), axis=1)

        result_df['guesses_red_correct'] = result_df.apply(lambda row: extract_guesses_count(row, 'red', 'correct'), axis=1)
        result_df['guesses_red_incorrect_black'] = result_df.apply(lambda row: extract_guesses_count(row, 'red', 'incorrect - black'), axis=1)
        result_df['guesses_red_incorrect_neutral'] = result_df.apply(lambda row: extract_guesses_count(row, 'red', 'incorrect - neutral'), axis=1)
        result_df['guesses_red_incorrect_opponent'] = result_df.apply(lambda row: extract_guesses_count(row, 'red', 'incorrect - opponent'), axis=1)
        result_df['guesses_red_incorrect_invalid'] = result_df.apply(lambda row: extract_guesses_count(row, 'red', 'incorrect - invalid guess'), axis=1)

        result_df['guesses_blue_correct'] = result_df.apply(lambda row: extract_guesses_count(row, 'blue', 'correct'), axis=1)
        result_df['guesses_blue_incorrect_black'] = result_df.apply(lambda row: extract_guesses_count(row, 'blue', 'incorrect - black'), axis=1)
        result_df['guesses_blue_incorrect_neutral'] = result_df.apply(lambda row: extract_guesses_count(row, 'blue', 'incorrect - neutral'), axis=1)
        result_df['guesses_blue_incorrect_opponent'] = result_df.apply(lambda row: extract_guesses_count(row, 'blue', 'incorrect - opponent'), axis=1)
        result_df['guesses_blue_incorrect_invalid'] = result_df.apply(lambda row: extract_guesses_count(row, 'blue', 'incorrect - invalid guess'), axis=1)
        
        # save result_df
        result_df.to_json(f"output_{self.run_name}/all_results_stats.json")

        # display statistics
        display(result_df)


    def save_graph(self):
        # most of this function is just dummy code which is needed to get the graph
        game = Game("baseline")
        abot = Agent(self.models, game, "xxx", tools=[GuessTool(game),DiscussTool(),EndTurnTool()], max_rounds=self.max_rounds)
        png_graph = abot.graph.get_graph().draw_mermaid_png()
        with open(f"output_{self.run_name}/graph.png", "wb") as f:
            f.write(png_graph)


        

In [23]:
""" 
Only this cell has to be customized for the different prompts and number of teammates.
"""

# for each type of player a list exists, which contains the prompts for the different models
models = {
    "spymaster_BLUE": [
        {"model": None, "system": load_prompt_from_file('spymaster_system_prompt.txt')}
        ],
    "spymaster_RED": [
        {"model": None, "system": load_prompt_from_file('spymaster_system_prompt.txt')}
        ],
    "teammembers_BLUE": [
        {"model": None, "system": load_prompt_from_file('teammember_system_prompt.txt')},
        {"model": None, "system": load_prompt_from_file('teammember_system_prompt.txt')}
        ],
    "teammembers_RED": [
        {"model": None, "system": load_prompt_from_file('teammember_system_prompt.txt')},
        {"model": None, "system": load_prompt_from_file('teammember_system_prompt.txt')}
        ]
}

# create the game series
game_series = Game_Series(models, "baseline", max_rounds=15, recovery_run=True, board_random_states=range(10), wordlist='wordlist-eng.txt', run_baseline_board=True, save_graph=True)

In [24]:
# execute the game series
game_series.run_series()

################## BOARD ID: 8 ###################
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1_spymaster_RED hint: CITY 2
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
GuessID: 1_teammember_RED_2
Current Discussion: 
 No discussion has taken place yet.
Answer: 
 
Test:content='' additional_kwargs={'tool_calls': [{'id': 'call_wwnm', 'function': {'arguments': '{"query": "I think the hint CITY could relate to countries or cities, what do you think teammember_RED_1?"}', 'name': 'codenames_discuss_tool'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 38, 'prompt_tokens': 737, 'total_tokens': 775, 'completion_time': 0.152, 'prompt_time': 0.283611504, 'queue_time': 0.0048289029999999955, 'total_time': 0.435611504}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_b6828be2c9', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-056428ae-2400-4c4d-9bf8-6b497a0d0d18-0' tool_calls=[{'name': 'codenames_discuss

Unnamed: 0,board,history,winner,config,board_id,guesses_red_list,guesses_blue_list,guesses_red_correct,guesses_red_incorrect_black,guesses_red_incorrect_neutral,guesses_red_incorrect_opponent,guesses_red_incorrect_invalid,guesses_blue_correct,guesses_blue_incorrect_black,guesses_blue_incorrect_neutral,guesses_blue_incorrect_opponent,guesses_blue_incorrect_invalid
0,"{'NOVEL': 'blue', 'SHAKESPEARE': 'black', 'LAS...","[{'team': 'RED', 'spymaster_clue': 'OPTICAL', ...",RED,baseline,9,"[(OPTICAL, [('LASER', 'correct'), ('TELESCOPE'...","[(FIBER, [('COTTON', 'correct'), ('NEEDLE', 'i...",5,0,1,2,0,4,1,0,2,0
1,"{'SHOT': 'blue', 'FILE': 'black', 'UNDERTAKER'...","[{'team': 'RED', 'spymaster_clue': 'Border', '...",RED,baseline,5,"[(Border, [('FENCE', 'correct'), ('CANADA', 'c...","[(HEALTH, [('NURSE', 'correct'), ('AMBULANCE',...",7,0,1,0,0,5,0,2,2,0
2,"{'EUROPE': 'blue', 'HAM': 'blue', 'CANADA': 'r...","[{'team': 'RED', 'spymaster_clue': 'CARTOON', ...",BLUE,baseline,4,"[(CARTOON, [('COMIC', 'correct'), ('DRAGON', '...","[(MEAT, [('HAM', 'correct'), ('BUCK', 'correct...",6,0,1,2,0,6,0,0,2,0
3,"{'EGYPT': 'red', 'LAB': 'red', 'LAWYER': 'blue...","[{'team': 'RED', 'spymaster_clue': 'CITY', 'nu...",BLUE,baseline,8,"[(CITY, [('LONDON', 'correct'), ('EGYPT', 'cor...","[(BARRIER, [('FENCE', 'correct'), ('NET', 'cor...",6,0,2,3,0,5,0,2,2,0
4,"{'EYE': 'neutral', 'SATURN': 'blue', 'PORT': '...","[{'team': 'RED', 'spymaster_clue': 'EMERGENCY'...",RED,baseline,3,"[(EMERGENCY, [('AMBULANCE', 'correct'), ('POLI...","[(MYTH, [('OCTOPUS', 'incorrect - neutral')]),...",8,0,1,2,0,2,0,2,1,0
5,"{'BELL': 'neutral', 'BUCK': 'red', 'BOX': 'neu...","[{'team': 'RED', 'spymaster_clue': 'CLOTHING',...",BLUE,baseline,2,"[(CLOTHING, [('SOCK', 'correct'), ('STRING', '...","[(WING, [('HAWK', 'incorrect - neutral')]), (W...",7,0,2,1,1,7,0,2,1,0
6,"{'CHEST': 'red', 'RING': 'blue', 'WAVE': 'blue...","[{'team': 'RED', 'spymaster_clue': 'ANIMAL', '...",RED,baseline,1,"[(ANIMAL, [('CAT', 'correct'), ('DOG', 'correc...","[(FINGER, [('RING', 'correct'), ('NAIL', 'corr...",6,0,2,1,0,5,1,1,0,0
7,"{'LIGHT': 'blue', 'WASHINGTON': 'red', 'MERCUR...","[{'team': 'RED', 'spymaster_clue': 'FORTUNE', ...",BLUE,baseline,0,"[(FORTUNE, [('LUCK', 'correct'), ('ROULETTE', ...","[(MUSIC, [('OPERA', 'incorrect - opponent')]),...",5,1,2,0,0,6,0,0,2,0
8,"{'HOOD': 'red', 'CODE': 'red', 'LITTER': 'blue...","[{'team': 'RED', 'spymaster_clue': 'FANTASY', ...",BLUE,baseline,7,"[(FANTASY, [('DRAGON', 'correct'), ('PRINCESS'...","[(SHARP, [('KNIFE', 'correct'), ('SPIKE', 'cor...",6,0,2,3,0,5,0,1,1,1
9,"{'ROBOT': 'blue', 'BOTTLE': 'neutral', 'PAN': ...","[{'team': 'RED', 'spymaster_clue': 'FILM', 'nu...",BLUE,baseline,6,"[(FILM, [('HOLLYWOOD', 'correct'), ('LASER', '...","[(COOK, [('PAN', 'correct'), ('FORK', 'correct...",4,0,3,2,0,6,0,1,1,0


## Create Analysis entry 

In [25]:
result_df = pd.read_json("output_baseline/all_results_stats.json")

analysis_df = pd.read_excel('Analysis_Performance.xlsx')

analysis_df.loc[len(analysis_df)] = [
    result_df['config'][0],  
    result_df['history'].apply(len).mean(), 
    result_df['guesses_red_correct'].mean(), 
    result_df['guesses_red_incorrect_black'].mean(), 
    result_df['guesses_red_incorrect_neutral'].mean(),  
    result_df['guesses_red_incorrect_opponent'].mean(),  
    result_df['guesses_red_incorrect_invalid'].mean(),  
    result_df['guesses_blue_correct'].mean(),  
    result_df['guesses_blue_incorrect_black'].mean(),  
    result_df['guesses_blue_incorrect_neutral'].mean(),  
    result_df['guesses_blue_incorrect_opponent'].mean(),  
    result_df['guesses_blue_incorrect_invalid'].mean(),  
    result_df['winner'].value_counts().idxmax(),  
    (result_df['winner'].value_counts().max() / len(result_df)) * 100,  
    (result_df['guesses_red_correct'].sum() / (  
        result_df['guesses_red_correct'].sum() +
        result_df['guesses_red_incorrect_black'].sum() +
        result_df['guesses_red_incorrect_neutral'].sum() +
        result_df['guesses_red_incorrect_opponent'].sum() +
        result_df['guesses_red_incorrect_invalid'].sum()
    )),
    (result_df['guesses_blue_correct'].sum() / (  
        result_df['guesses_blue_correct'].sum() +
        result_df['guesses_blue_incorrect_black'].sum() +
        result_df['guesses_blue_incorrect_neutral'].sum() +
        result_df['guesses_blue_incorrect_opponent'].sum() +
        result_df['guesses_blue_incorrect_invalid'].sum()
    )),
]

analysis_df.to_excel('Analysis_Performance.xlsx', index = False)