## Codemaster testing

### loading boards

In [None]:
import numpy as np
import pickle
from agents.rsa.rsagent import fasttext_Codemaster, llama_Codemaster, openai_Codemaster, swow_Codemaster, Codemaster_Wrapper

In [None]:
# Load the evaluation inputs. NOTE: pickle can execute arbitrary code while
# loading, so these files must be trusted, locally generated artifacts.
with open("auto_eval/cm/boards.pkl", "rb") as boards_fh:
    # Each entry is unpacked further down as (board_words, assocs).
    boards = pickle.load(boards_fh)
with open("agents/data/all_codewords.pkl", "rb") as codewords_fh:
    codewords = pickle.load(codewords_fh)
with open("agents/data/all_clues.pkl", "rb") as clues_fh:
    # Only the first 20000 entries of the clue list are kept.
    clues = pickle.load(clues_fh)[:20000]

# Names of the four agent back-ends, in the order they are iterated below.
labels = ["fasttext", "llama", "openai", "swow"]

In [None]:
# For every codemaster back-end, produce one clue per board and cache the
# results in auto_eval/cm/<label>.pkl.
codemaster_classes = [fasttext_Codemaster, llama_Codemaster, openai_Codemaster, swow_Codemaster]
for codemaster, label in zip(codemaster_classes, labels):
    agent = Codemaster_Wrapper(codemaster(), clues, codewords)
    # NOTE(review): the meaning of the four weights is defined by the agent
    # implementation, which is not visible here.
    agent.codemaster.weights = np.array([1, -1.7, -1.7, -1.7], dtype="float32")
    # give_clue's return value is flattened into a plain list per board.
    codemaster_clues = [list(agent.give_clue(words, roles)) for words, roles in boards]
    with open(f"auto_eval/cm/{label}.pkl", "wb+") as out_fh:
        pickle.dump(codemaster_clues, out_fh)

### Generating guesses

In [None]:
from agents.rsa.rsagent import fasttext_Guesser, llama_Guesser, openai_Guesser, swow_Guesser, Guesser_Wrapper
from tqdm import tqdm

In [None]:
# For each codemaster's cached clues, let every guesser back-end guess on every
# board; results are stored as one list per guesser, in guesser_classes order.
# NOTE(review): the output file is named "*_guesses_variant.pkl" but no variant
# flag is passed to Guesser_Wrapper here -- confirm the wrapper's default.
guesser_classes = [fasttext_Guesser, llama_Guesser, openai_Guesser, swow_Guesser]
for codemaster in labels:
    print(codemaster)
    with open(f"auto_eval/cm/{codemaster}.pkl", "rb") as clue_fh:
        codemaster_clues = pickle.load(clue_fh)
    board_guesses = []
    for guesser in guesser_classes:
        agent = Guesser_Wrapper(guesser(), clues, codewords)
        agent.guesser.weights = np.array([1, -1.7, -1.7, -1.7], dtype="float32")
        guesser_guesses = []
        for board_idx, clue_and_count in enumerate(tqdm(codemaster_clues)):
            clue, count = clue_and_count
            # Clues are stored in board order, so the index selects the matching board.
            words, roles = boards[board_idx]
            guesser_guesses.append(agent.guess(roles, words, clue, count))
        board_guesses.append(guesser_guesses)
    with open(f"auto_eval/cm/{codemaster}_guesses_variant.pkl", "wb+") as out_fh:
        pickle.dump(board_guesses, out_fh)

## Exporting the clues for human evaluation

In [None]:
import json

In [None]:
# Flatten every (codemaster, board) pair into one JSON record for the human
# experiment. "assoc" is recoded to 0 for "good" words and 1 for everything else.
board_exp = []
for codemaster in labels:
    with open(f"auto_eval/cm/{codemaster}.pkl", "rb") as clue_fh:
        board_clues = pickle.load(clue_fh)
    board_exp.extend(
        {
            "clue": board_clues[idx][0],
            "count": board_clues[idx][1],
            "words": board[0],
            "assoc": [int(role != "good") for role in board[1]],
        }
        for idx, board in enumerate(boards)
    )
with open("auto_eval/cm/exp.json", "w+") as out_fh:
    json.dump(board_exp, out_fh)

### Evaluation

In [None]:
import pandas as pd
import json

In [None]:
# Score the stored guesses of every codemaster: count how many guessed board
# positions are labelled "good". Two tallies are kept per codemaster:
#   * plain totals      -- only guess entries that are real lists
#   * "+ random" totals -- every guess entry, list or not
normal_absolute = []
normal_ratio = []
normal_random_absolute = []
normal_random_ratio = []
for codemaster in labels:
    # NOTE: pickle is only safe on trusted, locally generated files.
    with open(f"auto_eval/cm/{codemaster}_guesses.pkl", "rb") as file:
        board_guesses = pickle.load(file)
    running_correct = 0
    running_correct_random = 0
    running = 0
    running_random = 0
    for guesser in board_guesses:
        for i, guesses in enumerate(guesser):
            # A non-list entry is presumably a fallback/random guess (judging by
            # the "*_random" counter names) -- confirm against Guesser_Wrapper.guess.
            counts_as_normal = isinstance(guesses, list)
            for guess in guesses:
                is_good = boards[i][1][guess] == "good"
                # Each guess enters the "+ random" totals exactly once. The
                # previous version had no `else`, so non-list entries were
                # counted twice here and also leaked into the plain totals.
                running_random += 1
                if is_good:
                    running_correct_random += 1
                if counts_as_normal:
                    running += 1
                    if is_good:
                        running_correct += 1
    normal_absolute.append(running_correct)
    # Report NaN instead of raising ZeroDivisionError when nothing was scored.
    normal_ratio.append(running_correct / running if running else float("nan"))
    normal_random_absolute.append(running_correct_random)
    normal_random_ratio.append(running_correct_random / running_random if running_random else float("nan"))

In [None]:
# Score the "variant" guess files of every codemaster: count how many guessed
# board positions are labelled "good", summed over all guessers and boards.
variant_absolute = []
variant_ratio = []
for codemaster in labels:
    # NOTE: pickle is only safe on trusted, locally generated files.
    with open(f"auto_eval/cm/{codemaster}_guesses_variant.pkl", "rb") as file:
        board_guesses = pickle.load(file)
    running_correct = 0
    running = 0
    for guesser in board_guesses:
        for i, guesses in enumerate(guesser):
            for guess in guesses:
                if boards[i][1][guess] == "good":
                    running_correct += 1
                running += 1
    variant_absolute.append(running_correct)
    # Report NaN instead of raising ZeroDivisionError when nothing was scored.
    variant_ratio.append(running_correct / running if running else float("nan"))

### Map each human-evaluated board back to the codemaster that produced its clue

In [None]:
# Rebuild the exported records together with the codemaster that produced each
# clue, so that results can be attributed afterwards.
all_boards = []
for codemaster in labels:
    with open(f"auto_eval/cm/{codemaster}.pkl", "rb") as clue_fh:
        codemaster_boards = pickle.load(clue_fh)
    all_boards.extend(
        {"clue": board[0], "count": board[1], "words": boards[i][0], "codemaster": codemaster}
        for i, board in enumerate(codemaster_boards)
    )


def is_codemaster(board, codemaster):
    """Return True if the first record matching ``board`` came from ``codemaster``.

    A record matches when its "clue", "count" and "words" all equal those of
    ``board``. Only the first match in ``all_boards`` is consulted, and
    ``StopIteration`` is raised when there is no match at all.
    """
    first_match = next(
        known
        for known in all_boards
        if known["clue"] == board["clue"]
        and known["count"] == board["count"]
        and known["words"] == board["words"]
    )
    return first_match["codemaster"] == codemaster

In [None]:
def num_correct(board):
    """Count the guesses in ``board["guesses"]`` whose word has ``assoc`` flag 0.

    Each guess is located in ``board["words"]`` and the flag at the same
    position in ``board["assoc"]`` is checked. ``ValueError`` is raised if a
    guess is not one of the board words.
    """
    hits = 0
    for guess in board["guesses"]:
        position = board["words"].index(guess)
        if board["assoc"][position] == 0:
            hits += 1
    return hits

# Score the human guesses per codemaster: absolute number of correct guesses
# and the share of correct guesses among all guesses.
human_absolute = []
human_ratio = []
with open("auto_eval/cm/exp_results.json", "r") as results_fh:
    human = json.load(results_fh)
for codemaster in labels:
    # Keep only the boards whose clue is attributed to this codemaster.
    codemaster_boards = [entry for entry in human if is_codemaster(entry, codemaster)]
    running_correct = sum(num_correct(entry) for entry in codemaster_boards)
    running = sum(len(entry["guesses"]) for entry in codemaster_boards)
    human_absolute.append(running_correct)
    human_ratio.append(running_correct / running)
        

In [None]:
# Summary table: one row per metric, one column per codemaster.
# The row label previously misspelled as "variate ratio" now reads "variant ratio".
metric_rows = {
    "normal absolute": normal_absolute,
    "normal ratio": normal_ratio,
    "normal absolute + random": normal_random_absolute,
    "normal ratio + random": normal_random_ratio,
    "variant absolute": variant_absolute,
    "variant ratio": variant_ratio,
    "human absolute": human_absolute,
    "human ratio": human_ratio,
}
df = pd.DataFrame(columns=labels,
                  data=list(metric_rows.values()),
                  index=list(metric_rows.keys()))

def row_max(row):
    """Return a 0/1 list as long as ``row`` with a single 1 at the row maximum.

    The position is taken from ``row.idxmax()``, i.e. the first occurrence of
    the largest value.
    """
    winner = row.index.get_loc(row.idxmax())
    return [int(position == winner) for position in range(len(row))]

display(df)
# Mark the best column in every metric row with a 1, restore the column names
# lost by the expansion, then count how many metrics each column wins.
wins = df.apply(row_max, axis=1, result_type="expand")
wins = wins.rename(dict(enumerate(labels)), axis="columns")
wins.sum()

## Guesser Testing

In [None]:
import json
import numpy as np
import pickle
from agents.rsa.rsagent import fasttext_Codemaster, llama_Codemaster, openai_Codemaster, swow_Codemaster, Codemaster_Wrapper

In [None]:
def recode(assoc):
    """Translate numeric flags into labels: 0 becomes "good", anything else "neutral"."""
    return ["good" if flag == 0 else "neutral" for flag in assoc]

# Load the boards from the experiment export and bring them into the
# [words, assoc-labels] form used by the agents.
with open("../exp/src/agent_list_1.json", "r") as boards_fh:
    exported_boards = json.load(boards_fh)
boards = [[entry["words"], recode(entry["assoc"])] for entry in exported_boards]
# NOTE: pickle is only safe on trusted, locally generated files.
with open("agents/data/all_codewords.pkl", "rb") as codewords_fh:
    codewords = pickle.load(codewords_fh)
with open("agents/data/all_clues.pkl", "rb") as clues_fh:
    # Only the first 20000 entries of the clue list are kept.
    clues = pickle.load(clues_fh)[:20000]

# Names of the four agent back-ends, in the order they are iterated below.
labels = ["fasttext", "llama", "openai", "swow"]

In [None]:
# Collect, for every codemaster back-end and every board, one record of the
# form [*give_clue(...), board_words, assocs], and store them all in one file.
codemaster_clues = []

for codemaster in [fasttext_Codemaster, llama_Codemaster, openai_Codemaster, swow_Codemaster]:
    agent = Codemaster_Wrapper(codemaster(), clues, codewords)
    agent.codemaster.weights = np.array([1, -1.7, -1.7, -1.7], dtype="float32")
    records = [
        list(agent.give_clue(words, roles)) + [words, roles]
        for words, roles in boards
    ]
    codemaster_clues.extend(records)
with open("auto_eval/gs/boards_clues.pkl", "wb+") as out_fh:
    pickle.dump(codemaster_clues, out_fh)

### Generating guesses

In [None]:
from agents.rsa.rsagent import fasttext_Guesser, llama_Guesser, openai_Guesser, swow_Guesser, Guesser_Wrapper
from tqdm import tqdm

In [None]:
# Let every guesser back-end guess on all stored (clue, count, board) records
# and cache its guesses in auto_eval/gs/<label>.pkl.
with open("auto_eval/gs/boards_clues.pkl", "rb") as clue_fh:
    codemaster_clues = pickle.load(clue_fh)
guesser_classes = [fasttext_Guesser, llama_Guesser, openai_Guesser, swow_Guesser]
for guesser, label in zip(guesser_classes, labels):
    agent = Guesser_Wrapper(guesser(), clues, codewords)
    agent.guesser.weights = np.array([1, -1.7, -1.7, -1.7], dtype="float32")
    guesser_guesses = [
        agent.guess(assocs, board_words, clue, count)
        for clue, count, board_words, assocs in tqdm(codemaster_clues)
    ]
    with open(f"auto_eval/gs/{label}.pkl", "wb+") as out_fh:
        pickle.dump(guesser_guesses, out_fh)

### Evaluation

In [None]:
import pandas as pd
import json
# Result files to score: a plain and a "_variant" entry per guesser back-end.
# NOTE(review): the generation cell shown above only writes "<name>.pkl"; the
# "<name>_variant.pkl" files are presumably produced by a separate run -- confirm.
labels = ["fasttext", "fasttext_variant", "llama", "llama_variant", "openai", "openai_variant", "swow", "swow_variant"]

In [None]:
# Reuse the stored [clue, count, board_words, assocs] records as reference
# boards; the scoring below reads the assocs at index 3.
with open("auto_eval/gs/boards_clues.pkl", "rb") as boards_fh:
    boards = pickle.load(boards_fh)

In [None]:
# Score every guesser's stored guesses against the board labels.
# List entries count towards both tallies. For a non-list entry only the first
# guess is inspected: it always increases both denominators, but a hit is
# credited to the "+ random" tally only.
normal_absolute = []
normal_ratio = []
normal_random_absolute = []
normal_random_ratio = []
for guesser in labels:
    with open(f"auto_eval/gs/{guesser}.pkl", "rb") as guesses_fh:
        board_guesses = pickle.load(guesses_fh)
    running_correct = running_correct_random = 0
    running = running_random = 0
    for i, guesses in enumerate(board_guesses):
        if type(guesses) is list:
            for guess in guesses:
                running += 1
                running_random += 1
                if boards[i][3][guess] == "good":
                    running_correct += 1
                    running_correct_random += 1
            continue
        # NOTE(review): non-list entries are presumably fallback/random guesses
        # (going by the counter names) -- confirm against Guesser_Wrapper.guess.
        for guess in guesses[:1]:
            running += 1
            running_random += 1
            if boards[i][3][guess] == "good":
                running_correct_random += 1
    normal_absolute.append(running_correct)
    normal_ratio.append(running_correct / running)
    normal_random_absolute.append(running_correct_random)
    normal_random_ratio.append(running_correct_random / running_random)

In [None]:
# Summary table: one row per metric, one column per guesser label.
metric_rows = {
    "normal absolute": normal_absolute,
    "normal ratio": normal_ratio,
    "normal absolute + random": normal_random_absolute,
    "normal ratio + random": normal_random_ratio,
}
df = pd.DataFrame(
    data=list(metric_rows.values()),
    index=list(metric_rows.keys()),
    columns=labels,
)

def row_max(row):
    """One-hot encode the position of the largest value in ``row``.

    Returns a list of ``len(row)`` integers that is 0 everywhere except for a 1
    at the position of ``row.idxmax()`` (the first maximum).
    """
    best_position = row.index.get_loc(row.idxmax())
    return [1 if position == best_position else 0 for position in range(len(row))]

display(df)
# Mark the best column in every metric row with a 1, restore the column names
# lost by the expansion, then count how many metrics each column wins.
wins = df.apply(row_max, axis=1, result_type="expand")
wins = wins.rename(dict(enumerate(labels)), axis="columns")
wins.sum()

## Playing Games

In [1]:
import pickle
from tqdm import tqdm
from itertools import product
from agents.rsa.rsagent import fasttext_Codemaster, llama_Codemaster, openai_Codemaster, swow_Codemaster
from agents.rsa.rsagent import fasttext_Guesser, llama_Guesser, openai_Guesser, swow_Guesser
from agents.rsa.rsagent import Guesser_Wrapper, Codemaster_Wrapper
from agents.others.gpt import Codemaster_Wrapper as GPT
from agents.game import Codenames

# Load the word lists and the five game boards (board_1.pkl .. board_5.pkl).
# NOTE: pickle is only safe on trusted, locally generated files.
with open("agents/data/all_codewords.pkl", "rb") as codewords_fh:
    codewords = pickle.load(codewords_fh)
with open("agents/data/all_clues.pkl", "rb") as clues_fh:
    # Only the first 20000 entries of the clue list are kept.
    clues = pickle.load(clues_fh)[:20000]

boards = []
for board_number in (1, 2, 3, 4, 5):
    with open(f"agents/data/boards/board_{board_number}.pkl", "rb") as board_fh:
        boards.append(pickle.load(board_fh))

In [3]:
# Every guesser/codemaster combination, flattened to
# (guesser class, guesser name, codemaster class, codemaster name).
guesser_roster = [(fasttext_Guesser, "fasttext"), (llama_Guesser, "llama"), (openai_Guesser, "openai"), (swow_Guesser, "swow")]
codemaster_roster = [(fasttext_Codemaster, "fasttext"), (llama_Codemaster, "llama"), (openai_Codemaster, "openai"), (swow_Codemaster, "swow"), (GPT, "other-GPT")]
pairs = [
    (guesser_cls, guesser_label, codemaster_cls, codemaster_label)
    for (guesser_cls, guesser_label), (codemaster_cls, codemaster_label)
    in product(guesser_roster, codemaster_roster)
]

In [6]:
# Play every guesser/codemaster pairing on all boards with the guesser built
# with variant=True, and write one result file per pairing.
guesser_roster = [(fasttext_Guesser, "fasttext"), (llama_Guesser, "llama"), (openai_Guesser, "openai"), (swow_Guesser, "swow")]
codemaster_roster = [(fasttext_Codemaster, "fasttext"), (llama_Codemaster, "llama"), (openai_Codemaster, "openai"), (swow_Codemaster, "swow"), (GPT, "other-GPT")]
for (guesser, guesser_name), (codemaster, codemaster_name) in product(guesser_roster, codemaster_roster):
    states, roundss, scoress = [], [], []
    for board in tqdm(boards, desc=f"{guesser_name}--{codemaster_name}"):
        # Fresh agents are created for every board.
        guesser_agent = Guesser_Wrapper(guesser(), clues, codewords, mcmc_burn_in=1_000, mcmc_iter=300_000, variant=True)
        # The GPT codemaster is constructed directly and only receives the clues.
        if codemaster_name == "other-GPT":
            codemaster_agent = GPT(clues)
        else:
            codemaster_agent = Codemaster_Wrapper(codemaster(), clues, codewords)
        game = Codenames(board[0], board[1], guesser_agent, codemaster_agent)
        state, rounds, scores = game.play_game()
        states.append(state)
        roundss.append(rounds)
        scoress.append(scores)
    with open(f"auto_eval/games/{guesser_name}_variant--{codemaster_name}.pkl", "wb+") as out_fh:
        pickle.dump([guesser_name, codemaster_name, states, roundss, scoress], out_fh)

  samples = .5 + (samples / samples.max())
fasttext--other-GPT: 100%|██████████| 5/5 [45:52<00:00, 550.56s/it]
  samples = .5 + (samples / samples.max())
llama--other-GPT: 100%|██████████| 5/5 [48:17<00:00, 579.47s/it]
openai--other-GPT: 100%|██████████| 5/5 [50:50<00:00, 610.02s/it]
swow--other-GPT: 100%|██████████| 5/5 [59:47<00:00, 717.46s/it]


In [4]:
# Play every guesser/codemaster pairing on all boards with the guesser built
# with variant=False, and write one result file per pairing.
guesser_roster = [(fasttext_Guesser, "fasttext"), (llama_Guesser, "llama"), (openai_Guesser, "openai"), (swow_Guesser, "swow")]
codemaster_roster = [(fasttext_Codemaster, "fasttext"), (llama_Codemaster, "llama"), (openai_Codemaster, "openai"), (swow_Codemaster, "swow"), (GPT, "other-GPT")]
for (guesser, guesser_name), (codemaster, codemaster_name) in product(guesser_roster, codemaster_roster):
    states, roundss, scoress = [], [], []
    for board in tqdm(boards, desc=f"{guesser_name}--{codemaster_name}"):
        # Fresh agents are created for every board.
        guesser_agent = Guesser_Wrapper(guesser(), clues, codewords, mcmc_burn_in=1_000, mcmc_iter=200_000, variant=False)
        # The GPT codemaster is constructed directly and only receives the clues.
        if codemaster_name == "other-GPT":
            codemaster_agent = GPT(clues)
        else:
            codemaster_agent = Codemaster_Wrapper(codemaster(), clues, codewords)
        outcome = Codenames(board[0], board[1], guesser_agent, codemaster_agent).play_game()
        state, rounds, scores = outcome
        states.append(state)
        roundss.append(rounds)
        scoress.append(scores)
    with open(f"auto_eval/games/{guesser_name}--{codemaster_name}.pkl", "wb+") as out_fh:
        pickle.dump([guesser_name, codemaster_name, states, roundss, scoress], out_fh)

fasttext--openai: 100%|██████████| 5/5 [4:08:09<00:00, 2977.99s/it]  
fasttext--swow: 100%|██████████| 5/5 [4:23:49<00:00, 3165.81s/it]
fasttext--other-GPT: 100%|██████████| 5/5 [1:04:11<00:00, 770.25s/it]


In [4]:
# Play every guesser/codemaster pairing on all boards with the game created
# with simple=True, and write one "<guesser>_simple--<codemaster>" file per pairing.
guesser_roster = [(fasttext_Guesser, "fasttext"), (llama_Guesser, "llama"), (openai_Guesser, "openai"), (swow_Guesser, "swow")]
codemaster_roster = [(fasttext_Codemaster, "fasttext"), (llama_Codemaster, "llama"), (openai_Codemaster, "openai"), (swow_Codemaster, "swow"), (GPT, "other-GPT")]
for (guesser, guesser_name), (codemaster, codemaster_name) in product(guesser_roster, codemaster_roster):
    states, roundss, scoress = [], [], []
    for board in tqdm(boards, desc=f"{guesser_name}--{codemaster_name}"):
        # Fresh agents are created for every board.
        guesser_agent = Guesser_Wrapper(guesser(), clues, codewords, mcmc_burn_in=1_000, mcmc_iter=200_000, variant=False)
        # The GPT codemaster is constructed directly and only receives the clues.
        if codemaster_name == "other-GPT":
            codemaster_agent = GPT(clues)
        else:
            codemaster_agent = Codemaster_Wrapper(codemaster(), clues, codewords)
        game = Codenames(board[0], board[1], guesser_agent, codemaster_agent, simple=True)
        state, rounds, scores = game.play_game()
        states.append(state)
        roundss.append(rounds)
        scoress.append(scores)
    with open(f"auto_eval/games/{guesser_name}_simple--{codemaster_name}.pkl", "wb+") as out_fh:
        pickle.dump([guesser_name, codemaster_name, states, roundss, scoress], out_fh)

fasttext--fasttext: 100%|██████████| 5/5 [00:10<00:00,  2.07s/it]
fasttext--llama: 100%|██████████| 5/5 [00:04<00:00,  1.24it/s]
fasttext--openai: 100%|██████████| 5/5 [00:03<00:00,  1.30it/s]
fasttext--swow: 100%|██████████| 5/5 [00:04<00:00,  1.20it/s]
fasttext--other-GPT: 100%|██████████| 5/5 [00:18<00:00,  3.72s/it]
llama--fasttext: 100%|██████████| 5/5 [00:04<00:00,  1.21it/s]
llama--llama: 100%|██████████| 5/5 [00:03<00:00,  1.39it/s]
llama--openai: 100%|██████████| 5/5 [00:03<00:00,  1.46it/s]
llama--swow: 100%|██████████| 5/5 [00:04<00:00,  1.09it/s]
llama--other-GPT: 100%|██████████| 5/5 [00:19<00:00,  3.98s/it]
openai--fasttext: 100%|██████████| 5/5 [00:04<00:00,  1.24it/s]
openai--llama: 100%|██████████| 5/5 [00:04<00:00,  1.22it/s]
openai--openai: 100%|██████████| 5/5 [00:03<00:00,  1.41it/s]
openai--swow: 100%|██████████| 5/5 [00:04<00:00,  1.24it/s]
openai--other-GPT: 100%|██████████| 5/5 [00:16<00:00,  3.25s/it]
swow--fasttext: 100%|██████████| 5/5 [00:04<00:00,  1.06it/

In [20]:
# Guesser result-file prefixes: each back-end in its plain, "_variant" and
# "_simple" flavour, grouped by back-end.
guessers = [
    backend + suffix
    for backend, suffix in product(["fasttext", "llama", "openai", "swow"], ["", "_variant", "_simple"])
]
# Codemaster names as they appear in the result file names.
codemasters = ["fasttext", "llama", "openai", "other-GPT", "swow"]

In [46]:
# Inspect one saved result file. The game cells above dump a list of the form
# [guesser_name, codemaster_name, states, roundss, scoress].
# NOTE: pickle is only safe on trusted, locally generated files.
with open("auto_eval/games/fasttext--swow.pkl", "rb") as file:
    f = pickle.load(file)
f

['fasttext',
 'swow',
 [3, 3, 2, 2, 2],
 [1, 4, 5, 7, 6],
 [[-7.5],
  [1, -2, -2, -7.5],
  [-1, -2, -2, -1, 1],
  [-1, 1, -1, -1, 1, 2, 1],
  [-1, -1, -2, 1, -2, -1]]]