# Agent vs. Agent

Make two agents fight each other!

## Setup

In [None]:
import sys
import time
from tqdm.auto import tqdm
from copy import deepcopy
from IPython.display import clear_output, Markdown

if '..' not in sys.path: sys.path.append('..')
from src.utils.common import *
from src.dnd.actions import *
from src.dnd.units import *
from src.dnd.game_utils import *
from src.dnd.game_board import DnDBoard, GameState
from src.agent.agent import DnDAgent, IdleDnDAgent
from src.agent.agent_utils import agents_play_loop, agent_take_turn, agents_play_loop_bare
from src.dnd.game_configs import *

In [None]:
def create_pair_game(ref: DnDBoard):
    """Build a mirrored copy of `ref` with the two players' unit ownership swapped.

    Every unit of `ref` is deep-copied and placed at its original position on
    a new board of the same shape, but assigned to the opposite player
    (computed as ``1 - player_id``, so player ids 0 and 1 are assumed).
    The new game is initialized and given a copy of `ref`'s turn order.

    Returns the newly created board; `ref` itself is not modified here.
    """
    mirrored = DnDBoard(ref.board_shape)

    for source_unit in ref.units:
        unit_clone = deepcopy(source_unit)
        position = source_unit.pos
        swapped_owner = 1 - ref.units_to_players[source_unit]
        mirrored._place_unit(unit_clone, position, swapped_owner)

    mirrored.initialize_game()
    # Copy the turn order so the mirrored game does not share the list with `ref`.
    turn_order = ref.turn_order.copy()
    mirrored.set_turn_order(turn_order)

    return mirrored

## Load agents & game config

Load game configuration:

In [None]:
# Board dimensions handed to the field generator.
board_size = (5, 5)
# Create the generator used by the cells below and load its data from '../Tokens'.
# NOTE(review): presumably this folder holds the unit tokens used to populate
# generated boards, and load_from_folder() returns the generator itself — confirm.
gen = FieldGenerator(board_size=board_size).load_from_folder('../Tokens')

Load agents from disk:

In [None]:
# Checkpoint locations of the two agents to compare. These paths are also used
# later to label the columns of the results table.
agent_path_1 = '../rnd/2v2-1/gen16/checkpoints/agent-11.3i-80.0k'
agent_path_2 = '../rnd/2v2-1/gen15/checkpoints/agent-12.0i-80.0k'
# Load the first agent.
# NOTE(review): epsilon=0 presumably disables random exploration and strip=True
# presumably drops training-only state — confirm against DnDAgent.load_agent.
agent1 = DnDAgent.load_agent(agent_path_1, strip=True, epsilon=0)
# Observation indices for agent 1, derived from the first `in_channels` board
# channel names, so each agent is fed only the channels it expects.
indices1 = get_observation_indices(DnDBoard.CHANNEL_NAMES[:agent1.in_channels])
# Same two steps for the second agent; its channel count may differ from agent 1's.
agent2 = DnDAgent.load_agent(agent_path_2, strip=True, epsilon=0)
indices2 = get_observation_indices(DnDBoard.CHANNEL_NAMES[:agent2.in_channels])

## One game

A random game board is generated and the agents play against each other. All moves are visualized. The delay between moves can be adjusted with the `delay` argument.

In [None]:
# Reset the generator before building a new board.
gen.reset()
# Generate a balanced game and decorate it; the returned colormap is passed on
# to the play loop below.
game, colormap = decorate_game(gen.generate_balanced_game(targetCR=1))

# Let the two agents play the game without manual input. Each agent gets its
# own observation indices via `state_indices`. The return value is discarded.
# NOTE(review): `delay` is the pause between moves, presumably in seconds — confirm.
_ = agents_play_loop([agent1, agent2], game, colormap, manual_input=False, delay=0.5, state_indices=[indices1, indices2])

## Multiple games

The agents play against each other for the given number of games. Each generated board is played twice: in the first game, agent 1 plays as player 1 and agent 2 as player 2; in the second game, agent 1 plays as player 2 and agent 2 as player 1.

In [None]:
# Winner value of every game played. Two consecutive entries are stored per
# generated board: first the original board, then its player-swapped pair.
counter = []
# Number of boards to generate; each is played twice, so 2 * games results are stored.
games = 2000
# Largest `ic` value seen among games that did not time out.
# NOTE(review): `ic` is presumably the iteration count of a game — confirm.
max_ic = 0

for i in tqdm(range(games)):
    # Fresh balanced board plus a copy of it with the player ids swapped.
    gen.reset()
    game = gen.generate_balanced_game(targetCR=1)
    game2 = create_pair_game(game)
    
    # Play both boards with the same agent order; the swap happens on the board
    # side, so each agent gets to play both sides of the same configuration.
    ic, win = agents_play_loop_bare(game, [agent1, agent2], [indices1, indices2], iter_limit=100)
    ic2, win2 = agents_play_loop_bare(game2, [agent1, agent2], [indices1, indices2], iter_limit=100)
    
    counter.append(win)
    counter.append(win2)

    # A winner value of -1 is reported as a timed-out game in the results cell;
    # such games are excluded from the max_ic statistic.
    if win != -1: max_ic = max(max_ic, ic)
    if win2 != -1: max_ic = max(max_ic, ic2)

Print the results of the games. Timed-out games are games that lasted longer than the `iter_limit` passed to `agents_play_loop_bare()`; they are recorded with a winner value of `-1`. Pair-wins reports the number of times an agent won the same board configuration both as player 1 and as player 2.

In [None]:
def path_to_agent_name(path):
    """Shorten a '/'-separated checkpoint path to a compact display name.

    The name is built from the third-from-last and the last path components,
    joined by '/'. Paths with fewer than three components are returned as is.
    """
    components = path.split('/')
    if len(components) >= 3:
        return f'{components[-3]}/{components[-1]}'
    return path

# All recorded winner values as an array, so they can be compared element-wise.
# NOTE(review): `np` is not imported explicitly in the visible cells; presumably
# it comes in through one of the star imports — verify.
npcounter = np.array(counter)
# NOTE(review): npims / npias are 5x5 all-NaN placeholders that are never filled
# in the visible cells, so the "illegal moves" and "illegal actions" rows below
# always render as `nan`. Populate them or drop the rows once a data source exists.
npims = np.array([[float('NaN')] * 5] * 5)
npias = np.array([[float('NaN')] * 5] * 5)
# Results are stored in pairs per board: even positions hold the original
# board's result, odd positions the player-swapped board's result.
wins_one = npcounter[::2]
wins_pair = npcounter[1::2]

# Render the summary as a Markdown table. Winner value 0 is counted for the
# first agent's column, 1 for the second agent's column, and -1 as a timeout.
# A pair-win requires the same winner value on both boards of a pair.
display(Markdown(
f'''|           | `{path_to_agent_name(agent_path_1)}`  | `{path_to_agent_name(agent_path_2)}` |
| --------- | -------  | ------- |
| wins      | {np.sum(npcounter == 0)}    | {np.sum(npcounter == 1)}   |
| pair-wins | {np.sum(np.logical_and(wins_one == 0, wins_pair == 0))}     | {np.sum(np.logical_and(wins_one == 1, wins_pair == 1))}   |
| illegal moves | {np.sum(npims[:, 0])} | {np.sum(npims[:, 1])} |
| illegal actions | {np.sum(npias[:, 0])} | {np.sum(npias[:, 1])} |

Games timed out: {np.sum(npcounter == -1)}
'''))