# Agent vs. Agent

Make two agents fight each other!

## Setup

In [None]:
import sys
import time
from tqdm.auto import tqdm
from copy import deepcopy
from IPython.display import clear_output, Markdown

if '..' not in sys.path: sys.path.append('..')
from src.utils.common import *
from src.dnd.actions import *
from src.dnd.units import *
from src.dnd.game_utils import *
from src.dnd.game_board import DnDBoard, GameState
from src.agent.agent import DnDAgent, IdleDnDAgent
from src.agent.agent_utils import get_states, agents_play_loop
from src.dnd.game_configs import *

In [None]:
def create_pair_game(ref: DnDBoard):
    """Build a copy of the given game board with the player ids switched.

    Every unit of `ref` is deep-copied onto a new board of the same shape,
    at the same position, but assigned to the opposite player id
    (`1 - original id`). The new game is then initialized and receives a
    copy of `ref`'s turn order.
    """
    mirrored = DnDBoard(ref.board_shape)

    for original_unit in ref.units:
        swapped_owner = 1 - ref.units_to_players[original_unit]
        mirrored._place_unit(deepcopy(original_unit), original_unit.pos, swapped_owner)

    mirrored.initialize_game()
    mirrored.set_turn_order(ref.turn_order.copy())
    return mirrored

In [None]:
def play_loop_fast(agent1, agent2, game: DnDBoard, iter_limit=1000, indices1=None, indices2=None):
    """Play `game` until it ends or the turn limit is reached, without visualization.

    `agent1` picks the move whenever `game.current_player_id` is 0, `agent2`
    picks it otherwise.

    Args:
        agent1: agent acting for player id 0.
        agent2: agent acting for the other player.
        game: board to play on; it is advanced through `game.take_turn()`.
        iter_limit: maximum number of turns to play before the game is
            declared timed out.
        indices1: passed to `get_states()` as `state_indices` on agent1's turns.
        indices2: passed to `get_states()` as `state_indices` on agent2's turns.

    Returns:
        A tuple `(turns_played, winner)`. `winner` is 0 when player 1 has no
        units left at the end of the game, 1 otherwise, and -1 when the game
        did not finish within `iter_limit` turns.
    """
    game_over = False
    iter_count = 0

    while not game_over:
        # `>=` (not `>`): the check runs before the next turn is taken, so this
        # stops after exactly `iter_limit` turns instead of `iter_limit + 1`.
        if iter_count >= iter_limit: return iter_count, -1

        iter_count += 1

        # Select the acting agent (and its observation indices) by whose turn it is.
        agent, indices = (agent1, indices1) if game.current_player_id == 0 else (agent2, indices2)
        _, _, new_coords, action = get_states(game, agent, state_indices=indices)
        game_over = game.take_turn(new_coords, action, True)[0] != GameState.PLAYING

    # Player 0 is declared the winner when player 1 has no units left.
    winner = 0 if len(game.players_to_units[1]) == 0 else 1

    return iter_count, winner

In [None]:
# Names of the 7 observation channels; needed for older agents
fms_7_ch = [
    'Ally units',
    'Enemy units',
    'Current unit',
    'Movement speed',
    'Attack range',
    'Attack damage',
    'Health',
]

## Load agents & game config

Load game configuration:

In [None]:
# 2v2 setup: a 5x5 board on which each player has 2 units
(board_size, game_config) = get_2v2_0_config()

Load agents from disk:

In [None]:
agent_path_1 = '../rnd/2v2-0/trained-agents/agent-gen30-11.2i-620000'
agent_path_2 = '../rnd/2v2-0/trained-agents/agent-gen30-11.1i-600000'


def _load_agent_with_indices(path):
    """Load an agent from `path` and pick the observation indices matching it."""
    agent = DnDAgent.load_agent(path, strip=True, epsilon=0)
    # Agents whose input channel count equals len(fms_7_ch) get the 7-channel
    # list; every other agent gets `None`.
    channels = fms_7_ch if agent.in_channels == len(fms_7_ch) else None
    return agent, get_observation_indices(channels)


agent1, indices1 = _load_agent_with_indices(agent_path_1)
agent2, indices2 = _load_agent_with_indices(agent_path_2)

## One game

A random game board is generated and the agents play against each other. All moves are visualized, and the delay between moves can be adjusted.

In [None]:
# Generate a balanced board and decorate it for display
game, colormap = decorate_game(
    generate_balanced_game(board_size, game_config)
)

# Let the two agents play it out; `delay` is the pause between moves
_ = agents_play_loop(
    agent1,
    agent2,
    game,
    colormap,
    manual_input=False,
    delay=0.5,
)

## Multiple games

The agents play against each other for the given number of games. Each game is played twice: in the first game, agent 1 plays as player 1 and agent 2 as player 2; in the second game, agent 1 plays as player 2 and agent 2 as player 1.

In [None]:
# Results are stored pairwise: the original board first, then its paired board
counter = []
games = 1000

for _ in tqdm(range(games)):
    game = generate_balanced_game(board_size, game_config)
    # Build the paired board before playing, so it copies the starting position
    game2 = create_pair_game(game)

    # Only the winner is recorded; the turn counts are not needed here
    _, win = play_loop_fast(agent1, agent2, game, indices1=indices1, indices2=indices2)
    _, win2 = play_loop_fast(agent1, agent2, game2, indices1=indices1, indices2=indices2)

    counter.extend((win, win2))

Print the results of the games. Timed-out games are those that lasted longer than the `iter_limit` specified in `play_loop_fast()`. Pair-wins reports the number of times an agent won the same board configuration both as player 1 and as player 2.

In [None]:
def path_to_agent_name(path):
    """Shorten an agent path to '<third-from-last component>/<last component>'.

    The path is split on '/'. Paths with fewer than three components are
    returned unchanged.
    """
    parts = path.split('/')
    if len(parts) >= 3:
        return f'{parts[-3]}/{parts[-1]}'
    return path

# `counter` holds two results per generated board: even positions come from the
# original board, odd positions from its paired board.
# Result values: 0 - agent 1 won, 1 - agent 2 won, -1 - game timed out.
npcounter = np.array(counter)
wins_one = npcounter[::2]
wins_pair = npcounter[1::2]

# Render a Markdown table with total wins and pair-wins (the same agent won
# both games of a pair) per agent, followed by the number of timed-out games.
display(Markdown(
f'''|           | `{path_to_agent_name(agent_path_1)}`  | `{path_to_agent_name(agent_path_2)}` |
| --------- | -------  | ------- |
| wins      | {np.sum(npcounter == 0)}    | {np.sum(npcounter == 1)}   |
| pair-wins | {np.sum(np.logical_and(wins_one == 0, wins_pair == 0))}     | {np.sum(np.logical_and(wins_one == 1, wins_pair == 1))}   |

Games timed out: {np.sum(npcounter == -1)}
'''))