# Agent vs. Agent

Make two agents fight each other!

## Setup

In [1]:
import sys
import time
from copy import deepcopy

import numpy as np
from IPython.display import clear_output, Markdown
from tqdm.auto import tqdm

if '..' not in sys.path: sys.path.append('..')
from src.utils.common import *
from src.dnd.actions import *
from src.dnd.units import *
from src.dnd.game_utils import *
from src.dnd.game_board import DnDBoard, GameState
from src.agent.agent import DnDAgent, IdleDnDAgent, passthrough_masker
from src.agent.agent_pg import DnDAgentPolicyGradient
from src.agent.agent_utils import agents_play_loop, agent_take_turn, agents_play_loop_bare
from src.dnd.game_configs import *

In [2]:
def create_pair_game(ref: DnDBoard):
    """Build a mirrored copy of `ref` in which the players' units are swapped.

    A new `DnDBoard` with the same `board_shape` is created; every unit of
    `ref` is deep-copied onto it at the same position, but assigned to player
    `1 - owner` (so the function assumes exactly two players, with ids 0 and 1).
    The new board is then initialized and given a copy of `ref`'s turn order.

    Args:
        ref: The board to mirror. It is only read, never modified.

    Returns:
        The newly created board with switched player ids.
    """
    mirrored = DnDBoard(ref.board_shape)

    for original in ref.units:
        clone = deepcopy(original)
        swapped_owner = 1 - ref.units_to_players[original]
        mirrored._place_unit(clone, original.pos, swapped_owner)

    mirrored.initialize_game()
    # NOTE(review): `.copy()` is shallow. If `turn_order` stores unit objects
    # rather than indices, it would still point at `ref`'s units - confirm.
    mirrored.set_turn_order(ref.turn_order.copy())

    return mirrored

## Load agents & game config

Load game configuration:

In [3]:
# Unpack the 2v2 preset into the board size and the game configuration used by later cells.
board_size, game_config = get_2v2_1_config()
# Field generator constructed for `board_size`; `gen` is whatever `load_from_folder('../Tokens/')`
# returns (presumably the generator itself, since `gen.reset()` is called on it later - confirm).
gen = fieldGenerator(board_size).load_from_folder('../Tokens/')

In [4]:
# Row-index (yi) and column-index (xi) grids, each of shape (board_size[0], board_size[1]).
# With 'ij' indexing, yi[r, c] == r and xi[r, c] == c.
yi, xi = np.meshgrid(np.arange(board_size[0]), np.arange(board_size[1]), indexing='ij')

def dnd_legal_moves_masker(state, ch_out):
    """Build a 0/1 mask of the outputs that are legal in the given state.

    The coordinate grids are derived from `state` itself, so the function does
    not depend on any module-level grid or on `board_size` and works for any
    board shape.

    Channels of `state` that are read (meanings follow the local variable
    names; confirm against `DnDBoard.CHANNEL_NAMES`):
        * 0, 1  - occupancy planes; plane 1 also marks the attackable targets
        * 2     - non-zero at the position of the unit whose turn it is
        * 4     - attack range, read at the current unit's cell
        * 11    - remaining speed, read at cell (0, 0)
        * 12    - positive at cell (0, 0) when the unit can still act

    Args:
        state: Array of shape (channels, height, width).
        ch_out: Number of output channels of the mask; must be at least 3.

    Returns:
        Float array of shape (ch_out, height, width) where
        channel 0 marks unoccupied cells within `remaining speed` Manhattan
        distance of the current unit, channel 1 marks cells set in plane 1
        within attack range, and channel 2 has cell (0, 0) always set
        (presumably the "end turn" output - confirm).

    Raises:
        IndexError: if the unit can move or act but plane 2 has no non-zero cell.
    """
    remaining_speed = state[11, 0, 0]
    can_move = remaining_speed > 0
    can_act = state[12, 0, 0] > 0

    board_shape = state.shape[1:]
    mask = np.zeros((ch_out, *board_shape))
    if can_move or can_act:
        current_unit_pos = np.where(state[2] != 0)
        y, x = current_unit_pos[0][0], current_unit_pos[1][0]

        # Index grids built from the state's own spatial shape (not from a global).
        row_idx, col_idx = np.meshgrid(
            np.arange(board_shape[0]), np.arange(board_shape[1]), indexing='ij'
        )
        # Manhattan distance from the current unit to every cell.
        distance = np.abs(row_idx - y) + np.abs(col_idx - x)

        if can_move:
            occupied = np.logical_or(state[0], state[1])
            possible_positions = np.where(np.logical_and(distance <= remaining_speed, occupied == 0))
            mask[0, possible_positions[0], possible_positions[1]] = 1
        if can_act:
            attack_range = state[4, y, x]
            possible_targets = np.where(np.logical_and(state[1], distance <= attack_range))
            mask[1, possible_targets[0], possible_targets[1]] = 1

    # This output stays available regardless of remaining speed / action.
    mask[2, 0, 0] = 1

    return mask

Load agents from disk

In [5]:
# Checkpoint locations of the two competing agents.
agent_path_1 = '../rnd/2v2-1/gen16/checkpoints/agent-11.3i-80.0k'
agent_path_2 = '../rnd/_temp-pg/checkpoints/agent-21.1i-9.0k'

# Agent 1: loaded with epsilon=0, uses the pass-through masker and a masked value of 0.
agent1 = DnDAgent.load_agent(agent_path_1, strip=True, epsilon=0)
indices1 = get_observation_indices(DnDBoard.CHANNEL_NAMES[:agent1.in_channels])
agent1.legal_moves_masker = passthrough_masker
agent1.masked_value = 0

# Agent 2: policy-gradient agent restricted to legal moves by `dnd_legal_moves_masker`.
agent2 = DnDAgentPolicyGradient.load_agent(agent_path_2)
indices2 = get_observation_indices(DnDBoard.CHANNEL_NAMES[:agent2.in_channels])
agent2.legal_moves_masker = dnd_legal_moves_masker

## One game

A random game board is generated, and the agents play against each other. All moves are visualized, and the delay between moves can be adjusted.

In [6]:
# Generate a balanced board for the loaded configuration and decorate it for display.
game, colormap = decorate_game(generate_balanced_game(board_size, game_config))

# Play one visualized game; the return value is discarded.
_ = agents_play_loop(
    [agent1, agent2],
    game,
    colormap,
    reset_epsilon=False,
    manual_input=False,
    delay=0.5,
    state_indices=[indices1, indices2],
)

Iteration: 10
	Unit moves: (2, 7) -> (0, 0) [not successful];
	Unit takes aciton `Sword attack` with attributes: {'source_unit': 'Ally archer', 'target_unit': 'Enemy archer'} [successful];
Units alive: 1
Players: 2:
	Player #0 (1 units): `[94mAlly archer (50 HP)[0m`  
	Player #1 (0 units)  
	
    0 1 2 3 4 5 6 7
 0                  0 
 1                  1 
 2               [94m██[0m 2 
 3                  3 
 4                  4 
 5                  5 
 6                  6 
 7                  7 
    0 1 2 3 4 5 6 7

Next move is by player #0: `[94mAlly archer[0m`

Game over in 10 iterations. Winner: player #1


## Multiple games

The agents play against each other for the given number of games. Each board is played twice: in the first game, agent 1 plays as player 1 and agent 2 as player 2; in the second game, agent 1 plays as player 2 and agent 2 as player 1.

In [7]:
# Winner of every game in play order: entry 2k is the generated board, entry 2k+1 its mirrored pair.
counter = []
games = 500
# Largest `ic` seen among games whose winner is not -1.
max_ic = 0

for i in tqdm(range(games)):
    gen.reset()
    # Alternative board source: generate_balanced_game(board_size, game_config)
    game = gen.generate_balanced_game(targetCR=1)
    game2 = create_pair_game(game)

    ic, win = agents_play_loop_bare(game, [agent1, agent2], [indices1, indices2], iter_limit=100)
    ic2, win2 = agents_play_loop_bare(game2, [agent1, agent2], [indices1, indices2], iter_limit=100)

    counter.extend((win, win2))

    # Only games with a winner (win != -1) contribute to the maximum.
    finished_lengths = [length for length, winner in ((ic, win), (ic2, win2)) if winner != -1]
    max_ic = max([max_ic, *finished_lengths])

  0%|          | 0/500 [00:00<?, ?it/s]

Print the results of the games. Timed-out games are games that lasted longer than the `iter_limit` passed to `agents_play_loop_bare()`. Pair-wins reports the number of times an agent won the same board configuration both as player 1 and as player 2.

In [9]:
def path_to_agent_name(path):
    """Shorten a '/'-separated checkpoint path to '<third-from-last>/<last>'.

    Paths with fewer than three '/'-separated components are returned unchanged.
    """
    parts = path.split('/')
    if len(parts) >= 3:
        return f'{parts[-3]}/{parts[-1]}'
    return path

npcounter = np.array(counter)
# NOTE(review): `npims` / `npias` are all-NaN placeholders - nothing in this cell
# fills them, so the "illegal moves" / "illegal actions" rows render as `nan`.
npims = np.full((5, 5), float('NaN'))
npias = np.full((5, 5), float('NaN'))
# Even entries come from the generated boards, odd entries from their mirrored pairs.
wins_one = npcounter[::2]
wins_pair = npcounter[1::2]

# Column headers.
name_1 = path_to_agent_name(agent_path_1)
name_2 = path_to_agent_name(agent_path_2)

# Winner index 0 -> first agent, 1 -> second agent, -1 -> timed out.
wins_1 = np.sum(npcounter == 0)
wins_2 = np.sum(npcounter == 1)
timed_out = np.sum(npcounter == -1)

# A pair-win requires winning both the generated board and its mirror.
pair_wins_1 = np.sum((wins_one == 0) & (wins_pair == 0))
pair_wins_2 = np.sum((wins_one == 1) & (wins_pair == 1))

illegal_moves_1, illegal_moves_2 = np.sum(npims[:, 0]), np.sum(npims[:, 1])
illegal_actions_1, illegal_actions_2 = np.sum(npias[:, 0]), np.sum(npias[:, 1])

summary_md = f'''|           | `{name_1}`  | `{name_2}` |
| --------- | -------  | ------- |
| wins      | {wins_1}    | {wins_2}   |
| pair-wins | {pair_wins_1}     | {pair_wins_2}   |
| illegal moves | {illegal_moves_1} | {illegal_moves_2} |
| illegal actions | {illegal_actions_1} | {illegal_actions_2} |

Games timed out: {timed_out}
'''
display(Markdown(summary_md))

|           | `gen16/agent-11.3i-80.0k`  | `_temp-pg/agent-21.1i-9.0k` |
| --------- | -------  | ------- |
| wins      | 924    | 75   |
| pair-wins | 424     | 0   |
| illegal moves | nan | nan |
| illegal actions | nan | nan |

Games timed out: 1
