In [1]:
# Default needs
import dill
import numpy as np
import torch
import pandas as pd
from collections import Counter
from collections import defaultdict
from matplotlib import pyplot as plt

np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

# Importing Environments
from environments import square_room

from utils.agent_utils import calc_win_percentage

# Combat Handler
from combat_handler import CombatHandler

# agents
from agents import TIME_LIMIT

# 
from actions import *
from players import dungeon_master
from players import hayden
from utils.dnd_utils import roll_dice
from creatures import Creature

# PPO and RandStrat
from agents import PPO
from agents import RandomStrategy

In [2]:
import logging
from datetime import datetime
logging.basicConfig(filename='Plays.log', filemode='w', level=logging.INFO)
logger = logging.getLogger("RUNNER")

In [3]:
# Helpers
def report_win_percentages(winner_list, num_games, combatants, total_rewards, last_states, num_actions_takens):
    """
    :return: None
    """
    win_percentages = calc_win_percentage(winner_list[-num_games:], combatants)
    last_states = torch.cat(last_states).data.numpy()
    print("Win percentages: {}\t".format(win_percentages))
    logger.info(("Win percentages: {}\t".format(win_percentages)))

    results = list(zip(winner_list[-num_games:], total_rewards[-num_games:], last_states, num_actions_takens))
    results = sorted(results, key=lambda x: -x[1])

    for winner, avg_reward, last_state, num_actions_taken in results:
        print(" {}: {} ({}) \t\t{}".format(winner, avg_reward, last_state, num_actions_taken))
    print("----------------------\n")


def intialize_combatants(combatants, combat_handler):
    """
    :param combatants:
    :return:
    """
    [combatant.initialize(combat_handler) for combatant in combatants]


In [4]:
wizard = Creature(
    player=hayden,
    name="Leotris",
    hit_points=16,
    armor_class=11,
    resistance = 0,
    actions=[MoveLeft(), MoveRight(), MoveUp(), MoveDown(), DoNotMove(), fire_bolt_cantrip, ray_of_frost_cantrip, chromatic_orb_level_1, magic_missile_level_1, scorching_ray_level_2, aganazzars_scorcher_level_2],
    location=np.array([5, 10]),
    level_1_spell_slots = 3,
    level_2_spell_slots = 1,
    symbol="x",
    strategy=PPO(win_reward=50,lose_reward=-50,attack_dealt_reward=1,attack_recieved_reward=-1)
)

manticore = Creature(
    player=dungeon_master,
    name="Strahd",
    hit_points=95,
    armor_class=16,
    actions=[MoveLeft(), MoveRight(), MoveUp(), MoveDown(), DoNotMove(), bite, tail_spike],
    level_1_spell_slots = 10,
    location=np.array([5, 5]),
    symbol="@",
    strategy=RandomStrategy()
)

n_iters = 100

winner_list = []
total_rewards = []
last_states = []
num_actions_takens = []

logger.info(("BEGIN RUNNING FOR ",n_iters, datetime.now().isoformat()))

for i in range(n_iters):
    combat_handler = CombatHandler(
        environment=square_room,
        combatants=[wizard, manticore],
        time_limit=TIME_LIMIT
    )
    intialize_combatants([wizard, manticore], combat_handler=combat_handler)
    winner, total_reward, last_state, num_actions_taken = combat_handler.run()

    winner_list.append(winner)
    total_rewards.append(total_reward)
    last_states.append(last_state)
    num_actions_takens.append(num_actions_taken)

    if (i + 1) % 10 == 0:
        report_win_percentages(
                    winner_list=winner_list,
                    num_games=10,
                    combatants=[wizard, manticore],
                    total_rewards=total_rewards,
                    last_states=last_states,
                    num_actions_takens=num_actions_takens
        )

    # Save tabular Q
    if (i + 1) % 10 == 0:
        dill.dump(winner_list, open("results/winner_list_{}.pickle".format(wizard.strategy.name), "wb"))
        dill.dump(wizard.strategy.policy_net, open("results/model_{}.pickle".format(wizard.strategy.name), "wb"))
        dill.dump(total_rewards, open('results/reward_list_{}.pickle'.format(wizard.strategy.name), "wb"))


logger.info(("DONE RUNNING FOR ",n_iters, datetime.now().isoformat()))

Win percentages: [('Leotris', 0.3), ('Strahd', 0.6), ('Timeout', 0.1)]	
 Leotris: 66 ([ 0.875 -0.011  0.8    0.4    0.2    0.2    1.     0.5    0.821]) 		1231
 Leotris: 64 ([0.188 0.    0.8   0.7   0.6   0.8   0.    1.    0.977]) 		1466
 Leotris: 51 ([ 0.062 -0.042  0.8    0.3    0.7    0.5    1.     1.     0.335]) 		503
 Strahd: -39 ([-0.062  0.042  0.7    0.6    0.7    0.5    0.     0.     0.974]) 		1461
 Timeout: -39 ([0.938 0.232 0.8   0.1   0.2   0.8   0.    0.333 1.   ]) 		1500
 Strahd: -47 ([0.    0.442 0.3   0.3   0.5   0.2   1.    0.667 0.634]) 		951
 Strahd: -50 ([-0.125  0.853  0.7    0.2    0.7    0.3    0.     1.     0.105]) 		158
 Strahd: -50 ([0.    0.611 0.7   0.1   0.8   0.3   1.    0.667 0.348]) 		522
 Strahd: -51 ([-0.25   0.726  0.2    0.1    0.3    0.6    0.     1.     0.057]) 		85
 Strahd: -56 ([0.    0.916 0.6   0.8   0.8   0.7   0.    0.833 0.079]) 		119
----------------------





Win percentages: [('Leotris', 0.4), ('Strahd', 0.6)]	
 Leotris: 67 ([-0.125  0.853  0.7    0.2    0.7    0.3    0.     1.     0.105]) 		158
 Leotris: 64 ([0.    0.611 0.7   0.1   0.8   0.3   1.    0.667 0.348]) 		522
 Leotris: 59 ([-0.25   0.726  0.2    0.1    0.3    0.6    0.     1.     0.057]) 		85
 Leotris: 55 ([ 0.062 -0.042  0.8    0.3    0.7    0.5    1.     1.     0.335]) 		503
 Strahd: -37 ([ 0.875 -0.011  0.8    0.4    0.2    0.2    1.     0.5    0.821]) 		1231
 Strahd: -41 ([0.938 0.232 0.8   0.1   0.2   0.8   0.    0.333 1.   ]) 		1500
 Strahd: -46 ([0.188 0.    0.8   0.7   0.6   0.8   0.    1.    0.977]) 		1466
 Strahd: -47 ([0.    0.442 0.3   0.3   0.5   0.2   1.    0.667 0.634]) 		951
 Strahd: -50 ([0.    0.916 0.6   0.8   0.8   0.7   0.    0.833 0.079]) 		119
 Strahd: -51 ([-0.062  0.042  0.7    0.6    0.7    0.5    0.     0.     0.974]) 		1461
----------------------

Win percentages: [('Leotris', 0.5), ('Strahd', 0.4), ('Timeout', 0.1)]	
 Leotris: 64 ([ 0.062 -0.042  0.