# Imports

In [1]:
# Default needs
import dill
import numpy as np
import torch
import pandas as pd
from collections import Counter
from collections import defaultdict
from matplotlib import pyplot as plt

np.set_printoptions(precision=3)
np.set_printoptions(suppress=True)

# Importing Environments
from environments import square_room

from utils.agent_utils import calc_win_percentage

# Combat Handler
from combat_handler import CombatHandler

# agents
from agents import TIME_LIMIT

# Actions and Players
from actions import *
from players import dungeon_master
from players import hayden
from utils.dnd_utils import roll_dice
from creatures import Creature

# PPO and RandStrat
from agents import PPO
from agents import RandomStrategy
from agents import DoubleDQN

# Logging and Time
import logging
from datetime import datetime
import time

# Helper Files

In [2]:
# Helpers
def report_win_percentages(winner_list, num_games, combatants, total_rewards, last_states, num_actions_takens):
    """
    :return: None
    """
    win_percentages = calc_win_percentage(winner_list[-num_games:], combatants)
    last_states = torch.cat(last_states).data.numpy()
    print("Win percentages: {}\t".format(win_percentages))
    logger.info(("Win percentages: {}\t".format(win_percentages)))

    results = list(zip(winner_list[-num_games:], total_rewards[-num_games:], last_states, num_actions_takens))
    results = sorted(results, key=lambda x: -x[1])

    for winner, avg_reward, last_state, num_actions_taken in results:
        print(" {}: {} ({}) \t\t{}".format(winner, avg_reward, last_state, num_actions_taken))
    print("----------------------\n")


def intialize_combatants(combatants, combat_handler):
    """
    :param combatants:
    :return:
    """
    [combatant.initialize(combat_handler) for combatant in combatants]


# DEFINE PARAMS - EDIT HERE
- SET EXPERIMENT NAME - 1 CELL BELOW
- SET CHARACTER STATS - 3 CELLS BELOW
- N_ITERS - 3 CELLS BELOW

#### EDIT BELOW CELL

In [3]:
EXPT_NAME = "Wizard_Vs_Manticore_SparsePPO"

In [4]:
start_time = str(datetime.now().isoformat()[:-7]).replace(':',"-")

log_file_name = "logs\Plays_"+EXPT_NAME+"_"+start_time+".log"

logging.basicConfig(filename=log_file_name, filemode='w', level=logging.INFO)
logger = logging.getLogger("RUNNER")

print("GONNA LOG AT ",log_file_name)

GONNA LOG AT  logs\Plays_Wizard_Vs_Manticore_SparsePPO_2022-11-09T19-24-12.log


#### EDIT BELOW CELL
- CHARACTER DEFINITIONS 
    - copied from creatures.py with modifications
    - For custome reward function, etc
- N_ITERS = iterations

In [5]:
wizard = Creature(
    player=hayden,
    name="Leotris",
    hit_points=16,
    armor_class=11,
    resistance = 0,
    actions=[MoveLeft(), MoveRight(), MoveUp(), MoveDown(), DoNotMove(), fire_bolt_cantrip, ray_of_frost_cantrip, chromatic_orb_level_1, magic_missile_level_1, scorching_ray_level_2, aganazzars_scorcher_level_2],
    location=np.array([5, 10]),
    level_1_spell_slots = 3,
    level_2_spell_slots = 1,
    symbol="x",
    strategy=PPO()
)

manticore = Creature(
    player=dungeon_master,
    name="Strahd",
    hit_points=95,
    armor_class=16,
    actions=[MoveLeft(), MoveRight(), MoveUp(), MoveDown(), DoNotMove(), bite, tail_spike],
    level_1_spell_slots = 10,
    location=np.array([5, 5]),
    symbol="@",
    strategy=RandomStrategy()
)

n_iters = 500

#### Running Now

In [6]:
winner_list = []
total_rewards = []
last_states = []
num_actions_takens = []

logger.info(("BEGIN RUNNING FOR ",n_iters, datetime.now().isoformat()))

for i in range(n_iters):
    print(i)
    combat_handler = CombatHandler(
        environment=square_room,
        combatants=[wizard, manticore],
        time_limit=TIME_LIMIT
    )
    intialize_combatants([wizard, manticore], combat_handler=combat_handler)
    winner, total_reward, last_state, num_actions_taken = combat_handler.run()

    winner_list.append(winner)
    total_rewards.append(total_reward)
    last_states.append(last_state)
    num_actions_takens.append(num_actions_taken)

    if (i + 1) % 10 == 0:
        report_win_percentages(
                    winner_list=winner_list,
                    num_games=10,
                    combatants=[wizard, manticore],
                    total_rewards=total_rewards,
                    last_states=last_states,
                    num_actions_takens=num_actions_takens
        )

    # Save tabular Q
    if (i + 1) % 100 == 0:
        dill.dump(winner_list, open("results/winner_list_{}_EXPT_{}_STARTED_{}_NITERS{}.pickle".format(wizard.strategy.name, EXPT_NAME, start_time, n_iters), "wb"))
        dill.dump(wizard.strategy.policy_net, open("results/model_{}_EXPT_{}_STARTED_{}_ITERS_{}.pickle".format(wizard.strategy.name, EXPT_NAME, start_time,i+1), "wb"))
        dill.dump(total_rewards, open('results/reward_list_{}_EXPT_{}_STARTED_{}_NITERS{}.pickle'.format(wizard.strategy.name, EXPT_NAME, start_time, n_iters), "wb"))


logger.info(("DONE RUNNING FOR ",n_iters, datetime.now().isoformat()))

0
1
2
3
4
5
6
7
8
9
Win percentages: [('Leotris', 0.2), ('Strahd', 0.8)]	
 Leotris: 5 ([ 0.375 -0.021  0.6    0.7    0.8    0.8    0.     1.     0.585]) 		877
 Leotris: 5 ([ 0.125 -0.021  0.2    0.7    0.4    0.8    0.     1.     0.779]) 		1169
 Strahd: 0 ([0.    0.705 0.3   0.1   0.1   0.2   1.    0.833 0.055]) 		83
 Strahd: 0 ([-0.438  0.579  0.1    0.1    0.4    0.1    0.     1.     0.186]) 		279
 Strahd: 0 ([0.    0.337 0.8   0.5   0.8   0.6   0.    1.    0.87 ]) 		1305
 Strahd: 0 ([-0.062  0.642  0.3    0.6    0.3    0.7    0.     1.     0.131]) 		196
 Strahd: 0 ([0.    0.379 0.7   0.4   0.7   0.6   0.    0.667 0.54 ]) 		810
 Strahd: 0 ([0.    0.032 0.7   0.4   0.7   0.6   0.    0.833 0.483]) 		724
 Strahd: 0 ([0.    0.211 0.6   0.2   0.4   0.2   1.    0.333 0.339]) 		508
 Strahd: 0 ([0.    0.684 0.6   0.4   0.4   0.3   0.    0.833 0.157]) 		235
----------------------

10
11
12
13
14
15
16
17
18
19
Win percentages: [('Leotris', 0.6), ('Strahd', 0.4)]	
 Leotris: 5 ([0.    0.705 0.3



101
102
103
104
105
106
107
108
109
Win percentages: [('Leotris', 0.7), ('Strahd', 0.3)]	
 Leotris: 5 ([0.    0.705 0.3   0.1   0.1   0.2   1.    0.833 0.055]) 		83
 Leotris: 5 ([-0.438  0.579  0.1    0.1    0.4    0.1    0.     1.     0.186]) 		279
 Leotris: 5 ([ 0.375 -0.021  0.6    0.7    0.8    0.8    0.     1.     0.585]) 		877
 Leotris: 5 ([ 0.125 -0.021  0.2    0.7    0.4    0.8    0.     1.     0.779]) 		1169
 Leotris: 5 ([0.    0.379 0.7   0.4   0.7   0.6   0.    0.667 0.54 ]) 		810
 Leotris: 5 ([0.    0.032 0.7   0.4   0.7   0.6   0.    0.833 0.483]) 		724
 Leotris: 5 ([0.    0.684 0.6   0.4   0.4   0.3   0.    0.833 0.157]) 		235
 Strahd: 0 ([0.    0.337 0.8   0.5   0.8   0.6   0.    1.    0.87 ]) 		1305
 Strahd: 0 ([-0.062  0.642  0.3    0.6    0.3    0.7    0.     1.     0.131]) 		196
 Strahd: 0 ([0.    0.211 0.6   0.2   0.4   0.2   1.    0.333 0.339]) 		508
----------------------

110
111
112
113
114
115
116
117
118
119
Win percentages: [('Leotris', 0.8), ('Strahd', 0.2)]

In [9]:
import numpy as np
np.unique(np.array(winner_list),return_counts=True)

(array(['Leotris', 'Strahd'], dtype='<U7'), array([359, 141], dtype=int64))