In [46]:
import os
import sys
import numpy as np
import random

import pommerman
from pommerman.agents import SimpleAgent, RandomAgent, PlayerAgent, BaseAgent
from pommerman.configs import ffa_v0_fast_env,  one_vs_one_env
from pommerman.envs.v0 import Pomme
from pommerman.characters import Bomber
from pommerman import utility

from agents.train_agent import TrainAgent
from agents.static_agent import StaticAgent
from util.rewards import woods_close_to_bomb_reward

from dqn import DQN
from models import Pommer_Q
import torch
from util.data import transform_observation_centralized

In [47]:
# Path of the saved checkpoint (despite the name this is passed straight to
# torch.load, so it must point at a file, not a directory).
model_dir = "../../data/tensorboard/20220117T154102/9"

# Read the checkpoint from disk once and reuse it for both networks.
# map_location="cpu" lets a checkpoint that was saved from a GPU run be opened
# on a machine without CUDA; load_state_dict then copies the values into each
# network's own parameters.
state_dict = torch.load(model_dir, map_location="cpu")

# Online and target Q-networks are built identically and start from the same
# weights.
# NOTE(review): the meaning of the literal 21 is not visible here -- confirm
# against Pommer_Q's constructor.
q = Pommer_Q(21, transform_observation_centralized)
q_target = Pommer_Q(21, transform_observation_centralized)

q.load_state_dict(state_dict)
q_target.load_state_dict(state_dict)

algo = DQN(q, q_target)
policy = algo.get_policy()

# One policy-driven agent in slot 0 and three StaticAgent instances in the
# remaining slots.
agent_list = [
    TrainAgent(policy),
    StaticAgent(0),
    StaticAgent(0),
    StaticAgent(0),
]

env = pommerman.make('PommeTeamCompetition-v0', agent_list)

In [48]:
# Seed and reset the environment
SEED = 0

# Seed the global Python / NumPy / torch generators as well as the environment,
# so that a Restart & Run All reproduces the same episode.
# NOTE(review): which of these generators the environment and the policy
# actually draw from is not visible in this notebook -- seeding all of them is
# the conservative choice.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

env.seed(SEED)
obs = env.reset()

In [104]:
# Advance the game by one step; re-run this cell to keep stepping.
# Ask the environment for every agent's action on the current observations,
# then apply those actions.
actions = env.act(obs)
# NOTE(review): `done` is never checked, so re-running this cell keeps
# stepping even after the episode has ended.
nobs, reward, done, info = env.step(actions)
env.render()
# Position and blast strength of the first agent, taken from the observation
# *before* the step (obs is only rebound to nobs below).
bomb_pos = obs[0]['position']
blast_strength = obs[0]['blast_strength']
obs = nobs
# The reward is evaluated on the post-step observation of the first agent,
# combined with the pre-step position / blast strength captured above.
# NOTE(review): this call passes four positional arguments, so it relies on
# the function imported from util.rewards; the three-parameter definition of
# the same name further down in this notebook would raise TypeError here if
# its cell were executed first.
print(woods_close_to_bomb_reward(obs[0], bomb_pos, blast_strength, 0))
print(actions)

# env.close()

0.1
[3, 0, 0, 0]


In [106]:
# Shut the environment down once stepping is finished.
env.close()

In [None]:
def woods_close_to_bomb_reward(obs, bomb_pos, blast_strength):
    '''Reward for the wooden blocks a bomb at ``bomb_pos`` would destroy.

    The blast is traced outwards from the bomb in the four axis directions for
    ``blast_strength - 1`` cells. A ray stops at the board edge, at a rigid
    wall, or at the first wooden block it meets (which is counted). Wood hidden
    behind a wall or behind another wooden block is therefore not counted.

    Args:
        obs: observation dict; only ``obs['board']`` is read and it must be a
            2-D array of item codes.
        bomb_pos: ``(row, col)`` of the bomb on the board.
        blast_strength: blast strength of the bomb; cells at a distance
            strictly smaller than this value are reached.

    Returns:
        float: ``0.005`` times the number of wooden blocks that would be hit.
    '''
    # NOTE(review): this definition shadows the function of the same name
    # imported from util.rewards at the top of the notebook, and an earlier
    # cell calls that name with four positional arguments -- confirm which
    # signature is the intended one before running this cell.

    # Board item codes. 2 is the wood code the original implementation matched
    # on; 1 is assumed to be the rigid-wall code (pommerman Item.Rigid) --
    # TODO confirm.
    rigid_code = 1
    wood_code = 2

    board = np.asarray(obs['board'])
    n_rows, n_cols = board.shape
    bomb_row, bomb_col = int(bomb_pos[0]), int(bomb_pos[1])

    num_wood = 0
    for d_row, d_col in ((-1, 0), (1, 0), (0, -1), (0, 1)):
        for step in range(1, int(blast_strength)):
            row = bomb_row + step * d_row
            col = bomb_col + step * d_col
            # explicit bounds check: a negative index would silently wrap
            if not (0 <= row < n_rows and 0 <= col < n_cols):
                break
            cell = board[row, col]
            if cell == rigid_code:
                break
            if cell == wood_code:
                # the first wooden block absorbs the blast in this direction
                num_wood += 1
                break

    # for each wood reward 0.005
    reward = 0.005 * num_wood
    return reward

In [None]:
def num_wood_destroyed(act, obs, nobs, agents_bomb_pos, blast_strength=2):
    '''Count wooden blocks that vanished in this step within reach of a bomb.

    A block counts when it is wood on the pre-step board, no longer wood on
    the post-step board, shares a row or a column with ``agents_bomb_pos`` and
    lies at a distance strictly smaller than ``blast_strength`` from it.

    Args:
        act: actions taken in this step (currently unused).
        obs: observations before the step; only ``obs[0]['board']`` is read.
        nobs: observations after the step; only ``nobs[0]['board']`` is read.
        agents_bomb_pos: ``(row, col)`` of the agent's bomb, or ``None`` when
            there is no bomb to attribute destroyed wood to.
        blast_strength: reach of the bomb; the default of 2 reproduces the
            previously hard-coded limit.

    Returns:
        int: number of destroyed wooden blocks attributed to the bomb.
    '''
    if agents_bomb_pos is None:
        return 0

    wood_code = 2  # item code matched for wooden blocks
    bomb_code = 3  # item code the original check compared against

    board = np.asarray(obs[0]['board'])
    board_new = np.asarray(nobs[0]['board'])
    bomb_row, bomb_col = int(agents_bomb_pos[0]), int(agents_bomb_pos[1])

    # NOTE(review): the original tested the *pre-step* board for "bomb
    # exploded" and that is kept unchanged here; if the intent is "the bomb is
    # gone after the step", this should probably read board_new -- confirm.
    if board[bomb_row, bomb_col] == bomb_code:
        return 0

    # Wood before the step that is not wood afterwards. Boolean masks avoid
    # the unsigned wrap-around of subtracting two uint8 bitmaps.
    destroyed = (board == wood_code) & (board_new != wood_code)
    rows, cols = np.nonzero(destroyed)

    # Absolute offsets: without abs(), every block above or to the left of the
    # bomb would pass the "< blast_strength" test regardless of its distance.
    d_row = np.abs(rows - bomb_row)
    d_col = np.abs(cols - bomb_col)

    in_line = (d_row == 0) | (d_col == 0)
    in_reach = (d_row < blast_strength) & (d_col < blast_strength)
    return int(np.count_nonzero(in_line & in_reach))


