diff --git a/.gitignore b/.gitignore index 0fa0b8db6..011a34c5c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,10 @@ maps/ runs/* wandb/* -# local replay file from tests/test_deterministic_replay.py, test_render_save.py +# local replay file from test_render_save.py tests/replay_local*.pickle replay* +eval* .vscode @@ -162,3 +163,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +profile.run \ No newline at end of file diff --git a/nmmo/__init__.py b/nmmo/__init__.py index eec648f32..22813666b 100644 --- a/nmmo/__init__.py +++ b/nmmo/__init__.py @@ -6,7 +6,7 @@ from .render.overlay import Overlay, OverlayRegistry from .core import config, agent, action from .core.action import Action -from .core.agent import Agent +from .core.agent import Agent, Scripted from .core.env import Env from .core.terrain import MapGenerator, Terrain @@ -22,7 +22,7 @@ \ \:\ \ \:\ \ \:\ \ \::/ maintained at MIT in \__\/ \__\/ \__\/ \__\/ Phillip Isola's lab ''' -__all__ = ['Env', 'config', 'agent', 'Agent', 'MapGenerator', 'Terrain', +__all__ = ['Env', 'config', 'agent', 'Agent', 'Scripted', 'MapGenerator', 'Terrain', 'action', 'Action', 'material', 'spawn', 'Overlay', 'OverlayRegistry'] diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 6701514a1..5a136be3e 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -2,14 +2,15 @@ # pylint: disable=no-method-argument,unused-argument,no-self-argument,no-member from enum import Enum, auto -from ordered_set import OrderedSet import numpy as np +from nmmo.core.observation import Observation from nmmo.lib import utils from nmmo.lib.utils import staticproperty -from nmmo.systems.item import Item, Stack +from nmmo.systems.item import Stack from nmmo.lib.log import EventCode + class NodeType(Enum): #Tree edges STATIC = auto() #Traverses all edges without decisions @@ -23,7 +24,8 @@ class NodeType(Enum): class Node(metaclass=utils.IterableNameComparable): @classmethod def init(cls, config): - pass + # noop_action is used in some of the N() methods + cls.noop_action = 1 if config.PROVIDE_NOOP_ACTION_TARGET else 0 @staticproperty def edges(): @@ -46,12 +48,9 @@ def leaf(): def N(cls, config): return len(cls.edges) - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return index - def args(stim, entity, config): - return [] - class Fixed: pass @@ -75,7 +74,7 @@ def hook(config): arguments = [] for action in Action.edges(config): action.init(config) - for args in action.edges: + for args in action.edges: # pylint: disable=not-an-iterable args.init(config) if not 'edges' in args.__dict__: continue @@ -105,10 +104,6 @@ def edges(cls, config): edges.append(Comm) return edges - def args(stim, entity, config): - raise NotImplementedError - - class Move(Node): priority = 60 nodeType = NodeType.SELECTION @@ -139,12 +134,13 @@ def call(realm, entity, direction): realm.map.tiles[r_new, c_new].add_entity(entity) # exploration record keeping. moved from entity.py, History.update() - dist_from_spawn = utils.linf(entity.spawn_pos, (r_new, c_new)) - if dist_from_spawn > entity.history.exploration: - entity.history.exploration = dist_from_spawn + progress_to_center = realm.map.dist_border_center -\ + utils.linf_single(realm.map.center_coord, (r_new, c_new)) + if progress_to_center > entity.history.exploration: + entity.history.exploration = progress_to_center if entity.is_player: realm.event_log.record(EventCode.GO_FARTHEST, entity, - distance=dist_from_spawn) + distance=progress_to_center) # CHECK ME: material.Impassible includes void, so this line is not reachable # Does this belong to Entity/Player.update()? @@ -169,10 +165,7 @@ class Direction(Node): def edges(): return [North, South, East, West, Stay] - def args(stim, entity, config): - return Direction.edges - - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Direction, index) # a quick helper function @@ -180,7 +173,7 @@ def deserialize_fixed_arg(arg, index): if isinstance(index, (int, np.int64)): if index < 0: return None # so that the action will be discarded - val = min(index-1, len(arg.edges)-1) + val = min(index, len(arg.edges)-1) return arg.edges[val] # if index is not int, it's probably already deserialized @@ -203,7 +196,6 @@ class West(Node): class Stay(Node): delta = (0, 0) - class Attack(Node): priority = 50 nodeType = NodeType.SELECTION @@ -226,7 +218,7 @@ def in_range(entity, stim, config, N): R, C = stim.shape R, C = R//2, C//2 - rets = OrderedSet([entity]) + rets = set([entity]) for r in range(R-N, R+N+1): for c in range(C-N, C+N+1): for e in stim[r, c].entities.values(): @@ -235,14 +227,6 @@ def in_range(entity, stim, config, N): rets = list(rets) return rets - # CHECK ME: do we need l1 distance function? - # systems/ai/utils.py also has various distance functions - # which we may want to clean up - # def l1(pos, cent): - # r, c = pos - # r_cent, c_cent = cent - # return abs(r - r_cent) + abs(c - c_cent) - def call(realm, entity, style, target): if style is None or target is None: return None @@ -256,7 +240,7 @@ def call(realm, entity, style, target): # Testing a spawn immunity against old agents to avoid spawn camping immunity = config.COMBAT_SPAWN_IMMUNITY if entity.is_player and target.is_player and \ - target.history.time_alive < immunity < entity.history.time_alive.val: + target.history.time_alive < immunity: return None #Check if self targeted @@ -264,7 +248,7 @@ def call(realm, entity, style, target): return None #Can't attack out of range - if utils.linf(entity.pos, target.pos) > style.attack_range(config): + if utils.linf_single(entity.pos, target.pos) > style.attack_range(config): return None #Execute attack @@ -293,28 +277,20 @@ class Style(Node): def edges(): return [Melee, Range, Mage] - def args(stim, entity, config): - return Style.edges - - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Style, index) - class Target(Node): argType = None @classmethod def N(cls, config): - return config.PLAYER_N_OBS - - def deserialize(realm, entity, index: int): - # NOTE: index is the entity id - # CHECK ME: should index be renamed to ent_id? - return realm.entity_or_none(index) + return config.PLAYER_N_OBS + cls.noop_action - def args(stim, entity, config): - #Should pass max range? - return Attack.in_range(entity, stim, config, None) + def deserialize(realm, entity, index: int, obs: Observation): + if index >= len(obs.entities.ids): + return None + return realm.entity_or_none(obs.entities.ids[index]) class Melee(Node): nodeType = NodeType.ACTION @@ -346,27 +322,17 @@ def attack_range(config): def skill(entity): return entity.skills.mage - class InventoryItem(Node): argType = None @classmethod def N(cls, config): - return config.INVENTORY_N_OBS - - # TODO(kywch): What does args do? - def args(stim, entity, config): - return stim.exchange.items() + return config.INVENTORY_N_OBS + cls.noop_action - def deserialize(realm, entity, index: int): - # NOTE: index is from the inventory, NOT item id - inventory = Item.Query.owned_by(realm.datastore, entity.id.val) - - if index >= inventory.shape[0]: + def deserialize(realm, entity, index: int, obs: Observation): + if index >= len(obs.inventory.ids): return None - - item_id = inventory[index, Item.State.attr_name_to_col["id"]] - return realm.items[item_id] + return realm.items.get(obs.inventory.ids[index]) class Use(Node): priority = 10 @@ -490,7 +456,6 @@ def call(realm, entity, item, target): realm.event_log.record(EventCode.GIVE_ITEM, entity) - class GiveGold(Node): priority = 30 @@ -528,37 +493,26 @@ def call(realm, entity, amount, target): if not isinstance(amount, int): amount = amount.val - if not (amount > 0 and entity.gold.val > 0): # no gold to give + if amount > entity.gold.val: # no gold to give return - amount = min(amount, entity.gold.val) - entity.gold.decrement(amount) target.gold.increment(amount) realm.event_log.record(EventCode.GIVE_GOLD, entity) - class MarketItem(Node): argType = None @classmethod def N(cls, config): - return config.MARKET_N_OBS - - # TODO(kywch): What does args do? - def args(stim, entity, config): - return stim.exchange.items() - - def deserialize(realm, entity, index: int): - # NOTE: index is from the market, NOT item id - market = Item.Query.for_sale(realm.datastore) + return config.MARKET_N_OBS + cls.noop_action - if index >= market.shape[0]: + def deserialize(realm, entity, index: int, obs: Observation): + if index >= len(obs.market.ids): return None - item_id = market[index, Item.State.attr_name_to_col["id"]] - return realm.items[item_id] + return realm.items.get(obs.market.ids[index]) class Buy(Node): priority = 20 @@ -659,19 +613,24 @@ class Price(Node): @classmethod def init(cls, config): # gold should be > 0 - Price.classes = init_discrete(range(1, config.PRICE_N_OBS+1)) + cls.price_range = range(1, config.PRICE_N_OBS+1) + Price.classes = init_discrete(cls.price_range) + + @classmethod + def index(cls, price): + try: + return cls.price_range.index(price) + except ValueError: + # use the max price, which is config.PRICE_N_OBS + return len(cls.price_range) - 1 @staticproperty def edges(): return Price.classes - def args(stim, entity, config): - return Price.edges - - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Price, index) - class Token(Node): argType = Fixed @@ -683,13 +642,9 @@ def init(cls, config): def edges(): return Token.classes - def args(stim, entity, config): - return Token.edges - - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Token, index) - class Comm(Node): argType = Fixed priority = 99 diff --git a/nmmo/core/agent.py b/nmmo/core/agent.py index 04fdd5500..0bdfa6b10 100644 --- a/nmmo/core/agent.py +++ b/nmmo/core/agent.py @@ -1,4 +1,3 @@ - class Agent: policy = 'Neural' @@ -11,6 +10,7 @@ def __init__(self, config, idx): ''' self.config = config self.iden = idx + self._np_random = None def __call__(self, obs): '''Used by scripted agents to compute actions. Override in subclasses. @@ -18,3 +18,15 @@ def __call__(self, obs): Args: obs: Agent observation provided by the environment ''' + + def set_rng(self, np_random): + '''Set the random number generator for the agent for reproducibility + + Args: + np_random: A numpy random.Generator object + ''' + self._np_random = np_random + +class Scripted(Agent): + '''Base class for scripted agents''' + policy = 'Scripted' diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 5b824f4fe..9cc7cccf9 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -144,28 +144,26 @@ def __init__(self): def game_system_enabled(self, name) -> bool: return hasattr(self, name) - PROVIDE_ACTION_TARGETS = True - '''Flag used to provide action targets mask''' + '''Provide action targets mask''' + + PROVIDE_NOOP_ACTION_TARGET = False + '''Provide a no-op option for each action''' PLAYERS = [Agent] '''Player classes from which to spawn''' - ############################################################################ - ### Emulation Parameters - - EMULATE_FLAT_OBS = False - '''Emulate a flat observation space''' + HORIZON = 1024 + '''Number of steps before the environment resets''' - EMULATE_FLAT_ATN = False - '''Emulate a flat action space''' + CURRICULUM_FILE_PATH = None + '''Path to a curriculum task file containing a list of task specs for training''' - EMULATE_CONST_PLAYER_N = False - '''Emulate a constant number of agents''' - - EMULATE_CONST_HORIZON = False - '''Emulate a constant HORIZON simulations steps''' + TASK_EMBED_DIM = 1024 + '''Dimensionality of task embeddings''' + ALLOW_MULTI_TASKS_PER_AGENT = False + '''Whether to allow multiple tasks per agent''' ############################################################################ ### Population Parameters @@ -219,6 +217,9 @@ def PLAYER_VISION_DIAMETER(self): IMMORTAL = False '''Debug parameter: prevents agents from dying except by void''' + RESET_ON_DEATH = False + '''Whether to reset the environment whenever an agent dies''' + BASE_HEALTH = 10 '''Initial Constitution level and agent health''' @@ -436,19 +437,19 @@ class Progression: PROGRESSION_LEVEL_MAX = 10 '''Max skill level''' - PROGRESSION_MELEE_BASE_DAMAGE = 0 + PROGRESSION_MELEE_BASE_DAMAGE = 20 '''Base Melee attack damage''' PROGRESSION_MELEE_LEVEL_DAMAGE = 5 '''Bonus Melee attack damage per level''' - PROGRESSION_RANGE_BASE_DAMAGE = 0 + PROGRESSION_RANGE_BASE_DAMAGE = 20 '''Base Range attack damage''' PROGRESSION_RANGE_LEVEL_DAMAGE = 5 '''Bonus Range attack damage per level''' - PROGRESSION_MAGE_BASE_DAMAGE = 0 + PROGRESSION_MAGE_BASE_DAMAGE = 20 '''Base Mage attack damage ''' PROGRESSION_MAGE_LEVEL_DAMAGE = 5 @@ -491,13 +492,13 @@ class NPC: NPC_BASE_DEFENSE = 0 '''Base NPC defense''' - NPC_LEVEL_DEFENSE = 30 + NPC_LEVEL_DEFENSE = 15 '''Bonus NPC defense per level''' NPC_BASE_DAMAGE = 15 '''Base NPC damage''' - NPC_LEVEL_DAMAGE = 30 + NPC_LEVEL_DAMAGE = 15 '''Bonus NPC damage per level''' @@ -610,12 +611,8 @@ class Exchange: EXCHANGE_LISTING_DURATION = 5 '''The number of ticks, during which the item is listed for sale''' - @property - def MARKET_N_OBS(self): - # TODO(kywch): This is a hack. Check if the limit is reached - # pylint: disable=no-member - '''Number of distinct item observations''' - return self.PLAYER_N * self.EXCHANGE_LISTING_DURATION + MARKET_N_OBS = 1024 + '''Number of distinct item observations''' PRICE_N_OBS = 99 # make it different from PLAYER_N_OBS '''Number of distinct price observations diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 7537895e4..db0340335 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,8 +1,8 @@ import functools -import random from typing import Any, Dict, List, Callable from collections import defaultdict -from ordered_set import OrderedSet +from copy import copy, deepcopy +import dill import gym import numpy as np @@ -15,9 +15,9 @@ from nmmo.core.tile import Tile from nmmo.entity.entity import Entity from nmmo.systems.item import Item -from nmmo.task import task_api +from nmmo.task import task_api, task_spec from nmmo.task.game_state import GameStateGenerator -from scripted.baselines import Scripted +from nmmo.lib import seeding class Env(ParallelEnv): # Environment wrapper for Neural MMO using the Parallel PettingZoo API @@ -26,50 +26,56 @@ class Env(ParallelEnv): def __init__(self, config: Default = nmmo.config.Default(), seed = None): - self._init_random(seed) - + self._np_random = None + self._np_seed = None + self._reset_required = True + self.seed(seed) super().__init__() self.config = config - self.realm = realm.Realm(config) + self.realm = realm.Realm(config, self._np_random) self.obs = None + self._dummy_obs = None self.possible_agents = list(range(1, config.PLAYER_N + 1)) + self._agents = None self._dead_agents = set() - self._episode_stats = defaultdict(lambda: defaultdict(float)) - self.scripted_agents = OrderedSet() + self._dead_this_tick = None + self.scripted_agents = set() self._gamestate_generator = GameStateGenerator(self.realm, self.config) self.game_state = None # Default task: rewards 1 each turn agent is alive self.tasks = task_api.nmmo_default_task(self.possible_agents) - - # pylint: disable=method-cache-max-size-none - @functools.lru_cache(maxsize=None) - def observation_space(self, agent: int): - '''Neural MMO Observation Space - - Args: - agent: Agent ID - - Returns: - observation: gym.spaces object contained the structured observation - for the specified agent. Each visible object is represented by - continuous and discrete vectors of attributes. A 2-layer attentional - encoder can be used to convert this structured observation into - a flat vector embedding.''' - + self.agent_task_map = None + self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float16) + + # curriculum file path, if provided, should exist + self.curriculum_file_path = config.CURRICULUM_FILE_PATH + if self.curriculum_file_path is not None: + # try to open the file to check if it exists + with open(self.curriculum_file_path, 'rb') as f: + curriculum = dill.load(f) # pylint: disable=unused-variable + f.close() + + @functools.cached_property + def _obs_space(self): def box(rows, cols): return gym.spaces.Box( - low=-2**20, high=2**20, + low=-2**15, high=2**15-1, shape=(rows, cols), - dtype=np.float32) + dtype=np.int16) + def mask_box(length): + return gym.spaces.Box(low=0, high=1, shape=(length,), dtype=np.int8) obs_space = { - "CurrentTick": gym.spaces.Discrete(1), - "AgentId": gym.spaces.Discrete(1), + "CurrentTick": gym.spaces.Discrete(self.config.HORIZON+1), + "AgentId": gym.spaces.Discrete(self.config.PLAYER_N+1), "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes), "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes), + "Task": gym.spaces.Box(low=-2**15, high=2**15-1, + shape=(self.config.TASK_EMBED_DIM,), + dtype=np.float16), } if self.config.ITEM_SYSTEM_ENABLED: @@ -79,41 +85,64 @@ def box(rows, cols): obs_space["Market"] = box(self.config.MARKET_N_OBS, Item.State.num_attributes) if self.config.PROVIDE_ACTION_TARGETS: - obs_space['ActionTargets'] = self.action_space(None) + mask_spec = deepcopy(self._atn_space) + for atn_str in mask_spec: + for arg_str in mask_spec[atn_str]: + mask_spec[atn_str][arg_str] = mask_box(self._atn_space[atn_str][arg_str].n) + obs_space["ActionTargets"] = mask_spec return gym.spaces.Dict(obs_space) - def _init_random(self, seed): - if seed is not None: - np.random.seed(seed) - random.seed(seed) - + # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) - def action_space(self, agent): - '''Neural MMO Action Space + def observation_space(self, agent: AgentID): + '''Neural MMO Observation Space Args: agent: Agent ID Returns: - actions: gym.spaces object contained the structured actions - for the specified agent. Each action is parameterized by a list - of discrete-valued arguments. These consist of both fixed, k-way - choices (such as movement direction) and selections from the - observation space (such as targeting)''' + observation: gym.spaces object contained the structured observation + for the specified agent.''' + return self._obs_space + @functools.cached_property + def _atn_space(self): actions = {} for atn in sorted(nmmo.Action.edges(self.config)): if atn.enabled(self.config): - - actions[atn] = {} + actions[atn.__name__] = {} # use the string key for arg in sorted(atn.edges): n = arg.N(self.config) - actions[atn][arg] = gym.spaces.Discrete(n) + actions[atn.__name__][arg.__name__] = gym.spaces.Discrete(n) + actions[atn.__name__] = gym.spaces.Dict(actions[atn.__name__]) + return gym.spaces.Dict(actions) - actions[atn] = gym.spaces.Dict(actions[atn]) + @functools.cached_property + def _str_atn_map(self): + '''Map action and argument names to their corresponding objects''' + str_map = {} + for atn in nmmo.Action.edges(self.config): + str_map[atn.__name__] = atn + for arg in atn.edges: + str_map[arg.__name__] = arg + return str_map - return gym.spaces.Dict(actions) + # pylint: disable=method-cache-max-size-none + @functools.lru_cache(maxsize=None) + def action_space(self, agent: AgentID): + '''Neural MMO Action Space + + Args: + agent: Agent ID + + Returns: + actions: gym.spaces object contained the structured actions + for the specified agent. Each action is parameterized by a list + of discrete-valued arguments. These consist of both fixed, k-way + choices (such as movement direction) and selections from the + observation space (such as targeting)''' + return self._atn_space ############################################################################ # Core API @@ -143,28 +172,71 @@ def reset(self, map_id=None, seed=None, options=None, but finite horizon: ~1000 timesteps for small maps and 5000+ timesteps for large maps ''' - - self._init_random(seed) - self.realm.reset(map_id) + self.seed(seed) + self.realm.reset(self._np_random, map_id) + self._agents = list(self.realm.players.keys()) self._dead_agents = set() - self._episode_stats.clear() + self._dead_this_tick = {} # check if there are scripted agents for eid, ent in self.realm.players.items(): - if isinstance(ent.agent, Scripted): + if isinstance(ent.agent, nmmo.Scripted): self.scripted_agents.add(eid) + ent.agent.set_rng(self._np_random) - self.obs = self._compute_observations() - self._gamestate_generator = GameStateGenerator(self.realm, self.config) - - if make_task_fn is not None: + if self.curriculum_file_path is not None: + self.tasks = self._sample_training_tasks() + elif make_task_fn is not None: self.tasks = make_task_fn() else: for task in self.tasks: task.reset() + self.agent_task_map = self._map_task_to_agent() + + self._dummy_obs = self._make_dummy_obs() + self.obs = self._compute_observations() + self._gamestate_generator = GameStateGenerator(self.realm, self.config) + if self.game_state is not None: + self.game_state.clear_cache() + self.game_state = None + + self._reset_required = False return {a: o.to_gym() for a,o in self.obs.items()} + def _sample_training_tasks(self): + with open(self.curriculum_file_path, 'rb') as f: + # curriculum file may have been changed, so read the file when sampling + curriculum = dill.load(f) # a list of TaskSpec + + sampling_weights = [spec.sampling_weight for spec in curriculum] + sampled_spec = self._np_random.choice(curriculum, size=len(self.possible_agents), + p=sampling_weights/np.sum(sampling_weights)) + + return task_spec.make_task_from_spec(self.possible_agents, sampled_spec) + + def _map_task_to_agent(self): + agent_task_map: Dict[int, List[task_api.Task]] = {} + for task in self.tasks: + if task.embedding is None: + task.set_embedding(self._dummy_task_embedding) + # validate task embedding + assert self._obs_space['Task'].contains(task.embedding), "Task embedding is not valid" + + # map task to agents + for agent_id in task.assignee: + if agent_id in agent_task_map: + agent_task_map[agent_id].append(task) + else: + agent_task_map[agent_id] = [task] + + # for now we only support one task per agent + if self.config.ALLOW_MULTI_TASKS_PER_AGENT is False: + for agent_tasks in agent_task_map.values(): + assert len(agent_tasks) == 1, "Only one task per agent is supported" + + return agent_task_map + def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Simulates one game tick or timestep @@ -257,7 +329,7 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): Provided for conformity with PettingZoo ''' - assert self.obs is not None, 'step() called before reset' + assert not self._reset_required, 'step() called before reset' # Add in scripted agents' actions, if any if self.scripted_agents: actions = self._compute_scripted_agent_actions(actions) @@ -266,31 +338,27 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # we don't need _deserialize_scripted_actions() anymore actions = self._validate_actions(actions) # Execute actions - self.realm.step(actions) + self._dead_this_tick = self.realm.step(actions) + # the list of "current" agents, both alive and dead_this_tick + self._agents = list(set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys()))) + dones = {} - for eid in self.possible_agents: - if eid not in self.realm.players or self.realm.tick >= self.config.HORIZON: - if eid not in self._dead_agents: - self._dead_agents.add(eid) - self._episode_stats[eid]["death_tick"] = self.realm.tick - dones[eid] = True + for agent_id in self.agents: + if agent_id in self._dead_this_tick or \ + self.realm.tick >= self.config.HORIZON or \ + (self.config.RESET_ON_DEATH and len(self._dead_agents) > 0): + self._dead_agents.add(agent_id) + dones[agent_id] = True + else: + dones[agent_id] = False # Store the observations, since actions reference them self.obs = self._compute_observations() gym_obs = {a: o.to_gym() for a,o in self.obs.items()} - rewards, infos = self._compute_rewards(self.obs.keys(), dones) - for k,r in rewards.items(): - self._episode_stats[k]['reward'] += r - - # When the episode ends, add the episode stats to the info of one of - # the last dagents - if len(self._dead_agents) == len(self.possible_agents): - for agent_id, stats in self._episode_stats.items(): - if agent_id not in infos: - infos[agent_id] = {} - infos[agent_id]["episode_stats"] = stats + rewards, infos = self._compute_rewards() + # NOTE: all obs, rewards, dones, infos have data for each agent in self.agents return gym_obs, rewards, dones, infos def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): @@ -311,16 +379,17 @@ def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): validated_actions[ent_id] = {} - for atn, args in sorted(atns.items()): + for atn_key, args in sorted(atns.items()): action_valid = True deserialized_action = {} - + atn = self._str_atn_map[atn_key] if isinstance(atn_key, str) else atn_key if not atn.enabled(self.config): action_valid = False break - for arg, val in sorted(args.items()): - obj = arg.deserialize(self.realm, entity, val) + for arg_key, val in sorted(args.items()): + arg = self._str_atn_map[arg_key] if isinstance(arg_key, str) else arg_key + obj = arg.deserialize(self.realm, entity, val, self.obs[ent_id]) if obj is None: action_valid = False break @@ -333,70 +402,79 @@ def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Compute actions for scripted agents and add them into the action dict''' - for eid in self.scripted_agents: - # remove the dead scripted agent from the list - if eid in self._dead_agents or eid not in self.realm.players: - self.scripted_agents.discard(eid) - continue + dead_agents = set() + for agent_id in self.scripted_agents: + if agent_id in self.realm.players: + # override the provided scripted agents' actions + actions[agent_id] = self.realm.players[agent_id].agent(self.obs[agent_id]) + else: + dead_agents.add(agent_id) - # override the provided scripted agents' actions - actions[eid] = self.realm.players[eid].agent(self.obs[eid]) + # remove the dead scripted agent from the list + self.scripted_agents -= dead_agents return actions - def _compute_observations(self): - '''Neural MMO Observation API + def _make_dummy_obs(self): + dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col)), dtype=np.int16) + dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col)), dtype=np.int16) + dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) + dummy_market = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) + return Observation(self.config, self.realm.tick, 0, self._dummy_task_embedding, + dummy_tiles, dummy_entities, dummy_inventory, dummy_market) - Args: - agents: List of agents to return observations for. If None, returns - observations for all agents - - Returns: - obs: Dictionary of observations for each agent - obs[agent_id] = { - "Entity": [e1, e2, ...], - "Task": [encoded_task], - "Tile": [t1, t2, ...], - "Inventory": [i1, i2, ...], - "Market": [m1, m2, ...], - "ActionTargets": { - "Attack": [a1, a2, ...], - "Sell": [s1, s2, ...], - "Buy": [b1, b2, ...], - "Move": [m1, m2, ...], - } - ''' + def _compute_observations(self): + # Clean up unnecessary observations, which cause memory leaks + if self.obs is not None: + for agent_id, agent_obs in self.obs.items(): + agent_obs.clear_cache() # clear the lru_cache + self.obs[agent_id] = None + del agent_obs + self.obs = None obs = {} - market = Item.Query.for_sale(self.realm.datastore) + # get tile map, to bypass the expensive tile window query + tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE) + radius = self.config.PLAYER_VISION_RADIUS + tile_obs_size = ((2*radius+1)**2, len(Tile.State.attr_name_to_col)) + for agent_id in self.agents: - agent = self.realm.players.get(agent_id) - agent_r = agent.row.val - agent_c = agent.col.val - - visible_entities = Entity.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS - ) - visible_tiles = Tile.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS) - - inventory = Item.Query.owned_by(self.realm.datastore, agent_id) - - obs[agent_id] = Observation(self.config, - self.realm.tick, - agent_id, - visible_tiles, - visible_entities, - inventory, market) + if agent_id not in self.realm.players: + # return dummy obs for the agents in dead_this_tick + dummy_obs = copy(self._dummy_obs) + dummy_obs.current_tick = self.realm.tick + dummy_obs.agent_id = agent_id + obs[agent_id] = dummy_obs + else: + agent = self.realm.players.get(agent_id) + agent_r = agent.row.val + agent_c = agent.col.val + + visible_entities = Entity.Query.window( + self.realm.datastore, + agent_r, agent_c, + radius + ) + visible_tiles = tile_map[agent_r-radius:agent_r+radius+1, + agent_c-radius:agent_c+radius+1,:].reshape(tile_obs_size) + + inventory = Item.Query.owned_by(self.realm.datastore, agent_id) + + # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are + # available in each task instance, via task.embedding + # For now, each agent is assigned to a single task, so we just use the first task + # TODO: can the embeddings of multiple tasks be superposed while preserving the + # task-specific information? This needs research + task_embedding = self._dummy_task_embedding + if agent_id in self.agent_task_map: + task_embedding = self.agent_task_map[agent_id][0].embedding # NOTE: first task only + obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, task_embedding, + visible_tiles, visible_entities, inventory, market) return obs - def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): + def _compute_rewards(self): '''Computes the reward for the specified agent Override this method to create custom reward functions. You have full @@ -412,23 +490,30 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. ''' # Initialization + agents = set(self.agents) infos = {agent_id: {'task': {}} for agent_id in agents} rewards = defaultdict(int) - agents = set(agents) - reward_cache = {} + + # Clean up unnecessary game state, which cause memory leaks + if self.game_state is not None: + self.game_state.clear_cache() + self.game_state = None # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: - if task in reward_cache: - task_rewards, task_infos = reward_cache[task] - else: + if agents.intersection(task.assignee): # evaluate only if the agents are current task_rewards, task_infos = task.compute_rewards(self.game_state) - reward_cache[task] = (task_rewards, task_infos) - for agent_id, reward in task_rewards.items(): - if agent_id in agents and agent_id not in dones: - rewards[agent_id] = rewards.get(agent_id,0) + reward - infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress + for agent_id, reward in task_rewards.items(): + if agent_id in agents: + rewards[agent_id] = rewards.get(agent_id,0) + reward + infos[agent_id]['task'][task.name] = task_infos[agent_id] # include progress, etc. + else: + task.close() # To prevent memory leak + + # Make sure the dead agents return the rewards of -1 + for agent_id in self._dead_this_tick: + rewards[agent_id] = -1 return rewards, infos @@ -441,14 +526,24 @@ def render(self, mode='human'): @property def agents(self) -> List[AgentID]: - '''For conformity with the PettingZoo API only; rendering is external''' - return list(set(self.realm.players.keys()) - self._dead_agents) + '''For conformity with the PettingZoo API''' + # returns the list of "current" agents, both alive and dead_this_tick + return self._agents def close(self): '''For conformity with the PettingZoo API only; rendering is external''' def seed(self, seed=None): - return self._init_random(seed) + '''Reseeds the environment. reset() must be called after seed(), and before step(). + - self._np_seed is None: seed() has not been called, e.g. __init__() -> new RNG + - self._np_seed is set, and seed is not None: seed() or reset() with seed -> new RNG + + If self._np_seed is set, but seed is None + probably called from reset() without seed, so don't change the RNG + ''' + if self._np_seed is None or seed is not None: + self._np_random, self._np_seed = seeding.np_random(seed) + self._reset_required = True def state(self) -> np.ndarray: raise NotImplementedError diff --git a/nmmo/core/map.py b/nmmo/core/map.py index 47bbc8ee1..870ddabe5 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -1,10 +1,9 @@ import os import logging - import numpy as np from ordered_set import OrderedSet -from nmmo.core.tile import Tile +from nmmo.core.tile import Tile from nmmo.lib import material @@ -13,18 +12,24 @@ class Map: Also tracks a sparse list of tile updates ''' - def __init__(self, config, realm): + def __init__(self, config, realm, np_random): self.config = config self._repr = None self.realm = realm self.update_list = None + self.pathfinding_cache = {} # Avoid recalculating A*, paths don't move sz = config.MAP_SIZE self.tiles = np.zeros((sz, sz), dtype=object) + self.habitable_tiles = np.zeros((sz,sz)) for r in range(sz): for c in range(sz): - self.tiles[r, c] = Tile(realm, r, c) + self.tiles[r, c] = Tile(realm, r, c, np_random) + + self.dist_border_center = config.MAP_CENTER // 2 + self.center_coord = (config.MAP_BORDER + self.dist_border_center, + config.MAP_BORDER + self.dist_border_center) @property def packet(self): @@ -42,10 +47,10 @@ def repr(self): return self._repr - def reset(self, map_id): + def reset(self, map_id, np_random): '''Reuse the current tile objects to load a new map''' config = self.config - self.update_list = OrderedSet() + self.update_list = OrderedSet() # critical for determinism path_map_suffix = config.PATH_MAP_SUFFIX.format(map_id) f_path = os.path.join(config.PATH_CWD, config.PATH_MAPS, path_map_suffix) @@ -62,7 +67,8 @@ def reset(self, map_id): for c, idx in enumerate(row): mat = materials[idx] tile = self.tiles[r, c] - tile.reset(mat, config) + tile.reset(mat, config, np_random) + self.habitable_tiles[r, c] = tile.habitable assert c == config.MAP_SIZE - 1 assert r == config.MAP_SIZE - 1 diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index ad05a0b2f..d0e52c55c 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -1,4 +1,4 @@ -from functools import lru_cache +from functools import lru_cache, cached_property import numpy as np @@ -15,7 +15,7 @@ def __init__(self, values, id_col): self.values = values self.ids = values[:, id_col] - @property + @cached_property def len(self): return len(self.ids) @@ -42,6 +42,7 @@ def __init__(self, config, current_tick: int, agent_id: int, + task_embedding, tiles, entities, inventory, @@ -50,12 +51,14 @@ def __init__(self, self.config = config self.current_tick = current_tick self.agent_id = agent_id + self.task_embedding = task_embedding self.tiles = tiles[0:config.MAP_N_OBS] self.entities = BasicObs(entities[0:config.PLAYER_N_OBS], EntityState.State.attr_name_to_col["id"]) - if config.COMBAT_SYSTEM_ENABLED: + self.dummy_obs = self.agent() is None + if config.COMBAT_SYSTEM_ENABLED and not self.dummy_obs: latest_combat_tick = self.agent().latest_combat_tick self.agent_in_combat = False if latest_combat_tick == 0 else \ (current_tick - latest_combat_tick) < config.COMBAT_STATUS_DURATION @@ -74,6 +77,8 @@ def __init__(self, else: assert market.size == 0 + self._noop_action = 1 if config.PROVIDE_NOOP_ACTION_TARGET else 0 + # pylint: disable=method-cache-max-size-none @lru_cache(maxsize=None) def tile(self, r_delta, c_delta): @@ -103,7 +108,7 @@ def tile(self, r_delta, c_delta): @lru_cache(maxsize=None) def entity(self, entity_id): rows = self.entities.values[self.entities.ids == entity_id] - if rows.size == 0: + if rows.shape[0] == 0: return None return EntityState.parse_array(rows[0]) @@ -112,31 +117,47 @@ def entity(self, entity_id): def agent(self): return self.entity(self.agent_id) - def to_gym(self): - '''Convert the observation to a format that can be used by OpenAI Gym''' - - tiles = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])) - tiles[:self.tiles.shape[0],:] = self.tiles - - entities = np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1])) - entities[:self.entities.values.shape[0],:] = self.entities.values + def clear_cache(self): + # clear the cache, so that this object can be garbage collected + self.agent.cache_clear() + self.entity.cache_clear() + self.tile.cache_clear() + def get_empty_obs(self): gym_obs = { - "CurrentTick": np.array([self.current_tick]), - "AgentId": np.array([self.agent_id]), - "Tile": tiles, - "Entity": entities, - } + "CurrentTick": self.current_tick, + "AgentId": self.agent_id, + "Task": self.task_embedding, + "Tile": None, # np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), + "Entity": np.zeros((self.config.PLAYER_N_OBS, + self.entities.values.shape[1]), dtype=np.int16)} + if self.config.ITEM_SYSTEM_ENABLED: + gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS, + self.inventory.values.shape[1]), dtype=np.int16) + if self.config.EXCHANGE_SYSTEM_ENABLED: + gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS, + self.market.values.shape[1]), dtype=np.int16) + return gym_obs + + def to_gym(self): + '''Convert the observation to a format that can be used by OpenAI Gym''' + gym_obs = self.get_empty_obs() + if self.dummy_obs: + # return empty obs for the dead agents + gym_obs['Tile'] = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1]), dtype=np.int16) + if self.config.PROVIDE_ACTION_TARGETS: + gym_obs["ActionTargets"] = self._make_action_targets() + return gym_obs + + # NOTE: assume that all len(self.tiles) == self.config.MAP_N_OBS + gym_obs['Tile'] = self.tiles + gym_obs['Entity'][:self.entities.values.shape[0],:] = self.entities.values if self.config.ITEM_SYSTEM_ENABLED: - inventory = np.zeros((self.config.INVENTORY_N_OBS, self.inventory.values.shape[1])) - inventory[:self.inventory.values.shape[0],:] = self.inventory.values - gym_obs["Inventory"] = inventory + gym_obs["Inventory"][:self.inventory.values.shape[0],:] = self.inventory.values if self.config.EXCHANGE_SYSTEM_ENABLED: - market = np.zeros((self.config.MARKET_N_OBS, self.market.values.shape[1])) - market[:self.market.values.shape[0],:] = self.market.values - gym_obs["Market"] = market + gym_obs["Market"][:self.market.values.shape[0],:] = self.market.values if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() @@ -145,49 +166,55 @@ def to_gym(self): def _make_action_targets(self): masks = {} - masks[action.Move] = { - action.Direction: self._make_move_mask() + masks["Move"] = { + "Direction": self._make_move_mask() } if self.config.COMBAT_SYSTEM_ENABLED: - masks[action.Attack] = { - action.Style: np.ones(len(action.Style.edges), dtype=np.int8), - action.Target: self._make_attack_mask() + # Test below. see tests/core/test_observation_tile.py, test_action_target_consts() + # assert len(action.Style.edges) == 3 + masks["Attack"] = { + "Style": np.ones(3, dtype=np.int8), + "Target": self._make_attack_mask() } if self.config.ITEM_SYSTEM_ENABLED: - masks[action.Use] = { - action.InventoryItem: self._make_use_mask() + masks["Use"] = { + "InventoryItem": self._make_use_mask() } - masks[action.Give] = { - action.InventoryItem: self._make_sell_mask(), - action.Target: self._make_give_target_mask() + masks["Give"] = { + "InventoryItem": self._make_sell_mask(), + "Target": self._make_give_target_mask() } - masks[action.Destroy] = { - action.InventoryItem: self._make_destroy_item_mask() + masks["Destroy"] = { + "InventoryItem": self._make_destroy_item_mask() } if self.config.EXCHANGE_SYSTEM_ENABLED: - masks[action.Sell] = { - action.InventoryItem: self._make_sell_mask(), - action.Price: np.ones(len(action.Price.edges), dtype=np.int8) + masks["Sell"] = { + "InventoryItem": self._make_sell_mask(), + "Price": np.ones(self.config.PRICE_N_OBS, dtype=np.int8) } - masks[action.Buy] = { - action.MarketItem: self._make_buy_mask() + masks["Buy"] = { + "MarketItem": self._make_buy_mask() } - masks[action.GiveGold] = { - action.Target: self._make_give_target_mask(), - action.Price: self._make_give_gold_mask() # reusing Price + masks["GiveGold"] = { + "Price": self._make_give_gold_mask(), # reusing Price + "Target": self._make_give_target_mask() } if self.config.COMMUNICATION_SYSTEM_ENABLED: - masks[action.Comm] = { - action.Token: np.ones(len(action.Token.edges), dtype=np.int8) + masks["Comm"] = { + "Token":np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) } return masks def _make_move_mask(self): + if self.dummy_obs: + mask = np.zeros(len(action.Direction.edges), dtype=np.int8) + mask[-1] = 1 # make sure the noop action is available + return mask # pylint: disable=not-an-iterable return np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) @@ -200,32 +227,40 @@ def _make_attack_mask(self): assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_MAGE_REACH assert self.config.COMBAT_RANGE_REACH == self.config.COMBAT_MAGE_REACH - attack_range = self.config.COMBAT_MELEE_REACH + attack_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + attack_mask[-1] = 1 + + if self.dummy_obs: + return attack_mask agent = self.agent() - entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], - EntityState.State.attr_name_to_col["col"]]] - within_range = utils.linf(entities_pos,(agent.row, agent.col)) <= attack_range + within_range = np.maximum( # calculating the l-inf dist + np.abs(self.entities.values[:,EntityState.State.attr_name_to_col["row"]] - agent.row), + np.abs(self.entities.values[:,EntityState.State.attr_name_to_col["col"]] - agent.col) + ) <= self.config.COMBAT_MELEE_REACH immunity = self.config.COMBAT_SPAWN_IMMUNITY - if 0 < immunity < agent.time_alive: - # ids > 0 equals entity.is_player - spawn_immunity = (self.entities.ids > 0) & \ - (self.entities.values[:,EntityState.State.attr_name_to_col["time_alive"]] < immunity) + if agent.time_alive < immunity: + # NOTE: CANNOT attack players during immunity, thus mask should set to 0 + no_spawn_immunity = ~(self.entities.ids > 0) # ids > 0 equals entity.is_player else: - spawn_immunity = np.ones(self.entities.len, dtype=bool) + no_spawn_immunity = np.ones(self.entities.len, dtype=bool) # allow friendly fire but no self shooting not_me = self.entities.ids != agent.id - attack_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) - attack_mask[:self.entities.len] = within_range & not_me & spawn_immunity + attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity return attack_mask def _make_use_mask(self): # empty inventory -- nothing to use - use_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0) or self.agent_in_combat: + use_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + use_mask[-1] = 1 + + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ + or self.dummy_obs or self.agent_in_combat: return use_mask item_skill = self._item_skill() @@ -237,8 +272,8 @@ def _make_use_mask(self): # level limits are differently applied depending on item types type_flt = np.tile(np.array(list(item_skill.keys())), (self.inventory.len,1)) level_flt = np.tile(np.array(list(item_skill.values())), (self.inventory.len,1)) - item_type = np.tile(np.transpose(np.atleast_2d(item_type)), (1, len(item_skill))) - item_level = np.tile(np.transpose(np.atleast_2d(item_level)), (1, len(item_skill))) + item_type = np.tile(np.transpose(np.atleast_2d(item_type)), (1,len(item_skill))) + item_level = np.tile(np.transpose(np.atleast_2d(item_level)), (1,len(item_skill))) level_satisfied = np.any((item_type==type_flt) & (item_level<=level_flt), axis=1) use_mask[:self.inventory.len] = not_listed & level_satisfied @@ -271,9 +306,13 @@ def _item_skill(self): } def _make_destroy_item_mask(self): - destroy_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) + destroy_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + destroy_mask[-1] = 1 + # empty inventory -- nothing to destroy - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0) or self.agent_in_combat: + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ + or self.dummy_obs or self.agent_in_combat: return destroy_mask # not equipped items in the inventory can be destroyed @@ -283,9 +322,12 @@ def _make_destroy_item_mask(self): return destroy_mask def _make_give_target_mask(self): - give_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) + give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + give_mask[-1] = 1 # empty inventory -- nothing to give - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0) or self.agent_in_combat: + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ + or self.dummy_obs or self.agent_in_combat: return give_mask agent = self.agent() @@ -299,19 +341,25 @@ def _make_give_target_mask(self): return give_mask def _make_give_gold_mask(self): - gold = int(self.agent().gold) mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) + mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored. + if self.dummy_obs: + return mask + gold = int(self.agent().gold) if gold and not self.agent_in_combat: mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1 return mask def _make_sell_mask(self): - sell_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) + sell_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + sell_mask[-1] = 1 + # empty inventory -- nothing to sell if not (self.config.EXCHANGE_SYSTEM_ENABLED and self.inventory.len > 0) \ - or self.agent_in_combat: + or self.dummy_obs or self.agent_in_combat: return sell_mask not_equipped = self.inventory.values[:,ItemState.State.attr_name_to_col["equipped"]] == 0 @@ -321,8 +369,11 @@ def _make_sell_mask(self): return sell_mask def _make_buy_mask(self): - buy_mask = np.zeros(self.config.MARKET_N_OBS, dtype=np.int8) - if not self.config.EXCHANGE_SYSTEM_ENABLED or self.agent_in_combat: + buy_mask = np.zeros(self.config.MARKET_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + buy_mask[-1] = 1 + + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: return buy_mask agent = self.agent() @@ -334,7 +385,7 @@ def _make_buy_mask(self): if self.inventory.len >= self.config.ITEM_INVENTORY_CAPACITY: exist_ammo_listings = self._existing_ammo_listings() if not np.any(exist_ammo_listings): - return np.zeros(self.config.MARKET_N_OBS, dtype=np.int8) + return buy_mask not_mine &= exist_ammo_listings enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 7cf0a2008..eb8723ff1 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -4,8 +4,6 @@ from collections import defaultdict from typing import Dict -import numpy as np - import nmmo from nmmo.core.log_helper import LogHelper from nmmo.core.map import Map @@ -30,8 +28,9 @@ def prioritized(entities: Dict, merged: Dict): class Realm: """Top-level world object""" - def __init__(self, config): + def __init__(self, config, np_random): self.config = config + self._np_random = np_random # rng assert isinstance( config, nmmo.config.Config ), f"Config {config} is not a config instance (did you pass the class?)" @@ -39,7 +38,9 @@ def __init__(self, config): Action.hook(config) # Generate maps if they do not exist - config.MAP_GENERATOR(config).generate_all_maps() + # NOTE: Map generation interferes with determinism. + # To ensure determinism, provide seed to env.reset() + config.MAP_GENERATOR(config).generate_all_maps(self._np_random) self.datastore = NumpyDatastore() for s in [TileState, EntityState, ItemState, EventState]: @@ -49,14 +50,14 @@ def __init__(self, config): self.exchange = None # Load the world file - self.map = Map(config, self) + self.map = Map(config, self, self._np_random) self.log_helper = LogHelper.create(self) self.event_log = EventLogger(self) # Entity handlers - self.players = PlayerManager(self) - self.npcs = NPCManager(self) + self.players = PlayerManager(self, self._np_random) + self.npcs = NPCManager(self, self._np_random) # Global item registry self.items = {} @@ -67,26 +68,28 @@ def __init__(self, config): # Initialize actions nmmo.Action.init(config) - def reset(self, map_id: int = None): + def reset(self, np_random, map_id: int = None): """Reset the environment and load the specified map Args: idx: Map index to load """ + self._np_random = np_random self.log_helper.reset() self.event_log.reset() - map_id = map_id or np.random.randint(self.config.MAP_N) + 1 - self.map.reset(map_id) + map_id = map_id or self._np_random.integers(self.config.MAP_N) + 1 + self.map.reset(map_id, self._np_random) self.tick = 0 # EntityState and ItemState tables must be empty after players/npcs.reset() - self.players.reset() - self.npcs.reset() + self.players.reset(self._np_random) + self.npcs.reset(self._np_random) assert EntityState.State.table(self.datastore).is_empty(), \ "EntityState table is not empty" - assert ItemState.State.table(self.datastore).is_empty(), \ - "ItemState table is not empty" + # TODO: fix the item leak, then uncomment the below -- print out the table? + # assert ItemState.State.table(self.datastore).is_empty(), \ + # "ItemState table is not empty" # DataStore id allocator must be reset to be deterministic EntityState.State.table(self.datastore).reset() @@ -170,7 +173,7 @@ def step(self, actions): # TODO: we should be randomizing these, otherwise the lower ID agents # will always go first. --> ONLY SHUFFLE BUY if priority == Buy.priority: - np.random.shuffle(merged[priority]) + self._np_random.shuffle(merged[priority]) # CHECK ME: do we need this line? # ent_id, (atn, args) = merged[priority][0] @@ -185,6 +188,7 @@ def step(self, actions): self.map.step() self.exchange.step(self.tick) self.log_helper.update(dead) + self.event_log.update() if self._replay_helper is not None: self._replay_helper.update() diff --git a/nmmo/core/terrain.py b/nmmo/core/terrain.py index b5d0e2c84..4aa983d8f 100644 --- a/nmmo/core/terrain.py +++ b/nmmo/core/terrain.py @@ -1,6 +1,4 @@ - import os -import random import logging import numpy as np @@ -145,13 +143,13 @@ def generate_terrain(config, map_id, interpolaters): return val, matl, interpolaters -def place_fish(tiles): +def place_fish(tiles, np_random): placed = False allow = {Terrain.GRASS} water_loc = np.where(tiles == Terrain.WATER) water_loc = list(zip(water_loc[0], water_loc[1])) - random.shuffle(water_loc) + np_random.shuffle(water_loc) for r, c in water_loc: if tiles[r-1, c] in allow or tiles[r+1, c] in allow or \ @@ -163,25 +161,25 @@ def place_fish(tiles): if not placed: raise RuntimeError('Could not find the water tile to place fish.') -def uniform(config, tiles, mat, mmin, mmax): - r = random.randint(mmin, mmax) - c = random.randint(mmin, mmax) +def uniform(config, tiles, mat, mmin, mmax, np_random): + r = np_random.integers(mmin, mmax) + c = np_random.integers(mmin, mmax) if tiles[r, c] not in {Terrain.GRASS}: - uniform(config, tiles, mat, mmin, mmax) + uniform(config, tiles, mat, mmin, mmax, np_random) else: tiles[r, c] = mat -def cluster(config, tiles, mat, mmin, mmax): +def cluster(config, tiles, mat, mmin, mmax, np_random): mmin = mmin + 1 mmax = mmax - 1 - r = random.randint(mmin, mmax) - c = random.randint(mmin, mmax) + r = np_random.integers(mmin, mmax) + c = np_random.integers(mmin, mmax) matls = {Terrain.GRASS} if tiles[r, c] not in matls: - cluster(config, tiles, mat, mmin-1, mmax+1) + cluster(config, tiles, mat, mmin-1, mmax+1, np_random) return tiles[r, c] = mat @@ -194,18 +192,21 @@ def cluster(config, tiles, mat, mmin, mmax): if tiles[r, c+1] in matls: tiles[r, c+1] = mat -def spawn_profession_resources(config, tiles): +def spawn_profession_resources(config, tiles, np_random=None): + if np_random is None: + np_random = np.random + mmin = config.MAP_BORDER + 1 mmax = config.MAP_SIZE - config.MAP_BORDER - 1 for _ in range(config.PROGRESSION_SPAWN_CLUSTERS): - cluster(config, tiles, Terrain.ORE, mmin, mmax) - cluster(config, tiles, Terrain.TREE, mmin, mmax) - cluster(config, tiles, Terrain.CRYSTAL, mmin, mmax) + cluster(config, tiles, Terrain.ORE, mmin, mmax, np_random) + cluster(config, tiles, Terrain.TREE, mmin, mmax, np_random) + cluster(config, tiles, Terrain.CRYSTAL, mmin, mmax, np_random) for _ in range(config.PROGRESSION_SPAWN_UNIFORMS): - uniform(config, tiles, Terrain.HERB, mmin, mmax) - place_fish(tiles) + uniform(config, tiles, Terrain.HERB, mmin, mmax, np_random) + place_fish(tiles, np_random) class MapGenerator: '''Procedural map generation''' @@ -226,7 +227,7 @@ def load_textures(self): setattr(Terrain, key.upper(), mat.index) self.textures = lookup - def generate_all_maps(self): + def generate_all_maps(self, np_random=None): '''Generates NMAPS maps according to generate_map Provides additional utilities for saving to .npy and rendering png previews''' @@ -253,7 +254,7 @@ def generate_all_maps(self): path = path_maps + '/map' + str(idx+1) os.makedirs(path, exist_ok=True) - terrain, tiles = self.generate_map(idx) + terrain, tiles = self.generate_map(idx, np_random) #Save/render Save.as_numpy(tiles, path) @@ -263,7 +264,7 @@ def generate_all_maps(self): Save.fractal(terrain, path+'/fractal.png') Save.render(tiles, self.textures, path+'/map.png') - def generate_map(self, idx): + def generate_map(self, idx, np_random=None): '''Generate a single map The default method is a relatively complex multiscale perlin noise method. @@ -295,6 +296,6 @@ def generate_map(self, idx): tiles[r, c] = Terrain.VOID if config.PROFESSION_SYSTEM_ENABLED: - spawn_profession_resources(config, tiles) + spawn_profession_resources(config, tiles, np_random) return terrain, tiles diff --git a/nmmo/core/tile.py b/nmmo/core/tile.py index b991c4dcd..9d9cb33ca 100644 --- a/nmmo/core/tile.py +++ b/nmmo/core/tile.py @@ -1,10 +1,9 @@ from types import SimpleNamespace -import numpy as np from nmmo.datastore.serialized import SerializedState from nmmo.lib import material -# pylint: disable=no-member +# pylint: disable=no-member,protected-access TileState = SerializedState.subclass( "Tile", [ "row", @@ -23,13 +22,17 @@ TileState.State.attr_name_to_col["row"], TileState.State.attr_name_to_col["col"], r, c, radius), + get_map=lambda ds, map_size: + ds.table("Tile")._data[1:(map_size*map_size+1)] + .reshape((map_size,map_size,len(TileState.State.attr_name_to_col))) ) class Tile(TileState): - def __init__(self, realm, r, c): + def __init__(self, realm, r, c, np_random): super().__init__(realm.datastore, TileState.Limits(realm.config)) self.realm = realm self.config = realm.config + self._np_random = np_random self.row.update(r) self.col.update(c) @@ -61,7 +64,8 @@ def impassible(self): def void(self): return self.material == material.Void - def reset(self, mat, config): + def reset(self, mat, config, np_random): + self._np_random = np_random # reset the RNG self.state = mat(config) self.material = mat(config) self.material_id.update(self.state.index) @@ -80,7 +84,7 @@ def remove_entity(self, ent_id): del self.entities[ent_id] def step(self): - if not self.depleted or np.random.rand() > self.material.respawn: + if not self.depleted or self._np_random.random() > self.material.respawn: return self.depleted = False diff --git a/nmmo/datastore/id_allocator.py b/nmmo/datastore/id_allocator.py index a93e8c1f1..83e65ec84 100644 --- a/nmmo/datastore/id_allocator.py +++ b/nmmo/datastore/id_allocator.py @@ -4,7 +4,7 @@ class IdAllocator: def __init__(self, max_id): # Key 0 is reserved as padding self.max_id = 1 - self.free = OrderedSet() + self.free = OrderedSet() self.expand(max_id) def full(self): @@ -17,5 +17,5 @@ def allocate(self): return self.free.pop(0) def expand(self, max_id): - self.free.update(OrderedSet(range(self.max_id, max_id))) + self.free.update(range(self.max_id, max_id)) self.max_id = max_id diff --git a/nmmo/datastore/numpy_datastore.py b/nmmo/datastore/numpy_datastore.py index e737ad9cd..2bced2d46 100644 --- a/nmmo/datastore/numpy_datastore.py +++ b/nmmo/datastore/numpy_datastore.py @@ -6,7 +6,7 @@ class NumpyTable(DataTable): - def __init__(self, num_columns: int, initial_size: int, dtype=np.float32): + def __init__(self, num_columns: int, initial_size: int, dtype=np.int16): super().__init__(num_columns) self._dtype = dtype self._initial_size = initial_size diff --git a/nmmo/datastore/serialized.py b/nmmo/datastore/serialized.py index 652280292..a6201ba6a 100644 --- a/nmmo/datastore/serialized.py +++ b/nmmo/datastore/serialized.py @@ -38,8 +38,10 @@ def val(self): return self._val def update(self, value): - value = min(self._max, max(self._min, value)) - + if value > self._max: + value = self._max + elif value < self._min: + value = self._min self.datastore_record.update(self._column, value) self._val = value diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index 13a523b93..2229c227c 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -105,6 +105,7 @@ def __init__(self, ent, config): self.health = ent.health self.water = ent.water self.food = ent.food + self.health_restore = 0 self.health.update(config.PLAYER_BASE_HEALTH) if config.RESOURCE_SYSTEM_ENABLED: @@ -121,6 +122,7 @@ def update(self): food_thresh = self.food > thresh * self.config.RESOURCE_BASE water_thresh = self.water > thresh * self.config.RESOURCE_BASE + org_health = self.health.val if food_thresh and water_thresh: restore = np.floor(self.health.max * regen) self.health.increment(restore) @@ -131,6 +133,9 @@ def update(self): if self.water.empty: self.health.decrement(self.config.RESOURCE_DEHYDRATION_RATE) + # records both increase and decrease in health due to food and water + self.health_restore = self.health.val - org_health + def packet(self): data = {} data['health'] = { 'val': self.health.val, 'max': self.config.PLAYER_BASE_HEALTH } @@ -218,6 +223,10 @@ def __init__(self, realm, pos, entity_id, name): self.realm = realm self.config: Config = realm.config + # TODO: do not access realm._np_random directly + # related to the whole NPC, scripted logic + # pylint: disable=protected-access + self._np_random = realm._np_random self.policy = name self.entity_id = entity_id @@ -310,7 +319,7 @@ def apply_damage(self, dmg, style): @property def pos(self): - return int(self.row.val), int(self.col.val) + return self.row.val, self.col.val @property def alive(self): diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py index 91763c675..9d24be2d4 100644 --- a/nmmo/entity/entity_manager.py +++ b/nmmo/entity/entity_manager.py @@ -1,9 +1,6 @@ from collections.abc import Mapping from typing import Dict -import numpy as np -from ordered_set import OrderedSet - from nmmo.entity.entity import Entity from nmmo.entity.npc import NPC from nmmo.entity.player import Player @@ -12,10 +9,11 @@ class EntityGroup(Mapping): - def __init__(self, realm): + def __init__(self, realm, np_random): self.datastore = realm.datastore self.realm = realm self.config = realm.config + self._np_random = np_random self.entities: Dict[int, Entity] = {} self.dead_this_tick: Dict[int, Entity] = {} @@ -43,7 +41,8 @@ def corporeal(self): def packet(self): return {k: v.packet() for k, v in self.corporeal.items()} - def reset(self): + def reset(self, np_random): + self._np_random = np_random # reset the RNG for ent in self.entities.values(): # destroy the items if self.config.ITEM_SYSTEM_ENABLED: @@ -87,13 +86,13 @@ def update(self, actions): class NPCManager(EntityGroup): - def __init__(self, realm): - super().__init__(realm) + def __init__(self, realm, np_random): + super().__init__(realm, np_random) self.next_id = -1 self.spawn_dangers = [] - def reset(self): - super().reset() + def reset(self, np_random): + super().reset(np_random) self.next_id = -1 self.spawn_dangers = [] @@ -109,14 +108,14 @@ def spawn(self): if self.spawn_dangers: danger = self.spawn_dangers[-1] - r, c = combat.spawn(config, danger) + r, c = combat.spawn(config, danger, self._np_random) else: center = config.MAP_CENTER border = self.config.MAP_BORDER # pylint: disable=unbalanced-tuple-unpacking - r, c = np.random.randint(border, center+border, 2).tolist() + r, c = self._np_random.integers(border, center+border, 2).tolist() - npc = NPC.spawn(self.realm, (r, c), self.next_id) + npc = NPC.spawn(self.realm, (r, c), self.next_id, self._np_random) if npc: super().spawn(npc) self.next_id -= 1 @@ -138,16 +137,16 @@ def actions(self, realm): return actions class PlayerManager(EntityGroup): - def __init__(self, realm): - super().__init__(realm) + def __init__(self, realm, np_random): + super().__init__(realm, np_random) self.loader_class = self.realm.config.PLAYER_LOADER self._agent_loader: spawn.SequentialLoader = None self.spawned = None - def reset(self): - super().reset() - self._agent_loader = self.loader_class(self.config) - self.spawned = OrderedSet() + def reset(self, np_random): + super().reset(np_random) + self._agent_loader = self.loader_class(self.config, self._np_random) + self.spawned = set() def spawn_individual(self, r, c, idx): agent = next(self._agent_loader) diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py index 96907ab95..211eb4076 100644 --- a/nmmo/entity/npc.py +++ b/nmmo/entity/npc.py @@ -1,6 +1,3 @@ - -import random - from nmmo.entity import entity from nmmo.core import action as Action from nmmo.systems import combat, droptable @@ -8,7 +5,7 @@ from nmmo.systems import item as Item from nmmo.systems import skill from nmmo.systems.inventory import EquipmentSlot - +from nmmo.lib.log import EventCode class Equipment: def __init__(self, total, @@ -74,19 +71,26 @@ def receive_damage(self, source, dmg): # run the next lines if the npc is killed # source receive gold & items in the droptable # pylint: disable=no-member - source.gold.increment(self.gold.val) - self.gold.update(0) + if self.gold.val > 0: + source.gold.increment(self.gold.val) + self.realm.event_log.record(EventCode.EARN_GOLD, source, amount=self.gold.val) + self.gold.update(0) for item in self.droptable.roll(self.realm, self.attack_level): if source.is_player and source.inventory.space: - source.inventory.receive(item) + # inventory.receive() returns True if the item is received + # if source doesn't have space, inventory.receive() destroys the item + if source.inventory.receive(item): + self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item) else: item.destroy() return False + # NOTE: passing np_random here is a hack + # Ideally, it should be passed to __init__ and also used in action generation @staticmethod - def spawn(realm, pos, iden): + def spawn(realm, pos, iden, np_random): config = realm.config # check the position @@ -107,7 +111,13 @@ def spawn(realm, pos, iden): ent.spawn_danger = danger # Select combat focus - style = random.choice((Action.Melee, Action.Range, Action.Mage)) + style = np_random.integers(0,3) + if style == 0: + style = Action.Melee + elif style == 1: + style = Action.Range + else: + style = Action.Mage ent.skills.style = style # Compute level @@ -134,7 +144,7 @@ def spawn(realm, pos, iden): # Equipment to instantiate if config.EQUIPMENT_SYSTEM_ENABLED: - lvl = level - random.random() + lvl = level - np_random.random() ilvl = int(5 * lvl) offense = int(config.NPC_BASE_DAMAGE + lvl*config.NPC_LEVEL_DAMAGE) @@ -143,11 +153,11 @@ def spawn(realm, pos, iden): ent.equipment = Equipment(ilvl, offense, offense, offense, defense, defense, defense) armor = [Item.Hat, Item.Top, Item.Bottom] - ent.droptable.add(random.choice(armor)) + ent.droptable.add(np_random.choice(armor)) if config.PROFESSION_SYSTEM_ENABLED: tools = [Item.Rod, Item.Gloves, Item.Pickaxe, Item.Axe, Item.Chisel] - ent.droptable.add(random.choice(tools)) + ent.droptable.add(np_random.choice(tools)) return ent diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py index 70d4be283..b635810d0 100644 --- a/nmmo/entity/player.py +++ b/nmmo/entity/player.py @@ -1,7 +1,6 @@ -import numpy as np - from nmmo.systems.skill import Skills from nmmo.entity import entity +from nmmo.lib.log import EventCode # pylint: disable=no-member class Player(entity.Entity): @@ -67,20 +66,24 @@ def receive_damage(self, source, dmg): # starting from here, source receive gold & inventory items if self.config.EXCHANGE_SYSTEM_ENABLED and source is not None: - source.gold.increment(self.gold.val) - self.gold.update(0) + if self.gold.val > 0: + source.gold.increment(self.gold.val) + self.realm.event_log.record(EventCode.EARN_GOLD, source, amount=self.gold.val) + self.gold.update(0) # TODO: make source receive the highest-level items first # because source cannot take it if the inventory is full item_list = list(self.inventory.items) - np.random.shuffle(item_list) + self._np_random.shuffle(item_list) for item in item_list: self.inventory.remove(item) # if source is None or NPC, destroy the item if source.is_player: + # inventory.receive() returns True if the item is received # if source doesn't have space, inventory.receive() destroys the item - source.inventory.receive(item) + if source.inventory.receive(item): + self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item) else: item.destroy() diff --git a/nmmo/lib/__init__.py b/nmmo/lib/__init__.py index f8c10fcbe..e69de29bb 100644 --- a/nmmo/lib/__init__.py +++ b/nmmo/lib/__init__.py @@ -1 +0,0 @@ -from nmmo.lib.priorityqueue import PriorityQueue diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index cfb70dfd8..e11b1bee1 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -11,7 +11,7 @@ # pylint: disable=no-member EventState = SerializedState.subclass("Event", [ - "id", # unique event id + "recorded", # event_log is write-only, no update or delete, so no need for row id "ent_id", "tick", @@ -27,10 +27,11 @@ EventAttr = EventState.State.attr_name_to_col EventState.Query = SimpleNamespace( - table=lambda ds: ds.table("Event").where_neq(EventAttr["id"], 0), - + table=lambda ds: ds.table("Event").where_eq(EventAttr["recorded"], 1), by_event=lambda ds, event_code: ds.table("Event").where_eq( EventAttr["event"], event_code), + by_tick=lambda ds, tick: ds.table("Event").where_eq( + EventAttr["tick"], tick), ) # defining col synoyms for different event types @@ -56,6 +57,9 @@ def __init__(self, realm): self.valid_events = { val: evt for evt, val in EventCode.__dict__.items() if isinstance(val, int) } + self._data_by_tick = {} + self._last_tick = 0 + self._empty_data = np.empty((0, len(EventAttr))) # add synonyms to the attributes self.attr_to_col = deepcopy(EventAttr) @@ -70,7 +74,7 @@ def reset(self): # define event logging def _create_event(self, entity: Entity, event_code: int): log = EventState(self.datastore) - log.id.update(log.datastore_record.id) + log.recorded.update(1) log.ent_id.update(entity.ent_id) # the tick increase by 1 after executing all actions log.tick.update(self.realm.tick+1) @@ -111,7 +115,8 @@ def record(self, event_code: int, entity: Entity, **kwargs): log.level.update(target.attack_level) return - if event_code in [EventCode.CONSUME_ITEM, EventCode.HARVEST_ITEM, EventCode.EQUIP_ITEM]: + if event_code in [EventCode.CONSUME_ITEM, EventCode.HARVEST_ITEM, EventCode.EQUIP_ITEM, + EventCode.LOOT_ITEM]: # CHECK ME: item types should be checked. For example, # Only Ration and Potion can be consumed # Only Ration, Potion, Whetstone, Arrow, Runes can be produced @@ -135,6 +140,7 @@ def record(self, event_code: int, entity: Entity, **kwargs): log.gold.update(kwargs['price']) return + # NOTE: do we want to separate the source of income? from selling vs looting if event_code == EventCode.EARN_GOLD: if ('amount' in kwargs and kwargs['amount'] > 0): log = self._create_event(entity, event_code) @@ -154,16 +160,27 @@ def record(self, event_code: int, entity: Entity, **kwargs): # CHECK ME: The below should be commented out after debugging raise ValueError(f"Event code: {event_code}", kwargs) - def get_data(self, event_code=None, agents: List[int]=None): - if event_code is None: - event_data = EventState.Query.table(self.datastore).astype(np.int32) - elif event_code in self.valid_events: - event_data = EventState.Query.by_event(self.datastore, event_code).astype(np.int32) + def update(self): + curr_tick = self.realm.tick + 1 # update happens before the tick update + if curr_tick > self._last_tick: + self._data_by_tick[curr_tick] = EventState.Query.by_tick(self.datastore, curr_tick) + self._last_tick = curr_tick + + def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> np.ndarray: + if tick is not None: + if tick not in self._data_by_tick: + return self._empty_data + event_data = self._data_by_tick[tick] else: - return None - - if agents: - flt_idx = np.in1d(event_data[:, EventAttr['ent_id']], agents) + event_data = EventState.Query.table(self.datastore) + + if event_data.shape[0] > 0: + if event_code is None: + flt_idx = event_data[:, EventAttr["event"]] > 0 + else: + flt_idx = event_data[:, EventAttr["event"]] == event_code + if agents: + flt_idx &= np.in1d(event_data[:, EventAttr["ent_id"]], agents) return event_data[flt_idx] - return event_data + return self._empty_data diff --git a/nmmo/lib/log.py b/nmmo/lib/log.py index 6ee72296e..8ced7f7a4 100644 --- a/nmmo/lib/log.py +++ b/nmmo/lib/log.py @@ -35,9 +35,6 @@ def log_max(self, key, val): return True -# CHECK ME: Is this a good place to put here? -# EventCode is used in many places, and I(kywch)'m putting it here -# to avoid a circular import, which happened a few times with event_log.py class EventCode: # Move EAT_FOOD = 1 @@ -54,6 +51,7 @@ class EventCode: DESTROY_ITEM = 23 HARVEST_ITEM = 24 EQUIP_ITEM = 25 + LOOT_ITEM = 26 # Exchange GIVE_GOLD = 31 diff --git a/nmmo/lib/priorityqueue.py b/nmmo/lib/priorityqueue.py deleted file mode 100644 index 7d3d0e3be..000000000 --- a/nmmo/lib/priorityqueue.py +++ /dev/null @@ -1,65 +0,0 @@ -# pylint: disable=all - -import heapq, itertools -import itertools - -from ordered_set import OrderedSet - -class PriorityQueue: - def __init__(self, capacity, unique=False): - self.q, self.items = [], OrderedSet() - self.capacity = capacity - self.count = itertools.count() - self.unique = unique - - def get(self, ind): - priority, item = self.tolist()[ind] - return priority, item - - def push(self, item, priority, uniqueKey=None): - if self.unique: - self.items.add(uniqueKey) - count = next(self.count) - if len(self.q) >= self.capacity: - return heapq.heappushpop(self.q, (priority, count, item)) - heapq.heappush(self.q, (priority, count, item)) - - def pop(self): - priority, _, item = heapq.heappop(self.q) - if self.unique: - self.items.remove(item) - return priority, item - - @property - def peek(self): - return self.peekPriority, self.peekValue - - @property - def peekPriority(self): - ret = heapq.nlargest(1, self.q) - if len(ret) > 0: - return ret[0][0] - - @property - def peekValue(self): - ret = heapq.nlargest(1, self.q) - if len(ret) > 0: - return ret[0][2] - - - def tolist(self): - q = heapq.nlargest(self.n, self.q) - return [(e[0], e[2]) for e in q] - - def priorities(self): - return sorted([e[0] for e in self.q], reverse=True) - - def print(self): - q = heapq.nlargest(self.n, self.q) - print([(e[0]) for e in q], end='') - print() - - @property - def n(self): - return len(self.q) - diff --git a/nmmo/lib/seeding.py b/nmmo/lib/seeding.py new file mode 100644 index 000000000..1e75c066c --- /dev/null +++ b/nmmo/lib/seeding.py @@ -0,0 +1,42 @@ +# copied from https://github.com/openai/gym/blob/master/gym/utils/seeding.py + +"""Set of random number generator functions: seeding, generator, hashing seeds.""" +from typing import Any, Optional, Tuple + +import numpy as np + +from gym import error + + +class RandomNumberGenerator(np.random.Generator): + def __init__(self, bit_generator): + super().__init__(bit_generator) + self._dir_seq_len = 1024 + self._wrap = self._dir_seq_len - 1 + self._dir_seq = list(self.integers(0, 4, size=self._dir_seq_len)) + self._dir_idx = 0 + + # provide a random direction from the pre-generated sequence + def get_direction(self): + self._dir_idx = (self._dir_idx + 1) & self._wrap + return self._dir_seq[self._dir_idx] + +def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]: + """Generates a random number generator from the seed and returns the Generator and seed. + + Args: + seed: The seed used to create the generator + + Returns: + The generator and resulting seed + + Raises: + Error: Seed must be a non-negative integer or omitted + """ + if seed is not None and not (isinstance(seed, int) and 0 <= seed): + raise error.Error(f"Seed must be a non-negative integer or omitted, not {seed}") + + seed_seq = np.random.SeedSequence(seed) + np_seed = seed_seq.entropy + rng = RandomNumberGenerator(np.random.PCG64(seed_seq)) + return rng, np_seed diff --git a/nmmo/lib/spawn.py b/nmmo/lib/spawn.py index 70a78f400..aed9949c7 100644 --- a/nmmo/lib/spawn.py +++ b/nmmo/lib/spawn.py @@ -1,14 +1,13 @@ -import numpy as np - class SequentialLoader: '''config.PLAYER_LOADER that spreads out agent populations''' - def __init__(self, config): + def __init__(self, config, np_random): items = config.PLAYERS self.items = items self.idx = -1 - self.candidate_spawn_pos = spawn_concurrent(config) + # np_random is the env-level rng + self.candidate_spawn_pos = spawn_concurrent(config, np_random) def __iter__(self): return self @@ -22,7 +21,7 @@ def get_spawn_position(self, agent_id): # the basic SequentialLoader just provides a random spawn position return self.candidate_spawn_pos.pop() -def spawn_continuous(config): +def spawn_continuous(config, np_random): '''Generates spawn positions for new agents Randomly selects spawn positions around @@ -38,10 +37,11 @@ def spawn_continuous(config): mmax = config.MAP_CENTER + config.MAP_BORDER mmin = config.MAP_BORDER - var = np.random.randint(mmin, mmax) - fixed = np.random.choice([mmin, mmax]) + # np_random is the env-level RNG, a drop-in replacement of numpy.random + var = np_random.integers(mmin, mmax) + fixed = np_random.choice([mmin, mmax]) r, c = int(var), int(fixed) - if np.random.rand() > 0.5: + if np_random.random() > 0.5: r, c = c, r return (r, c) @@ -63,7 +63,7 @@ def get_edge_tiles(config): return sides -def spawn_concurrent(config): +def spawn_concurrent(config, np_random): '''Generates spawn positions for new agents Evenly spaces agents around the borders @@ -108,31 +108,8 @@ def spawn_concurrent(config): spawn_positions.append(pos) else: # team_n = 1: to fit 128 agents in a small map, ignore spacing and spawn randomly - np.random.shuffle(sides) + # np_random is the env-level RNG, a drop-in replacement of numpy.random + np_random.shuffle(sides) spawn_positions = sides[:config.PLAYER_N] return spawn_positions - -def get_team_spawn_positions(config, num_teams): - '''Generates spawn positions for new teams - Agents in the same team spawn together in the same tile - Evenly spaces teams around the square map borders - - Returns: - list of tuple(int, int): - - position: - The position (row, col) to spawn the given teams - ''' - teams_per_sides = (num_teams + 3) // 4 # 1-4 -> 1, 5-8 -> 2, etc. - - sides = get_edge_tiles(config) - assert len(sides[0]) >= 4*teams_per_sides, 'Map too small for teams' - - team_spawn_positions = [] - for side in sides: - for i in range(teams_per_sides): - idx = int(len(side)*(i+1)/(teams_per_sides + 1)) - team_spawn_positions.append(side[idx]) - - return team_spawn_positions diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index e624dca5a..9256cd036 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -24,14 +24,26 @@ def is_agent_in_team(self, agent_id:int , team_id: int) -> bool: return agent_id in self.teams[team_id] def get_target_agent(self, team_id: int, target: str): - if target == 'left_team': - return self.teams[(team_id+1) % self.num_teams] - if target == 'left_team_leader': - return self.teams[(team_id+1) % self.num_teams][0] - if target == 'right_team': - return self.teams[(team_id-1) % self.num_teams] - if target == 'right_team_leader': - return self.teams[(team_id-1) % self.num_teams][0] - if target == 'my_team_leader': + team_ids = list(self.teams.keys()) + idx = team_ids.index(team_id) + if target == "left_team": + target_id = team_ids[(idx+1) % self.num_teams] + return self.teams[target_id] + if target == "left_team_leader": + target_id = team_ids[(idx+1) % self.num_teams] + return self.teams[target_id][0] + if target == "right_team": + target_id = team_ids[(idx-1) % self.num_teams] + return self.teams[target_id] + if target == "right_team_leader": + target_id = team_ids[(idx-1) % self.num_teams] + return self.teams[target_id][0] + if target == "my_team_leader": return self.teams[team_id][0] + if target == "all_foes": + all_foes = [] + for foe_team_id in team_ids: + if foe_team_id != team_id: + all_foes += self.teams[foe_team_id] + return all_foes return None diff --git a/nmmo/lib/utils.py b/nmmo/lib/utils.py index e4ebd33cd..31b73d0b5 100644 --- a/nmmo/lib/utils.py +++ b/nmmo/lib/utils.py @@ -66,13 +66,14 @@ def __ge__(self, other): class IterableNameComparable(Iterable, NameComparable): pass -def seed(): - return int(np.random.randint(0, 2**32)) - def linf(pos1, pos2): # pos could be a single (r,c) or a vector of (r,c)s diff = np.abs(np.array(pos1) - np.array(pos2)) - return np.max(diff, axis=len(diff.shape)-1) + return np.max(diff, axis=-1) + +def linf_single(pos1, pos2): + # pos is a single (r,c) to avoid uneccessary function calls + return max(abs(pos1[0]-pos2[0]), abs(pos1[1]-pos2[1])) #Bounds checker def in_bounds(r, c, shape, border=0): diff --git a/nmmo/render/render_client.py b/nmmo/render/render_client.py index e61d88083..d861c3fed 100644 --- a/nmmo/render/render_client.py +++ b/nmmo/render/render_client.py @@ -19,6 +19,10 @@ def __init__(self, realm=None) -> None: self.packet = None + def set_realm(self, realm) -> None: + self._realm = realm + self.registry = OverlayRegistry(realm, renderer=self) if realm else None + def render_packet(self, packet) -> None: packet = { 'pos': self.overlay_pos, diff --git a/nmmo/render/replay_helper.py b/nmmo/render/replay_helper.py index a16564e7b..1a1bab1e6 100644 --- a/nmmo/render/replay_helper.py +++ b/nmmo/render/replay_helper.py @@ -1,3 +1,4 @@ +import os import json import logging import lzma @@ -77,12 +78,10 @@ def _metadata(self) -> Dict: def update(self): self.packets.append(self._packet()) - def save(self, filename_prefix, compress=True): + def save(self, filename_prefix, compress=False): replay_file = f'{filename_prefix}.replay.json' metadata_file = f'{filename_prefix}.metadata.pkl' - logging.info('Saving replay to %s ...', replay_file) - data = json.dumps({ 'map': self.map, 'packets': self.packets @@ -92,19 +91,21 @@ def save(self, filename_prefix, compress=True): replay_file = f'{filename_prefix}.replay.lzma' data = lzma.compress(data, format=lzma.FORMAT_ALONE) + logging.info('Saving replay to %s ...', replay_file) + with open(replay_file, 'wb') as out: out.write(data) with open(metadata_file, 'wb') as out: pickle.dump(self._metadata(), out) - @classmethod - def load(cls, replay_file, decompress=True): + def load(cls, replay_file): + extension = os.path.splitext(replay_file)[1] with open(replay_file, 'rb') as fp: data = fp.read() - if decompress: + if extension != '.json': data = lzma.decompress(data, format=lzma.FORMAT_ALONE) data = json.loads(data.decode('utf-8')) diff --git a/nmmo/render/websocket.py b/nmmo/render/websocket.py index 3647f51e1..b3b70123c 100644 --- a/nmmo/render/websocket.py +++ b/nmmo/render/websocket.py @@ -21,143 +21,143 @@ from .render_utils import np_encoder class GodswordServerProtocol(WebSocketServerProtocol): - def __init__(self): - super().__init__() - print("Created a server") - self.frame = 0 - - #"connected" is already used by WSSP - self.sent_environment = False - self.isConnected = False - - self.pos = [0, 0] - self.cmd = None - - def onOpen(self): - print("Opened connection to server") - - def onClose(self, wasClean, code=None, reason=None): - self.isConnected = False - print('Connection closed') - - def connectionMade(self): - super().connectionMade() - self.factory.clientConnectionMade(self) - - def connectionLost(self, reason): - super().connectionLost(reason) - self.factory.clientConnectionLost(self) - self.sent_environment = False - - #Not used without player interaction - def onMessage(self, packet, isBinary): - print("Server packet", packet) - packet = packet.decode() - _, packet = packet.split(';') #Strip headeer - r, c, cmd = packet.split(' ') #Split camera coords - if len(cmd) == 0 or cmd == '\t': - cmd = None - - self.pos = [int(r), int(c)] - self.cmd = cmd - - self.isConnected = True - - def onConnect(self, request): - print("WebSocket connection request: {}".format(request)) - realm = self.factory.realm - self.realm = realm - self.frame += 1 - - def serverPacket(self): - data = self.realm.packet - return data - - def sendUpdate(self, data): - packet = {} - packet['resource'] = data['resource'] - packet['player'] = data['player'] - packet['npc'] = data['npc'] - packet['pos'] = data['pos'] - packet['wilderness'] = data['wilderness'] - packet['market'] = data['market'] - - print('Is Connected? : {}'.format(self.isConnected)) - if not self.sent_environment: - packet['map'] = data['environment'] - packet['border'] = data['border'] - packet['size'] = data['size'] - self.sent_environment=True - - if 'overlay' in data: - packet['overlay'] = data['overlay'] - print('SENDING OVERLAY: ', len(packet['overlay'])) - - packet = json.dumps(packet, default=np_encoder).encode('utf8') - self.sendMessage(packet, False) + def __init__(self): + super().__init__() + print("Created a server") + self.frame = 0 + + #"connected" is already used by WSSP + self.sent_environment = False + self.isConnected = False + + self.pos = [0, 0] + self.cmd = None + + def onOpen(self): + print("Opened connection to server") + + def onClose(self, wasClean, code=None, reason=None): + self.isConnected = False + print('Connection closed') + + def connectionMade(self): + super().connectionMade() + self.factory.clientConnectionMade(self) + + def connectionLost(self, reason): + super().connectionLost(reason) + self.factory.clientConnectionLost(self) + self.sent_environment = False + + #Not used without player interaction + def onMessage(self, packet, isBinary): + print("Server packet", packet) + packet = packet.decode() + _, packet = packet.split(';') #Strip headeer + r, c, cmd = packet.split(' ') #Split camera coords + if len(cmd) == 0 or cmd == '\t': + cmd = None + + self.pos = [int(r), int(c)] + self.cmd = cmd + + self.isConnected = True + + def onConnect(self, request): + print("WebSocket connection request: {}".format(request)) + realm = self.factory.realm + self.realm = realm + self.frame += 1 + + def serverPacket(self): + data = self.realm.packet + return data + + def sendUpdate(self, data): + packet = {} + packet['resource'] = data['resource'] + packet['player'] = data['player'] + packet['npc'] = data['npc'] + packet['pos'] = data['pos'] + packet['wilderness'] = data['wilderness'] + packet['market'] = data['market'] + + print('Is Connected? : {}'.format(self.isConnected)) + if not self.sent_environment: + packet['map'] = data['environment'] + packet['border'] = data['border'] + packet['size'] = data['size'] + self.sent_environment=True + + if 'overlay' in data: + packet['overlay'] = data['overlay'] + print('SENDING OVERLAY: ', len(packet['overlay'])) + + packet = json.dumps(packet, default=np_encoder).encode('utf8') + self.sendMessage(packet, False) class WSServerFactory(WebSocketServerFactory): - def __init__(self, ip, realm): - super().__init__(ip) - self.realm = realm - self.time = time.time() - self.clients = [] - - self.pos = [0, 0] - self.cmd = None - self.tickRate = 0.6 - self.tick = 0 - - def update(self, packet): - self.tick += 1 - uptime = np.round(self.tickRate*self.tick, 1) - delta = time.time() - self.time - print('Wall Clock: ', str(delta)[:5], 'Uptime: ', uptime, ', Tick: ', self.tick) - delta = self.tickRate - delta - if delta > 0: - time.sleep(delta) - self.time = time.time() - - for client in self.clients: - client.sendUpdate(packet) - if client.pos is not None: - self.pos = client.pos - self.cmd = client.cmd - - return self.pos, self.cmd - - def clientConnectionMade(self, client): - self.clients.append(client) - - def clientConnectionLost(self, client): - self.clients.remove(client) + def __init__(self, ip, realm): + super().__init__(ip) + self.realm = realm + self.time = time.time() + self.clients = [] + + self.pos = [0, 0] + self.cmd = None + self.tickRate = 0.6 + self.tick = 0 + + def update(self, packet): + self.tick += 1 + uptime = np.round(self.tickRate*self.tick, 1) + delta = time.time() - self.time + print('Wall Clock: ', str(delta)[:5], 'Uptime: ', uptime, ', Tick: ', self.tick) + delta = self.tickRate - delta + if delta > 0: + time.sleep(delta) + self.time = time.time() + + for client in self.clients: + client.sendUpdate(packet) + if client.pos is not None: + self.pos = client.pos + self.cmd = client.cmd + + return self.pos, self.cmd + + def clientConnectionMade(self, client): + self.clients.append(client) + + def clientConnectionLost(self, client): + self.clients.remove(client) class Application: - def __init__(self, realm): - signal(SIGINT, self.kill) - log.startLogging(sys.stdout) + def __init__(self, realm): + signal(SIGINT, self.kill) + log.startLogging(sys.stdout) - port = 8080 - self.factory = WSServerFactory(u'ws://localhost:{}'.format(port), realm) - self.factory.protocol = GodswordServerProtocol - resource = WebSocketResource(self.factory) + port = 8080 + self.factory = WSServerFactory(u'ws://localhost:{}'.format(port), realm) + self.factory.protocol = GodswordServerProtocol + resource = WebSocketResource(self.factory) - root = File(".") - root.putChild(b"ws", resource) - site = Site(root) + root = File(".") + root.putChild(b"ws", resource) + site = Site(root) - reactor.listenTCP(port, site) + reactor.listenTCP(port, site) - def run(): - reactor.run(installSignalHandlers=0) + def run(): + reactor.run(installSignalHandlers=0) - threading.Thread(target=run).start() + threading.Thread(target=run).start() - def update(self, packet): - return self.factory.update(packet) + def update(self, packet): + return self.factory.update(packet) - def kill(*args): - print("Killed by user") - reactor.stop() - os._exit(0) + def kill(*args): + print("Killed by user") + reactor.stop() + os._exit(0) \ No newline at end of file diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index 85cbf1c26..81075415b 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -1,4 +1,4 @@ -# pylint: disable=all +#pylint: disable=protected-access, invalid-name import numpy as np @@ -6,68 +6,75 @@ from nmmo.systems.ai import move, utils def update(entity): - '''Update validity of tracked entities''' - if not utils.validTarget(entity, entity.attacker, entity.vision): - entity.attacker = None - if not utils.validTarget(entity, entity.target, entity.vision): - entity.target = None - if not utils.validTarget(entity, entity.closest, entity.vision): - entity.closest = None - - if entity.__class__.__name__ != 'Player': - return - - if not utils.validResource(entity, entity.food, entity.vision): - entity.food = None - if not utils.validResource(entity, entity.water, entity.vision): - entity.water = None + '''Update validity of tracked entities''' + if not utils.validTarget(entity, entity.attacker, entity.vision): + entity.attacker = None + if not utils.validTarget(entity, entity.target, entity.vision): + entity.target = None + if not utils.validTarget(entity, entity.closest, entity.vision): + entity.closest = None + + if entity.__class__.__name__ != 'Player': + return + + if not utils.validResource(entity, entity.food, entity.vision): + entity.food = None + if not utils.validResource(entity, entity.water, entity.vision): + entity.water = None + def pathfind(realm, actions, entity, target): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map.tiles, entity, target)} + # TODO: do not access realm._np_random directly. ALSO see below for all other uses + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.pathfind(realm.map, entity, target, realm._np_random)} + def explore(realm, actions, entity): - sz = realm.config.TERRAIN_SIZE - r, c = entity.pos + sz = realm.config.TERRAIN_SIZE + r, c = entity.pos - spawnR, spawnC = entity.spawnPos - centR, centC = sz//2, sz//2 + spawnR, spawnC = entity.spawnPos + centR, centC = sz//2, sz//2 - vR, vC = centR-spawnR, centC-spawnC + vR, vC = centR-spawnR, centC-spawnC - mmag = max(abs(vR), abs(vC)) - rr = r + int(np.round(entity.vision*vR/mmag)) - cc = c + int(np.round(entity.vision*vC/mmag)) + mmag = max(abs(vR), abs(vC)) + rr = r + int(np.round(entity.vision*vR/mmag)) + cc = c + int(np.round(entity.vision*vC/mmag)) + + tile = realm.map.tiles[rr, cc] + pathfind(realm, actions, entity, tile) - tile = realm.map.tiles[rr, cc] - pathfind(realm, actions, entity, tile) def meander(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.habitable(realm.map.tiles, entity)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} def evade(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.antipathfind(realm.map, entity, entity.attacker, + realm._np_random)} def hunt(realm, actions, entity): - #Move args - distance = utils.distance(entity, entity.target) - - direction = None - if distance == 0: - direction = move.random_direction() - elif distance > 1: - direction = move.pathfind(realm.map.tiles, entity, entity.target) + # Move args + distance = utils.lInfty(entity.pos, entity.target.pos) - if direction is not None: - actions[nmmo.action.Move] = {nmmo.action.Direction: direction} + if distance > 1: + actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map, + entity, + entity.target, + realm._np_random)} + elif distance == 0: + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} - attack(realm, actions, entity) + attack(realm, actions, entity) def attack(realm, actions, entity): - distance = utils.lInfty(entity.pos, entity.target.pos) - if distance > entity.skills.style.attack_range(realm.config): - return - - actions[nmmo.action.Attack] = { - nmmo.action.Style: entity.skills.style, - nmmo.action.Target: entity.target} + distance = utils.lInfty(entity.pos, entity.target.pos) + if distance > entity.skills.style.attack_range(realm.config): + return + actions[nmmo.action.Attack] = { + nmmo.action.Style: entity.skills.style, + nmmo.action.Target: entity.target} diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index 80eb461ac..d2d398f2b 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -1,46 +1,25 @@ -# pylint: disable=R0401 - -import random - +# pylint: disable=cyclic-import from nmmo.core import action from nmmo.systems.ai import utils +DIRECTIONS = [ # row delta, col delta, action + (-1, 0, action.North), + (1, 0, action.South), + (0, -1, action.West), + (0, 1, action.East)] * 2 -def random_direction(): - return random.choice(action.Direction.edges) - -def random_safe(tiles, ent): - r, c = ent.pos - cands = [] - if not tiles[r-1, c].void: - cands.append(action.North) - if not tiles[r+1, c].void: - cands.append(action.South) - if not tiles[r, c-1].void: - cands.append(action.West) - if not tiles[r, c+1].void: - cands.append(action.East) - - return random.choice(cands) - -def habitable(tiles, ent): - r, c = ent.pos - cands = [] - if tiles[r-1, c].habitable: - cands.append(action.North) - if tiles[r+1, c].habitable: - cands.append(action.South) - if tiles[r, c-1].habitable: - cands.append(action.West) - if tiles[r, c+1].habitable: - cands.append(action.East) - - if len(cands) == 0: - return action.North +def habitable(realm_map, ent, np_random): + r, c = ent.pos + is_habitable = realm_map.habitable_tiles + start = np_random.get_direction() + for i in range(4): + dr, dc, act = DIRECTIONS[start + i] + if is_habitable[r + dr, c + dc]: + return act - return random.choice(cands) + return action.North -def towards(direction): +def towards(direction, np_random): if direction == (-1, 0): return action.North if direction == (1, 0): @@ -50,19 +29,19 @@ def towards(direction): if direction == (0, 1): return action.East - return random.choice(action.Direction.edges) + return np_random.choice(action.Direction.edges) -def bullrush(ent, targ): +def bullrush(ent, targ, np_random): direction = utils.directionTowards(ent, targ) - return towards(direction) + return towards(direction, np_random) -def pathfind(tiles, ent, targ): - direction = utils.aStar(tiles, ent.pos, targ.pos) - return towards(direction) +def pathfind(realm_map, ent, targ, np_random): + direction = utils.aStar(realm_map, ent.pos, targ.pos) + return towards(direction, np_random) -def antipathfind(tiles, ent, targ): +def antipathfind(realm_map, ent, targ, np_random): er, ec = ent.pos tr, tc = targ.pos goal = (2*er - tr , 2*ec-tc) - direction = utils.aStar(tiles, ent.pos, goal) - return towards(direction) + direction = utils.aStar(realm_map, ent.pos, goal) + return towards(direction, np_random) diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py index cd23d5bff..3fd7165ca 100644 --- a/nmmo/systems/ai/utils.py +++ b/nmmo/systems/ai/utils.py @@ -1,5 +1,4 @@ -# pylint: disable=all - +#pylint: disable=protected-access, invalid-name import heapq from typing import Tuple @@ -10,175 +9,187 @@ def validTarget(ent, targ, rng): - if targ is None or not targ.alive: - return False - if lInfty(ent.pos, targ.pos) > rng: - return False - return True + if targ is None or not targ.alive or lInfty(ent.pos, targ.pos) > rng: + return False + return True def validResource(ent, tile, rng): - return tile is not None and tile.state.tex in ( - 'foilage', 'water') and distance(ent, tile) <= rng + return tile is not None and tile.state.tex in ( + 'foilage', 'water') and lInfty(ent.pos, tile.pos) <= rng def directionTowards(ent, targ): - sr, sc = ent.pos - tr, tc = targ.pos + sr, sc = ent.pos + tr, tc = targ.pos - if abs(sc - tc) > abs(sr - tr): - direction = (0, np.sign(tc - sc)) - else: - direction = (np.sign(tr - sr), 0) + if abs(sc - tc) > abs(sr - tr): + direction = (0, np.sign(tc - sc)) + else: + direction = (np.sign(tr - sr), 0) - return direction + return direction def closestTarget(ent, tiles, rng=1): - sr, sc = ent.pos - for d in range(rng+1): - for r in range(-d, d+1): - for e in tiles[sr+r, sc-d].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + sr, sc = ent.pos + for d in range(rng+1): + for r in range(-d, d+1): + for e in tiles[sr+r, sc-d].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e - for e in tiles[sr + r, sc + d].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + for e in tiles[sr + r, sc + d].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e - for e in tiles[sr - d, sc + r].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + for e in tiles[sr - d, sc + r].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e - for e in tiles[sr + d, sc + r].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + for e in tiles[sr + d, sc + r].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e + return None -def distance(ent, targ): - # used in scripted/behavior.py, attack() to determine attack range - return lInfty(ent.pos, targ.pos) def lInf(ent, targ): - sr, sc = ent.pos - gr, gc = targ.pos - return abs(gr - sr) + abs(gc - sc) + sr, sc = ent.pos + gr, gc = targ.pos + return abs(gr - sr) + abs(gc - sc) def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] + r, c = pos + return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] def cropTilesAround(position: Tuple[int, int], horizon: int, tiles): - line, column = position + line, column = position - return tiles[max(line - horizon, 0): min(line + horizon + 1, len(tiles)), - max(column - horizon, 0): min(column + horizon + 1, len(tiles[0]))] + return tiles[max(line - horizon, 0): min(line + horizon + 1, len(tiles)), + max(column - horizon, 0): min(column + horizon + 1, len(tiles[0]))] # A* Search -def l1(start, goal): - sr, sc = start - gr, gc = goal - return abs(gr - sr) + abs(gc - sc) - -def l2(start, goal): - sr, sc = start - gr, gc = goal - return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 - -#TODO: unify lInfty and lInf -def lInfty(start, goal): - sr, sc = start - gr, gc = goal - return max(abs(gr - sr), abs(gc - sc)) - -def aStar(tiles, start, goal, cutoff=100): - if start == goal: - return (0, 0) - - pq = [(0, start)] - - backtrace = {} - cost = {start: 0} - closestPos = start - closestHeuristic = l1(start, goal) - closestCost = closestHeuristic - while pq: - # Use approximate solution if budget exhausted - cutoff -= 1 - if cutoff <= 0: - if goal not in backtrace: - goal = closestPos - break - - priority, cur = heapq.heappop(pq) - - if cur == goal: - break - - for nxt in adjacentPos(cur): - if not in_bounds(*nxt, tiles.shape): - continue - - newCost = cost[cur] + 1 - if nxt not in cost or newCost < cost[nxt]: - cost[nxt] = newCost - heuristic = lInfty(goal, nxt) - priority = newCost + heuristic +def l1(start, goal): + sr, sc = start + gr, gc = goal + return abs(gr - sr) + abs(gc - sc) - # Compute approximate solution - if heuristic < closestHeuristic or ( - heuristic == closestHeuristic and priority < closestCost): - closestPos = nxt - closestHeuristic = heuristic - closestCost = priority - heapq.heappush(pq, (priority, nxt)) - backtrace[nxt] = cur +def l2(start, goal): + sr, sc = start + gr, gc = goal + return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 - while goal in backtrace and backtrace[goal] != start: - goal = backtrace[goal] +# TODO: unify lInfty and lInf - sr, sc = start - gr, gc = goal - return (gr - sr, gc - sc) +def lInfty(start, goal): + sr, sc = start + gr, gc = goal + return max(abs(gr - sr), abs(gc - sc)) + + +CUTOFF = 100 + + +def aStar(realm_map, start, goal): + cutoff = CUTOFF + tiles = realm_map.tiles + if start == goal: + return (0, 0) + if (start, goal) in realm_map.pathfinding_cache: + return realm_map.pathfinding_cache[(start, goal)] + initial_goal = goal + pq = [(0, start)] + + backtrace = {} + cost = {start: 0} + + closestPos = start + closestHeuristic = l1(start, goal) + closestCost = closestHeuristic + + while pq: + # Use approximate solution if budget exhausted + cutoff -= 1 + if cutoff <= 0: + if goal not in backtrace: + goal = closestPos + break + + priority, cur = heapq.heappop(pq) + + if cur == goal: + break + + for nxt in adjacentPos(cur): + if not in_bounds(*nxt, tiles.shape): + continue + + newCost = cost[cur] + 1 + if nxt not in cost or newCost < cost[nxt]: + cost[nxt] = newCost + heuristic = lInfty(goal, nxt) + priority = newCost + heuristic + + # Compute approximate solution + if heuristic < closestHeuristic or ( + heuristic == closestHeuristic and priority < closestCost): + closestPos = nxt + closestHeuristic = heuristic + closestCost = priority + + heapq.heappush(pq, (priority, nxt)) + backtrace[nxt] = cur + + while goal in backtrace and backtrace[goal] != start: + gr, gc = goal + goal = backtrace[goal] + sr, sc = goal + realm_map.pathfinding_cache[(goal, initial_goal)] = (gr - sr, gc - sc) + + sr, sc = start + gr, gc = goal + realm_map.pathfinding_cache[(start, initial_goal)] = (gr - sr, gc - sc) + return (gr - sr, gc - sc) # End A* # Adjacency functions -def adjacentTiles(tiles, ent): - r, c = ent.pos - - def adjacentDeltas(): - return [(-1, 0), (1, 0), (0, 1), (0, -1)] + return [(-1, 0), (1, 0), (0, 1), (0, -1)] def l1Deltas(s): - rets = [] - for r in range(-s, s + 1): - for c in range(-s, s + 1): - rets.append((r, c)) - return rets + rets = [] + for r in range(-s, s + 1): + for c in range(-s, s + 1): + rets.append((r, c)) + return rets def posSum(pos1, pos2): - return pos1[0] + pos2[0], pos1[1] + pos2[1] + return pos1[0] + pos2[0], pos1[1] + pos2[1] def adjacentEmptyPos(env, pos): - return [p for p in adjacentPos(pos) - if in_bounds(*p, env.size)] + return [p for p in adjacentPos(pos) + if in_bounds(*p, env.size)] def adjacentTiles(env, pos): - return [env.tiles[p] for p in adjacentPos(pos) - if in_bounds(*p, env.size)] + return [env.tiles[p] for p in adjacentPos(pos) + if in_bounds(*p, env.size)] def adjacentMats(tiles, pos): - return [type(tiles[p].state) for p in adjacentPos(pos) - if in_bounds(*p, tiles.shape)] + return [type(tiles[p].state) for p in adjacentPos(pos) + if in_bounds(*p, tiles.shape)] def adjacencyDelMatPairs(env, pos): - return zip(adjacentDeltas(), adjacentMats(env.tiles, pos)) -###End### + return zip(adjacentDeltas(), adjacentMats(env.tiles, pos)) +### End### diff --git a/nmmo/systems/combat.py b/nmmo/systems/combat.py index 1666feed1..42c81eeb3 100644 --- a/nmmo/systems/combat.py +++ b/nmmo/systems/combat.py @@ -130,16 +130,16 @@ def danger(config, pos): return norm -def spawn(config, dnger): +def spawn(config, dnger, np_random): border = config.MAP_BORDER center = config.MAP_CENTER mid = center // 2 dist = dnger * center / 2 max_offset = mid - dist - offset = mid + border + np.random.randint(-max_offset, max_offset) + offset = mid + border + np_random.integers(-max_offset, max_offset) - rng = np.random.rand() + rng = np_random.random() if rng < 0.25: r = border + dist c = offset diff --git a/nmmo/systems/droptable.py b/nmmo/systems/droptable.py index 6110d79f2..7d8728474 100644 --- a/nmmo/systems/droptable.py +++ b/nmmo/systems/droptable.py @@ -1,5 +1,3 @@ -import numpy as np - class Fixed(): def __init__(self, item): self.item = item @@ -13,7 +11,10 @@ def __init__(self, item, prob): self.prob = prob def roll(self, realm, level): - if np.random.rand() < self.prob: + # TODO: do not access realm._np_random directly + # related to skill.py, all harvest skills + # pylint: disable=protected-access + if realm._np_random.random() < self.prob: return self.item(realm, level) return None diff --git a/nmmo/systems/inventory.py b/nmmo/systems/inventory.py index 6e6f19bd3..ae2024500 100644 --- a/nmmo/systems/inventory.py +++ b/nmmo/systems/inventory.py @@ -103,7 +103,7 @@ def __init__(self, realm, entity): self.capacity = config.ITEM_INVENTORY_CAPACITY self._item_stacks: Dict[Tuple, Item.Stack] = {} - self.items: OrderedSet[Item.Item] = OrderedSet([]) + self.items: OrderedSet[Item.Item] = OrderedSet([]) # critical for correct functioning @property def space(self): @@ -125,7 +125,8 @@ def __iter__(self): for item in self.items: yield item - def receive(self, item: Item.Item): + def receive(self, item: Item.Item) -> bool: + # Return True if the item is received assert isinstance(item, Item.Item), f'{item} received is not an Item instance' assert item not in self.items, f'{item} object received already in inventory' assert not item.equipped.val, f'Received equipped item {item}' @@ -140,19 +141,19 @@ def receive(self, item: Item.Item): stack.quantity.increment(item.quantity.val) # destroy the original item instance after the transfer is complete item.destroy() - return + return False if not self.space: # if no space thus cannot receive, just destroy the item item.destroy() - return + return False self._item_stacks[signature] = item if not self.space: # if no space thus cannot receive, just destroy the item item.destroy() - return + return False self.realm.log_milestone(f'Receive_{item.__class__.__name__}', item.level.val, f'INVENTORY: Received level {item.level.val} {item.__class__.__name__}', @@ -160,6 +161,7 @@ def receive(self, item: Item.Item): item.owner_id.update(self.entity.id.val) self.items.add(item) + return True # pylint: disable=protected-access def remove(self, item, quantity=None): diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index 59e93c5c9..2978bb429 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -17,7 +17,7 @@ def __init__(self, realm, entity): self.entity = entity self.experience_calculator = experience.ExperienceCalculator() - self.skills = OrderedSet() + self.skills = OrderedSet() # critical for determinism def update(self): for skill in self.skills: @@ -97,8 +97,8 @@ def process_drops(self, matl, drop_table): # for example, fishing level=5 without rod will only yield level-1 ration level = 1 tool = entity.equipment.held - if matl.tool is not None and isinstance(tool, matl.tool): - level = tool.level.val + if matl.tool is not None and isinstance(tool.item, matl.tool): + level = min(1+tool.item.level.val, self.config.PROGRESSION_LEVEL_MAX) #TODO: double-check drop table quantity for drop in drop_table.roll(self.realm, level): diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 4f8dbaf14..b619b336b 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -6,7 +6,6 @@ from nmmo.task.group import Group from nmmo.task.game_state import GameState -from nmmo.task import constraint from nmmo.systems import skill as nmmo_skill from nmmo.systems.skill import Skill from nmmo.systems.item import Item @@ -16,50 +15,38 @@ def norm(progress): return max(min(progress, 1.0), 0.0) -def Success(gs: GameState, - subject: Group): +def Success(gs: GameState, subject: Group): ''' Returns True. For debugging. ''' return True -def TickGE(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - num_tick: int = constraint.ScalarConstraint()): +def TickGE(gs: GameState, subject: Group, num_tick: int): """True if the current tick is greater than or equal to the specified num_tick. Is progress counter. """ return norm(gs.current_tick / num_tick) -def CanSeeTile(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - tile_type: type[Material]= constraint.MATERIAL_CONSTRAINT): +def CanSeeTile(gs: GameState, subject: Group, tile_type: type[Material]): """ True if any agent in subject can see a tile of tile_type """ return any(tile_type.index in t for t in subject.obs.tile.material_id) -def StayAlive(gs: GameState, - subject: Group = constraint.TEAM_GROUPS): +def StayAlive(gs: GameState, subject: Group): """True if all subjects are alive. """ return count(subject.health > 0) == len(subject) -def AllDead(gs: GameState, - subject: Group = constraint.TEAM_GROUPS): +def AllDead(gs: GameState, subject: Group): """True if all subjects are dead. """ return norm(1.0 - count(subject.health) / len(subject)) -def OccupyTile(gs: GameState, - subject: Group, - row: int = constraint.COORDINATE_CONSTRAINT, - col: int = constraint.COORDINATE_CONSTRAINT): +def OccupyTile(gs: GameState, subject: Group, row: int, col: int): """True if any subject agent is on the desginated tile. """ return np.any((subject.row == row) & (subject.col == col)) -def AllMembersWithinRange(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - dist: int = constraint.COORDINATE_CONSTRAINT): +def AllMembersWithinRange(gs: GameState, subject: Group, dist: int): """True if the max l-inf distance of teammates is less than or equal to dist """ @@ -69,23 +56,19 @@ def AllMembersWithinRange(gs: GameState, return 1.0 return norm(dist / current_dist) -def CanSeeAgent(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - target: int = constraint.AGENT_NUMBER_CONSTRAINT): +def CanSeeAgent(gs: GameState, subject: Group, target: int): """True if obj_agent is present in the subjects' entities obs. """ return any(target in e.ids for e in subject.obs.entities) -def CanSeeGroup(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - target: Iterable[int] = constraint.AgentListConstraint): +def CanSeeGroup(gs: GameState, subject: Group, target: Iterable[int]): """ Returns True if subject can see any of target """ + if target is None: + return False return any(CanSeeAgent(gs, subject, agent) for agent in target) -def DistanceTraveled(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - dist: int = constraint.ScalarConstraint()): +def DistanceTraveled(gs: GameState, subject: Group, dist: int): """True if the summed l-inf distance between each agent's current pos and spawn pos is greater than or equal to the specified _dist. """ @@ -96,41 +79,27 @@ def DistanceTraveled(gs: GameState, dists = utils.linf(list(zip(r,c)),[gs.spawn_pos[id_] for id_ in subject.entity.id]) return norm(dists.sum() / dist) -def AttainSkill(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - skill: Skill = constraint.SKILL_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def AttainSkill(gs: GameState, subject: Group, skill: Skill, level: int, num_agent: int): """True if the number of agents having skill level GE level is greather than or equal to num_agent """ skill_level = getattr(subject,skill.__name__.lower() + '_level') return norm(sum(skill_level >= level) / num_agent) -def CountEvent(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - event: str = constraint.EVENTCODE_CONSTRAINT, - N: int = constraint.EVENT_NUMBER_CONSTRAINT): +def CountEvent(gs: GameState, subject: Group, event: str, N: int): """True if the number of events occured in subject corresponding to event >= N """ return norm(len(getattr(subject.event, event)) / N) -def ScoreHit(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, - N: int = constraint.EVENT_NUMBER_CONSTRAINT): +def ScoreHit(gs: GameState, subject: Group, combat_style: type[Skill], N: int): """True if the number of hits scored in style combat_style >= count """ hits = subject.event.SCORE_HIT.combat_style == combat_style.SKILL_ID return norm(count(hits) / N) -def DefeatEntity(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - agent_type: str = constraint.AGENT_TYPE_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def DefeatEntity(gs: GameState, subject: Group, agent_type: str, level: int, num_agent: int): """True if the number of agents (agent_type, >= level) defeated is greater than or equal to num_agent """ @@ -143,50 +112,36 @@ def DefeatEntity(gs: GameState, return norm(count(defeated) / num_agent) return 1.0 -def HoardGold(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def HoardGold(gs: GameState, subject: Group, amount: int): """True iff the summed gold of all teammate is greater than or equal to amount. """ return norm(subject.gold.sum() / amount) -def EarnGold(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def EarnGold(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold earned is greater than or equal to amount. """ return norm(subject.event.EARN_GOLD.gold.sum() / amount) -def SpendGold(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def SpendGold(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold spent is greater than or equal to amount. """ return norm(subject.event.BUY_ITEM.gold.sum() / amount) -def MakeProfit(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def MakeProfit(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold earned-spent is greater than or equal to amount. """ profits = subject.event.EARN_GOLD.gold.sum() costs = subject.event.BUY_ITEM.gold.sum() return norm((profits-costs) / amount) -def InventorySpaceGE(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - space: int = constraint.INVENTORY_CONSTRAINT): +def InventorySpaceGE(gs: GameState, subject: Group, space: int): """True if the inventory space of every subjects is greater than or equal to the space. Otherwise false. """ max_space = gs.config.ITEM_INVENTORY_CAPACITY return all(max_space - inv.len >= space for inv in subject.obs.inventory) -def OwnItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.INVENTORY_CONSTRAINT): +def OwnItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if the number of items owned (_item_type, >= level) is greater than or equal to quantity. """ @@ -194,11 +149,7 @@ def OwnItem(gs: GameState, (subject.item.level >= level) return norm(sum(subject.item.quantity[owned]) / quantity) -def EquipItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.EQUIPABLE_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def EquipItem(gs: GameState, subject: Group, item: type[Item], level: int, num_agent: int): """True if the number of agents that equip the item (_item_type, >=_level) is greater than or equal to _num_agent. """ @@ -209,11 +160,8 @@ def EquipItem(gs: GameState, return norm(count(equipped) / num_agent) return 1.0 -def FullyArmed(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def FullyArmed(gs: GameState, subject: Group, + combat_style: type[Skill], level: int, num_agent: int): """True if the number of fully equipped agents is greater than or equal to _num_agent Otherwise false. To determine fully equipped, we look at hat, top, bottom, weapon, ammo, respectively, @@ -236,44 +184,28 @@ def FullyArmed(gs: GameState, return norm((equipment_numbers >= len(item_ids.items())).sum() / num_agent) return 1.0 -def ConsumeItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.CONSUMABLE_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def ConsumeItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity consumed of item type above level is >= quantity """ type_flt = subject.event.CONSUME_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.CONSUME_ITEM.level >= level return norm(subject.event.CONSUME_ITEM.number[type_flt & lvl_flt].sum() / quantity) -def HarvestItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.HARVEST_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def HarvestItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity harvested of item type above level is >= quantity """ type_flt = subject.event.HARVEST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.HARVEST_ITEM.level >= level return norm(subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -def ListItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def ListItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity listed of item type above level is >= quantity """ type_flt = subject.event.LIST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.LIST_ITEM.level >= level return norm(subject.event.LIST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -def BuyItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def BuyItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity purchased of item type above level is >= quantity """ type_flt = subject.event.BUY_ITEM.type == item.ITEM_TYPE_ID diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index fb57ba4cf..e5e743d05 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -1,16 +1,18 @@ from __future__ import annotations -from typing import Dict, List, Tuple, MutableMapping -from dataclasses import dataclass +from typing import Dict, Iterable, Tuple, MutableMapping, Set, List +from dataclasses import dataclass, field from copy import deepcopy -from abc import ABC, abstractmethod +from collections import defaultdict +import weakref +from abc import ABC, abstractmethod +import functools import numpy as np from nmmo.core.config import Config from nmmo.core.realm import Realm from nmmo.core.observation import Observation from nmmo.task.group import Group - from nmmo.entity.entity import EntityState from nmmo.lib.event_log import EventState, ATTACK_COL_MAP, ITEM_COL_MAP, LEVEL_COL_MAP from nmmo.lib.log import EventCode @@ -18,6 +20,7 @@ from nmmo.core.tile import TileState EntityAttr = EntityState.State.attr_name_to_col +EntityAttrKeys = EntityAttr.keys() EventAttr = EventState.State.attr_name_to_col ItemAttr = ItemState.State.attr_name_to_col TileAttr = TileState.State.attr_name_to_col @@ -31,38 +34,74 @@ class GameState: config: Config spawn_pos: Dict[int, Tuple[int, int]] # ent_id: (row, col) of all spawned agents - alive_agents: List[int] # of alive agents' ent_id (for convenience) + alive_agents: Set[int] # of alive agents' ent_id (for convenience) env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table + entity_index: Dict[int, Iterable] # precomputed index for where_in_1d item_data: np.ndarray # a copied, whole Item ds table + item_index: Dict[int, Iterable] event_data: np.ndarray # a copied, whole Event log table + event_index: Dict[int, Iterable] cache_result: MutableMapping # cache for general memoization + _group_view: List[GroupView] = field(default_factory=list) # cache for GroupView + # add helper functions below + @functools.lru_cache def entity_or_none(self, ent_id): flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id if np.any(flt_ent): return EntityState.parse_array(self.entity_data[flt_ent][0]) - return None - def where_in_id(self, data_type, subject: List[int]): + def where_in_id(self, data_type, subject: Iterable[int]): + k = (data_type, subject) + if k in self.cache_result: + return self.cache_result[k] + if data_type == 'entity': - flt_idx = np.in1d(self.entity_data[:, EntityAttr['id']], subject) - return self.entity_data[flt_idx] + flt_idx = [row for sbj in subject for row in self.entity_index.get(sbj,[])] + self.cache_result[k] = self.entity_data[flt_idx] if data_type == 'item': - flt_idx = np.in1d(self.item_data[:, ItemAttr['owner_id']], subject) - return self.item_data[flt_idx] + flt_idx = [row for sbj in subject for row in self.item_index.get(sbj,[])] + self.cache_result[k] = self.item_data[flt_idx] if data_type == 'event': - flt_idx = np.in1d(self.event_data[:, EventAttr['ent_id']], subject) - return self.event_data[flt_idx] + flt_idx = [row for sbj in subject for row in self.event_index.get(sbj,[])] + self.cache_result[k] = self.event_data[flt_idx] + if data_type in ['entity', 'item', 'event']: + return self.cache_result[k] + raise ValueError("data_type must be in entity, item, event") def get_subject_view(self, subject: Group): - return GroupView(self, subject) + new_group_view = GroupView(self, subject) + self._group_view.append(new_group_view) + return new_group_view + + def clear_cache(self): + # clear the cache, so that this object can be garbage collected + self.entity_or_none.cache_clear() # pylint: disable=no-member + self.cache_result.clear() + self.alive_agents.clear() + while self._group_view: + weakref.ref(self._group_view.pop()) # clear the cache # Wrapper around an iterable datastore +class CachedProperty: + def __init__(self, func): + self.func = func + # Allows the instance keys to be garbage collected + # when they are no longer referenced elsewhere + self.cache = weakref.WeakKeyDictionary() + + def __get__(self, instance, owner): + if instance is None: + return self + if instance not in self.cache: + self.cache[instance] = self.func(instance) + return self.cache[instance] + class ArrayView(ABC): def __init__(self, mapping, @@ -74,7 +113,9 @@ def __init__(self, self._name = name self._gs = gs self._subject = subject + self._hash = hash(subject) ^ hash(name) self._arr = arr + self._cache = self._gs.cache_result def __len__(self): return len(self._arr) @@ -84,11 +125,11 @@ def get_attribute(self, attr) -> np.ndarray: raise NotImplementedError def __getattr__(self, attr) -> np.ndarray: - k = (self._subject, self._name+'_'+attr) - if k in self._gs.cache_result: - return self._gs.cache_result[k] + k = (self._hash, attr) + if k in self._cache: + return self._cache[k] v = object.__getattribute__(self, 'get_attribute')(attr) - self._gs.cache_result[k] = v + self._cache[k] = v return v class ItemView(ArrayView): @@ -142,7 +183,10 @@ def __init__(self, gs: GameState, subject: Group): valid_agents = filter(lambda eid: eid in gs.env_obs,subject.agents) self._obs = [gs.env_obs[ent_id] for ent_id in valid_agents] self._subject = subject - self.tile = TileView(gs, subject, [o.tiles for o in self._obs]) + + @CachedProperty + def tile(self): + return TileView(self._gs, self._subject, [o.tiles for o in self._obs]) def __getattr__(self, attr): return [getattr(o, attr) for o in self._obs] @@ -151,35 +195,58 @@ class GroupView: def __init__(self, gs: GameState, subject: Group): self._gs = gs self._subject = subject - self._sbj_ent = gs.where_in_id('entity', subject.agents) - self._sbj_item = gs.where_in_id('item', subject.agents) - self._sbj_event = gs.where_in_id('event', subject.agents) + self._subject_hash = hash(subject) - self.entity = EntityView(gs, subject, self._sbj_ent) - self.item = ItemView(gs, subject, self._sbj_item) - self.event = EventView(gs, subject, self._sbj_event) - self.obs = GroupObsView(gs, subject) + @CachedProperty + def obs(self): + return GroupObsView(self._gs, self._subject) + + @CachedProperty + def _sbj_ent(self): + return self._gs.where_in_id('entity', self._subject.agents) + + @CachedProperty + def entity(self): + return EntityView(self._gs, self._subject, self._sbj_ent) + + @CachedProperty + def _sbj_item(self): + return self._gs.where_in_id('item', self._subject.agents) + + @CachedProperty + def item(self): + return ItemView(self._gs, self._subject, self._sbj_item) + + @CachedProperty + def _sbj_event(self): + return self._gs.where_in_id('event', self._subject.agents) + + @CachedProperty + def event(self): + return EventView(self._gs, self._subject, self._sbj_event) def __getattribute__(self, attr): - if attr in ['_gs','_subject','_sbj_ent','_sbj_item','entity','item','event','obs']: - return object.__getattribute__(self,attr) + if attr in {'_gs','_subject','_sbj_ent','_sbj_item', + 'entity','item','event','obs', '_subject_hash'}: + return object.__getattribute__(self, attr) # Cached optimization - k = (self._subject, attr) - if k in self._gs.cache_result: - return self._gs.cache_result[k] + k = (self._subject_hash, attr) + cache = self._gs.cache_result + if k in cache: + return cache[k] try: # Get property - if attr in EntityAttr.keys(): + if attr in EntityAttrKeys: v = getattr(self.entity, attr) else: v = object.__getattribute__(self, attr) - self._gs.cache_result[k] = v + cache[k] = v return v except AttributeError: # View behavior - return object.__getattribute__(self._gs,attr) + return object.__getattribute__(self._gs, attr) class GameStateGenerator: def __init__(self, realm: Realm, config: Config): @@ -191,16 +258,31 @@ def __init__(self, realm: Realm, config: Config): def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: # copy the datastore, by running astype - entity_all = EntityState.Query.table(realm.datastore).astype(np.int16) - + entity_all = EntityState.Query.table(realm.datastore).copy() + alive_agents = entity_all[:, EntityAttr["id"]] + alive_agents = set(alive_agents[alive_agents > 0]) + item_data = ItemState.Query.table(realm.datastore).copy() + event_data = EventState.Query.table(realm.datastore).copy() return GameState( current_tick = realm.tick, config = self.config, spawn_pos = self.spawn_pos, - alive_agents = list(entity_all[:, EntityAttr["id"]]), + alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, - item_data = ItemState.Query.table(realm.datastore).astype(np.int16), - event_data = EventState.Query.table(realm.datastore).astype(np.int16), + entity_index = precompute_index(entity_all, EntityAttr["id"]), + item_data = item_data, + item_index = precompute_index(item_data, ItemAttr["owner_id"]), + event_data = event_data, + event_index = precompute_index(event_data, EventAttr['ent_id']), cache_result = {} ) + +def precompute_index(table, id_col): + index = defaultdict() + for row, id_ in enumerate(table[:,id_col]): + if id_ in index: + index[id_].append(row) + else: + index[id_] = [row] + return index diff --git a/nmmo/task/group.py b/nmmo/task/group.py index 442778c18..4d319f081 100644 --- a/nmmo/task/group.py +++ b/nmmo/task/group.py @@ -2,6 +2,7 @@ from typing import Dict, Union, Iterable, TYPE_CHECKING from collections import OrderedDict from collections.abc import Set, Sequence +import weakref if TYPE_CHECKING: from nmmo.task.game_state import GameState, GroupView @@ -25,6 +26,8 @@ def __init__(self, self._sd: GroupView = None self._gs: GameState = None + self._hash = hash(self._agents) + @property def agents(self): return self._agents @@ -42,7 +45,7 @@ def __len__(self): return len(self._agents) def __hash__(self): - return hash(self._agents) + return self._hash def __getitem__(self, key): if len(self) == 1 and key == 0: @@ -73,7 +76,16 @@ def description(self) -> Dict: "agents": self._agents } + def clear_prev_state(self) -> None: + if self._gs is not None: + self._gs.clear_cache() # prevent memory leak + self._gs = None + if self._sd is not None: + weakref.ref(self._sd) # prevent memory leak + self._sd = None + def update(self, gs: GameState) -> None: + self.clear_prev_state() self._gs = gs self._sd = gs.get_subject_view(self) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index e71f2cc19..6381f2be3 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Callable, List, Optional, Tuple, Union, Iterable, TYPE_CHECKING +from typing import Callable, List, Optional, Tuple, Union, Iterable, Type, TYPE_CHECKING from types import FunctionType from abc import ABC, abstractmethod import inspect @@ -53,12 +53,12 @@ def __call__(self, gs: GameState) -> float: for group in self._groups: group.update(gs) # Calculate score - # cache = gs.cache_result - if self.name in gs.cache_result: - progress = gs.cache_result[self.name] + cache = gs.cache_result + if self.name in cache: + progress = cache[self.name] else: progress = max(min(self._evaluate(gs)*1.0,1.0),0.0) - gs.cache_result[self.name] = progress + cache[self.name] = progress return progress def _reset(self, config: Config): @@ -66,6 +66,11 @@ def _reset(self, config: Config): if not self.check(self._config): raise InvalidConstraint() + def close(self): + # To prevent memory leak, clear all refs to old game state + for group in self._groups: + group.clear_prev_state() + def check(self, config: Config): """ Checks whether the predicate is valid @@ -124,13 +129,34 @@ def _make_name(self, class_name, args, kwargs) -> str: def __str__(self): return self.name + @abstractmethod + def get_source_code(self) -> str: + """ Returns the actual source code how the game state/progress evaluation is done. + """ + raise NotImplementedError + + @abstractmethod + def get_signature(self) -> List: + """ Returns the signature of the game state/progress evaluation function. + """ + raise NotImplementedError + + @property + def args(self): + return self._args + + @property + def kwargs(self): + return self._kwargs + @property def subject(self): return self._subject - def create_task(self, task_cls: Task=None, + def create_task(self, + task_cls: Optional[Type[Task]]=None, assignee: Union[Iterable[int], int]=None, - reward_multiplier=1.0) -> Task: + **kwargs) -> Task: """ Creates a task from this predicate""" if task_cls is None: from nmmo.task.task_api import Task @@ -140,7 +166,7 @@ def create_task(self, task_cls: Task=None, # the new task is assigned to this predicate's subject assignee = self._subject.agents - return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier) + return task_cls(eval_fn=self, assignee=assignee, **kwargs) def __and__(self, other): return AND(self, other) @@ -171,7 +197,7 @@ def arg_to_string(arg): ################################################ -def make_predicate(fn: Callable) -> type[Predicate]: +def make_predicate(fn: Callable) -> Type[Predicate]: """ Syntactic sugar API for defining predicates from function """ signature = inspect.signature(fn) @@ -205,12 +231,11 @@ def __init__(self, *args, **kwargs) -> None: self._kwargs = kwargs self.name = self._make_name(fn.__name__, args, kwargs) def _evaluate(self, gs: GameState) -> float: - # pylint: disable=redefined-builtin, unused-variable - __doc = fn.__doc__ - result = fn(gs, *self._args, **self._kwargs) - if isinstance(result, Predicate): - return result(gs) - return result + return fn(gs, *self._args, **self._kwargs) + def get_source_code(self): + return inspect.getsource(fn).strip() + def get_signature(self) -> List: + return list(self._signature.parameters) return FunctionPredicate @@ -239,12 +264,44 @@ def check(self, config: Config) -> bool: return all((p.check(config) if isinstance(p, Predicate) else True for p in self._predicates)) - def sample(self, config: Config, cls: type[PredicateOperator], **kwargs): + def sample(self, config: Config, cls: Type[PredicateOperator], **kwargs): subject = self._subject_argument if 'subject' not in kwargs else kwargs['subject'] predicates = [p.sample(config, **kwargs) if isinstance(p, Predicate) else p(None) for p in self._predicates] return cls(*predicates, subject=subject) + def get_source_code(self) -> str: + # NOTE: get_source_code() of the combined predicates returns the joined str + # of each predicate's source code, which may NOT represent what the actual + # predicate is doing + # TODO: try to generate "the source code" that matches + # what the actual instantiated predicate returns, + # which perhaps should reflect the actual agent ids, etc... + src_list = [] + for pred in self._predicates: + if isinstance(pred, Predicate): + src_list.append(pred.get_source_code()) + return '\n\n'.join(src_list).strip() + + def get_signature(self): + # TODO: try to generate the correct signature + return [] + + @property + def args(self): + # TODO: try to generate the correct args + return [] + + @property + def kwargs(self): + # NOTE: This is incorrect implementation. kwargs of the combined predicates returns + # all summed kwargs dict, which can OVERWRITE the values of duplicated keys + # TODO: try to match the eval function and kwargs, which can be correctly used downstream + # for pred in self._predicates: + # if isinstance(pred, Predicate): + # kwargs.update(pred.kwargs) + return {} + class OR(PredicateOperator, Predicate): def __init__(self, *predicates: Predicate, subject: Group=None): super().__init__(lambda n: n>0, *predicates, subject=subject) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 8bc5d587a..be293c6a6 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -1,12 +1,14 @@ -# pylint: disable=unused-import -from typing import Callable, Iterable, Dict, List, Union, Tuple +# pylint: disable=unused-import,attribute-defined-outside-init +from typing import Callable, Iterable, Dict, List, Union, Tuple, Type from types import FunctionType from abc import ABC +import inspect +import numpy as np from nmmo.task.group import Group +from nmmo.task.game_state import GameState from nmmo.task.predicate_api import Predicate, make_predicate, arg_to_string from nmmo.task import base_predicates as bp -from nmmo.lib.team_helper import TeamHelper class Task(ABC): """ A task is used to calculate rewards for agents in assignee @@ -15,23 +17,36 @@ class Task(ABC): def __init__(self, eval_fn: Callable, assignee: Union[Iterable[int], int], - reward_multiplier = 1.0): + reward_multiplier = 1.0, + embedding = None, + spec_name: str = None): if isinstance(assignee, int): self._assignee = (assignee,) else: assert len(assignee) > 0, "Assignee cannot be empty" self._assignee = tuple(set(assignee)) # dedup self._eval_fn = eval_fn - self._progress = 0.0 - self._completed = False self._reward_multiplier = reward_multiplier - + self._embedding = None if embedding is None else np.array(embedding, dtype=np.float16) + self.spec_name = spec_name # None if not created using TaskSpec self.name = self._make_name(self.__class__.__name__, eval_fn=eval_fn, assignee=self._assignee) + self.reset() def reset(self): + self._stop_eval = False + self._last_eval_tick = None self._progress = 0.0 - self._completed = False + self._completed_tick = None + self._max_progress = 0.0 + self._positive_reward_count = 0 + self._negative_reward_count = 0 + + def close(self): + if self._stop_eval is False: + if isinstance(self._eval_fn, Predicate): + self._eval_fn.close() + self._stop_eval = True @property def assignee(self) -> Tuple[int]: @@ -39,39 +54,56 @@ def assignee(self) -> Tuple[int]: @property def completed(self) -> bool: - return self._completed + return self._completed_tick is not None @property def reward_multiplier(self) -> float: return self._reward_multiplier - def _map_progress_to_reward(self, gs) -> float: + @property + def reward_signal_count(self) -> int: + return self._positive_reward_count + self._negative_reward_count + + @property + def embedding(self): + return self._embedding + + def set_embedding(self, embedding): + self._embedding = embedding + + def _map_progress_to_reward(self, gs: GameState) -> float: """ The default reward is the diff between the old and new progress. Once the task is completed, no more reward is provided. Override this function to create a custom reward function """ - if self._completed: + if self.completed: return 0.0 new_progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) diff = new_progress - self._progress self._progress = new_progress if self._progress >= 1: - self._completed = True + self._completed_tick = gs.current_tick + diff = 1.0 # give out the max reward when task is completed return diff - def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: + def compute_rewards(self, gs: GameState) -> Tuple[Dict[int, float], Dict[int, Dict]]: """ Environment facing API Returns rewards and infos for all agents in subject """ reward = self._map_progress_to_reward(gs) * self._reward_multiplier + self._last_eval_tick = gs.current_tick + self._max_progress = max(self._max_progress, self._progress) + self._positive_reward_count += int(reward > 0) + self._negative_reward_count += int(reward < 0) rewards = {int(ent_id): reward for ent_id in self._assignee} - infos = {int(ent_id): {'reward': reward, - 'progress': self._progress, - 'completed': self._completed} + infos = {int(ent_id): {"task_spec": self.spec_name, + "reward": reward, + "progress": self._progress, + "completed": self.completed} for ent_id in self._assignee} # NOTE: tasks do not know whether assignee agents are alive or dead @@ -81,116 +113,137 @@ def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: def _make_name(self, class_name, **kwargs) -> str: name = [class_name] + \ [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] - name = "("+'_'.join(name).replace(' ', '')+")" + name = "("+"_".join(name).replace(" ", "")+")" return name def __str__(self): return self.name + @property + def subject(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.subject.agents + return self.assignee + + def get_source_code(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.get_source_code() + return inspect.getsource(self._eval_fn).strip() + + def get_signature(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.get_signature() + signature = inspect.signature(self._eval_fn) + return list(signature.parameters) + + @property + def args(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.args + # the function _eval_fn must only take gs + return [] + + @property + def kwargs(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.kwargs + # the function _eval_fn must only take gs + return {} + + @property + def progress_info(self): + return { + "task_spec_name": self.spec_name, + "last_eval_tick": self._last_eval_tick, + "completed": self.completed, + "completed_tick": self._completed_tick, + "max_progress": self._max_progress, + "positive_reward_count": self._positive_reward_count, + "negative_reward_count": self._negative_reward_count, + "reward_signal_count": self.reward_signal_count, + } + class OngoingTask(Task): - def _map_progress_to_reward(self, gs) -> float: + def _map_progress_to_reward(self, gs: GameState) -> float: """Keep returning the progress reward after the task is completed. However, this task tracks the completion status in the same manner. """ self._progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) - if self._progress >= 1: - self._completed = True + if self._progress >= 1 and self._completed_tick is None: + self._completed_tick = gs.current_tick return self._progress +class HoldDurationTask(Task): + def __init__(self, + eval_fn: Callable, + assignee: Union[Iterable[int], int], + hold_duration: int, + **kwargs): + super().__init__(eval_fn, assignee, **kwargs) + self._hold_duration = hold_duration + self._reset_timer() + + def _reset_timer(self): + self._timer = 0 + self._last_success_tick = 0 + + def reset(self): + super().reset() + self._reset_timer() + + def _map_progress_to_reward(self, gs: GameState) -> float: + # pylint: disable=attribute-defined-outside-init + if self.completed: + return 0.0 + + curr_eval = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + if curr_eval < 1: + self._reset_timer() + else: + self._timer += 1 + self._last_success_tick = gs.current_tick + + new_progress = self._timer / self._hold_duration + diff = new_progress - self._progress + self._progress = new_progress + if self._progress >= 1 and self._completed_tick is None: + self._completed_tick = gs.current_tick + diff = 1.0 # give out the max reward when task is completed + + return diff ###################################################################### # The same task is assigned each agent in agent_list individually # with the agent as the predicate subject and task assignee -def make_same_task(predicate: Union[Predicate, Callable], +def make_same_task(pred_cls: Union[Type[Predicate], Callable], agent_list: Iterable[int], - task_cls = Task, **kwargs) -> List[Task]: + pred_kwargs=None, + task_cls: Type[Task]=Task, + task_kwargs=None) -> List[Task]: # if a function is provided, make it a predicate class - if isinstance(predicate, FunctionType): - predicate = make_predicate(predicate) - - return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) - for agent_id in agent_list] + if isinstance(pred_cls, FunctionType): + pred_cls = make_predicate(pred_cls) + if pred_kwargs is None: + pred_kwargs = {} + if task_kwargs is None: + task_kwargs = {} + + task_list = [] + for agent_id in agent_list: + predicate = pred_cls(Group(agent_id), **pred_kwargs) + task_list.append(predicate.create_task(task_cls=task_cls, **task_kwargs)) + return task_list def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: # (almost) no overhead in env._compute_rewards() - if test_mode == 'no_task': + if test_mode == "no_task": return [] # eval function on Predicate class, but does not use Group during eval - if test_mode == 'dummy_eval_fn': + if test_mode == "dummy_eval_fn": # pylint: disable=unused-argument return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) # the default is to use the predicate class return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) - -###################################################################### -# TODO: a lot to improve below - -REWARD_TO = ['agent', 'team'] -VALID_TARGET = ['left_team', 'left_team_leader', - 'right_team', 'right_team_leader', - 'my_team_leader'] - -def make_team_tasks(teams, task_spec) -> List[Task]: - """ - task_spec: a list of tuples (reward_to, eval_fn, **kwargs) - - each tuple is assigned to the teams - """ - tasks = [] - team_list = list(teams.keys()) - team_helper = TeamHelper(teams) - for idx in range(min(len(team_list), len(task_spec))): - team_id = team_list[idx] - reward_to, pred_fn, kwargs = task_spec[team_id] - - assert reward_to in REWARD_TO, 'Wrong reward target' - - if 'task_cls' in kwargs: - task_cls = kwargs.pop('task_cls') - else: - task_cls = Task - - # reserve 'target' for relative agent mapping - if 'target' in kwargs: - target = kwargs.pop('target') - assert target in VALID_TARGET, 'Invalid target' - # translate target to specific agent ids using team_helper - target = team_helper.get_target_agent(team_id, target) - kwargs['target'] = target - - # handle some special cases and instantiate the predicate first - predicate = None - if isinstance(pred_fn, FunctionType): - # if a function is provided as a predicate - pred_cls = make_predicate(pred_fn) - - # TODO: should create a test for these - if pred_fn in [bp.AllDead]: - kwargs.pop('target') # remove target - predicate = pred_cls(Group(target), **kwargs) - if pred_fn in [bp.StayAlive] and 'target' in kwargs: - kwargs.pop('target') # remove target - predicate = pred_cls(Group(target), **kwargs) - - # create the task - if reward_to == 'team': - assignee = team_helper.teams[team_id] - if predicate is None: - tasks.append(pred_cls(Group(assignee), **kwargs).create_task(task_cls=task_cls)) - else: - # this branch is for the cases like AllDead, StayAlive - tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls)) - - elif reward_to == 'agent': - agent_list = team_helper.teams[team_id] - if predicate is None: - tasks += make_same_task(pred_cls, agent_list, task_cls=task_cls, **kwargs) - else: - # this branch is for the cases like AllDead, StayAlive - tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls) - for agent_id in agent_list] - - return tasks diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py new file mode 100644 index 000000000..0c33267fd --- /dev/null +++ b/nmmo/task/task_spec.py @@ -0,0 +1,165 @@ +import functools +from dataclasses import dataclass, field +from typing import Iterable, Dict, List, Union, Type +from types import FunctionType +from copy import deepcopy + +import numpy as np + +import nmmo +from nmmo.task.task_api import Task, make_same_task +from nmmo.task.predicate_api import Predicate, make_predicate +from nmmo.task.group import Group +from nmmo.task import base_predicates as bp +from nmmo.lib.team_helper import TeamHelper + +""" task_spec + + eval_fn can come from the base_predicates.py or could be custom functions like above + eval_fn_kwargs are the additional args that go into predicate. There are also special keys + * "target" must be ["left_team", "right_team", "left_team_leader", "right_team_leader"] + these str will be translated into the actual agent ids + + task_cls specifies the task class to be used. Default is Task. + task_kwargs are the optional, additional args that go into the task. + + reward_to: must be in ["team", "agent"] + * "team" create a single team task, in which all team members get rewarded + * "agent" create a task for each agent, in which only the agent gets rewarded + + sampling_weight specifies the weight of the task in the curriculum sampling. Default is 1 +""" + +REWARD_TO = ["agent", "team"] +VALID_TARGET = ["left_team", "left_team_leader", + "right_team", "right_team_leader", + "my_team_leader", "all_foes"] + +@dataclass +class TaskSpec: + eval_fn: FunctionType + eval_fn_kwargs: Dict + task_cls: Type[Task] = Task + task_kwargs: Dict = field(default_factory=dict) + reward_to: str = "agent" + sampling_weight: float = 1.0 + embedding: np.ndarray = None + predicate: Predicate = None + + def __post_init__(self): + if self.predicate is None: + assert isinstance(self.eval_fn, FunctionType), \ + "eval_fn must be a function" + else: + assert self.eval_fn is None, "Cannot specify both eval_fn and predicate" + assert self.reward_to in REWARD_TO, \ + f"reward_to must be in {REWARD_TO}" + if "target" in self.eval_fn_kwargs: + assert self.eval_fn_kwargs["target"] in VALID_TARGET, \ + f"target must be in {VALID_TARGET}" + + @functools.cached_property + def name(self): + # pylint: disable=no-member + kwargs_str = [] + for key, val in self.eval_fn_kwargs.items(): + val_str = str(val) + if isinstance(val, type): + val_str = val.__name__ + kwargs_str.append(f"{key}:{val_str}_") + kwargs_str = "(" + "".join(kwargs_str)[:-1] + ")" # remove the last _ + pred_name = self.eval_fn.__name__ if self.predicate is None else self.predicate.name + return "_".join([self.task_cls.__name__, pred_name, + kwargs_str, "reward_to:" + self.reward_to]) + +def make_task_from_spec(assign_to: Union[Iterable[int], Dict], + task_spec: List[TaskSpec]) -> List[Task]: + """ + Args: + assign_to: either a Dict with { team_id: [agent_id]} or a List of agent ids + task_spec: a list of tuples (reward_to, eval_fn, pred_fn_kwargs, task_kwargs) + + each tuple is assigned to the teams + """ + teams = assign_to + if not isinstance(teams, Dict): # convert agent id list to the team dict format + teams = {idx: [agent_id] for idx, agent_id in enumerate(assign_to)} + team_list = list(teams.keys()) + team_helper = TeamHelper(teams) + + # assign task spec to teams (assign_to) + tasks = [] + for idx in range(min(len(team_list), len(task_spec))): + team_id = team_list[idx] + + # map local vars to spec attributes + reward_to = task_spec[idx].reward_to + pred_fn = task_spec[idx].eval_fn + pred_fn_kwargs = deepcopy(task_spec[idx].eval_fn_kwargs) + task_cls = task_spec[idx].task_cls + task_kwargs = deepcopy(task_spec[idx].task_kwargs) + task_kwargs["embedding"] = task_spec[idx].embedding # to pass to task_cls + task_kwargs["spec_name"] = task_spec[idx].name + predicate = task_spec[idx].predicate + + # reserve "target" for relative agent mapping + if "target" in pred_fn_kwargs: + target = pred_fn_kwargs.pop("target") + assert target in VALID_TARGET, "Invalid target" + # translate target to specific agent ids using team_helper + target = team_helper.get_target_agent(team_id, target) + pred_fn_kwargs["target"] = target + + # handle some special cases and instantiate the predicate first + if pred_fn is not None and isinstance(pred_fn, FunctionType): + # if a function is provided as a predicate + pred_cls = make_predicate(pred_fn) + + # TODO: should create a test for these + if (pred_fn in [bp.AllDead]) or \ + (pred_fn in [bp.StayAlive] and "target" in pred_fn_kwargs): + # use the target as the predicate subject + pred_fn_kwargs.pop("target") # remove target + predicate = pred_cls(Group(target), **pred_fn_kwargs) + + # create the task + if reward_to == "team": + assignee = team_helper.teams[team_id] + if predicate is None: + predicate = pred_cls(Group(assignee), **pred_fn_kwargs) + tasks.append(predicate.create_task(task_cls=task_cls, **task_kwargs)) + else: + # this branch is for the cases like AllDead, StayAlive + tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls, + **task_kwargs)) + + elif reward_to == "agent": + agent_list = team_helper.teams[team_id] + if predicate is None: + tasks += make_same_task(pred_cls, agent_list, pred_kwargs=pred_fn_kwargs, + task_cls=task_cls, task_kwargs=task_kwargs) + else: + # this branch is for the cases like AllDead, StayAlive + tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls, **task_kwargs) + for agent_id in agent_list] + + return tasks + +# pylint: disable=bare-except,cell-var-from-loop +def check_task_spec(spec_list: List[TaskSpec]) -> List[Dict]: + teams = {0: [1, 2, 3], 3: [4, 5], 7: [6, 7], 11: [8, 9], 14: [10, 11]} + config = nmmo.config.Default() + env = nmmo.Env(config) + results = [] + for single_spec in spec_list: + result = {"spec_name": single_spec.name} + try: + env.reset(make_task_fn=lambda: make_task_from_spec(teams, [single_spec])) + for _ in range(3): + env.step({}) + result["runnable"] = True + except: + result["runnable"] = False + + results.append(result) + return results diff --git a/scripted/attack.py b/scripted/attack.py index 0f2c916c0..9b62089bd 100644 --- a/scripted/attack.py +++ b/scripted/attack.py @@ -1,31 +1,28 @@ -# pylint: disable=all - +# pylint: disable=invalid-name, unused-argument import numpy as np import nmmo from nmmo.core.observation import Observation from nmmo.entity.entity import EntityState +from nmmo.lib import utils -from scripted import utils def closestTarget(config, ob: Observation): shortestDist = np.inf closestAgent = None agent = ob.agent() - start = (agent.row, agent.col) - for target in ob.entities.values: - target = EntityState.parse_array(target) - if target.id == agent.id: + for target_ent in ob.entities.values: + target_ent = EntityState.parse_array(target_ent) + if target_ent.id == agent.id: continue - dist = utils.l1(start, (target.row, target.col)) - + dist = utils.linf_single(start, (target_ent.row, target_ent.col)) if dist < shortestDist and dist != 0: shortestDist = dist - closestAgent = target + closestAgent = target_ent if closestAgent is None: return None, None @@ -36,18 +33,17 @@ def attacker(config, ob: Observation): agent = ob.agent() attacker_id = agent.attacker_id - if attacker_id == 0: return None, None - target = ob.entity(attacker_id) - if target == None: + target_ent = ob.entity(attacker_id) + if target_ent is None: return None, None - - return target, utils.l1((agent.row, agent.col), (target.row, target.col)) + + return target_ent,\ + utils.linf_single((agent.row, agent.col), (target_ent.row, target_ent.col)) def target(config, actions, style, targetID): actions[nmmo.action.Attack] = { nmmo.action.Style: style, nmmo.action.Target: targetID} - diff --git a/scripted/baselines.py b/scripted/baselines.py index 625191da9..860c105e0 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -1,9 +1,6 @@ -# pylint: disable=all - +# pylint: disable=invalid-name, attribute-defined-outside-init, no-member from typing import Dict - from collections import defaultdict -import random import nmmo from nmmo import material @@ -15,8 +12,9 @@ from scripted import attack, move -class Scripted(nmmo.Agent): - '''Template class for scripted models. + +class Scripted(nmmo.Scripted): + '''Template class for baseline scripted models. You may either subclass directly or mirror the __call__ function''' scripted = True @@ -30,8 +28,8 @@ def __init__(self, config, idx): self.health_max = config.PLAYER_BASE_HEALTH if config.RESOURCE_SYSTEM_ENABLED: - self.food_max = config.RESOURCE_BASE - self.water_max = config.RESOURCE_BASE + self.food_max = config.RESOURCE_BASE + self.water_max = config.RESOURCE_BASE self.spawnR = None self.spawnC = None @@ -48,15 +46,17 @@ def forage_criterion(self) -> bool: def forage(self): '''Min/max food and water using Dijkstra's algorithm''' - move.forageDijkstra(self.config, self.ob, self.actions, self.food_max, self.water_max) + # TODO: do not access realm._np_random directly. ALSO see below for all other uses + move.forageDijkstra(self.config, self.ob, self.actions, + self.food_max, self.water_max, self._np_random) def gather(self, resource): '''BFS search for a particular resource''' - return move.gatherBFS(self.config, self.ob, self.actions, resource) + return move.gatherBFS(self.config, self.ob, self.actions, resource, self._np_random) def explore(self): '''Route away from spawn''' - move.explore(self.config, self.ob, self.actions, self.me.row, self.me.col) + move.explore(self.config, self.ob, self.actions, self.me.row, self.me.col, self._np_random) @property def downtime(self): @@ -65,7 +65,7 @@ def downtime(self): def evade(self): '''Target and path away from an attacker''' - move.evade(self.config, self.ob, self.actions, self.attacker) + move.evade(self.config, self.ob, self.actions, self.attacker, self._np_random) self.target = self.attacker self.targetID = self.attackerID self.targetDist = self.attackerDist @@ -74,10 +74,10 @@ def attack(self): '''Attack the current target''' if self.target is not None: assert self.targetID is not None - style = random.choice(self.style) + style = self._np_random.choice(self.style) attack.target(self.config, self.actions, style, self.targetID) - def target_weak(self): + def target_weak(self): # pylint: disable=inconsistent-return-statements '''Target the nearest agent if it is weak''' if self.closest is None: return False @@ -99,11 +99,11 @@ def scan_agents(self): self.closestID = None if self.closest is not None: - self.closestID = self.closest.id + self.closestID = self.ob.entities.index(self.closest.id) self.attackerID = None if self.attacker is not None: - self.attackerID = self.attacker.id + self.attackerID = self.ob.entities.index(self.attacker.id) self.target = None self.targetID = None @@ -226,14 +226,16 @@ def equip(self, items: set): # InventoryItem needs where the item is (index) in the inventory self.actions[action.Use] = { - action.InventoryItem: self.ob.inventory.index(itm.id)} # list(self.ob.inventory.ids).index(itm.id) + action.InventoryItem: self.ob.inventory.index(itm.id)} return True def consume(self): - if self.me.health <= self.health_max // 2 and item_system.Potion.ITEM_TYPE_ID in self.best_items: + if self.me.health <= self.health_max // 2 \ + and item_system.Potion.ITEM_TYPE_ID in self.best_items: itm = self.best_items[item_system.Potion.ITEM_TYPE_ID] - elif (self.me.food == 0 or self.me.water == 0) and item_system.Ration.ITEM_TYPE_ID in self.best_items: + elif (self.me.food == 0 or self.me.water == 0) \ + and item_system.Ration.ITEM_TYPE_ID in self.best_items: itm = self.best_items[item_system.Ration.ITEM_TYPE_ID] else: return @@ -243,7 +245,7 @@ def consume(self): # InventoryItem needs where the item is (index) in the inventory self.actions[action.Use] = { - action.InventoryItem: self.ob.inventory.index(itm.id)} # list(self.ob.inventory.ids).index(itm.id) + action.InventoryItem: self.ob.inventory.index(itm.id)} def sell(self, keep_k: dict, keep_best: set): for itm in self.inventory.values(): @@ -266,8 +268,8 @@ def sell(self, keep_k: dict, keep_best: set): continue self.actions[action.Sell] = { - action.InventoryItem: self.ob.inventory.index(itm.id), # list(self.ob.inventory.ids).index(itm.id) - action.Price: action.Price.edges[price-1] } # Price starts from 1 + action.InventoryItem: self.ob.inventory.index(itm.id), + action.Price: action.Price.index(price) } return itm @@ -277,7 +279,7 @@ def buy(self, buy_k: dict, buy_upgrade: set): purchase = None best = list(self.best_heuristic.items()) - random.shuffle(best) + self._np_random.shuffle(best) for type_id, itm in best: # Buy top k if type_id in buy_k: @@ -293,7 +295,7 @@ def buy(self, buy_k: dict, buy_upgrade: set): # Buy best heuristic upgrade if purchase: self.actions[action.Buy] = { - action.MarketItem: self.ob.market.index(purchase.id)} #list(self.ob.market.ids).index(purchase.id)} + action.MarketItem: self.ob.market.index(purchase.id)} return def exchange(self): @@ -311,6 +313,7 @@ def use(self): def __call__(self, observation: Observation): '''Process observations and return actions''' + assert self._np_random is not None, "Agent's RNG must be set." self.actions = {} self.ob = observation @@ -358,7 +361,7 @@ class Random(Scripted): def __call__(self, obs): super().__call__(obs) - move.rand(self.config, self.ob, self.actions) + move.rand(self.config, self.ob, self.actions, self._np_random) return self.actions class Meander(Scripted): @@ -366,7 +369,7 @@ class Meander(Scripted): def __call__(self, obs): super().__call__(obs) - move.meander(self.config, self.ob, self.actions) + move.meander(self.config, self.ob, self.actions, self._np_random) return self.actions class Explore(Scripted): @@ -384,9 +387,9 @@ def __call__(self, obs): super().__call__(obs) if self.forage_criterion: - self.forage() + self.forage() else: - self.explore() + self.explore() return self.actions @@ -461,42 +464,42 @@ def __call__(self, obs): class Fisher(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Fish] self.tool = item_system.Rod class Herbalist(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Herb] self.tool = item_system.Gloves class Prospector(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Ore] self.tool = item_system.Pickaxe class Carver(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Tree] self.tool = item_system.Axe class Alchemist(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Crystal] self.tool = item_system.Chisel class Melee(Combat): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.COMBAT_SYSTEM_ENABLED: self.style = [action.Melee] self.weapon = item_system.Spear self.ammo = item_system.Whetstone @@ -504,7 +507,7 @@ def __init__(self, config, idx): class Range(Combat): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.COMBAT_SYSTEM_ENABLED: self.style = [action.Range] self.weapon = item_system.Bow self.ammo = item_system.Arrow @@ -512,7 +515,7 @@ def __init__(self, config, idx): class Mage(Combat): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.COMBAT_SYSTEM_ENABLED: self.style = [action.Mage] self.weapon = item_system.Wand self.ammo = item_system.Runes diff --git a/scripted/behavior.py b/scripted/behavior.py deleted file mode 100644 index c2d8753c2..000000000 --- a/scripted/behavior.py +++ /dev/null @@ -1,62 +0,0 @@ -# pylint: disable=all - -import nmmo -from nmmo.systems.ai import move, attack, utils - -def update(entity): - '''Update validity of tracked entities''' - if not utils.validTarget(entity, entity.attacker, entity.vision): - entity.attacker = None - if not utils.validTarget(entity, entity.target, entity.vision): - entity.target = None - if not utils.validTarget(entity, entity.closest, entity.vision): - entity.closest = None - - if entity.__class__.__name__ != 'Player': - return - - if not utils.validResource(entity, entity.food, entity.vision): - entity.food = None - if not utils.validResource(entity, entity.water, entity.vision): - entity.water = None - -def pathfind(config, ob, actions, rr, cc): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(config, ob, actions, rr, cc)} - -def meander(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.habitable(realm.map.tiles, entity)} - -def evade(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker)} - -def hunt(realm, actions, entity): - #Move args - distance = utils.distance(entity, entity.target) - - direction = None - if distance == 0: - direction = move.random_direction() - elif distance > 1: - direction = move.pathfind(realm.map.tiles, entity, entity.target) - - if direction is not None: - actions[nmmo.action.Move] = {nmmo.action.Direction: direction} - - attack(realm, actions, entity) - -def attack(realm, actions, entity): - distance = utils.distance(entity, entity.target) - if distance > entity.skills.style.attack_range(realm.config): - return - - actions[nmmo.action.Attack] = {nmmo.action.Style: entity.skills.style, - nmmo.action.Target: entity.target} - -def forageDP(realm, actions, entity): - direction = utils.forageDP(realm.map.tiles, entity) - actions[nmmo.action.Move] = {nmmo.action.Direction: move.towards(direction)} - -#def forageDijkstra(realm, actions, entity): -def forageDijkstra(config, ob, actions, food_max, water_max): - direction = utils.forageDijkstra(config, ob, food_max, water_max) - actions[nmmo.action.Move] = {nmmo.action.Direction: move.towards(direction)} diff --git a/scripted/move.py b/scripted/move.py index 893a4cee0..0c80f9d6b 100644 --- a/scripted/move.py +++ b/scripted/move.py @@ -1,320 +1,291 @@ -# pylint: disable=all - -import numpy as np -import random - +# pylint: disable=invalid-name, unused-argument import heapq +import numpy as np from nmmo.core import action from nmmo.core.observation import Observation from nmmo.lib import material +from nmmo.systems.ai import utils -from scripted import utils - -def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] def inSight(dr, dc, vision): - return ( - dr >= -vision and - dc >= -vision and - dr <= vision and - dc <= vision) - -def rand(config, ob, actions): - direction = random.choice(action.Direction.edges) - actions[action.Move] = {action.Direction: direction} - -def towards(direction): - if direction == (-1, 0): - return action.North - elif direction == (1, 0): - return action.South - elif direction == (0, -1): - return action.West - elif direction == (0, 1): - return action.East - else: - return random.choice(action.Direction.edges) - -def pathfind(config, ob, actions, rr, cc): - direction = aStar(config, ob, actions, rr, cc) - direction = towards(direction) - actions[action.Move] = {action.Direction: direction} - -def meander(config, ob, actions): - cands = [] - if ob.tile(-1, 0).material_id in material.Habitable.indices: - cands.append((-1, 0)) - if ob.tile(1, 0).material_id in material.Habitable.indices: - cands.append((1, 0)) - if ob.tile(0, -1).material_id in material.Habitable.indices: - cands.append((0, -1)) - if ob.tile(0, 1).material_id in material.Habitable.indices: - cands.append((0, 1)) - if not cands: - return (-1, 0) - - direction = random.choices(cands)[0] - direction = towards(direction) - actions[action.Move] = {action.Direction: direction} - -def explore(config, ob, actions, r, c): - vision = config.PLAYER_VISION_RADIUS - sz = config.MAP_SIZE - - centR, centC = sz//2, sz//2 - - vR, vC = centR-r, centC-c - - mmag = max(1, abs(vR), abs(vC)) - rr = int(np.round(vision*vR/mmag)) - cc = int(np.round(vision*vC/mmag)) - pathfind(config, ob, actions, rr, cc) - -def evade(config, ob: Observation, actions, attacker): - agent = ob.agent() - - rr, cc = (2*agent.row - attacker.row, 2*agent.col - attacker.col) - - pathfind(config, ob, actions, rr, cc) - -def forageDijkstra(config, ob: Observation, actions, food_max, water_max, cutoff=100): - vision = config.PLAYER_VISION_RADIUS - - agent = ob.agent() - food = agent.food - water = agent.water - - best = -1000 - start = (0, 0) - goal = (0, 0) - - reward = {start: (food, water)} - backtrace = {start: None} - - queue = [start] - - while queue: - cutoff -= 1 - if cutoff <= 0: - break - - cur = queue.pop(0) - for nxt in adjacentPos(cur): - if nxt in backtrace: - continue - - if not inSight(*nxt, vision): - continue - - tile = ob.tile(*nxt) - matl = tile.material_id - - if not matl in material.Habitable.indices: - continue - - food, water = reward[cur] - food = max(0, food - 1) - water = max(0, water - 1) - - if matl == material.Foilage.index: - food = min(food+food_max//2, food_max) - for pos in adjacentPos(nxt): - if not inSight(*pos, vision): - continue - - tile = ob.tile(*pos) - matl = tile.material_id - - if matl == material.Water.index: - water = min(water+water_max//2, water_max) - break - - reward[nxt] = (food, water) - - total = min(food, water) - if total > best or ( - total == best and max(food, water) > max(reward[goal])): - best = total - goal = nxt - - queue.append(nxt) - backtrace[nxt] = cur + return (-vision <= dr <= vision and + -vision <= dc <= vision) + +def rand(config, ob, actions, np_random): + direction = np_random.choice(action.Direction.edges) + actions[action.Move] = {action.Direction: direction} + +def towards(direction, np_random): + if direction == (-1, 0): + return action.North + if direction == (1, 0): + return action.South + if direction == (0, -1): + return action.West + if direction == (0, 1): + return action.East + + return np_random.choice(action.Direction.edges) + +def pathfind(config, ob, actions, rr, cc, np_random): + direction = aStar(config, ob, actions, rr, cc) + direction = towards(direction, np_random) + actions[action.Move] = {action.Direction: direction} + +def meander(config, ob, actions, np_random): + cands = [] + if ob.tile(-1, 0).material_id in material.Habitable.indices: + cands.append((-1, 0)) + if ob.tile(1, 0).material_id in material.Habitable.indices: + cands.append((1, 0)) + if ob.tile(0, -1).material_id in material.Habitable.indices: + cands.append((0, -1)) + if ob.tile(0, 1).material_id in material.Habitable.indices: + cands.append((0, 1)) + + if len(cands) > 0: + direction = np_random.choices(cands)[0] + direction = towards(direction, np_random) + actions[action.Move] = {action.Direction: direction} - while goal in backtrace and backtrace[goal] != start: - goal = backtrace[goal] - direction = towards(goal) - actions[action.Move] = {action.Direction: direction} +def explore(config, ob, actions, r, c, np_random): + vision = config.PLAYER_VISION_RADIUS + sz = config.MAP_SIZE + centR, centC = sz//2, sz//2 + vR, vC = centR-r, centC-c + mmag = max(1, abs(vR), abs(vC)) + rr = int(np.round(vision*vR/mmag)) + cc = int(np.round(vision*vC/mmag)) + pathfind(config, ob, actions, rr, cc, np_random) + +def evade(config, ob: Observation, actions, attacker, np_random): + agent = ob.agent() + rr, cc = (2*agent.row - attacker.row, 2*agent.col - attacker.col) + pathfind(config, ob, actions, rr, cc, np_random) + +def forageDijkstra(config, ob: Observation, actions, + food_max, water_max, np_random, cutoff=100): + vision = config.PLAYER_VISION_RADIUS + + agent = ob.agent() + food = agent.food + water = agent.water + + best = -1000 + start = (0, 0) + goal = (0, 0) + + reward = {start: (food, water)} + backtrace = {start: None} + + queue = [start] + + while queue: + cutoff -= 1 + if cutoff <= 0: + break + + cur = queue.pop(0) + for nxt in utils.adjacentPos(cur): + if nxt in backtrace: + continue + + if not inSight(*nxt, vision): + continue + + tile = ob.tile(*nxt) + matl = tile.material_id + + if not matl in material.Habitable.indices: + continue + + food, water = reward[cur] + water = max(0, water - 1) + food = max(0, food - 1) + if matl == material.Foilage.index: + food = min(food+food_max//2, food_max) + + for pos in utils.adjacentPos(nxt): + if not inSight(*pos, vision): + continue + + tile = ob.tile(*pos) + matl = tile.material_id + if matl == material.Water.index: + water = min(water+water_max//2, water_max) + break + + reward[nxt] = (food, water) + + total = min(food, water) + if total > best \ + or (total == best and max(food, water) > max(reward[goal])): + best = total + goal = nxt + + queue.append(nxt) + backtrace[nxt] = cur + + while goal in backtrace and backtrace[goal] != start: + goal = backtrace[goal] + direction = towards(goal, np_random) + actions[action.Move] = {action.Direction: direction} def findResource(config, ob: Observation, resource): - vision = config.PLAYER_VISION_RADIUS - - resource_index = resource.index - - for r in range(-vision, vision+1): - for c in range(-vision, vision+1): - tile = ob.tile(r, c) - material_id = tile.material_id - - if material_id == resource_index: - return (r, c) - + vision = config.PLAYER_VISION_RADIUS + resource_index = resource.index + for r in range(-vision, vision+1): + for c in range(-vision, vision+1): + tile = ob.tile(r, c) + material_id = tile.material_id + if material_id == resource_index: + return (r, c) + return False + +def gatherAStar(config, ob, actions, resource, np_random, cutoff=100): + resource_pos = findResource(config, ob, resource) + if not resource_pos: return False -def gatherAStar(config, ob, actions, resource, cutoff=100): - resource_pos = findResource(config, ob, resource) - if not resource_pos: - return - - rr, cc = resource_pos - next_pos = aStar(config, ob, actions, rr, cc, cutoff=cutoff) - if not next_pos or next_pos == (0, 0): - return - - direction = towards(next_pos) - actions[action.Move] = {action.Direction: direction} - return True - -def gatherBFS(config, ob: Observation, actions, resource, cutoff=100): - vision = config.PLAYER_VISION_RADIUS + rr, cc = resource_pos + next_pos = aStar(config, ob, actions, rr, cc, cutoff=cutoff) + if not next_pos or next_pos == (0, 0): + return False - start = (0, 0) + direction = towards(next_pos, np_random) + actions[action.Move] = {action.Direction: direction} + return True - backtrace = {start: None} +def gatherBFS(config, ob: Observation, actions, resource, np_random, cutoff=100): + vision = config.PLAYER_VISION_RADIUS - queue = [start] + start = (0, 0) + backtrace = {start: None} + queue = [start] + found = False - found = False - while queue: - cutoff -= 1 - if cutoff <= 0: - return False + while queue: + cutoff -= 1 + if cutoff <= 0: + return False - cur = queue.pop(0) - for nxt in adjacentPos(cur): - if found: - break + cur = queue.pop(0) + for nxt in utils.adjacentPos(cur): + if found: + break - if nxt in backtrace: - continue + if nxt in backtrace: + continue - if not inSight(*nxt, vision): - continue + if not inSight(*nxt, vision): + continue - tile = ob.tile(*nxt) - matl = tile.material_id + tile = ob.tile(*nxt) + matl = tile.material_id - if material.Fish in resource and material.Fish.index == matl: - found = nxt - backtrace[nxt] = cur - break + if material.Fish in resource and material.Fish.index == matl: + found = nxt + backtrace[nxt] = cur + break - if not tile.material_id in material.Habitable.indices: - continue + if not tile.material_id in material.Habitable.indices: + continue - if matl in (e.index for e in resource): - found = nxt - backtrace[nxt] = cur - break + if matl in (e.index for e in resource): + found = nxt + backtrace[nxt] = cur + break - for pos in adjacentPos(nxt): - if not inSight(*pos, vision): - continue + for pos in utils.adjacentPos(nxt): + if not inSight(*pos, vision): + continue - tile = ob.tile(*pos) - matl = tile.material_id + tile = ob.tile(*pos) + matl = tile.material_id - if matl == material.Fish.index: - backtrace[nxt] = cur - break + if matl == material.Fish.index: + backtrace[nxt] = cur + break - queue.append(nxt) - backtrace[nxt] = cur + queue.append(nxt) + backtrace[nxt] = cur - #Ran out of tiles - if not found: - return False + #Ran out of tiles + if not found: + return False - found_orig = found - while found in backtrace and backtrace[found] != start: - found = backtrace[found] + while found in backtrace and backtrace[found] != start: + found = backtrace[found] - direction = towards(found) - actions[action.Move] = {action.Direction: direction} + direction = towards(found, np_random) + actions[action.Move] = {action.Direction: direction} - return True + return True def aStar(config, ob: Observation, actions, rr, cc, cutoff=100): - vision = config.PLAYER_VISION_RADIUS - - start = (0, 0) - goal = (rr, cc) - - if start == goal: - return (0, 0) - - pq = [(0, start)] + vision = config.PLAYER_VISION_RADIUS - backtrace = {} - cost = {start: 0} + start = (0, 0) + goal = (rr, cc) + if start == goal: + return (0, 0) - closestPos = start - closestHeuristic = utils.l1(start, goal) - closestCost = closestHeuristic + pq = [(0, start)] - while pq: - # Use approximate solution if budget exhausted - cutoff -= 1 - if cutoff <= 0: - if goal not in backtrace: - goal = closestPos - break + backtrace = {} + cost = {start: 0} - priority, cur = heapq.heappop(pq) + closestPos = start + closestHeuristic = utils.l1(start, goal) + closestCost = closestHeuristic - if cur == goal: - break + while pq: + # Use approximate solution if budget exhausted + cutoff -= 1 + if cutoff <= 0: + if goal not in backtrace: + goal = closestPos + break - for nxt in adjacentPos(cur): - if not inSight(*nxt, vision): - continue + priority, cur = heapq.heappop(pq) - tile = ob.tile(*nxt) - matl = tile.material_id + if cur == goal: + break - if not matl in material.Habitable.indices: - continue + for nxt in utils.adjacentPos(cur): + if not inSight(*nxt, vision): + continue - #Omitted water from the original implementation. Seems key - if matl in material.Impassible.indices: - continue + tile = ob.tile(*nxt) + matl = tile.material_id - newCost = cost[cur] + 1 - if nxt not in cost or newCost < cost[nxt]: - cost[nxt] = newCost - heuristic = utils.lInfty(goal, nxt) - priority = newCost + heuristic + if not matl in material.Habitable.indices: + continue - # Compute approximate solution - if heuristic < closestHeuristic or ( - heuristic == closestHeuristic and priority < closestCost): - closestPos = nxt - closestHeuristic = heuristic - closestCost = priority + #Omitted water from the original implementation. Seems key + if matl in material.Impassible.indices: + continue - heapq.heappush(pq, (priority, nxt)) - backtrace[nxt] = cur + newCost = cost[cur] + 1 + if nxt not in cost or newCost < cost[nxt]: + cost[nxt] = newCost + heuristic = utils.lInfty(goal, nxt) + priority = newCost + heuristic - #Not needed with scuffed material list above - #if goal not in backtrace: - # goal = closestPos + # Compute approximate solution + if heuristic < closestHeuristic \ + or (heuristic == closestHeuristic and priority < closestCost): + closestPos = nxt + closestHeuristic = heuristic + closestCost = priority - goal = closestPos - while goal in backtrace and backtrace[goal] != start: - goal = backtrace[goal] + heapq.heappush(pq, (priority, nxt)) + backtrace[nxt] = cur - return goal + goal = closestPos + while goal in backtrace and backtrace[goal] != start: + goal = backtrace[goal] + return goal diff --git a/scripted/utils.py b/scripted/utils.py deleted file mode 100644 index 0c7f2af85..000000000 --- a/scripted/utils.py +++ /dev/null @@ -1,30 +0,0 @@ - - -def l1(start, goal): - sr, sc = start - gr, gc = goal - return abs(gr - sr) + abs(gc - sc) - -def l2(start, goal): - sr, sc = start - gr, gc = goal - return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 - -def lInfty(start, goal): - sr, sc = start - gr, gc = goal - return max(abs(gr - sr), abs(gc - sc)) - -def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] - -def adjacentDeltas(): - return [(-1, 0), (1, 0), (0, 1), (0, -1)] - -def inSight(dr, dc, vision): - return ( - dr >= -vision and - dc >= -vision and - dr <= vision and - dc <= vision) \ No newline at end of file diff --git a/setup.py b/setup.py index ac649cd8a..d9d534bc5 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ 'scipy==1.10.0', 'pytest==7.3.0', 'pytest-benchmark==3.4.1', - 'fire==0.4.0', 'autobahn==19.3.3', 'Twisted==19.2.0', 'vec-noise==1.1.4', @@ -44,8 +43,10 @@ 'pettingzoo==1.19.0', 'gym==0.23.0', 'pylint==2.16.0', + 'psutil==5.9.3', 'py==1.11.0', 'tqdm<5', + 'dill==0.3.6', ], extras_require=extra, python_requires=">=3.7", diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 9adf770eb..4cd356138 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -25,15 +25,36 @@ def setUpClass(cls): logging.basicConfig(filename=LOGFILE, level=logging.INFO) def _assert_action_targets_zero(self, gym_obs): - mask = np.sum(gym_obs['ActionTargets'][action.GiveGold][action.Price]) \ - + np.sum(gym_obs['ActionTargets'][action.Buy][action.MarketItem]) + mask = np.sum(gym_obs["ActionTargets"]["GiveGold"]["Price"]) \ + + np.sum(gym_obs["ActionTargets"]["Buy"]["MarketItem"]) for atn in [action.Use, action.Give, action.Destroy, action.Sell]: - mask += np.sum(gym_obs['ActionTargets'][atn][action.InventoryItem]) - self.assertEqual(mask, 0) + mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) + # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 + self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) - def test_ammo_fire_all(self): + def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) + # Check spawn immunity in the action targets + for ent_obs in env.obs.values(): + gym_obs = ent_obs.to_gym() + target_mask = gym_obs["ActionTargets"]["Attack"]["Target"][:len(ent_obs.entities.ids)] + # cannot target other agents + self.assertTrue(np.sum(target_mask[ent_obs.entities.ids > 0]) == 0) + + # Test attack during spawn immunity, which should be ignored + env.step({ ent_id: { action.Attack: + { action.Style: env.realm.players[ent_id].agent.style[0], + action.Target: env.obs[ent_id].entities.index((ent_id+1)%3+1) } } + for ent_id in self.ammo }) + + for ent_id in [1, 2, 3]: + # in_combat status is set when attack is executed + self.assertFalse(env.realm.players[ent_id].in_combat) + + def test_ammo_fire_all(self): + env = self._setup_env(random_seed=RANDOM_SEED, remove_immunity=True) + # First tick actions: USE (equip) level-0 ammo env.step({ ent_id: { action.Use: { action.InventoryItem: env.obs[ent_id].inventory.sig(ent_ammo, 0) } @@ -48,7 +69,7 @@ def test_ammo_fire_all(self): ItemState.parse_array(inventory.values[inv_idx]).equipped) # check SELL InventoryItem mask -- one cannot sell equipped item - mask = gym_obs['ActionTargets'][action.Sell][action.InventoryItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Sell"]["InventoryItem"][:inventory.len] > 0 self.assertTrue(inventory.id(inv_idx) not in inventory.ids[mask]) # the agents must not be in combat status @@ -58,7 +79,7 @@ def test_ammo_fire_all(self): # NOTE that agents 1 & 3's attack are invalid due to out-of-range env.step({ ent_id: { action.Attack: { action.Style: env.realm.players[ent_id].agent.style[0], - action.Target: (ent_id+1)%3+1 } } + action.Target: env.obs[ent_id].entities.index((ent_id+1)%3+1) } } for ent_id in self.ammo }) # check combat status: agents 2 (attacker) and 1 (target) are in combat @@ -87,7 +108,7 @@ def test_ammo_fire_all(self): # NOTE that agent 3's attack command is invalid due to out-of-range env.step({ ent_id: { action.Attack: { action.Style: env.realm.players[ent_id].agent.style[0], - action.Target: (ent_id+1)%3+1 } } + action.Target: env.obs[ent_id].entities.index((ent_id+1)%3+1) } } for ent_id in self.ammo }) # agents 1 and 2's latest_combat_tick should be updated @@ -140,7 +161,7 @@ def test_cannot_use_listed_items(self): # First tick actions: SELL level-0 ammo env.step({ ent_id: { action.Sell: { action.InventoryItem: env.obs[ent_id].inventory.sig(ent_ammo, 0), - action.Price: sell_price } } + action.Price: action.Price.index(sell_price) } } for ent_id, ent_ammo in self.ammo.items() }) # check if the ammos were listed @@ -156,15 +177,15 @@ def test_cannot_use_listed_items(self): self.assertTrue(item_info.id in env.obs[ent_id].market.ids) # check SELL InventoryItem mask -- one cannot sell listed item - mask = gym_obs['ActionTargets'][action.Sell][action.InventoryItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Sell"]["InventoryItem"][:inventory.len] > 0 self.assertTrue(inventory.id(inv_idx) not in inventory.ids[mask]) # check USE InventoryItem mask -- one cannot use listed item - mask = gym_obs['ActionTargets'][action.Use][action.InventoryItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Use"]["InventoryItem"][:inventory.len] > 0 self.assertTrue(inventory.id(inv_idx) not in inventory.ids[mask]) # check BUY MarketItem mask -- there should be two ammo items in the market - mask = gym_obs['ActionTargets'][action.Buy][action.MarketItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Buy"]["MarketItem"][:inventory.len] > 0 # agent 1 has inventory space if ent_id == 1: self.assertTrue(sum(mask) == 2) # agent 2's inventory is full but can buy level-0 whetstone (existing ammo) @@ -242,7 +263,7 @@ def sig_int_tuple(sig): if ent_id == 1: gym_obs = env.obs[ent_id].to_gym() # check USE InventoryItem mask - mask = gym_obs['ActionTargets'][action.Use][action.InventoryItem][:inv_obs.len] > 0 + mask = gym_obs["ActionTargets"]["Use"]["InventoryItem"][:inv_obs.len] > 0 # level-2 melee should be able to use level-0, level-1 whetstone but not level-3 self.assertTrue(inv_obs.id(inv_obs.sig(*wstone_lvl0)) in inv_obs.ids[mask]) self.assertTrue(inv_obs.id(inv_obs.sig(*wstone_lvl1)) in inv_obs.ids[mask]) diff --git a/tests/action/test_destroy_give_gold.py b/tests/action/test_destroy_give_gold.py index cea62bda5..fa9f10b1a 100644 --- a/tests/action/test_destroy_give_gold.py +++ b/tests/action/test_destroy_give_gold.py @@ -153,7 +153,7 @@ def test_give_equipped_listed(self): self._check_inv_mask(env.obs[ent_id], action.Sell, item_sig)) actions[ent_id] = { action.Sell: { action.InventoryItem: env.obs[ent_id].inventory.sig(*item_sig), - action.Price: price } } + action.Price: action.Price.index(price) } } env.step(actions) @@ -257,7 +257,7 @@ def test_give_gold(self): test_cond[1] = { 'tgt_id': 3, 'gold': 1, 'ent_mask': True, 'ent_gold': self.init_gold-1, 'tgt_gold': self.init_gold+1 } # agent 2: give gold to agent 4 (valid: same tile) - test_cond[2] = { 'tgt_id': 4, 'gold': 100, 'ent_mask': True, + test_cond[2] = { 'tgt_id': 4, 'gold': self.init_gold, 'ent_mask': True, 'ent_gold': 0, 'tgt_gold': 2*self.init_gold } # agent 3: give gold to npc -1 (invalid: cannot give to npc) # ent_gold is self.init_gold+1 because (3) got 1 gold from (1) diff --git a/tests/action/test_monkey_action.py b/tests/action/test_monkey_action.py index 9b5d2e2c3..2c2f6da92 100644 --- a/tests/action/test_monkey_action.py +++ b/tests/action/test_monkey_action.py @@ -21,7 +21,7 @@ def make_random_actions(config, ent_obs): for atn in sorted(nmmo.Action.edges(config)): actions[atn] = {} for arg in sorted(atn.edges, reverse=True): # intentionally doing wrong - mask = ent_obs['ActionTargets'][atn][arg] + mask = ent_obs["ActionTargets"][atn.__name__][arg.__name__] actions[atn][arg] = 0 if np.any(mask): actions[atn][arg] += int(np.random.choice(np.where(mask)[0])) @@ -29,7 +29,7 @@ def make_random_actions(config, ent_obs): return actions # CHECK ME: this would be nice to include in the env._validate_actions() -def filter_item_actions(actions): +def filter_item_actions(actions, use_str_key=False): # when there are multiple actions on the same item, select one flt_atns = {} inventory_atn = {} # key: inventory idx, val: action @@ -52,6 +52,15 @@ def filter_item_actions(actions): else: flt_atns[atns[0][0]] = atns[0][1] + # convert action keys to str + if use_str_key: + str_atns = {} + for atn, args in flt_atns.items(): + str_atns[atn.__name__] = {} + for arg, val in args.items(): + str_atns[atn.__name__][arg.__name__] = val + flt_atns = str_atns + return flt_atns @@ -63,7 +72,7 @@ def setUpClass(cls): @staticmethod # NOTE: this can also be used for sweeping random seeds - def rollout_with_seed(config, seed): + def rollout_with_seed(config, seed, use_str_key=False): env = ScriptedAgentTestEnv(config) obs = env.reset(seed=seed) @@ -72,7 +81,7 @@ def rollout_with_seed(config, seed): actions = {} for ent_id in env.realm.players: ent_atns = make_random_actions(config, obs[ent_id]) - actions[ent_id] = filter_item_actions(ent_atns) + actions[ent_id] = filter_item_actions(ent_atns, use_str_key) obs, _, _, _ = env.step(actions) def test_monkey_action(self): @@ -81,6 +90,8 @@ def test_monkey_action(self): except: # pylint: disable=bare-except assert False, f"Monkey action failed. seed: {RANDOM_SEED}" + def test_monkey_action_with_str_key(self): + self.rollout_with_seed(self.config, RANDOM_SEED, use_str_key=True) if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 1ddeb6776..ca7e8cc1a 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -2,6 +2,7 @@ from typing import List import random +import numpy as np from tqdm import tqdm import nmmo @@ -19,7 +20,6 @@ RANDOM_SEED = random.randint(0, 10000) class Config(nmmo.config.Small, nmmo.config.AllGameSystems): - SPECIALIZE = True PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector, baselines.Carver, baselines.Alchemist, @@ -33,9 +33,10 @@ def setUpClass(cls): def test_action_space(self): action_space = self.env.action_space(0) + atn_str_keys = set(atn.__name__ for atn in nmmo.Action.edges(self.config)) self.assertSetEqual( set(action_space.keys()), - set(nmmo.Action.edges(self.config))) + atn_str_keys) def test_observations(self): obs = self.env.reset() @@ -51,22 +52,47 @@ def test_observations(self): ] for player_id, player_obs in obs.items(): - self._validate_tiles(player_obs, self.env.realm) - self._validate_entitites( - player_id, player_obs, self.env.realm, entity_locations) - self._validate_inventory(player_id, player_obs, self.env.realm) - self._validate_market(player_obs, self.env.realm) - obs, _, dones, _ = self.env.step({}) - - # make sure dead agents return proper dones=True - self.assertEqual(len(self.env.agents), len(self.env.realm.players)) + if player_id in self.env.realm.players: # alive agents + self._validate_tiles(player_obs, self.env.realm) + self._validate_entitites( + player_id, player_obs, self.env.realm, entity_locations) + self._validate_inventory(player_id, player_obs, self.env.realm) + self._validate_market(player_obs, self.env.realm) + else: + # the obs of dead agents are dummy, all zeros + self.assertEqual(np.sum(player_obs["Tile"]), 0) + self.assertEqual(np.sum(player_obs["Entity"]), 0) + self.assertEqual(np.sum(player_obs["Inventory"]), 0) + self.assertEqual(np.sum(player_obs["Market"]), 0) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Move"]["Direction"]), 1) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Attack"]["Style"]), 3) + + obs, rewards, dones, infos = self.env.step({}) + + # make sure dead agents return proper dones=True, dummy obs, and -1 reward + self.assertEqual(len(self.env.agents), + len(self.env.realm.players) + len(self.env._dead_this_tick)) self.assertEqual(len(self.env.possible_agents), len(self.env.realm.players) + len(self.env._dead_agents)) + for agent_id in self.env.agents: + self.assertTrue(agent_id in obs) + self.assertTrue(agent_id in rewards) + self.assertTrue(agent_id in dones) + self.assertTrue(agent_id in infos) if len(self.env._dead_agents) > len(dead_agents): for dead_id in self.env._dead_agents - dead_agents: + self.assertEqual(rewards[dead_id], -1) self.assertTrue(dones[dead_id]) dead_agents.add(dead_id) + # check dead and alive + entity_all = EntityState.Query.table(self.env.realm.datastore) + alive_agents = entity_all[:, Entity.State.attr_name_to_col["id"]] + alive_agents = set(alive_agents[alive_agents > 0]) + for agent_id in alive_agents: + self.assertTrue(agent_id in self.env.realm.players) + self.assertTrue(agent_id not in self.env._dead_agents) + def _validate_tiles(self, obs, realm: Realm): for tile_obs in obs["Tile"]: tile_obs = TileState.parse_array(tile_obs) diff --git a/tests/core/test_gym_obs_spaces.py b/tests/core/test_gym_obs_spaces.py new file mode 100644 index 000000000..50638bbfe --- /dev/null +++ b/tests/core/test_gym_obs_spaces.py @@ -0,0 +1,48 @@ +import unittest + +import nmmo + +class TestGymObsSpaces(unittest.TestCase): + def _test_gym_obs_space(self, env): + obs_spec = env.observation_space(1) + obs, _, _, _ = env.step({}) + + for agent_obs in obs.values(): + for key, val in agent_obs.items(): + if key != 'ActionTargets': + self.assertTrue(obs_spec[key].contains(val), + f"Invalid obs format -- key: {key}, val: {val}") + + if 'ActionTargets' in agent_obs: + val = agent_obs['ActionTargets'] + for atn in nmmo.Action.edges(env.config): + if atn.enabled(env.config): + for arg in atn.edges: # pylint: disable=not-an-iterable + mask_spec = obs_spec['ActionTargets'][atn.__name__][arg.__name__] + mask_val = val[atn.__name__][arg.__name__] + self.assertTrue(mask_spec.contains(mask_val), + "Invalid obs format -- " + \ + f"key: {atn.__name__}/{arg.__name__}, val: {mask_val}") + + def test_env_without_noop(self): + config = nmmo.config.Default() + config.PROVIDE_NOOP_ACTION_TARGET = False + env = nmmo.Env(config) + env.reset(seed=1) + for _ in range(3): + env.step({}) + + self._test_gym_obs_space(env) + + def test_env_with_noop(self): + config = nmmo.config.Default() + config.PROVIDE_NOOP_ACTION_TARGET = True + env = nmmo.Env(config) + env.reset(seed=1) + for _ in range(3): + env.step({}) + + self._test_gym_obs_space(env) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_immutable_tile_property.py b/tests/core/test_immutable_tile_property.py new file mode 100644 index 000000000..6d8c56da7 --- /dev/null +++ b/tests/core/test_immutable_tile_property.py @@ -0,0 +1,37 @@ +# Test immutable invariants assumed for certain optimizations + +import unittest + +import copy +import nmmo +from scripted.baselines import Random + +def rollout(): + config = nmmo.config.Default() + config.PLAYERS = [Random] + env = nmmo.Env(config) + env.reset() + start = copy.deepcopy(env.realm) + for _ in range(64): + env.step({}) + end = copy.deepcopy(env.realm) + return (start, end) + +class TestImmutableTileProperty(unittest.TestCase): + + def test_passability_immutable(self): + # Used in optimization that caches the result of A* + start, end = rollout() + start_passable = [tile.impassible for tile in start.map.tiles.flatten()] + end_passable = [tile.impassible for tile in end.map.tiles.flatten()] + self.assertListEqual(start_passable, end_passable) + + def test_habitability_immutable(self): + # Used in optimization with habitability lookup table + start, end = rollout() + start_habitable = [tile.habitable for tile in start.map.tiles.flatten()] + end_habitable = [tile.habitable for tile in end.map.tiles.flatten()] + self.assertListEqual(start_habitable, end_habitable) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/core/test_map_generation.py b/tests/core/test_map_generation.py index 1c6020c49..d7f35d9c6 100644 --- a/tests/core/test_map_generation.py +++ b/tests/core/test_map_generation.py @@ -10,6 +10,7 @@ def test_insufficient_maps(self): config.PATH_MAPS = 'maps/test_map_gen' config.MAP_N = 20 + # clear the directory path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) shutil.rmtree(path_maps, ignore_errors=True) @@ -25,5 +26,25 @@ def test_insufficient_maps(self): # this should finish without error + def test_map_preview(self): + class MapConfig( + nmmo.config.Small, # no fractal, grass only + nmmo.config.Terrain, # water, grass, foilage, stone + nmmo.config.Item, # no additional effect on the map + nmmo.config.Profession, # add ore, tree, crystal, herb, fish + ): + PATH_MAPS = 'maps/test_preview' + MAP_FORCE_GENERATION = True + MAP_GENERATE_PREVIEWS = True + config = MapConfig() + + # clear the directory + path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) + shutil.rmtree(path_maps, ignore_errors=True) + + test_env = nmmo.Env(config) # pylint: disable=unused-variable + + # this should finish without error + if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 84a231169..00d519fb8 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -1,14 +1,24 @@ # pylint: disable=protected-access,bad-builtin import unittest from timeit import timeit +from collections import defaultdict import numpy as np import nmmo from nmmo.core.tile import TileState +from nmmo.entity.entity import EntityState +from nmmo.systems.item import ItemState +from nmmo.lib.event_log import EventState from nmmo.core.observation import Observation from nmmo.core import action as Action +from nmmo.lib import utils +from tests.testhelpers import ScriptedAgentTestConfig TileAttr = TileState.State.attr_name_to_col +EntityAttr = EntityState.State.attr_name_to_col +ItemAttr = ItemState.State.attr_name_to_col +EventAttr = EventState.State.attr_name_to_col + class TestObservationTile(unittest.TestCase): @classmethod @@ -22,7 +32,12 @@ def setUpClass(cls): def test_tile_attr(self): self.assertDictEqual(TileAttr, {'row': 0, 'col': 1, 'material_id': 2}) - def test_tile_correctness(self): + def test_action_target_consts(self): + self.assertEqual(len(Action.Style.edges), 3) + self.assertEqual(len(Action.Price.edges), self.config.PRICE_N_OBS) + self.assertEqual(len(Action.Token.edges), self.config.COMMUNICATION_NUM_TOKENS) + + def test_obs_tile_correctness(self): obs = self.env._compute_observations() center = self.config.PLAYER_VISION_RADIUS tile_dim = self.config.PLAYER_VISION_DIAMETER @@ -37,6 +52,9 @@ def correct_tile(agent_obs: Observation, r_delta, c_delta): return TileState.parse_array(agent_obs.tiles[r_cond & c_cond][0]) for agent_obs in obs.values(): + # check if the tile obs size + self.assertEqual(len(agent_obs.tiles), self.config.MAP_N_OBS) + # check if the coord conversion is correct row_map = agent_obs.tiles[:,TileAttr['row']].reshape(tile_dim,tile_dim) col_map = agent_obs.tiles[:,TileAttr['col']].reshape(tile_dim,tile_dim) @@ -57,6 +75,142 @@ def correct_tile(agent_obs: Observation, r_delta, c_delta): print('implemented:', timeit(lambda: agent_obs.tile(*d.delta), number=1000, globals=globals())) + def test_env_visible_tiles_correctness(self): + def correct_visible_tile(realm, agent_id): + # Based on numpy datatable window query + assert agent_id in realm.players, "agent_id not in the realm" + agent = realm.players[agent_id] + radius = realm.config.PLAYER_VISION_RADIUS + return TileState.Query.window( + realm.datastore, agent.row.val, agent.col.val, radius) + + # implemented in the env._compute_observations() + def visible_tiles_by_index(realm, agent_id, tile_map): + assert agent_id in realm.players, "agent_id not in the realm" + agent = realm.players[agent_id] + radius = realm.config.PLAYER_VISION_RADIUS + return tile_map[agent.row.val-radius:agent.row.val+radius+1, + agent.col.val-radius:agent.col.val+radius+1,:].reshape(225,3) + + # get tile map, to bypass the expensive tile window query + tile_map = TileState.Query.get_map(self.env.realm.datastore, self.config.MAP_SIZE) + + obs = self.env._compute_observations() + for agent_id in self.env.realm.players: + self.assertTrue(np.array_equal(correct_visible_tile(self.env.realm, agent_id), + obs[agent_id].tiles)) + + print('---test_visible_tile_window---') + print('reference:', timeit(lambda: correct_visible_tile(self.env.realm, agent_id), + number=1000, globals=globals())) + print('implemented:', + timeit(lambda: visible_tiles_by_index(self.env.realm, agent_id, tile_map), + number=1000, globals=globals())) + + def test_make_attack_mask_within_range(self): + def correct_within_range(entities, attack_range, agent_row, agent_col): + entities_pos = entities[:,[EntityAttr["row"],EntityAttr["col"]]] + within_range = utils.linf(entities_pos,(agent_row, agent_col)) <= attack_range + return within_range + + # implemented in the Observation._make_attack_mask() + def simple_within_range(entities, attack_range, agent_row, agent_col): + return np.maximum( + np.abs(entities[:,EntityAttr["row"]] - agent_row), + np.abs(entities[:,EntityAttr["col"]] - agent_col) + ) <= attack_range + + obs = self.env._compute_observations() + attack_range = self.config.COMBAT_MELEE_REACH + + for agent_obs in obs.values(): + entities = agent_obs.entities.values + agent = agent_obs.agent() + self.assertTrue(np.array_equal( + correct_within_range(entities, attack_range, agent.row, agent.col), + simple_within_range(entities, attack_range, agent.row, agent.col))) + + print('---test_attack_within_range---') + print('reference:', timeit( + lambda: correct_within_range(entities, attack_range, agent.row, agent.col), + number=1000, globals=globals())) + print('implemented:', timeit( + lambda: simple_within_range(entities, attack_range, agent.row, agent.col), + number=1000, globals=globals())) + + def test_gs_where_in_1d(self): + config = ScriptedAgentTestConfig() + env = nmmo.Env(config) + env.reset(seed=0) + for _ in range(5): + env.step({}) + + def correct_where_in_1d(event_data, subject): + flt_idx = np.in1d(event_data[:, EventAttr['ent_id']], subject) + return event_data[flt_idx] + + def where_in_1d_with_index(event_data, subject, index): + flt_idx = [row for sbj in subject for row in index.get(sbj,[])] + return event_data[flt_idx] + + event_data = EventState.Query.table(env.realm.datastore) + event_index = defaultdict() + for row, id_ in enumerate(event_data[:,EventAttr['ent_id']]): + if id_ in event_index: + event_index[id_].append(row) + else: + event_index[id_] = [row] + + # NOTE: the index-based approach returns the data in different order, + # and all the operations in the task system don't use the order info + def sort_event_data(event_data): + keys = [event_data[:,i] for i in range(1,8)] + sorted_idx = np.lexsort(keys) + return event_data[sorted_idx] + arr1 = sort_event_data(correct_where_in_1d(event_data, [1,2,3])) + arr2 = sort_event_data(where_in_1d_with_index(event_data, [1,2,3], event_index)) + self.assertTrue(np.array_equal(arr1, arr2)) + + print('---test_gs_where_in_1d---') + print('reference:', timeit( + lambda: correct_where_in_1d(event_data, [1, 2, 3]), + number=1000, globals=globals())) + print('implemented:', timeit( + lambda: where_in_1d_with_index(event_data, [1, 2, 3], event_index), + number=1000, globals=globals())) + + def test_habitable(self): + from nmmo.systems.ai.move import habitable as habitable_impl + realm_map = self.env.realm.map + realm_tiles= self.env.realm.map.tiles + ent = self.env.realm.npcs[-1] + np_random = self.env._np_random + + def habitable_ref(tiles, ent, np_random): + r, c = ent.pos + cands = [] + if tiles[r-1, c].habitable: + cands.append(Action.North) + if tiles[r+1, c].habitable: + cands.append(Action.South) + if tiles[r, c-1].habitable: + cands.append(Action.West) + if tiles[r, c+1].habitable: + cands.append(Action.East) + + if len(cands) == 0: + return Action.North + + return np_random.choice(cands) + + print('---test_habitable---') + print('reference:', timeit( + lambda: habitable_ref(realm_tiles, ent, np_random), + number=1000, globals=globals())) + print('habitable_impl:', timeit( + lambda: habitable_impl(realm_map, ent, np_random), + number=1000, globals=globals())) + if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_tile.py b/tests/core/test_tile.py index 5cc8629dd..f73dd3ad8 100644 --- a/tests/core/test_tile.py +++ b/tests/core/test_tile.py @@ -1,4 +1,6 @@ import unittest +import numpy as np + import nmmo from nmmo.core.tile import Tile, TileState from nmmo.datastore.numpy_datastore import NumpyDatastore @@ -9,6 +11,7 @@ def __init__(self): self.datastore = NumpyDatastore() self.datastore.register_object_type("Tile", TileState.State.num_attributes) self.config = nmmo.config.Small() + self._np_random = np.random class MockEntity(): def __init__(self, ent_id): @@ -18,9 +21,10 @@ class TestTile(unittest.TestCase): # pylint: disable=no-member def test_tile(self): mock_realm = MockRealm() - tile = Tile(mock_realm, 10, 20) + np_random = np.random + tile = Tile(mock_realm, 10, 20, np_random) - tile.reset(material.Foilage, nmmo.config.Small()) + tile.reset(material.Foilage, nmmo.config.Small(), np_random) self.assertEqual(tile.row.val, 10) self.assertEqual(tile.col.val, 20) diff --git a/tests/entity/test_entity.py b/tests/entity/test_entity.py index 848bb7bb1..952e10696 100644 --- a/tests/entity/test_entity.py +++ b/tests/entity/test_entity.py @@ -1,4 +1,6 @@ import unittest +import numpy as np + import nmmo from nmmo.entity.entity import Entity, EntityState from nmmo.datastore.numpy_datastore import NumpyDatastore @@ -9,6 +11,7 @@ def __init__(self): self.config.PLAYERS = range(100) self.datastore = NumpyDatastore() self.datastore.register_object_type("Entity", EntityState.State.num_attributes) + self._np_random = np.random # pylint: disable=no-member class TestEntity(unittest.TestCase): diff --git a/tests/render/test_load_replay.py b/tests/render/test_load_replay.py index 9f6a57fe4..87904cbf4 100644 --- a/tests/render/test_load_replay.py +++ b/tests/render/test_load_replay.py @@ -11,8 +11,8 @@ renderer = WebsocketRenderer() time.sleep(3) - # load a replay - replay = FileReplayHelper.load('replay_dev.json', decompress=False) + # load a replay: replace 'replay_dev.json' with your replay file + replay = FileReplayHelper.load('replay_dev.json') # run the replay for packet in replay: diff --git a/tests/render/test_render_save.py b/tests/render/test_render_save.py index f1f3801ef..d63599e0c 100644 --- a/tests/render/test_render_save.py +++ b/tests/render/test_render_save.py @@ -3,24 +3,19 @@ from nmmo.core.config import (AllGameSystems, Combat, Communication, Equipment, Exchange, Item, Medium, Profession, Progression, Resource, Small, Terrain) -from nmmo.task.task_api import nmmo_default_task from nmmo.render.render_client import WebsocketRenderer from nmmo.render.replay_helper import FileReplayHelper from scripted import baselines def create_config(base, nent, *systems): - # pylint: disable=redefined-outer-name - systems = (base, *systems) - name = '_'.join(cls.__name__ for cls in systems) - - conf = type(name, systems, {})() - + systems = (base, *systems) + name = '_'.join(cls.__name__ for cls in systems) + conf = type(name, systems, {})() conf.TERRAIN_TRAIN_MAPS = 1 conf.TERRAIN_EVAL_MAPS = 1 conf.IMMORTAL = True conf.PLAYER_N = nent conf.PLAYERS = [baselines.Random] - return conf no_npc_small_1_pop_conf = create_config(Small, 1, Terrain, Resource, @@ -51,32 +46,28 @@ def create_config(base, nent, *systems): import random from tqdm import tqdm - from tests.testhelpers import ScriptedAgentTestConfig - TEST_HORIZON = 100 RANDOM_SEED = random.randint(0, 9999) - config = ScriptedAgentTestConfig() - config.NPC_SPAWN_ATTEMPTS = 8 - replay_helper = FileReplayHelper() - for name, config in conf_dict.items(): + # the renderer is external to the env, so need to manually initiate it + renderer = WebsocketRenderer() + + for conf_name, config in conf_dict.items(): env = nmmo.Env(config) # to make replay, one should create replay_helper # and run the below line env.realm.record_replay(replay_helper) - tasks = nmmo_default_task(env.possible_agents, 'no_task') - env.reset(seed=RANDOM_SEED, new_tasks=tasks) - - # the renderer is external to the env, so need to manually initiate it - renderer = WebsocketRenderer(env.realm) + env.reset(seed=RANDOM_SEED) + renderer.set_realm(env.realm) for tick in tqdm(range(TEST_HORIZON)): env.step({}) renderer.render_realm() - # NOTE: the web client has trouble loading the compressed replay file - replay_helper.save(f'replay_{name}_seed_{RANDOM_SEED:04d}.json', compress=False) + # NOTE: save the data in uncompressed json format, since + # the web client has trouble loading the compressed replay file + replay_helper.save(f'replay_{conf_name}_seed_{RANDOM_SEED:04d}.json') diff --git a/tests/task/sample_curriculum.pkl b/tests/task/sample_curriculum.pkl new file mode 100644 index 000000000..2986f6d82 Binary files /dev/null and b/tests/task/sample_curriculum.pkl differ diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 5f5e532cf..f022def37 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -7,6 +7,7 @@ from nmmo.systems import skill from nmmo.task import predicate_api as p from nmmo.task import task_api as t +from nmmo.task import task_spec as ts from nmmo.task import base_predicates as bp from nmmo.task.game_state import GameState from nmmo.task.group import Group @@ -83,6 +84,7 @@ def ForageSkill(gs, subject, lvl): # Test rollout config = ScriptedAgentTestConfig() + config.ALLOW_MULTI_TASKS_PER_AGENT = True env = Env(config) # Creating and testing "team" tasks @@ -210,34 +212,25 @@ def PredicateMath(gs, subject): # DONE - def test_make_team_tasks_using_task_spec(self): - # NOTE: len(teams) and len(task_spec) don't need to match - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} - - """ task_spec is a list of tuple (reward_to, predicate class, kwargs) - - each tuple in the task_spec will create tasks for a team in teams + def test_task_spec_based_curriculum(self): + task_spec = [ + ts.TaskSpec(eval_fn=bp.CountEvent, eval_fn_kwargs={'event': 'PLAYER_KILL', 'N': 1}, + reward_to='team'), + ts.TaskSpec(eval_fn=bp.CountEvent, eval_fn_kwargs={'event': 'PLAYER_KILL', 'N': 2}, + reward_to='agent'), + ts.TaskSpec(eval_fn=bp.AllDead, eval_fn_kwargs={'target': 'left_team'}, + reward_to='agent'), + ts.TaskSpec(eval_fn=bp.CanSeeAgent, eval_fn_kwargs={'target': 'right_team_leader'}, + task_cls=t.OngoingTask, reward_to='team'), + ] - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded - - predicate class from the base predicates or custom predicates like above - - kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] - these str will be translated into the actual agent ids - * 'task_cls' is optional. If not provided, the standard Task is used. """ - task_spec = [ # (reward_to, predicate function, kwargs) - ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task - ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), - ('agent', bp.AllDead, {'target': 'left_team'}), - ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask})] + # NOTE: len(teams) and len(task_spec) don't need to match + teams = {1:[1,2,3], 3:[4,5], 6:[6,7], 9:[8,9], 14:[10,11]} config = ScriptedAgentTestConfig() env = Env(config) - env.reset(make_task_fn=lambda: t.make_team_tasks(teams, task_spec)) + env.reset(make_task_fn=lambda: ts.make_task_from_spec(teams, task_spec)) self.assertEqual(len(env.tasks), 6) # 6 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index 5fcc6bbcd..c8d3563ab 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -1,14 +1,15 @@ '''Manual test for creating learning curriculum manually''' # pylint: disable=invalid-name,redefined-outer-name,bad-builtin +# pylint: disable=wildcard-import,unused-wildcard-import +from typing import List -import nmmo -import nmmo.lib.material as Material -from nmmo.task import base_predicates as bp -from nmmo.task.task_api import OngoingTask, make_team_tasks +import nmmo.lib.material as m +from nmmo.task.base_predicates import * +from nmmo.task.task_api import OngoingTask from nmmo.task import constraint as c +from nmmo.task.task_spec import TaskSpec, check_task_spec - -EVENT_NUMBER_GOAL = [1, 2, 3, 4, 5, 7, 9, 12, 15, 20, 30, 50] +EVENT_NUMBER_GOAL = [3, 4, 5, 7, 9, 12, 15, 20, 30, 50] INFREQUENT_GOAL = list(range(1, 10)) STAY_ALIVE_GOAL = [50, 100, 150, 200, 300, 500] TEAM_NUMBER_GOAL = [10, 20, 30, 50, 70, 100] @@ -22,142 +23,158 @@ EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition HARVEST_ITEM = c.weapons + c.ammunition + c.consumables -""" task_spec is a list of tuple (reward_to, predicate class, kwargs) - - each tuple in the task_spec will create tasks for a team in teams - - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded - - predicate class from the base predicates or custom predicates like above - - kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] - these str will be translated into the actual agent ids - * 'task_cls' is optional. If not provided, the standard Task is used. """ -task_spec = [] +task_spec: List[TaskSpec] = [] # explore, eat, drink, attack any agent, harvest any item, level up any skill # which can happen frequently essential_skills = ['GO_FARTHEST', 'EAT_FOOD', 'DRINK_WATER', 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] for event_code in essential_skills: - task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) - for cnt in EVENT_NUMBER_GOAL] + for cnt in EVENT_NUMBER_GOAL: + task_spec.append(TaskSpec(eval_fn=CountEvent, + eval_fn_kwargs={'event': event_code, 'N': cnt}, + sampling_weight=30)) # item/market skills, which happen less frequently or should not do too much item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', 'GIVE_GOLD', 'LIST_ITEM', 'EARN_GOLD', 'BUY_ITEM'] for event_code in item_skills: - task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + task_spec += [TaskSpec(eval_fn=CountEvent, eval_fn_kwargs={'event': event_code, 'N': cnt}) for cnt in INFREQUENT_GOAL] # less than 10 # find resource tiles -for resource in Material.Harvestable: +for resource in m.Harvestable: for reward_to in ['agent', 'team']: - task_spec.append((reward_to, bp.CanSeeTile, {'tile_type': resource})) + task_spec.append(TaskSpec(eval_fn=CanSeeTile, eval_fn_kwargs={'tile_type': resource}, + reward_to=reward_to, sampling_weight=10)) # stay alive ... like ... for 300 ticks # i.e., getting incremental reward for each tick alive as an individual or a team for reward_to in ['agent', 'team']: for num_tick in STAY_ALIVE_GOAL: - task_spec.append((reward_to, bp.TickGE, {'num_tick': num_tick})) + task_spec.append(TaskSpec(eval_fn=TickGE, eval_fn_kwargs={'num_tick': num_tick}, + reward_to=reward_to)) # protect the leader: get reward for each tick the leader is alive -task_spec.append(('team', bp.StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) +# NOTE: a tuple of length four, to pass in the task_kwargs +task_spec.append(TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={'target': 'my_team_leader'}, + reward_to='team', task_cls=OngoingTask)) # want the other team or team leader to die for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: - task_spec.append(('team', bp.AllDead, {'target': target})) + task_spec.append(TaskSpec(eval_fn=AllDead, eval_fn_kwargs={'target': target}, + reward_to='team')) # occupy the center tile, assuming the Medium map size # TODO: it'd be better to have some intermediate targets toward the center for reward_to in ['agent', 'team']: - task_spec.append((reward_to, bp.OccupyTile, {'row': 80, 'col': 80})) # TODO: get config + task_spec.append(TaskSpec(eval_fn=OccupyTile, eval_fn_kwargs={'row': 80, 'col': 80}, + reward_to=reward_to)) # TODO: get config for map size # form a tight formation, for a certain number of ticks def PracticeFormation(gs, subject, dist, num_tick): - return bp.AllMembersWithinRange(gs, subject, dist) * bp.TickGE(gs, subject, num_tick) + return AllMembersWithinRange(gs, subject, dist) * TickGE(gs, subject, num_tick) for dist in [1, 3, 5, 10]: - task_spec += [('team', PracticeFormation, {'dist': dist, 'num_tick': num_tick}) - for num_tick in STAY_ALIVE_GOAL] + task_spec += [TaskSpec(eval_fn=PracticeFormation, + eval_fn_kwargs={'dist': dist, 'num_tick': num_tick}, + reward_to='team') for num_tick in STAY_ALIVE_GOAL] # find the other team leader for reward_to in ['agent', 'team']: for target in ['left_team_leader', 'right_team_leader']: - task_spec.append((reward_to, bp.CanSeeAgent, {'target': target})) + task_spec.append(TaskSpec(eval_fn=CanSeeAgent, eval_fn_kwargs={'target': target}, + reward_to=reward_to)) # find the other team (any agent) for reward_to in ['agent']: #, 'team']: for target in ['left_team', 'right_team']: - task_spec.append((reward_to, bp.CanSeeGroup, {'target': target})) + task_spec.append(TaskSpec(eval_fn=CanSeeGroup, eval_fn_kwargs={'target': target}, + reward_to=reward_to)) # explore the map -- sum the l-inf distance traveled by all subjects for dist in [10, 20, 30, 50, 100]: # each agent - task_spec.append(('agent', bp.DistanceTraveled, {'dist': dist})) + task_spec.append(TaskSpec(eval_fn=DistanceTraveled, eval_fn_kwargs={'dist': dist})) for dist in [30, 50, 70, 100, 150, 200, 300, 500]: # summed over all team members - task_spec.append(('team', bp.DistanceTraveled, {'dist': dist})) + task_spec.append(TaskSpec(eval_fn=DistanceTraveled, eval_fn_kwargs={'dist': dist}, + reward_to='team')) # level up a skill for skill in SKILLS: - for level in LEVEL_GOAL: + for level in LEVEL_GOAL[1:]: # since this is an agent task, num_agent must be 1 - task_spec.append(('agent', bp.AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) + task_spec.append(TaskSpec(eval_fn=AttainSkill, + eval_fn_kwargs={'skill': skill, 'level': level, 'num_agent': 1}, + reward_to='agent', + sampling_weight=10*(5-level) if level < 5 else 1)) # make attain skill a team task by varying the number of agents for skill in SKILLS: - for level in LEVEL_GOAL: + for level in LEVEL_GOAL[1:]: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.AttainSkill, - {'skill': skill, 'level': level,'num_agent': num_agent})) + task_spec.append( + TaskSpec(eval_fn=AttainSkill, + eval_fn_kwargs={'skill': skill, 'level': level, 'num_agent': num_agent}, + reward_to='team')) # practice specific combat style for style in COMBAT_STYLE: for cnt in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + task_spec.append(TaskSpec(eval_fn=ScoreHit, eval_fn_kwargs={'combat_style': style, 'N': cnt}, + sampling_weight=5)) for cnt in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + task_spec.append(TaskSpec(eval_fn=ScoreHit, eval_fn_kwargs={'combat_style': style, 'N': cnt}, + reward_to='team')) # defeat agents of a certain level as a team for agent_type in ['player', 'npc']: # c.AGENT_TYPE_CONSTRAINT for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.DefeatEntity, - {'agent_type': agent_type, 'level': level, 'num_agent': num_agent})) + task_spec.append(TaskSpec(eval_fn=DefeatEntity, + eval_fn_kwargs={'agent_type': agent_type, 'level': level, + 'num_agent': num_agent}, + reward_to='team')) # hoarding gold -- evaluated on the current gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.HoardGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=HoardGold, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.HoardGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=HoardGold, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # earning gold -- evaluated on the total gold earned by selling items # does NOT include looted gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.EarnGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=EarnGold, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.EarnGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=EarnGold, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # spending gold, by buying items for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.SpendGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=SpendGold, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.SpendGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=SpendGold, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # making profits by trading -- only buying and selling are counted for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.MakeProfit, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=MakeProfit, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.MakeProfit, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=MakeProfit, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # managing inventory space def PracticeInventoryManagement(gs, subject, space, num_tick): - return bp.InventorySpaceGE(gs, subject, space) * bp.TickGE(gs, subject, num_tick) + return InventorySpaceGE(gs, subject, space) * TickGE(gs, subject, num_tick) for space in [2, 4, 8]: - task_spec += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) + task_spec += [TaskSpec(eval_fn=PracticeInventoryManagement, + eval_fn_kwargs={'space': space, 'num_tick': num_tick}) for num_tick in STAY_ALIVE_GOAL] # own item, evaluated on the current inventory @@ -166,27 +183,32 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.OwnItem, - {'item': item, 'level': level, 'quantity': quantity})) - + task_spec.append(TaskSpec(eval_fn=OwnItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.OwnItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=OwnItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # equip item, evaluated on the current inventory and equipment status for item in EQUIP_ITEM: for level in LEVEL_GOAL: # agent task - task_spec.append(('agent', bp.EquipItem, - {'item': item, 'level': level, 'num_agent': 1})) - + task_spec.append(TaskSpec(eval_fn=EquipItem, + eval_fn_kwargs={'item': item, 'level': level, 'num_agent': 1}, + sampling_weight=4-level if level < 4 else 1)) # team task for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.EquipItem, - {'item': item, 'level': level, 'num_agent': num_agent})) + task_spec.append(TaskSpec(eval_fn=EquipItem, + eval_fn_kwargs={'item': item, 'level': level, + 'num_agent': num_agent}, + reward_to='team')) # consume items (ration, potion), evaluated based on the event log for item in c.consumables: @@ -194,14 +216,17 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.ConsumeItem, - {'item': item, 'level': level, 'quantity': quantity})) - + task_spec.append(TaskSpec(eval_fn=ConsumeItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.ConsumeItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=ConsumeItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # harvest items, evaluated based on the event log for item in HARVEST_ITEM: @@ -209,14 +234,17 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.HarvestItem, - {'item': item, 'level': level, 'quantity': quantity})) - + task_spec.append(TaskSpec(eval_fn=HarvestItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.HarvestItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=HarvestItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # list items, evaluated based on the event log for item in ALL_ITEM: @@ -224,14 +252,17 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.ListItem, - {'item': item, 'level': level, 'quantity': quantity})) - + task_spec.append(TaskSpec(eval_fn=ListItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.ListItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=ListItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # buy items, evaluated based on the event log for item in ALL_ITEM: @@ -239,31 +270,35 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.BuyItem, - {'item': item, 'level': level, 'quantity': quantity})) - + task_spec.append(TaskSpec(eval_fn=BuyItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.BuyItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=BuyItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # fully armed, evaluated based on the current player/inventory status for style in COMBAT_STYLE: for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.FullyArmed, - {'combat_style': style, 'level': level, 'num_agent': num_agent})) + task_spec.append(TaskSpec(eval_fn=FullyArmed, + eval_fn_kwargs={'combat_style': style, 'level': level, + 'num_agent': num_agent}, + reward_to='team')) if __name__ == '__main__': - # pylint: disable=bare-except import psutil from contextlib import contextmanager import multiprocessing as mp import numpy as np - import pickle + import dill @contextmanager def create_pool(num_proc): @@ -272,33 +307,20 @@ def create_pool(num_proc): pool.close() pool.join() - def check_task_spec(spec_list): - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} - config = nmmo.config.Default() - env = nmmo.Env(config) - for idx, single_spec in enumerate(spec_list): - # pylint: disable=cell-var-from-loop - test_task = make_team_tasks(teams, [single_spec]) - try: - env.reset(make_task_fn=lambda: test_task) - for _ in range(3): - env.step({}) - except: - print('invalid task spec:', single_spec) - - if idx > 0 and idx % 50 == 0: - print(idx, 'task specs checked.') - - # 3590 task specs: divide the specs into chunks - num_cores = psutil.cpu_count(logical=False) - spec_chunks = np.array_split(task_spec, num_cores) - with create_pool(num_cores) as pool: - pool.map(check_task_spec, spec_chunks) - - # print(sample_task[0].name) - # if len(sample_task) > 1: - # print(sample_task[-1].name) + # 3495 task specs: divide the specs into chunks + num_workers = round(psutil.cpu_count(logical=False)*0.7) + spec_chunks = np.array_split(task_spec, num_workers) + with create_pool(num_workers) as pool: + chunk_results = pool.map(check_task_spec, spec_chunks) + + num_error = 0 + for results in chunk_results: + for result in results: + if result["runnable"] is False: + print("ERROR: ", result["spec_name"]) + num_error += 1 + print("Total number of errors: ", num_error) # test if the task spec is pickalable - with open('manual_curriculum.pkl', 'wb') as f: - pickle.dump(task_spec, f) + with open('sample_curriculum.pkl', 'wb') as f: + dill.dump(task_spec, f) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index f2f61f0e3..90bff5a92 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -36,6 +36,7 @@ def _get_taskenv(self, config.PLAYERS = [Sleeper] config.PLAYER_N = NUM_AGENT config.IMMORTAL = True + config.ALLOW_MULTI_TASKS_PER_AGENT = True # OngoingTask keeps evaluating and returns progress as the reward # vs. Task stops evaluating once the task is completed, returns reward = delta(progress) @@ -126,8 +127,6 @@ def test_tickge_stay_alive_rip(self): # make sure that dead players not in the realm nor the datastore self.assertTrue(ent_id not in env.realm.players) self.assertTrue(ent_id not in entities) - # CHECK ME: dead agents are also not in infos - self.assertTrue(ent_id not in infos) # TickGE_5 is true. Agents 1-3 are dead, so # StayAlive(1,3) and StayAlive(3,4) are false, StayAlive(4) is true @@ -253,8 +252,9 @@ def test_occupy_tile(self): env = self._get_taskenv(test_preds, grass_map=True) # All agents to one corner + BORDER = env.config.MAP_BORDER for ent_id in env.realm.players: - change_agent_pos(env.realm,ent_id,(0,0)) + change_agent_pos(env.realm,ent_id,(BORDER,BORDER)) env.obs = env._compute_observations() _, _, _, infos = env.step({}) diff --git a/tests/task/test_sample_task_from_file.py b/tests/task/test_sample_task_from_file.py new file mode 100644 index 000000000..3b5053d6b --- /dev/null +++ b/tests/task/test_sample_task_from_file.py @@ -0,0 +1,23 @@ +import unittest + +import nmmo +from tests.testhelpers import ScriptedAgentTestConfig + +class TestSampleTaskFromFile(unittest.TestCase): + def test_sample_task_from_file(self): + # init the env with the pickled training task spec + config = ScriptedAgentTestConfig() + config.CURRICULUM_FILE_PATH = 'tests/task/sample_curriculum.pkl' + env = nmmo.Env(config) + + # env.reset() samples and instantiates a task for each agent + # when sample_traning_tasks is set True + env.reset() + + self.assertEqual(len(env.possible_agents), len(env.tasks)) + # for the training tasks, the task assignee and subject should be the same + for task in env.tasks: + self.assertEqual(task.assignee, task.subject) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 76356b2f7..8f8322a4b 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -1,14 +1,18 @@ # pylint: disable=unused-argument,invalid-name import unittest from types import FunctionType +import numpy as np import nmmo from nmmo.core.env import Env from nmmo.task.predicate_api import make_predicate, Predicate -from nmmo.task.task_api import Task, make_team_tasks +from nmmo.task.task_api import Task, OngoingTask, HoldDurationTask +from nmmo.task.task_spec import TaskSpec, make_task_from_spec from nmmo.task.group import Group -from nmmo.task.constraint import InvalidConstraint, ScalarConstraint -from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange +from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT +from nmmo.task.base_predicates import ( + TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive, HoardGold +) from nmmo.systems import item as Item from nmmo.core import action as Action @@ -31,9 +35,13 @@ class MockGameState(): def __init__(self): # pylint: disable=super-init-not-called self.config = nmmo.config.Default() + self.current_tick = -1 self.cache_result = {} self.get_subject_view = lambda _: None + def clear_cache(self): + pass + class TestTaskAPI(unittest.TestCase): def test_predicate_operators(self): # pylint: disable=unsupported-binary-operation,invalid-unary-operand-type @@ -57,21 +65,45 @@ def test_predicate_operators(self): # NOTE: only the instantiated predicate can be used with operators like below mock_gs = MockGameState() + # get the individual predicate"s source code + self.assertEqual(SUCCESS.get_source_code(), + "def Success(gs, subject: Group):\n return True") + self.assertEqual(FAILURE.get_source_code(), + "def Failure(gs, subject: Group):\n return False") + # AND (&), OR (|), NOT (~) pred1 = SUCCESS & FAILURE self.assertFalse(pred1(mock_gs)) + # NOTE: get_source_code() of the combined predicates returns the joined str + # of each predicate"s source code, which may NOT represent what the actual + # predicate is doing + self.assertEqual(pred1.get_source_code(), + "def Success(gs, subject: Group):\n return True\n\n"+ + "def Failure(gs, subject: Group):\n return False") pred2 = SUCCESS | FAILURE | SUCCESS self.assertTrue(pred2(mock_gs)) + self.assertEqual(pred2.get_source_code(), + "def Success(gs, subject: Group):\n return True\n\n"+ + "def Failure(gs, subject: Group):\n return False\n\n"+ + "def Success(gs, subject: Group):\n return True") pred3 = SUCCESS & ~ FAILURE & SUCCESS self.assertTrue(pred3(mock_gs)) + # NOTE: demonstrating the above point -- it just returns the functions + # NOT what this predicate actually evaluates. + self.assertEqual(pred2.get_source_code(), + pred3.get_source_code()) # predicate math pred4 = 0.1 * SUCCESS + 0.3 self.assertEqual(pred4(mock_gs), 0.4) self.assertEqual(pred4.name, "(ADD_(MUL_(Success_(0,))_0.1)_0.3)") + # NOTE: demonstrating the above point again, -- it just returns the functions + # NOT what this predicate actually evaluates. + self.assertEqual(pred4.get_source_code(), + "def Success(gs, subject: Group):\n return True") pred5 = 0.3 * SUCCESS - 1 self.assertEqual(pred5(mock_gs), 0.0) # cannot go below 0 @@ -82,11 +114,11 @@ def test_predicate_operators(self): def test_team_assignment(self): team = Group([1, 2, 8, 9], "TeamFoo") - self.assertEqual(team.name, 'TeamFoo') + self.assertEqual(team.name, "TeamFoo") self.assertEqual(team[2].name, "TeamFoo.2") self.assertEqual(team[2], (8,)) - # don't allow member of one-member team + # don"t allow member of one-member team self.assertEqual(team[2][0].name, team[2].name) def test_predicate_name(self): @@ -106,28 +138,12 @@ def test_predicate_name(self): "(SUB_(ADD_(MUL_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))_0.3)_0.4))") def test_constraint(self): - # pylint: disable=not-callable,no-value-for-parameter - # define predicate classes from functions - - # make predicate class from function - success_pred_cls = make_predicate(Success) - tickge_pred_cls = make_predicate(TickGE) - self.assertTrue(isinstance(TickGE, FunctionType)) - mock_gs = MockGameState() - good = success_pred_cls(Group(0)) - bad = success_pred_cls(Group(99999)) - good(mock_gs) - self.assertRaises(InvalidConstraint,lambda: bad(mock_gs)) - scalar = ScalarConstraint(low=-10,high=10) for _ in range(10): self.assertTrue(scalar.sample(mock_gs.config)<10) self.assertTrue(scalar.sample(mock_gs.config)>=-10) - bad = tickge_pred_cls(Group(0), -1) - self.assertRaises(InvalidConstraint, lambda: bad(mock_gs)) - def test_sample_predicate(self): # pylint: disable=no-value-for-parameter,expression-not-assigned # make predicate class from function @@ -136,7 +152,8 @@ def test_sample_predicate(self): # if the predicate class is instantiated without the subject, mock_gs = MockGameState() - predicate = canseegrp_pred_cls() & tickge_pred_cls() + predicate = canseegrp_pred_cls(subject=GroupConstraint, target=AGENT_LIST_CONSTRAINT) &\ + tickge_pred_cls(subject=GroupConstraint, num_tick=ScalarConstraint) self.assertEqual(predicate.name, "(AND_(CanSeeGroup_subject:GroupConstraint_target:AgentListConstraint)_"+\ "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") @@ -157,16 +174,30 @@ def test_task_api_with_predicate(self): fake_pred_cls = make_predicate(Fake) mock_gs = MockGameState() - predicate = fake_pred_cls(Group(2), 1, Item.Hat, Action.Melee) + group = Group(2) + item = Item.Hat + action = Action.Melee + predicate = fake_pred_cls(group, a=1, b=item, c=action) + self.assertEqual(predicate.get_source_code(), + "def Fake(gs, subject, a,b,c):\n return False") + self.assertEqual(predicate.get_signature(), ["gs", "subject", "a", "b", "c"]) + self.assertEqual(predicate.args, [group]) + self.assertDictEqual(predicate.kwargs, {"a": 1, "b": item, "c": action}) + assignee = [1,2,3] # list of agent ids task = predicate.create_task(assignee=assignee) rewards, infos = task.compute_rewards(mock_gs) self.assertEqual(task.name, # contains predicate name and assignee list - "(Task_eval_fn:(Fake_(2,)_1_Hat_Melee)_assignee:(1,2,3))") + "(Task_eval_fn:(Fake_(2,)_a:1_b:Hat_c:Melee)_assignee:(1,2,3))") + self.assertEqual(task.get_source_code(), + "def Fake(gs, subject, a,b,c):\n return False") + self.assertEqual(task.get_signature(), ["gs", "subject", "a", "b", "c"]) + self.assertEqual(task.args, [group]) + self.assertDictEqual(task.kwargs, {"a": 1, "b": item, "c": action}) for agent_id in assignee: self.assertEqual(rewards[agent_id], 0) - self.assertEqual(infos[agent_id]['progress'], 0) # progress (False -> 0) + self.assertEqual(infos[agent_id]["progress"], 0) # progress (False -> 0) self.assertFalse(task.completed) def test_task_api_with_function(self): @@ -182,9 +213,17 @@ def is_agent_1(gs): self.assertEqual(task.name, # contains predicate name and assignee list "(Task_eval_fn:is_agent_1_assignee:(1,2,3))") + self.assertEqual(task.get_source_code(), + "def is_agent_1(gs):\n " + + "return any(agent_id == 1 for agent_id in subject.agents)") + self.assertEqual(task.get_signature(), ["gs"]) + self.assertEqual(task.args, []) + self.assertDictEqual(task.kwargs, {}) + self.assertEqual(task.subject, tuple(assignee)) + self.assertEqual(task.assignee, tuple(assignee)) for agent_id in assignee: self.assertEqual(rewards[agent_id], 1) - self.assertEqual(infos[agent_id]['progress'], 1) # progress (True -> 1) + self.assertEqual(infos[agent_id]["progress"], 1) # progress (True -> 1) self.assertTrue(task.completed) def test_predicate_fn_using_other_predicate_fn(self): @@ -194,19 +233,41 @@ def PracticeFormation(gs, subject, dist, num_tick): # team should stay together within 1 tile for 10 ticks goal_tick = 10 - task_spec = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}) + task_spec = TaskSpec(eval_fn=PracticeFormation, + eval_fn_kwargs={"dist": 1, "num_tick": goal_tick}, + reward_to="team") # create the test task from the task spec - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + teams = {1:[1,2,3], 3:[4,5], 6:[6,7], 9:[8,9], 14:[10,11]} + team_ids= list(teams.keys()) config = ScriptedAgentTestConfig() config.PLAYERS =[Sleeper] config.IMMORTAL = True env = Env(config) - env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec])) - - # move agent 2, 3 to agent 1's pos + env.reset(make_task_fn=lambda: make_task_from_spec(teams, [task_spec])) + + # check the task information + task = env.tasks[0] + self.assertEqual(task.name, + "(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)"+ + "_assignee:(1,2,3))") + self.assertEqual(task.get_source_code(), + "def PracticeFormation(gs, subject, dist, num_tick):\n "+ + "return AllMembersWithinRange(gs, subject, dist) * "+ + "TickGE(gs, subject, num_tick)") + self.assertEqual(task.get_signature(), ["gs", "subject", "dist", "num_tick"]) + self.assertEqual(task.subject, tuple(teams[team_ids[0]])) + self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) + self.assertEqual(task.assignee, tuple(teams[team_ids[0]])) + + # check the agent-task map + for agent_id, agent_tasks in env.agent_task_map.items(): + for task in agent_tasks: + self.assertTrue(agent_id in task.assignee) + + # move agent 2, 3 to agent 1"s pos for agent_id in [2,3]: change_spawn_pos(env.realm, agent_id, env.realm.players[1].pos) @@ -215,18 +276,47 @@ def PracticeFormation(gs, subject, dist, num_tick): _, rewards, _, infos = env.step({}) if tick < 10: - self.assertAlmostEqual(rewards[1], 1/goal_tick) + target_reward = 1.0 if env.realm.tick == goal_tick else 1/goal_tick + self.assertAlmostEqual(rewards[1], target_reward) self.assertAlmostEqual((1+tick)/goal_tick, - infos[1]['task'][env.tasks[0].name]['progress']) + infos[1]["task"][env.tasks[0].name]["progress"]) else: # tick 11, task should be completed self.assertEqual(rewards[1], 0) - self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1) - self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) + self.assertEqual(infos[1]["task"][env.tasks[0].name]["progress"], 1) + self.assertEqual(infos[1]["task"][env.tasks[0].name]["completed"], True) + + # test the task_spec_with_embedding + task_embedding = np.ones(config.TASK_EMBED_DIM, dtype=np.float16) + task_spec_with_embedding = TaskSpec(eval_fn=PracticeFormation, + eval_fn_kwargs={"dist": 1, "num_tick": goal_tick}, + reward_to="team", + embedding=task_embedding) + env.reset(make_task_fn=lambda: make_task_from_spec(teams, [task_spec_with_embedding])) + + task = env.tasks[0] + self.assertEqual(task.spec_name, # without the subject and assignee agent ids + "Task_PracticeFormation_(dist:1_num_tick:10)_reward_to:team") + self.assertEqual(task.name, + "(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)"+ + "_assignee:(1,2,3))") + self.assertEqual(task.get_source_code(), + "def PracticeFormation(gs, subject, dist, num_tick):\n "+ + "return AllMembersWithinRange(gs, subject, dist) * "+ + "TickGE(gs, subject, num_tick)") + self.assertEqual(task.get_signature(), ["gs", "subject", "dist", "num_tick"]) + self.assertEqual(task.subject, tuple(teams[team_ids[0]])) + self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) + self.assertEqual(task.assignee, tuple(teams[team_ids[0]])) + self.assertTrue(np.array_equal(task.embedding, task_embedding)) + + obs_spec = env.observation_space(1) + self.assertTrue(obs_spec["Task"].contains(task.embedding)) def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() + config.ALLOW_MULTI_TASKS_PER_AGENT = True env = Env(config) # make predicate class from function @@ -250,26 +340,143 @@ def test_completed_tasks_in_info(self): _, _, _, infos = env.step({}) # agent 1: assigned only task 1, which is always True - self.assertEqual(infos[1]['task'][env.tasks[0].name]['reward'], 1.0) + self.assertEqual(infos[1]["task"][env.tasks[0].name]["reward"], 1.0) for i in [1, 2]: # task 2 and 3 - self.assertTrue(env.tasks[i].name not in infos[1]['task']) + self.assertTrue(env.tasks[i].name not in infos[1]["task"]) # agent 2: assigned task 2 (Failure) and task 4 (Success) - self.assertEqual(infos[2]['task'][env.tasks[1].name]['reward'], 0.0) # task 2 - self.assertEqual(infos[2]['task'][env.tasks[3].name]['reward'], 1.0) # task 4 + self.assertEqual(infos[2]["task"][env.tasks[1].name]["reward"], 0.0) # task 2 + self.assertEqual(infos[2]["task"][env.tasks[3].name]["reward"], 1.0) # task 4 # agent 3 assigned task 3, Fake(), which is always False (0) - self.assertEqual(infos[3]['task'][env.tasks[2].name]['reward'], 0.0) # task 3 + self.assertEqual(infos[3]["task"][env.tasks[2].name]["reward"], 0.0) # task 3 # all agents in the same team with agent 2 have SUCCESS - # other agents don't have any tasks assigned + # other agents don"t have any tasks assigned for ent_id in env.possible_agents: if ent_id in same_team: - self.assertEqual(infos[ent_id]['task'][env.tasks[3].name]['reward'], 1.0) + self.assertEqual(infos[ent_id]["task"][env.tasks[3].name]["reward"], 1.0) else: - self.assertTrue(env.tasks[3].name not in infos[ent_id]['task']) + self.assertTrue(env.tasks[3].name not in infos[ent_id]["task"]) # DONE -if __name__ == '__main__': + def test_make_task_from_spec(self): + teams = {0:[1,2,3], 1:[4,5,6]} + test_embedding = np.array([1,2,3]) + task_spec = [ + TaskSpec(eval_fn=TickGE, eval_fn_kwargs={"num_tick": 20}), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={}, task_cls=OngoingTask), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={"target": "my_team_leader"}, + task_cls=OngoingTask, reward_to="team"), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={"target": "left_team"}, + task_cls=OngoingTask, task_kwargs={"reward_multiplier": 2}, + reward_to="team", embedding=test_embedding), + ] + + task_list = [] + # testing each task spec, individually + for single_spec in task_spec: + task_list.append(make_task_from_spec(teams, [single_spec])) + + # check the task spec names + self.assertEqual(task_list[0][0].spec_name, + "Task_TickGE_(num_tick:20)_reward_to:agent") + self.assertEqual(task_list[1][0].spec_name, + "OngoingTask_StayAlive_()_reward_to:agent") + self.assertEqual(task_list[2][0].spec_name, + "OngoingTask_StayAlive_(target:my_team_leader)_reward_to:team") + self.assertEqual(task_list[3][0].spec_name, + "OngoingTask_StayAlive_(target:left_team)_reward_to:team") + + # check the task names + self.assertEqual(task_list[0][0].name, + "(Task_eval_fn:(TickGE_(1,)_num_tick:20)_assignee:(1,))") + self.assertEqual(task_list[1][0].name, + "(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,))") + self.assertEqual(task_list[2][0].name, + "(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,2,3))") + self.assertEqual(task_list[3][0].name, + "(OngoingTask_eval_fn:(StayAlive_(4,5,6))_assignee:(1,2,3))") + self.assertEqual(task_list[3][0].reward_multiplier, 2) + self.assertTrue(np.array_equal(task_list[3][0].embedding, np.array([1,2,3]))) + + def test_hold_duration_task(self): + # pylint: disable=protected-access + # each agent should hoard gold for 10 ticks + goal_tick = goal_gold = 10 + task_spec = [TaskSpec(eval_fn=HoardGold, + eval_fn_kwargs={"amount": goal_gold}, + task_cls=HoldDurationTask, + task_kwargs={"hold_duration": goal_tick})] * 3 + + config = ScriptedAgentTestConfig() + config.PLAYERS =[Sleeper] + config.IMMORTAL = True + + teams = {id: [id] for id in range(1,4)} + env = Env(config) + env.reset(make_task_fn=lambda: make_task_from_spec(teams, task_spec)) + + # give agent 1, 2 enough gold + for agent_id in [1,2]: + env.realm.players[agent_id].gold.update(goal_gold+1) + + for _ in range(5): + env.step({}) + + # check the task information + self.assertEqual(env.tasks[0].spec_name, + "HoldDurationTask_HoardGold_(amount:10)_reward_to:agent") + for idx in [0, 1]: + self.assertEqual(env.tasks[idx]._progress, 0.5) # agent 1 & 2 has enough gold + self.assertEqual(env.tasks[idx]._max_progress, 0.5) + self.assertEqual(env.tasks[idx].reward_signal_count, 5) + self.assertTrue(env.tasks[2]._progress == 0.0) # agent 3 has no gold + for task in env.tasks: + self.assertTrue(task.completed is False) # not completed yet + + # take away gold from agent 2 + env.realm.players[2].gold.update(goal_gold-1) + + env.step({}) + self.assertEqual(env.tasks[0]._progress, 0.6) # agent 1 has enough gold + self.assertEqual(env.tasks[0]._max_progress, 0.6) + self.assertEqual(env.tasks[0].reward_signal_count, 6) + self.assertEqual(env.tasks[1]._progress, 0) # agent 2 has not enough gold + self.assertEqual(env.tasks[1]._max_progress, 0.5) # max values are preserved + self.assertEqual(env.tasks[1]._positive_reward_count, 5) + self.assertEqual(env.tasks[1].reward_signal_count, 6) # 5 positive + 1 negative + + for _ in range(4): + env.step({}) + + # only agent 1 successfully held 10 gold for 10 ticks + self.assertTrue(env.tasks[0].completed is True) + self.assertTrue(env.tasks[1].completed is False) + self.assertTrue(env.tasks[2].completed is False) + + def test_task_spec_with_predicate(self): + teams = {0:[1,2,3], 1:[4,5,6]} + SUCCESS = make_predicate(Success)(Group(1)) + FAILURE = make_predicate(Failure)(Group([2,3])) + predicate = SUCCESS & FAILURE + predicate.name = "SuccessAndFailure" + + # make task spec + task_spec = [TaskSpec(predicate=predicate, + eval_fn=None, + eval_fn_kwargs={"success_target": 1, + "test_item": Item.Hat})] + tasks = make_task_from_spec(teams, task_spec) + + env = Env(ScriptedAgentTestConfig()) + env.reset(make_task_fn=lambda: tasks) + env.step({}) + + # check the task information + self.assertEqual(env.tasks[0].spec_name, + "Task_SuccessAndFailure_(success_target:1_test_item:Hat)_reward_to:agent") + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 9d4ca733f..e84b0bc4e 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -1,75 +1,92 @@ -#from pdb import set_trace as T import unittest - -import logging -import random +from timeit import timeit +import numpy as np from tqdm import tqdm +import nmmo +from nmmo.lib import seeding from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv -from tests.testhelpers import observations_are_equal, actions_are_equal +from tests.testhelpers import observations_are_equal # 30 seems to be enough to test variety of agent actions TEST_HORIZON = 30 -RANDOM_SEED = random.randint(0, 10000) +RANDOM_SEED = np.random.randint(0, 100000) class TestDeterminism(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.horizon = TEST_HORIZON - cls.rand_seed = RANDOM_SEED - cls.config = ScriptedAgentTestConfig() - env = ScriptedAgentTestEnv(cls.config) - - logging.info('TestDeterminism: Setting up the reference env with seed %s', str(cls.rand_seed)) - cls.init_obs_src = env.reset(seed=cls.rand_seed) - cls.actions_src = [] - logging.info('TestDeterminism: Running %s ticks', str(cls.horizon)) - for _ in tqdm(range(cls.horizon)): - nxt_obs_src, _, _, _ = env.step({}) - cls.actions_src.append(env.actions) - cls.final_obs_src = nxt_obs_src - npcs_src = {} - for nid, npc in list(env.realm.npcs.items()): - npcs_src[nid] = npc.packet() - cls.final_npcs_src = npcs_src - - logging.info('TestDeterminism: Setting up the replication env with seed %s', str(cls.rand_seed)) - cls.init_obs_rep = env.reset(seed=cls.rand_seed) - cls.actions_rep = [] - logging.info('TestDeterminism: Running %s ticks', str(cls.horizon)) - for _ in tqdm(range(cls.horizon)): - nxt_obs_rep, _, _, _ = env.step({}) - cls.actions_rep.append(env.actions) - cls.final_obs_rep = nxt_obs_rep - npcs_rep = {} - for nid, npc in list(env.realm.npcs.items()): - npcs_rep[nid] = npc.packet() - cls.final_npcs_rep = npcs_rep - - def test_func_are_observations_equal(self): - self.assertTrue(observations_are_equal(self.init_obs_src, self.init_obs_src)) - self.assertTrue(observations_are_equal(self.final_obs_src, self.final_obs_src)) - self.assertTrue(actions_are_equal(self.actions_src[0], self.actions_src[0])) - self.assertDictEqual(self.final_npcs_src, self.final_npcs_src) - - def test_compare_initial_observations(self): - # assertDictEqual CANNOT replace are_observations_equal - self.assertTrue(observations_are_equal(self.init_obs_src, self.init_obs_rep)) - #self.assertDictEqual(self.init_obs_src, self.init_obs_rep) - - def test_compare_actions(self): - self.assertEqual(len(self.actions_src), len(self.actions_rep)) - for t, action_src in enumerate(self.actions_src): - self.assertTrue(actions_are_equal(action_src, self.actions_rep[t])) - - def test_compare_final_observations(self): - # assertDictEqual CANNOT replace are_observations_equal - self.assertTrue(observations_are_equal(self.final_obs_src, self.final_obs_rep)) - #self.assertDictEqual(self.final_obs_src, self.final_obs_rep) - - def test_compare_final_npcs(self) : - self.assertDictEqual(self.final_npcs_src, self.final_npcs_rep) + def test_np_random_get_direction(self): + # pylint: disable=protected-access,bad-builtin,unnecessary-lambda + np_random_1, np_seed_1 = seeding.np_random(RANDOM_SEED) + np_random_2, np_seed_2 = seeding.np_random(RANDOM_SEED) + self.assertEqual(np_seed_1, np_seed_2) + + # also test get_direction, which was added for speed optimization + self.assertTrue(np.array_equal(np_random_1._dir_seq, np_random_2._dir_seq)) + + print('---test_np_random_get_direction---') + print('np_random.integers():', timeit(lambda: np_random_1.integers(0,4), + number=100000, globals=globals())) + print('np_random.get_direction():', timeit(lambda: np_random_1.get_direction(), + number=100000, globals=globals())) + + def test_map_determinism(self): + config = nmmo.config.Default() + config.MAP_FORCE_GENERATION = True + config.TERRAIN_FLIP_SEED = False + + map_generator = config.MAP_GENERATOR(config) + np_random1, _ = seeding.np_random(RANDOM_SEED) + np_random1_1, _ = seeding.np_random(RANDOM_SEED) + + terrain1, tiles1 = map_generator.generate_map(0, np_random1) + terrain1_1, tiles1_1 = map_generator.generate_map(0, np_random1_1) + + self.assertTrue(np.array_equal(terrain1, terrain1_1)) + self.assertTrue(np.array_equal(tiles1, tiles1_1)) + + # test flip seed + config2 = nmmo.config.Default() + config2.MAP_FORCE_GENERATION = True + config2.TERRAIN_FLIP_SEED = True + + map_generator2 = config2.MAP_GENERATOR(config2) + np_random2, _ = seeding.np_random(RANDOM_SEED) + terrain2, tiles2 = map_generator2.generate_map(0, np_random2) + + self.assertFalse(np.array_equal(terrain1, terrain2)) + self.assertFalse(np.array_equal(tiles1, tiles2)) + + def test_env_level_rng(self): + # two envs running independently should return the same results + + # config to always generate new maps, to test map determinism + config1 = ScriptedAgentTestConfig() + setattr(config1, 'MAP_FORCE_GENERATION', True) + setattr(config1, 'PATH_MAPS', 'maps/det1') + config2 = ScriptedAgentTestConfig() + setattr(config2, 'MAP_FORCE_GENERATION', True) + setattr(config2, 'PATH_MAPS', 'maps/det2') + + # to create the same maps, seed must be provided + env1 = ScriptedAgentTestEnv(config1, seed=RANDOM_SEED) + env2 = ScriptedAgentTestEnv(config2, seed=RANDOM_SEED) + envs = [env1, env2] + + init_obs = [env.reset(seed=RANDOM_SEED+1) for env in envs] + + self.assertTrue(observations_are_equal(init_obs[0], init_obs[0])) # sanity check + self.assertTrue(observations_are_equal(init_obs[0], init_obs[1]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") + + for _ in tqdm(range(TEST_HORIZON)): + # step returns a tuple of (obs, rewards, dones, infos) + step_results = [env.step({}) for env in envs] + self.assertTrue(observations_are_equal(step_results[0][0], step_results[1][0]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") + + event_logs = [env.realm.event_log.get_data() for env in envs] + self.assertTrue(np.array_equal(event_logs[0], event_logs[1]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") if __name__ == '__main__': diff --git a/tests/test_deterministic_replay.py b/tests/test_deterministic_replay.py deleted file mode 100644 index a8ac69c63..000000000 --- a/tests/test_deterministic_replay.py +++ /dev/null @@ -1,169 +0,0 @@ -#from pdb import set_trace as T -import unittest - -import os -import glob -import pickle -import logging -import random -from typing import Any, Dict - -import numpy as np -from tqdm import tqdm - -from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv -from tests.testhelpers import observations_are_equal - -import nmmo - -TEST_HORIZON = 50 -LOCAL_REPLAY = 'tests/replay_local.pickle' - -def load_replay_file(replay_file): - # load the pickle file - with open(replay_file, 'rb') as handle: - ref_data = pickle.load(handle) - - logging.info('TestDetReplay: Loading the existing replay file with seed %s', - str(ref_data['seed'])) - - seed = ref_data['seed'] - config = ref_data['config'] - map_src = ref_data['map'] - init_obs = ref_data['init_obs'] - init_npcs = ref_data['init_npcs'] - med_obs = ref_data['med_obs'] - actions = ref_data['actions'] - final_obs = ref_data['final_obs'] - final_npcs = ref_data['final_npcs'] - - return seed, config, map_src, init_obs, init_npcs, med_obs, actions, final_obs, final_npcs - - -def make_actions_picklable(actions: Dict[int, Dict[str, Dict[str, Any]]]): - for eid in actions: - for atn, args in actions[eid].items(): - for arg, val in args.items(): - if arg == nmmo.action.Price and not isinstance(val, int): - # : - # convert Discrete_1 to 1 - actions[eid][atn][arg] = val.val - return actions - - -def generate_replay_file(replay_file, test_horizon): - # generate the new data with a new env - seed = random.randint(0, 10000) - logging.info('TestDetReplay: Creating a new replay file with seed %s', str(seed)) - config = ScriptedAgentTestConfig() - env_src = ScriptedAgentTestEnv(config, seed=seed) - init_obs = env_src.reset() - init_npcs = env_src.realm.npcs.packet - - # extract the map - map_src = np.zeros((config.MAP_SIZE, config.MAP_SIZE)) - for r in range(config.MAP_SIZE): - for c in range(config.MAP_SIZE): - map_src[r,c] = env_src.realm.map.tiles[r,c].material_id.val - - med_obs, actions = [], [] - logging.info('TestDetReplay: Running %s ticks', str(test_horizon)) - for _ in tqdm(range(test_horizon)): - nxt_obs, _, _, _ = env_src.step({}) - med_obs.append(nxt_obs) - actions.append(make_actions_picklable(env_src.actions)) - final_obs = nxt_obs - final_npcs = env_src.realm.npcs.packet - - # save to the file - with open(replay_file, 'wb') as handle: - ref_data = {} - ref_data['version'] = nmmo.__version__ # just in case - ref_data['seed'] = seed - ref_data['config'] = config - ref_data['map'] = map_src - ref_data['init_obs'] = init_obs - ref_data['init_npcs'] = init_npcs - ref_data['med_obs'] = med_obs - ref_data['actions'] = actions - ref_data['final_obs'] = final_obs - ref_data['final_npcs'] = final_npcs - - pickle.dump(ref_data, handle) - - return seed, config, map_src, init_obs, init_npcs, med_obs, actions, final_obs, final_npcs - - -class TestDeterministicReplay(unittest.TestCase): - - # CHECK ME: pausing the deterministic replay test while debugging actions/items - # because changes there would most likely to change the game play and make the test fail - __test__ = False - - @classmethod - def setUpClass(cls): - """ - First, check if there is a replay file on the repo that starts with 'replay_repo_' - If there is one, use it. - - Second, check if there a local replay file, which should be named 'replay_local.pickle' - If there is one, use it. If not create one. - - TODO: allow passing a different replay file - """ - # first, look for the repo replay file - replay_files = glob.glob(os.path.join('tests', 'replay_repo_*.pickle')) - if replay_files: - # there may be several, but we only take the first one [0] - cls.seed, cls.config, cls.map_src, cls.init_obs_src, cls.init_npcs_src, \ - cls.med_obs_src,cls.actions, cls.final_obs_src, cls.final_npcs_src = \ - load_replay_file(replay_files[0]) - else: - # if there is no repo replay file, then go with the default local file - if os.path.exists(LOCAL_REPLAY): - cls.seed, cls.config, cls.map_src, cls.init_obs_src, cls.init_npcs_src, \ - cls.med_obs_src, cls.actions, cls.final_obs_src, cls.final_npcs_src = \ - load_replay_file(LOCAL_REPLAY) - else: - cls.seed, cls.config, cls.map_src, cls.init_obs_src, cls.init_npcs_src, \ - cls.med_obs_src, cls.actions, cls.final_obs_src, cls.final_npcs_src = \ - generate_replay_file(LOCAL_REPLAY, TEST_HORIZON) - cls.horizon = len(cls.actions) - - logging.info('TestDetReplay: Setting up the replication env with seed %s', str(cls.seed)) - env_rep = ScriptedAgentTestEnv(cls.config, seed=cls.seed) - cls.init_obs_rep = env_rep.reset() - cls.init_npcs_rep = env_rep.realm.npcs.packet - - # extract the map - cls.map_rep = np.zeros((cls.config.MAP_SIZE, cls.config.MAP_SIZE)) - for r in range(cls.config.MAP_SIZE): - for c in range(cls.config.MAP_SIZE): - cls.map_rep[r,c] = env_rep.realm.map.tiles[r,c].material_id.val - - cls.med_obs_rep, cls.actions_rep = [], [] - logging.info('TestDetReplay: Running %s ticks', str(cls.horizon)) - for t in tqdm(range(cls.horizon)): - nxt_obs_rep, _, _, _ = env_rep.step(cls.actions[t]) - cls.med_obs_rep.append(nxt_obs_rep) - cls.final_obs_rep = nxt_obs_rep - cls.final_npcs_rep = env_rep.realm.npcs.packet - - def test_compare_maps(self): - self.assertEqual(np.sum(self.map_src != self.map_rep), 0) - - def test_compare_init_obs(self): - self.assertTrue(observations_are_equal(self.init_obs_src, self.init_obs_rep)) - - def test_compare_init_npcs(self): - self.assertTrue(observations_are_equal(self.init_npcs_src, self.init_npcs_rep)) - - def test_compare_final_obs(self): - self.assertTrue(observations_are_equal(self.final_obs_src, self.final_obs_rep)) - - def test_compare_final_npcs(self): - self.assertTrue(observations_are_equal(self.final_npcs_src, self.final_npcs_rep)) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py index 50b7d6abb..c6322c52f 100644 --- a/tests/test_eventlog.py +++ b/tests/test_eventlog.py @@ -48,6 +48,7 @@ def test_event_logging(self): combat_style=Skill.Melee, damage=50) event_log.record(EventCode.PLAYER_KILL, MockEntity(3), target=MockEntity(5, attack_level=5)) + event_log.update() mock_realm.tick = 1 event_log.record(EventCode.CONSUME_ITEM, MockEntity(4), @@ -56,6 +57,7 @@ def test_event_logging(self): event_log.record(EventCode.DESTROY_ITEM, MockEntity(5)) event_log.record(EventCode.HARVEST_ITEM, MockEntity(6), item=Whetstone(mock_realm, 3)) + event_log.update() mock_realm.tick = 2 event_log.record(EventCode.GIVE_GOLD, MockEntity(7)) @@ -65,34 +67,54 @@ def test_event_logging(self): event_log.record(EventCode.BUY_ITEM, MockEntity(10), item=Whetstone(mock_realm, 7), price=21) #event_log.record(EventCode.SPEND_GOLD, env.realm.players[11], amount=25) + event_log.update() mock_realm.tick = 3 event_log.record(EventCode.LEVEL_UP, MockEntity(12), skill=Skill.Fishing, level=3) + event_log.update() mock_realm.tick = 4 event_log.record(EventCode.GO_FARTHEST, MockEntity(12), distance=6) event_log.record(EventCode.EQUIP_ITEM, MockEntity(12), item=Hat(mock_realm, 4)) + event_log.update() log_data = [list(row) for row in event_log.get_data()] - self.assertListEqual(log_data, [ - [ 1, 1, 1, EventCode.EAT_FOOD, 0, 0, 0, 0, 0], - [ 2, 2, 1, EventCode.DRINK_WATER, 0, 0, 0, 0, 0], - [ 3, 2, 1, EventCode.SCORE_HIT, 1, 0, 50, 0, 0], - [ 4, 3, 1, EventCode.PLAYER_KILL, 0, 5, 0, 0, 5], - [ 5, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0], - [ 6, 4, 2, EventCode.GIVE_ITEM, 0, 0, 0, 0, 0], - [ 7, 5, 2, EventCode.DESTROY_ITEM, 0, 0, 0, 0, 0], - [ 8, 6, 2, EventCode.HARVEST_ITEM, 13, 3, 1, 0, 0], - [ 9, 7, 3, EventCode.GIVE_GOLD, 0, 0, 0, 0, 0], - [10, 8, 3, EventCode.LIST_ITEM, 16, 5, 1, 11, 0], - [11, 9, 3, EventCode.EARN_GOLD, 0, 0, 0, 15, 0], - [12, 10, 3, EventCode.BUY_ITEM, 13, 7, 1, 21, 0], - [13, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0], - [14, 12, 5, EventCode.GO_FARTHEST, 0, 0, 6, 0, 0], - [15, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + [1, 1, 1, EventCode.EAT_FOOD, 0, 0, 0, 0, 0], + [1, 2, 1, EventCode.DRINK_WATER, 0, 0, 0, 0, 0], + [1, 2, 1, EventCode.SCORE_HIT, 1, 0, 50, 0, 0], + [1, 3, 1, EventCode.PLAYER_KILL, 0, 5, 0, 0, 5], + [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0], + [1, 4, 2, EventCode.GIVE_ITEM, 0, 0, 0, 0, 0], + [1, 5, 2, EventCode.DESTROY_ITEM, 0, 0, 0, 0, 0], + [1, 6, 2, EventCode.HARVEST_ITEM, 13, 3, 1, 0, 0], + [1, 7, 3, EventCode.GIVE_GOLD, 0, 0, 0, 0, 0], + [1, 8, 3, EventCode.LIST_ITEM, 16, 5, 1, 11, 0], + [1, 9, 3, EventCode.EARN_GOLD, 0, 0, 0, 15, 0], + [1, 10, 3, EventCode.BUY_ITEM, 13, 7, 1, 21, 0], + [1, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0], + [1, 12, 5, EventCode.GO_FARTHEST, 0, 0, 6, 0, 0], + [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + + log_by_tick = [list(row) for row in event_log.get_data(tick = 4)] + self.assertListEqual(log_by_tick, [ + [1, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0]]) + + log_by_event = [list(row) for row in event_log.get_data(event_code = EventCode.CONSUME_ITEM)] + self.assertListEqual(log_by_event, [ + [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0]]) + + log_by_tick_agent = [list(row) for row in \ + event_log.get_data(tick = 5, + agents = [12], + event_code = EventCode.EQUIP_ITEM)] + self.assertListEqual(log_by_tick_agent, [ + [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + + empty_log = event_log.get_data(tick = 10) + self.assertTrue(empty_log.shape[0] == 0) if __name__ == '__main__': unittest.main() diff --git a/tests/test_memory_usage.py b/tests/test_memory_usage.py new file mode 100644 index 000000000..ab9220526 --- /dev/null +++ b/tests/test_memory_usage.py @@ -0,0 +1,12 @@ +# pylint: disable=bad-builtin, unused-variable +import psutil + +import nmmo + +def test_memory_usage(): + env = nmmo.Env() + process = psutil.Process() + print("memory", process.memory_info().rss) + +if __name__ == '__main__': + test_memory_usage() diff --git a/tests/test_performance.py b/tests/test_performance.py index f27519e6c..ce9051a20 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -1,8 +1,16 @@ +# import time +import cProfile +import io +import pstats import nmmo from nmmo.core.config import (NPC, AllGameSystems, Combat, Communication, Equipment, Exchange, Item, Medium, Profession, Progression, Resource, Small, Terrain) +from nmmo.task.task_api import nmmo_default_task, make_same_task +from nmmo.task.base_predicates import CountEvent, FullyArmed +from nmmo.systems.skill import Melee +from tests.testhelpers import profile_env_step from scripted import baselines @@ -100,6 +108,36 @@ def test_fps_no_npc_med_100_pop(benchmark): def test_fps_all_med_100_pop(benchmark): benchmark_config(benchmark, Medium, 100, AllGameSystems) +def set_seed_test(): + random_seed = 5000 + conf = create_config(Medium, Terrain, Resource, Combat, NPC) + conf.PLAYER_N = 10 + conf.PLAYERS = [baselines.Random] + + env = nmmo.Env(conf) + + env.reset(seed=random_seed) + for _ in range(1024): + env.step({}) + +def set_seed_test_complex(): + tasks = nmmo_default_task(range(128)) + tasks += make_same_task(CountEvent, range(128), + pred_kwargs={'event': 'EAT_FOOD', 'N': 10}) + tasks += make_same_task(FullyArmed, range(128), + pred_kwargs={'combat_style': Melee, 'level': 3, 'num_agent': 1}) + profile_env_step(tasks=tasks) + +if __name__ == '__main__': + with open('profile.run','a', encoding="utf-8") as f: + pr = cProfile.Profile() + pr.enable() + set_seed_test_complex() + pr.disable() + s = io.StringIO() + ps = pstats.Stats(pr,stream=s).sort_stats('tottime') + ps.print_stats() + f.write(s.getvalue()) ''' def benchmark_env(benchmark, env, nent): diff --git a/tests/test_team_spawn.py b/tests/test_team_spawn.py deleted file mode 100644 index da279e97b..000000000 --- a/tests/test_team_spawn.py +++ /dev/null @@ -1,56 +0,0 @@ -import unittest - -import nmmo -from nmmo.core.agent import Agent -from nmmo.lib.team_helper import TeamHelper -from nmmo.lib import spawn - - -class TeamLoader(spawn.SequentialLoader): - def __init__(self, config, team_helper: TeamHelper): - assert config.PLAYERS == [Agent], \ - "TeamLoader only supports config.PLAYERS == [Agent]" - super().__init__(config) - self.team_helper = team_helper - - self.candidate_spawn_pos = \ - spawn.get_team_spawn_positions(config, team_helper.num_teams) - - def get_spawn_position(self, agent_id): - team_id, _ = self.team_helper.team_and_position_for_agent[agent_id] - return self.candidate_spawn_pos[team_id] - - -class TestTeamSpawn(unittest.TestCase): - def test_team_spawn(self): - num_teams = 16 - team_size = 8 - team_helper = TeamHelper({ - i: [i*team_size+j+1 for j in range(team_size)] - for i in range(num_teams)} - ) - - config = nmmo.config.Small() - config.PLAYER_N = num_teams * team_size - config.PLAYER_LOADER = lambda config: TeamLoader(config, team_helper) - - assert config.PLAYER_N == num_teams * team_size,\ - "config.PLAYER_N must be num_teams * team_size" - env = nmmo.Env(config) - env.reset() - - # agents in the same team should spawn together - team_locs = {} - for team_id, team_members in team_helper.teams.items(): - team_locs[team_id] = env.realm.players[team_members[0]].pos - for agent_id in team_members: - self.assertEqual(team_locs[team_id], env.realm.players[agent_id].pos) - - # teams should be apart from each other - for i in range(num_teams): - for j in range(i+1, num_teams): - self.assertNotEqual(team_locs[i], team_locs[j]) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 03ea20977..111bf2621 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -6,12 +6,12 @@ import numpy as np import nmmo - -from scripted import baselines -from nmmo.entity.entity import EntityState from nmmo.core import action from nmmo.systems import item as Item from nmmo.core.realm import Realm +from nmmo.lib import material as Material + +from scripted import baselines # this function can be replaced by assertDictEqual # but might be still useful for debugging @@ -51,6 +51,7 @@ def observations_are_equal(source_obs, target_obs, debug=True): keys_obs = list(target_obs.keys()) if keys_src != keys_obs: if debug: + #print("entities don't match") logging.error("entities don't match") return False @@ -59,6 +60,7 @@ def observations_are_equal(source_obs, target_obs, debug=True): ent_tgt = target_obs[k] if list(ent_src.keys()) != list(ent_tgt.keys()): if debug: + #print(f"entries don't match. key: {k}") logging.error("entries don't match. key: %s", str(k)) return False @@ -73,6 +75,7 @@ def observations_are_equal(source_obs, target_obs, debug=True): obj_tgt = ent_tgt[o] if np.sum(obj_src != obj_tgt) > 0: if debug: + #print(f"objects don't match. key: {k}, obj: {o}") logging.error("objects don't match. key: %s, obj: %s", str(k), str(o)) return False @@ -116,7 +119,6 @@ class ScriptedAgentTestConfig(nmmo.config.Small, nmmo.config.AllGameSystems): PLAYER_DEATH_FOG = 5 - SPECIALIZE = True PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector,baselines.Carver, baselines.Alchemist, @@ -143,9 +145,6 @@ def __init__(self, config: nmmo.config.Config, seed=None): def reset(self, map_id=None, seed=None, options=None): self.actions = {} - # manually resetting the EntityState, ItemState datastore tables - EntityState.State.table(self.realm.datastore).reset() - Item.ItemState.State.table(self.realm.datastore).reset() return super().reset(map_id=map_id, seed=seed, options=options) def _compute_scripted_agent_actions(self, actions): @@ -226,9 +225,14 @@ def _make_item_sig(self): return item_sig - def _setup_env(self, random_seed, check_assert=True): + def _setup_env(self, random_seed, check_assert=True, remove_immunity=False): """ set up a new env and perform initial checks """ - env = ScriptedAgentTestEnv(self.config, seed=random_seed) + config = deepcopy(self.config) + + if remove_immunity: + config.COMBAT_SPAWN_IMMUNITY = 0 + + env = ScriptedAgentTestEnv(config, seed=random_seed) env.reset() # provide money for all @@ -248,6 +252,15 @@ def _setup_env(self, random_seed, check_assert=True): for ent_id, pos in self.spawn_locs.items(): change_spawn_pos(env.realm, ent_id, pos) + # Change entire map to grass to become habitable and non-harvestable + MS = env.config.MAP_SIZE + for i in range(MS): + for j in range(MS): + tile = env.realm.map.tiles[i,j] + tile.material = Material.Grass + tile.material_id.update(Material.Grass.index) + tile.state = Material.Grass(env.config) + env.obs = env._compute_observations() if check_assert: @@ -258,21 +271,21 @@ def _setup_env(self, random_seed, check_assert=True): def _check_ent_mask(self, ent_obs, atn, target_id): assert atn in [action.Give, action.GiveGold], "Invalid action" gym_obs = ent_obs.to_gym() - mask = gym_obs['ActionTargets'][atn][action.Target][:ent_obs.entities.len] > 0 + mask = gym_obs["ActionTargets"][atn.__name__]["Target"][:ent_obs.entities.len] > 0 return target_id in ent_obs.entities.ids[mask] def _check_inv_mask(self, ent_obs, atn, item_sig): assert atn in [action.Destroy, action.Give, action.Sell, action.Use], "Invalid action" gym_obs = ent_obs.to_gym() - mask = gym_obs['ActionTargets'][atn][action.InventoryItem][:ent_obs.inventory.len] > 0 + mask = gym_obs["ActionTargets"][atn.__name__]["InventoryItem"][:ent_obs.inventory.len] > 0 inv_idx = ent_obs.inventory.sig(*item_sig) return ent_obs.inventory.id(inv_idx) in ent_obs.inventory.ids[mask] def _check_mkt_mask(self, ent_obs, item_id): gym_obs = ent_obs.to_gym() - mask = gym_obs['ActionTargets'][action.Buy][action.MarketItem][:ent_obs.market.len] > 0 + mask = gym_obs["ActionTargets"]["Buy"]["MarketItem"][:ent_obs.market.len] > 0 return item_id in ent_obs.market.ids[mask] @@ -356,11 +369,12 @@ def _check_assert_make_action(self, env, atn, test_cond): if atn == action.Give: actions[ent_id] = { action.Give: { action.InventoryItem: env.obs[ent_id].inventory.sig(*cond['item_sig']), - action.Target: cond['tgt_id'] } } + action.Target: env.obs[ent_id].entities.index(cond['tgt_id']) } } elif atn == action.GiveGold: actions[ent_id] = { action.GiveGold: - { action.Target: cond['tgt_id'], action.Price: cond['gold'] } } + { action.Target: env.obs[ent_id].entities.index(cond['tgt_id']), + action.Price: action.Price.index(cond['gold']) } } elif atn == action.Buy: mkt_idx = ent_obs.market.index(cond['item_id']) @@ -374,21 +388,22 @@ def profile_env_step(action_target=True, tasks=None, condition=None): config.PLAYERS = [baselines.Sleeper] # the scripted agents doing nothing config.IMMORTAL = True # otherwise the agents will die config.PROVIDE_ACTION_TARGETS = action_target - env = nmmo.Env(config) + env = nmmo.Env(config, seed=0) if tasks is None: tasks = [] env.reset(seed=0, make_task_fn=lambda: tasks) for _ in range(3): env.step({}) - obs = env._compute_observations() + env.obs = env._compute_observations() + obs = deepcopy(env.obs) test_func = [ ('env.step({}):', lambda: env.step({})), ('env.realm.step():', lambda: env.realm.step({})), ('env._compute_observations():', lambda: env._compute_observations()), ('obs.to_gym(), ActionTarget:', lambda: {a: o.to_gym() for a,o in obs.items()}), - ('env._compute_rewards():', lambda: env._compute_rewards(obs.keys(), {})) + ('env._compute_rewards():', lambda: env._compute_rewards()) ] if condition: