From d4b7d20d9f611abcca0e7f3030099f3bc5aa464d Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 31 May 2023 00:42:46 +0000 Subject: [PATCH 001/113] added a test for realm-level rng --- tests/test_determinism.py | 121 +++++++++++++++++++------------------- tests/testhelpers.py | 8 +-- 2 files changed, 62 insertions(+), 67 deletions(-) diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 9d4ca733f..87aacd358 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -1,75 +1,74 @@ -#from pdb import set_trace as T import unittest - -import logging import random +import numpy as np from tqdm import tqdm from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv -from tests.testhelpers import observations_are_equal, actions_are_equal +from tests.testhelpers import observations_are_equal # 30 seems to be enough to test variety of agent actions TEST_HORIZON = 30 -RANDOM_SEED = random.randint(0, 10000) +RANDOM_SEED = random.randint(0, 100000) + +def rollout_with_seed(env, seed): + init_obs = env.reset(seed=seed) + for _ in tqdm(range(TEST_HORIZON)): + obs, _, _, _ = env.step({}) + event_log = env.realm.event_log.get_data() + + return init_obs, obs, event_log class TestDeterminism(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.horizon = TEST_HORIZON - cls.rand_seed = RANDOM_SEED - cls.config = ScriptedAgentTestConfig() - env = ScriptedAgentTestEnv(cls.config) - - logging.info('TestDeterminism: Setting up the reference env with seed %s', str(cls.rand_seed)) - cls.init_obs_src = env.reset(seed=cls.rand_seed) - cls.actions_src = [] - logging.info('TestDeterminism: Running %s ticks', str(cls.horizon)) - for _ in tqdm(range(cls.horizon)): - nxt_obs_src, _, _, _ = env.step({}) - cls.actions_src.append(env.actions) - cls.final_obs_src = nxt_obs_src - npcs_src = {} - for nid, npc in list(env.realm.npcs.items()): - npcs_src[nid] = npc.packet() - cls.final_npcs_src = npcs_src - - logging.info('TestDeterminism: Setting up the replication env with seed %s', str(cls.rand_seed)) - cls.init_obs_rep = env.reset(seed=cls.rand_seed) - cls.actions_rep = [] - logging.info('TestDeterminism: Running %s ticks', str(cls.horizon)) - for _ in tqdm(range(cls.horizon)): - nxt_obs_rep, _, _, _ = env.step({}) - cls.actions_rep.append(env.actions) - cls.final_obs_rep = nxt_obs_rep - npcs_rep = {} - for nid, npc in list(env.realm.npcs.items()): - npcs_rep[nid] = npc.packet() - cls.final_npcs_rep = npcs_rep - - def test_func_are_observations_equal(self): - self.assertTrue(observations_are_equal(self.init_obs_src, self.init_obs_src)) - self.assertTrue(observations_are_equal(self.final_obs_src, self.final_obs_src)) - self.assertTrue(actions_are_equal(self.actions_src[0], self.actions_src[0])) - self.assertDictEqual(self.final_npcs_src, self.final_npcs_src) - - def test_compare_initial_observations(self): - # assertDictEqual CANNOT replace are_observations_equal - self.assertTrue(observations_are_equal(self.init_obs_src, self.init_obs_rep)) - #self.assertDictEqual(self.init_obs_src, self.init_obs_rep) - - def test_compare_actions(self): - self.assertEqual(len(self.actions_src), len(self.actions_rep)) - for t, action_src in enumerate(self.actions_src): - self.assertTrue(actions_are_equal(action_src, self.actions_rep[t])) - - def test_compare_final_observations(self): - # assertDictEqual CANNOT replace are_observations_equal - self.assertTrue(observations_are_equal(self.final_obs_src, self.final_obs_rep)) - #self.assertDictEqual(self.final_obs_src, self.final_obs_rep) - - def test_compare_final_npcs(self) : - self.assertDictEqual(self.final_npcs_src, self.final_npcs_rep) + def test_single_proc(self): + config = ScriptedAgentTestConfig() + env = ScriptedAgentTestEnv(config) + + # the source run + init_obs_src, final_obs_src, event_log_src = rollout_with_seed(env, RANDOM_SEED) + + # the replication run + init_obs_rep, final_obs_rep, event_log_rep = rollout_with_seed(env, RANDOM_SEED) + + # sanity checks + self.assertTrue(observations_are_equal(init_obs_src, init_obs_src)) + self.assertTrue(observations_are_equal(final_obs_src, final_obs_src)) + + # pylint: disable=expression-not-assigned + # compare the source and replication + self.assertTrue(observations_are_equal(init_obs_src, init_obs_rep)),\ + f"The determinism test failed. Seed: {RANDOM_SEED}." + self.assertTrue(observations_are_equal(final_obs_src, final_obs_rep)),\ + f"The determinism test failed. Seed: {RANDOM_SEED}." # after 30 runs + assert np.array_equal(event_log_src, event_log_rep),\ + f"The determinism test failed. Seed: {RANDOM_SEED}." + + def test_realm_level_rng(self): + # the below test doesn't work now + # having a realm-level random number generator would fix this + # for example see https://github.com/openai/gym/pull/135/files + # how self.np_random is initialized and used + pass + + # config = ScriptedAgentTestConfig() + # env1 = ScriptedAgentTestEnv(config) + # env2 = ScriptedAgentTestEnv(config) + # envs = [env1, env2] + + # init_obs = [env.reset(seed=RANDOM_SEED) for env in envs] + + # for _ in tqdm(range(TEST_HORIZON)): + # # step returns a tuple of (obs, rewards, dones, infos) + # step_results = [env.step({}) for env in envs] + + # event_logs = [env.realm.event_log.get_data() for env in envs] + + # self.assertTrue(observations_are_equal(init_obs[0], init_obs[1])),\ + # f"The multi-env determinism failed. Seed: {RANDOM_SEED}." + # self.assertTrue(observations_are_equal(step_results[0][0], step_results[1][0])),\ + # f"The multi-env determinism failed. Seed: {RANDOM_SEED}." # after 30 runs + # assert np.array_equal(event_logs[0], event_logs[1]),\ + # f"The multi-env determinism failed. Seed: {RANDOM_SEED}." if __name__ == '__main__': diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 19b40e631..683adc161 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -5,13 +5,12 @@ import numpy as np import nmmo - -from scripted import baselines -from nmmo.entity.entity import EntityState from nmmo.core import action from nmmo.systems import item as Item from nmmo.core.realm import Realm +from scripted import baselines + # this function can be replaced by assertDictEqual # but might be still useful for debugging def actions_are_equal(source_atn, target_atn, debug=True): @@ -142,9 +141,6 @@ def __init__(self, config: nmmo.config.Config, seed=None): def reset(self, map_id=None, seed=None, options=None): self.actions = {} - # manually resetting the EntityState, ItemState datastore tables - EntityState.State.table(self.realm.datastore).reset() - Item.ItemState.State.table(self.realm.datastore).reset() return super().reset(map_id=map_id, seed=seed, options=options) def _compute_scripted_agent_actions(self, actions): From f44b8105c59bbcff05c83773a32c72c0661245b5 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 8 Jun 2023 02:51:56 +0000 Subject: [PATCH 002/113] cleaned up determinism tests --- tests/test_deterministic_replay.py | 169 ----------------------------- 1 file changed, 169 deletions(-) delete mode 100644 tests/test_deterministic_replay.py diff --git a/tests/test_deterministic_replay.py b/tests/test_deterministic_replay.py deleted file mode 100644 index a8ac69c63..000000000 --- a/tests/test_deterministic_replay.py +++ /dev/null @@ -1,169 +0,0 @@ -#from pdb import set_trace as T -import unittest - -import os -import glob -import pickle -import logging -import random -from typing import Any, Dict - -import numpy as np -from tqdm import tqdm - -from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv -from tests.testhelpers import observations_are_equal - -import nmmo - -TEST_HORIZON = 50 -LOCAL_REPLAY = 'tests/replay_local.pickle' - -def load_replay_file(replay_file): - # load the pickle file - with open(replay_file, 'rb') as handle: - ref_data = pickle.load(handle) - - logging.info('TestDetReplay: Loading the existing replay file with seed %s', - str(ref_data['seed'])) - - seed = ref_data['seed'] - config = ref_data['config'] - map_src = ref_data['map'] - init_obs = ref_data['init_obs'] - init_npcs = ref_data['init_npcs'] - med_obs = ref_data['med_obs'] - actions = ref_data['actions'] - final_obs = ref_data['final_obs'] - final_npcs = ref_data['final_npcs'] - - return seed, config, map_src, init_obs, init_npcs, med_obs, actions, final_obs, final_npcs - - -def make_actions_picklable(actions: Dict[int, Dict[str, Dict[str, Any]]]): - for eid in actions: - for atn, args in actions[eid].items(): - for arg, val in args.items(): - if arg == nmmo.action.Price and not isinstance(val, int): - # : - # convert Discrete_1 to 1 - actions[eid][atn][arg] = val.val - return actions - - -def generate_replay_file(replay_file, test_horizon): - # generate the new data with a new env - seed = random.randint(0, 10000) - logging.info('TestDetReplay: Creating a new replay file with seed %s', str(seed)) - config = ScriptedAgentTestConfig() - env_src = ScriptedAgentTestEnv(config, seed=seed) - init_obs = env_src.reset() - init_npcs = env_src.realm.npcs.packet - - # extract the map - map_src = np.zeros((config.MAP_SIZE, config.MAP_SIZE)) - for r in range(config.MAP_SIZE): - for c in range(config.MAP_SIZE): - map_src[r,c] = env_src.realm.map.tiles[r,c].material_id.val - - med_obs, actions = [], [] - logging.info('TestDetReplay: Running %s ticks', str(test_horizon)) - for _ in tqdm(range(test_horizon)): - nxt_obs, _, _, _ = env_src.step({}) - med_obs.append(nxt_obs) - actions.append(make_actions_picklable(env_src.actions)) - final_obs = nxt_obs - final_npcs = env_src.realm.npcs.packet - - # save to the file - with open(replay_file, 'wb') as handle: - ref_data = {} - ref_data['version'] = nmmo.__version__ # just in case - ref_data['seed'] = seed - ref_data['config'] = config - ref_data['map'] = map_src - ref_data['init_obs'] = init_obs - ref_data['init_npcs'] = init_npcs - ref_data['med_obs'] = med_obs - ref_data['actions'] = actions - ref_data['final_obs'] = final_obs - ref_data['final_npcs'] = final_npcs - - pickle.dump(ref_data, handle) - - return seed, config, map_src, init_obs, init_npcs, med_obs, actions, final_obs, final_npcs - - -class TestDeterministicReplay(unittest.TestCase): - - # CHECK ME: pausing the deterministic replay test while debugging actions/items - # because changes there would most likely to change the game play and make the test fail - __test__ = False - - @classmethod - def setUpClass(cls): - """ - First, check if there is a replay file on the repo that starts with 'replay_repo_' - If there is one, use it. - - Second, check if there a local replay file, which should be named 'replay_local.pickle' - If there is one, use it. If not create one. - - TODO: allow passing a different replay file - """ - # first, look for the repo replay file - replay_files = glob.glob(os.path.join('tests', 'replay_repo_*.pickle')) - if replay_files: - # there may be several, but we only take the first one [0] - cls.seed, cls.config, cls.map_src, cls.init_obs_src, cls.init_npcs_src, \ - cls.med_obs_src,cls.actions, cls.final_obs_src, cls.final_npcs_src = \ - load_replay_file(replay_files[0]) - else: - # if there is no repo replay file, then go with the default local file - if os.path.exists(LOCAL_REPLAY): - cls.seed, cls.config, cls.map_src, cls.init_obs_src, cls.init_npcs_src, \ - cls.med_obs_src, cls.actions, cls.final_obs_src, cls.final_npcs_src = \ - load_replay_file(LOCAL_REPLAY) - else: - cls.seed, cls.config, cls.map_src, cls.init_obs_src, cls.init_npcs_src, \ - cls.med_obs_src, cls.actions, cls.final_obs_src, cls.final_npcs_src = \ - generate_replay_file(LOCAL_REPLAY, TEST_HORIZON) - cls.horizon = len(cls.actions) - - logging.info('TestDetReplay: Setting up the replication env with seed %s', str(cls.seed)) - env_rep = ScriptedAgentTestEnv(cls.config, seed=cls.seed) - cls.init_obs_rep = env_rep.reset() - cls.init_npcs_rep = env_rep.realm.npcs.packet - - # extract the map - cls.map_rep = np.zeros((cls.config.MAP_SIZE, cls.config.MAP_SIZE)) - for r in range(cls.config.MAP_SIZE): - for c in range(cls.config.MAP_SIZE): - cls.map_rep[r,c] = env_rep.realm.map.tiles[r,c].material_id.val - - cls.med_obs_rep, cls.actions_rep = [], [] - logging.info('TestDetReplay: Running %s ticks', str(cls.horizon)) - for t in tqdm(range(cls.horizon)): - nxt_obs_rep, _, _, _ = env_rep.step(cls.actions[t]) - cls.med_obs_rep.append(nxt_obs_rep) - cls.final_obs_rep = nxt_obs_rep - cls.final_npcs_rep = env_rep.realm.npcs.packet - - def test_compare_maps(self): - self.assertEqual(np.sum(self.map_src != self.map_rep), 0) - - def test_compare_init_obs(self): - self.assertTrue(observations_are_equal(self.init_obs_src, self.init_obs_rep)) - - def test_compare_init_npcs(self): - self.assertTrue(observations_are_equal(self.init_npcs_src, self.init_npcs_rep)) - - def test_compare_final_obs(self): - self.assertTrue(observations_are_equal(self.final_obs_src, self.final_obs_rep)) - - def test_compare_final_npcs(self): - self.assertTrue(observations_are_equal(self.final_npcs_src, self.final_npcs_rep)) - - -if __name__ == '__main__': - unittest.main() From d69fc9271af230a57f915cc641ca3fd1bfef527d Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 9 Jun 2023 00:45:56 +0000 Subject: [PATCH 003/113] get eval_fn src from task, predicate --- nmmo/task/predicate_api.py | 63 +++++++++++++++++++++++++++++++++---- nmmo/task/task_api.py | 33 ++++++++++++++++++- tests/task/test_task_api.py | 62 ++++++++++++++++++++++++++++++++++-- 3 files changed, 149 insertions(+), 9 deletions(-) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index e71f2cc19..6be1a3d38 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -124,6 +124,26 @@ def _make_name(self, class_name, args, kwargs) -> str: def __str__(self): return self.name + @abstractmethod + def get_source_code(self) -> str: + """ Returns the actual source code how the game state/progress evaluation is done. + """ + raise NotImplementedError + + @abstractmethod + def get_signature(self) -> List: + """ Returns the signature of the game state/progress evaluation function. + """ + raise NotImplementedError + + @property + def args(self): + return self._args + + @property + def kwargs(self): + return self._kwargs + @property def subject(self): return self._subject @@ -205,12 +225,11 @@ def __init__(self, *args, **kwargs) -> None: self._kwargs = kwargs self.name = self._make_name(fn.__name__, args, kwargs) def _evaluate(self, gs: GameState) -> float: - # pylint: disable=redefined-builtin, unused-variable - __doc = fn.__doc__ - result = fn(gs, *self._args, **self._kwargs) - if isinstance(result, Predicate): - return result(gs) - return result + return fn(gs, *self._args, **self._kwargs) + def get_source_code(self): + return inspect.getsource(fn).strip() + def get_signature(self) -> List: + return list(self._signature.parameters) return FunctionPredicate @@ -245,6 +264,38 @@ def sample(self, config: Config, cls: type[PredicateOperator], **kwargs): else p(None) for p in self._predicates] return cls(*predicates, subject=subject) + def get_source_code(self) -> str: + # NOTE: get_source_code() of the combined predicates returns the joined str + # of each predicate's source code, which may NOT represent what the actual + # predicate is doing + # TODO: try to generate "the source code" that matches + # what the actual instantiated predicate returns, + # which perhaps should reflect the actual agent ids, etc... + src_list = [] + for pred in self._predicates: + if isinstance(pred, Predicate): + src_list.append(pred.get_source_code()) + return '\n\n'.join(src_list).strip() + + def get_signature(self): + # TODO: try to generate the correct signature + return [] + + @property + def args(self): + # TODO: try to generate the correct args + return [] + + @property + def kwargs(self): + # NOTE: This is incorrect implementation. kwargs of the combined predicates returns + # all summed kwargs dict, which can OVERWRITE the values of duplicated keys + # TODO: try to match the eval function and kwargs, which can be correctly used downstream + # for pred in self._predicates: + # if isinstance(pred, Predicate): + # kwargs.update(pred.kwargs) + return {} + class OR(PredicateOperator, Predicate): def __init__(self, *predicates: Predicate, subject: Group=None): super().__init__(lambda n: n>0, *predicates, subject=subject) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 8bc5d587a..557c5ff3c 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -2,6 +2,7 @@ from typing import Callable, Iterable, Dict, List, Union, Tuple from types import FunctionType from abc import ABC +import inspect from nmmo.task.group import Group from nmmo.task.predicate_api import Predicate, make_predicate, arg_to_string @@ -25,7 +26,6 @@ def __init__(self, self._progress = 0.0 self._completed = False self._reward_multiplier = reward_multiplier - self.name = self._make_name(self.__class__.__name__, eval_fn=eval_fn, assignee=self._assignee) @@ -87,6 +87,37 @@ def _make_name(self, class_name, **kwargs) -> str: def __str__(self): return self.name + @property + def subject(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.subject.agents + return self.assignee + + def get_source_code(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.get_source_code() + return inspect.getsource(self._eval_fn).strip() + + def get_signature(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.get_signature() + signature = inspect.signature(self._eval_fn) + return list(signature.parameters) + + @property + def args(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.args + # the function _eval_fn must only take gs + return [] + + @property + def kwargs(self): + if isinstance(self._eval_fn, Predicate): + return self._eval_fn.kwargs + # the function _eval_fn must only take gs + return {} + class OngoingTask(Task): def _map_progress_to_reward(self, gs) -> float: """Keep returning the progress reward after the task is completed. diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 76356b2f7..916f5cb1b 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -57,21 +57,45 @@ def test_predicate_operators(self): # NOTE: only the instantiated predicate can be used with operators like below mock_gs = MockGameState() + # get the individual predicate's source code + self.assertEqual(SUCCESS.get_source_code(), + 'def Success(gs, subject: Group):\n return True') + self.assertEqual(FAILURE.get_source_code(), + 'def Failure(gs, subject: Group):\n return False') + # AND (&), OR (|), NOT (~) pred1 = SUCCESS & FAILURE self.assertFalse(pred1(mock_gs)) + # NOTE: get_source_code() of the combined predicates returns the joined str + # of each predicate's source code, which may NOT represent what the actual + # predicate is doing + self.assertEqual(pred1.get_source_code(), + 'def Success(gs, subject: Group):\n return True\n\n'+ + 'def Failure(gs, subject: Group):\n return False') pred2 = SUCCESS | FAILURE | SUCCESS self.assertTrue(pred2(mock_gs)) + self.assertEqual(pred2.get_source_code(), + 'def Success(gs, subject: Group):\n return True\n\n'+ + 'def Failure(gs, subject: Group):\n return False\n\n'+ + 'def Success(gs, subject: Group):\n return True') pred3 = SUCCESS & ~ FAILURE & SUCCESS self.assertTrue(pred3(mock_gs)) + # NOTE: demonstrating the above point -- it just returns the functions + # NOT what this predicate actually evaluates. + self.assertEqual(pred2.get_source_code(), + pred3.get_source_code()) # predicate math pred4 = 0.1 * SUCCESS + 0.3 self.assertEqual(pred4(mock_gs), 0.4) self.assertEqual(pred4.name, "(ADD_(MUL_(Success_(0,))_0.1)_0.3)") + # NOTE: demonstrating the above point again, -- it just returns the functions + # NOT what this predicate actually evaluates. + self.assertEqual(pred4.get_source_code(), + 'def Success(gs, subject: Group):\n return True') pred5 = 0.3 * SUCCESS - 1 self.assertEqual(pred5(mock_gs), 0.0) # cannot go below 0 @@ -157,13 +181,27 @@ def test_task_api_with_predicate(self): fake_pred_cls = make_predicate(Fake) mock_gs = MockGameState() - predicate = fake_pred_cls(Group(2), 1, Item.Hat, Action.Melee) + group = Group(2) + item = Item.Hat + action = Action.Melee + predicate = fake_pred_cls(group, a=1, b=item, c=action) + self.assertEqual(predicate.get_source_code(), + 'def Fake(gs, subject, a,b,c):\n return False') + self.assertEqual(predicate.get_signature(), ['gs', 'subject', 'a', 'b', 'c']) + self.assertEqual(predicate.args, [group]) + self.assertDictEqual(predicate.kwargs, {'a': 1, 'b': item, 'c': action}) + assignee = [1,2,3] # list of agent ids task = predicate.create_task(assignee=assignee) rewards, infos = task.compute_rewards(mock_gs) self.assertEqual(task.name, # contains predicate name and assignee list - "(Task_eval_fn:(Fake_(2,)_1_Hat_Melee)_assignee:(1,2,3))") + "(Task_eval_fn:(Fake_(2,)_a:1_b:Hat_c:Melee)_assignee:(1,2,3))") + self.assertEqual(task.get_source_code(), + 'def Fake(gs, subject, a,b,c):\n return False') + self.assertEqual(task.get_signature(), ['gs', 'subject', 'a', 'b', 'c']) + self.assertEqual(task.args, [group]) + self.assertDictEqual(task.kwargs, {'a': 1, 'b': item, 'c': action}) for agent_id in assignee: self.assertEqual(rewards[agent_id], 0) self.assertEqual(infos[agent_id]['progress'], 0) # progress (False -> 0) @@ -182,6 +220,14 @@ def is_agent_1(gs): self.assertEqual(task.name, # contains predicate name and assignee list "(Task_eval_fn:is_agent_1_assignee:(1,2,3))") + self.assertEqual(task.get_source_code(), + 'def is_agent_1(gs):\n ' + + 'return any(agent_id == 1 for agent_id in subject.agents)') + self.assertEqual(task.get_signature(), ['gs']) + self.assertEqual(task.args, []) + self.assertDictEqual(task.kwargs, {}) + self.assertEqual(task.subject, tuple(assignee)) + self.assertEqual(task.assignee, tuple(assignee)) for agent_id in assignee: self.assertEqual(rewards[agent_id], 1) self.assertEqual(infos[agent_id]['progress'], 1) # progress (True -> 1) @@ -206,6 +252,18 @@ def PracticeFormation(gs, subject, dist, num_tick): env = Env(config) env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec])) + task = env.tasks[0] + self.assertEqual(task.name, + '(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)'+ + '_assignee:(1,2,3))') + self.assertEqual(task.get_source_code(), + 'def PracticeFormation(gs, subject, dist, num_tick):\n '+ + 'return AllMembersWithinRange(gs, subject, dist) * '+ + 'TickGE(gs, subject, num_tick)') + self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) + self.assertEqual(task.subject, tuple(teams[0])) + self.assertEqual(task.kwargs, task_spec[2]) + self.assertEqual(task.assignee, tuple(teams[0])) # move agent 2, 3 to agent 1's pos for agent_id in [2,3]: change_spawn_pos(env.realm, agent_id, From 17c95b79d99829bc0cd7274e30e040fd2f3dea36 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 9 Jun 2023 06:38:34 +0000 Subject: [PATCH 004/113] clean up eval functions to feed into llm --- nmmo/task/base_predicates.py | 124 ++++++--------------------- tests/task/test_manual_curriculum.py | 85 +++++++++--------- tests/task/test_task_api.py | 22 +---- 3 files changed, 74 insertions(+), 157 deletions(-) diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 4f8dbaf14..3fba63874 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -6,7 +6,6 @@ from nmmo.task.group import Group from nmmo.task.game_state import GameState -from nmmo.task import constraint from nmmo.systems import skill as nmmo_skill from nmmo.systems.skill import Skill from nmmo.systems.item import Item @@ -16,50 +15,38 @@ def norm(progress): return max(min(progress, 1.0), 0.0) -def Success(gs: GameState, - subject: Group): +def Success(gs: GameState, subject: Group): ''' Returns True. For debugging. ''' return True -def TickGE(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - num_tick: int = constraint.ScalarConstraint()): +def TickGE(gs: GameState, subject: Group, num_tick: int): """True if the current tick is greater than or equal to the specified num_tick. Is progress counter. """ return norm(gs.current_tick / num_tick) -def CanSeeTile(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - tile_type: type[Material]= constraint.MATERIAL_CONSTRAINT): +def CanSeeTile(gs: GameState, subject: Group, tile_type: type[Material]): """ True if any agent in subject can see a tile of tile_type """ return any(tile_type.index in t for t in subject.obs.tile.material_id) -def StayAlive(gs: GameState, - subject: Group = constraint.TEAM_GROUPS): +def StayAlive(gs: GameState, subject: Group): """True if all subjects are alive. """ return count(subject.health > 0) == len(subject) -def AllDead(gs: GameState, - subject: Group = constraint.TEAM_GROUPS): +def AllDead(gs: GameState, subject: Group): """True if all subjects are dead. """ return norm(1.0 - count(subject.health) / len(subject)) -def OccupyTile(gs: GameState, - subject: Group, - row: int = constraint.COORDINATE_CONSTRAINT, - col: int = constraint.COORDINATE_CONSTRAINT): +def OccupyTile(gs: GameState, subject: Group, row: int, col: int): """True if any subject agent is on the desginated tile. """ return np.any((subject.row == row) & (subject.col == col)) -def AllMembersWithinRange(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - dist: int = constraint.COORDINATE_CONSTRAINT): +def AllMembersWithinRange(gs: GameState, subject: Group, dist: int): """True if the max l-inf distance of teammates is less than or equal to dist """ @@ -69,23 +56,17 @@ def AllMembersWithinRange(gs: GameState, return 1.0 return norm(dist / current_dist) -def CanSeeAgent(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - target: int = constraint.AGENT_NUMBER_CONSTRAINT): +def CanSeeAgent(gs: GameState, subject: Group, target: int): """True if obj_agent is present in the subjects' entities obs. """ return any(target in e.ids for e in subject.obs.entities) -def CanSeeGroup(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - target: Iterable[int] = constraint.AgentListConstraint): +def CanSeeGroup(gs: GameState, subject: Group, target: Iterable[int]): """ Returns True if subject can see any of target """ return any(CanSeeAgent(gs, subject, agent) for agent in target) -def DistanceTraveled(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - dist: int = constraint.ScalarConstraint()): +def DistanceTraveled(gs: GameState, subject: Group, dist: int): """True if the summed l-inf distance between each agent's current pos and spawn pos is greater than or equal to the specified _dist. """ @@ -96,41 +77,27 @@ def DistanceTraveled(gs: GameState, dists = utils.linf(list(zip(r,c)),[gs.spawn_pos[id_] for id_ in subject.entity.id]) return norm(dists.sum() / dist) -def AttainSkill(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - skill: Skill = constraint.SKILL_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def AttainSkill(gs: GameState, subject: Group, skill: Skill, level: int, num_agent: int): """True if the number of agents having skill level GE level is greather than or equal to num_agent """ skill_level = getattr(subject,skill.__name__.lower() + '_level') return norm(sum(skill_level >= level) / num_agent) -def CountEvent(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - event: str = constraint.EVENTCODE_CONSTRAINT, - N: int = constraint.EVENT_NUMBER_CONSTRAINT): +def CountEvent(gs: GameState, subject: Group, event: str, N: int): """True if the number of events occured in subject corresponding to event >= N """ return norm(len(getattr(subject.event, event)) / N) -def ScoreHit(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, - N: int = constraint.EVENT_NUMBER_CONSTRAINT): +def ScoreHit(gs: GameState, subject: Group, combat_style: type[Skill], N: int): """True if the number of hits scored in style combat_style >= count """ hits = subject.event.SCORE_HIT.combat_style == combat_style.SKILL_ID return norm(count(hits) / N) -def DefeatEntity(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - agent_type: str = constraint.AGENT_TYPE_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def DefeatEntity(gs: GameState, subject: Group, agent_type: str, level: int, num_agent: int): """True if the number of agents (agent_type, >= level) defeated is greater than or equal to num_agent """ @@ -143,50 +110,36 @@ def DefeatEntity(gs: GameState, return norm(count(defeated) / num_agent) return 1.0 -def HoardGold(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def HoardGold(gs: GameState, subject: Group, amount: int): """True iff the summed gold of all teammate is greater than or equal to amount. """ return norm(subject.gold.sum() / amount) -def EarnGold(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def EarnGold(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold earned is greater than or equal to amount. """ return norm(subject.event.EARN_GOLD.gold.sum() / amount) -def SpendGold(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def SpendGold(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold spent is greater than or equal to amount. """ return norm(subject.event.BUY_ITEM.gold.sum() / amount) -def MakeProfit(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - amount: int = constraint.GOLD_CONSTRAINT): +def MakeProfit(gs: GameState, subject: Group, amount: int): """ True if the total amount of gold earned-spent is greater than or equal to amount. """ profits = subject.event.EARN_GOLD.gold.sum() costs = subject.event.BUY_ITEM.gold.sum() return norm((profits-costs) / amount) -def InventorySpaceGE(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - space: int = constraint.INVENTORY_CONSTRAINT): +def InventorySpaceGE(gs: GameState, subject: Group, space: int): """True if the inventory space of every subjects is greater than or equal to the space. Otherwise false. """ max_space = gs.config.ITEM_INVENTORY_CAPACITY return all(max_space - inv.len >= space for inv in subject.obs.inventory) -def OwnItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.INVENTORY_CONSTRAINT): +def OwnItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if the number of items owned (_item_type, >= level) is greater than or equal to quantity. """ @@ -194,11 +147,7 @@ def OwnItem(gs: GameState, (subject.item.level >= level) return norm(sum(subject.item.quantity[owned]) / quantity) -def EquipItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.EQUIPABLE_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def EquipItem(gs: GameState, subject: Group, item: type[Item], level: int, num_agent: int): """True if the number of agents that equip the item (_item_type, >=_level) is greater than or equal to _num_agent. """ @@ -209,11 +158,8 @@ def EquipItem(gs: GameState, return norm(count(equipped) / num_agent) return 1.0 -def FullyArmed(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - combat_style: type[Skill] = constraint.COMBAT_SKILL_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - num_agent: int = constraint.AGENT_NUMBER_CONSTRAINT): +def FullyArmed(gs: GameState, subject: Group, + combat_style: type[Skill], level: int, num_agent: int): """True if the number of fully equipped agents is greater than or equal to _num_agent Otherwise false. To determine fully equipped, we look at hat, top, bottom, weapon, ammo, respectively, @@ -236,44 +182,28 @@ def FullyArmed(gs: GameState, return norm((equipment_numbers >= len(item_ids.items())).sum() / num_agent) return 1.0 -def ConsumeItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.CONSUMABLE_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def ConsumeItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity consumed of item type above level is >= quantity """ type_flt = subject.event.CONSUME_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.CONSUME_ITEM.level >= level return norm(subject.event.CONSUME_ITEM.number[type_flt & lvl_flt].sum() / quantity) -def HarvestItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.HARVEST_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def HarvestItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity harvested of item type above level is >= quantity """ type_flt = subject.event.HARVEST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.HARVEST_ITEM.level >= level return norm(subject.event.HARVEST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -def ListItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def ListItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity listed of item type above level is >= quantity """ type_flt = subject.event.LIST_ITEM.type == item.ITEM_TYPE_ID lvl_flt = subject.event.LIST_ITEM.level >= level return norm(subject.event.LIST_ITEM.number[type_flt & lvl_flt].sum() / quantity) -def BuyItem(gs: GameState, - subject: Group = constraint.TEAM_GROUPS, - item: type[Item] = constraint.ITEM_CONSTRAINT, - level: int = constraint.PROGRESSION_CONSTRAINT, - quantity: int = constraint.EVENT_NUMBER_CONSTRAINT): +def BuyItem(gs: GameState, subject: Group, item: type[Item], level: int, quantity: int): """True if total quantity purchased of item type above level is >= quantity """ type_flt = subject.event.BUY_ITEM.type == item.ITEM_TYPE_ID diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index 5fcc6bbcd..ab6bccb73 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -1,9 +1,10 @@ '''Manual test for creating learning curriculum manually''' # pylint: disable=invalid-name,redefined-outer-name,bad-builtin +# pylint: disable=wildcard-import,unused-wildcard-import import nmmo -import nmmo.lib.material as Material -from nmmo.task import base_predicates as bp +import nmmo.lib.material as m +from nmmo.task.base_predicates import * from nmmo.task.task_api import OngoingTask, make_team_tasks from nmmo.task import constraint as c @@ -43,42 +44,42 @@ essential_skills = ['GO_FARTHEST', 'EAT_FOOD', 'DRINK_WATER', 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] for event_code in essential_skills: - task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + task_spec += [('agent', CountEvent, {'event': event_code, 'N': cnt}) for cnt in EVENT_NUMBER_GOAL] # item/market skills, which happen less frequently or should not do too much item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', 'GIVE_GOLD', 'LIST_ITEM', 'EARN_GOLD', 'BUY_ITEM'] for event_code in item_skills: - task_spec += [('agent', bp.CountEvent, {'event': event_code, 'N': cnt}) + task_spec += [('agent', CountEvent, {'event': event_code, 'N': cnt}) for cnt in INFREQUENT_GOAL] # less than 10 # find resource tiles -for resource in Material.Harvestable: +for resource in m.Harvestable: for reward_to in ['agent', 'team']: - task_spec.append((reward_to, bp.CanSeeTile, {'tile_type': resource})) + task_spec.append((reward_to, CanSeeTile, {'tile_type': resource})) # stay alive ... like ... for 300 ticks # i.e., getting incremental reward for each tick alive as an individual or a team for reward_to in ['agent', 'team']: for num_tick in STAY_ALIVE_GOAL: - task_spec.append((reward_to, bp.TickGE, {'num_tick': num_tick})) + task_spec.append((reward_to, TickGE, {'num_tick': num_tick})) # protect the leader: get reward for each tick the leader is alive -task_spec.append(('team', bp.StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) +task_spec.append(('team', StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) # want the other team or team leader to die for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: - task_spec.append(('team', bp.AllDead, {'target': target})) + task_spec.append(('team', AllDead, {'target': target})) # occupy the center tile, assuming the Medium map size # TODO: it'd be better to have some intermediate targets toward the center for reward_to in ['agent', 'team']: - task_spec.append((reward_to, bp.OccupyTile, {'row': 80, 'col': 80})) # TODO: get config + task_spec.append((reward_to, OccupyTile, {'row': 80, 'col': 80})) # TODO: get config # form a tight formation, for a certain number of ticks def PracticeFormation(gs, subject, dist, num_tick): - return bp.AllMembersWithinRange(gs, subject, dist) * bp.TickGE(gs, subject, num_tick) + return AllMembersWithinRange(gs, subject, dist) * TickGE(gs, subject, num_tick) for dist in [1, 3, 5, 10]: task_spec += [('team', PracticeFormation, {'dist': dist, 'num_tick': num_tick}) for num_tick in STAY_ALIVE_GOAL] @@ -86,76 +87,76 @@ def PracticeFormation(gs, subject, dist, num_tick): # find the other team leader for reward_to in ['agent', 'team']: for target in ['left_team_leader', 'right_team_leader']: - task_spec.append((reward_to, bp.CanSeeAgent, {'target': target})) + task_spec.append((reward_to, CanSeeAgent, {'target': target})) # find the other team (any agent) for reward_to in ['agent']: #, 'team']: for target in ['left_team', 'right_team']: - task_spec.append((reward_to, bp.CanSeeGroup, {'target': target})) + task_spec.append((reward_to, CanSeeGroup, {'target': target})) # explore the map -- sum the l-inf distance traveled by all subjects for dist in [10, 20, 30, 50, 100]: # each agent - task_spec.append(('agent', bp.DistanceTraveled, {'dist': dist})) + task_spec.append(('agent', DistanceTraveled, {'dist': dist})) for dist in [30, 50, 70, 100, 150, 200, 300, 500]: # summed over all team members - task_spec.append(('team', bp.DistanceTraveled, {'dist': dist})) + task_spec.append(('team', DistanceTraveled, {'dist': dist})) # level up a skill for skill in SKILLS: for level in LEVEL_GOAL: # since this is an agent task, num_agent must be 1 - task_spec.append(('agent', bp.AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) + task_spec.append(('agent', AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) # make attain skill a team task by varying the number of agents for skill in SKILLS: for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.AttainSkill, + task_spec.append(('team', AttainSkill, {'skill': skill, 'level': level,'num_agent': num_agent})) # practice specific combat style for style in COMBAT_STYLE: for cnt in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + task_spec.append(('agent', ScoreHit, {'combat_style': style, 'N': cnt})) for cnt in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.ScoreHit, {'combat_style': style, 'N': cnt})) + task_spec.append(('team', ScoreHit, {'combat_style': style, 'N': cnt})) # defeat agents of a certain level as a team for agent_type in ['player', 'npc']: # c.AGENT_TYPE_CONSTRAINT for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.DefeatEntity, + task_spec.append(('team', DefeatEntity, {'agent_type': agent_type, 'level': level, 'num_agent': num_agent})) # hoarding gold -- evaluated on the current gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.HoardGold, {'amount': amount})) + task_spec.append(('agent', HoardGold, {'amount': amount})) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.HoardGold, {'amount': amount})) + task_spec.append(('team', HoardGold, {'amount': amount})) # earning gold -- evaluated on the total gold earned by selling items # does NOT include looted gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.EarnGold, {'amount': amount})) + task_spec.append(('agent', EarnGold, {'amount': amount})) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.EarnGold, {'amount': amount})) + task_spec.append(('team', EarnGold, {'amount': amount})) # spending gold, by buying items for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.SpendGold, {'amount': amount})) + task_spec.append(('agent', SpendGold, {'amount': amount})) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.SpendGold, {'amount': amount})) + task_spec.append(('team', SpendGold, {'amount': amount})) # making profits by trading -- only buying and selling are counted for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', bp.MakeProfit, {'amount': amount})) + task_spec.append(('agent', MakeProfit, {'amount': amount})) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', bp.MakeProfit, {'amount': amount})) + task_spec.append(('team', MakeProfit, {'amount': amount})) # managing inventory space def PracticeInventoryManagement(gs, subject, space, num_tick): - return bp.InventorySpaceGE(gs, subject, space) * bp.TickGE(gs, subject, num_tick) + return InventorySpaceGE(gs, subject, space) * TickGE(gs, subject, num_tick) for space in [2, 4, 8]: task_spec += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) for num_tick in STAY_ALIVE_GOAL] @@ -166,26 +167,26 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.OwnItem, + task_spec.append(('agent', OwnItem, {'item': item, 'level': level, 'quantity': quantity})) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.OwnItem, + task_spec.append(('team', OwnItem, {'item': item, 'level': level, 'quantity': quantity})) # equip item, evaluated on the current inventory and equipment status for item in EQUIP_ITEM: for level in LEVEL_GOAL: # agent task - task_spec.append(('agent', bp.EquipItem, + task_spec.append(('agent', EquipItem, {'item': item, 'level': level, 'num_agent': 1})) # team task for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.EquipItem, + task_spec.append(('team', EquipItem, {'item': item, 'level': level, 'num_agent': num_agent})) # consume items (ration, potion), evaluated based on the event log @@ -194,13 +195,13 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.ConsumeItem, + task_spec.append(('agent', ConsumeItem, {'item': item, 'level': level, 'quantity': quantity})) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.ConsumeItem, + task_spec.append(('team', ConsumeItem, {'item': item, 'level': level, 'quantity': quantity})) # harvest items, evaluated based on the event log @@ -209,13 +210,13 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.HarvestItem, + task_spec.append(('agent', HarvestItem, {'item': item, 'level': level, 'quantity': quantity})) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.HarvestItem, + task_spec.append(('team', HarvestItem, {'item': item, 'level': level, 'quantity': quantity})) # list items, evaluated based on the event log @@ -224,13 +225,13 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.ListItem, + task_spec.append(('agent', ListItem, {'item': item, 'level': level, 'quantity': quantity})) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.ListItem, + task_spec.append(('team', ListItem, {'item': item, 'level': level, 'quantity': quantity})) # buy items, evaluated based on the event log @@ -239,13 +240,13 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', bp.BuyItem, + task_spec.append(('agent', BuyItem, {'item': item, 'level': level, 'quantity': quantity})) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', bp.BuyItem, + task_spec.append(('team', BuyItem, {'item': item, 'level': level, 'quantity': quantity})) # fully armed, evaluated based on the current player/inventory status @@ -253,7 +254,7 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', bp.FullyArmed, + task_spec.append(('team', FullyArmed, {'combat_style': style, 'level': level, 'num_agent': num_agent})) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 916f5cb1b..57ee5121a 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -7,7 +7,7 @@ from nmmo.task.predicate_api import make_predicate, Predicate from nmmo.task.task_api import Task, make_team_tasks from nmmo.task.group import Group -from nmmo.task.constraint import InvalidConstraint, ScalarConstraint +from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange from nmmo.systems import item as Item @@ -31,6 +31,7 @@ class MockGameState(): def __init__(self): # pylint: disable=super-init-not-called self.config = nmmo.config.Default() + self.current_tick = -1 self.cache_result = {} self.get_subject_view = lambda _: None @@ -130,28 +131,12 @@ def test_predicate_name(self): "(SUB_(ADD_(MUL_(Failure_(0,))_(Fake_(2,)_1_Hat_Melee))_0.3)_0.4))") def test_constraint(self): - # pylint: disable=not-callable,no-value-for-parameter - # define predicate classes from functions - - # make predicate class from function - success_pred_cls = make_predicate(Success) - tickge_pred_cls = make_predicate(TickGE) - self.assertTrue(isinstance(TickGE, FunctionType)) - mock_gs = MockGameState() - good = success_pred_cls(Group(0)) - bad = success_pred_cls(Group(99999)) - good(mock_gs) - self.assertRaises(InvalidConstraint,lambda: bad(mock_gs)) - scalar = ScalarConstraint(low=-10,high=10) for _ in range(10): self.assertTrue(scalar.sample(mock_gs.config)<10) self.assertTrue(scalar.sample(mock_gs.config)>=-10) - bad = tickge_pred_cls(Group(0), -1) - self.assertRaises(InvalidConstraint, lambda: bad(mock_gs)) - def test_sample_predicate(self): # pylint: disable=no-value-for-parameter,expression-not-assigned # make predicate class from function @@ -160,7 +145,8 @@ def test_sample_predicate(self): # if the predicate class is instantiated without the subject, mock_gs = MockGameState() - predicate = canseegrp_pred_cls() & tickge_pred_cls() + predicate = canseegrp_pred_cls(subject=GroupConstraint, target=AGENT_LIST_CONSTRAINT) &\ + tickge_pred_cls(subject=GroupConstraint, num_tick=ScalarConstraint) self.assertEqual(predicate.name, "(AND_(CanSeeGroup_subject:GroupConstraint_target:AgentListConstraint)_"+\ "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") From 39aa4f4caa29ced8588393949faae5a58c0449f9 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 9 Jun 2023 08:02:37 +0000 Subject: [PATCH 005/113] corrected type hints --- nmmo/task/predicate_api.py | 2 +- nmmo/task/task_api.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index 6be1a3d38..1f57c74d6 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -148,7 +148,7 @@ def kwargs(self): def subject(self): return self._subject - def create_task(self, task_cls: Task=None, + def create_task(self, task_cls: type[Task]=None, assignee: Union[Iterable[int], int]=None, reward_multiplier=1.0) -> Task: """ Creates a task from this predicate""" diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 557c5ff3c..7eb0dcb13 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -133,9 +133,9 @@ def _map_progress_to_reward(self, gs) -> float: # The same task is assigned each agent in agent_list individually # with the agent as the predicate subject and task assignee -def make_same_task(predicate: Union[Predicate, Callable], +def make_same_task(predicate: Union[type[Predicate], Callable], agent_list: Iterable[int], - task_cls = Task, **kwargs) -> List[Task]: + task_cls = type[Task], **kwargs) -> List[Task]: # if a function is provided, make it a predicate class if isinstance(predicate, FunctionType): predicate = make_predicate(predicate) From f40120883b6296cc03ceb2e6c60415a1849acf43 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 9 Jun 2023 08:09:15 +0000 Subject: [PATCH 006/113] fixed error --- nmmo/task/task_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 7eb0dcb13..d7ee0e118 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -135,7 +135,7 @@ def _map_progress_to_reward(self, gs) -> float: # with the agent as the predicate subject and task assignee def make_same_task(predicate: Union[type[Predicate], Callable], agent_list: Iterable[int], - task_cls = type[Task], **kwargs) -> List[Task]: + task_cls = Task, **kwargs) -> List[Task]: # if a function is provided, make it a predicate class if isinstance(predicate, FunctionType): predicate = make_predicate(predicate) From 2e0ce2a32bbf375dd40b200907a24159824d2343 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 9 Jun 2023 08:20:58 +0000 Subject: [PATCH 007/113] fix errors --- nmmo/task/predicate_api.py | 8 ++++---- nmmo/task/task_api.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index 1f57c74d6..bfa54a70c 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Callable, List, Optional, Tuple, Union, Iterable, TYPE_CHECKING +from typing import Callable, List, Optional, Tuple, Union, Iterable, Type, TYPE_CHECKING from types import FunctionType from abc import ABC, abstractmethod import inspect @@ -148,7 +148,7 @@ def kwargs(self): def subject(self): return self._subject - def create_task(self, task_cls: type[Task]=None, + def create_task(self, task_cls: Type[Task]=None, assignee: Union[Iterable[int], int]=None, reward_multiplier=1.0) -> Task: """ Creates a task from this predicate""" @@ -191,7 +191,7 @@ def arg_to_string(arg): ################################################ -def make_predicate(fn: Callable) -> type[Predicate]: +def make_predicate(fn: Callable) -> Type[Predicate]: """ Syntactic sugar API for defining predicates from function """ signature = inspect.signature(fn) @@ -258,7 +258,7 @@ def check(self, config: Config) -> bool: return all((p.check(config) if isinstance(p, Predicate) else True for p in self._predicates)) - def sample(self, config: Config, cls: type[PredicateOperator], **kwargs): + def sample(self, config: Config, cls: Type[PredicateOperator], **kwargs): subject = self._subject_argument if 'subject' not in kwargs else kwargs['subject'] predicates = [p.sample(config, **kwargs) if isinstance(p, Predicate) else p(None) for p in self._predicates] diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index d7ee0e118..d39b7961e 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -1,5 +1,5 @@ # pylint: disable=unused-import -from typing import Callable, Iterable, Dict, List, Union, Tuple +from typing import Callable, Iterable, Dict, List, Union, Tuple, Type from types import FunctionType from abc import ABC import inspect @@ -133,9 +133,9 @@ def _map_progress_to_reward(self, gs) -> float: # The same task is assigned each agent in agent_list individually # with the agent as the predicate subject and task assignee -def make_same_task(predicate: Union[type[Predicate], Callable], +def make_same_task(predicate: Union[Type[Predicate], Callable], agent_list: Iterable[int], - task_cls = Task, **kwargs) -> List[Task]: + task_cls: Type[Task]=Task, **kwargs) -> List[Task]: # if a function is provided, make it a predicate class if isinstance(predicate, FunctionType): predicate = make_predicate(predicate) From 1e30a25e3b037611bfff07b4cfafec6c43528487 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 9 Jun 2023 22:31:59 +0000 Subject: [PATCH 008/113] added support for task_spec_with_embedding --- nmmo/core/env.py | 16 +++++++++++++++ nmmo/task/predicate_api.py | 6 ++++-- nmmo/task/task_api.py | 39 +++++++++++++++++++++++++++++-------- tests/task/test_task_api.py | 28 ++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 10 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 7537895e4..843704311 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -43,6 +43,7 @@ def __init__(self, self.game_state = None # Default task: rewards 1 each turn agent is alive self.tasks = task_api.nmmo_default_task(self.possible_agents) + self.agent_task_map = None # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) @@ -162,9 +163,20 @@ def reset(self, map_id=None, seed=None, options=None, else: for task in self.tasks: task.reset() + self.agent_task_map = self._map_task_to_agent() return {a: o.to_gym() for a,o in self.obs.items()} + def _map_task_to_agent(self): + agent_task_map: Dict[int, List[task_api.Task]] = {} + for task in self.tasks: + for agent_id in task.assignee: + if agent_id in agent_task_map: + agent_task_map[agent_id].append(task) + else: + agent_task_map[agent_id] = [task] + return agent_task_map + def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Simulates one game tick or timestep @@ -388,6 +400,10 @@ def _compute_observations(self): inventory = Item.Query.owned_by(self.realm.datastore, agent_id) + # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are + # available in each task instance, via task.embedding + # CHECK ME: do we pass in self.agent_task_map[agent_id], + # so that we can include task embedding in the obs? obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index bfa54a70c..f16dd2868 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -150,7 +150,8 @@ def subject(self): def create_task(self, task_cls: Type[Task]=None, assignee: Union[Iterable[int], int]=None, - reward_multiplier=1.0) -> Task: + reward_multiplier=1.0, + task_embedding=None) -> Task: """ Creates a task from this predicate""" if task_cls is None: from nmmo.task.task_api import Task @@ -160,7 +161,8 @@ def create_task(self, task_cls: Type[Task]=None, # the new task is assigned to this predicate's subject assignee = self._subject.agents - return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier) + return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier, + embedding=task_embedding) def __and__(self, other): return AND(self, other) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index d39b7961e..ceb1db0be 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -16,7 +16,8 @@ class Task(ABC): def __init__(self, eval_fn: Callable, assignee: Union[Iterable[int], int], - reward_multiplier = 1.0): + reward_multiplier = 1.0, + embedding = None): if isinstance(assignee, int): self._assignee = (assignee,) else: @@ -26,6 +27,7 @@ def __init__(self, self._progress = 0.0 self._completed = False self._reward_multiplier = reward_multiplier + self._embedding = embedding self.name = self._make_name(self.__class__.__name__, eval_fn=eval_fn, assignee=self._assignee) @@ -45,6 +47,10 @@ def completed(self) -> bool: def reward_multiplier(self) -> float: return self._reward_multiplier + @property + def embedding(self): + return self._embedding + def _map_progress_to_reward(self, gs) -> float: """ The default reward is the diff between the old and new progress. Once the task is completed, no more reward is provided. @@ -135,12 +141,15 @@ def _map_progress_to_reward(self, gs) -> float: # with the agent as the predicate subject and task assignee def make_same_task(predicate: Union[Type[Predicate], Callable], agent_list: Iterable[int], - task_cls: Type[Task]=Task, **kwargs) -> List[Task]: + task_cls: Type[Task]=Task, + task_embedding=None, + **kwargs) -> List[Task]: # if a function is provided, make it a predicate class if isinstance(predicate, FunctionType): predicate = make_predicate(predicate) - return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls) + return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls, + task_embedding=task_embedding) for agent_id in agent_list] def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: @@ -175,7 +184,15 @@ def make_team_tasks(teams, task_spec) -> List[Task]: team_helper = TeamHelper(teams) for idx in range(min(len(team_list), len(task_spec))): team_id = team_list[idx] - reward_to, pred_fn, kwargs = task_spec[team_id] + + # see if task_spec has the task embedding + if len(task_spec[idx]) == 3: + reward_to, pred_fn, kwargs = task_spec[team_id] + task_embedding = None + elif len(task_spec[idx]) == 4: + reward_to, pred_fn, kwargs, task_embedding = task_spec[team_id] + else: + raise ValueError('Wrong task spec format') assert reward_to in REWARD_TO, 'Wrong reward target' @@ -210,18 +227,24 @@ def make_team_tasks(teams, task_spec) -> List[Task]: if reward_to == 'team': assignee = team_helper.teams[team_id] if predicate is None: - tasks.append(pred_cls(Group(assignee), **kwargs).create_task(task_cls=task_cls)) + predicate = pred_cls(Group(assignee), **kwargs) + tasks.append(predicate.create_task(task_cls=task_cls, task_embedding=task_embedding)) else: # this branch is for the cases like AllDead, StayAlive - tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls)) + tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls, + task_embedding=task_embedding)) elif reward_to == 'agent': agent_list = team_helper.teams[team_id] if predicate is None: - tasks += make_same_task(pred_cls, agent_list, task_cls=task_cls, **kwargs) + tasks += make_same_task(pred_cls, agent_list, + task_cls=task_cls, + task_embedding=task_embedding, + **kwargs) else: # this branch is for the cases like AllDead, StayAlive - tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls) + tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls, + task_embedding=task_embedding) for agent_id in agent_list] return tasks diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 57ee5121a..a4b46bc2e 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -1,6 +1,7 @@ # pylint: disable=unused-argument,invalid-name import unittest from types import FunctionType +import numpy as np import nmmo from nmmo.core.env import Env @@ -238,6 +239,7 @@ def PracticeFormation(gs, subject, dist, num_tick): env = Env(config) env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec])) + # check the task information task = env.tasks[0] self.assertEqual(task.name, '(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)'+ @@ -250,6 +252,12 @@ def PracticeFormation(gs, subject, dist, num_tick): self.assertEqual(task.subject, tuple(teams[0])) self.assertEqual(task.kwargs, task_spec[2]) self.assertEqual(task.assignee, tuple(teams[0])) + + # check the agent-task map + for agent_id, agent_tasks in env.agent_task_map.items(): + for task in agent_tasks: + self.assertTrue(agent_id in task.assignee) + # move agent 2, 3 to agent 1's pos for agent_id in [2,3]: change_spawn_pos(env.realm, agent_id, @@ -268,6 +276,26 @@ def PracticeFormation(gs, subject, dist, num_tick): self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1) self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) + # test the task_spec_with_embedding + task_embedding = np.array([1,2,3]) + task_spec_with_embedding = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}, + task_embedding) + env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec_with_embedding])) + + task = env.tasks[0] + self.assertEqual(task.name, + '(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)'+ + '_assignee:(1,2,3))') + self.assertEqual(task.get_source_code(), + 'def PracticeFormation(gs, subject, dist, num_tick):\n '+ + 'return AllMembersWithinRange(gs, subject, dist) * '+ + 'TickGE(gs, subject, num_tick)') + self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) + self.assertEqual(task.subject, tuple(teams[0])) + self.assertEqual(task.kwargs, task_spec[2]) + self.assertEqual(task.assignee, tuple(teams[0])) + self.assertTrue(np.array_equal(task.embedding, task_embedding)) + def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() From 6aca4d71322acb652b559c2b2936f94bab6abef2 Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 11 Jun 2023 19:08:45 +0000 Subject: [PATCH 009/113] separated pred_kwargs and task_kwargs in task_spec --- nmmo/task/predicate_api.py | 9 ++-- nmmo/task/task_api.py | 66 ++++++++++++++------------- tests/task/test_demo_task_creation.py | 35 ++++++++------ tests/task/test_manual_curriculum.py | 14 ++++-- tests/task/test_task_api.py | 49 ++++++++++++++++++-- 5 files changed, 113 insertions(+), 60 deletions(-) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index f16dd2868..4882448f0 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -148,10 +148,10 @@ def kwargs(self): def subject(self): return self._subject - def create_task(self, task_cls: Type[Task]=None, + def create_task(self, + task_cls: Optional[Type[Task]]=None, assignee: Union[Iterable[int], int]=None, - reward_multiplier=1.0, - task_embedding=None) -> Task: + **kwargs) -> Task: """ Creates a task from this predicate""" if task_cls is None: from nmmo.task.task_api import Task @@ -161,8 +161,7 @@ def create_task(self, task_cls: Type[Task]=None, # the new task is assigned to this predicate's subject assignee = self._subject.agents - return task_cls(eval_fn=self, assignee=assignee, reward_multiplier=reward_multiplier, - embedding=task_embedding) + return task_cls(eval_fn=self, assignee=assignee, **kwargs) def __and__(self, other): return AND(self, other) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index ceb1db0be..d689d32fd 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -139,18 +139,24 @@ def _map_progress_to_reward(self, gs) -> float: # The same task is assigned each agent in agent_list individually # with the agent as the predicate subject and task assignee -def make_same_task(predicate: Union[Type[Predicate], Callable], +def make_same_task(pred_cls: Union[Type[Predicate], Callable], agent_list: Iterable[int], + pred_kwargs=None, task_cls: Type[Task]=Task, - task_embedding=None, - **kwargs) -> List[Task]: + task_kwargs=None) -> List[Task]: # if a function is provided, make it a predicate class - if isinstance(predicate, FunctionType): - predicate = make_predicate(predicate) - - return [predicate(Group(agent_id),**kwargs).create_task(task_cls=task_cls, - task_embedding=task_embedding) - for agent_id in agent_list] + if isinstance(pred_cls, FunctionType): + pred_cls = make_predicate(pred_cls) + if pred_kwargs is None: + pred_kwargs = {} + if task_kwargs is None: + task_kwargs = {} + + task_list = [] + for agent_id in agent_list: + predicate = pred_cls(Group(agent_id), **pred_kwargs) + task_list.append(predicate.create_task(task_cls=task_cls, **task_kwargs)) + return task_list def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: # (almost) no overhead in env._compute_rewards() @@ -187,27 +193,27 @@ def make_team_tasks(teams, task_spec) -> List[Task]: # see if task_spec has the task embedding if len(task_spec[idx]) == 3: - reward_to, pred_fn, kwargs = task_spec[team_id] - task_embedding = None + reward_to, pred_fn, pred_fn_kwargs = task_spec[team_id] + task_kwargs = {} elif len(task_spec[idx]) == 4: - reward_to, pred_fn, kwargs, task_embedding = task_spec[team_id] + reward_to, pred_fn, pred_fn_kwargs, task_kwargs = task_spec[team_id] else: raise ValueError('Wrong task spec format') assert reward_to in REWARD_TO, 'Wrong reward target' - if 'task_cls' in kwargs: - task_cls = kwargs.pop('task_cls') + if 'task_cls' in task_kwargs: + task_cls = task_kwargs.pop('task_cls') else: task_cls = Task # reserve 'target' for relative agent mapping - if 'target' in kwargs: - target = kwargs.pop('target') + if 'target' in pred_fn_kwargs: + target = pred_fn_kwargs.pop('target') assert target in VALID_TARGET, 'Invalid target' # translate target to specific agent ids using team_helper target = team_helper.get_target_agent(team_id, target) - kwargs['target'] = target + pred_fn_kwargs['target'] = target # handle some special cases and instantiate the predicate first predicate = None @@ -216,35 +222,31 @@ def make_team_tasks(teams, task_spec) -> List[Task]: pred_cls = make_predicate(pred_fn) # TODO: should create a test for these - if pred_fn in [bp.AllDead]: - kwargs.pop('target') # remove target - predicate = pred_cls(Group(target), **kwargs) - if pred_fn in [bp.StayAlive] and 'target' in kwargs: - kwargs.pop('target') # remove target - predicate = pred_cls(Group(target), **kwargs) + if (pred_fn in [bp.AllDead]) or \ + (pred_fn in [bp.StayAlive] and 'target' in pred_fn_kwargs): + # use the target as the predicate subject + pred_fn_kwargs.pop('target') # remove target + predicate = pred_cls(Group(target), **pred_fn_kwargs) # create the task if reward_to == 'team': assignee = team_helper.teams[team_id] if predicate is None: - predicate = pred_cls(Group(assignee), **kwargs) - tasks.append(predicate.create_task(task_cls=task_cls, task_embedding=task_embedding)) + predicate = pred_cls(Group(assignee), **pred_fn_kwargs) + tasks.append(predicate.create_task(task_cls=task_cls, **task_kwargs)) else: # this branch is for the cases like AllDead, StayAlive tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls, - task_embedding=task_embedding)) + **task_kwargs)) elif reward_to == 'agent': agent_list = team_helper.teams[team_id] if predicate is None: - tasks += make_same_task(pred_cls, agent_list, - task_cls=task_cls, - task_embedding=task_embedding, - **kwargs) + tasks += make_same_task(pred_cls, agent_list, pred_kwargs=pred_fn_kwargs, + task_cls=task_cls, task_kwargs=task_kwargs) else: # this branch is for the cases like AllDead, StayAlive - tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls, - task_embedding=task_embedding) + tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls, **task_kwargs) for agent_id in agent_list] return tasks diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 5f5e532cf..3d43fe4e2 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -210,29 +210,34 @@ def PredicateMath(gs, subject): # DONE - def test_make_team_tasks_using_task_spec(self): - # NOTE: len(teams) and len(task_spec) don't need to match - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} - - """ task_spec is a list of tuple (reward_to, predicate class, kwargs) + def test_task_spec_based_curriculum(self): + """ + task_spec is a list of tuple (reward_to, evaluation function, eval_fn_kwargs, task_kwargs) + each tuple in the task_spec will create tasks for a team in teams - each tuple in the task_spec will create tasks for a team in teams + reward_to: must be in ['team', 'agent'] + * 'team' create a single team task, in which all team members get rewarded + * 'agent' create a task for each agent, in which only the agent gets rewarded - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded + evaluation functions from the base_predicates.py or could be custom functions like above - predicate class from the base predicates or custom predicates like above + eval_fn_kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these str will be translated into the actual agent ids - kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] - these str will be translated into the actual agent ids - * 'task_cls' is optional. If not provided, the standard Task is used. """ + task_kwargs are the optional, additional args that go into the task. + * 'task_cls' specifies the task class to be used. + If not provided, the standard Task is used. + """ task_spec = [ # (reward_to, predicate function, kwargs) ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), ('agent', bp.AllDead, {'target': 'left_team'}), - ('team', bp.CanSeeAgent, {'target': 'right_team_leader', 'task_cls': t.OngoingTask})] + ('team', bp.CanSeeAgent, {'target': 'right_team_leader'}, {'task_cls': t.OngoingTask}), + ] + + # NOTE: len(teams) and len(task_spec) don't need to match + teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} config = ScriptedAgentTestConfig() env = Env(config) diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index ab6bccb73..64a6d98fd 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -23,7 +23,7 @@ EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition HARVEST_ITEM = c.weapons + c.ammunition + c.consumables -""" task_spec is a list of tuple (reward_to, predicate class, kwargs) +""" task_spec is a list of tuple (reward_to, evaluation function, eval_fn_kwargs, task_kwargs) each tuple in the task_spec will create tasks for a team in teams @@ -31,12 +31,15 @@ * 'team' create a single team task, in which all team members get rewarded * 'agent' create a task for each agent, in which only the agent gets rewarded - predicate class from the base predicates or custom predicates like above + evaluation functions from the base_predicates.py or could be custom functions like above - kwargs are the additional args that go into predicate. There are also special keys + eval_fn_kwargs are the additional args that go into predicate. There are also special keys * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] these str will be translated into the actual agent ids - * 'task_cls' is optional. If not provided, the standard Task is used. """ + + task_kwargs are the optional, additional args that go into the task. + * 'task_cls' specifies the task class to be used. If not provided, the standard Task is used. + """ task_spec = [] # explore, eat, drink, attack any agent, harvest any item, level up any skill @@ -66,7 +69,8 @@ task_spec.append((reward_to, TickGE, {'num_tick': num_tick})) # protect the leader: get reward for each tick the leader is alive -task_spec.append(('team', StayAlive, {'target': 'my_team_leader', 'task_cls': OngoingTask})) +# NOTE: a tuple of length four, to pass in the task_kwargs +task_spec.append(('team', StayAlive, {'target': 'my_team_leader'}, {'task_cls': OngoingTask})) # want the other team or team leader to die for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index a4b46bc2e..6e639874a 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -6,10 +6,10 @@ import nmmo from nmmo.core.env import Env from nmmo.task.predicate_api import make_predicate, Predicate -from nmmo.task.task_api import Task, make_team_tasks +from nmmo.task.task_api import Task, make_team_tasks, OngoingTask from nmmo.task.group import Group from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT -from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange +from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive from nmmo.systems import item as Item from nmmo.core import action as Action @@ -279,7 +279,7 @@ def PracticeFormation(gs, subject, dist, num_tick): # test the task_spec_with_embedding task_embedding = np.array([1,2,3]) task_spec_with_embedding = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}, - task_embedding) + {'embedding': task_embedding}) env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec_with_embedding])) task = env.tasks[0] @@ -343,5 +343,48 @@ def test_completed_tasks_in_info(self): # DONE + def test_make_tasks_with_task_spec(self): + """ + task_spec is a list of tuple (reward_to, evaluation function, eval_fn_kwargs, task_kwargs) + each tuple in the task_spec will create tasks for a team in teams + + reward_to: must be in ['team', 'agent'] + * 'team' create a single team task, in which all team members get rewarded + * 'agent' create a task for each agent, in which only the agent gets rewarded + + evaluation functions from the base_predicates.py or could be custom functions like above + + eval_fn_kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these str will be translated into the actual agent ids + + task_kwargs are the optional, additional args that go into the task. + * 'task_cls' specifies the task class to be used. + If not provided, the standard Task is used. + """ + teams = {0:[1,2,3], 1:[4,5,6]} + task_spec = [ + ('agent', StayAlive, {}), + ('team', StayAlive, {}), + ('team', StayAlive, {'target': 'my_team_leader'}, {'task_cls': OngoingTask}), + ('team', StayAlive, {'target': 'left_team'}, + {'task_cls': OngoingTask, 'reward_multiplier': 2, 'embedding': np.array([1,2,3])}), + ] + + task_list = [] + # testing each task spec, individually + for single_spec in task_spec: + task_list.append(make_team_tasks(teams, [single_spec])) + + # check the task names + self.assertEqual(task_list[0][0].name, '(Task_eval_fn:(StayAlive_(1,))_assignee:(1,))') + self.assertEqual(task_list[1][0].name, '(Task_eval_fn:(StayAlive_(1,2,3))_assignee:(1,2,3))') + self.assertEqual(task_list[2][0].name, + '(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,2,3))') + self.assertEqual(task_list[3][0].name, + '(OngoingTask_eval_fn:(StayAlive_(4,5,6))_assignee:(1,2,3))') + self.assertEqual(task_list[3][0].reward_multiplier, 2) + self.assertTrue(np.array_equal(task_list[3][0].embedding, np.array([1,2,3]))) + if __name__ == '__main__': unittest.main() From e6660c2844ce30028dea4f9e9b34e9dcfd7593a7 Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 11 Jun 2023 19:32:50 +0000 Subject: [PATCH 010/113] tweaked tests for task_spec --- tests/task/test_task_api.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 6e639874a..c8b4df203 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -364,8 +364,8 @@ def test_make_tasks_with_task_spec(self): """ teams = {0:[1,2,3], 1:[4,5,6]} task_spec = [ - ('agent', StayAlive, {}), - ('team', StayAlive, {}), + ('agent', TickGE, {'num_tick': 20}), + ('agent', StayAlive, {}, {'task_cls': OngoingTask}), ('team', StayAlive, {'target': 'my_team_leader'}, {'task_cls': OngoingTask}), ('team', StayAlive, {'target': 'left_team'}, {'task_cls': OngoingTask, 'reward_multiplier': 2, 'embedding': np.array([1,2,3])}), @@ -377,8 +377,10 @@ def test_make_tasks_with_task_spec(self): task_list.append(make_team_tasks(teams, [single_spec])) # check the task names - self.assertEqual(task_list[0][0].name, '(Task_eval_fn:(StayAlive_(1,))_assignee:(1,))') - self.assertEqual(task_list[1][0].name, '(Task_eval_fn:(StayAlive_(1,2,3))_assignee:(1,2,3))') + self.assertEqual(task_list[0][0].name, + '(Task_eval_fn:(TickGE_(1,)_num_tick:20)_assignee:(1,))') + self.assertEqual(task_list[1][0].name, + '(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,))') self.assertEqual(task_list[2][0].name, '(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,2,3))') self.assertEqual(task_list[3][0].name, From 96713b55e2cfed20d21a54509675014f13204c36 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 12 Jun 2023 22:50:19 +0000 Subject: [PATCH 011/113] Add a memory usage test --- tests/test_memory_usage.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/test_memory_usage.py diff --git a/tests/test_memory_usage.py b/tests/test_memory_usage.py new file mode 100644 index 000000000..c0648c959 --- /dev/null +++ b/tests/test_memory_usage.py @@ -0,0 +1,11 @@ +import psutil + +import nmmo + +def test_memory_usage(): + env = nmmo.Env() + process = psutil.Process() + print(process.memory_info().rss) + +if __name__ == '__main__': + test_memory_usage() \ No newline at end of file From 8ae6116ffa80634f6e340cb52ad7bd25586dfe2b Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Mon, 12 Jun 2023 17:00:55 -0700 Subject: [PATCH 012/113] make market_n_obs a constant --- nmmo/core/config.py | 8 ++------ tests/test_memory_usage.py | 9 +++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 5b824f4fe..07d40ef77 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -610,12 +610,8 @@ class Exchange: EXCHANGE_LISTING_DURATION = 5 '''The number of ticks, during which the item is listed for sale''' - @property - def MARKET_N_OBS(self): - # TODO(kywch): This is a hack. Check if the limit is reached - # pylint: disable=no-member - '''Number of distinct item observations''' - return self.PLAYER_N * self.EXCHANGE_LISTING_DURATION + MARKET_N_OBS = 1024 + '''Number of distinct item observations''' PRICE_N_OBS = 99 # make it different from PLAYER_N_OBS '''Number of distinct price observations diff --git a/tests/test_memory_usage.py b/tests/test_memory_usage.py index c0648c959..ab9220526 100644 --- a/tests/test_memory_usage.py +++ b/tests/test_memory_usage.py @@ -1,11 +1,12 @@ +# pylint: disable=bad-builtin, unused-variable import psutil import nmmo def test_memory_usage(): - env = nmmo.Env() - process = psutil.Process() - print(process.memory_info().rss) + env = nmmo.Env() + process = psutil.Process() + print("memory", process.memory_info().rss) if __name__ == '__main__': - test_memory_usage() \ No newline at end of file + test_memory_usage() From aa01ef8a80320aef56f9666b52e682632dba1ba9 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Mon, 12 Jun 2023 17:03:15 -0700 Subject: [PATCH 013/113] install psutil --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index ac649cd8a..4610f6779 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,7 @@ 'pettingzoo==1.19.0', 'gym==0.23.0', 'pylint==2.16.0', + 'psutil==5.9.3', 'py==1.11.0', 'tqdm<5', ], From 061586d2a07f7d2af92ed3569eb6812803523518 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 14 Jun 2023 10:35:45 +0000 Subject: [PATCH 014/113] return dummy obs and reward for the dead agents --- nmmo/core/env.py | 123 +++++++++++++++------------------- nmmo/core/observation.py | 48 ++++++++----- tests/core/test_env.py | 29 +++++--- tests/task/test_predicates.py | 2 - tests/testhelpers.py | 2 +- 5 files changed, 106 insertions(+), 98 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 843704311..16d091bc2 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -37,6 +37,7 @@ def __init__(self, self.possible_agents = list(range(1, config.PLAYER_N + 1)) self._dead_agents = set() self._episode_stats = defaultdict(lambda: defaultdict(float)) + self._dead_this_tick = None self.scripted_agents = OrderedSet() self._gamestate_generator = GameStateGenerator(self.realm, self.config) @@ -149,6 +150,7 @@ def reset(self, map_id=None, seed=None, options=None, self.realm.reset(map_id) self._dead_agents = set() self._episode_stats.clear() + self._dead_this_tick = {} # check if there are scripted agents for eid, ent in self.realm.players.items(): @@ -278,20 +280,20 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # we don't need _deserialize_scripted_actions() anymore actions = self._validate_actions(actions) # Execute actions - self.realm.step(actions) + self._dead_this_tick = self.realm.step(actions) dones = {} for eid in self.possible_agents: - if eid not in self.realm.players or self.realm.tick >= self.config.HORIZON: - if eid not in self._dead_agents: - self._dead_agents.add(eid) - self._episode_stats[eid]["death_tick"] = self.realm.tick - dones[eid] = True + if eid in self._dead_this_tick or self.realm.tick >= self.config.HORIZON: + self._dead_agents.add(eid) + self._episode_stats[eid]["death_tick"] = self.realm.tick + dones[eid] = True - # Store the observations, since actions reference them + # Generate obs for each agent in self.agents self.obs = self._compute_observations() gym_obs = {a: o.to_gym() for a,o in self.obs.items()} - rewards, infos = self._compute_rewards(self.obs.keys(), dones) + # Generate rewards, infos for each agent in self.agents + rewards, infos = self._compute_rewards() for k,r in rewards.items(): self._episode_stats[k]['reward'] += r @@ -357,62 +359,47 @@ def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, return actions def _compute_observations(self): - '''Neural MMO Observation API - - Args: - agents: List of agents to return observations for. If None, returns - observations for all agents - - Returns: - obs: Dictionary of observations for each agent - obs[agent_id] = { - "Entity": [e1, e2, ...], - "Task": [encoded_task], - "Tile": [t1, t2, ...], - "Inventory": [i1, i2, ...], - "Market": [m1, m2, ...], - "ActionTargets": { - "Attack": [a1, a2, ...], - "Sell": [s1, s2, ...], - "Buy": [b1, b2, ...], - "Move": [m1, m2, ...], - } - ''' - + '''Create an Observation object for each agent in self.agents''' obs = {} + market = Item.Query.for_sale(self.realm.datastore) # the same for all agents - market = Item.Query.for_sale(self.realm.datastore) + # dummy obs + dummy_tiles = np.zeros((1, len(Tile.State.attr_name_to_col))) + dummy_entities = np.zeros((1, len(Entity.State.attr_name_to_col))) + dummy_inventory = np.zeros((1, len(Item.State.attr_name_to_col))) + dummy_market = np.zeros((1, len(Item.State.attr_name_to_col))) for agent_id in self.agents: - agent = self.realm.players.get(agent_id) - agent_r = agent.row.val - agent_c = agent.col.val - - visible_entities = Entity.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS - ) - visible_tiles = Tile.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS) - - inventory = Item.Query.owned_by(self.realm.datastore, agent_id) - - # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are - # available in each task instance, via task.embedding - # CHECK ME: do we pass in self.agent_task_map[agent_id], - # so that we can include task embedding in the obs? - obs[agent_id] = Observation(self.config, - self.realm.tick, - agent_id, - visible_tiles, - visible_entities, - inventory, market) + if agent_id not in self.realm.players: + # return dummy obs for the agents in dead_this_tick + obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, + dummy_tiles, dummy_entities, dummy_inventory, dummy_market) + else: + agent = self.realm.players.get(agent_id) + agent_r = agent.row.val + agent_c = agent.col.val + + visible_entities = Entity.Query.window( + self.realm.datastore, + agent_r, agent_c, + self.config.PLAYER_VISION_RADIUS + ) + visible_tiles = Tile.Query.window( + self.realm.datastore, + agent_r, agent_c, + self.config.PLAYER_VISION_RADIUS) + + inventory = Item.Query.owned_by(self.realm.datastore, agent_id) + + # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are + # available in each task instance, via task.embedding + # CHECK ME: do we want to pass in self.agent_task_map[agent_id], + # so that we can include task embedding in the obs? + obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, + visible_tiles, visible_entities, inventory, market) return obs - def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): + def _compute_rewards(self): '''Computes the reward for the specified agent Override this method to create custom reward functions. You have full @@ -428,24 +415,22 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. ''' # Initialization - infos = {agent_id: {'task': {}} for agent_id in agents} + infos = {agent_id: {'task': {}} for agent_id in self.agents} rewards = defaultdict(int) - agents = set(agents) - reward_cache = {} # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: - if task in reward_cache: - task_rewards, task_infos = reward_cache[task] - else: - task_rewards, task_infos = task.compute_rewards(self.game_state) - reward_cache[task] = (task_rewards, task_infos) + task_rewards, task_infos = task.compute_rewards(self.game_state) for agent_id, reward in task_rewards.items(): - if agent_id in agents and agent_id not in dones: + if agent_id in self.agents: rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress + # Make sure the dead agents return the rewards of -1 + for agent_id in self._dead_this_tick: + rewards[agent_id] = -1 + return rewards, infos ############################################################################ @@ -458,7 +443,9 @@ def render(self, mode='human'): @property def agents(self) -> List[AgentID]: '''For conformity with the PettingZoo API only; rendering is external''' - return list(set(self.realm.players.keys()) - self._dead_agents) + # "current" agents, which return obs: both alive and dead_this_tick + agents = set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys())) + return list(agents) def close(self): '''For conformity with the PettingZoo API only; rendering is external''' diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index ad05a0b2f..9967a1685 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -55,7 +55,8 @@ def __init__(self, self.entities = BasicObs(entities[0:config.PLAYER_N_OBS], EntityState.State.attr_name_to_col["id"]) - if config.COMBAT_SYSTEM_ENABLED: + agent = self.agent() + if config.COMBAT_SYSTEM_ENABLED and agent is not None: latest_combat_tick = self.agent().latest_combat_tick self.agent_in_combat = False if latest_combat_tick == 0 else \ (current_tick - latest_combat_tick) < config.COMBAT_STATUS_DURATION @@ -112,31 +113,42 @@ def entity(self, entity_id): def agent(self): return self.entity(self.agent_id) - def to_gym(self): - '''Convert the observation to a format that can be used by OpenAI Gym''' - - tiles = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])) - tiles[:self.tiles.shape[0],:] = self.tiles - - entities = np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1])) - entities[:self.entities.values.shape[0],:] = self.entities.values - + def get_empty_obs(self): gym_obs = { "CurrentTick": np.array([self.current_tick]), "AgentId": np.array([self.agent_id]), - "Tile": tiles, - "Entity": entities, + "Tile": np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), + "Entity": np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1])), } if self.config.ITEM_SYSTEM_ENABLED: - inventory = np.zeros((self.config.INVENTORY_N_OBS, self.inventory.values.shape[1])) - inventory[:self.inventory.values.shape[0],:] = self.inventory.values - gym_obs["Inventory"] = inventory + gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS, + self.inventory.values.shape[1])) + + if self.config.EXCHANGE_SYSTEM_ENABLED: + gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS, + self.market.values.shape[1])) + + if self.config.PROVIDE_ACTION_TARGETS: + gym_obs["ActionTargets"] = None + + return gym_obs + + def to_gym(self): + '''Convert the observation to a format that can be used by OpenAI Gym''' + gym_obs = self.get_empty_obs() + if self.agent() is None: + # return empty obs for the dead agents + return gym_obs + + gym_obs['Tile'][:self.tiles.shape[0],:] = self.tiles + gym_obs['Entity'][:self.entities.values.shape[0],:] = self.entities.values + + if self.config.ITEM_SYSTEM_ENABLED: + gym_obs["Inventory"][:self.inventory.values.shape[0],:] = self.inventory.values if self.config.EXCHANGE_SYSTEM_ENABLED: - market = np.zeros((self.config.MARKET_N_OBS, self.market.values.shape[1])) - market[:self.market.values.shape[0],:] = self.market.values - gym_obs["Market"] = market + gym_obs["Market"][:self.market.values.shape[0],:] = self.market.values if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 1ddeb6776..22f816a84 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -2,6 +2,7 @@ from typing import List import random +import numpy as np from tqdm import tqdm import nmmo @@ -51,19 +52,29 @@ def test_observations(self): ] for player_id, player_obs in obs.items(): - self._validate_tiles(player_obs, self.env.realm) - self._validate_entitites( - player_id, player_obs, self.env.realm, entity_locations) - self._validate_inventory(player_id, player_obs, self.env.realm) - self._validate_market(player_obs, self.env.realm) - obs, _, dones, _ = self.env.step({}) - - # make sure dead agents return proper dones=True - self.assertEqual(len(self.env.agents), len(self.env.realm.players)) + if player_id in self.env.realm.players: # alive agents + self._validate_tiles(player_obs, self.env.realm) + self._validate_entitites( + player_id, player_obs, self.env.realm, entity_locations) + self._validate_inventory(player_id, player_obs, self.env.realm) + self._validate_market(player_obs, self.env.realm) + else: + # the obs of dead agents are dummy, all zeros + self.assertEqual(np.sum(player_obs['Tile']), 0) + self.assertEqual(np.sum(player_obs['Entity']), 0) + self.assertEqual(np.sum(player_obs['Inventory']), 0) + self.assertEqual(np.sum(player_obs['Market']), 0) + + obs, rewards, dones, _ = self.env.step({}) + + # make sure dead agents return proper dones=True, dummy obs, and -1 reward + self.assertEqual(len(self.env.agents), + len(self.env.realm.players) + len(self.env._dead_this_tick)) self.assertEqual(len(self.env.possible_agents), len(self.env.realm.players) + len(self.env._dead_agents)) if len(self.env._dead_agents) > len(dead_agents): for dead_id in self.env._dead_agents - dead_agents: + self.assertEqual(rewards[dead_id], -1) self.assertTrue(dones[dead_id]) dead_agents.add(dead_id) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index f2f61f0e3..695716f79 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -126,8 +126,6 @@ def test_tickge_stay_alive_rip(self): # make sure that dead players not in the realm nor the datastore self.assertTrue(ent_id not in env.realm.players) self.assertTrue(ent_id not in entities) - # CHECK ME: dead agents are also not in infos - self.assertTrue(ent_id not in infos) # TickGE_5 is true. Agents 1-3 are dead, so # StayAlive(1,3) and StayAlive(3,4) are false, StayAlive(4) is true diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 678cedaf9..68ecff4a4 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -384,7 +384,7 @@ def profile_env_step(action_target=True, tasks=None, condition=None): ('env.realm.step():', lambda: env.realm.step({})), ('env._compute_observations():', lambda: env._compute_observations()), ('obs.to_gym(), ActionTarget:', lambda: {a: o.to_gym() for a,o in obs.items()}), - ('env._compute_rewards():', lambda: env._compute_rewards(obs.keys(), {})) + ('env._compute_rewards():', lambda: env._compute_rewards()) ] if condition: From 04e3bc63bf53082aedda457395cfd4ccd3de5dc4 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 14 Jun 2023 12:21:44 +0000 Subject: [PATCH 015/113] env returns obs, rewards, dones, infos for all env.agents --- nmmo/core/env.py | 19 +++++++++++-------- tests/core/test_env.py | 7 ++++++- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 16d091bc2..f125d8792 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -282,11 +282,13 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # Execute actions self._dead_this_tick = self.realm.step(actions) dones = {} - for eid in self.possible_agents: - if eid in self._dead_this_tick or self.realm.tick >= self.config.HORIZON: - self._dead_agents.add(eid) - self._episode_stats[eid]["death_tick"] = self.realm.tick - dones[eid] = True + for agent_id in self.agents: + if agent_id in self._dead_this_tick or self.realm.tick >= self.config.HORIZON: + self._dead_agents.add(agent_id) + self._episode_stats[agent_id]["death_tick"] = self.realm.tick + dones[agent_id] = True + else: + dones[agent_id] = False # Generate obs for each agent in self.agents self.obs = self._compute_observations() @@ -297,14 +299,14 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): for k,r in rewards.items(): self._episode_stats[k]['reward'] += r - # When the episode ends, add the episode stats to the info of one of - # the last dagents + # When the episode ends, add the episode stats to the info of the last agents if len(self._dead_agents) == len(self.possible_agents): for agent_id, stats in self._episode_stats.items(): if agent_id not in infos: infos[agent_id] = {} infos[agent_id]["episode_stats"] = stats + # NOTE: all obs, rewards, dones, infos have data for each agent in self.agents return gym_obs, rewards, dones, infos def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): @@ -417,13 +419,14 @@ def _compute_rewards(self): # Initialization infos = {agent_id: {'task': {}} for agent_id in self.agents} rewards = defaultdict(int) + agents = set(self.agents) # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: task_rewards, task_infos = task.compute_rewards(self.game_state) for agent_id, reward in task_rewards.items(): - if agent_id in self.agents: + if agent_id in agents: rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 22f816a84..18bd74711 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -65,13 +65,18 @@ def test_observations(self): self.assertEqual(np.sum(player_obs['Inventory']), 0) self.assertEqual(np.sum(player_obs['Market']), 0) - obs, rewards, dones, _ = self.env.step({}) + obs, rewards, dones, infos = self.env.step({}) # make sure dead agents return proper dones=True, dummy obs, and -1 reward self.assertEqual(len(self.env.agents), len(self.env.realm.players) + len(self.env._dead_this_tick)) self.assertEqual(len(self.env.possible_agents), len(self.env.realm.players) + len(self.env._dead_agents)) + for agent_id in self.env.agents: + self.assertTrue(agent_id in obs) + self.assertTrue(agent_id in rewards) + self.assertTrue(agent_id in dones) + self.assertTrue(agent_id in infos) if len(self.env._dead_agents) > len(dead_agents): for dead_id in self.env._dead_agents - dead_agents: self.assertEqual(rewards[dead_id], -1) From 1cb6038f66a3e064fdaa8fb285f76bd7a88f6dd4 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 14 Jun 2023 13:21:41 +0000 Subject: [PATCH 016/113] quick fixes --- nmmo/core/env.py | 4 ++-- nmmo/task/game_state.py | 8 +++++--- tests/core/test_env.py | 8 ++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index f125d8792..9f679346e 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -417,9 +417,9 @@ def _compute_rewards(self): entity identified by ent_id. ''' # Initialization - infos = {agent_id: {'task': {}} for agent_id in self.agents} - rewards = defaultdict(int) agents = set(self.agents) + infos = {agent_id: {'task': {}} for agent_id in agents} + rewards = defaultdict(int) # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index fb57ba4cf..305c31792 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Dict, List, Tuple, MutableMapping +from typing import Dict, List, Tuple, MutableMapping, Set from dataclasses import dataclass from copy import deepcopy from abc import ABC, abstractmethod @@ -31,7 +31,7 @@ class GameState: config: Config spawn_pos: Dict[int, Tuple[int, int]] # ent_id: (row, col) of all spawned agents - alive_agents: List[int] # of alive agents' ent_id (for convenience) + alive_agents: Set[int] # of alive agents' ent_id (for convenience) env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table @@ -192,12 +192,14 @@ def __init__(self, realm: Realm, config: Config): def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: # copy the datastore, by running astype entity_all = EntityState.Query.table(realm.datastore).astype(np.int16) + alive_agents = entity_all[:, EntityAttr["id"]] + alive_agents = set(alive_agents[alive_agents > 0]) return GameState( current_tick = realm.tick, config = self.config, spawn_pos = self.spawn_pos, - alive_agents = list(entity_all[:, EntityAttr["id"]]), + alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, item_data = ItemState.Query.table(realm.datastore).astype(np.int16), diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 18bd74711..244bfc05d 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -83,6 +83,14 @@ def test_observations(self): self.assertTrue(dones[dead_id]) dead_agents.add(dead_id) + # check dead and alive + entity_all = EntityState.Query.table(self.env.realm.datastore).astype(np.int16) + alive_agents = entity_all[:, Entity.State.attr_name_to_col["id"]] + alive_agents = set(alive_agents[alive_agents > 0]) + for agent_id in alive_agents: + self.assertTrue(agent_id in self.env.realm.players) + self.assertTrue(agent_id not in self.env._dead_agents) + def _validate_tiles(self, obs, realm: Realm): for tile_obs in obs["Tile"]: tile_obs = TileState.parse_array(tile_obs) From 222c79510e84437f1d1725bddcae004a4c49b673 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 14 Jun 2023 13:59:28 +0000 Subject: [PATCH 017/113] made dummy obs zero length --- nmmo/core/env.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 9f679346e..4c65708d8 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -366,10 +366,10 @@ def _compute_observations(self): market = Item.Query.for_sale(self.realm.datastore) # the same for all agents # dummy obs - dummy_tiles = np.zeros((1, len(Tile.State.attr_name_to_col))) - dummy_entities = np.zeros((1, len(Entity.State.attr_name_to_col))) - dummy_inventory = np.zeros((1, len(Item.State.attr_name_to_col))) - dummy_market = np.zeros((1, len(Item.State.attr_name_to_col))) + dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col))) + dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col))) + dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col))) + dummy_market = np.zeros((0, len(Item.State.attr_name_to_col))) for agent_id in self.agents: if agent_id not in self.realm.players: From 7a35c250af89b63eb8697383773f2c4817e1f92e Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 14 Jun 2023 21:04:53 +0000 Subject: [PATCH 018/113] make dummy obs at reset and use it --- nmmo/core/env.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 4c65708d8..f678a7fe2 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -2,6 +2,7 @@ import random from typing import Any, Dict, List, Callable from collections import defaultdict +from copy import copy from ordered_set import OrderedSet import gym @@ -33,6 +34,7 @@ def __init__(self, self.config = config self.realm = realm.Realm(config) self.obs = None + self._dummy_obs = None self.possible_agents = list(range(1, config.PLAYER_N + 1)) self._dead_agents = set() @@ -157,6 +159,7 @@ def reset(self, map_id=None, seed=None, options=None, if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) + self._dummy_obs = self._make_dummy_obs() self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) @@ -360,22 +363,26 @@ def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, return actions - def _compute_observations(self): - '''Create an Observation object for each agent in self.agents''' - obs = {} - market = Item.Query.for_sale(self.realm.datastore) # the same for all agents - - # dummy obs + def _make_dummy_obs(self): dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col))) dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col))) dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col))) dummy_market = np.zeros((0, len(Item.State.attr_name_to_col))) + return Observation(self.config, self.realm.tick, 0, + dummy_tiles, dummy_entities, dummy_inventory, dummy_market) + + def _compute_observations(self): + '''Create an Observation object for each agent in self.agents''' + obs = {} + market = Item.Query.for_sale(self.realm.datastore) # the same for all agents for agent_id in self.agents: if agent_id not in self.realm.players: # return dummy obs for the agents in dead_this_tick - obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, - dummy_tiles, dummy_entities, dummy_inventory, dummy_market) + dummy_obs = copy(self._dummy_obs) + dummy_obs.current_tick = self.realm.tick + dummy_obs.agent_id = agent_id + obs[agent_id] = dummy_obs else: agent = self.realm.players.get(agent_id) agent_r = agent.row.val From 5b029619a596fe180c4e474d41234a3a9c3b30c2 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Wed, 14 Jun 2023 14:54:30 -0700 Subject: [PATCH 019/113] Revert "return dummy obs and reward for the dead agents" --- nmmo/core/env.py | 141 +++++++++++++++++----------------- nmmo/core/observation.py | 48 +++++------- nmmo/task/game_state.py | 8 +- tests/core/test_env.py | 42 +++------- tests/task/test_predicates.py | 2 + tests/testhelpers.py | 2 +- 6 files changed, 105 insertions(+), 138 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index f678a7fe2..843704311 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -2,7 +2,6 @@ import random from typing import Any, Dict, List, Callable from collections import defaultdict -from copy import copy from ordered_set import OrderedSet import gym @@ -34,12 +33,10 @@ def __init__(self, self.config = config self.realm = realm.Realm(config) self.obs = None - self._dummy_obs = None self.possible_agents = list(range(1, config.PLAYER_N + 1)) self._dead_agents = set() self._episode_stats = defaultdict(lambda: defaultdict(float)) - self._dead_this_tick = None self.scripted_agents = OrderedSet() self._gamestate_generator = GameStateGenerator(self.realm, self.config) @@ -152,14 +149,12 @@ def reset(self, map_id=None, seed=None, options=None, self.realm.reset(map_id) self._dead_agents = set() self._episode_stats.clear() - self._dead_this_tick = {} # check if there are scripted agents for eid, ent in self.realm.players.items(): if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) - self._dummy_obs = self._make_dummy_obs() self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) @@ -283,33 +278,31 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # we don't need _deserialize_scripted_actions() anymore actions = self._validate_actions(actions) # Execute actions - self._dead_this_tick = self.realm.step(actions) + self.realm.step(actions) dones = {} - for agent_id in self.agents: - if agent_id in self._dead_this_tick or self.realm.tick >= self.config.HORIZON: - self._dead_agents.add(agent_id) - self._episode_stats[agent_id]["death_tick"] = self.realm.tick - dones[agent_id] = True - else: - dones[agent_id] = False - - # Generate obs for each agent in self.agents + for eid in self.possible_agents: + if eid not in self.realm.players or self.realm.tick >= self.config.HORIZON: + if eid not in self._dead_agents: + self._dead_agents.add(eid) + self._episode_stats[eid]["death_tick"] = self.realm.tick + dones[eid] = True + + # Store the observations, since actions reference them self.obs = self._compute_observations() gym_obs = {a: o.to_gym() for a,o in self.obs.items()} - # Generate rewards, infos for each agent in self.agents - rewards, infos = self._compute_rewards() + rewards, infos = self._compute_rewards(self.obs.keys(), dones) for k,r in rewards.items(): self._episode_stats[k]['reward'] += r - # When the episode ends, add the episode stats to the info of the last agents + # When the episode ends, add the episode stats to the info of one of + # the last dagents if len(self._dead_agents) == len(self.possible_agents): for agent_id, stats in self._episode_stats.items(): if agent_id not in infos: infos[agent_id] = {} infos[agent_id]["episode_stats"] = stats - # NOTE: all obs, rewards, dones, infos have data for each agent in self.agents return gym_obs, rewards, dones, infos def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): @@ -363,52 +356,63 @@ def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, return actions - def _make_dummy_obs(self): - dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col))) - dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col))) - dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col))) - dummy_market = np.zeros((0, len(Item.State.attr_name_to_col))) - return Observation(self.config, self.realm.tick, 0, - dummy_tiles, dummy_entities, dummy_inventory, dummy_market) - def _compute_observations(self): - '''Create an Observation object for each agent in self.agents''' + '''Neural MMO Observation API + + Args: + agents: List of agents to return observations for. If None, returns + observations for all agents + + Returns: + obs: Dictionary of observations for each agent + obs[agent_id] = { + "Entity": [e1, e2, ...], + "Task": [encoded_task], + "Tile": [t1, t2, ...], + "Inventory": [i1, i2, ...], + "Market": [m1, m2, ...], + "ActionTargets": { + "Attack": [a1, a2, ...], + "Sell": [s1, s2, ...], + "Buy": [b1, b2, ...], + "Move": [m1, m2, ...], + } + ''' + obs = {} - market = Item.Query.for_sale(self.realm.datastore) # the same for all agents + + market = Item.Query.for_sale(self.realm.datastore) for agent_id in self.agents: - if agent_id not in self.realm.players: - # return dummy obs for the agents in dead_this_tick - dummy_obs = copy(self._dummy_obs) - dummy_obs.current_tick = self.realm.tick - dummy_obs.agent_id = agent_id - obs[agent_id] = dummy_obs - else: - agent = self.realm.players.get(agent_id) - agent_r = agent.row.val - agent_c = agent.col.val - - visible_entities = Entity.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS - ) - visible_tiles = Tile.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS) - - inventory = Item.Query.owned_by(self.realm.datastore, agent_id) - - # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are - # available in each task instance, via task.embedding - # CHECK ME: do we want to pass in self.agent_task_map[agent_id], - # so that we can include task embedding in the obs? - obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, - visible_tiles, visible_entities, inventory, market) + agent = self.realm.players.get(agent_id) + agent_r = agent.row.val + agent_c = agent.col.val + + visible_entities = Entity.Query.window( + self.realm.datastore, + agent_r, agent_c, + self.config.PLAYER_VISION_RADIUS + ) + visible_tiles = Tile.Query.window( + self.realm.datastore, + agent_r, agent_c, + self.config.PLAYER_VISION_RADIUS) + + inventory = Item.Query.owned_by(self.realm.datastore, agent_id) + + # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are + # available in each task instance, via task.embedding + # CHECK ME: do we pass in self.agent_task_map[agent_id], + # so that we can include task embedding in the obs? + obs[agent_id] = Observation(self.config, + self.realm.tick, + agent_id, + visible_tiles, + visible_entities, + inventory, market) return obs - def _compute_rewards(self): + def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): '''Computes the reward for the specified agent Override this method to create custom reward functions. You have full @@ -424,23 +428,24 @@ def _compute_rewards(self): entity identified by ent_id. ''' # Initialization - agents = set(self.agents) infos = {agent_id: {'task': {}} for agent_id in agents} rewards = defaultdict(int) + agents = set(agents) + reward_cache = {} # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: - task_rewards, task_infos = task.compute_rewards(self.game_state) + if task in reward_cache: + task_rewards, task_infos = reward_cache[task] + else: + task_rewards, task_infos = task.compute_rewards(self.game_state) + reward_cache[task] = (task_rewards, task_infos) for agent_id, reward in task_rewards.items(): - if agent_id in agents: + if agent_id in agents and agent_id not in dones: rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress - # Make sure the dead agents return the rewards of -1 - for agent_id in self._dead_this_tick: - rewards[agent_id] = -1 - return rewards, infos ############################################################################ @@ -453,9 +458,7 @@ def render(self, mode='human'): @property def agents(self) -> List[AgentID]: '''For conformity with the PettingZoo API only; rendering is external''' - # "current" agents, which return obs: both alive and dead_this_tick - agents = set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys())) - return list(agents) + return list(set(self.realm.players.keys()) - self._dead_agents) def close(self): '''For conformity with the PettingZoo API only; rendering is external''' diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 9967a1685..ad05a0b2f 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -55,8 +55,7 @@ def __init__(self, self.entities = BasicObs(entities[0:config.PLAYER_N_OBS], EntityState.State.attr_name_to_col["id"]) - agent = self.agent() - if config.COMBAT_SYSTEM_ENABLED and agent is not None: + if config.COMBAT_SYSTEM_ENABLED: latest_combat_tick = self.agent().latest_combat_tick self.agent_in_combat = False if latest_combat_tick == 0 else \ (current_tick - latest_combat_tick) < config.COMBAT_STATUS_DURATION @@ -113,42 +112,31 @@ def entity(self, entity_id): def agent(self): return self.entity(self.agent_id) - def get_empty_obs(self): + def to_gym(self): + '''Convert the observation to a format that can be used by OpenAI Gym''' + + tiles = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])) + tiles[:self.tiles.shape[0],:] = self.tiles + + entities = np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1])) + entities[:self.entities.values.shape[0],:] = self.entities.values + gym_obs = { "CurrentTick": np.array([self.current_tick]), "AgentId": np.array([self.agent_id]), - "Tile": np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), - "Entity": np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1])), + "Tile": tiles, + "Entity": entities, } if self.config.ITEM_SYSTEM_ENABLED: - gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS, - self.inventory.values.shape[1])) - - if self.config.EXCHANGE_SYSTEM_ENABLED: - gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS, - self.market.values.shape[1])) - - if self.config.PROVIDE_ACTION_TARGETS: - gym_obs["ActionTargets"] = None - - return gym_obs - - def to_gym(self): - '''Convert the observation to a format that can be used by OpenAI Gym''' - gym_obs = self.get_empty_obs() - if self.agent() is None: - # return empty obs for the dead agents - return gym_obs - - gym_obs['Tile'][:self.tiles.shape[0],:] = self.tiles - gym_obs['Entity'][:self.entities.values.shape[0],:] = self.entities.values - - if self.config.ITEM_SYSTEM_ENABLED: - gym_obs["Inventory"][:self.inventory.values.shape[0],:] = self.inventory.values + inventory = np.zeros((self.config.INVENTORY_N_OBS, self.inventory.values.shape[1])) + inventory[:self.inventory.values.shape[0],:] = self.inventory.values + gym_obs["Inventory"] = inventory if self.config.EXCHANGE_SYSTEM_ENABLED: - gym_obs["Market"][:self.market.values.shape[0],:] = self.market.values + market = np.zeros((self.config.MARKET_N_OBS, self.market.values.shape[1])) + market[:self.market.values.shape[0],:] = self.market.values + gym_obs["Market"] = market if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 305c31792..fb57ba4cf 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Dict, List, Tuple, MutableMapping, Set +from typing import Dict, List, Tuple, MutableMapping from dataclasses import dataclass from copy import deepcopy from abc import ABC, abstractmethod @@ -31,7 +31,7 @@ class GameState: config: Config spawn_pos: Dict[int, Tuple[int, int]] # ent_id: (row, col) of all spawned agents - alive_agents: Set[int] # of alive agents' ent_id (for convenience) + alive_agents: List[int] # of alive agents' ent_id (for convenience) env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table @@ -192,14 +192,12 @@ def __init__(self, realm: Realm, config: Config): def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: # copy the datastore, by running astype entity_all = EntityState.Query.table(realm.datastore).astype(np.int16) - alive_agents = entity_all[:, EntityAttr["id"]] - alive_agents = set(alive_agents[alive_agents > 0]) return GameState( current_tick = realm.tick, config = self.config, spawn_pos = self.spawn_pos, - alive_agents = alive_agents, + alive_agents = list(entity_all[:, EntityAttr["id"]]), env_obs = env_obs, entity_data = entity_all, item_data = ItemState.Query.table(realm.datastore).astype(np.int16), diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 244bfc05d..1ddeb6776 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -2,7 +2,6 @@ from typing import List import random -import numpy as np from tqdm import tqdm import nmmo @@ -52,45 +51,22 @@ def test_observations(self): ] for player_id, player_obs in obs.items(): - if player_id in self.env.realm.players: # alive agents - self._validate_tiles(player_obs, self.env.realm) - self._validate_entitites( - player_id, player_obs, self.env.realm, entity_locations) - self._validate_inventory(player_id, player_obs, self.env.realm) - self._validate_market(player_obs, self.env.realm) - else: - # the obs of dead agents are dummy, all zeros - self.assertEqual(np.sum(player_obs['Tile']), 0) - self.assertEqual(np.sum(player_obs['Entity']), 0) - self.assertEqual(np.sum(player_obs['Inventory']), 0) - self.assertEqual(np.sum(player_obs['Market']), 0) - - obs, rewards, dones, infos = self.env.step({}) - - # make sure dead agents return proper dones=True, dummy obs, and -1 reward - self.assertEqual(len(self.env.agents), - len(self.env.realm.players) + len(self.env._dead_this_tick)) + self._validate_tiles(player_obs, self.env.realm) + self._validate_entitites( + player_id, player_obs, self.env.realm, entity_locations) + self._validate_inventory(player_id, player_obs, self.env.realm) + self._validate_market(player_obs, self.env.realm) + obs, _, dones, _ = self.env.step({}) + + # make sure dead agents return proper dones=True + self.assertEqual(len(self.env.agents), len(self.env.realm.players)) self.assertEqual(len(self.env.possible_agents), len(self.env.realm.players) + len(self.env._dead_agents)) - for agent_id in self.env.agents: - self.assertTrue(agent_id in obs) - self.assertTrue(agent_id in rewards) - self.assertTrue(agent_id in dones) - self.assertTrue(agent_id in infos) if len(self.env._dead_agents) > len(dead_agents): for dead_id in self.env._dead_agents - dead_agents: - self.assertEqual(rewards[dead_id], -1) self.assertTrue(dones[dead_id]) dead_agents.add(dead_id) - # check dead and alive - entity_all = EntityState.Query.table(self.env.realm.datastore).astype(np.int16) - alive_agents = entity_all[:, Entity.State.attr_name_to_col["id"]] - alive_agents = set(alive_agents[alive_agents > 0]) - for agent_id in alive_agents: - self.assertTrue(agent_id in self.env.realm.players) - self.assertTrue(agent_id not in self.env._dead_agents) - def _validate_tiles(self, obs, realm: Realm): for tile_obs in obs["Tile"]: tile_obs = TileState.parse_array(tile_obs) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index 695716f79..f2f61f0e3 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -126,6 +126,8 @@ def test_tickge_stay_alive_rip(self): # make sure that dead players not in the realm nor the datastore self.assertTrue(ent_id not in env.realm.players) self.assertTrue(ent_id not in entities) + # CHECK ME: dead agents are also not in infos + self.assertTrue(ent_id not in infos) # TickGE_5 is true. Agents 1-3 are dead, so # StayAlive(1,3) and StayAlive(3,4) are false, StayAlive(4) is true diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 68ecff4a4..678cedaf9 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -384,7 +384,7 @@ def profile_env_step(action_target=True, tasks=None, condition=None): ('env.realm.step():', lambda: env.realm.step({})), ('env._compute_observations():', lambda: env._compute_observations()), ('obs.to_gym(), ActionTarget:', lambda: {a: o.to_gym() for a,o in obs.items()}), - ('env._compute_rewards():', lambda: env._compute_rewards()) + ('env._compute_rewards():', lambda: env._compute_rewards(obs.keys(), {})) ] if condition: From 23d14d517bc6a47e2dcd8d7a010e928a214a9038 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 14 Jun 2023 23:54:54 +0000 Subject: [PATCH 020/113] added ActionTargets to dummy obs --- nmmo/core/env.py | 133 ++++++++++++++++------------------ nmmo/core/observation.py | 78 ++++++++++++-------- nmmo/task/game_state.py | 8 +- tests/core/test_env.py | 45 +++++++++--- tests/task/test_predicates.py | 2 - tests/testhelpers.py | 2 +- 6 files changed, 153 insertions(+), 115 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 843704311..98247b809 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -2,6 +2,7 @@ import random from typing import Any, Dict, List, Callable from collections import defaultdict +from copy import copy from ordered_set import OrderedSet import gym @@ -33,10 +34,12 @@ def __init__(self, self.config = config self.realm = realm.Realm(config) self.obs = None + self._dummy_obs = None self.possible_agents = list(range(1, config.PLAYER_N + 1)) self._dead_agents = set() self._episode_stats = defaultdict(lambda: defaultdict(float)) + self._dead_this_tick = None self.scripted_agents = OrderedSet() self._gamestate_generator = GameStateGenerator(self.realm, self.config) @@ -149,12 +152,14 @@ def reset(self, map_id=None, seed=None, options=None, self.realm.reset(map_id) self._dead_agents = set() self._episode_stats.clear() + self._dead_this_tick = {} # check if there are scripted agents for eid, ent in self.realm.players.items(): if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) + self._dummy_obs = self._make_dummy_obs() self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) @@ -278,31 +283,32 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): # we don't need _deserialize_scripted_actions() anymore actions = self._validate_actions(actions) # Execute actions - self.realm.step(actions) + self._dead_this_tick = self.realm.step(actions) dones = {} - for eid in self.possible_agents: - if eid not in self.realm.players or self.realm.tick >= self.config.HORIZON: - if eid not in self._dead_agents: - self._dead_agents.add(eid) - self._episode_stats[eid]["death_tick"] = self.realm.tick - dones[eid] = True + for agent_id in self.agents: + if agent_id in self._dead_this_tick or self.realm.tick >= self.config.HORIZON: + self._dead_agents.add(agent_id) + self._episode_stats[agent_id]["death_tick"] = self.realm.tick + dones[agent_id] = True + else: + dones[agent_id] = False # Store the observations, since actions reference them self.obs = self._compute_observations() gym_obs = {a: o.to_gym() for a,o in self.obs.items()} - rewards, infos = self._compute_rewards(self.obs.keys(), dones) + rewards, infos = self._compute_rewards() for k,r in rewards.items(): self._episode_stats[k]['reward'] += r - # When the episode ends, add the episode stats to the info of one of - # the last dagents + # When the episode ends, add the episode stats to the info of the last agents if len(self._dead_agents) == len(self.possible_agents): for agent_id, stats in self._episode_stats.items(): if agent_id not in infos: infos[agent_id] = {} infos[agent_id]["episode_stats"] = stats + # NOTE: all obs, rewards, dones, infos have data for each agent in self.agents return gym_obs, rewards, dones, infos def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): @@ -356,63 +362,51 @@ def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, return actions - def _compute_observations(self): - '''Neural MMO Observation API - - Args: - agents: List of agents to return observations for. If None, returns - observations for all agents - - Returns: - obs: Dictionary of observations for each agent - obs[agent_id] = { - "Entity": [e1, e2, ...], - "Task": [encoded_task], - "Tile": [t1, t2, ...], - "Inventory": [i1, i2, ...], - "Market": [m1, m2, ...], - "ActionTargets": { - "Attack": [a1, a2, ...], - "Sell": [s1, s2, ...], - "Buy": [b1, b2, ...], - "Move": [m1, m2, ...], - } - ''' + def _make_dummy_obs(self): + dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col))) + dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col))) + dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col))) + dummy_market = np.zeros((0, len(Item.State.attr_name_to_col))) + return Observation(self.config, self.realm.tick, 0, + dummy_tiles, dummy_entities, dummy_inventory, dummy_market) + def _compute_observations(self): obs = {} - market = Item.Query.for_sale(self.realm.datastore) for agent_id in self.agents: - agent = self.realm.players.get(agent_id) - agent_r = agent.row.val - agent_c = agent.col.val - - visible_entities = Entity.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS - ) - visible_tiles = Tile.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS) - - inventory = Item.Query.owned_by(self.realm.datastore, agent_id) - - # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are - # available in each task instance, via task.embedding - # CHECK ME: do we pass in self.agent_task_map[agent_id], - # so that we can include task embedding in the obs? - obs[agent_id] = Observation(self.config, - self.realm.tick, - agent_id, - visible_tiles, - visible_entities, - inventory, market) + if agent_id not in self.realm.players: + # return dummy obs for the agents in dead_this_tick + dummy_obs = copy(self._dummy_obs) + dummy_obs.current_tick = self.realm.tick + dummy_obs.agent_id = agent_id + obs[agent_id] = dummy_obs + else: + agent = self.realm.players.get(agent_id) + agent_r = agent.row.val + agent_c = agent.col.val + + visible_entities = Entity.Query.window( + self.realm.datastore, + agent_r, agent_c, + self.config.PLAYER_VISION_RADIUS + ) + visible_tiles = Tile.Query.window( + self.realm.datastore, + agent_r, agent_c, + self.config.PLAYER_VISION_RADIUS) + + inventory = Item.Query.owned_by(self.realm.datastore, agent_id) + + # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are + # available in each task instance, via task.embedding + # CHECK ME: do we pass in self.agent_task_map[agent_id], + # so that we can include task embedding in the obs? + obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, + visible_tiles, visible_entities, inventory, market) return obs - def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): + def _compute_rewards(self): '''Computes the reward for the specified agent Override this method to create custom reward functions. You have full @@ -428,24 +422,23 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]): entity identified by ent_id. ''' # Initialization + agents = set(self.agents) infos = {agent_id: {'task': {}} for agent_id in agents} rewards = defaultdict(int) - agents = set(agents) - reward_cache = {} # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: - if task in reward_cache: - task_rewards, task_infos = reward_cache[task] - else: - task_rewards, task_infos = task.compute_rewards(self.game_state) - reward_cache[task] = (task_rewards, task_infos) + task_rewards, task_infos = task.compute_rewards(self.game_state) for agent_id, reward in task_rewards.items(): - if agent_id in agents and agent_id not in dones: + if agent_id in agents: rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress + # Make sure the dead agents return the rewards of -1 + for agent_id in self._dead_this_tick: + rewards[agent_id] = -1 + return rewards, infos ############################################################################ @@ -458,7 +451,9 @@ def render(self, mode='human'): @property def agents(self) -> List[AgentID]: '''For conformity with the PettingZoo API only; rendering is external''' - return list(set(self.realm.players.keys()) - self._dead_agents) + # "current" agents, which return obs: both alive and dead_this_tick + agents = set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys())) + return list(agents) def close(self): '''For conformity with the PettingZoo API only; rendering is external''' diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index ad05a0b2f..32bea3180 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -55,7 +55,8 @@ def __init__(self, self.entities = BasicObs(entities[0:config.PLAYER_N_OBS], EntityState.State.attr_name_to_col["id"]) - if config.COMBAT_SYSTEM_ENABLED: + self.dummy_obs = self.agent() is None + if config.COMBAT_SYSTEM_ENABLED and not self.dummy_obs: latest_combat_tick = self.agent().latest_combat_tick self.agent_in_combat = False if latest_combat_tick == 0 else \ (current_tick - latest_combat_tick) < config.COMBAT_STATUS_DURATION @@ -112,31 +113,37 @@ def entity(self, entity_id): def agent(self): return self.entity(self.agent_id) - def to_gym(self): - '''Convert the observation to a format that can be used by OpenAI Gym''' - - tiles = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])) - tiles[:self.tiles.shape[0],:] = self.tiles - - entities = np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1])) - entities[:self.entities.values.shape[0],:] = self.entities.values - + def get_empty_obs(self): gym_obs = { "CurrentTick": np.array([self.current_tick]), "AgentId": np.array([self.agent_id]), - "Tile": tiles, - "Entity": entities, - } + "Tile": np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), + "Entity": np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1]))} + if self.config.ITEM_SYSTEM_ENABLED: + gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS, + self.inventory.values.shape[1])) + if self.config.EXCHANGE_SYSTEM_ENABLED: + gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS, + self.market.values.shape[1])) + return gym_obs + + def to_gym(self): + '''Convert the observation to a format that can be used by OpenAI Gym''' + gym_obs = self.get_empty_obs() + if self.dummy_obs: + # return empty obs for the dead agents + if self.config.PROVIDE_ACTION_TARGETS: + gym_obs["ActionTargets"] = self._make_action_targets() + return gym_obs + + gym_obs['Tile'][:self.tiles.shape[0],:] = self.tiles + gym_obs['Entity'][:self.entities.values.shape[0],:] = self.entities.values if self.config.ITEM_SYSTEM_ENABLED: - inventory = np.zeros((self.config.INVENTORY_N_OBS, self.inventory.values.shape[1])) - inventory[:self.inventory.values.shape[0],:] = self.inventory.values - gym_obs["Inventory"] = inventory + gym_obs["Inventory"][:self.inventory.values.shape[0],:] = self.inventory.values if self.config.EXCHANGE_SYSTEM_ENABLED: - market = np.zeros((self.config.MARKET_N_OBS, self.market.values.shape[1])) - market[:self.market.values.shape[0],:] = self.market.values - gym_obs["Market"] = market + gym_obs["Market"][:self.market.values.shape[0],:] = self.market.values if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() @@ -149,9 +156,10 @@ def _make_action_targets(self): action.Direction: self._make_move_mask() } + multiplier = 0 if self.dummy_obs else 1 if self.config.COMBAT_SYSTEM_ENABLED: masks[action.Attack] = { - action.Style: np.ones(len(action.Style.edges), dtype=np.int8), + action.Style: np.ones(len(action.Style.edges), dtype=np.int8) * multiplier, action.Target: self._make_attack_mask() } @@ -170,7 +178,7 @@ def _make_action_targets(self): if self.config.EXCHANGE_SYSTEM_ENABLED: masks[action.Sell] = { action.InventoryItem: self._make_sell_mask(), - action.Price: np.ones(len(action.Price.edges), dtype=np.int8) + action.Price: np.ones(len(action.Price.edges), dtype=np.int8) * multiplier } masks[action.Buy] = { action.MarketItem: self._make_buy_mask() @@ -182,12 +190,14 @@ def _make_action_targets(self): if self.config.COMMUNICATION_SYSTEM_ENABLED: masks[action.Comm] = { - action.Token: np.ones(len(action.Token.edges), dtype=np.int8) + action.Token: np.ones(len(action.Token.edges), dtype=np.int8) * multiplier } return masks def _make_move_mask(self): + if self.dummy_obs: + return np.zeros(len(action.Direction.edges), dtype=np.int8) # pylint: disable=not-an-iterable return np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) @@ -199,12 +209,14 @@ def _make_attack_mask(self): assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_RANGE_REACH assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_MAGE_REACH assert self.config.COMBAT_RANGE_REACH == self.config.COMBAT_MAGE_REACH - - attack_range = self.config.COMBAT_MELEE_REACH + attack_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) + if self.dummy_obs: + return attack_mask agent = self.agent() entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], EntityState.State.attr_name_to_col["col"]]] + attack_range = self.config.COMBAT_MELEE_REACH within_range = utils.linf(entities_pos,(agent.row, agent.col)) <= attack_range immunity = self.config.COMBAT_SPAWN_IMMUNITY @@ -218,14 +230,14 @@ def _make_attack_mask(self): # allow friendly fire but no self shooting not_me = self.entities.ids != agent.id - attack_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) attack_mask[:self.entities.len] = within_range & not_me & spawn_immunity return attack_mask def _make_use_mask(self): # empty inventory -- nothing to use use_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0) or self.agent_in_combat: + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ + or self.dummy_obs or self.agent_in_combat: return use_mask item_skill = self._item_skill() @@ -273,7 +285,8 @@ def _item_skill(self): def _make_destroy_item_mask(self): destroy_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) # empty inventory -- nothing to destroy - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0) or self.agent_in_combat: + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ + or self.dummy_obs or self.agent_in_combat: return destroy_mask # not equipped items in the inventory can be destroyed @@ -285,7 +298,8 @@ def _make_destroy_item_mask(self): def _make_give_target_mask(self): give_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) # empty inventory -- nothing to give - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0) or self.agent_in_combat: + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ + or self.dummy_obs or self.agent_in_combat: return give_mask agent = self.agent() @@ -299,9 +313,11 @@ def _make_give_target_mask(self): return give_mask def _make_give_gold_mask(self): - gold = int(self.agent().gold) mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) + if self.dummy_obs: + return mask + gold = int(self.agent().gold) if gold and not self.agent_in_combat: mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1 @@ -311,7 +327,7 @@ def _make_sell_mask(self): sell_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) # empty inventory -- nothing to sell if not (self.config.EXCHANGE_SYSTEM_ENABLED and self.inventory.len > 0) \ - or self.agent_in_combat: + or self.dummy_obs or self.agent_in_combat: return sell_mask not_equipped = self.inventory.values[:,ItemState.State.attr_name_to_col["equipped"]] == 0 @@ -322,7 +338,7 @@ def _make_sell_mask(self): def _make_buy_mask(self): buy_mask = np.zeros(self.config.MARKET_N_OBS, dtype=np.int8) - if not self.config.EXCHANGE_SYSTEM_ENABLED or self.agent_in_combat: + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: return buy_mask agent = self.agent() diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index fb57ba4cf..305c31792 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Dict, List, Tuple, MutableMapping +from typing import Dict, List, Tuple, MutableMapping, Set from dataclasses import dataclass from copy import deepcopy from abc import ABC, abstractmethod @@ -31,7 +31,7 @@ class GameState: config: Config spawn_pos: Dict[int, Tuple[int, int]] # ent_id: (row, col) of all spawned agents - alive_agents: List[int] # of alive agents' ent_id (for convenience) + alive_agents: Set[int] # of alive agents' ent_id (for convenience) env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table @@ -192,12 +192,14 @@ def __init__(self, realm: Realm, config: Config): def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: # copy the datastore, by running astype entity_all = EntityState.Query.table(realm.datastore).astype(np.int16) + alive_agents = entity_all[:, EntityAttr["id"]] + alive_agents = set(alive_agents[alive_agents > 0]) return GameState( current_tick = realm.tick, config = self.config, spawn_pos = self.spawn_pos, - alive_agents = list(entity_all[:, EntityAttr["id"]]), + alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, item_data = ItemState.Query.table(realm.datastore).astype(np.int16), diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 1ddeb6776..710a9d7b0 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -2,6 +2,7 @@ from typing import List import random +import numpy as np from tqdm import tqdm import nmmo @@ -9,6 +10,7 @@ from nmmo.core.tile import TileState from nmmo.entity.entity import Entity, EntityState from nmmo.systems.item import ItemState +from nmmo.core import action from scripted import baselines # Allow private access for testing @@ -51,22 +53,47 @@ def test_observations(self): ] for player_id, player_obs in obs.items(): - self._validate_tiles(player_obs, self.env.realm) - self._validate_entitites( - player_id, player_obs, self.env.realm, entity_locations) - self._validate_inventory(player_id, player_obs, self.env.realm) - self._validate_market(player_obs, self.env.realm) - obs, _, dones, _ = self.env.step({}) - - # make sure dead agents return proper dones=True - self.assertEqual(len(self.env.agents), len(self.env.realm.players)) + if player_id in self.env.realm.players: # alive agents + self._validate_tiles(player_obs, self.env.realm) + self._validate_entitites( + player_id, player_obs, self.env.realm, entity_locations) + self._validate_inventory(player_id, player_obs, self.env.realm) + self._validate_market(player_obs, self.env.realm) + else: + # the obs of dead agents are dummy, all zeros + self.assertEqual(np.sum(player_obs['Tile']), 0) + self.assertEqual(np.sum(player_obs['Entity']), 0) + self.assertEqual(np.sum(player_obs['Inventory']), 0) + self.assertEqual(np.sum(player_obs['Market']), 0) + self.assertEqual(np.sum(player_obs['ActionTargets'][action.Move][action.Direction]), 0) + self.assertEqual(np.sum(player_obs['ActionTargets'][action.Attack][action.Style]), 0) + + obs, rewards, dones, infos = self.env.step({}) + + # make sure dead agents return proper dones=True, dummy obs, and -1 reward + self.assertEqual(len(self.env.agents), + len(self.env.realm.players) + len(self.env._dead_this_tick)) self.assertEqual(len(self.env.possible_agents), len(self.env.realm.players) + len(self.env._dead_agents)) + for agent_id in self.env.agents: + self.assertTrue(agent_id in obs) + self.assertTrue(agent_id in rewards) + self.assertTrue(agent_id in dones) + self.assertTrue(agent_id in infos) if len(self.env._dead_agents) > len(dead_agents): for dead_id in self.env._dead_agents - dead_agents: + self.assertEqual(rewards[dead_id], -1) self.assertTrue(dones[dead_id]) dead_agents.add(dead_id) + # check dead and alive + entity_all = EntityState.Query.table(self.env.realm.datastore).astype(np.int16) + alive_agents = entity_all[:, Entity.State.attr_name_to_col["id"]] + alive_agents = set(alive_agents[alive_agents > 0]) + for agent_id in alive_agents: + self.assertTrue(agent_id in self.env.realm.players) + self.assertTrue(agent_id not in self.env._dead_agents) + def _validate_tiles(self, obs, realm: Realm): for tile_obs in obs["Tile"]: tile_obs = TileState.parse_array(tile_obs) diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index f2f61f0e3..695716f79 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -126,8 +126,6 @@ def test_tickge_stay_alive_rip(self): # make sure that dead players not in the realm nor the datastore self.assertTrue(ent_id not in env.realm.players) self.assertTrue(ent_id not in entities) - # CHECK ME: dead agents are also not in infos - self.assertTrue(ent_id not in infos) # TickGE_5 is true. Agents 1-3 are dead, so # StayAlive(1,3) and StayAlive(3,4) are false, StayAlive(4) is true diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 678cedaf9..68ecff4a4 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -384,7 +384,7 @@ def profile_env_step(action_target=True, tasks=None, condition=None): ('env.realm.step():', lambda: env.realm.step({})), ('env._compute_observations():', lambda: env._compute_observations()), ('obs.to_gym(), ActionTarget:', lambda: {a: o.to_gym() for a,o in obs.items()}), - ('env._compute_rewards():', lambda: env._compute_rewards(obs.keys(), {})) + ('env._compute_rewards():', lambda: env._compute_rewards()) ] if condition: From dbf2bc4d5c653d9be655c982af3d856009033ab6 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Wed, 14 Jun 2023 17:33:11 -0700 Subject: [PATCH 021/113] add config.reset_on_death --- nmmo/core/config.py | 23 ++++++++--------------- nmmo/core/env.py | 4 +++- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 07d40ef77..cb485c95b 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -151,21 +151,6 @@ def game_system_enabled(self, name) -> bool: PLAYERS = [Agent] '''Player classes from which to spawn''' - ############################################################################ - ### Emulation Parameters - - EMULATE_FLAT_OBS = False - '''Emulate a flat observation space''' - - EMULATE_FLAT_ATN = False - '''Emulate a flat action space''' - - EMULATE_CONST_PLAYER_N = False - '''Emulate a constant number of agents''' - - EMULATE_CONST_HORIZON = False - '''Emulate a constant HORIZON simulations steps''' - ############################################################################ ### Population Parameters @@ -213,12 +198,20 @@ def PLAYER_VISION_DIAMETER(self): PLAYER_DEATH_FOG = None '''How long before spawning death fog. None for no death fog''' + ############################################################################ + ### Map Parameters + HORIZON = 1024 + '''Number of steps before the environment resets''' + ############################################################################ ### Agent Parameters IMMORTAL = False '''Debug parameter: prevents agents from dying except by void''' + RESET_ON_DEATH = False + '''Whether to reset the environment whenever an agent dies''' + BASE_HEALTH = 10 '''Initial Constitution level and agent health''' diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 98247b809..9d47ef4a0 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -286,7 +286,9 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): self._dead_this_tick = self.realm.step(actions) dones = {} for agent_id in self.agents: - if agent_id in self._dead_this_tick or self.realm.tick >= self.config.HORIZON: + if agent_id in self._dead_this_tick or \ + self.realm.tick >= self.config.HORIZON or \ + (self.config.RESET_ON_DEATH and len(self._dead_agents) > 0): self._dead_agents.add(agent_id) self._episode_stats[agent_id]["death_tick"] = self.realm.tick dones[agent_id] = True From 615947ae323f10af8bcf09506216f2d4adc8c32c Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 15 Jun 2023 13:01:14 +0000 Subject: [PATCH 022/113] made env._compute_observations() faster --- nmmo/core/env.py | 13 ++++++----- nmmo/core/tile.py | 5 ++++- tests/core/test_observation_tile.py | 34 ++++++++++++++++++++++++++++- tests/task/test_predicates.py | 3 ++- 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 9d47ef4a0..2a4cdcca4 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -376,6 +376,11 @@ def _compute_observations(self): obs = {} market = Item.Query.for_sale(self.realm.datastore) + # get tile map, to bypass the expensive tile window query + tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE) + radius = self.config.PLAYER_VISION_RADIUS + tile_obs_size = ((2*radius+1)**2, len(Tile.State.attr_name_to_col)) + for agent_id in self.agents: if agent_id not in self.realm.players: # return dummy obs for the agents in dead_this_tick @@ -391,12 +396,10 @@ def _compute_observations(self): visible_entities = Entity.Query.window( self.realm.datastore, agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS + radius ) - visible_tiles = Tile.Query.window( - self.realm.datastore, - agent_r, agent_c, - self.config.PLAYER_VISION_RADIUS) + visible_tiles = tile_map[agent_r-radius:agent_r+radius+1, + agent_c-radius:agent_c+radius+1,:].reshape(tile_obs_size) inventory = Item.Query.owned_by(self.realm.datastore, agent_id) diff --git a/nmmo/core/tile.py b/nmmo/core/tile.py index b991c4dcd..c6fcebb69 100644 --- a/nmmo/core/tile.py +++ b/nmmo/core/tile.py @@ -4,7 +4,7 @@ from nmmo.datastore.serialized import SerializedState from nmmo.lib import material -# pylint: disable=no-member +# pylint: disable=no-member,protected-access TileState = SerializedState.subclass( "Tile", [ "row", @@ -23,6 +23,9 @@ TileState.State.attr_name_to_col["row"], TileState.State.attr_name_to_col["col"], r, c, radius), + get_map=lambda ds, map_size: + ds.table("Tile")._data[1:(map_size*map_size+1)] + .reshape((map_size,map_size,len(TileState.State.attr_name_to_col))) ) class Tile(TileState): diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 84a231169..44c4a6894 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -22,7 +22,7 @@ def setUpClass(cls): def test_tile_attr(self): self.assertDictEqual(TileAttr, {'row': 0, 'col': 1, 'material_id': 2}) - def test_tile_correctness(self): + def test_obs_tile_correctness(self): obs = self.env._compute_observations() center = self.config.PLAYER_VISION_RADIUS tile_dim = self.config.PLAYER_VISION_DIAMETER @@ -57,6 +57,38 @@ def correct_tile(agent_obs: Observation, r_delta, c_delta): print('implemented:', timeit(lambda: agent_obs.tile(*d.delta), number=1000, globals=globals())) + def test_env_visible_tiles_correctness(self): + def correct_visible_tile(realm, agent_id): + # Based on numpy datatable window query + assert agent_id in realm.players, "agent_id not in the realm" + agent = realm.players[agent_id] + radius = realm.config.PLAYER_VISION_RADIUS + return TileState.Query.window( + realm.datastore, agent.row.val, agent.col.val, radius) + + # implemented in the env._compute_observations() + def visible_tiles_by_index(realm, agent_id, tile_map): + assert agent_id in realm.players, "agent_id not in the realm" + agent = realm.players[agent_id] + radius = realm.config.PLAYER_VISION_RADIUS + return tile_map[agent.row.val-radius:agent.row.val+radius+1, + agent.col.val-radius:agent.col.val+radius+1,:].reshape(225,3) + + # get tile map, to bypass the expensive tile window query + tile_map = TileState.Query.get_map(self.env.realm.datastore, self.config.MAP_SIZE) + + obs = self.env._compute_observations() + for agent_id in self.env.realm.players: + self.assertTrue(np.array_equal(correct_visible_tile(self.env.realm, agent_id), + obs[agent_id].tiles)) + + print('---test_visible_tile_window---') + print('reference:', timeit(lambda: correct_visible_tile(self.env.realm, agent_id), + number=1000, globals=globals())) + print('implemented:', + timeit(lambda: visible_tiles_by_index(self.env.realm, agent_id, tile_map), + number=1000, globals=globals())) + if __name__ == '__main__': unittest.main() diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index 695716f79..749bf626a 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -251,8 +251,9 @@ def test_occupy_tile(self): env = self._get_taskenv(test_preds, grass_map=True) # All agents to one corner + BORDER = env.config.MAP_BORDER for ent_id in env.realm.players: - change_agent_pos(env.realm,ent_id,(0,0)) + change_agent_pos(env.realm,ent_id,(BORDER,BORDER)) env.obs = env._compute_observations() _, _, _, infos = env.step({}) From a4f61fe4be2b368ffd120626c35fac5387587807 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 16 Jun 2023 02:17:02 +0000 Subject: [PATCH 023/113] made obs, make_attack_mask faster --- nmmo/core/observation.py | 32 +++++++++++++-------- tests/core/test_observation_tile.py | 43 +++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 12 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 32bea3180..ee2c88b38 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -117,14 +117,15 @@ def get_empty_obs(self): gym_obs = { "CurrentTick": np.array([self.current_tick]), "AgentId": np.array([self.agent_id]), - "Tile": np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), - "Entity": np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1]))} + "Tile": None, # np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), + "Entity": np.zeros((self.config.PLAYER_N_OBS, + self.entities.values.shape[1]), dtype=np.int16)} if self.config.ITEM_SYSTEM_ENABLED: gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS, self.inventory.values.shape[1])) if self.config.EXCHANGE_SYSTEM_ENABLED: gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS, - self.market.values.shape[1])) + self.market.values.shape[1]), dtype=np.int16) return gym_obs def to_gym(self): @@ -132,11 +133,13 @@ def to_gym(self): gym_obs = self.get_empty_obs() if self.dummy_obs: # return empty obs for the dead agents + gym_obs['Tile'] = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1]), dtype=np.int16) if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() return gym_obs - gym_obs['Tile'][:self.tiles.shape[0],:] = self.tiles + # NOTE: assume that all len(self.tiles) == self.config.MAP_N_OBS + gym_obs['Tile'] = self.tiles gym_obs['Entity'][:self.entities.values.shape[0],:] = self.entities.values if self.config.ITEM_SYSTEM_ENABLED: @@ -156,10 +159,12 @@ def _make_action_targets(self): action.Direction: self._make_move_mask() } - multiplier = 0 if self.dummy_obs else 1 if self.config.COMBAT_SYSTEM_ENABLED: + # Test below. see tests/core/test_observation_tile.py, test_action_target_consts() + # assert len(action.Style.edges) == 3 masks[action.Attack] = { - action.Style: np.ones(len(action.Style.edges), dtype=np.int8) * multiplier, + action.Style: np.zeros(3, dtype=np.int8) if self.dummy_obs\ + else np.ones(3, dtype=np.int8), action.Target: self._make_attack_mask() } @@ -178,7 +183,8 @@ def _make_action_targets(self): if self.config.EXCHANGE_SYSTEM_ENABLED: masks[action.Sell] = { action.InventoryItem: self._make_sell_mask(), - action.Price: np.ones(len(action.Price.edges), dtype=np.int8) * multiplier + action.Price: np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) if self.dummy_obs\ + else np.ones(self.config.PRICE_N_OBS, dtype=np.int8) } masks[action.Buy] = { action.MarketItem: self._make_buy_mask() @@ -190,7 +196,9 @@ def _make_action_targets(self): if self.config.COMMUNICATION_SYSTEM_ENABLED: masks[action.Comm] = { - action.Token: np.ones(len(action.Token.edges), dtype=np.int8) * multiplier + action.Token:\ + np.zeros(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) if self.dummy_obs\ + else np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) } return masks @@ -214,10 +222,10 @@ def _make_attack_mask(self): return attack_mask agent = self.agent() - entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], - EntityState.State.attr_name_to_col["col"]]] - attack_range = self.config.COMBAT_MELEE_REACH - within_range = utils.linf(entities_pos,(agent.row, agent.col)) <= attack_range + within_range = np.maximum( # calculating the l-inf dist + np.abs(self.entities.values[:,EntityState.State.attr_name_to_col["row"]] - agent.row), + np.abs(self.entities.values[:,EntityState.State.attr_name_to_col["col"]] - agent.col) + ) <= self.config.COMBAT_MELEE_REACH immunity = self.config.COMBAT_SPAWN_IMMUNITY if 0 < immunity < agent.time_alive: diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 44c4a6894..2d58a394c 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -5,8 +5,10 @@ import nmmo from nmmo.core.tile import TileState +from nmmo.entity.entity import EntityState from nmmo.core.observation import Observation from nmmo.core import action as Action +from nmmo.lib import utils TileAttr = TileState.State.attr_name_to_col @@ -22,6 +24,11 @@ def setUpClass(cls): def test_tile_attr(self): self.assertDictEqual(TileAttr, {'row': 0, 'col': 1, 'material_id': 2}) + def test_action_target_consts(self): + self.assertEqual(len(Action.Style.edges), 3) + self.assertEqual(len(Action.Price.edges), self.config.PRICE_N_OBS) + self.assertEqual(len(Action.Token.edges), self.config.COMMUNICATION_NUM_TOKENS) + def test_obs_tile_correctness(self): obs = self.env._compute_observations() center = self.config.PLAYER_VISION_RADIUS @@ -37,6 +44,9 @@ def correct_tile(agent_obs: Observation, r_delta, c_delta): return TileState.parse_array(agent_obs.tiles[r_cond & c_cond][0]) for agent_obs in obs.values(): + # check if the tile obs size + self.assertEqual(len(agent_obs.tiles), self.config.MAP_N_OBS) + # check if the coord conversion is correct row_map = agent_obs.tiles[:,TileAttr['row']].reshape(tile_dim,tile_dim) col_map = agent_obs.tiles[:,TileAttr['col']].reshape(tile_dim,tile_dim) @@ -89,6 +99,39 @@ def visible_tiles_by_index(realm, agent_id, tile_map): timeit(lambda: visible_tiles_by_index(self.env.realm, agent_id, tile_map), number=1000, globals=globals())) + def test_make_attack_mask_within_range(self): + # pylint: disable=invalid-name + EntityAttr = EntityState.State.attr_name_to_col + def correct_within_range(entities, attack_range, agent_row, agent_col): + entities_pos = entities[:,[EntityAttr["row"],EntityAttr["col"]]] + within_range = utils.linf(entities_pos,(agent_row, agent_col)) <= attack_range + return within_range + + # implemented in the Observation._make_attack_mask() + def simple_within_range(entities, attack_range, agent_row, agent_col): + return np.maximum( + np.abs(entities[:,EntityAttr["row"]] - agent_row), + np.abs(entities[:,EntityAttr["col"]] - agent_col) + ) <= attack_range + + obs = self.env._compute_observations() + attack_range = self.config.COMBAT_MELEE_REACH + + for agent_obs in obs.values(): + entities = agent_obs.entities.values + agent = agent_obs.agent() + self.assertTrue(np.array_equal( + correct_within_range(entities, attack_range, agent.row, agent.col), + simple_within_range(entities, attack_range, agent.row, agent.col))) + + print('---test_attack_within_range---') + print('reference:', timeit( + lambda: correct_within_range(entities, attack_range, agent.row, agent.col), + number=1000, globals=globals())) + print('implemented:', timeit( + lambda: simple_within_range(entities, attack_range, agent.row, agent.col), + number=1000, globals=globals())) + if __name__ == '__main__': unittest.main() From b9bb4b94e821595700aa9aca5b586ac30f6ae7dd Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 16 Jun 2023 02:56:56 +0000 Subject: [PATCH 024/113] removed unnecessary len --- nmmo/lib/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmmo/lib/utils.py b/nmmo/lib/utils.py index e4ebd33cd..a71497a6d 100644 --- a/nmmo/lib/utils.py +++ b/nmmo/lib/utils.py @@ -72,7 +72,7 @@ def seed(): def linf(pos1, pos2): # pos could be a single (r,c) or a vector of (r,c)s diff = np.abs(np.array(pos1) - np.array(pos2)) - return np.max(diff, axis=len(diff.shape)-1) + return np.max(diff, axis=-1) #Bounds checker def in_bounds(r, c, shape, border=0): From 29de5b270d83b66546fb5aa5ed7c5de40360cc58 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 16 Jun 2023 08:40:30 +0000 Subject: [PATCH 025/113] lazy load gs data, use pre-computed index --- nmmo/task/game_state.py | 68 ++++++++++++++++++++++------- tests/core/test_observation_tile.py | 48 +++++++++++++++++++- 2 files changed, 98 insertions(+), 18 deletions(-) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 305c31792..3940ab8b2 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -1,8 +1,9 @@ from __future__ import annotations -from typing import Dict, List, Tuple, MutableMapping, Set +from typing import Dict, Iterable, Tuple, MutableMapping, Set from dataclasses import dataclass from copy import deepcopy from abc import ABC, abstractmethod +from collections import defaultdict import numpy as np @@ -35,8 +36,11 @@ class GameState: env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table + entity_index: Dict[int, Iterable] # index for where_in_1d item_data: np.ndarray # a copied, whole Item ds table + item_index: Dict[int, Iterable] event_data: np.ndarray # a copied, whole Event log table + event_index: Dict[int, Iterable] cache_result: MutableMapping # cache for general memoization # add helper functions below @@ -47,15 +51,15 @@ def entity_or_none(self, ent_id): return None - def where_in_id(self, data_type, subject: List[int]): + def where_in_id(self, data_type, subject: Iterable[int]): if data_type == 'entity': - flt_idx = np.in1d(self.entity_data[:, EntityAttr['id']], subject) + flt_idx = [row for sbj in subject for row in self.entity_index.get(sbj,[])] return self.entity_data[flt_idx] if data_type == 'item': - flt_idx = np.in1d(self.item_data[:, ItemAttr['owner_id']], subject) + flt_idx = [row for sbj in subject for row in self.item_index.get(sbj,[])] return self.item_data[flt_idx] if data_type == 'event': - flt_idx = np.in1d(self.event_data[:, EventAttr['ent_id']], subject) + flt_idx = [row for sbj in subject for row in self.event_index.get(sbj,[])] return self.event_data[flt_idx] raise ValueError("data_type must be in entity, item, event") @@ -151,18 +155,31 @@ class GroupView: def __init__(self, gs: GameState, subject: Group): self._gs = gs self._subject = subject - self._sbj_ent = gs.where_in_id('entity', subject.agents) - self._sbj_item = gs.where_in_id('item', subject.agents) - self._sbj_event = gs.where_in_id('event', subject.agents) - - self.entity = EntityView(gs, subject, self._sbj_ent) - self.item = ItemView(gs, subject, self._sbj_item) - self.event = EventView(gs, subject, self._sbj_event) self.obs = GroupObsView(gs, subject) + self._sbj_ent = None + self.entity = None + self._sbj_item = None + self.item = None + self._sbj_event = None + self.event = None + def __getattribute__(self, attr): + # lazy loading + if attr in ['_sjb_ent', 'entity']: + self._sbj_ent = self._gs.where_in_id('entity', self._subject.agents) + if attr == 'entity': + self.entity = EntityView(self._gs, self._subject, self._sbj_ent) + if attr in ['_sbj_item', 'item']: + self._sbj_item = self._gs.where_in_id('item', self._subject.agents) + if attr == 'item': + self.item = ItemView(self._gs, self._subject, self._sbj_item) + if attr in ['_sbj_event', 'event']: + self._sbj_event = self._gs.where_in_id('event', self._subject.agents) + if attr == 'event': + self.event = EventView(self._gs, self._subject, self._sbj_event) if attr in ['_gs','_subject','_sbj_ent','_sbj_item','entity','item','event','obs']: - return object.__getattribute__(self,attr) + return object.__getattribute__(self, attr) # Cached optimization k = (self._subject, attr) @@ -172,6 +189,10 @@ def __getattribute__(self, attr): try: # Get property if attr in EntityAttr.keys(): + if self._sbj_ent is None: + self._sbj_ent = self._gs.where_in_id('entity', self._subject.agents) + if self.entity is None: + self.entity = EntityView(self._gs, self._subject, self._sbj_ent) v = getattr(self.entity, attr) else: v = object.__getattribute__(self, attr) @@ -179,7 +200,7 @@ def __getattribute__(self, attr): return v except AttributeError: # View behavior - return object.__getattribute__(self._gs,attr) + return object.__getattribute__(self._gs, attr) class GameStateGenerator: def __init__(self, realm: Realm, config: Config): @@ -194,6 +215,8 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: entity_all = EntityState.Query.table(realm.datastore).astype(np.int16) alive_agents = entity_all[:, EntityAttr["id"]] alive_agents = set(alive_agents[alive_agents > 0]) + item_data = ItemState.Query.table(realm.datastore).astype(np.int16) + event_data = EventState.Query.table(realm.datastore).astype(np.int16) return GameState( current_tick = realm.tick, @@ -202,7 +225,20 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, - item_data = ItemState.Query.table(realm.datastore).astype(np.int16), - event_data = EventState.Query.table(realm.datastore).astype(np.int16), + entity_index = self._precompute_index(entity_all, EntityAttr["id"]), + item_data = item_data, + item_index = self._precompute_index(item_data, ItemAttr['owner_id']), + event_data = event_data, + event_index = self._precompute_index(event_data, EventAttr['ent_id']), cache_result = {} ) + + @staticmethod + def _precompute_index(table, id_col): + index = defaultdict() + for row, id_ in enumerate(table[:,id_col]): + if id_ in index: + index[id_].append(row) + else: + index[id_] = [row] + return index diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 2d58a394c..9585e73ff 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -1,16 +1,24 @@ # pylint: disable=protected-access,bad-builtin import unittest from timeit import timeit +from collections import defaultdict import numpy as np import nmmo from nmmo.core.tile import TileState from nmmo.entity.entity import EntityState +from nmmo.systems.item import ItemState +from nmmo.lib.event_log import EventState from nmmo.core.observation import Observation from nmmo.core import action as Action from nmmo.lib import utils +from tests.testhelpers import ScriptedAgentTestConfig TileAttr = TileState.State.attr_name_to_col +EntityAttr = EntityState.State.attr_name_to_col +ItemAttr = ItemState.State.attr_name_to_col +EventAttr = EventState.State.attr_name_to_col + class TestObservationTile(unittest.TestCase): @classmethod @@ -100,8 +108,6 @@ def visible_tiles_by_index(realm, agent_id, tile_map): number=1000, globals=globals())) def test_make_attack_mask_within_range(self): - # pylint: disable=invalid-name - EntityAttr = EntityState.State.attr_name_to_col def correct_within_range(entities, attack_range, agent_row, agent_col): entities_pos = entities[:,[EntityAttr["row"],EntityAttr["col"]]] within_range = utils.linf(entities_pos,(agent_row, agent_col)) <= attack_range @@ -132,6 +138,44 @@ def simple_within_range(entities, attack_range, agent_row, agent_col): lambda: simple_within_range(entities, attack_range, agent.row, agent.col), number=1000, globals=globals())) + def test_gs_where_in_1d(self): + config = ScriptedAgentTestConfig() + env = nmmo.Env(config) + env.reset(seed=0) + for _ in range(5): + env.step({}) + + def correct_where_in_1d(event_data, subject): + flt_idx = np.in1d(event_data[:, EventAttr['ent_id']], subject) + return event_data[flt_idx] + + def where_in_1d_with_index(event_data, subject, index): + flt_idx = [row for sbj in subject for row in index.get(sbj,[])] + return event_data[flt_idx] + + event_data = EventState.Query.table(env.realm.datastore).astype(np.int16) + event_index = defaultdict() + for row, id_ in enumerate(event_data[:,EventAttr['ent_id']]): + if id_ in event_index: + event_index[id_].append(row) + else: + event_index[id_] = [row] + + # NOTE: the index-based approach returns the data in different order, + # and all the operations in the task system don't use the order info + arr = where_in_1d_with_index(event_data, [1,2,3], event_index) + sorted_idx = np.argsort(arr[:,0]) # event_id + self.assertTrue(np.array_equal(correct_where_in_1d(event_data, [1,2,3]), + arr[sorted_idx])) + + print('---test_gs_where_in_1d---') + print('reference:', timeit( + lambda: correct_where_in_1d(event_data, [1, 2, 3]), + number=1000, globals=globals())) + print('implemented:', timeit( + lambda: where_in_1d_with_index(event_data, [1, 2, 3], event_index), + number=1000, globals=globals())) + if __name__ == '__main__': unittest.main() From 7b85371566136b21e36cd480c762010cedde27a9 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 16 Jun 2023 10:23:42 +0000 Subject: [PATCH 026/113] better lazy eval --- nmmo/task/game_state.py | 65 +++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 3940ab8b2..410c56946 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -4,6 +4,7 @@ from copy import deepcopy from abc import ABC, abstractmethod from collections import defaultdict +import functools import numpy as np @@ -36,31 +37,39 @@ class GameState: env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table - entity_index: Dict[int, Iterable] # index for where_in_1d + entity_index: Dict[int, Iterable] # precomputed index for where_in_1d item_data: np.ndarray # a copied, whole Item ds table item_index: Dict[int, Iterable] event_data: np.ndarray # a copied, whole Event log table event_index: Dict[int, Iterable] cache_result: MutableMapping # cache for general memoization + # add helper functions below + @functools.lru_cache def entity_or_none(self, ent_id): flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id if np.any(flt_ent): return EntityState.parse_array(self.entity_data[flt_ent][0]) - return None def where_in_id(self, data_type, subject: Iterable[int]): + k = (data_type, subject) + if k in self.cache_result: + return self.cache_result[k] + if data_type == 'entity': flt_idx = [row for sbj in subject for row in self.entity_index.get(sbj,[])] - return self.entity_data[flt_idx] + self.cache_result[k] = self.entity_data[flt_idx] if data_type == 'item': flt_idx = [row for sbj in subject for row in self.item_index.get(sbj,[])] - return self.item_data[flt_idx] + self.cache_result[k] = self.item_data[flt_idx] if data_type == 'event': flt_idx = [row for sbj in subject for row in self.event_index.get(sbj,[])] - return self.event_data[flt_idx] + self.cache_result[k] = self.event_data[flt_idx] + if data_type in ['entity', 'item', 'event']: + return self.cache_result[k] + raise ValueError("data_type must be in entity, item, event") def get_subject_view(self, subject: Group): @@ -157,27 +166,31 @@ def __init__(self, gs: GameState, subject: Group): self._subject = subject self.obs = GroupObsView(gs, subject) - self._sbj_ent = None - self.entity = None - self._sbj_item = None - self.item = None - self._sbj_event = None - self.event = None + @functools.cached_property + def _sbj_ent(self): + return self._gs.where_in_id('entity', self._subject.agents) + + @functools.cached_property + def entity(self): + return EntityView(self._gs, self._subject, self._sbj_ent) + + @functools.cached_property + def _sbj_item(self): + return self._gs.where_in_id('item', self._subject.agents) + + @functools.cached_property + def item(self): + return ItemView(self._gs, self._subject, self._sbj_item) + + @functools.cached_property + def _sbj_event(self): + return self._gs.where_in_id('event', self._subject.agents) + + @functools.cached_property + def event(self): + return EventView(self._gs, self._subject, self._sbj_event) def __getattribute__(self, attr): - # lazy loading - if attr in ['_sjb_ent', 'entity']: - self._sbj_ent = self._gs.where_in_id('entity', self._subject.agents) - if attr == 'entity': - self.entity = EntityView(self._gs, self._subject, self._sbj_ent) - if attr in ['_sbj_item', 'item']: - self._sbj_item = self._gs.where_in_id('item', self._subject.agents) - if attr == 'item': - self.item = ItemView(self._gs, self._subject, self._sbj_item) - if attr in ['_sbj_event', 'event']: - self._sbj_event = self._gs.where_in_id('event', self._subject.agents) - if attr == 'event': - self.event = EventView(self._gs, self._subject, self._sbj_event) if attr in ['_gs','_subject','_sbj_ent','_sbj_item','entity','item','event','obs']: return object.__getattribute__(self, attr) @@ -189,10 +202,6 @@ def __getattribute__(self, attr): try: # Get property if attr in EntityAttr.keys(): - if self._sbj_ent is None: - self._sbj_ent = self._gs.where_in_id('entity', self._subject.agents) - if self.entity is None: - self.entity = EntityView(self._gs, self._subject, self._sbj_ent) v = getattr(self.entity, attr) else: v = object.__getattribute__(self, attr) From 61a06d7b5d5c87b71b57d2cecfecd1425cfbf109 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 17 Jun 2023 15:40:29 +0900 Subject: [PATCH 027/113] implement env-level rng using gym seeding --- nmmo/core/env.py | 28 +++++++------- nmmo/core/realm.py | 14 ++++--- nmmo/core/tile.py | 5 ++- nmmo/entity/entity.py | 1 + nmmo/entity/entity_manager.py | 10 ++--- nmmo/entity/npc.py | 11 ++---- nmmo/entity/player.py | 4 +- nmmo/lib/seeding.py | 32 +++++++++++++++ nmmo/lib/spawn.py | 21 +++++----- nmmo/lib/utils.py | 3 -- nmmo/systems/ai/behavior.py | 14 ++++--- nmmo/systems/ai/move.py | 31 +++++++-------- nmmo/systems/combat.py | 6 +-- nmmo/systems/droptable.py | 4 +- scripted/baselines.py | 19 +++++---- scripted/move.py | 39 ++++++++++--------- tests/core/test_tile.py | 3 ++ tests/entity/test_entity.py | 3 ++ tests/test_determinism.py | 73 +++++++++++++---------------------- tests/test_team_spawn.py | 7 ++-- tests/testhelpers.py | 13 +++++++ 21 files changed, 187 insertions(+), 154 deletions(-) create mode 100644 nmmo/lib/seeding.py diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 2a4cdcca4..7840b13da 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,5 +1,4 @@ import functools -import random from typing import Any, Dict, List, Callable from collections import defaultdict from copy import copy @@ -18,6 +17,7 @@ from nmmo.systems.item import Item from nmmo.task import task_api from nmmo.task.game_state import GameStateGenerator +from nmmo.lib import seeding from scripted.baselines import Scripted class Env(ParallelEnv): @@ -27,12 +27,12 @@ class Env(ParallelEnv): def __init__(self, config: Default = nmmo.config.Default(), seed = None): - self._init_random(seed) - + self.np_random, self._np_seed = seeding.np_random(seed) super().__init__() self.config = config - self.realm = realm.Realm(config) + self.realm = realm.Realm(config, self.np_random) + self._reset_required = True self.obs = None self._dummy_obs = None @@ -87,11 +87,6 @@ def box(rows, cols): return gym.spaces.Dict(obs_space) - def _init_random(self, seed): - if seed is not None: - np.random.seed(seed) - random.seed(seed) - @functools.lru_cache(maxsize=None) def action_space(self, agent): '''Neural MMO Action Space @@ -147,9 +142,9 @@ def reset(self, map_id=None, seed=None, options=None, but finite horizon: ~1000 timesteps for small maps and 5000+ timesteps for large maps ''' - - self._init_random(seed) - self.realm.reset(map_id) + if seed is not None: + self.np_random, self._np_seed = seeding.np_random(seed) + self.realm.reset(self.np_random, map_id) self._dead_agents = set() self._episode_stats.clear() self._dead_this_tick = {} @@ -158,6 +153,7 @@ def reset(self, map_id=None, seed=None, options=None, for eid, ent in self.realm.players.items(): if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) + ent.agent.np_random = self.np_random self._dummy_obs = self._make_dummy_obs() self.obs = self._compute_observations() @@ -170,6 +166,8 @@ def reset(self, map_id=None, seed=None, options=None, task.reset() self.agent_task_map = self._map_task_to_agent() + self._reset_required = False + return {a: o.to_gym() for a,o in self.obs.items()} def _map_task_to_agent(self): @@ -274,7 +272,7 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): Provided for conformity with PettingZoo ''' - assert self.obs is not None, 'step() called before reset' + assert not self._reset_required, 'step() called before reset' # Add in scripted agents' actions, if any if self.scripted_agents: actions = self._compute_scripted_agent_actions(actions) @@ -464,7 +462,9 @@ def close(self): '''For conformity with the PettingZoo API only; rendering is external''' def seed(self, seed=None): - return self._init_random(seed) + '''Reseeds the environment. reset() must be called after seed(), and before step().''' + self.np_random, self._np_seed = seeding.np_random(seed) + self._reset_required = True def state(self) -> np.ndarray: raise NotImplementedError diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 7cf0a2008..c9f15d3db 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -4,8 +4,6 @@ from collections import defaultdict from typing import Dict -import numpy as np - import nmmo from nmmo.core.log_helper import LogHelper from nmmo.core.map import Map @@ -30,8 +28,9 @@ def prioritized(entities: Dict, merged: Dict): class Realm: """Top-level world object""" - def __init__(self, config): + def __init__(self, config, np_random): self.config = config + self.np_random = np_random # rng assert isinstance( config, nmmo.config.Config ), f"Config {config} is not a config instance (did you pass the class?)" @@ -39,6 +38,8 @@ def __init__(self, config): Action.hook(config) # Generate maps if they do not exist + # CHECK ME: Does the map generator need use the env-level RNG? + # Do the maps need to be deterministic? config.MAP_GENERATOR(config).generate_all_maps() self.datastore = NumpyDatastore() @@ -67,16 +68,17 @@ def __init__(self, config): # Initialize actions nmmo.Action.init(config) - def reset(self, map_id: int = None): + def reset(self, np_random, map_id: int = None): """Reset the environment and load the specified map Args: idx: Map index to load """ + self.np_random = np_random self.log_helper.reset() self.event_log.reset() - map_id = map_id or np.random.randint(self.config.MAP_N) + 1 + map_id = map_id or self.np_random.integers(self.config.MAP_N) + 1 self.map.reset(map_id) self.tick = 0 @@ -170,7 +172,7 @@ def step(self, actions): # TODO: we should be randomizing these, otherwise the lower ID agents # will always go first. --> ONLY SHUFFLE BUY if priority == Buy.priority: - np.random.shuffle(merged[priority]) + self.np_random.shuffle(merged[priority]) # CHECK ME: do we need this line? # ent_id, (atn, args) = merged[priority][0] diff --git a/nmmo/core/tile.py b/nmmo/core/tile.py index c6fcebb69..c931942cd 100644 --- a/nmmo/core/tile.py +++ b/nmmo/core/tile.py @@ -1,5 +1,4 @@ from types import SimpleNamespace -import numpy as np from nmmo.datastore.serialized import SerializedState from nmmo.lib import material @@ -33,6 +32,7 @@ def __init__(self, realm, r, c): super().__init__(realm.datastore, TileState.Limits(realm.config)) self.realm = realm self.config = realm.config + self.np_random = realm.np_random self.row.update(r) self.col.update(c) @@ -65,6 +65,7 @@ def void(self): return self.material == material.Void def reset(self, mat, config): + self.np_random = self.realm.np_random # reset the RNG self.state = mat(config) self.material = mat(config) self.material_id.update(self.state.index) @@ -83,7 +84,7 @@ def remove_entity(self, ent_id): del self.entities[ent_id] def step(self): - if not self.depleted or np.random.rand() > self.material.respawn: + if not self.depleted or self.np_random.random() > self.material.respawn: return self.depleted = False diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index 13a523b93..e7e079643 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -218,6 +218,7 @@ def __init__(self, realm, pos, entity_id, name): self.realm = realm self.config: Config = realm.config + self.np_random = realm.np_random self.policy = name self.entity_id = entity_id diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py index 91763c675..7ffce852d 100644 --- a/nmmo/entity/entity_manager.py +++ b/nmmo/entity/entity_manager.py @@ -1,7 +1,5 @@ from collections.abc import Mapping from typing import Dict - -import numpy as np from ordered_set import OrderedSet from nmmo.entity.entity import Entity @@ -16,6 +14,7 @@ def __init__(self, realm): self.datastore = realm.datastore self.realm = realm self.config = realm.config + self.np_random = realm.np_random self.entities: Dict[int, Entity] = {} self.dead_this_tick: Dict[int, Entity] = {} @@ -44,6 +43,7 @@ def packet(self): return {k: v.packet() for k, v in self.corporeal.items()} def reset(self): + self.np_random = self.realm.np_random # reset the RNG for ent in self.entities.values(): # destroy the items if self.config.ITEM_SYSTEM_ENABLED: @@ -109,12 +109,12 @@ def spawn(self): if self.spawn_dangers: danger = self.spawn_dangers[-1] - r, c = combat.spawn(config, danger) + r, c = combat.spawn(config, danger, self.np_random) else: center = config.MAP_CENTER border = self.config.MAP_BORDER # pylint: disable=unbalanced-tuple-unpacking - r, c = np.random.randint(border, center+border, 2).tolist() + r, c = self.np_random.integers(border, center+border, 2).tolist() npc = NPC.spawn(self.realm, (r, c), self.next_id) if npc: @@ -146,7 +146,7 @@ def __init__(self, realm): def reset(self): super().reset() - self._agent_loader = self.loader_class(self.config) + self._agent_loader = self.loader_class(self.config, self.np_random) self.spawned = OrderedSet() def spawn_individual(self, r, c, idx): diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py index 96907ab95..eceb989c1 100644 --- a/nmmo/entity/npc.py +++ b/nmmo/entity/npc.py @@ -1,6 +1,3 @@ - -import random - from nmmo.entity import entity from nmmo.core import action as Action from nmmo.systems import combat, droptable @@ -107,7 +104,7 @@ def spawn(realm, pos, iden): ent.spawn_danger = danger # Select combat focus - style = random.choice((Action.Melee, Action.Range, Action.Mage)) + style = realm.np_random.choice((Action.Melee, Action.Range, Action.Mage)) ent.skills.style = style # Compute level @@ -134,7 +131,7 @@ def spawn(realm, pos, iden): # Equipment to instantiate if config.EQUIPMENT_SYSTEM_ENABLED: - lvl = level - random.random() + lvl = level - realm.np_random.random() ilvl = int(5 * lvl) offense = int(config.NPC_BASE_DAMAGE + lvl*config.NPC_LEVEL_DAMAGE) @@ -143,11 +140,11 @@ def spawn(realm, pos, iden): ent.equipment = Equipment(ilvl, offense, offense, offense, defense, defense, defense) armor = [Item.Hat, Item.Top, Item.Bottom] - ent.droptable.add(random.choice(armor)) + ent.droptable.add(realm.np_random.choice(armor)) if config.PROFESSION_SYSTEM_ENABLED: tools = [Item.Rod, Item.Gloves, Item.Pickaxe, Item.Axe, Item.Chisel] - ent.droptable.add(random.choice(tools)) + ent.droptable.add(realm.np_random.choice(tools)) return ent diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py index 70d4be283..eb9d864ff 100644 --- a/nmmo/entity/player.py +++ b/nmmo/entity/player.py @@ -1,5 +1,3 @@ -import numpy as np - from nmmo.systems.skill import Skills from nmmo.entity import entity @@ -73,7 +71,7 @@ def receive_damage(self, source, dmg): # TODO: make source receive the highest-level items first # because source cannot take it if the inventory is full item_list = list(self.inventory.items) - np.random.shuffle(item_list) + self.np_random.shuffle(item_list) for item in item_list: self.inventory.remove(item) diff --git a/nmmo/lib/seeding.py b/nmmo/lib/seeding.py new file mode 100644 index 000000000..5cd6e1acb --- /dev/null +++ b/nmmo/lib/seeding.py @@ -0,0 +1,32 @@ +# copied from https://github.com/openai/gym/blob/master/gym/utils/seeding.py + +"""Set of random number generator functions: seeding, generator, hashing seeds.""" +from typing import Any, Optional, Tuple + +import numpy as np + +from gym import error + + +def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]: + """Generates a random number generator from the seed and returns the Generator and seed. + + Args: + seed: The seed used to create the generator + + Returns: + The generator and resulting seed + + Raises: + Error: Seed must be a non-negative integer or omitted + """ + if seed is not None and not (isinstance(seed, int) and 0 <= seed): + raise error.Error(f"Seed must be a non-negative integer or omitted, not {seed}") + + seed_seq = np.random.SeedSequence(seed) + np_seed = seed_seq.entropy + rng = RandomNumberGenerator(np.random.PCG64(seed_seq)) + return rng, np_seed + + +RNG = RandomNumberGenerator = np.random.Generator diff --git a/nmmo/lib/spawn.py b/nmmo/lib/spawn.py index 70a78f400..6d966527b 100644 --- a/nmmo/lib/spawn.py +++ b/nmmo/lib/spawn.py @@ -1,14 +1,13 @@ -import numpy as np - class SequentialLoader: '''config.PLAYER_LOADER that spreads out agent populations''' - def __init__(self, config): + def __init__(self, config, np_random): items = config.PLAYERS self.items = items self.idx = -1 - self.candidate_spawn_pos = spawn_concurrent(config) + # np_random is the env-level rng + self.candidate_spawn_pos = spawn_concurrent(config, np_random) def __iter__(self): return self @@ -22,7 +21,7 @@ def get_spawn_position(self, agent_id): # the basic SequentialLoader just provides a random spawn position return self.candidate_spawn_pos.pop() -def spawn_continuous(config): +def spawn_continuous(config, np_random): '''Generates spawn positions for new agents Randomly selects spawn positions around @@ -38,10 +37,11 @@ def spawn_continuous(config): mmax = config.MAP_CENTER + config.MAP_BORDER mmin = config.MAP_BORDER - var = np.random.randint(mmin, mmax) - fixed = np.random.choice([mmin, mmax]) + # np_random is the env-level RNG, a drop-in replacement of numpy.random + var = np_random.integers(mmin, mmax) + fixed = np_random.choice([mmin, mmax]) r, c = int(var), int(fixed) - if np.random.rand() > 0.5: + if np_random.random() > 0.5: r, c = c, r return (r, c) @@ -63,7 +63,7 @@ def get_edge_tiles(config): return sides -def spawn_concurrent(config): +def spawn_concurrent(config, np_random): '''Generates spawn positions for new agents Evenly spaces agents around the borders @@ -108,7 +108,8 @@ def spawn_concurrent(config): spawn_positions.append(pos) else: # team_n = 1: to fit 128 agents in a small map, ignore spacing and spawn randomly - np.random.shuffle(sides) + # np_random is the env-level RNG, a drop-in replacement of numpy.random + np_random.shuffle(sides) spawn_positions = sides[:config.PLAYER_N] return spawn_positions diff --git a/nmmo/lib/utils.py b/nmmo/lib/utils.py index a71497a6d..a2c0bb3ed 100644 --- a/nmmo/lib/utils.py +++ b/nmmo/lib/utils.py @@ -66,9 +66,6 @@ def __ge__(self, other): class IterableNameComparable(Iterable, NameComparable): pass -def seed(): - return int(np.random.randint(0, 2**32)) - def linf(pos1, pos2): # pos could be a single (r,c) or a vector of (r,c)s diff = np.abs(np.array(pos1) - np.array(pos2)) diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index 85cbf1c26..aa60e7f62 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -23,7 +23,8 @@ def update(entity): entity.water = None def pathfind(realm, actions, entity, target): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map.tiles, entity, target)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.pathfind(realm.map.tiles, entity, target, realm.np_random)} def explore(realm, actions, entity): sz = realm.config.TERRAIN_SIZE @@ -42,10 +43,13 @@ def explore(realm, actions, entity): pathfind(realm, actions, entity, tile) def meander(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.habitable(realm.map.tiles, entity)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.habitable(realm.map.tiles, entity, realm.np_random)} def evade(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker, + realm.np_random)} def hunt(realm, actions, entity): #Move args @@ -53,9 +57,9 @@ def hunt(realm, actions, entity): direction = None if distance == 0: - direction = move.random_direction() + direction = move.random_direction(realm.np_random) elif distance > 1: - direction = move.pathfind(realm.map.tiles, entity, entity.target) + direction = move.pathfind(realm.map.tiles, entity, entity.target, realm.np_random) if direction is not None: actions[nmmo.action.Move] = {nmmo.action.Direction: direction} diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index 80eb461ac..d3dfcbd7d 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -1,15 +1,12 @@ # pylint: disable=R0401 - -import random - from nmmo.core import action from nmmo.systems.ai import utils -def random_direction(): - return random.choice(action.Direction.edges) +def random_direction(np_random): + return np_random.choice(action.Direction.edges) -def random_safe(tiles, ent): +def random_safe(tiles, ent, np_random): r, c = ent.pos cands = [] if not tiles[r-1, c].void: @@ -21,9 +18,9 @@ def random_safe(tiles, ent): if not tiles[r, c+1].void: cands.append(action.East) - return random.choice(cands) + return np_random.choice(cands) -def habitable(tiles, ent): +def habitable(tiles, ent, np_random): r, c = ent.pos cands = [] if tiles[r-1, c].habitable: @@ -38,9 +35,9 @@ def habitable(tiles, ent): if len(cands) == 0: return action.North - return random.choice(cands) + return np_random.choice(cands) -def towards(direction): +def towards(direction, np_random): if direction == (-1, 0): return action.North if direction == (1, 0): @@ -50,19 +47,19 @@ def towards(direction): if direction == (0, 1): return action.East - return random.choice(action.Direction.edges) + return np_random.choice(action.Direction.edges) -def bullrush(ent, targ): +def bullrush(ent, targ, np_random): direction = utils.directionTowards(ent, targ) - return towards(direction) + return towards(direction, np_random) -def pathfind(tiles, ent, targ): +def pathfind(tiles, ent, targ, np_random): direction = utils.aStar(tiles, ent.pos, targ.pos) - return towards(direction) + return towards(direction, np_random) -def antipathfind(tiles, ent, targ): +def antipathfind(tiles, ent, targ, np_random): er, ec = ent.pos tr, tc = targ.pos goal = (2*er - tr , 2*ec-tc) direction = utils.aStar(tiles, ent.pos, goal) - return towards(direction) + return towards(direction, np_random) diff --git a/nmmo/systems/combat.py b/nmmo/systems/combat.py index 1666feed1..42c81eeb3 100644 --- a/nmmo/systems/combat.py +++ b/nmmo/systems/combat.py @@ -130,16 +130,16 @@ def danger(config, pos): return norm -def spawn(config, dnger): +def spawn(config, dnger, np_random): border = config.MAP_BORDER center = config.MAP_CENTER mid = center // 2 dist = dnger * center / 2 max_offset = mid - dist - offset = mid + border + np.random.randint(-max_offset, max_offset) + offset = mid + border + np_random.integers(-max_offset, max_offset) - rng = np.random.rand() + rng = np_random.random() if rng < 0.25: r = border + dist c = offset diff --git a/nmmo/systems/droptable.py b/nmmo/systems/droptable.py index 6110d79f2..729317b52 100644 --- a/nmmo/systems/droptable.py +++ b/nmmo/systems/droptable.py @@ -1,5 +1,3 @@ -import numpy as np - class Fixed(): def __init__(self, item): self.item = item @@ -13,7 +11,7 @@ def __init__(self, item, prob): self.prob = prob def roll(self, realm, level): - if np.random.rand() < self.prob: + if realm.np_random.random() < self.prob: return self.item(realm, level) return None diff --git a/scripted/baselines.py b/scripted/baselines.py index 625191da9..ad77c0a4a 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -27,6 +27,7 @@ def __init__(self, config, idx): config : A forge.blade.core.Config object or subclass object ''' super().__init__(config, idx) + self.np_random = None self.health_max = config.PLAYER_BASE_HEALTH if config.RESOURCE_SYSTEM_ENABLED: @@ -48,15 +49,16 @@ def forage_criterion(self) -> bool: def forage(self): '''Min/max food and water using Dijkstra's algorithm''' - move.forageDijkstra(self.config, self.ob, self.actions, self.food_max, self.water_max) + move.forageDijkstra(self.config, self.ob, self.actions, + self.food_max, self.water_max, self.np_random) def gather(self, resource): '''BFS search for a particular resource''' - return move.gatherBFS(self.config, self.ob, self.actions, resource) + return move.gatherBFS(self.config, self.ob, self.actions, resource, self.np_random) def explore(self): '''Route away from spawn''' - move.explore(self.config, self.ob, self.actions, self.me.row, self.me.col) + move.explore(self.config, self.ob, self.actions, self.me.row, self.me.col, self.np_random) @property def downtime(self): @@ -65,7 +67,7 @@ def downtime(self): def evade(self): '''Target and path away from an attacker''' - move.evade(self.config, self.ob, self.actions, self.attacker) + move.evade(self.config, self.ob, self.actions, self.attacker, self.np_random) self.target = self.attacker self.targetID = self.attackerID self.targetDist = self.attackerDist @@ -74,7 +76,7 @@ def attack(self): '''Attack the current target''' if self.target is not None: assert self.targetID is not None - style = random.choice(self.style) + style = self.np_random.choice(self.style) attack.target(self.config, self.actions, style, self.targetID) def target_weak(self): @@ -277,7 +279,7 @@ def buy(self, buy_k: dict, buy_upgrade: set): purchase = None best = list(self.best_heuristic.items()) - random.shuffle(best) + self.np_random.shuffle(best) for type_id, itm in best: # Buy top k if type_id in buy_k: @@ -311,6 +313,7 @@ def use(self): def __call__(self, observation: Observation): '''Process observations and return actions''' + assert self.np_random is not None, "Agent's RNG must be set." self.actions = {} self.ob = observation @@ -358,7 +361,7 @@ class Random(Scripted): def __call__(self, obs): super().__call__(obs) - move.rand(self.config, self.ob, self.actions) + move.rand(self.config, self.ob, self.actions, self.np_random) return self.actions class Meander(Scripted): @@ -366,7 +369,7 @@ class Meander(Scripted): def __call__(self, obs): super().__call__(obs) - move.meander(self.config, self.ob, self.actions) + move.meander(self.config, self.ob, self.actions, self.np_random) return self.actions class Explore(Scripted): diff --git a/scripted/move.py b/scripted/move.py index 893a4cee0..488a8f0c7 100644 --- a/scripted/move.py +++ b/scripted/move.py @@ -22,11 +22,11 @@ def inSight(dr, dc, vision): dr <= vision and dc <= vision) -def rand(config, ob, actions): - direction = random.choice(action.Direction.edges) +def rand(config, ob, actions, np_random): + direction = np_random.choice(action.Direction.edges) actions[action.Move] = {action.Direction: direction} -def towards(direction): +def towards(direction, np_random): if direction == (-1, 0): return action.North elif direction == (1, 0): @@ -36,14 +36,14 @@ def towards(direction): elif direction == (0, 1): return action.East else: - return random.choice(action.Direction.edges) + return np_random.choice(action.Direction.edges) -def pathfind(config, ob, actions, rr, cc): +def pathfind(config, ob, actions, rr, cc, np_random): direction = aStar(config, ob, actions, rr, cc) - direction = towards(direction) + direction = towards(direction, np_random) actions[action.Move] = {action.Direction: direction} -def meander(config, ob, actions): +def meander(config, ob, actions, np_random): cands = [] if ob.tile(-1, 0).material_id in material.Habitable.indices: cands.append((-1, 0)) @@ -56,11 +56,11 @@ def meander(config, ob, actions): if not cands: return (-1, 0) - direction = random.choices(cands)[0] - direction = towards(direction) + direction = np_random.choices(cands)[0] + direction = towards(direction, np_random) actions[action.Move] = {action.Direction: direction} -def explore(config, ob, actions, r, c): +def explore(config, ob, actions, r, c, np_random): vision = config.PLAYER_VISION_RADIUS sz = config.MAP_SIZE @@ -71,16 +71,17 @@ def explore(config, ob, actions, r, c): mmag = max(1, abs(vR), abs(vC)) rr = int(np.round(vision*vR/mmag)) cc = int(np.round(vision*vC/mmag)) - pathfind(config, ob, actions, rr, cc) + pathfind(config, ob, actions, rr, cc, np_random) -def evade(config, ob: Observation, actions, attacker): +def evade(config, ob: Observation, actions, attacker, np_random): agent = ob.agent() rr, cc = (2*agent.row - attacker.row, 2*agent.col - attacker.col) - pathfind(config, ob, actions, rr, cc) + pathfind(config, ob, actions, rr, cc, np_random) -def forageDijkstra(config, ob: Observation, actions, food_max, water_max, cutoff=100): +def forageDijkstra(config, ob: Observation, actions, + food_max, water_max, np_random, cutoff=100): vision = config.PLAYER_VISION_RADIUS agent = ob.agent() @@ -145,7 +146,7 @@ def forageDijkstra(config, ob: Observation, actions, food_max, water_max, cutoff while goal in backtrace and backtrace[goal] != start: goal = backtrace[goal] - direction = towards(goal) + direction = towards(goal, np_random) actions[action.Move] = {action.Direction: direction} def findResource(config, ob: Observation, resource): @@ -163,7 +164,7 @@ def findResource(config, ob: Observation, resource): return False -def gatherAStar(config, ob, actions, resource, cutoff=100): +def gatherAStar(config, ob, actions, resource, np_random, cutoff=100): resource_pos = findResource(config, ob, resource) if not resource_pos: return @@ -173,11 +174,11 @@ def gatherAStar(config, ob, actions, resource, cutoff=100): if not next_pos or next_pos == (0, 0): return - direction = towards(next_pos) + direction = towards(next_pos, np_random) actions[action.Move] = {action.Direction: direction} return True -def gatherBFS(config, ob: Observation, actions, resource, cutoff=100): +def gatherBFS(config, ob: Observation, actions, resource, np_random, cutoff=100): vision = config.PLAYER_VISION_RADIUS start = (0, 0) @@ -241,7 +242,7 @@ def gatherBFS(config, ob: Observation, actions, resource, cutoff=100): while found in backtrace and backtrace[found] != start: found = backtrace[found] - direction = towards(found) + direction = towards(found, np_random) actions[action.Move] = {action.Direction: direction} return True diff --git a/tests/core/test_tile.py b/tests/core/test_tile.py index 5cc8629dd..f49f78537 100644 --- a/tests/core/test_tile.py +++ b/tests/core/test_tile.py @@ -1,4 +1,6 @@ import unittest +import numpy as np + import nmmo from nmmo.core.tile import Tile, TileState from nmmo.datastore.numpy_datastore import NumpyDatastore @@ -9,6 +11,7 @@ def __init__(self): self.datastore = NumpyDatastore() self.datastore.register_object_type("Tile", TileState.State.num_attributes) self.config = nmmo.config.Small() + self.np_random = np.random class MockEntity(): def __init__(self, ent_id): diff --git a/tests/entity/test_entity.py b/tests/entity/test_entity.py index 848bb7bb1..d4b368a7e 100644 --- a/tests/entity/test_entity.py +++ b/tests/entity/test_entity.py @@ -1,4 +1,6 @@ import unittest +import numpy as np + import nmmo from nmmo.entity.entity import Entity, EntityState from nmmo.datastore.numpy_datastore import NumpyDatastore @@ -9,6 +11,7 @@ def __init__(self): self.config.PLAYERS = range(100) self.datastore = NumpyDatastore() self.datastore.register_object_type("Entity", EntityState.State.num_attributes) + self.np_random = np.random # pylint: disable=no-member class TestEntity(unittest.TestCase): diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 87aacd358..af9e36fee 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -1,14 +1,14 @@ import unittest -import random import numpy as np from tqdm import tqdm +from nmmo.lib import seeding from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv from tests.testhelpers import observations_are_equal # 30 seems to be enough to test variety of agent actions TEST_HORIZON = 30 -RANDOM_SEED = random.randint(0, 100000) +RANDOM_SEED = np.random.randint(0, 100000) def rollout_with_seed(env, seed): @@ -20,55 +20,36 @@ def rollout_with_seed(env, seed): return init_obs, obs, event_log class TestDeterminism(unittest.TestCase): - def test_single_proc(self): - config = ScriptedAgentTestConfig() - env = ScriptedAgentTestEnv(config) - - # the source run - init_obs_src, final_obs_src, event_log_src = rollout_with_seed(env, RANDOM_SEED) - - # the replication run - init_obs_rep, final_obs_rep, event_log_rep = rollout_with_seed(env, RANDOM_SEED) - - # sanity checks - self.assertTrue(observations_are_equal(init_obs_src, init_obs_src)) - self.assertTrue(observations_are_equal(final_obs_src, final_obs_src)) + def test_gym_np_random(self): + _, _np_seed_1 = seeding.np_random(RANDOM_SEED) + _, _np_seed_2 = seeding.np_random(RANDOM_SEED) + self.assertEqual(_np_seed_1, _np_seed_2) - # pylint: disable=expression-not-assigned - # compare the source and replication - self.assertTrue(observations_are_equal(init_obs_src, init_obs_rep)),\ - f"The determinism test failed. Seed: {RANDOM_SEED}." - self.assertTrue(observations_are_equal(final_obs_src, final_obs_rep)),\ - f"The determinism test failed. Seed: {RANDOM_SEED}." # after 30 runs - assert np.array_equal(event_log_src, event_log_rep),\ - f"The determinism test failed. Seed: {RANDOM_SEED}." - - def test_realm_level_rng(self): - # the below test doesn't work now - # having a realm-level random number generator would fix this - # for example see https://github.com/openai/gym/pull/135/files - # how self.np_random is initialized and used - pass - - # config = ScriptedAgentTestConfig() - # env1 = ScriptedAgentTestEnv(config) - # env2 = ScriptedAgentTestEnv(config) - # envs = [env1, env2] + def test_env_level_rng(self): + # two envs running independently should return the same results + config = ScriptedAgentTestConfig() + env1 = ScriptedAgentTestEnv(config) + env2 = ScriptedAgentTestEnv(config) + envs = [env1, env2] - # init_obs = [env.reset(seed=RANDOM_SEED) for env in envs] + init_obs = [env.reset(seed=RANDOM_SEED) for env in envs] - # for _ in tqdm(range(TEST_HORIZON)): - # # step returns a tuple of (obs, rewards, dones, infos) - # step_results = [env.step({}) for env in envs] + for _ in tqdm(range(TEST_HORIZON)): + # step returns a tuple of (obs, rewards, dones, infos) + step_results = [env.step({}) for env in envs] - # event_logs = [env.realm.event_log.get_data() for env in envs] + event_logs = [env.realm.event_log.get_data() for env in envs] - # self.assertTrue(observations_are_equal(init_obs[0], init_obs[1])),\ - # f"The multi-env determinism failed. Seed: {RANDOM_SEED}." - # self.assertTrue(observations_are_equal(step_results[0][0], step_results[1][0])),\ - # f"The multi-env determinism failed. Seed: {RANDOM_SEED}." # after 30 runs - # assert np.array_equal(event_logs[0], event_logs[1]),\ - # f"The multi-env determinism failed. Seed: {RANDOM_SEED}." + # sanity checks + self.assertTrue(observations_are_equal(init_obs[0], init_obs[0])) + self.assertTrue(observations_are_equal(step_results[0][0], step_results[0][0])) + + self.assertTrue(observations_are_equal(init_obs[0], init_obs[1]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") + self.assertTrue(observations_are_equal(step_results[0][0], step_results[1][0]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") # after 30 runs + self.assertTrue(np.array_equal(event_logs[0], event_logs[1]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") if __name__ == '__main__': diff --git a/tests/test_team_spawn.py b/tests/test_team_spawn.py index da279e97b..2dc7ca635 100644 --- a/tests/test_team_spawn.py +++ b/tests/test_team_spawn.py @@ -7,10 +7,10 @@ class TeamLoader(spawn.SequentialLoader): - def __init__(self, config, team_helper: TeamHelper): + def __init__(self, config, np_random, team_helper: TeamHelper): assert config.PLAYERS == [Agent], \ "TeamLoader only supports config.PLAYERS == [Agent]" - super().__init__(config) + super().__init__(config, np_random) self.team_helper = team_helper self.candidate_spawn_pos = \ @@ -32,7 +32,8 @@ def test_team_spawn(self): config = nmmo.config.Small() config.PLAYER_N = num_teams * team_size - config.PLAYER_LOADER = lambda config: TeamLoader(config, team_helper) + config.PLAYER_LOADER =\ + lambda config, np_random: TeamLoader(config, np_random, team_helper) assert config.PLAYER_N == num_teams * team_size,\ "config.PLAYER_N must be num_teams * team_size" diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 68ecff4a4..cb7726dce 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -9,6 +9,7 @@ from nmmo.core import action from nmmo.systems import item as Item from nmmo.core.realm import Realm +from nmmo.lib import material as Material from scripted import baselines @@ -50,6 +51,7 @@ def observations_are_equal(source_obs, target_obs, debug=True): keys_obs = list(target_obs.keys()) if keys_src != keys_obs: if debug: + #print("entities don't match") logging.error("entities don't match") return False @@ -58,6 +60,7 @@ def observations_are_equal(source_obs, target_obs, debug=True): ent_tgt = target_obs[k] if list(ent_src.keys()) != list(ent_tgt.keys()): if debug: + #print(f"entries don't match. key: {k}") logging.error("entries don't match. key: %s", str(k)) return False @@ -72,6 +75,7 @@ def observations_are_equal(source_obs, target_obs, debug=True): obj_tgt = ent_tgt[o] if np.sum(obj_src != obj_tgt) > 0: if debug: + #print(f"objects don't match. key: {k}, obj: {o}") logging.error("objects don't match. key: %s, obj: %s", str(k), str(o)) return False @@ -244,6 +248,15 @@ def _setup_env(self, random_seed, check_assert=True): for ent_id, pos in self.spawn_locs.items(): change_spawn_pos(env.realm, ent_id, pos) + # Change entire map to grass to become habitable and non-harvestable + MS = env.config.MAP_SIZE + for i in range(MS): + for j in range(MS): + tile = env.realm.map.tiles[i,j] + tile.material = Material.Grass + tile.material_id.update(Material.Grass.index) + tile.state = Material.Grass(env.config) + env.obs = env._compute_observations() if check_assert: From ebe853918afe7bace42500aa8ec3aed65257df5b Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 17 Jun 2023 23:07:17 +0900 Subject: [PATCH 028/113] checked whether ordered_set is necessary --- nmmo/core/action.py | 3 +- nmmo/core/env.py | 31 +++++++++------- nmmo/core/map.py | 5 ++- nmmo/datastore/id_allocator.py | 4 +-- nmmo/entity/entity_manager.py | 3 +- nmmo/lib/__init__.py | 1 - nmmo/lib/priorityqueue.py | 65 ---------------------------------- nmmo/systems/inventory.py | 2 +- nmmo/systems/skill.py | 2 +- tests/test_determinism.py | 15 ++++---- 10 files changed, 32 insertions(+), 99 deletions(-) delete mode 100644 nmmo/lib/priorityqueue.py diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 6701514a1..a026456a1 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -2,7 +2,6 @@ # pylint: disable=no-method-argument,unused-argument,no-self-argument,no-member from enum import Enum, auto -from ordered_set import OrderedSet import numpy as np from nmmo.lib import utils @@ -226,7 +225,7 @@ def in_range(entity, stim, config, N): R, C = stim.shape R, C = R//2, C//2 - rets = OrderedSet([entity]) + rets = set([entity]) for r in range(R-N, R+N+1): for c in range(C-N, C+N+1): for e in stim[r, c].entities.values(): diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 7840b13da..19fdee3c4 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -2,7 +2,6 @@ from typing import Any, Dict, List, Callable from collections import defaultdict from copy import copy -from ordered_set import OrderedSet import gym import numpy as np @@ -37,10 +36,11 @@ def __init__(self, self._dummy_obs = None self.possible_agents = list(range(1, config.PLAYER_N + 1)) + self._agents = None self._dead_agents = set() self._episode_stats = defaultdict(lambda: defaultdict(float)) self._dead_this_tick = None - self.scripted_agents = OrderedSet() + self.scripted_agents = set() self._gamestate_generator = GameStateGenerator(self.realm, self.config) self.game_state = None @@ -145,6 +145,7 @@ def reset(self, map_id=None, seed=None, options=None, if seed is not None: self.np_random, self._np_seed = seeding.np_random(seed) self.realm.reset(self.np_random, map_id) + self._agents = list(self.realm.players.keys()) self._dead_agents = set() self._episode_stats.clear() self._dead_this_tick = {} @@ -282,6 +283,9 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): actions = self._validate_actions(actions) # Execute actions self._dead_this_tick = self.realm.step(actions) + # the list of "current" agents, both alive and dead_this_tick + self._agents = list(set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys()))) + dones = {} for agent_id in self.agents: if agent_id in self._dead_this_tick or \ @@ -351,14 +355,16 @@ def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): '''Compute actions for scripted agents and add them into the action dict''' - for eid in self.scripted_agents: - # remove the dead scripted agent from the list - if eid in self._dead_agents or eid not in self.realm.players: - self.scripted_agents.discard(eid) - continue + dead_agents = set() + for agent_id in self.scripted_agents: + if agent_id in self.realm.players: + # override the provided scripted agents' actions + actions[agent_id] = self.realm.players[agent_id].agent(self.obs[agent_id]) + else: + dead_agents.add(agent_id) - # override the provided scripted agents' actions - actions[eid] = self.realm.players[eid].agent(self.obs[eid]) + # remove the dead scripted agent from the list + self.scripted_agents -= dead_agents return actions @@ -453,10 +459,9 @@ def render(self, mode='human'): @property def agents(self) -> List[AgentID]: - '''For conformity with the PettingZoo API only; rendering is external''' - # "current" agents, which return obs: both alive and dead_this_tick - agents = set(list(self.realm.players.keys()) + list(self._dead_this_tick.keys())) - return list(agents) + '''For conformity with the PettingZoo API''' + # returns the list of "current" agents, both alive and dead_this_tick + return self._agents def close(self): '''For conformity with the PettingZoo API only; rendering is external''' diff --git a/nmmo/core/map.py b/nmmo/core/map.py index 47bbc8ee1..4243febc8 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -1,10 +1,9 @@ import os import logging - import numpy as np from ordered_set import OrderedSet -from nmmo.core.tile import Tile +from nmmo.core.tile import Tile from nmmo.lib import material @@ -45,7 +44,7 @@ def repr(self): def reset(self, map_id): '''Reuse the current tile objects to load a new map''' config = self.config - self.update_list = OrderedSet() + self.update_list = OrderedSet() # critical for determinism path_map_suffix = config.PATH_MAP_SUFFIX.format(map_id) f_path = os.path.join(config.PATH_CWD, config.PATH_MAPS, path_map_suffix) diff --git a/nmmo/datastore/id_allocator.py b/nmmo/datastore/id_allocator.py index a93e8c1f1..83e65ec84 100644 --- a/nmmo/datastore/id_allocator.py +++ b/nmmo/datastore/id_allocator.py @@ -4,7 +4,7 @@ class IdAllocator: def __init__(self, max_id): # Key 0 is reserved as padding self.max_id = 1 - self.free = OrderedSet() + self.free = OrderedSet() self.expand(max_id) def full(self): @@ -17,5 +17,5 @@ def allocate(self): return self.free.pop(0) def expand(self, max_id): - self.free.update(OrderedSet(range(self.max_id, max_id))) + self.free.update(range(self.max_id, max_id)) self.max_id = max_id diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py index 7ffce852d..927bdaebb 100644 --- a/nmmo/entity/entity_manager.py +++ b/nmmo/entity/entity_manager.py @@ -1,6 +1,5 @@ from collections.abc import Mapping from typing import Dict -from ordered_set import OrderedSet from nmmo.entity.entity import Entity from nmmo.entity.npc import NPC @@ -147,7 +146,7 @@ def __init__(self, realm): def reset(self): super().reset() self._agent_loader = self.loader_class(self.config, self.np_random) - self.spawned = OrderedSet() + self.spawned = set() def spawn_individual(self, r, c, idx): agent = next(self._agent_loader) diff --git a/nmmo/lib/__init__.py b/nmmo/lib/__init__.py index f8c10fcbe..e69de29bb 100644 --- a/nmmo/lib/__init__.py +++ b/nmmo/lib/__init__.py @@ -1 +0,0 @@ -from nmmo.lib.priorityqueue import PriorityQueue diff --git a/nmmo/lib/priorityqueue.py b/nmmo/lib/priorityqueue.py deleted file mode 100644 index 7d3d0e3be..000000000 --- a/nmmo/lib/priorityqueue.py +++ /dev/null @@ -1,65 +0,0 @@ -# pylint: disable=all - -import heapq, itertools -import itertools - -from ordered_set import OrderedSet - -class PriorityQueue: - def __init__(self, capacity, unique=False): - self.q, self.items = [], OrderedSet() - self.capacity = capacity - self.count = itertools.count() - self.unique = unique - - def get(self, ind): - priority, item = self.tolist()[ind] - return priority, item - - def push(self, item, priority, uniqueKey=None): - if self.unique: - self.items.add(uniqueKey) - count = next(self.count) - if len(self.q) >= self.capacity: - return heapq.heappushpop(self.q, (priority, count, item)) - heapq.heappush(self.q, (priority, count, item)) - - def pop(self): - priority, _, item = heapq.heappop(self.q) - if self.unique: - self.items.remove(item) - return priority, item - - @property - def peek(self): - return self.peekPriority, self.peekValue - - @property - def peekPriority(self): - ret = heapq.nlargest(1, self.q) - if len(ret) > 0: - return ret[0][0] - - @property - def peekValue(self): - ret = heapq.nlargest(1, self.q) - if len(ret) > 0: - return ret[0][2] - - - def tolist(self): - q = heapq.nlargest(self.n, self.q) - return [(e[0], e[2]) for e in q] - - def priorities(self): - return sorted([e[0] for e in self.q], reverse=True) - - def print(self): - q = heapq.nlargest(self.n, self.q) - print([(e[0]) for e in q], end='') - print() - - @property - def n(self): - return len(self.q) - diff --git a/nmmo/systems/inventory.py b/nmmo/systems/inventory.py index 6e6f19bd3..446840a85 100644 --- a/nmmo/systems/inventory.py +++ b/nmmo/systems/inventory.py @@ -103,7 +103,7 @@ def __init__(self, realm, entity): self.capacity = config.ITEM_INVENTORY_CAPACITY self._item_stacks: Dict[Tuple, Item.Stack] = {} - self.items: OrderedSet[Item.Item] = OrderedSet([]) + self.items: OrderedSet[Item.Item] = OrderedSet([]) # critical for correct functioning @property def space(self): diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index 59e93c5c9..a2de62928 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -17,7 +17,7 @@ def __init__(self, realm, entity): self.entity = entity self.experience_calculator = experience.ExperienceCalculator() - self.skills = OrderedSet() + self.skills = OrderedSet() # critical for determinism def update(self): for skill in self.skills: diff --git a/tests/test_determinism.py b/tests/test_determinism.py index af9e36fee..27860f722 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -34,20 +34,17 @@ def test_env_level_rng(self): init_obs = [env.reset(seed=RANDOM_SEED) for env in envs] + self.assertTrue(observations_are_equal(init_obs[0], init_obs[0])) # sanity check + self.assertTrue(observations_are_equal(init_obs[0], init_obs[1]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") + for _ in tqdm(range(TEST_HORIZON)): # step returns a tuple of (obs, rewards, dones, infos) step_results = [env.step({}) for env in envs] + self.assertTrue(observations_are_equal(step_results[0][0], step_results[1][0]), + f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") event_logs = [env.realm.event_log.get_data() for env in envs] - - # sanity checks - self.assertTrue(observations_are_equal(init_obs[0], init_obs[0])) - self.assertTrue(observations_are_equal(step_results[0][0], step_results[0][0])) - - self.assertTrue(observations_are_equal(init_obs[0], init_obs[1]), - f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") - self.assertTrue(observations_are_equal(step_results[0][0], step_results[1][0]), - f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") # after 30 runs self.assertTrue(np.array_equal(event_logs[0], event_logs[1]), f"The multi-env determinism failed. Seed: {RANDOM_SEED}.") From efcb95f1fceef23ea32a55ea85e5bcf9ed864eff Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 20 Jun 2023 20:30:45 +0900 Subject: [PATCH 029/113] made the map generation deterministic --- nmmo/core/realm.py | 6 +++--- nmmo/core/terrain.py | 45 ++++++++++++++++++++------------------- tests/test_determinism.py | 33 ++++++++++++++++++++++++---- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index c9f15d3db..0c8a7152d 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -38,9 +38,9 @@ def __init__(self, config, np_random): Action.hook(config) # Generate maps if they do not exist - # CHECK ME: Does the map generator need use the env-level RNG? - # Do the maps need to be deterministic? - config.MAP_GENERATOR(config).generate_all_maps() + # NOTE: Map generation interferes with determinism. + # To ensure determinism, provide seed to env.reset() + config.MAP_GENERATOR(config).generate_all_maps(np_random) self.datastore = NumpyDatastore() for s in [TileState, EntityState, ItemState, EventState]: diff --git a/nmmo/core/terrain.py b/nmmo/core/terrain.py index b5d0e2c84..4aa983d8f 100644 --- a/nmmo/core/terrain.py +++ b/nmmo/core/terrain.py @@ -1,6 +1,4 @@ - import os -import random import logging import numpy as np @@ -145,13 +143,13 @@ def generate_terrain(config, map_id, interpolaters): return val, matl, interpolaters -def place_fish(tiles): +def place_fish(tiles, np_random): placed = False allow = {Terrain.GRASS} water_loc = np.where(tiles == Terrain.WATER) water_loc = list(zip(water_loc[0], water_loc[1])) - random.shuffle(water_loc) + np_random.shuffle(water_loc) for r, c in water_loc: if tiles[r-1, c] in allow or tiles[r+1, c] in allow or \ @@ -163,25 +161,25 @@ def place_fish(tiles): if not placed: raise RuntimeError('Could not find the water tile to place fish.') -def uniform(config, tiles, mat, mmin, mmax): - r = random.randint(mmin, mmax) - c = random.randint(mmin, mmax) +def uniform(config, tiles, mat, mmin, mmax, np_random): + r = np_random.integers(mmin, mmax) + c = np_random.integers(mmin, mmax) if tiles[r, c] not in {Terrain.GRASS}: - uniform(config, tiles, mat, mmin, mmax) + uniform(config, tiles, mat, mmin, mmax, np_random) else: tiles[r, c] = mat -def cluster(config, tiles, mat, mmin, mmax): +def cluster(config, tiles, mat, mmin, mmax, np_random): mmin = mmin + 1 mmax = mmax - 1 - r = random.randint(mmin, mmax) - c = random.randint(mmin, mmax) + r = np_random.integers(mmin, mmax) + c = np_random.integers(mmin, mmax) matls = {Terrain.GRASS} if tiles[r, c] not in matls: - cluster(config, tiles, mat, mmin-1, mmax+1) + cluster(config, tiles, mat, mmin-1, mmax+1, np_random) return tiles[r, c] = mat @@ -194,18 +192,21 @@ def cluster(config, tiles, mat, mmin, mmax): if tiles[r, c+1] in matls: tiles[r, c+1] = mat -def spawn_profession_resources(config, tiles): +def spawn_profession_resources(config, tiles, np_random=None): + if np_random is None: + np_random = np.random + mmin = config.MAP_BORDER + 1 mmax = config.MAP_SIZE - config.MAP_BORDER - 1 for _ in range(config.PROGRESSION_SPAWN_CLUSTERS): - cluster(config, tiles, Terrain.ORE, mmin, mmax) - cluster(config, tiles, Terrain.TREE, mmin, mmax) - cluster(config, tiles, Terrain.CRYSTAL, mmin, mmax) + cluster(config, tiles, Terrain.ORE, mmin, mmax, np_random) + cluster(config, tiles, Terrain.TREE, mmin, mmax, np_random) + cluster(config, tiles, Terrain.CRYSTAL, mmin, mmax, np_random) for _ in range(config.PROGRESSION_SPAWN_UNIFORMS): - uniform(config, tiles, Terrain.HERB, mmin, mmax) - place_fish(tiles) + uniform(config, tiles, Terrain.HERB, mmin, mmax, np_random) + place_fish(tiles, np_random) class MapGenerator: '''Procedural map generation''' @@ -226,7 +227,7 @@ def load_textures(self): setattr(Terrain, key.upper(), mat.index) self.textures = lookup - def generate_all_maps(self): + def generate_all_maps(self, np_random=None): '''Generates NMAPS maps according to generate_map Provides additional utilities for saving to .npy and rendering png previews''' @@ -253,7 +254,7 @@ def generate_all_maps(self): path = path_maps + '/map' + str(idx+1) os.makedirs(path, exist_ok=True) - terrain, tiles = self.generate_map(idx) + terrain, tiles = self.generate_map(idx, np_random) #Save/render Save.as_numpy(tiles, path) @@ -263,7 +264,7 @@ def generate_all_maps(self): Save.fractal(terrain, path+'/fractal.png') Save.render(tiles, self.textures, path+'/map.png') - def generate_map(self, idx): + def generate_map(self, idx, np_random=None): '''Generate a single map The default method is a relatively complex multiscale perlin noise method. @@ -295,6 +296,6 @@ def generate_map(self, idx): tiles[r, c] = Terrain.VOID if config.PROFESSION_SYSTEM_ENABLED: - spawn_profession_resources(config, tiles) + spawn_profession_resources(config, tiles, np_random) return terrain, tiles diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 27860f722..1a407ec65 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -25,14 +25,39 @@ def test_gym_np_random(self): _, _np_seed_2 = seeding.np_random(RANDOM_SEED) self.assertEqual(_np_seed_1, _np_seed_2) + def test_map_determinism(self): + config = ScriptedAgentTestConfig() + config.MAP_FORCE_GENERATION = True + config.PATH_MAPS = 'maps/det0' + + map_generator = config.MAP_GENERATOR(config) + + np_random1, _ = seeding.np_random(RANDOM_SEED) + np_random2, _ = seeding.np_random(RANDOM_SEED) + + terrain1, tiles1 = map_generator.generate_map(0, np_random1) + terrain2, tiles2 = map_generator.generate_map(0, np_random2) + + self.assertTrue(np.array_equal(terrain1, terrain2)) + self.assertTrue(np.array_equal(tiles1, tiles2)) + def test_env_level_rng(self): # two envs running independently should return the same results - config = ScriptedAgentTestConfig() - env1 = ScriptedAgentTestEnv(config) - env2 = ScriptedAgentTestEnv(config) + + # config to always generate new maps, to test map determinism + config1 = ScriptedAgentTestConfig() + config1.MAP_FORCE_GENERATION = True + config1.PATH_MAPS = 'maps/det1' + config2 = ScriptedAgentTestConfig() + config2.MAP_FORCE_GENERATION = True + config2.PATH_MAPS = 'maps/det2' + + # to create the same maps, seed must be provided + env1 = ScriptedAgentTestEnv(config1, seed=RANDOM_SEED) + env2 = ScriptedAgentTestEnv(config2, seed=RANDOM_SEED) envs = [env1, env2] - init_obs = [env.reset(seed=RANDOM_SEED) for env in envs] + init_obs = [env.reset(seed=RANDOM_SEED+1) for env in envs] self.assertTrue(observations_are_equal(init_obs[0], init_obs[0])) # sanity check self.assertTrue(observations_are_equal(init_obs[0], init_obs[1]), From 10d67a4ec425e1e7165e7735c4ac90814b39b838 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 20 Jun 2023 20:42:00 +0900 Subject: [PATCH 030/113] checked flip seed in map generation --- tests/test_determinism.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 1a407ec65..7a80fd7cd 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -1,7 +1,9 @@ +# pylint: disable=invalid-name import unittest import numpy as np from tqdm import tqdm +import nmmo from nmmo.lib import seeding from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv from tests.testhelpers import observations_are_equal @@ -11,14 +13,6 @@ RANDOM_SEED = np.random.randint(0, 100000) -def rollout_with_seed(env, seed): - init_obs = env.reset(seed=seed) - for _ in tqdm(range(TEST_HORIZON)): - obs, _, _, _ = env.step({}) - event_log = env.realm.event_log.get_data() - - return init_obs, obs, event_log - class TestDeterminism(unittest.TestCase): def test_gym_np_random(self): _, _np_seed_1 = seeding.np_random(RANDOM_SEED) @@ -26,12 +20,10 @@ def test_gym_np_random(self): self.assertEqual(_np_seed_1, _np_seed_2) def test_map_determinism(self): - config = ScriptedAgentTestConfig() + config = nmmo.config.Default() config.MAP_FORCE_GENERATION = True - config.PATH_MAPS = 'maps/det0' map_generator = config.MAP_GENERATOR(config) - np_random1, _ = seeding.np_random(RANDOM_SEED) np_random2, _ = seeding.np_random(RANDOM_SEED) @@ -41,6 +33,25 @@ def test_map_determinism(self): self.assertTrue(np.array_equal(terrain1, terrain2)) self.assertTrue(np.array_equal(tiles1, tiles2)) + def test_flip_seed_map(self): + config1 = nmmo.config.Default() + config1.MAP_FORCE_GENERATION = True + config1.TERRAIN_FLIP_SEED = False + config2 = nmmo.config.Default() + config2.MAP_FORCE_GENERATION = True + config2.TERRAIN_FLIP_SEED = True + + map_generator1 = config1.MAP_GENERATOR(config1) + np_random1, _ = seeding.np_random(RANDOM_SEED) + map_generator2 = config1.MAP_GENERATOR(config2) + np_random2, _ = seeding.np_random(RANDOM_SEED) + + terrain1, tiles1 = map_generator1.generate_map(0, np_random1) + terrain2, tiles2 = map_generator2.generate_map(0, np_random2) + + self.assertFalse(np.array_equal(terrain1, terrain2)) + self.assertFalse(np.array_equal(tiles1, tiles2)) + def test_env_level_rng(self): # two envs running independently should return the same results From a2754326ca6e5a5dfb8e9ae32eaac090ccd9f8ca Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 20 Jun 2023 21:19:07 +0900 Subject: [PATCH 031/113] fixed pylint error --- tests/test_determinism.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 7a80fd7cd..af08aa811 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -1,4 +1,3 @@ -# pylint: disable=invalid-name import unittest import numpy as np from tqdm import tqdm @@ -57,11 +56,11 @@ def test_env_level_rng(self): # config to always generate new maps, to test map determinism config1 = ScriptedAgentTestConfig() - config1.MAP_FORCE_GENERATION = True - config1.PATH_MAPS = 'maps/det1' + setattr(config1, 'MAP_FORCE_GENERATION', True) + setattr(config1, 'PATH_MAPS', 'maps/det1') config2 = ScriptedAgentTestConfig() - config2.MAP_FORCE_GENERATION = True - config2.PATH_MAPS = 'maps/det2' + setattr(config2, 'MAP_FORCE_GENERATION', True) + setattr(config2, 'PATH_MAPS', 'maps/det2') # to create the same maps, seed must be provided env1 = ScriptedAgentTestEnv(config1, seed=RANDOM_SEED) From c73080dac1a8dd739fb06033a22d786f100a77ed Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 20 Jun 2023 21:47:59 +0900 Subject: [PATCH 032/113] fixed deterministic map test --- tests/test_determinism.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/test_determinism.py b/tests/test_determinism.py index af08aa811..bacc46ee3 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -21,31 +21,25 @@ def test_gym_np_random(self): def test_map_determinism(self): config = nmmo.config.Default() config.MAP_FORCE_GENERATION = True + config.TERRAIN_FLIP_SEED = False map_generator = config.MAP_GENERATOR(config) np_random1, _ = seeding.np_random(RANDOM_SEED) - np_random2, _ = seeding.np_random(RANDOM_SEED) + np_random1_1, _ = seeding.np_random(RANDOM_SEED) terrain1, tiles1 = map_generator.generate_map(0, np_random1) - terrain2, tiles2 = map_generator.generate_map(0, np_random2) + terrain1_1, tiles1_1 = map_generator.generate_map(0, np_random1_1) - self.assertTrue(np.array_equal(terrain1, terrain2)) - self.assertTrue(np.array_equal(tiles1, tiles2)) + self.assertTrue(np.array_equal(terrain1, terrain1_1)) + self.assertTrue(np.array_equal(tiles1, tiles1_1)) - def test_flip_seed_map(self): - config1 = nmmo.config.Default() - config1.MAP_FORCE_GENERATION = True - config1.TERRAIN_FLIP_SEED = False + # test flip seed config2 = nmmo.config.Default() config2.MAP_FORCE_GENERATION = True config2.TERRAIN_FLIP_SEED = True - map_generator1 = config1.MAP_GENERATOR(config1) - np_random1, _ = seeding.np_random(RANDOM_SEED) - map_generator2 = config1.MAP_GENERATOR(config2) + map_generator2 = config2.MAP_GENERATOR(config2) np_random2, _ = seeding.np_random(RANDOM_SEED) - - terrain1, tiles1 = map_generator1.generate_map(0, np_random1) terrain2, tiles2 = map_generator2.generate_map(0, np_random2) self.assertFalse(np.array_equal(terrain1, terrain2)) From 5713a55f6138b7e3c8012283978224e308bedcce Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 21 Jun 2023 00:01:58 +0900 Subject: [PATCH 033/113] make sure obs matches obs_space --- nmmo/core/env.py | 104 ++++++++++++++++++++++------------ nmmo/core/observation.py | 6 +- tests/core/test_gym_spaces.py | 34 +++++++++++ 3 files changed, 104 insertions(+), 40 deletions(-) create mode 100644 tests/core/test_gym_spaces.py diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 19fdee3c4..472db24dd 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -12,6 +12,7 @@ from nmmo.core.config import Default from nmmo.core.observation import Observation from nmmo.core.tile import Tile +from nmmo.core import action as Action from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task import task_api @@ -48,33 +49,21 @@ def __init__(self, self.tasks = task_api.nmmo_default_task(self.possible_agents) self.agent_task_map = None - # pylint: disable=method-cache-max-size-none - @functools.lru_cache(maxsize=None) - def observation_space(self, agent: int): - '''Neural MMO Observation Space - - Args: - agent: Agent ID - - Returns: - observation: gym.spaces object contained the structured observation - for the specified agent. Each visible object is represented by - continuous and discrete vectors of attributes. A 2-layer attentional - encoder can be used to convert this structured observation into - a flat vector embedding.''' - + @functools.cached_property + def _obs_space(self): def box(rows, cols): return gym.spaces.Box( - low=-2**20, high=2**20, + low=-2**15, high=2**15-1, shape=(rows, cols), - dtype=np.float32) + dtype=np.int16) + def mask_box(length): + return gym.spaces.Box(low=0, high=1, shape=(length,), dtype=np.int8) obs_space = { - "CurrentTick": gym.spaces.Discrete(1), - "AgentId": gym.spaces.Discrete(1), + "CurrentTick": gym.spaces.Discrete(self.config.HORIZON+1), + "AgentId": gym.spaces.Discrete(self.config.PLAYER_N+1), "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes), - "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes), - } + "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes)} if self.config.ITEM_SYSTEM_ENABLED: obs_space["Inventory"] = box(self.config.INVENTORY_N_OBS, Item.State.num_attributes) @@ -83,37 +72,78 @@ def box(rows, cols): obs_space["Market"] = box(self.config.MARKET_N_OBS, Item.State.num_attributes) if self.config.PROVIDE_ACTION_TARGETS: - obs_space['ActionTargets'] = self.action_space(None) + mask_spec = {} + mask_spec[Action.Move] = gym.spaces.Dict( + {Action.Direction: mask_box(len(Action.Direction.edges))}) + if self.config.COMBAT_SYSTEM_ENABLED: + mask_spec[Action.Attack] = gym.spaces.Dict({ + Action.Style: mask_box(3), + Action.Target: mask_box(self.config.PLAYER_N_OBS)}) + if self.config.ITEM_SYSTEM_ENABLED: + mask_spec[Action.Use] = gym.spaces.Dict( + {Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS)}) + mask_spec[Action.Destroy] = gym.spaces.Dict( + {Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS)}) + mask_spec[Action.Give] = gym.spaces.Dict({ + Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS), + Action.Target: mask_box(self.config.PLAYER_N_OBS)}) + if self.config.EXCHANGE_SYSTEM_ENABLED: + mask_spec[Action.Buy] = gym.spaces.Dict( + {Action.MarketItem: mask_box(self.config.MARKET_N_OBS)}) + mask_spec[Action.Sell] = gym.spaces.Dict({ + Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS), + Action.Price: mask_box(self.config.PRICE_N_OBS)}) + mask_spec[Action.GiveGold] = gym.spaces.Dict({ + Action.Price: mask_box(self.config.PRICE_N_OBS), + Action.Target: mask_box(self.config.PLAYER_N_OBS)}) + if self.config.COMMUNICATION_SYSTEM_ENABLED: + mask_spec[Action.Comm] = gym.spaces.Dict( + {Action.Token: mask_box(self.config.COMMUNICATION_NUM_TOKENS)}) + obs_space['ActionTargets'] = gym.spaces.Dict(mask_spec) return gym.spaces.Dict(obs_space) + # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) - def action_space(self, agent): - '''Neural MMO Action Space + def observation_space(self, agent: AgentID): + '''Neural MMO Observation Space Args: agent: Agent ID Returns: - actions: gym.spaces object contained the structured actions - for the specified agent. Each action is parameterized by a list - of discrete-valued arguments. These consist of both fixed, k-way - choices (such as movement direction) and selections from the - observation space (such as targeting)''' + observation: gym.spaces object contained the structured observation + for the specified agent.''' + return self._obs_space + @functools.cached_property + def _atn_space(self): actions = {} for atn in sorted(nmmo.Action.edges(self.config)): if atn.enabled(self.config): - actions[atn] = {} for arg in sorted(atn.edges): n = arg.N(self.config) actions[atn][arg] = gym.spaces.Discrete(n) - actions[atn] = gym.spaces.Dict(actions[atn]) - return gym.spaces.Dict(actions) + # pylint: disable=method-cache-max-size-none + @functools.lru_cache(maxsize=None) + def action_space(self, agent: AgentID): + '''Neural MMO Action Space + + Args: + agent: Agent ID + + Returns: + actions: gym.spaces object contained the structured actions + for the specified agent. Each action is parameterized by a list + of discrete-valued arguments. These consist of both fixed, k-way + choices (such as movement direction) and selections from the + observation space (such as targeting)''' + return self._atn_space + ############################################################################ # Core API @@ -369,10 +399,10 @@ def _compute_scripted_agent_actions(self, actions: Dict[int, Dict[str, Dict[str, return actions def _make_dummy_obs(self): - dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col))) - dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col))) - dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col))) - dummy_market = np.zeros((0, len(Item.State.attr_name_to_col))) + dummy_tiles = np.zeros((0, len(Tile.State.attr_name_to_col)), dtype=np.int16) + dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col)), dtype=np.int16) + dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) + dummy_market = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) return Observation(self.config, self.realm.tick, 0, dummy_tiles, dummy_entities, dummy_inventory, dummy_market) @@ -381,7 +411,7 @@ def _compute_observations(self): market = Item.Query.for_sale(self.realm.datastore) # get tile map, to bypass the expensive tile window query - tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE) + tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE).astype(np.int16) radius = self.config.PLAYER_VISION_RADIUS tile_obs_size = ((2*radius+1)**2, len(Tile.State.attr_name_to_col)) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index ee2c88b38..5103c0d3b 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -115,14 +115,14 @@ def agent(self): def get_empty_obs(self): gym_obs = { - "CurrentTick": np.array([self.current_tick]), - "AgentId": np.array([self.agent_id]), + "CurrentTick": self.current_tick, + "AgentId": self.agent_id, "Tile": None, # np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), "Entity": np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1]), dtype=np.int16)} if self.config.ITEM_SYSTEM_ENABLED: gym_obs["Inventory"] = np.zeros((self.config.INVENTORY_N_OBS, - self.inventory.values.shape[1])) + self.inventory.values.shape[1]), dtype=np.int16) if self.config.EXCHANGE_SYSTEM_ENABLED: gym_obs["Market"] = np.zeros((self.config.MARKET_N_OBS, self.market.values.shape[1]), dtype=np.int16) diff --git a/tests/core/test_gym_spaces.py b/tests/core/test_gym_spaces.py new file mode 100644 index 000000000..e248e7c1f --- /dev/null +++ b/tests/core/test_gym_spaces.py @@ -0,0 +1,34 @@ +import unittest + +import nmmo + +class TestGymSpaces(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.config = nmmo.config.Default() + cls.env = nmmo.Env(cls.config) + cls.env.reset(seed=1) + for _ in range(3): + cls.env.step({}) + + def test_obs_space(self): + obs_spec = self.env.observation_space(1) + obs, _, _, _ = self.env.step({}) + + for agent_obs in obs.values(): + for key, val in agent_obs.items(): + if key != 'ActionTargets': + self.assertTrue(obs_spec[key].contains(val), + f"Invalid obs format -- key: {key}, val: {val}") + + if 'ActionTargets' in agent_obs: + val = agent_obs['ActionTargets'] + for atn in nmmo.Action.edges(self.config): + if atn.enabled(self.config): + for arg in atn.edges: # pylint: disable=not-an-iterable + self.assertTrue(obs_spec['ActionTargets'][atn][arg].contains(val[atn][arg]), + f"Invalid obs format -- key: {atn}/{arg}, val: {val[atn][arg]}") + + +if __name__ == '__main__': + unittest.main() From 2ad12173a66be71ddd5334660b164a6acd281dc0 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 21 Jun 2023 12:38:35 +0900 Subject: [PATCH 034/113] changed to _np_random, call self.seed(seed) --- nmmo/core/env.py | 28 ++++++++++++++++++---------- nmmo/core/map.py | 8 ++++---- nmmo/core/realm.py | 22 +++++++++++----------- nmmo/core/tile.py | 10 +++++----- nmmo/entity/entity.py | 5 ++++- nmmo/entity/entity_manager.py | 32 ++++++++++++++++---------------- nmmo/entity/npc.py | 12 +++++++----- nmmo/entity/player.py | 2 +- nmmo/systems/ai/behavior.py | 12 +++++++----- nmmo/systems/droptable.py | 5 ++++- scripted/baselines.py | 26 +++++++++++++++----------- tests/core/test_tile.py | 7 ++++--- tests/entity/test_entity.py | 2 +- 13 files changed, 97 insertions(+), 74 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 19fdee3c4..da28bf82b 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -26,12 +26,14 @@ class Env(ParallelEnv): def __init__(self, config: Default = nmmo.config.Default(), seed = None): - self.np_random, self._np_seed = seeding.np_random(seed) + self._np_random = None + self._np_seed = None + self._reset_required = True + self.seed(seed) super().__init__() self.config = config - self.realm = realm.Realm(config, self.np_random) - self._reset_required = True + self.realm = realm.Realm(config, self._np_random) self.obs = None self._dummy_obs = None @@ -142,9 +144,8 @@ def reset(self, map_id=None, seed=None, options=None, but finite horizon: ~1000 timesteps for small maps and 5000+ timesteps for large maps ''' - if seed is not None: - self.np_random, self._np_seed = seeding.np_random(seed) - self.realm.reset(self.np_random, map_id) + self.seed(seed) + self.realm.reset(self._np_random, map_id) self._agents = list(self.realm.players.keys()) self._dead_agents = set() self._episode_stats.clear() @@ -154,7 +155,7 @@ def reset(self, map_id=None, seed=None, options=None, for eid, ent in self.realm.players.items(): if isinstance(ent.agent, Scripted): self.scripted_agents.add(eid) - ent.agent.np_random = self.np_random + ent.agent.set_rng(self._np_random) self._dummy_obs = self._make_dummy_obs() self.obs = self._compute_observations() @@ -467,9 +468,16 @@ def close(self): '''For conformity with the PettingZoo API only; rendering is external''' def seed(self, seed=None): - '''Reseeds the environment. reset() must be called after seed(), and before step().''' - self.np_random, self._np_seed = seeding.np_random(seed) - self._reset_required = True + '''Reseeds the environment. reset() must be called after seed(), and before step(). + - self._np_seed is None: seed() has not been called, e.g. __init__() -> new RNG + - self._np_seed is set, and seed is not None: seed() or reset() with seed -> new RNG + + If self._np_seed is set, but seed is None + probably called from reset() without seed, so don't change the RNG + ''' + if self._np_seed is None or seed is not None: + self._np_random, self._np_seed = seeding.np_random(seed) + self._reset_required = True def state(self) -> np.ndarray: raise NotImplementedError diff --git a/nmmo/core/map.py b/nmmo/core/map.py index 4243febc8..7e5b7e389 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -12,7 +12,7 @@ class Map: Also tracks a sparse list of tile updates ''' - def __init__(self, config, realm): + def __init__(self, config, realm, np_random): self.config = config self._repr = None self.realm = realm @@ -23,7 +23,7 @@ def __init__(self, config, realm): for r in range(sz): for c in range(sz): - self.tiles[r, c] = Tile(realm, r, c) + self.tiles[r, c] = Tile(realm, r, c, np_random) @property def packet(self): @@ -41,7 +41,7 @@ def repr(self): return self._repr - def reset(self, map_id): + def reset(self, map_id, np_random): '''Reuse the current tile objects to load a new map''' config = self.config self.update_list = OrderedSet() # critical for determinism @@ -61,7 +61,7 @@ def reset(self, map_id): for c, idx in enumerate(row): mat = materials[idx] tile = self.tiles[r, c] - tile.reset(mat, config) + tile.reset(mat, config, np_random) assert c == config.MAP_SIZE - 1 assert r == config.MAP_SIZE - 1 diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 0c8a7152d..6cefb19f5 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -30,7 +30,7 @@ class Realm: def __init__(self, config, np_random): self.config = config - self.np_random = np_random # rng + self._np_random = np_random # rng assert isinstance( config, nmmo.config.Config ), f"Config {config} is not a config instance (did you pass the class?)" @@ -40,7 +40,7 @@ def __init__(self, config, np_random): # Generate maps if they do not exist # NOTE: Map generation interferes with determinism. # To ensure determinism, provide seed to env.reset() - config.MAP_GENERATOR(config).generate_all_maps(np_random) + config.MAP_GENERATOR(config).generate_all_maps(self._np_random) self.datastore = NumpyDatastore() for s in [TileState, EntityState, ItemState, EventState]: @@ -50,14 +50,14 @@ def __init__(self, config, np_random): self.exchange = None # Load the world file - self.map = Map(config, self) + self.map = Map(config, self, self._np_random) self.log_helper = LogHelper.create(self) self.event_log = EventLogger(self) # Entity handlers - self.players = PlayerManager(self) - self.npcs = NPCManager(self) + self.players = PlayerManager(self, self._np_random) + self.npcs = NPCManager(self, self._np_random) # Global item registry self.items = {} @@ -74,17 +74,17 @@ def reset(self, np_random, map_id: int = None): Args: idx: Map index to load """ - self.np_random = np_random + self._np_random = np_random self.log_helper.reset() self.event_log.reset() - map_id = map_id or self.np_random.integers(self.config.MAP_N) + 1 - self.map.reset(map_id) + map_id = map_id or self._np_random.integers(self.config.MAP_N) + 1 + self.map.reset(map_id, self._np_random) self.tick = 0 # EntityState and ItemState tables must be empty after players/npcs.reset() - self.players.reset() - self.npcs.reset() + self.players.reset(self._np_random) + self.npcs.reset(self._np_random) assert EntityState.State.table(self.datastore).is_empty(), \ "EntityState table is not empty" assert ItemState.State.table(self.datastore).is_empty(), \ @@ -172,7 +172,7 @@ def step(self, actions): # TODO: we should be randomizing these, otherwise the lower ID agents # will always go first. --> ONLY SHUFFLE BUY if priority == Buy.priority: - self.np_random.shuffle(merged[priority]) + self._np_random.shuffle(merged[priority]) # CHECK ME: do we need this line? # ent_id, (atn, args) = merged[priority][0] diff --git a/nmmo/core/tile.py b/nmmo/core/tile.py index c931942cd..9d9cb33ca 100644 --- a/nmmo/core/tile.py +++ b/nmmo/core/tile.py @@ -28,11 +28,11 @@ ) class Tile(TileState): - def __init__(self, realm, r, c): + def __init__(self, realm, r, c, np_random): super().__init__(realm.datastore, TileState.Limits(realm.config)) self.realm = realm self.config = realm.config - self.np_random = realm.np_random + self._np_random = np_random self.row.update(r) self.col.update(c) @@ -64,8 +64,8 @@ def impassible(self): def void(self): return self.material == material.Void - def reset(self, mat, config): - self.np_random = self.realm.np_random # reset the RNG + def reset(self, mat, config, np_random): + self._np_random = np_random # reset the RNG self.state = mat(config) self.material = mat(config) self.material_id.update(self.state.index) @@ -84,7 +84,7 @@ def remove_entity(self, ent_id): del self.entities[ent_id] def step(self): - if not self.depleted or self.np_random.random() > self.material.respawn: + if not self.depleted or self._np_random.random() > self.material.respawn: return self.depleted = False diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index e7e079643..7fda6f896 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -218,7 +218,10 @@ def __init__(self, realm, pos, entity_id, name): self.realm = realm self.config: Config = realm.config - self.np_random = realm.np_random + # TODO: do not access realm._np_random directly + # related to the whole NPC, scripted logic + # pylint: disable=protected-access + self._np_random = realm._np_random self.policy = name self.entity_id = entity_id diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py index 927bdaebb..9d24be2d4 100644 --- a/nmmo/entity/entity_manager.py +++ b/nmmo/entity/entity_manager.py @@ -9,11 +9,11 @@ class EntityGroup(Mapping): - def __init__(self, realm): + def __init__(self, realm, np_random): self.datastore = realm.datastore self.realm = realm self.config = realm.config - self.np_random = realm.np_random + self._np_random = np_random self.entities: Dict[int, Entity] = {} self.dead_this_tick: Dict[int, Entity] = {} @@ -41,8 +41,8 @@ def corporeal(self): def packet(self): return {k: v.packet() for k, v in self.corporeal.items()} - def reset(self): - self.np_random = self.realm.np_random # reset the RNG + def reset(self, np_random): + self._np_random = np_random # reset the RNG for ent in self.entities.values(): # destroy the items if self.config.ITEM_SYSTEM_ENABLED: @@ -86,13 +86,13 @@ def update(self, actions): class NPCManager(EntityGroup): - def __init__(self, realm): - super().__init__(realm) + def __init__(self, realm, np_random): + super().__init__(realm, np_random) self.next_id = -1 self.spawn_dangers = [] - def reset(self): - super().reset() + def reset(self, np_random): + super().reset(np_random) self.next_id = -1 self.spawn_dangers = [] @@ -108,14 +108,14 @@ def spawn(self): if self.spawn_dangers: danger = self.spawn_dangers[-1] - r, c = combat.spawn(config, danger, self.np_random) + r, c = combat.spawn(config, danger, self._np_random) else: center = config.MAP_CENTER border = self.config.MAP_BORDER # pylint: disable=unbalanced-tuple-unpacking - r, c = self.np_random.integers(border, center+border, 2).tolist() + r, c = self._np_random.integers(border, center+border, 2).tolist() - npc = NPC.spawn(self.realm, (r, c), self.next_id) + npc = NPC.spawn(self.realm, (r, c), self.next_id, self._np_random) if npc: super().spawn(npc) self.next_id -= 1 @@ -137,15 +137,15 @@ def actions(self, realm): return actions class PlayerManager(EntityGroup): - def __init__(self, realm): - super().__init__(realm) + def __init__(self, realm, np_random): + super().__init__(realm, np_random) self.loader_class = self.realm.config.PLAYER_LOADER self._agent_loader: spawn.SequentialLoader = None self.spawned = None - def reset(self): - super().reset() - self._agent_loader = self.loader_class(self.config, self.np_random) + def reset(self, np_random): + super().reset(np_random) + self._agent_loader = self.loader_class(self.config, self._np_random) self.spawned = set() def spawn_individual(self, r, c, idx): diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py index eceb989c1..16ac75536 100644 --- a/nmmo/entity/npc.py +++ b/nmmo/entity/npc.py @@ -82,8 +82,10 @@ def receive_damage(self, source, dmg): return False + # NOTE: passing np_random here is a hack + # Ideally, it should be passed to __init__ and also used in action generation @staticmethod - def spawn(realm, pos, iden): + def spawn(realm, pos, iden, np_random): config = realm.config # check the position @@ -104,7 +106,7 @@ def spawn(realm, pos, iden): ent.spawn_danger = danger # Select combat focus - style = realm.np_random.choice((Action.Melee, Action.Range, Action.Mage)) + style = np_random.choice((Action.Melee, Action.Range, Action.Mage)) ent.skills.style = style # Compute level @@ -131,7 +133,7 @@ def spawn(realm, pos, iden): # Equipment to instantiate if config.EQUIPMENT_SYSTEM_ENABLED: - lvl = level - realm.np_random.random() + lvl = level - np_random.random() ilvl = int(5 * lvl) offense = int(config.NPC_BASE_DAMAGE + lvl*config.NPC_LEVEL_DAMAGE) @@ -140,11 +142,11 @@ def spawn(realm, pos, iden): ent.equipment = Equipment(ilvl, offense, offense, offense, defense, defense, defense) armor = [Item.Hat, Item.Top, Item.Bottom] - ent.droptable.add(realm.np_random.choice(armor)) + ent.droptable.add(np_random.choice(armor)) if config.PROFESSION_SYSTEM_ENABLED: tools = [Item.Rod, Item.Gloves, Item.Pickaxe, Item.Axe, Item.Chisel] - ent.droptable.add(realm.np_random.choice(tools)) + ent.droptable.add(np_random.choice(tools)) return ent diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py index eb9d864ff..8cac01629 100644 --- a/nmmo/entity/player.py +++ b/nmmo/entity/player.py @@ -71,7 +71,7 @@ def receive_damage(self, source, dmg): # TODO: make source receive the highest-level items first # because source cannot take it if the inventory is full item_list = list(self.inventory.items) - self.np_random.shuffle(item_list) + self._np_random.shuffle(item_list) for item in item_list: self.inventory.remove(item) diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index aa60e7f62..95e3641b2 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -1,3 +1,4 @@ +# TODO: try to remove the below line # pylint: disable=all import numpy as np @@ -23,8 +24,9 @@ def update(entity): entity.water = None def pathfind(realm, actions, entity, target): + # TODO: do not access realm._np_random directly. ALSO see below for all other uses actions[nmmo.action.Move] = { - nmmo.action.Direction: move.pathfind(realm.map.tiles, entity, target, realm.np_random)} + nmmo.action.Direction: move.pathfind(realm.map.tiles, entity, target, realm._np_random)} def explore(realm, actions, entity): sz = realm.config.TERRAIN_SIZE @@ -44,12 +46,12 @@ def explore(realm, actions, entity): def meander(realm, actions, entity): actions[nmmo.action.Move] = { - nmmo.action.Direction: move.habitable(realm.map.tiles, entity, realm.np_random)} + nmmo.action.Direction: move.habitable(realm.map.tiles, entity, realm._np_random)} def evade(realm, actions, entity): actions[nmmo.action.Move] = { nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker, - realm.np_random)} + realm._np_random)} def hunt(realm, actions, entity): #Move args @@ -57,9 +59,9 @@ def hunt(realm, actions, entity): direction = None if distance == 0: - direction = move.random_direction(realm.np_random) + direction = move.random_direction(realm._np_random) elif distance > 1: - direction = move.pathfind(realm.map.tiles, entity, entity.target, realm.np_random) + direction = move.pathfind(realm.map.tiles, entity, entity.target, realm._np_random) if direction is not None: actions[nmmo.action.Move] = {nmmo.action.Direction: direction} diff --git a/nmmo/systems/droptable.py b/nmmo/systems/droptable.py index 729317b52..7d8728474 100644 --- a/nmmo/systems/droptable.py +++ b/nmmo/systems/droptable.py @@ -11,7 +11,10 @@ def __init__(self, item, prob): self.prob = prob def roll(self, realm, level): - if realm.np_random.random() < self.prob: + # TODO: do not access realm._np_random directly + # related to skill.py, all harvest skills + # pylint: disable=protected-access + if realm._np_random.random() < self.prob: return self.item(realm, level) return None diff --git a/scripted/baselines.py b/scripted/baselines.py index ad77c0a4a..f2fd358bc 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -1,9 +1,9 @@ +# TODO: try to remove the below line # pylint: disable=all from typing import Dict from collections import defaultdict -import random import nmmo from nmmo import material @@ -27,7 +27,7 @@ def __init__(self, config, idx): config : A forge.blade.core.Config object or subclass object ''' super().__init__(config, idx) - self.np_random = None + self._np_random = None self.health_max = config.PLAYER_BASE_HEALTH if config.RESOURCE_SYSTEM_ENABLED: @@ -37,6 +37,9 @@ def __init__(self, config, idx): self.spawnR = None self.spawnC = None + def set_rng(self, np_random): + self._np_random = np_random + @property def policy(self): return self.__class__.__name__ @@ -49,16 +52,17 @@ def forage_criterion(self) -> bool: def forage(self): '''Min/max food and water using Dijkstra's algorithm''' + # TODO: do not access realm._np_random directly. ALSO see below for all other uses move.forageDijkstra(self.config, self.ob, self.actions, - self.food_max, self.water_max, self.np_random) + self.food_max, self.water_max, self._np_random) def gather(self, resource): '''BFS search for a particular resource''' - return move.gatherBFS(self.config, self.ob, self.actions, resource, self.np_random) + return move.gatherBFS(self.config, self.ob, self.actions, resource, self._np_random) def explore(self): '''Route away from spawn''' - move.explore(self.config, self.ob, self.actions, self.me.row, self.me.col, self.np_random) + move.explore(self.config, self.ob, self.actions, self.me.row, self.me.col, self._np_random) @property def downtime(self): @@ -67,7 +71,7 @@ def downtime(self): def evade(self): '''Target and path away from an attacker''' - move.evade(self.config, self.ob, self.actions, self.attacker, self.np_random) + move.evade(self.config, self.ob, self.actions, self.attacker, self._np_random) self.target = self.attacker self.targetID = self.attackerID self.targetDist = self.attackerDist @@ -76,7 +80,7 @@ def attack(self): '''Attack the current target''' if self.target is not None: assert self.targetID is not None - style = self.np_random.choice(self.style) + style = self._np_random.choice(self.style) attack.target(self.config, self.actions, style, self.targetID) def target_weak(self): @@ -279,7 +283,7 @@ def buy(self, buy_k: dict, buy_upgrade: set): purchase = None best = list(self.best_heuristic.items()) - self.np_random.shuffle(best) + self._np_random.shuffle(best) for type_id, itm in best: # Buy top k if type_id in buy_k: @@ -313,7 +317,7 @@ def use(self): def __call__(self, observation: Observation): '''Process observations and return actions''' - assert self.np_random is not None, "Agent's RNG must be set." + assert self._np_random is not None, "Agent's RNG must be set." self.actions = {} self.ob = observation @@ -361,7 +365,7 @@ class Random(Scripted): def __call__(self, obs): super().__call__(obs) - move.rand(self.config, self.ob, self.actions, self.np_random) + move.rand(self.config, self.ob, self.actions, self._np_random) return self.actions class Meander(Scripted): @@ -369,7 +373,7 @@ class Meander(Scripted): def __call__(self, obs): super().__call__(obs) - move.meander(self.config, self.ob, self.actions, self.np_random) + move.meander(self.config, self.ob, self.actions, self._np_random) return self.actions class Explore(Scripted): diff --git a/tests/core/test_tile.py b/tests/core/test_tile.py index f49f78537..f73dd3ad8 100644 --- a/tests/core/test_tile.py +++ b/tests/core/test_tile.py @@ -11,7 +11,7 @@ def __init__(self): self.datastore = NumpyDatastore() self.datastore.register_object_type("Tile", TileState.State.num_attributes) self.config = nmmo.config.Small() - self.np_random = np.random + self._np_random = np.random class MockEntity(): def __init__(self, ent_id): @@ -21,9 +21,10 @@ class TestTile(unittest.TestCase): # pylint: disable=no-member def test_tile(self): mock_realm = MockRealm() - tile = Tile(mock_realm, 10, 20) + np_random = np.random + tile = Tile(mock_realm, 10, 20, np_random) - tile.reset(material.Foilage, nmmo.config.Small()) + tile.reset(material.Foilage, nmmo.config.Small(), np_random) self.assertEqual(tile.row.val, 10) self.assertEqual(tile.col.val, 20) diff --git a/tests/entity/test_entity.py b/tests/entity/test_entity.py index d4b368a7e..952e10696 100644 --- a/tests/entity/test_entity.py +++ b/tests/entity/test_entity.py @@ -11,7 +11,7 @@ def __init__(self): self.config.PLAYERS = range(100) self.datastore = NumpyDatastore() self.datastore.register_object_type("Entity", EntityState.State.num_attributes) - self.np_random = np.random + self._np_random = np.random # pylint: disable=no-member class TestEntity(unittest.TestCase): From 8696b2d4fc91d7f048b5fc9b138df79ba08383c8 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 21 Jun 2023 15:43:16 +0900 Subject: [PATCH 035/113] ensure obs match the obs space, set ds dtype to int16 --- nmmo/core/env.py | 2 +- nmmo/core/observation.py | 10 +++---- nmmo/datastore/numpy_datastore.py | 2 +- nmmo/lib/event_log.py | 11 ++++--- nmmo/task/game_state.py | 6 ++-- tests/core/test_env.py | 2 +- ...t_gym_spaces.py => test_gym_obs_spaces.py} | 4 +-- tests/core/test_observation_tile.py | 13 ++++---- tests/test_eventlog.py | 30 +++++++++---------- 9 files changed, 41 insertions(+), 39 deletions(-) rename tests/core/{test_gym_spaces.py => test_gym_obs_spaces.py} (93%) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 93b2e9172..208fa6094 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -412,7 +412,7 @@ def _compute_observations(self): market = Item.Query.for_sale(self.realm.datastore) # get tile map, to bypass the expensive tile window query - tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE).astype(np.int16) + tile_map = Tile.Query.get_map(self.realm.datastore, self.config.MAP_SIZE) radius = self.config.PLAYER_VISION_RADIUS tile_obs_size = ((2*radius+1)**2, len(Tile.State.attr_name_to_col)) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 5103c0d3b..82d9660b2 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -1,4 +1,4 @@ -from functools import lru_cache +from functools import lru_cache, cached_property import numpy as np @@ -15,7 +15,7 @@ def __init__(self, values, id_col): self.values = values self.ids = values[:, id_col] - @property + @cached_property def len(self): return len(self.ids) @@ -104,7 +104,7 @@ def tile(self, r_delta, c_delta): @lru_cache(maxsize=None) def entity(self, entity_id): rows = self.entities.values[self.entities.ids == entity_id] - if rows.size == 0: + if rows.shape[0] == 0: return None return EntityState.parse_array(rows[0]) @@ -257,8 +257,8 @@ def _make_use_mask(self): # level limits are differently applied depending on item types type_flt = np.tile(np.array(list(item_skill.keys())), (self.inventory.len,1)) level_flt = np.tile(np.array(list(item_skill.values())), (self.inventory.len,1)) - item_type = np.tile(np.transpose(np.atleast_2d(item_type)), (1, len(item_skill))) - item_level = np.tile(np.transpose(np.atleast_2d(item_level)), (1, len(item_skill))) + item_type = np.tile(np.transpose(np.atleast_2d(item_type)), (1,len(item_skill))) + item_level = np.tile(np.transpose(np.atleast_2d(item_level)), (1,len(item_skill))) level_satisfied = np.any((item_type==type_flt) & (item_level<=level_flt), axis=1) use_mask[:self.inventory.len] = not_listed & level_satisfied diff --git a/nmmo/datastore/numpy_datastore.py b/nmmo/datastore/numpy_datastore.py index e737ad9cd..2bced2d46 100644 --- a/nmmo/datastore/numpy_datastore.py +++ b/nmmo/datastore/numpy_datastore.py @@ -6,7 +6,7 @@ class NumpyTable(DataTable): - def __init__(self, num_columns: int, initial_size: int, dtype=np.float32): + def __init__(self, num_columns: int, initial_size: int, dtype=np.int16): super().__init__(num_columns) self._dtype = dtype self._initial_size = initial_size diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index cfb70dfd8..48772d840 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -11,7 +11,7 @@ # pylint: disable=no-member EventState = SerializedState.subclass("Event", [ - "id", # unique event id + "recorded", # event_log is write-only, no update or delete, so no need for row id "ent_id", "tick", @@ -27,8 +27,7 @@ EventAttr = EventState.State.attr_name_to_col EventState.Query = SimpleNamespace( - table=lambda ds: ds.table("Event").where_neq(EventAttr["id"], 0), - + table=lambda ds: ds.table("Event").where_eq(EventAttr["recorded"], 1), by_event=lambda ds, event_code: ds.table("Event").where_eq( EventAttr["event"], event_code), ) @@ -70,7 +69,7 @@ def reset(self): # define event logging def _create_event(self, entity: Entity, event_code: int): log = EventState(self.datastore) - log.id.update(log.datastore_record.id) + log.recorded.update(1) log.ent_id.update(entity.ent_id) # the tick increase by 1 after executing all actions log.tick.update(self.realm.tick+1) @@ -156,9 +155,9 @@ def record(self, event_code: int, entity: Entity, **kwargs): def get_data(self, event_code=None, agents: List[int]=None): if event_code is None: - event_data = EventState.Query.table(self.datastore).astype(np.int32) + event_data = EventState.Query.table(self.datastore) elif event_code in self.valid_events: - event_data = EventState.Query.by_event(self.datastore, event_code).astype(np.int32) + event_data = EventState.Query.by_event(self.datastore, event_code) else: return None diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 410c56946..36c6b3807 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -221,11 +221,11 @@ def __init__(self, realm: Realm, config: Config): def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: # copy the datastore, by running astype - entity_all = EntityState.Query.table(realm.datastore).astype(np.int16) + entity_all = EntityState.Query.table(realm.datastore).copy() alive_agents = entity_all[:, EntityAttr["id"]] alive_agents = set(alive_agents[alive_agents > 0]) - item_data = ItemState.Query.table(realm.datastore).astype(np.int16) - event_data = EventState.Query.table(realm.datastore).astype(np.int16) + item_data = ItemState.Query.table(realm.datastore).copy() + event_data = EventState.Query.table(realm.datastore).copy() return GameState( current_tick = realm.tick, diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 710a9d7b0..2c647f39a 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -87,7 +87,7 @@ def test_observations(self): dead_agents.add(dead_id) # check dead and alive - entity_all = EntityState.Query.table(self.env.realm.datastore).astype(np.int16) + entity_all = EntityState.Query.table(self.env.realm.datastore) alive_agents = entity_all[:, Entity.State.attr_name_to_col["id"]] alive_agents = set(alive_agents[alive_agents > 0]) for agent_id in alive_agents: diff --git a/tests/core/test_gym_spaces.py b/tests/core/test_gym_obs_spaces.py similarity index 93% rename from tests/core/test_gym_spaces.py rename to tests/core/test_gym_obs_spaces.py index e248e7c1f..79610af09 100644 --- a/tests/core/test_gym_spaces.py +++ b/tests/core/test_gym_obs_spaces.py @@ -2,7 +2,7 @@ import nmmo -class TestGymSpaces(unittest.TestCase): +class TestGymObsSpaces(unittest.TestCase): @classmethod def setUpClass(cls): cls.config = nmmo.config.Default() @@ -11,7 +11,7 @@ def setUpClass(cls): for _ in range(3): cls.env.step({}) - def test_obs_space(self): + def test_gym_obs_space(self): obs_spec = self.env.observation_space(1) obs, _, _, _ = self.env.step({}) diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 9585e73ff..828d580c4 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -153,7 +153,7 @@ def where_in_1d_with_index(event_data, subject, index): flt_idx = [row for sbj in subject for row in index.get(sbj,[])] return event_data[flt_idx] - event_data = EventState.Query.table(env.realm.datastore).astype(np.int16) + event_data = EventState.Query.table(env.realm.datastore) event_index = defaultdict() for row, id_ in enumerate(event_data[:,EventAttr['ent_id']]): if id_ in event_index: @@ -163,10 +163,13 @@ def where_in_1d_with_index(event_data, subject, index): # NOTE: the index-based approach returns the data in different order, # and all the operations in the task system don't use the order info - arr = where_in_1d_with_index(event_data, [1,2,3], event_index) - sorted_idx = np.argsort(arr[:,0]) # event_id - self.assertTrue(np.array_equal(correct_where_in_1d(event_data, [1,2,3]), - arr[sorted_idx])) + def sort_event_data(event_data): + keys = [event_data[:,i] for i in range(1,8)] + sorted_idx = np.lexsort(keys) + return event_data[sorted_idx] + arr1 = sort_event_data(correct_where_in_1d(event_data, [1,2,3])) + arr2 = sort_event_data(where_in_1d_with_index(event_data, [1,2,3], event_index)) + self.assertTrue(np.array_equal(arr1, arr2)) print('---test_gs_where_in_1d---') print('reference:', timeit( diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py index 50b7d6abb..d8b0405e1 100644 --- a/tests/test_eventlog.py +++ b/tests/test_eventlog.py @@ -78,21 +78,21 @@ def test_event_logging(self): log_data = [list(row) for row in event_log.get_data()] self.assertListEqual(log_data, [ - [ 1, 1, 1, EventCode.EAT_FOOD, 0, 0, 0, 0, 0], - [ 2, 2, 1, EventCode.DRINK_WATER, 0, 0, 0, 0, 0], - [ 3, 2, 1, EventCode.SCORE_HIT, 1, 0, 50, 0, 0], - [ 4, 3, 1, EventCode.PLAYER_KILL, 0, 5, 0, 0, 5], - [ 5, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0], - [ 6, 4, 2, EventCode.GIVE_ITEM, 0, 0, 0, 0, 0], - [ 7, 5, 2, EventCode.DESTROY_ITEM, 0, 0, 0, 0, 0], - [ 8, 6, 2, EventCode.HARVEST_ITEM, 13, 3, 1, 0, 0], - [ 9, 7, 3, EventCode.GIVE_GOLD, 0, 0, 0, 0, 0], - [10, 8, 3, EventCode.LIST_ITEM, 16, 5, 1, 11, 0], - [11, 9, 3, EventCode.EARN_GOLD, 0, 0, 0, 15, 0], - [12, 10, 3, EventCode.BUY_ITEM, 13, 7, 1, 21, 0], - [13, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0], - [14, 12, 5, EventCode.GO_FARTHEST, 0, 0, 6, 0, 0], - [15, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + [1, 1, 1, EventCode.EAT_FOOD, 0, 0, 0, 0, 0], + [1, 2, 1, EventCode.DRINK_WATER, 0, 0, 0, 0, 0], + [1, 2, 1, EventCode.SCORE_HIT, 1, 0, 50, 0, 0], + [1, 3, 1, EventCode.PLAYER_KILL, 0, 5, 0, 0, 5], + [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0], + [1, 4, 2, EventCode.GIVE_ITEM, 0, 0, 0, 0, 0], + [1, 5, 2, EventCode.DESTROY_ITEM, 0, 0, 0, 0, 0], + [1, 6, 2, EventCode.HARVEST_ITEM, 13, 3, 1, 0, 0], + [1, 7, 3, EventCode.GIVE_GOLD, 0, 0, 0, 0, 0], + [1, 8, 3, EventCode.LIST_ITEM, 16, 5, 1, 11, 0], + [1, 9, 3, EventCode.EARN_GOLD, 0, 0, 0, 15, 0], + [1, 10, 3, EventCode.BUY_ITEM, 13, 7, 1, 21, 0], + [1, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0], + [1, 12, 5, EventCode.GO_FARTHEST, 0, 0, 6, 0, 0], + [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) if __name__ == '__main__': unittest.main() From 12fbdabc000be43b9da1a782ae8c9d40d357a6f8 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 21 Jun 2023 16:59:48 +0900 Subject: [PATCH 036/113] fix the seed for testing --- tests/testhelpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testhelpers.py b/tests/testhelpers.py index cb7726dce..8fd9ef837 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -383,7 +383,7 @@ def profile_env_step(action_target=True, tasks=None, condition=None): config.PLAYERS = [baselines.Sleeper] # the scripted agents doing nothing config.IMMORTAL = True # otherwise the agents will die config.PROVIDE_ACTION_TARGETS = action_target - env = nmmo.Env(config) + env = nmmo.Env(config, seed=0) if tasks is None: tasks = [] env.reset(seed=0, make_task_fn=lambda: tasks) From cef5adf63b88303cce0341c7398530c95ee24e57 Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Mon, 26 Jun 2023 01:06:21 +0000 Subject: [PATCH 037/113] Added A* caching so results are not recalculated each step. Relies on the invariant that passable tiles remain constant - this is new in 2.0 from unit stacking. On 1024 steps, seed 5000, Medium, Terrain, Resource, Combat, NPC 0.16s improvement --- nmmo/core/map.py | 1 + nmmo/systems/ai/behavior.py | 8 ++++---- nmmo/systems/ai/move.py | 15 +++++++++------ nmmo/systems/ai/utils.py | 15 ++++++++++++--- tests/test_performance.py | 15 +++++++++++++++ 5 files changed, 41 insertions(+), 13 deletions(-) diff --git a/nmmo/core/map.py b/nmmo/core/map.py index 7e5b7e389..dd2321c0c 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -17,6 +17,7 @@ def __init__(self, config, realm, np_random): self._repr = None self.realm = realm self.update_list = None + self.pathfinding_cache = {} # Avoid recalculating A*, paths don't move sz = config.MAP_SIZE self.tiles = np.zeros((sz, sz), dtype=object) diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index 95e3641b2..0f5182ad4 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -26,7 +26,7 @@ def update(entity): def pathfind(realm, actions, entity, target): # TODO: do not access realm._np_random directly. ALSO see below for all other uses actions[nmmo.action.Move] = { - nmmo.action.Direction: move.pathfind(realm.map.tiles, entity, target, realm._np_random)} + nmmo.action.Direction: move.pathfind(realm.map, entity, target, realm._np_random)} def explore(realm, actions, entity): sz = realm.config.TERRAIN_SIZE @@ -46,11 +46,11 @@ def explore(realm, actions, entity): def meander(realm, actions, entity): actions[nmmo.action.Move] = { - nmmo.action.Direction: move.habitable(realm.map.tiles, entity, realm._np_random)} + nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} def evade(realm, actions, entity): actions[nmmo.action.Move] = { - nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker, + nmmo.action.Direction: move.antipathfind(realm.map, entity, entity.attacker, realm._np_random)} def hunt(realm, actions, entity): @@ -61,7 +61,7 @@ def hunt(realm, actions, entity): if distance == 0: direction = move.random_direction(realm._np_random) elif distance > 1: - direction = move.pathfind(realm.map.tiles, entity, entity.target, realm._np_random) + direction = move.pathfind(realm.map, entity, entity.target, realm._np_random) if direction is not None: actions[nmmo.action.Move] = {nmmo.action.Direction: direction} diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index d3dfcbd7d..c84c33832 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -6,8 +6,9 @@ def random_direction(np_random): return np_random.choice(action.Direction.edges) -def random_safe(tiles, ent, np_random): +def random_safe(map, ent, np_random): r, c = ent.pos + tiles = map.tiles cands = [] if not tiles[r-1, c].void: cands.append(action.North) @@ -20,8 +21,9 @@ def random_safe(tiles, ent, np_random): return np_random.choice(cands) -def habitable(tiles, ent, np_random): +def habitable(map, ent, np_random): r, c = ent.pos + tiles = map.tiles cands = [] if tiles[r-1, c].habitable: cands.append(action.North) @@ -53,13 +55,14 @@ def bullrush(ent, targ, np_random): direction = utils.directionTowards(ent, targ) return towards(direction, np_random) -def pathfind(tiles, ent, targ, np_random): - direction = utils.aStar(tiles, ent.pos, targ.pos) +import time +def pathfind(map, ent, targ, np_random): + direction = utils.aStar(map, ent.pos, targ.pos) return towards(direction, np_random) -def antipathfind(tiles, ent, targ, np_random): +def antipathfind(map, ent, targ, np_random): er, ec = ent.pos tr, tc = targ.pos goal = (2*er - tr , 2*ec-tc) - direction = utils.aStar(tiles, ent.pos, goal) + direction = utils.aStar(map, ent.pos, goal) return towards(direction, np_random) diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py index cd23d5bff..08d954623 100644 --- a/nmmo/systems/ai/utils.py +++ b/nmmo/systems/ai/utils.py @@ -7,6 +7,7 @@ import numpy as np from nmmo.lib.utils import in_bounds +from functools import lru_cache def validTarget(ent, targ, rng): @@ -88,10 +89,15 @@ def lInfty(start, goal): gr, gc = goal return max(abs(gr - sr), abs(gc - sc)) -def aStar(tiles, start, goal, cutoff=100): +CUTOFF = 100 +def aStar(map, start, goal): + cutoff = CUTOFF + tiles = map.tiles if start == goal: return (0, 0) - + if (start,goal) in map.pathfinding_cache: + return map.pathfinding_cache[(start,goal)] + initial_goal = goal pq = [(0, start)] backtrace = {} @@ -135,11 +141,14 @@ def aStar(tiles, start, goal, cutoff=100): backtrace[nxt] = cur while goal in backtrace and backtrace[goal] != start: + gr, gc = goal goal = backtrace[goal] + sr, sc = goal + map.pathfinding_cache[(goal,initial_goal)] = (gr - sr, gc - sc) sr, sc = start gr, gc = goal - + map.pathfinding_cache[(start,initial_goal)] = (gr - sr, gc - sc) return (gr - sr, gc - sc) # End A* diff --git a/tests/test_performance.py b/tests/test_performance.py index f27519e6c..143228b90 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -101,6 +101,21 @@ def test_fps_all_med_100_pop(benchmark): benchmark_config(benchmark, Medium, 100, AllGameSystems) +import time +if __name__ == '__main__': + RANDOM_SEED = 5000 + conf = create_config(Medium, Terrain, Resource, Combat, NPC) + conf.PLAYER_N = 1 + conf.PLAYERS = [baselines.Random] + + env = nmmo.Env(conf) + + start = time.time() + env.reset(seed=RANDOM_SEED) + for _ in range(1024): + env.step({}) + print(f"Total time {time.time()-start}") + ''' def benchmark_env(benchmark, env, nent): env.config.PLAYER_N = nent From 5c183fc89a9e58ad07e443eba287ca49ab7a692c Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Mon, 26 Jun 2023 02:14:16 +0000 Subject: [PATCH 038/113] Added tests to check invariants assumed for optimiations. Habitable optimization (3x increase for move.habitable, for conf = create_config(Medium, Terrain, Resource, Combat, NPC) single player approx 20% speed increase. --- .gitignore | 2 ++ nmmo/core/map.py | 2 ++ nmmo/systems/ai/move.py | 60 +++++++++++++++++++++++++++------------ nmmo/systems/ai/utils.py | 2 -- tests/test_performance.py | 15 ++++++++-- 5 files changed, 59 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 0fa0b8db6..1407169c1 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +profile.run \ No newline at end of file diff --git a/nmmo/core/map.py b/nmmo/core/map.py index dd2321c0c..6bb23045d 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -21,6 +21,7 @@ def __init__(self, config, realm, np_random): sz = config.MAP_SIZE self.tiles = np.zeros((sz, sz), dtype=object) + self.habitable_tiles = np.zeros((sz,sz)) for r in range(sz): for c in range(sz): @@ -63,6 +64,7 @@ def reset(self, map_id, np_random): mat = materials[idx] tile = self.tiles[r, c] tile.reset(mat, config, np_random) + self.habitable_tiles[r, c] = tile.habitable assert c == config.MAP_SIZE - 1 assert r == config.MAP_SIZE - 1 diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index c84c33832..d1f3fd566 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -13,31 +13,56 @@ def random_safe(map, ent, np_random): if not tiles[r-1, c].void: cands.append(action.North) if not tiles[r+1, c].void: - cands.append(action.South) + cands.append(action.North) if not tiles[r, c-1].void: - cands.append(action.West) + cands.append(action.North) if not tiles[r, c+1].void: - cands.append(action.East) + cands.append(action.North) return np_random.choice(cands) def habitable(map, ent, np_random): r, c = ent.pos - tiles = map.tiles - cands = [] - if tiles[r-1, c].habitable: - cands.append(action.North) - if tiles[r+1, c].habitable: - cands.append(action.South) - if tiles[r, c-1].habitable: - cands.append(action.West) - if tiles[r, c+1].habitable: - cands.append(action.East) - - if len(cands) == 0: - return action.North + tiles = map.habitable_tiles + direction = np_random.integers(0,4) + if direction == 0: + if tiles[r-1, c]: + return action.North + if tiles[r+1, c]: + return action.South + if tiles[r, c-1]: + return action.West + if tiles[r, c+1]: + return action.East + elif direction == 1: + if tiles[r+1, c]: + return action.South + if tiles[r, c-1]: + return action.West + if tiles[r, c+1]: + return action.East + if tiles[r-1, c]: + return action.North + elif direction == 2: + if tiles[r, c-1]: + return action.West + if tiles[r, c+1]: + return action.East + if tiles[r-1, c]: + return action.North + if tiles[r+1, c]: + return action.South + else: + if tiles[r, c+1]: + return action.East + if tiles[r-1, c]: + return action.North + if tiles[r+1, c]: + return action.South + if tiles[r, c-1]: + return action.West - return np_random.choice(cands) + return action.North def towards(direction, np_random): if direction == (-1, 0): @@ -55,7 +80,6 @@ def bullrush(ent, targ, np_random): direction = utils.directionTowards(ent, targ) return towards(direction, np_random) -import time def pathfind(map, ent, targ, np_random): direction = utils.aStar(map, ent.pos, targ.pos) return towards(direction, np_random) diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py index 08d954623..b1b1ffeb2 100644 --- a/nmmo/systems/ai/utils.py +++ b/nmmo/systems/ai/utils.py @@ -7,8 +7,6 @@ import numpy as np from nmmo.lib.utils import in_bounds -from functools import lru_cache - def validTarget(ent, targ, rng): if targ is None or not targ.alive: diff --git a/tests/test_performance.py b/tests/test_performance.py index 143228b90..7913a2cf7 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -101,8 +101,8 @@ def test_fps_all_med_100_pop(benchmark): benchmark_config(benchmark, Medium, 100, AllGameSystems) -import time -if __name__ == '__main__': +import time, cProfile, io, pstats +def set_seed_test(): RANDOM_SEED = 5000 conf = create_config(Medium, Terrain, Resource, Combat, NPC) conf.PLAYER_N = 1 @@ -116,6 +116,17 @@ def test_fps_all_med_100_pop(benchmark): env.step({}) print(f"Total time {time.time()-start}") +if __name__ == '__main__': + with open('profile.run','a') as f: + pr = cProfile.Profile() + pr.enable() + set_seed_test() + pr.disable() + s = io.StringIO() + ps = pstats.Stats(pr,stream=s).sort_stats('cumtime') + ps.print_stats() + f.write(s.getvalue()) + ''' def benchmark_env(benchmark, env, nent): env.config.PLAYER_N = nent From 99444b0514634e368da35a020a6e401c1cedbbeb Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Mon, 26 Jun 2023 02:39:20 +0000 Subject: [PATCH 039/113] linf - avoid uneccessary calls to np --- nmmo/core/action.py | 4 ++-- nmmo/lib/utils.py | 4 ++++ tests/test_performance.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index a026456a1..1bf4e4b1e 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -138,7 +138,7 @@ def call(realm, entity, direction): realm.map.tiles[r_new, c_new].add_entity(entity) # exploration record keeping. moved from entity.py, History.update() - dist_from_spawn = utils.linf(entity.spawn_pos, (r_new, c_new)) + dist_from_spawn = utils.linf_single(entity.spawn_pos, (r_new, c_new)) if dist_from_spawn > entity.history.exploration: entity.history.exploration = dist_from_spawn if entity.is_player: @@ -263,7 +263,7 @@ def call(realm, entity, style, target): return None #Can't attack out of range - if utils.linf(entity.pos, target.pos) > style.attack_range(config): + if utils.linf_single(entity.pos, target.pos) > style.attack_range(config): return None #Execute attack diff --git a/nmmo/lib/utils.py b/nmmo/lib/utils.py index a2c0bb3ed..31b73d0b5 100644 --- a/nmmo/lib/utils.py +++ b/nmmo/lib/utils.py @@ -71,6 +71,10 @@ def linf(pos1, pos2): diff = np.abs(np.array(pos1) - np.array(pos2)) return np.max(diff, axis=-1) +def linf_single(pos1, pos2): + # pos is a single (r,c) to avoid uneccessary function calls + return max(abs(pos1[0]-pos2[0]), abs(pos1[1]-pos2[1])) + #Bounds checker def in_bounds(r, c, shape, border=0): R, C = shape diff --git a/tests/test_performance.py b/tests/test_performance.py index 7913a2cf7..318c1004f 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -105,7 +105,7 @@ def test_fps_all_med_100_pop(benchmark): def set_seed_test(): RANDOM_SEED = 5000 conf = create_config(Medium, Terrain, Resource, Combat, NPC) - conf.PLAYER_N = 1 + conf.PLAYER_N = 10 conf.PLAYERS = [baselines.Random] env = nmmo.Env(conf) From 2d306d8c00679e6256c86f3ff9190db0e4edbc57 Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Mon, 26 Jun 2023 03:51:26 +0000 Subject: [PATCH 040/113] Some more low hanging fruit --- nmmo/datastore/serialized.py | 6 ++++-- nmmo/entity/npc.py | 8 +++++++- nmmo/systems/ai/behavior.py | 17 ++++++++--------- nmmo/systems/ai/move.py | 2 +- nmmo/systems/ai/utils.py | 8 +------- nmmo/task/game_state.py | 2 +- tests/test_optimization.py | 37 ++++++++++++++++++++++++++++++++++++ 7 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 tests/test_optimization.py diff --git a/nmmo/datastore/serialized.py b/nmmo/datastore/serialized.py index 652280292..a6201ba6a 100644 --- a/nmmo/datastore/serialized.py +++ b/nmmo/datastore/serialized.py @@ -38,8 +38,10 @@ def val(self): return self._val def update(self, value): - value = min(self._max, max(self._min, value)) - + if value > self._max: + value = self._max + elif value < self._min: + value = self._min self.datastore_record.update(self._column, value) self._val = value diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py index 16ac75536..e19fd94e1 100644 --- a/nmmo/entity/npc.py +++ b/nmmo/entity/npc.py @@ -106,7 +106,13 @@ def spawn(realm, pos, iden, np_random): ent.spawn_danger = danger # Select combat focus - style = np_random.choice((Action.Melee, Action.Range, Action.Mage)) + style = np_random.integers(0,3) + if style == 0: + style = Action.Melee + elif style == 1: + style = Action.Range + else: + style = Action.Mage ent.skills.style = style # Compute level diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index 0f5182ad4..50d450483 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -55,16 +55,15 @@ def evade(realm, actions, entity): def hunt(realm, actions, entity): #Move args - distance = utils.distance(entity, entity.target) - - direction = None - if distance == 0: - direction = move.random_direction(realm._np_random) - elif distance > 1: - direction = move.pathfind(realm.map, entity, entity.target, realm._np_random) + distance = utils.lInfty(entity.pos, entity.target.pos) - if direction is not None: - actions[nmmo.action.Move] = {nmmo.action.Direction: direction} + if distance > 1: + actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map, + entity, + entity.target, + realm._np_random)} + elif distance == 0: + actions[nmmo.action.Move] = {nmmo.action.Direction: move.random_direction(realm._np_random)} attack(realm, actions, entity) diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index d1f3fd566..16f00bd89 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -4,7 +4,7 @@ def random_direction(np_random): - return np_random.choice(action.Direction.edges) + return action.Direction.edges[np_random.integers(0,len(action.Direction.edges))] def random_safe(map, ent, np_random): r, c = ent.pos diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py index b1b1ffeb2..0213deda7 100644 --- a/nmmo/systems/ai/utils.py +++ b/nmmo/systems/ai/utils.py @@ -9,9 +9,7 @@ from nmmo.lib.utils import in_bounds def validTarget(ent, targ, rng): - if targ is None or not targ.alive: - return False - if lInfty(ent.pos, targ.pos) > rng: + if targ is None or not targ.alive or lInfty(ent.pos, targ.pos) > rng: return False return True @@ -49,10 +47,6 @@ def closestTarget(ent, tiles, rng=1): for e in tiles[sr + d, sc + r].entities.values(): if e is not ent and validTarget(ent, e, rng): return e -def distance(ent, targ): - # used in scripted/behavior.py, attack() to determine attack range - return lInfty(ent.pos, targ.pos) - def lInf(ent, targ): sr, sc = ent.pos gr, gc = targ.pos diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 36c6b3807..223f9209d 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -2,8 +2,8 @@ from typing import Dict, Iterable, Tuple, MutableMapping, Set from dataclasses import dataclass from copy import deepcopy -from abc import ABC, abstractmethod from collections import defaultdict +from abc import ABC, abstractmethod import functools import numpy as np diff --git a/tests/test_optimization.py b/tests/test_optimization.py new file mode 100644 index 000000000..3ea9e6323 --- /dev/null +++ b/tests/test_optimization.py @@ -0,0 +1,37 @@ +# Test invariants assumed for certain optimizations + +import unittest + +import copy +import nmmo +from scripted.baselines import Random + +def rollout(): + config = nmmo.config.Default() + config.PLAYERS = [Random] + env = nmmo.Env(config) + env.reset() + start = copy.deepcopy(env.realm) + for _ in range(64): + env.step({}) + end = copy.deepcopy(env.realm) + return (start, end) + +class TestOptimization(unittest.TestCase): + + def test_passability_immutable(self): + # Used in optimization that caches the result of A* + start, end = rollout() + start_passable = [tile.impassible for tile in start.map.tiles.flatten()] + end_passable = [tile.impassible for tile in end.map.tiles.flatten()] + self.assertListEqual(start_passable, end_passable) + + def test_habitability_immutable(self): + # Used in optimization with habitability lookup table + start, end = rollout() + start_habitable = [tile.habitable for tile in start.map.tiles.flatten()] + end_habitable = [tile.habitable for tile in end.map.tiles.flatten()] + self.assertListEqual(start_habitable, end_habitable) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 88767efa7912811f61b156ab38abfbdddd3021af Mon Sep 17 00:00:00 2001 From: MarkHaoxiang-laptop Date: Tue, 27 Jun 2023 23:47:46 +0000 Subject: [PATCH 041/113] where_in_1d optimization, vectorized. --- nmmo/task/game_state.py | 57 ++++++++++------------------------------- 1 file changed, 14 insertions(+), 43 deletions(-) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 223f9209d..57797ed4d 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -37,39 +37,23 @@ class GameState: env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table - entity_index: Dict[int, Iterable] # precomputed index for where_in_1d item_data: np.ndarray # a copied, whole Item ds table - item_index: Dict[int, Iterable] event_data: np.ndarray # a copied, whole Event log table - event_index: Dict[int, Iterable] cache_result: MutableMapping # cache for general memoization - # add helper functions below - @functools.lru_cache - def entity_or_none(self, ent_id): - flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id - if np.any(flt_ent): - return EntityState.parse_array(self.entity_data[flt_ent][0]) - return None - - def where_in_id(self, data_type, subject: Iterable[int]): - k = (data_type, subject) - if k in self.cache_result: - return self.cache_result[k] - + # Helper Functions + def where_in_1d(self, data_type, subject: Iterable[int]): + assert data_type in ['entity', 'item', 'event'], 'data_type must be in entity, item, event' if data_type == 'entity': - flt_idx = [row for sbj in subject for row in self.entity_index.get(sbj,[])] - self.cache_result[k] = self.entity_data[flt_idx] - if data_type == 'item': - flt_idx = [row for sbj in subject for row in self.item_index.get(sbj,[])] - self.cache_result[k] = self.item_data[flt_idx] - if data_type == 'event': - flt_idx = [row for sbj in subject for row in self.event_index.get(sbj,[])] - self.cache_result[k] = self.event_data[flt_idx] - if data_type in ['entity', 'item', 'event']: - return self.cache_result[k] - + flt_idx = np.isin(self.entity_data[:, EntityAttr["id"]], subject).nonzero()[0] + return self.entity_data[flt_idx] + elif data_type == 'item': + flt_idx = np.isin(self.item_data[:, ItemAttr["owner_id"]], subject).nonzero()[0] + return self.item_data[flt_idx] + elif data_type == 'event': + flt_idx = np.isin(self.event_data[:, EventAttr["ent_id"]], subject).nonzero()[0] + return self.event_data[flt_idx] raise ValueError("data_type must be in entity, item, event") def get_subject_view(self, subject: Group): @@ -168,7 +152,7 @@ def __init__(self, gs: GameState, subject: Group): @functools.cached_property def _sbj_ent(self): - return self._gs.where_in_id('entity', self._subject.agents) + return self._gs.where_in_1d('entity', self._subject.agents) @functools.cached_property def entity(self): @@ -176,7 +160,7 @@ def entity(self): @functools.cached_property def _sbj_item(self): - return self._gs.where_in_id('item', self._subject.agents) + return self._gs.where_in_1d('item', self._subject.agents) @functools.cached_property def item(self): @@ -184,7 +168,7 @@ def item(self): @functools.cached_property def _sbj_event(self): - return self._gs.where_in_id('event', self._subject.agents) + return self._gs.where_in_1d('event', self._subject.agents) @functools.cached_property def event(self): @@ -234,20 +218,7 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, - entity_index = self._precompute_index(entity_all, EntityAttr["id"]), item_data = item_data, - item_index = self._precompute_index(item_data, ItemAttr['owner_id']), event_data = event_data, - event_index = self._precompute_index(event_data, EventAttr['ent_id']), cache_result = {} ) - - @staticmethod - def _precompute_index(table, id_col): - index = defaultdict() - for row, id_ in enumerate(table[:,id_col]): - if id_ in index: - index[id_].append(row) - else: - index[id_] = [row] - return index From 62d38fb4a2b8905077ea91a6cc04010c50c94f5b Mon Sep 17 00:00:00 2001 From: MarkHaoxiang-laptop Date: Wed, 28 Jun 2023 01:07:06 +0000 Subject: [PATCH 042/113] Pylinted --- nmmo/entity/entity.py | 2 +- nmmo/systems/ai/move.py | 18 +++++++++--------- nmmo/task/game_state.py | 5 ++--- tests/test_optimization.py | 20 ++++++++++---------- tests/test_performance.py | 18 ++++++++++-------- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index 7fda6f896..3c943dd17 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -314,7 +314,7 @@ def apply_damage(self, dmg, style): @property def pos(self): - return int(self.row.val), int(self.col.val) + return self.row.val, self.col.val @property def alive(self): diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index 16f00bd89..4e577e003 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -2,13 +2,13 @@ from nmmo.core import action from nmmo.systems.ai import utils - +# pylint: disable=E1136 def random_direction(np_random): return action.Direction.edges[np_random.integers(0,len(action.Direction.edges))] -def random_safe(map, ent, np_random): +def random_safe(realm_map, ent, np_random): r, c = ent.pos - tiles = map.tiles + tiles = realm_map.tiles cands = [] if not tiles[r-1, c].void: cands.append(action.North) @@ -21,9 +21,9 @@ def random_safe(map, ent, np_random): return np_random.choice(cands) -def habitable(map, ent, np_random): +def habitable(realm_map, ent, np_random): r, c = ent.pos - tiles = map.habitable_tiles + tiles = realm_map.habitable_tiles direction = np_random.integers(0,4) if direction == 0: if tiles[r-1, c]: @@ -80,13 +80,13 @@ def bullrush(ent, targ, np_random): direction = utils.directionTowards(ent, targ) return towards(direction, np_random) -def pathfind(map, ent, targ, np_random): - direction = utils.aStar(map, ent.pos, targ.pos) +def pathfind(realm_map, ent, targ, np_random): + direction = utils.aStar(realm_map, ent.pos, targ.pos) return towards(direction, np_random) -def antipathfind(map, ent, targ, np_random): +def antipathfind(realm_map, ent, targ, np_random): er, ec = ent.pos tr, tc = targ.pos goal = (2*er - tr , 2*ec-tc) - direction = utils.aStar(map, ent.pos, goal) + direction = utils.aStar(realm_map, ent.pos, goal) return towards(direction, np_random) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 57797ed4d..251b0138a 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -2,7 +2,6 @@ from typing import Dict, Iterable, Tuple, MutableMapping, Set from dataclasses import dataclass from copy import deepcopy -from collections import defaultdict from abc import ABC, abstractmethod import functools @@ -48,10 +47,10 @@ def where_in_1d(self, data_type, subject: Iterable[int]): if data_type == 'entity': flt_idx = np.isin(self.entity_data[:, EntityAttr["id"]], subject).nonzero()[0] return self.entity_data[flt_idx] - elif data_type == 'item': + if data_type == 'item': flt_idx = np.isin(self.item_data[:, ItemAttr["owner_id"]], subject).nonzero()[0] return self.item_data[flt_idx] - elif data_type == 'event': + if data_type == 'event': flt_idx = np.isin(self.event_data[:, EventAttr["ent_id"]], subject).nonzero()[0] return self.event_data[flt_idx] raise ValueError("data_type must be in entity, item, event") diff --git a/tests/test_optimization.py b/tests/test_optimization.py index 3ea9e6323..bd365b319 100644 --- a/tests/test_optimization.py +++ b/tests/test_optimization.py @@ -7,15 +7,15 @@ from scripted.baselines import Random def rollout(): - config = nmmo.config.Default() - config.PLAYERS = [Random] - env = nmmo.Env(config) - env.reset() - start = copy.deepcopy(env.realm) - for _ in range(64): - env.step({}) - end = copy.deepcopy(env.realm) - return (start, end) + config = nmmo.config.Default() + config.PLAYERS = [Random] + env = nmmo.Env(config) + env.reset() + start = copy.deepcopy(env.realm) + for _ in range(64): + env.step({}) + end = copy.deepcopy(env.realm) + return (start, end) class TestOptimization(unittest.TestCase): @@ -34,4 +34,4 @@ def test_habitability_immutable(self): self.assertListEqual(start_habitable, end_habitable) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_performance.py b/tests/test_performance.py index 318c1004f..c245f9478 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -1,3 +1,7 @@ +# import time +import cProfile +import io +import pstats import nmmo from nmmo.core.config import (NPC, AllGameSystems, Combat, Communication, @@ -100,30 +104,28 @@ def test_fps_no_npc_med_100_pop(benchmark): def test_fps_all_med_100_pop(benchmark): benchmark_config(benchmark, Medium, 100, AllGameSystems) - -import time, cProfile, io, pstats def set_seed_test(): - RANDOM_SEED = 5000 + random_seed = 5000 conf = create_config(Medium, Terrain, Resource, Combat, NPC) conf.PLAYER_N = 10 conf.PLAYERS = [baselines.Random] env = nmmo.Env(conf) - start = time.time() - env.reset(seed=RANDOM_SEED) + # start = time.time() + env.reset(seed=random_seed) for _ in range(1024): env.step({}) - print(f"Total time {time.time()-start}") + # print(f"Total time {time.time()-start}") if __name__ == '__main__': - with open('profile.run','a') as f: + with open('profile.run','a', encoding="utf-8") as f: pr = cProfile.Profile() pr.enable() set_seed_test() pr.disable() s = io.StringIO() - ps = pstats.Stats(pr,stream=s).sort_stats('cumtime') + ps = pstats.Stats(pr,stream=s).sort_stats('tottime') ps.print_stats() f.write(s.getvalue()) From 980dee40605e4b79588e979b5c80dfdc7dc22bc8 Mon Sep 17 00:00:00 2001 From: MarkHaoxiang-laptop Date: Wed, 28 Jun 2023 01:45:43 +0000 Subject: [PATCH 043/113] Bug fix: --- nmmo/systems/ai/move.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index 4e577e003..cf1b9e3b2 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -13,11 +13,11 @@ def random_safe(realm_map, ent, np_random): if not tiles[r-1, c].void: cands.append(action.North) if not tiles[r+1, c].void: - cands.append(action.North) + cands.append(action.South) if not tiles[r, c-1].void: - cands.append(action.North) + cands.append(action.West) if not tiles[r, c+1].void: - cands.append(action.North) + cands.append(action.East) return np_random.choice(cands) From 5db6d0cfb6bee1f13686cb7bf6612c244890846c Mon Sep 17 00:00:00 2001 From: MarkHaoxiang-laptop Date: Wed, 28 Jun 2023 01:48:42 +0000 Subject: [PATCH 044/113] Further bug fixes. TODO: Pylint the ai.util file --- nmmo/systems/ai/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py index 0213deda7..47b626955 100644 --- a/nmmo/systems/ai/utils.py +++ b/nmmo/systems/ai/utils.py @@ -16,7 +16,7 @@ def validTarget(ent, targ, rng): def validResource(ent, tile, rng): return tile is not None and tile.state.tex in ( - 'foilage', 'water') and distance(ent, tile) <= rng + 'foilage', 'water') and lInfty(ent.pos, tile.pos) <= rng def directionTowards(ent, targ): @@ -82,13 +82,13 @@ def lInfty(start, goal): return max(abs(gr - sr), abs(gc - sc)) CUTOFF = 100 -def aStar(map, start, goal): +def aStar(realm_map, start, goal): cutoff = CUTOFF - tiles = map.tiles + tiles = realm_map.tiles if start == goal: return (0, 0) - if (start,goal) in map.pathfinding_cache: - return map.pathfinding_cache[(start,goal)] + if (start,goal) in realm_map.pathfinding_cache: + return realm_map.pathfinding_cache[(start,goal)] initial_goal = goal pq = [(0, start)] @@ -136,11 +136,11 @@ def aStar(map, start, goal): gr, gc = goal goal = backtrace[goal] sr, sc = goal - map.pathfinding_cache[(goal,initial_goal)] = (gr - sr, gc - sc) + realm_map.pathfinding_cache[(goal,initial_goal)] = (gr - sr, gc - sc) sr, sc = start gr, gc = goal - map.pathfinding_cache[(start,initial_goal)] = (gr - sr, gc - sc) + realm_map.pathfinding_cache[(start,initial_goal)] = (gr - sr, gc - sc) return (gr - sr, gc - sc) # End A* From edf765fb31cb670c4b959e19c34f7b17ca404552 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 30 Jun 2023 10:48:44 +0900 Subject: [PATCH 045/113] added test_map_preview --- tests/core/test_map_generation.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/core/test_map_generation.py b/tests/core/test_map_generation.py index 1c6020c49..d7f35d9c6 100644 --- a/tests/core/test_map_generation.py +++ b/tests/core/test_map_generation.py @@ -10,6 +10,7 @@ def test_insufficient_maps(self): config.PATH_MAPS = 'maps/test_map_gen' config.MAP_N = 20 + # clear the directory path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) shutil.rmtree(path_maps, ignore_errors=True) @@ -25,5 +26,25 @@ def test_insufficient_maps(self): # this should finish without error + def test_map_preview(self): + class MapConfig( + nmmo.config.Small, # no fractal, grass only + nmmo.config.Terrain, # water, grass, foilage, stone + nmmo.config.Item, # no additional effect on the map + nmmo.config.Profession, # add ore, tree, crystal, herb, fish + ): + PATH_MAPS = 'maps/test_preview' + MAP_FORCE_GENERATION = True + MAP_GENERATE_PREVIEWS = True + config = MapConfig() + + # clear the directory + path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS) + shutil.rmtree(path_maps, ignore_errors=True) + + test_env = nmmo.Env(config) # pylint: disable=unused-variable + + # this should finish without error + if __name__ == '__main__': unittest.main() From e20e11fa13242ae74d0bda4d57a122166f31c0b8 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Fri, 30 Jun 2023 17:34:08 -0700 Subject: [PATCH 046/113] randomize spawn locations and move team_loader out of environment --- nmmo/lib/spawn.py | 24 ----------------- tests/test_team_spawn.py | 57 ---------------------------------------- 2 files changed, 81 deletions(-) delete mode 100644 tests/test_team_spawn.py diff --git a/nmmo/lib/spawn.py b/nmmo/lib/spawn.py index 6d966527b..aed9949c7 100644 --- a/nmmo/lib/spawn.py +++ b/nmmo/lib/spawn.py @@ -113,27 +113,3 @@ def spawn_concurrent(config, np_random): spawn_positions = sides[:config.PLAYER_N] return spawn_positions - -def get_team_spawn_positions(config, num_teams): - '''Generates spawn positions for new teams - Agents in the same team spawn together in the same tile - Evenly spaces teams around the square map borders - - Returns: - list of tuple(int, int): - - position: - The position (row, col) to spawn the given teams - ''' - teams_per_sides = (num_teams + 3) // 4 # 1-4 -> 1, 5-8 -> 2, etc. - - sides = get_edge_tiles(config) - assert len(sides[0]) >= 4*teams_per_sides, 'Map too small for teams' - - team_spawn_positions = [] - for side in sides: - for i in range(teams_per_sides): - idx = int(len(side)*(i+1)/(teams_per_sides + 1)) - team_spawn_positions.append(side[idx]) - - return team_spawn_positions diff --git a/tests/test_team_spawn.py b/tests/test_team_spawn.py deleted file mode 100644 index 2dc7ca635..000000000 --- a/tests/test_team_spawn.py +++ /dev/null @@ -1,57 +0,0 @@ -import unittest - -import nmmo -from nmmo.core.agent import Agent -from nmmo.lib.team_helper import TeamHelper -from nmmo.lib import spawn - - -class TeamLoader(spawn.SequentialLoader): - def __init__(self, config, np_random, team_helper: TeamHelper): - assert config.PLAYERS == [Agent], \ - "TeamLoader only supports config.PLAYERS == [Agent]" - super().__init__(config, np_random) - self.team_helper = team_helper - - self.candidate_spawn_pos = \ - spawn.get_team_spawn_positions(config, team_helper.num_teams) - - def get_spawn_position(self, agent_id): - team_id, _ = self.team_helper.team_and_position_for_agent[agent_id] - return self.candidate_spawn_pos[team_id] - - -class TestTeamSpawn(unittest.TestCase): - def test_team_spawn(self): - num_teams = 16 - team_size = 8 - team_helper = TeamHelper({ - i: [i*team_size+j+1 for j in range(team_size)] - for i in range(num_teams)} - ) - - config = nmmo.config.Small() - config.PLAYER_N = num_teams * team_size - config.PLAYER_LOADER =\ - lambda config, np_random: TeamLoader(config, np_random, team_helper) - - assert config.PLAYER_N == num_teams * team_size,\ - "config.PLAYER_N must be num_teams * team_size" - env = nmmo.Env(config) - env.reset() - - # agents in the same team should spawn together - team_locs = {} - for team_id, team_members in team_helper.teams.items(): - team_locs[team_id] = env.realm.players[team_members[0]].pos - for agent_id in team_members: - self.assertEqual(team_locs[team_id], env.realm.players[agent_id].pos) - - # teams should be apart from each other - for i in range(num_teams): - for j in range(i+1, num_teams): - self.assertNotEqual(team_locs[i], team_locs[j]) - - -if __name__ == '__main__': - unittest.main() From 2f43f15049953ae4b6be77d16d7d9a380722c5aa Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Sun, 2 Jul 2023 17:02:36 +0000 Subject: [PATCH 047/113] Pylint and revert vectorization. --- nmmo/systems/ai/behavior.py | 109 ++++++++-------- nmmo/systems/ai/move.py | 4 +- nmmo/systems/ai/utils.py | 252 +++++++++++++++++++----------------- nmmo/task/game_state.py | 58 ++++++--- tests/test_optimization.py | 37 ------ 5 files changed, 233 insertions(+), 227 deletions(-) delete mode 100644 tests/test_optimization.py diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index 50d450483..839a4d48c 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -1,78 +1,83 @@ -# TODO: try to remove the below line -# pylint: disable=all +#pylint: disable=protected-access, invalid-name import numpy as np import nmmo from nmmo.systems.ai import move, utils + def update(entity): - '''Update validity of tracked entities''' - if not utils.validTarget(entity, entity.attacker, entity.vision): - entity.attacker = None - if not utils.validTarget(entity, entity.target, entity.vision): - entity.target = None - if not utils.validTarget(entity, entity.closest, entity.vision): - entity.closest = None - - if entity.__class__.__name__ != 'Player': - return - - if not utils.validResource(entity, entity.food, entity.vision): - entity.food = None - if not utils.validResource(entity, entity.water, entity.vision): - entity.water = None + '''Update validity of tracked entities''' + if not utils.validTarget(entity, entity.attacker, entity.vision): + entity.attacker = None + if not utils.validTarget(entity, entity.target, entity.vision): + entity.target = None + if not utils.validTarget(entity, entity.closest, entity.vision): + entity.closest = None + + if entity.__class__.__name__ != 'Player': + return + + if not utils.validResource(entity, entity.food, entity.vision): + entity.food = None + if not utils.validResource(entity, entity.water, entity.vision): + entity.water = None + def pathfind(realm, actions, entity, target): - # TODO: do not access realm._np_random directly. ALSO see below for all other uses - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.pathfind(realm.map, entity, target, realm._np_random)} + # TODO: do not access realm._np_random directly. ALSO see below for all other uses + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.pathfind(realm.map, entity, target, realm._np_random)} + def explore(realm, actions, entity): - sz = realm.config.TERRAIN_SIZE - r, c = entity.pos + sz = realm.config.TERRAIN_SIZE + r, c = entity.pos - spawnR, spawnC = entity.spawnPos - centR, centC = sz//2, sz//2 + spawnR, spawnC = entity.spawnPos + centR, centC = sz//2, sz//2 - vR, vC = centR-spawnR, centC-spawnC + vR, vC = centR-spawnR, centC-spawnC - mmag = max(abs(vR), abs(vC)) - rr = r + int(np.round(entity.vision*vR/mmag)) - cc = c + int(np.round(entity.vision*vC/mmag)) + mmag = max(abs(vR), abs(vC)) + rr = r + int(np.round(entity.vision*vR/mmag)) + cc = c + int(np.round(entity.vision*vC/mmag)) + + tile = realm.map.tiles[rr, cc] + pathfind(realm, actions, entity, tile) - tile = realm.map.tiles[rr, cc] - pathfind(realm, actions, entity, tile) def meander(realm, actions, entity): - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} + def evade(realm, actions, entity): - actions[nmmo.action.Move] = { - nmmo.action.Direction: move.antipathfind(realm.map, entity, entity.attacker, - realm._np_random)} + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.antipathfind(realm.map, entity, entity.attacker, + realm._np_random)} + def hunt(realm, actions, entity): - #Move args - distance = utils.lInfty(entity.pos, entity.target.pos) + # Move args + distance = utils.lInfty(entity.pos, entity.target.pos) - if distance > 1: - actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map, - entity, - entity.target, - realm._np_random)} - elif distance == 0: - actions[nmmo.action.Move] = {nmmo.action.Direction: move.random_direction(realm._np_random)} + if distance > 1: + actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(realm.map, + entity, + entity.target, + realm._np_random)} + elif distance == 0: + actions[nmmo.action.Move] = { + nmmo.action.Direction: move.random_direction(realm._np_random)} - attack(realm, actions, entity) + attack(realm, actions, entity) def attack(realm, actions, entity): - distance = utils.lInfty(entity.pos, entity.target.pos) - if distance > entity.skills.style.attack_range(realm.config): - return - - actions[nmmo.action.Attack] = { - nmmo.action.Style: entity.skills.style, - nmmo.action.Target: entity.target} + distance = utils.lInfty(entity.pos, entity.target.pos) + if distance > entity.skills.style.attack_range(realm.config): + return + actions[nmmo.action.Attack] = { + nmmo.action.Style: entity.skills.style, + nmmo.action.Target: entity.target} diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index cf1b9e3b2..df1177a04 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -1,8 +1,8 @@ -# pylint: disable=R0401 +# pylint: disable=cyclic-import from nmmo.core import action from nmmo.systems.ai import utils -# pylint: disable=E1136 +# pylint: disable=unsubscriptable-object def random_direction(np_random): return action.Direction.edges[np_random.integers(0,len(action.Direction.edges))] diff --git a/nmmo/systems/ai/utils.py b/nmmo/systems/ai/utils.py index 47b626955..3fd7165ca 100644 --- a/nmmo/systems/ai/utils.py +++ b/nmmo/systems/ai/utils.py @@ -1,5 +1,4 @@ -# pylint: disable=all - +#pylint: disable=protected-access, invalid-name import heapq from typing import Tuple @@ -8,178 +7,189 @@ from nmmo.lib.utils import in_bounds + def validTarget(ent, targ, rng): - if targ is None or not targ.alive or lInfty(ent.pos, targ.pos) > rng: - return False - return True + if targ is None or not targ.alive or lInfty(ent.pos, targ.pos) > rng: + return False + return True def validResource(ent, tile, rng): - return tile is not None and tile.state.tex in ( - 'foilage', 'water') and lInfty(ent.pos, tile.pos) <= rng + return tile is not None and tile.state.tex in ( + 'foilage', 'water') and lInfty(ent.pos, tile.pos) <= rng def directionTowards(ent, targ): - sr, sc = ent.pos - tr, tc = targ.pos + sr, sc = ent.pos + tr, tc = targ.pos - if abs(sc - tc) > abs(sr - tr): - direction = (0, np.sign(tc - sc)) - else: - direction = (np.sign(tr - sr), 0) + if abs(sc - tc) > abs(sr - tr): + direction = (0, np.sign(tc - sc)) + else: + direction = (np.sign(tr - sr), 0) - return direction + return direction def closestTarget(ent, tiles, rng=1): - sr, sc = ent.pos - for d in range(rng+1): - for r in range(-d, d+1): - for e in tiles[sr+r, sc-d].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + sr, sc = ent.pos + for d in range(rng+1): + for r in range(-d, d+1): + for e in tiles[sr+r, sc-d].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e - for e in tiles[sr + r, sc + d].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + for e in tiles[sr + r, sc + d].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e - for e in tiles[sr - d, sc + r].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e + for e in tiles[sr - d, sc + r].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e + + for e in tiles[sr + d, sc + r].entities.values(): + if e is not ent and validTarget(ent, e, rng): + return e + return None - for e in tiles[sr + d, sc + r].entities.values(): - if e is not ent and validTarget(ent, e, rng): return e def lInf(ent, targ): - sr, sc = ent.pos - gr, gc = targ.pos - return abs(gr - sr) + abs(gc - sc) + sr, sc = ent.pos + gr, gc = targ.pos + return abs(gr - sr) + abs(gc - sc) def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] + r, c = pos + return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] def cropTilesAround(position: Tuple[int, int], horizon: int, tiles): - line, column = position + line, column = position - return tiles[max(line - horizon, 0): min(line + horizon + 1, len(tiles)), - max(column - horizon, 0): min(column + horizon + 1, len(tiles[0]))] + return tiles[max(line - horizon, 0): min(line + horizon + 1, len(tiles)), + max(column - horizon, 0): min(column + horizon + 1, len(tiles[0]))] # A* Search + + def l1(start, goal): - sr, sc = start - gr, gc = goal - return abs(gr - sr) + abs(gc - sc) + sr, sc = start + gr, gc = goal + return abs(gr - sr) + abs(gc - sc) + def l2(start, goal): - sr, sc = start - gr, gc = goal - return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 + sr, sc = start + gr, gc = goal + return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 + +# TODO: unify lInfty and lInf + -#TODO: unify lInfty and lInf def lInfty(start, goal): - sr, sc = start - gr, gc = goal - return max(abs(gr - sr), abs(gc - sc)) + sr, sc = start + gr, gc = goal + return max(abs(gr - sr), abs(gc - sc)) + CUTOFF = 100 + + def aStar(realm_map, start, goal): - cutoff = CUTOFF - tiles = realm_map.tiles - if start == goal: - return (0, 0) - if (start,goal) in realm_map.pathfinding_cache: - return realm_map.pathfinding_cache[(start,goal)] - initial_goal = goal - pq = [(0, start)] - - backtrace = {} - cost = {start: 0} - - closestPos = start - closestHeuristic = l1(start, goal) - closestCost = closestHeuristic - - while pq: - # Use approximate solution if budget exhausted - cutoff -= 1 - if cutoff <= 0: - if goal not in backtrace: - goal = closestPos - break - - priority, cur = heapq.heappop(pq) - - if cur == goal: - break - - for nxt in adjacentPos(cur): - if not in_bounds(*nxt, tiles.shape): - continue - - newCost = cost[cur] + 1 - if nxt not in cost or newCost < cost[nxt]: - cost[nxt] = newCost - heuristic = lInfty(goal, nxt) - priority = newCost + heuristic - - # Compute approximate solution - if heuristic < closestHeuristic or ( - heuristic == closestHeuristic and priority < closestCost): - closestPos = nxt - closestHeuristic = heuristic - closestCost = priority - - heapq.heappush(pq, (priority, nxt)) - backtrace[nxt] = cur - - while goal in backtrace and backtrace[goal] != start: - gr, gc = goal - goal = backtrace[goal] - sr, sc = goal - realm_map.pathfinding_cache[(goal,initial_goal)] = (gr - sr, gc - sc) - - sr, sc = start - gr, gc = goal - realm_map.pathfinding_cache[(start,initial_goal)] = (gr - sr, gc - sc) - return (gr - sr, gc - sc) + cutoff = CUTOFF + tiles = realm_map.tiles + if start == goal: + return (0, 0) + if (start, goal) in realm_map.pathfinding_cache: + return realm_map.pathfinding_cache[(start, goal)] + initial_goal = goal + pq = [(0, start)] + + backtrace = {} + cost = {start: 0} + + closestPos = start + closestHeuristic = l1(start, goal) + closestCost = closestHeuristic + + while pq: + # Use approximate solution if budget exhausted + cutoff -= 1 + if cutoff <= 0: + if goal not in backtrace: + goal = closestPos + break + + priority, cur = heapq.heappop(pq) + + if cur == goal: + break + + for nxt in adjacentPos(cur): + if not in_bounds(*nxt, tiles.shape): + continue + + newCost = cost[cur] + 1 + if nxt not in cost or newCost < cost[nxt]: + cost[nxt] = newCost + heuristic = lInfty(goal, nxt) + priority = newCost + heuristic + + # Compute approximate solution + if heuristic < closestHeuristic or ( + heuristic == closestHeuristic and priority < closestCost): + closestPos = nxt + closestHeuristic = heuristic + closestCost = priority + + heapq.heappush(pq, (priority, nxt)) + backtrace[nxt] = cur + + while goal in backtrace and backtrace[goal] != start: + gr, gc = goal + goal = backtrace[goal] + sr, sc = goal + realm_map.pathfinding_cache[(goal, initial_goal)] = (gr - sr, gc - sc) + + sr, sc = start + gr, gc = goal + realm_map.pathfinding_cache[(start, initial_goal)] = (gr - sr, gc - sc) + return (gr - sr, gc - sc) # End A* # Adjacency functions -def adjacentTiles(tiles, ent): - r, c = ent.pos - - def adjacentDeltas(): - return [(-1, 0), (1, 0), (0, 1), (0, -1)] + return [(-1, 0), (1, 0), (0, 1), (0, -1)] def l1Deltas(s): - rets = [] - for r in range(-s, s + 1): - for c in range(-s, s + 1): - rets.append((r, c)) - return rets + rets = [] + for r in range(-s, s + 1): + for c in range(-s, s + 1): + rets.append((r, c)) + return rets def posSum(pos1, pos2): - return pos1[0] + pos2[0], pos1[1] + pos2[1] + return pos1[0] + pos2[0], pos1[1] + pos2[1] def adjacentEmptyPos(env, pos): - return [p for p in adjacentPos(pos) - if in_bounds(*p, env.size)] + return [p for p in adjacentPos(pos) + if in_bounds(*p, env.size)] def adjacentTiles(env, pos): - return [env.tiles[p] for p in adjacentPos(pos) - if in_bounds(*p, env.size)] + return [env.tiles[p] for p in adjacentPos(pos) + if in_bounds(*p, env.size)] def adjacentMats(tiles, pos): - return [type(tiles[p].state) for p in adjacentPos(pos) - if in_bounds(*p, tiles.shape)] + return [type(tiles[p].state) for p in adjacentPos(pos) + if in_bounds(*p, tiles.shape)] def adjacencyDelMatPairs(env, pos): - return zip(adjacentDeltas(), adjacentMats(env.tiles, pos)) -###End### + return zip(adjacentDeltas(), adjacentMats(env.tiles, pos)) +### End### diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 251b0138a..0177b6d79 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -2,16 +2,16 @@ from typing import Dict, Iterable, Tuple, MutableMapping, Set from dataclasses import dataclass from copy import deepcopy +from collections import defaultdict + from abc import ABC, abstractmethod import functools - import numpy as np from nmmo.core.config import Config from nmmo.core.realm import Realm from nmmo.core.observation import Observation from nmmo.task.group import Group - from nmmo.entity.entity import EntityState from nmmo.lib.event_log import EventState, ATTACK_COL_MAP, ITEM_COL_MAP, LEVEL_COL_MAP from nmmo.lib.log import EventCode @@ -36,23 +36,40 @@ class GameState: env_obs: Dict[int, Observation] # env passes the obs of only alive agents entity_data: np.ndarray # a copied, whole Entity ds table + entity_index: Dict[int, Iterable] # precomputed index for where_in_1d item_data: np.ndarray # a copied, whole Item ds table + item_index: Dict[int, Iterable] event_data: np.ndarray # a copied, whole Event log table + event_index: Dict[int, Iterable] cache_result: MutableMapping # cache for general memoization - # Helper Functions - def where_in_1d(self, data_type, subject: Iterable[int]): - assert data_type in ['entity', 'item', 'event'], 'data_type must be in entity, item, event' + # add helper functions below + @functools.lru_cache + def entity_or_none(self, ent_id): + flt_ent = self.entity_data[:, EntityAttr['id']] == ent_id + if np.any(flt_ent): + return EntityState.parse_array(self.entity_data[flt_ent][0]) + return None + + def where_in_id(self, data_type, subject: Iterable[int]): + k = (data_type, subject) + if k in self.cache_result: + return self.cache_result[k] + if data_type == 'entity': - flt_idx = np.isin(self.entity_data[:, EntityAttr["id"]], subject).nonzero()[0] - return self.entity_data[flt_idx] + # flt_idx = [self.entity_index[sbj] for sbj in subject if sbj in self.entity_index] + flt_idx = [row for sbj in subject for row in self.entity_index.get(sbj,[])] + self.cache_result[k] = self.entity_data[flt_idx] if data_type == 'item': - flt_idx = np.isin(self.item_data[:, ItemAttr["owner_id"]], subject).nonzero()[0] - return self.item_data[flt_idx] + flt_idx = [row for sbj in subject for row in self.item_index.get(sbj,[])] + self.cache_result[k] = self.item_data[flt_idx] if data_type == 'event': - flt_idx = np.isin(self.event_data[:, EventAttr["ent_id"]], subject).nonzero()[0] - return self.event_data[flt_idx] + flt_idx = [row for sbj in subject for row in self.event_index.get(sbj,[])] + self.cache_result[k] = self.event_data[flt_idx] + if data_type in ['entity', 'item', 'event']: + return self.cache_result[k] + raise ValueError("data_type must be in entity, item, event") def get_subject_view(self, subject: Group): @@ -151,7 +168,7 @@ def __init__(self, gs: GameState, subject: Group): @functools.cached_property def _sbj_ent(self): - return self._gs.where_in_1d('entity', self._subject.agents) + return self._gs.where_in_id('entity', self._subject.agents) @functools.cached_property def entity(self): @@ -159,7 +176,7 @@ def entity(self): @functools.cached_property def _sbj_item(self): - return self._gs.where_in_1d('item', self._subject.agents) + return self._gs.where_in_id('item', self._subject.agents) @functools.cached_property def item(self): @@ -167,7 +184,7 @@ def item(self): @functools.cached_property def _sbj_event(self): - return self._gs.where_in_1d('event', self._subject.agents) + return self._gs.where_in_id('event', self._subject.agents) @functools.cached_property def event(self): @@ -209,7 +226,6 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: alive_agents = set(alive_agents[alive_agents > 0]) item_data = ItemState.Query.table(realm.datastore).copy() event_data = EventState.Query.table(realm.datastore).copy() - return GameState( current_tick = realm.tick, config = self.config, @@ -217,7 +233,19 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, + entity_index = precompute_index(entity_all, ItemAttr['owner_id']), item_data = item_data, + item_index = precompute_index(item_data, EntityAttr["id"]), event_data = event_data, + event_index = precompute_index(event_data, EventAttr['ent_id']), cache_result = {} ) + +def precompute_index(table, id_col): + index = defaultdict() + for row, id_ in enumerate(table[:,id_col]): + if id_ in index: + index[id_].append(row) + else: + index[id_] = [row] + return index diff --git a/tests/test_optimization.py b/tests/test_optimization.py deleted file mode 100644 index bd365b319..000000000 --- a/tests/test_optimization.py +++ /dev/null @@ -1,37 +0,0 @@ -# Test invariants assumed for certain optimizations - -import unittest - -import copy -import nmmo -from scripted.baselines import Random - -def rollout(): - config = nmmo.config.Default() - config.PLAYERS = [Random] - env = nmmo.Env(config) - env.reset() - start = copy.deepcopy(env.realm) - for _ in range(64): - env.step({}) - end = copy.deepcopy(env.realm) - return (start, end) - -class TestOptimization(unittest.TestCase): - - def test_passability_immutable(self): - # Used in optimization that caches the result of A* - start, end = rollout() - start_passable = [tile.impassible for tile in start.map.tiles.flatten()] - end_passable = [tile.impassible for tile in end.map.tiles.flatten()] - self.assertListEqual(start_passable, end_passable) - - def test_habitability_immutable(self): - # Used in optimization with habitability lookup table - start, end = rollout() - start_habitable = [tile.habitable for tile in start.map.tiles.flatten()] - end_habitable = [tile.habitable for tile in end.map.tiles.flatten()] - self.assertListEqual(start_habitable, end_habitable) - -if __name__ == '__main__': - unittest.main() From 848492fcbf27cfc46bb4d7c904175a80713e46ce Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Sun, 2 Jul 2023 19:53:28 +0000 Subject: [PATCH 048/113] Entity data bug fix --- nmmo/task/game_state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 0177b6d79..043af1ff7 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -58,7 +58,6 @@ def where_in_id(self, data_type, subject: Iterable[int]): return self.cache_result[k] if data_type == 'entity': - # flt_idx = [self.entity_index[sbj] for sbj in subject if sbj in self.entity_index] flt_idx = [row for sbj in subject for row in self.entity_index.get(sbj,[])] self.cache_result[k] = self.entity_data[flt_idx] if data_type == 'item': @@ -233,9 +232,9 @@ def generate(self, realm: Realm, env_obs: Dict[int, Observation]) -> GameState: alive_agents = alive_agents, env_obs = env_obs, entity_data = entity_all, - entity_index = precompute_index(entity_all, ItemAttr['owner_id']), + entity_index = precompute_index(entity_all, EntityAttr["id"]), item_data = item_data, - item_index = precompute_index(item_data, EntityAttr["id"]), + item_index = precompute_index(item_data, ItemAttr["owner_id"]), event_data = event_data, event_index = precompute_index(event_data, EventAttr['ent_id']), cache_result = {} From 5623af56c52f99bff77f4cdbfd6cbe120107cf71 Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Sun, 2 Jul 2023 21:21:48 +0000 Subject: [PATCH 049/113] Minor task system hash updates --- nmmo/task/game_state.py | 25 +++++++++------ nmmo/task/group.py | 4 ++- nmmo/task/predicate_api.py | 8 ++--- tests/core/test_immutable_tile_property.py | 37 ++++++++++++++++++++++ tests/test_performance.py | 16 ++++++++-- 5 files changed, 72 insertions(+), 18 deletions(-) create mode 100644 tests/core/test_immutable_tile_property.py diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 043af1ff7..214f5485f 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -19,6 +19,7 @@ from nmmo.core.tile import TileState EntityAttr = EntityState.State.attr_name_to_col +EntityAttrKeys = EntityAttr.keys() EventAttr = EventState.State.attr_name_to_col ItemAttr = ItemState.State.attr_name_to_col TileAttr = TileState.State.attr_name_to_col @@ -86,7 +87,9 @@ def __init__(self, self._name = name self._gs = gs self._subject = subject + self._hash = hash(subject) ^ hash(name) self._arr = arr + self._cache = self._gs.cache_result def __len__(self): return len(self._arr) @@ -96,11 +99,11 @@ def get_attribute(self, attr) -> np.ndarray: raise NotImplementedError def __getattr__(self, attr) -> np.ndarray: - k = (self._subject, self._name+'_'+attr) - if k in self._gs.cache_result: - return self._gs.cache_result[k] + k = (self._hash, attr) + if k in self._cache: + return self._cache[k] v = object.__getattribute__(self, 'get_attribute')(attr) - self._gs.cache_result[k] = v + self._cache[k] = v return v class ItemView(ArrayView): @@ -163,6 +166,7 @@ class GroupView: def __init__(self, gs: GameState, subject: Group): self._gs = gs self._subject = subject + self._subject_hash = hash(subject) self.obs = GroupObsView(gs, subject) @functools.cached_property @@ -190,21 +194,22 @@ def event(self): return EventView(self._gs, self._subject, self._sbj_event) def __getattribute__(self, attr): - if attr in ['_gs','_subject','_sbj_ent','_sbj_item','entity','item','event','obs']: + if attr in {'_gs','_subject','_sbj_ent','_sbj_item','entity','item','event','obs', '_subject_hash'}: return object.__getattribute__(self, attr) # Cached optimization - k = (self._subject, attr) - if k in self._gs.cache_result: - return self._gs.cache_result[k] + k = (self._subject_hash, attr) + cache = self._gs.cache_result + if k in cache: + return cache[k] try: # Get property - if attr in EntityAttr.keys(): + if attr in EntityAttrKeys: v = getattr(self.entity, attr) else: v = object.__getattribute__(self, attr) - self._gs.cache_result[k] = v + cache[k] = v return v except AttributeError: # View behavior diff --git a/nmmo/task/group.py b/nmmo/task/group.py index 442778c18..39020f99b 100644 --- a/nmmo/task/group.py +++ b/nmmo/task/group.py @@ -25,6 +25,8 @@ def __init__(self, self._sd: GroupView = None self._gs: GameState = None + self._hash = hash(self._agents) + @property def agents(self): return self._agents @@ -42,7 +44,7 @@ def __len__(self): return len(self._agents) def __hash__(self): - return hash(self._agents) + return self._hash def __getitem__(self, key): if len(self) == 1 and key == 0: diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index 4882448f0..dd74fb5ba 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -53,12 +53,12 @@ def __call__(self, gs: GameState) -> float: for group in self._groups: group.update(gs) # Calculate score - # cache = gs.cache_result - if self.name in gs.cache_result: - progress = gs.cache_result[self.name] + cache = gs.cache_result + if self.name in cache: + progress = cache[self.name] else: progress = max(min(self._evaluate(gs)*1.0,1.0),0.0) - gs.cache_result[self.name] = progress + cache[self.name] = progress return progress def _reset(self, config: Config): diff --git a/tests/core/test_immutable_tile_property.py b/tests/core/test_immutable_tile_property.py new file mode 100644 index 000000000..6d8c56da7 --- /dev/null +++ b/tests/core/test_immutable_tile_property.py @@ -0,0 +1,37 @@ +# Test immutable invariants assumed for certain optimizations + +import unittest + +import copy +import nmmo +from scripted.baselines import Random + +def rollout(): + config = nmmo.config.Default() + config.PLAYERS = [Random] + env = nmmo.Env(config) + env.reset() + start = copy.deepcopy(env.realm) + for _ in range(64): + env.step({}) + end = copy.deepcopy(env.realm) + return (start, end) + +class TestImmutableTileProperty(unittest.TestCase): + + def test_passability_immutable(self): + # Used in optimization that caches the result of A* + start, end = rollout() + start_passable = [tile.impassible for tile in start.map.tiles.flatten()] + end_passable = [tile.impassible for tile in end.map.tiles.flatten()] + self.assertListEqual(start_passable, end_passable) + + def test_habitability_immutable(self): + # Used in optimization with habitability lookup table + start, end = rollout() + start_habitable = [tile.habitable for tile in start.map.tiles.flatten()] + end_habitable = [tile.habitable for tile in end.map.tiles.flatten()] + self.assertListEqual(start_habitable, end_habitable) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_performance.py b/tests/test_performance.py index c245f9478..92fb950ff 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -7,6 +7,10 @@ from nmmo.core.config import (NPC, AllGameSystems, Combat, Communication, Equipment, Exchange, Item, Medium, Profession, Progression, Resource, Small, Terrain) +from nmmo.task.task_api import nmmo_default_task, make_same_task +from nmmo.task.base_predicates import CountEvent, FullyArmed +from nmmo.systems.skill import Melee +from tests.testhelpers import profile_env_step from scripted import baselines @@ -112,17 +116,23 @@ def set_seed_test(): env = nmmo.Env(conf) - # start = time.time() env.reset(seed=random_seed) for _ in range(1024): env.step({}) - # print(f"Total time {time.time()-start}") + +def set_seed_test_complex(): + tasks = nmmo_default_task(range(128)) + tasks += make_same_task(CountEvent, range(128), + pred_kwargs={'event': 'EAT_FOOD', 'N': 10}) + tasks += make_same_task(FullyArmed, range(128), + pred_kwargs={'combat_style': Melee, 'level': 3, 'num_agent': 1}) + profile_env_step(tasks=tasks) if __name__ == '__main__': with open('profile.run','a', encoding="utf-8") as f: pr = cProfile.Profile() pr.enable() - set_seed_test() + set_seed_test_complex() pr.disable() s = io.StringIO() ps = pstats.Stats(pr,stream=s).sort_stats('tottime') From 76f3e2f4982a5fa87a0fd2a9fab7468a78de3fae Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Sun, 2 Jul 2023 21:44:15 +0000 Subject: [PATCH 050/113] Pylinted --- nmmo/task/game_state.py | 3 ++- tests/test_performance.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 214f5485f..753b023cb 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -194,7 +194,8 @@ def event(self): return EventView(self._gs, self._subject, self._sbj_event) def __getattribute__(self, attr): - if attr in {'_gs','_subject','_sbj_ent','_sbj_item','entity','item','event','obs', '_subject_hash'}: + if attr in {'_gs','_subject','_sbj_ent','_sbj_item', + 'entity','item','event','obs', '_subject_hash'}: return object.__getattribute__(self, attr) # Cached optimization diff --git a/tests/test_performance.py b/tests/test_performance.py index 92fb950ff..ce9051a20 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -126,7 +126,7 @@ def set_seed_test_complex(): pred_kwargs={'event': 'EAT_FOOD', 'N': 10}) tasks += make_same_task(FullyArmed, range(128), pred_kwargs={'combat_style': Melee, 'level': 3, 'num_agent': 1}) - profile_env_step(tasks=tasks) + profile_env_step(tasks=tasks) if __name__ == '__main__': with open('profile.run','a', encoding="utf-8") as f: From 398235e89818e422624bdd7ccf573d6c2f4767cd Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Mon, 3 Jul 2023 20:36:18 +0000 Subject: [PATCH 051/113] Removed random_direction and random_safe --- nmmo/systems/ai/behavior.py | 5 +---- nmmo/systems/ai/move.py | 19 ------------------- 2 files changed, 1 insertion(+), 23 deletions(-) diff --git a/nmmo/systems/ai/behavior.py b/nmmo/systems/ai/behavior.py index 839a4d48c..81075415b 100644 --- a/nmmo/systems/ai/behavior.py +++ b/nmmo/systems/ai/behavior.py @@ -5,7 +5,6 @@ import nmmo from nmmo.systems.ai import move, utils - def update(entity): '''Update validity of tracked entities''' if not utils.validTarget(entity, entity.attacker, entity.vision): @@ -51,13 +50,11 @@ def meander(realm, actions, entity): actions[nmmo.action.Move] = { nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} - def evade(realm, actions, entity): actions[nmmo.action.Move] = { nmmo.action.Direction: move.antipathfind(realm.map, entity, entity.attacker, realm._np_random)} - def hunt(realm, actions, entity): # Move args distance = utils.lInfty(entity.pos, entity.target.pos) @@ -69,7 +66,7 @@ def hunt(realm, actions, entity): realm._np_random)} elif distance == 0: actions[nmmo.action.Move] = { - nmmo.action.Direction: move.random_direction(realm._np_random)} + nmmo.action.Direction: move.habitable(realm.map, entity, realm._np_random)} attack(realm, actions, entity) diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index df1177a04..0e8c4831d 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -2,25 +2,6 @@ from nmmo.core import action from nmmo.systems.ai import utils -# pylint: disable=unsubscriptable-object -def random_direction(np_random): - return action.Direction.edges[np_random.integers(0,len(action.Direction.edges))] - -def random_safe(realm_map, ent, np_random): - r, c = ent.pos - tiles = realm_map.tiles - cands = [] - if not tiles[r-1, c].void: - cands.append(action.North) - if not tiles[r+1, c].void: - cands.append(action.South) - if not tiles[r, c-1].void: - cands.append(action.West) - if not tiles[r, c+1].void: - cands.append(action.East) - - return np_random.choice(cands) - def habitable(realm_map, ent, np_random): r, c = ent.pos tiles = realm_map.habitable_tiles From 7c5da36cbcf8b574769b9effff61945349561416 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 4 Jul 2023 14:14:07 +0900 Subject: [PATCH 052/113] added get_direction() to np_random for speed --- nmmo/lib/seeding.py | 16 +++++++++++++--- tests/test_determinism.py | 19 +++++++++++++++---- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/nmmo/lib/seeding.py b/nmmo/lib/seeding.py index 5cd6e1acb..69410e8b5 100644 --- a/nmmo/lib/seeding.py +++ b/nmmo/lib/seeding.py @@ -8,6 +8,19 @@ from gym import error +class RandomNumberGenerator(np.random.Generator): + def __init__(self, *kwargs): + super().__init__(*kwargs) + self._dir_seq_len = 1024 + self._wrap = self._dir_seq_len - 1 + self._dir_seq = self.integers(0, 4, size=self._dir_seq_len).astype(np.uint8) + self._dir_idx = 0 + + # provide a random direction from the pre-generated sequence + def get_direction(self): + self._dir_idx = (self._dir_idx + 1) & self._wrap + return self._dir_seq[self._dir_idx] + def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]: """Generates a random number generator from the seed and returns the Generator and seed. @@ -27,6 +40,3 @@ def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]: np_seed = seed_seq.entropy rng = RandomNumberGenerator(np.random.PCG64(seed_seq)) return rng, np_seed - - -RNG = RandomNumberGenerator = np.random.Generator diff --git a/tests/test_determinism.py b/tests/test_determinism.py index bacc46ee3..597b167c0 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -1,4 +1,5 @@ import unittest +from timeit import timeit import numpy as np from tqdm import tqdm @@ -13,10 +14,20 @@ class TestDeterminism(unittest.TestCase): - def test_gym_np_random(self): - _, _np_seed_1 = seeding.np_random(RANDOM_SEED) - _, _np_seed_2 = seeding.np_random(RANDOM_SEED) - self.assertEqual(_np_seed_1, _np_seed_2) + def test_np_random_get_direction(self): + # pylint: disable=protected-access,bad-builtin,unnecessary-lambda + np_random_1, np_seed_1 = seeding.np_random(RANDOM_SEED) + np_random_2, np_seed_2 = seeding.np_random(RANDOM_SEED) + self.assertEqual(np_seed_1, np_seed_2) + + # also test get_direction, which was added for speed optimization + self.assertTrue(np.array_equal(np_random_1._dir_seq, np_random_2._dir_seq)) + + print('---test_np_random_get_direction---') + print('np_random.integers():', timeit(lambda: np_random_1.integers(0,4), + number=100000, globals=globals())) + print('np_random.get_directions():', timeit(lambda: np_random_1.get_direction(), + number=100000, globals=globals())) def test_map_determinism(self): config = nmmo.config.Default() From 6bc4511f278a471476f021c4aa4b64c9948b36e3 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 4 Jul 2023 14:26:08 +0900 Subject: [PATCH 053/113] specified bit_generator --- nmmo/lib/seeding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nmmo/lib/seeding.py b/nmmo/lib/seeding.py index 69410e8b5..9cbc7a8c7 100644 --- a/nmmo/lib/seeding.py +++ b/nmmo/lib/seeding.py @@ -9,8 +9,8 @@ class RandomNumberGenerator(np.random.Generator): - def __init__(self, *kwargs): - super().__init__(*kwargs) + def __init__(self, bit_generator): + super().__init__(bit_generator) self._dir_seq_len = 1024 self._wrap = self._dir_seq_len - 1 self._dir_seq = self.integers(0, 4, size=self._dir_seq_len).astype(np.uint8) From 45885f07f394298771dc1e52154062307f9b5197 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 4 Jul 2023 14:53:46 +0900 Subject: [PATCH 054/113] few fixes --- nmmo/lib/seeding.py | 2 +- tests/test_determinism.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nmmo/lib/seeding.py b/nmmo/lib/seeding.py index 9cbc7a8c7..1e75c066c 100644 --- a/nmmo/lib/seeding.py +++ b/nmmo/lib/seeding.py @@ -13,7 +13,7 @@ def __init__(self, bit_generator): super().__init__(bit_generator) self._dir_seq_len = 1024 self._wrap = self._dir_seq_len - 1 - self._dir_seq = self.integers(0, 4, size=self._dir_seq_len).astype(np.uint8) + self._dir_seq = list(self.integers(0, 4, size=self._dir_seq_len)) self._dir_idx = 0 # provide a random direction from the pre-generated sequence diff --git a/tests/test_determinism.py b/tests/test_determinism.py index 597b167c0..e84b0bc4e 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -26,7 +26,7 @@ def test_np_random_get_direction(self): print('---test_np_random_get_direction---') print('np_random.integers():', timeit(lambda: np_random_1.integers(0,4), number=100000, globals=globals())) - print('np_random.get_directions():', timeit(lambda: np_random_1.get_direction(), + print('np_random.get_direction():', timeit(lambda: np_random_1.get_direction(), number=100000, globals=globals())) def test_map_determinism(self): From 8541755c36a7d841d844ddb066051f6740fa1eb6 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 4 Jul 2023 17:40:02 +0900 Subject: [PATCH 055/113] add task embedding to gym obs --- nmmo/core/config.py | 11 +++++------ nmmo/core/env.py | 33 ++++++++++++++++++++++++--------- nmmo/core/observation.py | 3 +++ nmmo/task/task_api.py | 3 +++ tests/task/test_task_api.py | 5 ++++- 5 files changed, 39 insertions(+), 16 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index cb485c95b..18e1e2738 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -144,13 +144,17 @@ def __init__(self): def game_system_enabled(self, name) -> bool: return hasattr(self, name) - PROVIDE_ACTION_TARGETS = True '''Flag used to provide action targets mask''' PLAYERS = [Agent] '''Player classes from which to spawn''' + HORIZON = 1024 + '''Number of steps before the environment resets''' + + TASK_EMBED_DIM = 1024 + '''Dimensionality of task embeddings''' ############################################################################ ### Population Parameters @@ -198,11 +202,6 @@ def PLAYER_VISION_DIAMETER(self): PLAYER_DEATH_FOG = None '''How long before spawning death fog. None for no death fog''' - ############################################################################ - ### Map Parameters - HORIZON = 1024 - '''Number of steps before the environment resets''' - ############################################################################ ### Agent Parameters diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 208fa6094..0618927a9 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -50,6 +50,7 @@ def __init__(self, # Default task: rewards 1 each turn agent is alive self.tasks = task_api.nmmo_default_task(self.possible_agents) self.agent_task_map = None + self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float32) @functools.cached_property def _obs_space(self): @@ -65,7 +66,9 @@ def mask_box(length): "CurrentTick": gym.spaces.Discrete(self.config.HORIZON+1), "AgentId": gym.spaces.Discrete(self.config.PLAYER_N+1), "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes), - "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes)} + "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes), + "Task": gym.spaces.Box(low=-np.inf, high=-np.inf, shape=(self.config.TASK_EMBED_DIM,)), + } if self.config.ITEM_SYSTEM_ENABLED: obs_space["Inventory"] = box(self.config.INVENTORY_N_OBS, Item.State.num_attributes) @@ -187,10 +190,6 @@ def reset(self, map_id=None, seed=None, options=None, self.scripted_agents.add(eid) ent.agent.set_rng(self._np_random) - self._dummy_obs = self._make_dummy_obs() - self.obs = self._compute_observations() - self._gamestate_generator = GameStateGenerator(self.realm, self.config) - if make_task_fn is not None: self.tasks = make_task_fn() else: @@ -198,6 +197,10 @@ def reset(self, map_id=None, seed=None, options=None, task.reset() self.agent_task_map = self._map_task_to_agent() + self._dummy_obs = self._make_dummy_obs() + self.obs = self._compute_observations() + self._gamestate_generator = GameStateGenerator(self.realm, self.config) + self._reset_required = False return {a: o.to_gym() for a,o in self.obs.items()} @@ -205,8 +208,16 @@ def reset(self, map_id=None, seed=None, options=None, def _map_task_to_agent(self): agent_task_map: Dict[int, List[task_api.Task]] = {} for task in self.tasks: + if task.embedding is None: + task.set_embedding(self._dummy_task_embedding) + # validate task embedding + assert self._obs_space['Task'].contains(task.embedding), "Task embedding is not valid" + + # map task to agents for agent_id in task.assignee: if agent_id in agent_task_map: + # NOTE: only the first task is used for traning + # but all tasks are used for calculate the reward. Is this correct behavior? agent_task_map[agent_id].append(task) else: agent_task_map[agent_id] = [task] @@ -404,7 +415,7 @@ def _make_dummy_obs(self): dummy_entities = np.zeros((0, len(Entity.State.attr_name_to_col)), dtype=np.int16) dummy_inventory = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) dummy_market = np.zeros((0, len(Item.State.attr_name_to_col)), dtype=np.int16) - return Observation(self.config, self.realm.tick, 0, + return Observation(self.config, self.realm.tick, 0, self._dummy_task_embedding, dummy_tiles, dummy_entities, dummy_inventory, dummy_market) def _compute_observations(self): @@ -440,9 +451,13 @@ def _compute_observations(self): # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are # available in each task instance, via task.embedding - # CHECK ME: do we pass in self.agent_task_map[agent_id], - # so that we can include task embedding in the obs? - obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, + # For now, only the first tasks' embedding is passed in + # TODO: can the embeddings of multiple tasks be superposed while preserving the + # task-specific information? This needs research + task_embedding = self._dummy_task_embedding + if agent_id in self.agent_task_map: + task_embedding = self.agent_task_map[agent_id][0].embedding # NOTE: first task only + obs[agent_id] = Observation(self.config, self.realm.tick, agent_id, task_embedding, visible_tiles, visible_entities, inventory, market) return obs diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 82d9660b2..1d8a4a43f 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -42,6 +42,7 @@ def __init__(self, config, current_tick: int, agent_id: int, + task_embedding, tiles, entities, inventory, @@ -50,6 +51,7 @@ def __init__(self, self.config = config self.current_tick = current_tick self.agent_id = agent_id + self.task_embedding = task_embedding self.tiles = tiles[0:config.MAP_N_OBS] self.entities = BasicObs(entities[0:config.PLAYER_N_OBS], @@ -117,6 +119,7 @@ def get_empty_obs(self): gym_obs = { "CurrentTick": self.current_tick, "AgentId": self.agent_id, + "Task": self.task_embedding, "Tile": None, # np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1])), "Entity": np.zeros((self.config.PLAYER_N_OBS, self.entities.values.shape[1]), dtype=np.int16)} diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index d689d32fd..17dcd51a6 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -51,6 +51,9 @@ def reward_multiplier(self) -> float: def embedding(self): return self._embedding + def set_embedding(self, embedding): + self._embedding = embedding + def _map_progress_to_reward(self, gs) -> float: """ The default reward is the diff between the old and new progress. Once the task is completed, no more reward is provided. diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index c8b4df203..a714f6efc 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -277,7 +277,7 @@ def PracticeFormation(gs, subject, dist, num_tick): self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) # test the task_spec_with_embedding - task_embedding = np.array([1,2,3]) + task_embedding = np.ones(config.TASK_EMBED_DIM, dtype=np.float32) task_spec_with_embedding = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}, {'embedding': task_embedding}) env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec_with_embedding])) @@ -296,6 +296,9 @@ def PracticeFormation(gs, subject, dist, num_tick): self.assertEqual(task.assignee, tuple(teams[0])) self.assertTrue(np.array_equal(task.embedding, task_embedding)) + obs_spec = env.observation_space(1) + self.assertTrue(obs_spec['Task'].contains(task.embedding)) + def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() From e3a767ee34611fa462e18311164bb18436341004 Mon Sep 17 00:00:00 2001 From: Hao Xiang Li Date: Tue, 4 Jul 2023 11:10:40 +0000 Subject: [PATCH 056/113] Updated habitable. --- nmmo/systems/ai/move.py | 52 +++++++++++------------------------------ 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index 0e8c4831d..ea6ead33a 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -2,46 +2,20 @@ from nmmo.core import action from nmmo.systems.ai import utils +DIRECTIONS = [ # row delta, col delta, action + (-1, 0, action.North), + (1, 0, action.South), + (0, -1, action.West), + (0, 1, action.East)] * 2 + def habitable(realm_map, ent, np_random): - r, c = ent.pos - tiles = realm_map.habitable_tiles - direction = np_random.integers(0,4) - if direction == 0: - if tiles[r-1, c]: - return action.North - if tiles[r+1, c]: - return action.South - if tiles[r, c-1]: - return action.West - if tiles[r, c+1]: - return action.East - elif direction == 1: - if tiles[r+1, c]: - return action.South - if tiles[r, c-1]: - return action.West - if tiles[r, c+1]: - return action.East - if tiles[r-1, c]: - return action.North - elif direction == 2: - if tiles[r, c-1]: - return action.West - if tiles[r, c+1]: - return action.East - if tiles[r-1, c]: - return action.North - if tiles[r+1, c]: - return action.South - else: - if tiles[r, c+1]: - return action.East - if tiles[r-1, c]: - return action.North - if tiles[r+1, c]: - return action.South - if tiles[r, c-1]: - return action.West + r, c = ent.pos + is_habitable = realm_map.habitable_tiles + start = np_random.integers(4) + for i in range(4): + dr, dc, act = DIRECTIONS[start + i] + if is_habitable[r + dr, c + dc]: + return act return action.North From 025a7f90de03e9c3cf920cf672f2fddcc35c8d5a Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 4 Jul 2023 21:54:43 +0900 Subject: [PATCH 057/113] use np_random.get_direction(), added perf test --- nmmo/systems/ai/move.py | 2 +- tests/core/test_observation_tile.py | 32 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/nmmo/systems/ai/move.py b/nmmo/systems/ai/move.py index ea6ead33a..d2d398f2b 100644 --- a/nmmo/systems/ai/move.py +++ b/nmmo/systems/ai/move.py @@ -11,7 +11,7 @@ def habitable(realm_map, ent, np_random): r, c = ent.pos is_habitable = realm_map.habitable_tiles - start = np_random.integers(4) + start = np_random.get_direction() for i in range(4): dr, dc, act = DIRECTIONS[start + i] if is_habitable[r + dr, c + dc]: diff --git a/tests/core/test_observation_tile.py b/tests/core/test_observation_tile.py index 828d580c4..00d519fb8 100644 --- a/tests/core/test_observation_tile.py +++ b/tests/core/test_observation_tile.py @@ -179,6 +179,38 @@ def sort_event_data(event_data): lambda: where_in_1d_with_index(event_data, [1, 2, 3], event_index), number=1000, globals=globals())) + def test_habitable(self): + from nmmo.systems.ai.move import habitable as habitable_impl + realm_map = self.env.realm.map + realm_tiles= self.env.realm.map.tiles + ent = self.env.realm.npcs[-1] + np_random = self.env._np_random + + def habitable_ref(tiles, ent, np_random): + r, c = ent.pos + cands = [] + if tiles[r-1, c].habitable: + cands.append(Action.North) + if tiles[r+1, c].habitable: + cands.append(Action.South) + if tiles[r, c-1].habitable: + cands.append(Action.West) + if tiles[r, c+1].habitable: + cands.append(Action.East) + + if len(cands) == 0: + return Action.North + + return np_random.choice(cands) + + print('---test_habitable---') + print('reference:', timeit( + lambda: habitable_ref(realm_tiles, ent, np_random), + number=1000, globals=globals())) + print('habitable_impl:', timeit( + lambda: habitable_impl(realm_map, ent, np_random), + number=1000, globals=globals())) + if __name__ == '__main__': unittest.main() From cd2b3e10112b7ba0066730fd39f13e6d2f87c057 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 13 Jul 2023 08:01:15 +0900 Subject: [PATCH 058/113] assert single task per agent --- nmmo/core/config.py | 3 +++ nmmo/core/env.py | 10 +++++++--- tests/task/test_demo_task_creation.py | 1 + tests/task/test_predicates.py | 1 + tests/task/test_task_api.py | 1 + 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 18e1e2738..b1d2064e4 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -156,6 +156,9 @@ def game_system_enabled(self, name) -> bool: TASK_EMBED_DIM = 1024 '''Dimensionality of task embeddings''' + ALLOW_MULTI_TASKS_PER_AGENT = False + '''Whether to allow multiple tasks per agent''' + ############################################################################ ### Population Parameters LOG_VERBOSE = False diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 0618927a9..7e6a02d00 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -216,11 +216,15 @@ def _map_task_to_agent(self): # map task to agents for agent_id in task.assignee: if agent_id in agent_task_map: - # NOTE: only the first task is used for traning - # but all tasks are used for calculate the reward. Is this correct behavior? agent_task_map[agent_id].append(task) else: agent_task_map[agent_id] = [task] + + # for now we only support one task per agent + if self.config.ALLOW_MULTI_TASKS_PER_AGENT is False: + for agent_tasks in agent_task_map.values(): + assert len(agent_tasks) == 1, "Only one task per agent is supported" + return agent_task_map def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): @@ -451,7 +455,7 @@ def _compute_observations(self): # NOTE: the tasks for each agent is in self.agent_task_map, and task embeddings are # available in each task instance, via task.embedding - # For now, only the first tasks' embedding is passed in + # For now, each agent is assigned to a single task, so we just use the first task # TODO: can the embeddings of multiple tasks be superposed while preserving the # task-specific information? This needs research task_embedding = self._dummy_task_embedding diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 3d43fe4e2..3b25ee84f 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -83,6 +83,7 @@ def ForageSkill(gs, subject, lvl): # Test rollout config = ScriptedAgentTestConfig() + config.ALLOW_MULTI_TASKS_PER_AGENT = True env = Env(config) # Creating and testing "team" tasks diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py index 749bf626a..90bff5a92 100644 --- a/tests/task/test_predicates.py +++ b/tests/task/test_predicates.py @@ -36,6 +36,7 @@ def _get_taskenv(self, config.PLAYERS = [Sleeper] config.PLAYER_N = NUM_AGENT config.IMMORTAL = True + config.ALLOW_MULTI_TASKS_PER_AGENT = True # OngoingTask keeps evaluating and returns progress as the reward # vs. Task stops evaluating once the task is completed, returns reward = delta(progress) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index a714f6efc..525ccf9da 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -302,6 +302,7 @@ def PracticeFormation(gs, subject, dist, num_tick): def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member config = ScriptedAgentTestConfig() + config.ALLOW_MULTI_TASKS_PER_AGENT = True env = Env(config) # make predicate class from function From 9ce1acfb5e390ce036f3b3561c19fac280db7538 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 13 Jul 2023 22:47:28 +0900 Subject: [PATCH 059/113] tweaked manual curriculum, added curriculum pkl file --- tests/task/sample_curriculum.pkl | Bin 0 -> 65497 bytes tests/task/test_manual_curriculum.py | 62 +++++++++++++++++---------- 2 files changed, 40 insertions(+), 22 deletions(-) create mode 100644 tests/task/sample_curriculum.pkl diff --git a/tests/task/sample_curriculum.pkl b/tests/task/sample_curriculum.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8c83a9edc1f8fb3c4a25f5221bf94c609a9f0f82 GIT binary patch literal 65497 zcmb8&&u?5=LdEg;_m4zm21RHPQZEJ;ZV3{pC0HW4-R{^piG%I#2Ek0JVpqpiv8&uw zhT#DBoRipA_ef_tlO<)Sh?r`^Ygnu&rVjTWdYw|CnyN?eWA8sF=?Cl<(j7JO4#s^E+UyMih z2hGc?i+1;A<4vpm@_aZxcxNyiZ=ElkFNZ&UzIwiPzJ9PAc^ZAP-j^x)Pj^x)Pj^x)Pj^x)P zj^x)Pj^x)Pj^x)Pj^x*4<9O%rXt%Mqe^R{~pN2o3UyV4DUyV4DUyV4DUyV4DUyV4D zUyV4DUyV4DUyV4DUyXZv+eaUVzepPUC%aFp7v#%ug8YKSk^F+hk^F+hk^F+hk^F+h zk^F+hk^F+hk^F+hk^F+JKiU0w_etZU=hchx7vT@*7bA}37bA}37bA}37bA}37bA}3 z7bA}37bA}37bA|$Eymlg7^8bThtG~bdb<1e;#j!;!|~hU^M46{Zjp{G(UE03vO-5z z>Bt%#S*If#^G8M-kM}>Wn(L2fuJK55uJK55uJK55uJK55uJK55uJK55uJK55uJ=9& z(H$NBX2sI}XZQofrHx05(~U=p(~U=p(~U=p(~U=p(~U=p(~U=p)7{+t^^f+SS4{SQ z120ZC9w|;X9w|;X9w|;X9w|;X9w|;X9w|;X9w|znpsv>0t3Oq=UuBkPa3fLpoS|4C!F; zF{FdV$B+&dAH(|lA5}Ek*~>kIbg=jk(!t_GNC%4#Ass9}gmkd@5YoZoLr4eXLx@lO z7sIQFX7{+&I%!|DCa)q!8^iG1#_;B(ec+JsoH!sGcrNe%dgUNH| z&FKq>&ToZB`o*~Ojqpq7o8cd$9KgnO<2i`&`SPXA^w z3=ikl2eR@>eB=c;S+~`b_UWs~yI~-_vRdz6Up9u}!1&<3LHq*P`TgmF{6YBT;EWG_ zhY$Ub4n6(F1wQtZ>9M$Tho`64S2u5WZfmsE@4XoxJRdEc_AbJ&(=UCCqr1~B-k!Xq zoXmH1*zCV-h1Zu~xoCYlYz$k?%f>~kc@`e%^#9|ZO{f3exRqb+#}|u?!W7b(ANPCL zSCf_KO#Z~i+q0(A{rTMKHgUS?XgJSTK4^#C=$^Jt`pws^i`Ln6S(nek;qk%dbWY!! zp5*)CmxBj<=p8<8frXW)dQ-PNYc-C44UMb)Oedg6Vc&!dzuY)&k$A2G|Yqb1y8vk?+7YlzX-NPs@_>bx= zIIywcz{Y|D8w(C>EI6>S;K1&JuN*bIlfQ25760!#D-LX|IIywez{ZLL8!HZMtT?c{ z;%o5>X1F&O{(`aBd~>nJUn{`Ingbha4s5JBu(9UA#+n1WYrYYmEHp3LgUdRr{tI_JM23M`qVa==kSL>`hu(9sI#<~L=>ke$JJFv0t z!0x)QZC{+8w=Qq$T~rA#g0$U14(ym6-l>l6-oDS6-hU96-jq>6-l>u6-oDb6-hUI6-jq~ zvm`#Xyk&J0QLCGXTHQp{>L#LAHxaeEiKx|0M6GTjYIPG)s~ew2-mUPF z@#HUv&VPkp4wkEjR;q{As)shJhwfDm-LD>cP(Acc_0W5Tq4-znZnNKgyEfAYUs{)q zrOBPX+`7yUmDXi`sI)HgL#1_@A1bZO{7`9K=7&n_GCx#Wm-!)DmyP4ARu>mdc46MF zUYPlz(!$ISl@?}xsI)NiL#2h8A1W=({7`9O=7&lPGe1NNv-!07suiBkf11AfW>;pv zdS&K^N-HxzR9cz&q0-9C50zGCeyFrE^FyVTnI9^x%>2;Y%A7BZHqXLM#fOdf&v1{m zaoTT%o5+n%ue;$zPp=!EfNyoq!=jAt&4x~UXYtnJ*3ROch4I1i!JY8G<%5-jot>`@ z?|gpe^M%hBzqK@6nEY<(TT2h_9N!r~`!bw+zZ<{hr`NwZzG|Mf!u{yZ?}e`!-wxjl zwD>Z7kI>rS`b8(a)CxB(2N8@r@s4!(*X>8=z0204S2u4?&mYCxoxjxTzHaw>-OF&g zN8$U6hJ#1(+m0r`ZCp0H*UgK@>2<%~KE1xUzI=FfGamii^L{ftpwrgdNjz~?#@ zW?ZB}qY;h`zCC^spQa6iJXoAeKD-eNN)ZpYuJccEstt=V@9^Q;+x7KH0$Nw^2CVv%v z?ffAAV{#vUq4PK4_u*aM$=g^SPA?%moSu+*I6XDH1ustLoZdTFOixrQ9$>=h0eUz+ zKo6$}cne-C%z5^%M8%vFPILBfn)5Aqxz02a_NK8?=gg~h@LC!YVIdBQL@#FTyG>!YVIdBQL`0x+bjh0ygp@>`lYS z3uiX+0ygpjHu3^?^ICnsHyrlx-b4DDqze1_Qw0 zd40D|UW8R%gjHUIRbIeGUW8R%gjHUIRbIeGUWC-piA zb@C#t@*=GABCPTPHu55@@*=GABCPTPHu55@u4}?7FJL1t!rnBDyl`eCFJL1tU?VSJ zH?Otl;WK^Bde0I6xlUq)RbqryVuV#WwBnE6G25ck->?XFlbKV|=PgB--j`%-y3L~rvBdiJ|tO^4*3L~rvBdiJ|tO^4* z3L~s8ZNjQBV52a?-ZYHDaAu=0V52Z#qcC8%u$80hZoL;_zp`56HV0u97-1C{VHFs# z5g1_=7-1C{VHFs#5g1{0WfN9`0ULo4_NHM3hBF(10ULn<8-W4afo=EW4_>J873{M* zfe}`L5mtc_R)GNIwU>(;ti|ARV_5mu2AR*?}_kpUZ#5mu2AR*?}_kpUZ#5mr|>VHFv$5gB1` z8b)L|vk@7v5gD)%8L%7K>hoUuaEkqQ*niY1jIb(uWV$i4j(X5mto}R)ql@ zg%MVT5mto}R)ql@g%MVlHepp5uu&LcZyH8nII~e0uu&MWQ5bNxu=ru_-OsMuSNOzO zhd$*s|I^lHpIQ0s5LNt8VF?$1tk~>xO%=HKbKGX1KB~aQAI)(K&i`nRiZkbbG)D!_ z|7ea1od3}r6*&8&IXVT5k4G#NetcVYQ-F<40X8-T*w_?cV^e^QO#wDG1=!dWU}ID8 z9rebh02`YEY-|d!u_?gDrT`n80&Hvwu(2t?#-`xAu#HUtHZ}#=*c4!6Q-F<40X8-T z*w_?cV^e^QO~Lo>8k+)aYznZkDZs|202`YEY-|d!u_?gDrT`n8f^YaVHU-$&6kuah zfQ?N7HZ}#=*c4!6Q-F<40X8-T-zI2m3b3&$z{aKk8=C@bYznZkDZs|202`YEY-|d? zv&`5OU}IB&jZFbIHU-$&6kuahfQ?N7HZ}#=*c5y}lCdek#-;!pn*wZX3b3&$z{aKk z8=C@bYznZkDfkW+V^e^QO#wDG1=!dWU}IB&jZFbIHU-$&6kuah@J$cKrT`n80&Hvw zu(2t?#-;!pn*wZX3b3&$z{aNFjdEjCfQ?N7HZ}#=*c4!6Q-F<40X8-T*w_?cV^i?v zsUu ze^aIc=iij6z}YutIt7e(aEz=7tE>pCtO%>D2&=4sjjRZ(tO%>D2&=4sjjRZ(>zT01 z3fRbsus01OE1cQL3fRaB*vJYvn^pYMwHN+V0^#SfmcMl2Hg>+TCmVBLx(JsV)40@_ z#-+wIE;Uwx^Nnd-YE0u&V;YwltHAljG%j7-8kZWY!1=~B_NI|l85Fh%mAutg<4kvLdXqBCN6kHnJkDvLdXqBCN6kHnJkDu4lq3D_|ol!rnBD ztZ-%{D_|olU?VGFBP+a>WMoBHWkpzJMObA;SY-ulWJOqIMObA;SY-ulWJOqA&xBQ0 zz(!Vty=fR(;mk%>z(!WUMpnQ^R=7uKWJOqIMObA;SY<_6Wd&?xMObA;SY<_6Wd&?x zMOaen^R=`G9z(!WUMppE`RO!DYP`JNA zSY<_6WkpzJMObA8Y-B}PWkpzJMObA8Y-B}PUC)G7R=`G9guQ7PS>en^R=`G9z(!WU zMpn2rZ+x{Ptg<4kvLdXqBCN6kHnJkDvLdXqBCN6kHnJkDu4lq3D_|ol!rnBDtZ-%{ zD_|olU?VHwY*z6u2uHy@N%#6qv@Pz%(uerg15-3Y-s2<5FN6 zmjctc6j%k$2c~i9%GS6PSOv}prm;7Td|(x4&IeY3^MO_1d|(wg8(61+@kzk^*DZ}z zR&-{S6=9VXVU-oIkriQ;6=9VXVU-oIkriQeJrh=00UKEn_NHNEg)dysMplGXR)kengjH6ARaU@8R)kengjH6ARaU@8R)p2{Oju*Dh-a3d^WBP?JeEPQ*0@%4(Z3X8A`i?9ldunG&<2#c@^i?9ld zunG&<2#c_~q6w?8fQ_&Sd($w&!kIZN$_m)X3fRaB*vJaso?&D~SY<_6WkpzJMObA8 zY-B}PWkpzJMObA8Y-B}PUC)G7R=`G9guQ7PS>en^R=`G9z(!WUMppQ#21ZtdRaS&m zR)kengjH6+MplGXR)kengjH6+MplH?^-Nf01#DzR*qert70zsA1#DymY-9y&WQCtg zVPr*EWkpzJMObA;SY-ulWJOqIMObA;SY-ulWJOqA&xBQ0z(!Vty=fR(;mk%>z(!WU zMpnQ^R`|&xMplGXR)kengjH6ARaU@8R)kengjH6ARaU@8R)p2{Ojuen^R=`G9z(!WUMppPliIEjyl@(!? z6=9VXVU-oIkriQ;6=9VXVU-oIkriQeJrh=00UKEn_NHNEg)Y2U zMplGXR)kengjH6ARaU@8R)kengjH6ARaU@8R)p2{Ojuv7I@Y$=i_pfitKRl2=O;!4?+wc8UjgMYwTimo99LOMO+~d|w)S)5!N#aprtq6*%8l1 zI4#}m$4P6fvZ86ItO%>DfQ_sOtE>pCtO%>DfQ_sOtLvGt$_m)Xim*2gBP*QQ$O_oV z3fRaB*vJYWC^51ktg<4kvLdXqBCN6kHnJkDvLdXqBCN6kHnJkDu4lq3D_|ol!rnBD ztZ-%{D_|olU?VGFBP)EI#K?-U%8Ibcim=Lxu*wS9$cnJaim=Lxu*wS9$cnJKo(Ze0 zfQ_sOd($wo!kLY%fQ_txjjVu;tnhIXBP+rxE5a%(!YV7mDl1?kE5a%(!YV7mDl1?k zE5hn}CakgoHnJk@O~c3vXEw3|HnIXXvH~`;!pBLBtO%>D2&=3JtE>pCtbmQI2&=3J ztE>pCtbmQI2&?Ovu*wS9$cnHx4I?X@*~kjm$O_oV3fRaBA15)gBCN6^tg<4kvLdXq z0yeTDtg<4kvLdXq0yeTDtgdImDl1?kE5hD1jI3~GBP(DdD_|olU?VGhoW#hAu*!`lYS3THO50yeS& zHnIXXvcktnjI0Q&tO%>D2&=3JtE_;HtO%>D2&=3JtE_;HtO%>?nXt+V*vN{oHw_~z zoY}|<*vJal$O_oV3LhsivLdXqBCN6^tg<4kvH~`;BCN6^tg<4kvH~`;BCM`w!YV6Z zBP+t*G>oipW+N+LBP(DdD_|ole4NC{im=Lxu*!Ut)uvH~`;BJ54W$O>mRvH~`;0yeS&HnPIUNsO!rtE>pCtO%>D2&=4s zjjRZ(tO%>D2&=4sjjRZ(>zT013fRbsus01OE1cQL3fRaB*vJal$O<1PF|s18vLdXq zBCN6^tg-?&vLdXqBCN6^tg-?&vLdXmXTmBgU?VHS-ZYG?aAqSbU?VGFBP(DdD}0>9 z$cnJaim=Lxu*! literal 0 HcmV?d00001 diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index 64a6d98fd..cefbb218e 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -9,7 +9,7 @@ from nmmo.task import constraint as c -EVENT_NUMBER_GOAL = [1, 2, 3, 4, 5, 7, 9, 12, 15, 20, 30, 50] +EVENT_NUMBER_GOAL = [3, 4, 5, 7, 9, 12, 15, 20, 30, 50] INFREQUENT_GOAL = list(range(1, 10)) STAY_ALIVE_GOAL = [50, 100, 150, 200, 300, 500] TEAM_NUMBER_GOAL = [10, 20, 30, 50, 70, 100] @@ -39,6 +39,7 @@ task_kwargs are the optional, additional args that go into the task. * 'task_cls' specifies the task class to be used. If not provided, the standard Task is used. + * `sampling_weight` specifies the weight of the task in the curriculum sampling. Default is 1 """ task_spec = [] @@ -47,8 +48,9 @@ essential_skills = ['GO_FARTHEST', 'EAT_FOOD', 'DRINK_WATER', 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] for event_code in essential_skills: - task_spec += [('agent', CountEvent, {'event': event_code, 'N': cnt}) - for cnt in EVENT_NUMBER_GOAL] + for cnt in EVENT_NUMBER_GOAL: + task_spec += [('agent', CountEvent, {'event': event_code, 'N': cnt}, + {'sampling_weight': 30})] # item/market skills, which happen less frequently or should not do too much item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', @@ -60,7 +62,8 @@ # find resource tiles for resource in m.Harvestable: for reward_to in ['agent', 'team']: - task_spec.append((reward_to, CanSeeTile, {'tile_type': resource})) + task_spec.append((reward_to, CanSeeTile, {'tile_type': resource}, + {'sampling_weight': 10})) # sample this more # stay alive ... like ... for 300 ticks # i.e., getting incremental reward for each tick alive as an individual or a team @@ -106,13 +109,14 @@ def PracticeFormation(gs, subject, dist, num_tick): # level up a skill for skill in SKILLS: - for level in LEVEL_GOAL: + for level in LEVEL_GOAL[1:]: # since this is an agent task, num_agent must be 1 - task_spec.append(('agent', AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1})) + task_spec.append(('agent', AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1}, + {'sampling_weight': 10*(5-level) if level < 5 else 1})) # make attain skill a team task by varying the number of agents for skill in SKILLS: - for level in LEVEL_GOAL: + for level in LEVEL_GOAL[1:]: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune task_spec.append(('team', AttainSkill, @@ -121,7 +125,8 @@ def PracticeFormation(gs, subject, dist, num_tick): # practice specific combat style for style in COMBAT_STYLE: for cnt in EVENT_NUMBER_GOAL: - task_spec.append(('agent', ScoreHit, {'combat_style': style, 'N': cnt})) + task_spec.append(('agent', ScoreHit, {'combat_style': style, 'N': cnt}, + {'sampling_weight': 5})) for cnt in TEAM_NUMBER_GOAL: task_spec.append(('team', ScoreHit, {'combat_style': style, 'N': cnt})) @@ -135,26 +140,30 @@ def PracticeFormation(gs, subject, dist, num_tick): # hoarding gold -- evaluated on the current gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', HoardGold, {'amount': amount})) + task_spec.append(('agent', HoardGold, {'amount': amount}, + {'sampling_weight': 3})) for amount in TEAM_NUMBER_GOAL: task_spec.append(('team', HoardGold, {'amount': amount})) # earning gold -- evaluated on the total gold earned by selling items # does NOT include looted gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', EarnGold, {'amount': amount})) + task_spec.append(('agent', EarnGold, {'amount': amount}, + {'sampling_weight': 3})) for amount in TEAM_NUMBER_GOAL: task_spec.append(('team', EarnGold, {'amount': amount})) # spending gold, by buying items for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', SpendGold, {'amount': amount})) + task_spec.append(('agent', SpendGold, {'amount': amount}, + {'sampling_weight': 3})) for amount in TEAM_NUMBER_GOAL: task_spec.append(('team', SpendGold, {'amount': amount})) # making profits by trading -- only buying and selling are counted for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', MakeProfit, {'amount': amount})) + task_spec.append(('agent', MakeProfit, {'amount': amount}, + {'sampling_weight': 3})) for amount in TEAM_NUMBER_GOAL: task_spec.append(('team', MakeProfit, {'amount': amount})) @@ -172,7 +181,8 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune task_spec.append(('agent', OwnItem, - {'item': item, 'level': level, 'quantity': quantity})) + {'item': item, 'level': level, 'quantity': quantity}, + {'sampling_weight': 4-level if level < 4 else 1})) # team task for quantity in TEAM_ITEM_GOAL: @@ -185,7 +195,8 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for level in LEVEL_GOAL: # agent task task_spec.append(('agent', EquipItem, - {'item': item, 'level': level, 'num_agent': 1})) + {'item': item, 'level': level, 'num_agent': 1}, + {'sampling_weight': 4-level if level < 4 else 1})) # team task for num_agent in AGENT_NUM_GOAL: @@ -200,7 +211,8 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune task_spec.append(('agent', ConsumeItem, - {'item': item, 'level': level, 'quantity': quantity})) + {'item': item, 'level': level, 'quantity': quantity}, + {'sampling_weight': 4-level if level < 4 else 1})) # team task for quantity in TEAM_ITEM_GOAL: @@ -215,7 +227,8 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune task_spec.append(('agent', HarvestItem, - {'item': item, 'level': level, 'quantity': quantity})) + {'item': item, 'level': level, 'quantity': quantity}, + {'sampling_weight': 4-level if level < 4 else 1})) # team task for quantity in TEAM_ITEM_GOAL: @@ -230,7 +243,8 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune task_spec.append(('agent', ListItem, - {'item': item, 'level': level, 'quantity': quantity})) + {'item': item, 'level': level, 'quantity': quantity}, + {'sampling_weight': 4-level if level < 4 else 1})) # team task for quantity in TEAM_ITEM_GOAL: @@ -245,7 +259,8 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune task_spec.append(('agent', BuyItem, - {'item': item, 'level': level, 'quantity': quantity})) + {'item': item, 'level': level, 'quantity': quantity}, + {'sampling_weight': 4-level if level < 4 else 1})) # team task for quantity in TEAM_ITEM_GOAL: @@ -268,7 +283,7 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): from contextlib import contextmanager import multiprocessing as mp import numpy as np - import pickle + import dill @contextmanager def create_pool(num_proc): @@ -294,7 +309,7 @@ def check_task_spec(spec_list): if idx > 0 and idx % 50 == 0: print(idx, 'task specs checked.') - # 3590 task specs: divide the specs into chunks + # 3495 task specs: divide the specs into chunks num_cores = psutil.cpu_count(logical=False) spec_chunks = np.array_split(task_spec, num_cores) with create_pool(num_cores) as pool: @@ -304,6 +319,9 @@ def check_task_spec(spec_list): # if len(sample_task) > 1: # print(sample_task[-1].name) + # for now, we only use the 1535 tasks with reward_to=agent + flt_spec = [spec for spec in task_spec if spec[0] == 'agent'] + # test if the task spec is pickalable - with open('manual_curriculum.pkl', 'wb') as f: - pickle.dump(task_spec, f) + with open('sample_curriculum.pkl', 'wb') as f: + dill.dump(flt_spec, f) From 102598b8b33dc41990d1a6ac622ab07656e0639a Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 13 Jul 2023 22:48:19 +0900 Subject: [PATCH 060/113] implemented sample training tasks from curriculum file --- nmmo/core/config.py | 3 +++ nmmo/core/env.py | 33 ++++++++++++++++++++++-- nmmo/task/task_api.py | 13 ++++++++-- setup.py | 1 + tests/task/test_sample_task_from_file.py | 23 +++++++++++++++++ 5 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 tests/task/test_sample_task_from_file.py diff --git a/nmmo/core/config.py b/nmmo/core/config.py index b1d2064e4..964a62757 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -153,6 +153,9 @@ def game_system_enabled(self, name) -> bool: HORIZON = 1024 '''Number of steps before the environment resets''' + CURRICULUM_FILE_PATH = None + '''Path to a curriculum task file containing a list of task specs for training''' + TASK_EMBED_DIM = 1024 '''Dimensionality of task embeddings''' diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 7e6a02d00..4df7457c5 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Callable from collections import defaultdict from copy import copy +import dill import gym import numpy as np @@ -52,6 +53,14 @@ def __init__(self, self.agent_task_map = None self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float32) + # curriculum file path, if provided, should exist + self.curriculum_file_path = config.CURRICULUM_FILE_PATH + if self.curriculum_file_path is not None: + # try to open the file to check if it exists + with open(self.curriculum_file_path, 'rb') as f: + task_spec = dill.load(f) # pylint: disable=unused-variable + f.close() + @functools.cached_property def _obs_space(self): def box(rows, cols): @@ -155,7 +164,8 @@ def action_space(self, agent: AgentID): # TODO: This doesn't conform to the PettingZoo API # pylint: disable=arguments-renamed def reset(self, map_id=None, seed=None, options=None, - make_task_fn: Callable=None): + make_task_fn: Callable=None, + sample_training_tasks=False): '''OpenAI Gym API reset function Loads a new game map and returns initial observations @@ -190,7 +200,9 @@ def reset(self, map_id=None, seed=None, options=None, self.scripted_agents.add(eid) ent.agent.set_rng(self._np_random) - if make_task_fn is not None: + if self.curriculum_file_path is not None and sample_training_tasks is True: + self.tasks = self._sample_training_tasks() + elif make_task_fn is not None: self.tasks = make_task_fn() else: for task in self.tasks: @@ -205,6 +217,23 @@ def reset(self, map_id=None, seed=None, options=None, return {a: o.to_gym() for a,o in self.obs.items()} + def _sample_training_tasks(self): + with open(self.curriculum_file_path, 'rb') as f: + # curriculum file may have been changed, so read the file when sampling + task_spec = dill.load(f) + f.close() + + sampling_weights = [] + for single_spec in task_spec: + weight = 1 # default + if len(single_spec) == 4 and 'sampling_weight' in single_spec[3]: + weight = single_spec[3]['sampling_weight'] + sampling_weights.append(weight) + sampled_spec = self._np_random.choice(task_spec, size=len(self.possible_agents), + p=sampling_weights/np.sum(sampling_weights)) + + return task_api.make_team_tasks(self.possible_agents, sampled_spec) + def _map_task_to_agent(self): agent_task_map: Dict[int, List[task_api.Task]] = {} for task in self.tasks: diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 17dcd51a6..37ffcd154 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -182,13 +182,19 @@ def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: 'right_team', 'right_team_leader', 'my_team_leader'] -def make_team_tasks(teams, task_spec) -> List[Task]: +def make_team_tasks(teams: Union[Iterable[int], Dict], + task_spec) -> List[Task]: """ - task_spec: a list of tuples (reward_to, eval_fn, **kwargs) + Args: + teams: a Dict with { team_id: [agent_id]} or a List of agent ids + task_spec: a list of tuples (reward_to, eval_fn, pred_fn_kwargs, task_kwargs) each tuple is assigned to the teams """ tasks = [] + if not isinstance(teams, Dict): + # convert agent id list to the team dict format + teams = {idx: [agent_id] for idx, agent_id in enumerate(teams)} team_list = list(teams.keys()) team_helper = TeamHelper(teams) for idx in range(min(len(team_list), len(task_spec))): @@ -210,6 +216,9 @@ def make_team_tasks(teams, task_spec) -> List[Task]: else: task_cls = Task + if 'sampling_weight' in task_kwargs: # necessary for sampling, not needed here + task_kwargs.pop('sampling_weight') + # reserve 'target' for relative agent mapping if 'target' in pred_fn_kwargs: target = pred_fn_kwargs.pop('target') diff --git a/setup.py b/setup.py index 4610f6779..7a49a6569 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ 'psutil==5.9.3', 'py==1.11.0', 'tqdm<5', + 'dill==0.3.6', ], extras_require=extra, python_requires=">=3.7", diff --git a/tests/task/test_sample_task_from_file.py b/tests/task/test_sample_task_from_file.py new file mode 100644 index 000000000..20e497887 --- /dev/null +++ b/tests/task/test_sample_task_from_file.py @@ -0,0 +1,23 @@ +import unittest + +import nmmo +from tests.testhelpers import ScriptedAgentTestConfig + +class TestSampleTaskFromFile(unittest.TestCase): + def test_sample_task_from_file(self): + # init the env with the pickled training task spec + config = ScriptedAgentTestConfig() + config.CURRICULUM_FILE_PATH = 'tests/task/sample_curriculum.pkl' + env = nmmo.Env(config) + + # env.reset() samples and instantiates a task for each agent + # when sample_traning_tasks is set True + env.reset(sample_training_tasks=True) + + self.assertEqual(len(env.possible_agents), len(env.tasks)) + # for the training tasks, the task assignee and subject should be the same + for task in env.tasks: + self.assertEqual(task.assignee, task.subject) + +if __name__ == '__main__': + unittest.main() From 94a3272244308171eb7d50722204561c7246284b Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 14 Jul 2023 14:11:16 +0900 Subject: [PATCH 061/113] refactored task spec, added make_task_from_spec --- nmmo/core/env.py | 17 +- nmmo/task/task_api.py | 90 ----------- nmmo/task/task_spec.py | 125 +++++++++++++++ tests/task/sample_curriculum.pkl | Bin 65497 -> 99552 bytes tests/task/test_demo_task_creation.py | 15 +- tests/task/test_manual_curriculum.py | 213 ++++++++++++++------------ tests/task/test_task_api.py | 56 +++---- 7 files changed, 279 insertions(+), 237 deletions(-) create mode 100644 nmmo/task/task_spec.py diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 4df7457c5..8428769b6 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -16,7 +16,7 @@ from nmmo.core import action as Action from nmmo.entity.entity import Entity from nmmo.systems.item import Item -from nmmo.task import task_api +from nmmo.task import task_api, task_spec from nmmo.task.game_state import GameStateGenerator from nmmo.lib import seeding from scripted.baselines import Scripted @@ -58,7 +58,7 @@ def __init__(self, if self.curriculum_file_path is not None: # try to open the file to check if it exists with open(self.curriculum_file_path, 'rb') as f: - task_spec = dill.load(f) # pylint: disable=unused-variable + curriculum = dill.load(f) # pylint: disable=unused-variable f.close() @functools.cached_property @@ -220,19 +220,14 @@ def reset(self, map_id=None, seed=None, options=None, def _sample_training_tasks(self): with open(self.curriculum_file_path, 'rb') as f: # curriculum file may have been changed, so read the file when sampling - task_spec = dill.load(f) + curriculum = dill.load(f) # a list of TaskSpec f.close() - sampling_weights = [] - for single_spec in task_spec: - weight = 1 # default - if len(single_spec) == 4 and 'sampling_weight' in single_spec[3]: - weight = single_spec[3]['sampling_weight'] - sampling_weights.append(weight) - sampled_spec = self._np_random.choice(task_spec, size=len(self.possible_agents), + sampling_weights = [spec.sampling_weight for spec in curriculum] + sampled_spec = self._np_random.choice(curriculum, size=len(self.possible_agents), p=sampling_weights/np.sum(sampling_weights)) - return task_api.make_team_tasks(self.possible_agents, sampled_spec) + return task_spec.make_task_from_spec(self.possible_agents, sampled_spec) def _map_task_to_agent(self): agent_task_map: Dict[int, List[task_api.Task]] = {} diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 37ffcd154..540a8cd02 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -7,7 +7,6 @@ from nmmo.task.group import Group from nmmo.task.predicate_api import Predicate, make_predicate, arg_to_string from nmmo.task import base_predicates as bp -from nmmo.lib.team_helper import TeamHelper class Task(ABC): """ A task is used to calculate rewards for agents in assignee @@ -173,92 +172,3 @@ def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: # the default is to use the predicate class return make_same_task(bp.StayAlive, agent_list, task_cls=OngoingTask) - -###################################################################### -# TODO: a lot to improve below - -REWARD_TO = ['agent', 'team'] -VALID_TARGET = ['left_team', 'left_team_leader', - 'right_team', 'right_team_leader', - 'my_team_leader'] - -def make_team_tasks(teams: Union[Iterable[int], Dict], - task_spec) -> List[Task]: - """ - Args: - teams: a Dict with { team_id: [agent_id]} or a List of agent ids - task_spec: a list of tuples (reward_to, eval_fn, pred_fn_kwargs, task_kwargs) - - each tuple is assigned to the teams - """ - tasks = [] - if not isinstance(teams, Dict): - # convert agent id list to the team dict format - teams = {idx: [agent_id] for idx, agent_id in enumerate(teams)} - team_list = list(teams.keys()) - team_helper = TeamHelper(teams) - for idx in range(min(len(team_list), len(task_spec))): - team_id = team_list[idx] - - # see if task_spec has the task embedding - if len(task_spec[idx]) == 3: - reward_to, pred_fn, pred_fn_kwargs = task_spec[team_id] - task_kwargs = {} - elif len(task_spec[idx]) == 4: - reward_to, pred_fn, pred_fn_kwargs, task_kwargs = task_spec[team_id] - else: - raise ValueError('Wrong task spec format') - - assert reward_to in REWARD_TO, 'Wrong reward target' - - if 'task_cls' in task_kwargs: - task_cls = task_kwargs.pop('task_cls') - else: - task_cls = Task - - if 'sampling_weight' in task_kwargs: # necessary for sampling, not needed here - task_kwargs.pop('sampling_weight') - - # reserve 'target' for relative agent mapping - if 'target' in pred_fn_kwargs: - target = pred_fn_kwargs.pop('target') - assert target in VALID_TARGET, 'Invalid target' - # translate target to specific agent ids using team_helper - target = team_helper.get_target_agent(team_id, target) - pred_fn_kwargs['target'] = target - - # handle some special cases and instantiate the predicate first - predicate = None - if isinstance(pred_fn, FunctionType): - # if a function is provided as a predicate - pred_cls = make_predicate(pred_fn) - - # TODO: should create a test for these - if (pred_fn in [bp.AllDead]) or \ - (pred_fn in [bp.StayAlive] and 'target' in pred_fn_kwargs): - # use the target as the predicate subject - pred_fn_kwargs.pop('target') # remove target - predicate = pred_cls(Group(target), **pred_fn_kwargs) - - # create the task - if reward_to == 'team': - assignee = team_helper.teams[team_id] - if predicate is None: - predicate = pred_cls(Group(assignee), **pred_fn_kwargs) - tasks.append(predicate.create_task(task_cls=task_cls, **task_kwargs)) - else: - # this branch is for the cases like AllDead, StayAlive - tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls, - **task_kwargs)) - - elif reward_to == 'agent': - agent_list = team_helper.teams[team_id] - if predicate is None: - tasks += make_same_task(pred_cls, agent_list, pred_kwargs=pred_fn_kwargs, - task_cls=task_cls, task_kwargs=task_kwargs) - else: - # this branch is for the cases like AllDead, StayAlive - tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls, **task_kwargs) - for agent_id in agent_list] - - return tasks diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py new file mode 100644 index 000000000..837ad5de9 --- /dev/null +++ b/nmmo/task/task_spec.py @@ -0,0 +1,125 @@ +from dataclasses import dataclass, field +from typing import Iterable, Dict, List, Union, Type +from types import FunctionType + +import numpy as np + +from nmmo.task.task_api import Task, make_same_task +from nmmo.task.predicate_api import make_predicate +from nmmo.task.group import Group +from nmmo.task import base_predicates as bp +from nmmo.lib.team_helper import TeamHelper + +""" task_spec + + eval_fn can come from the base_predicates.py or could be custom functions like above + eval_fn_kwargs are the additional args that go into predicate. There are also special keys + * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + these str will be translated into the actual agent ids + + task_cls specifies the task class to be used. Default is Task. + task_kwargs are the optional, additional args that go into the task. + + reward_to: must be in ['team', 'agent'] + * 'team' create a single team task, in which all team members get rewarded + * 'agent' create a task for each agent, in which only the agent gets rewarded + + sampling_weight specifies the weight of the task in the curriculum sampling. Default is 1 +""" + +REWARD_TO = ['agent', 'team'] +VALID_TARGET = ['left_team', 'left_team_leader', + 'right_team', 'right_team_leader', + 'my_team_leader'] + +@dataclass +class TaskSpec: + eval_fn: FunctionType + eval_fn_kwargs: Dict + task_cls: Type[Task] = Task + task_kwargs: Dict = field(default_factory=dict) + reward_to: str = 'agent' + sampling_weight: float = 1.0 + embedding: np.ndarray = None + + def __post_init__(self): + assert isinstance(self.eval_fn, FunctionType), \ + "eval_fn must be a function" + assert self.reward_to in REWARD_TO, \ + f"reward_to must be in {REWARD_TO}" + if 'target' in self.eval_fn_kwargs: + assert self.eval_fn_kwargs['target'] in VALID_TARGET, \ + f"target must be in {VALID_TARGET}" + + +def make_task_from_spec(assign_to: Union[Iterable[int], Dict], + task_spec: List[TaskSpec]) -> List[Task]: + """ + Args: + assign_to: either a Dict with { team_id: [agent_id]} or a List of agent ids + task_spec: a list of tuples (reward_to, eval_fn, pred_fn_kwargs, task_kwargs) + + each tuple is assigned to the teams + """ + teams = assign_to + if not isinstance(teams, Dict): # convert agent id list to the team dict format + teams = {idx: [agent_id] for idx, agent_id in enumerate(assign_to)} + team_list = list(teams.keys()) + team_helper = TeamHelper(teams) + + # assign task spec to teams (assign_to) + tasks = [] + for idx in range(min(len(team_list), len(task_spec))): + team_id = team_list[idx] + + # map local vars to spec attributes + reward_to = task_spec[idx].reward_to + pred_fn = task_spec[idx].eval_fn + pred_fn_kwargs = task_spec[idx].eval_fn_kwargs + task_cls = task_spec[idx].task_cls + task_kwargs = task_spec[idx].task_kwargs + task_kwargs['embedding'] = task_spec[idx].embedding # to pass to task_cls + + # reserve 'target' for relative agent mapping + if 'target' in pred_fn_kwargs: + target = pred_fn_kwargs.pop('target') + assert target in VALID_TARGET, 'Invalid target' + # translate target to specific agent ids using team_helper + target = team_helper.get_target_agent(team_id, target) + pred_fn_kwargs['target'] = target + + # handle some special cases and instantiate the predicate first + predicate = None + if isinstance(pred_fn, FunctionType): + # if a function is provided as a predicate + pred_cls = make_predicate(pred_fn) + + # TODO: should create a test for these + if (pred_fn in [bp.AllDead]) or \ + (pred_fn in [bp.StayAlive] and 'target' in pred_fn_kwargs): + # use the target as the predicate subject + pred_fn_kwargs.pop('target') # remove target + predicate = pred_cls(Group(target), **pred_fn_kwargs) + + # create the task + if reward_to == 'team': + assignee = team_helper.teams[team_id] + if predicate is None: + predicate = pred_cls(Group(assignee), **pred_fn_kwargs) + tasks.append(predicate.create_task(task_cls=task_cls, **task_kwargs)) + else: + # this branch is for the cases like AllDead, StayAlive + tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls, + **task_kwargs)) + + elif reward_to == 'agent': + agent_list = team_helper.teams[team_id] + if predicate is None: + tasks += make_same_task(pred_cls, agent_list, pred_kwargs=pred_fn_kwargs, + task_cls=task_cls, task_kwargs=task_kwargs) + else: + # this branch is for the cases like AllDead, StayAlive + tasks += [predicate.create_task(assignee=agent_id, task_cls=task_cls, **task_kwargs) + for agent_id in agent_list] + + return tasks diff --git a/tests/task/sample_curriculum.pkl b/tests/task/sample_curriculum.pkl index 8c83a9edc1f8fb3c4a25f5221bf94c609a9f0f82..2986f6d82f0fbedb97f433f1a6edca63fa4be153 100644 GIT binary patch literal 99552 zcmb`Q&u?7UeZ@%*e@aT^#BS=MfH&R9Kq8|Cigee?GA#+Tew3o^bWsdO=Y7QCJU;&E`j1vtR#&S3{L$>n^u^)H$>{2&bNX=pkM`-K?%`~@eUSd( ze)Zv}vsZsG`#25U>OSrqx8EPmroVwH4?3sa_M>t4sDId*bWdl~3)e?y!^!UB?r@T( zPQRWfwI6=i8Gmp(2W@oc|L^qTjl1o)uI(S(+`WG=o37rO?X}LP+to_ihsO!^O?;a9 zA9fz~XVdj+VHNq}{Lh|Yr#s_r`V>d)$!Ip+=zLIp{^{k@&dHwI^;&E8J$LfU5UQtxZMukWp&^)C0m*?aNhS?||+-|GGP z-YdPAzhwA^HhfDPzM~Dlqz%8U4Sz`+{)#sI_2S{v?cHk!?YHjUeRG!1t5rO&_xsxM zR>Otkt%eK7TMZYEw;C=SZ#7&v-fFmTywz~wc&p*U@m_p$|JI$o_IuY3cK5{-?x)Wr zT)@?E;e@NRTsYxsxNyQd_pjgG-)-N#bs(PaU#HI_ zoNqN;INxfxaK6=W;e4y%!ueLih4Zb33+G!67tXgDE}ZYBo7eW=O@FtyZyoI37LWL6 z=~D?uTn!hFxEd}TaWz~x;%c~X#MN-&h^yhk5m&>7Bd&%EN4)*^?z_8hx8J!Z9`Apq zPa_;}HC#B}YPfK`)o|f>tKq`&R>Otkt%eK7TMZYEw;C=S@1^T^@7#aq_U^MQcy;N^ ztQ)`gKl2w7D|KHT55AUOX{1lO=1yGx!mju;F4wDg;&Rc7CvLiXT<&V|XWVu_<8s}@ z&p6$=aqHbCJ$T*iL7u4hAWzhLkSFRr$P@J*Q%@S^(y3vdKL0Sy$X4vUWGhSufm1hKX~WXy#_`2$Sp#is23qm)QgZO z>P5&C^&;endJ*zOy$E@tUWD_%8*bcvd;YB${cD7ux;@Af^&aGjdJpnMy$5-s-h(_* z??IlZ_aIL+dhqtG=U(mUJ@~oXgFI31L7u4hAWzhLkSFRr$P@J*OII4^&aGjdJpnMy$9QGywl|C#241gmyUU&UV}VQ zuR)%u*C0>SYmg`EHOLe78sv$^HP}k;1$Kt_yWNBSaW}nVSG~P9-I=6+X-}R!>dvO$ zn7_Yw+<$QOB)xYy?stx7)2+8g{p0lRT^fJ7^bXvGrSCBeey8e3f)2i$J{bIN`p0zr zW_SF+oV#^>{N!|!7GZwsgK^hg_Qrcv0Olud-5tB1cm1vYX|LeZ_O_;*2mQl`H+IwD z`8jS6&raHt^hdM3*Ov~e?t2M?!>8xfO&{zXdDDLEP5aE6cKh3_-n`FC=cQyX+&w%z zd-UYFtK`bGH6DF9+q*Yy9gdFE|CWB6XKQ-#{F1vifB!q5)#fC<_1;Zyhre*#eSgxP zbUP>Q<8J3DHLi4bpFBOfzo@RVZ;Y!q(;ZOSZeHPy@#ySPZqr~+-FddU;$?lhu&fJM zRw*H^=Y==>>8c+db`Qp#$KB)Z(bI~rAEp0ews)a4qrZ8&n4hK(_FnR)z2Z$Pz6KW; zS$Yjt$98Qp>GX&9ANG$cL(cD&m~NcT|25yKahmkmJvqIa|8V1WO0zQ4bmKVP@sWDG zHr=VTaei~g-ty}My2WDfk2%pRgMUgN4SrVr<6o+9TZ4bC{)Y%JE&ml!OIWWV&|fmO zgpC@)YVjI28xyt~6Sf-@c1*%QrxTp6-#-7l{`^?iYEaAf-Ygg6KN=SU1h*Icn|1_*95KyaG@g4+xb8Z~39`kJ-6 zd+*a(vu^xW<8FZ9b^`>r8z8vd0Kx4B2yQn(Xw;3J>g8YOxPN-mxE;HV+W~^x4iMaS zfZ(AdCl9!lpezk^UZZ~1+I(s^|2WMM9J4`tyz zx|yeC-)`KllzkA$$iQ5eC-)`KllzkA$$iQ5RC%rg6KIz5rB`3X;ek6PN{Dob5)ISlAdU1Sw)QjWeqh1^zANAt+_^21h$49+5 zK0fNj@$peFjxRat3%5HDyXiYt?>~E`QF_)t7teZee0&5Z$Suc(c&w6^{D7~kCwO#!)-J{Vy9Cy>*K<)R>hUq(%qhWfPd1dgcbfnWu zPyesO(NXoj`jzWzE331;^}Ut!-}SwXz3bOso~(Sd^3m!?Yd>sFR_7nJe%N|x<^Ia- z&Yvgttzq@#h0*xQ{YRa{Zn~Ri@F2bT_#{2tVJ$tIVY+pC_F#~{KbqdLKdlDL2GyN9 z>0htC)*GF4UwioE!^7Td)m>ZP=?)+F$D`p%!o8NB`!G3ut$G~8{KNK1XLy#L6mfVq z9`_H=j?Yf6K6)~n{^q@LCvE7k`)m=nJHzzuZ-toc{r&Z|mse-6zB-xh&koz|2WS1` z^ljR9`$9Y6<{u3HFJ*A&PBq};^xx^_cDpkijwYQdq|S#`zaMR|d~dKPtTWX7I(+f81>PXN{((`+XlcTj|E9XWtK8?^pkp zZ*P5iQ`GsNwQhE*?;D=KalJa-{{C5KIO$KG%=lg=dR?~vOiu^tqrrRCKjwEstq#7I zKBgbBrB@1rh(O=6l-EGtT#yO$ZH98f+IbGk%Gz67euZ0`LnbW$u(D2|B;_?wlI4U} zO%8NtT5S%QKsm?+%AuUF-gp<8&|nuEjn}Z*n6TBDu-%xjV-ns;UYfQJM%9hJ3$CJT zG`h3ZV(PT2h{b#E+>WozJd1K)bf$Ax*fi6>- zAPWz>SgT1gQUwv{LYE1Yq)ebB%L!Rmvha{YCQuHMu82T6loMRxVGXYEKyZZzf-5`_ zT;YM>3J-)v!rMr1Npz~8U2OIR-W!`uU(OPN+8No32y~Uo1lf03!Y4G6C7h(M=Vz6RHJSc7Xj5M0}V;Mxv^Mz-6!m!8ks`3hdpervnw3tA#jKObVUTZOl5*BJgmVL9uepgiDW|rN>V0J zlH~+fc-Vz2JR(pIk!(P4g+~NB&GI$4!owO|;ep@^4+K|uAT$!*#{SvxEBFT9KQ#6o z5vZM!t%yKZsZ5Z4hc&pqBLZC^k!y%RNy-FDvYg=h4!dxDM+C|tat#Qs?}$LBS-u9> zcUXh#I}lvof#CWMgl4|GHm-g%M$@nFR(6`cpd|veGqM#C=qi;7vhT14*LOsqDpKu!-+|D`cRTO(x|7q% zXxRL&lsk=`M+9nUWGo`kl`0ct=V1-5^N2uKN#q+MP?9o%k}M~<&ciNT=MjN&hpUXRX_l|SbspB>Iu8Wbc_6sX1EG=gHt&t{bFZ4+hWC@k!XpCJGtw0i=rWZFvhc75 zS9nCAOC*vF5hzKSKuMMpT;X9CuJDLJIYhDn!4)16=rqgM;0h0GaD@kgD?AWf;epUh zc>D8b*foAt`(KTPM+B;8q$?uOWhxV7;b9G~@Q6T{NF*B~P?9o%k}M~V;L+F@CcvPn%^WEH3|f`Q6RXD0>Nz*2yUZ5a2o}J+b9s+MuFfq3eQM!8wG;f zC=lF6f#5a@1h-KjxQznAZ4?M@qd;&Qg{M%sjRL`K6bNpkKyVucg4-w%+(v=mHVOo{ zQ6RXD!cY3UjRL`K6bNpkKyVucg4-w%+(v=mHVOo{Q6RXD!VgosjRL`K6bNpkKyVuc zg4-w%+(v=mHVOo{Q6RXD!Vj#vjRL`K6bNpkKyVucg4-w%+(v=mHVOo{Q6RXD!cPRc zjRL`K6bNpkKyVucg4-w%+(v=mHVOo{Q6RXD!cSefjRL`K6bNpkKyVucg4-w%+(v=m zHVOo{Q6RXD!cVohjRL`K6bNpkKyVucg4-w%+(v=mHVOo{Q6RXD!jBEOjRL`K6bNpk zKyVucg4-w%+(v=mHVOo{Q6RXD!j0K(qd;&Q1%lfs5Zp$A;5G^bw^1OtjRL`K6bNpk zaOZlgeO z8wG;fC=lF6fl%70bQ{9O>*?2oo}G00Mg-i(wOaGYeR|vYw=XT+h(H9o!$c;~eKInE z?iZ2?@ts$-Ijq$rNjGFf1WHmSP?9o%k}M~*YI2~PLTYo!1j<1sP!8pU;ybTucaaGV zc2RuiRqYyz@4Tud6yJGOODMkcs+LfC=ha||fP1%G%MpQ2GcpVjsFN~*u1%RBTMoNe ztGT)(KM;YElnIoiOrRvo3E5HcucWH&WF}A!k)x1gCQuIL1lMv{gKIeuT+4yrS`Gx) zav->t1EJJ%)faX*JLB{e+X;VRhkGlEmG^IpU)T|W3QD?(2vks+Ko_b^h?Q5ni?vea zMFdKcbQ2LMNtr-NmJ_mWV&&EDA`>VF5>`Z@9LfpB%Bx*NvGQsO#mcKC6f3WmP^`RK zLb38{38l&#ED>==6X@EM39{v|3)gZ)pd=&15P_1E36x|xAv;R8 z9CncjltbhwB2W(H1lMv{gKIeuT+4yrS`Gx)av->t1HrW%?m=@cM+E9*WEdh)CuIU% zn=(PR9CqPajtG=wWEdh)k}`pkEGJ|~$(F+|GJ$f497P1mp`74a4r_2N2ZC!k5M0ZF z;93p@*K#1ZmczTRuH}e8os0}a1nQ(rpleem$d?qlC z*hMB#4w0jXKsl5XT+3k%uH`^*EeC>YIS^dSf#6yW1lMx7BfzyB5vY@qVTeGTlnHcg z$^_YR*oA93B2bc%VTeFU$^=TXoRA$QTMoO(1j-?D6cH$ga)N6)tiiP$2(INoa4iRd zYdH{H%Yoop4tJ@zmLmdnGBOMisFN~*u1%RBTMoN$Ek^`OGBOMiC`p+>NtP3`qh!lr z7nwjgM2;c?==6R49ifv!!NAX^SOxRxUVB^eop2$ZBupd`x)t(rzf zhQTf}fpU-ultVeewH$WgS`Gx)av->t1HrW%2(INoD79R=d2ajd{ukXj_x!UNzn31? zxEFs__npfNKN3L%YA3lSB2YVJ0$rstA@*JEE{c5@5hzJ=O+=t1WdbExPRO>RMv-f3 zcaaH{gG`_t$_d53tKCJh?`jFfzN;k^`>vKy?7Lb*vF~aLrM??15%7G-)tbM9BR>#< zIw=#VlQMy>O_?BD4mr4%BLXEE8HNayq)ebB%L%QTMn#6fE;4~~kO`DSIl;9YcHvqM z1lMvPxRwLKwHyep zNtP3`qvWsUu!~Hf93n>%fpRD(xR%2jT+4yrS`Gx)av->t1HrW%2(IPmC$*@PkspXa zosouIE7ba?jOU z4uk^B{pc5ewle?E7YY+7S9c(|y2FoDxVj?(l`~Qe5vZIpfi6;+AnOjhaCJunN-|On z5hzKSKuMMpvaINZLZlk(A`>VFnLs&|6I|V47q0F=aCHZQt2+=1boYgp1HrW%2(IPu zIDFS~M4(Pah9LrVQYO%~DHCMNVHd9Dh(Jk3h9LqaDHABkazb{LY&q;A6DWtsQAD5| z$_cLJum;z1Ah?zT!L=L+uH`^*EeC>YIsA$S*K$OlPDX|y0(DX*(6uQOWXoX}uH}e8 zNk)bt0wpOED9Lg{c9d*6>>?8=hsaSxpd88xuH~=>*K#1ZmIJ}H90;!EKyWPwf@?Ya z+7Q=rM4(Pah9LrVQYO%~DHCMNVHd9Dh(Jk3h9LqaDHABkazb{LY&q;A6DWtsQAD5| z$_cLJum;z1Ah?zT!L=L+uH`^*EeC>YIs7Uc*K$OlPDX|y0(DX*(6uQOWXoX}uH}e8 zNk)bt0wpOED9Lg{c9d*6>>?8=hsaSxpd88xuH~=>*K#1ZmIJ}H90;!EKyWPwf@?Ya zdL`F#M4(Pah9LrVQYO%~DHCMNVHd9Dh(Jk3h9LqaDHABkazb{LY&q;A6DWtsQAD5| z$_cLJum;z1Ah?zT!L=L+uH`^*EeC>YIXuwGwHy(slaXPFK%JBcbZyE6*>c#0YdIoN zl96GEKuO93O0t}g9VJ^1yT}B}A#xNED2H-_YdNgJwHyept!vmdM%MpP(85xEM)Jd5@*QQL6Er(sWmLmcs z85xEMl%!0cB+CieQL^Q*i%g&#B1aK{awsRbmctrc%Yoop4g}Y7Ah?zT!L=L+uI2DR zC)aXBpiV}HAp&($CeXDh6J*O_7p~=qKuJc1Ap#{S6DY}YLUxpFIqV`6D2K>VM4%kX z39jX^2G?>RxRwLKwHyepy<=OO}?Q(gmIq%tAaUF|N4br%unGAGqU1WHmSP?F_@ zEGudhsit-pnLs(n1j?bDP^`P!T@>rCmQbv_T0*hzY6-= z`DyFO4@96&$^`19OrUF1CdigU4zA^hKuJc1Ap#{S6DY}YLaU}xkzuflOrRWO0_9Ln za4m;jxRwLKwHyepWw$WcU~9Lfo<<*)|Vav->t1HrW%2(INoa4iRdYdJj4 z$+a92sFRUlh(Mi`33P4B1le-fg=;w?P?C{hh(Jlo1WK};kR2sk4!g(%${}(T5h#ap zf@?Xf!L=L+uH`^*EeC>YIS^dSf#6yW4{>rWM+E9*WEdh)CuIU%n=(PR9CqPajtG=w zWEdh)k}`pkEGJ|~$(F+|GJ$f497P1mp`74a4r_2N2ZC!k5M0ZF;93p@*K#1Zmcv7w zT+0!GIvE*;2-HcLK-Z>BkS&K@xRxUVB^eop2$ZBupd`x)*-^6Pu!~Hf93n>%fpRD( zxR%2jT+4yrS`Gx)av->t1HrW%2(IPu5GU7iM4(Pah9LrVQYO%~DHCMNVHd9Dh(Jk3 zh9LqaDHABkazb{LY&q;A6DWtsQAD5|$_cLJum;z1Ah?zT!L=L+uH`^*EeC>YIXuM4 zwHy(slaXPFK%JBcbZyE6*>c#0YdIoNl96GEKuO93O0t}g9VJ^1yT}B}A#xNED2H-_ zYdNgJwHyept!$X{0 z%MpP(85xEM)Jd5@*QQL6Er(sWmLmcs85xEMl%!0cB+CieQL^Q*i%g&#B1aK{awsRb zmctrc%Yoop4g}Y7Ah?zT!L=L+uI2C$C)aXBpiV}HAp&($CeXDh6J*O_7p~=qKuJc1 zAp#{S6DY}YLUxpFIqV`6D2K>VM4%kX39jX^2G?>RxRwLKwHyepSSaXB2Xt~0$rOjLAD%r;aZLelw@QW zB2bbtfs!mIWJk%C!!9y`a)=y71j?bD;93rAa4iRdYdH{H%Yoop4g}Y7Ah?#pL!4a8 z5rH}x8HNbdNtr;`rc976hh4aqBLXEE8HNayq)ebB%L&<0vgNRgOrRVhM-hQ?C?~j< z!x~)6f#6yW1lMvPxRwLKwHyep>?8=hsaSxpd88xuH~=>*K#1ZmIJ}H90;!EKyWPwf@?WE#L2ZB z5vY@qVTeGTlnHcg$^_YR*oA93B2bc%VTeFU$^=TXoRA$QTMoO(1j-?D6cH$ga)N6) ctiiP$2(INoa4iRdYdH{H%YpFamg`>qf4@8m?f?J) literal 65497 zcmb8&&u?5=LdEg;_m4zm21RHPQZEJ;ZV3{pC0HW4-R{^piG%I#2Ek0JVpqpiv8&uw zhT#DBoRipA_ef_tlO<)Sh?r`^Ygnu&rVjTWdYw|CnyN?eWA8sF=?Cl<(j7JO4#s^E+UyMih z2hGc?i+1;A<4vpm@_aZxcxNyiZ=ElkFNZ&UzIwiPzJ9PAc^ZAP-j^x)Pj^x)Pj^x)Pj^x)P zj^x)Pj^x)Pj^x)Pj^x*4<9O%rXt%Mqe^R{~pN2o3UyV4DUyV4DUyV4DUyV4DUyV4D zUyV4DUyV4DUyV4DUyXZv+eaUVzepPUC%aFp7v#%ug8YKSk^F+hk^F+hk^F+hk^F+h zk^F+hk^F+hk^F+hk^F+JKiU0w_etZU=hchx7vT@*7bA}37bA}37bA}37bA}37bA}3 z7bA}37bA}37bA|$Eymlg7^8bThtG~bdb<1e;#j!;!|~hU^M46{Zjp{G(UE03vO-5z z>Bt%#S*If#^G8M-kM}>Wn(L2fuJK55uJK55uJK55uJK55uJK55uJK55uJK55uJ=9& z(H$NBX2sI}XZQofrHx05(~U=p(~U=p(~U=p(~U=p(~U=p(~U=p)7{+t^^f+SS4{SQ z120ZC9w|;X9w|;X9w|;X9w|;X9w|;X9w|;X9w|znpsv>0t3Oq=UuBkPa3fLpoS|4C!F; zF{FdV$B+&dAH(|lA5}Ek*~>kIbg=jk(!t_GNC%4#Ass9}gmkd@5YoZoLr4eXLx@lO z7sIQFX7{+&I%!|DCa)q!8^iG1#_;B(ec+JsoH!sGcrNe%dgUNH| z&FKq>&ToZB`o*~Ojqpq7o8cd$9KgnO<2i`&`SPXA^w z3=ikl2eR@>eB=c;S+~`b_UWs~yI~-_vRdz6Up9u}!1&<3LHq*P`TgmF{6YBT;EWG_ zhY$Ub4n6(F1wQtZ>9M$Tho`64S2u5WZfmsE@4XoxJRdEc_AbJ&(=UCCqr1~B-k!Xq zoXmH1*zCV-h1Zu~xoCYlYz$k?%f>~kc@`e%^#9|ZO{f3exRqb+#}|u?!W7b(ANPCL zSCf_KO#Z~i+q0(A{rTMKHgUS?XgJSTK4^#C=$^Jt`pws^i`Ln6S(nek;qk%dbWY!! zp5*)CmxBj<=p8<8frXW)dQ-PNYc-C44UMb)Oedg6Vc&!dzuY)&k$A2G|Yqb1y8vk?+7YlzX-NPs@_>bx= zIIywcz{Y|D8w(C>EI6>S;K1&JuN*bIlfQ25760!#D-LX|IIywez{ZLL8!HZMtT?c{ z;%o5>X1F&O{(`aBd~>nJUn{`Ingbha4s5JBu(9UA#+n1WYrYYmEHp3LgUdRr{tI_JM23M`qVa==kSL>`hu(9sI#<~L=>ke$JJFv0t z!0x)QZC{+8w=Qq$T~rA#g0$U14(ym6-l>l6-oDS6-hU96-jq>6-l>u6-oDb6-hUI6-jq~ zvm`#Xyk&J0QLCGXTHQp{>L#LAHxaeEiKx|0M6GTjYIPG)s~ew2-mUPF z@#HUv&VPkp4wkEjR;q{As)shJhwfDm-LD>cP(Acc_0W5Tq4-znZnNKgyEfAYUs{)q zrOBPX+`7yUmDXi`sI)HgL#1_@A1bZO{7`9K=7&n_GCx#Wm-!)DmyP4ARu>mdc46MF zUYPlz(!$ISl@?}xsI)NiL#2h8A1W=({7`9O=7&lPGe1NNv-!07suiBkf11AfW>;pv zdS&K^N-HxzR9cz&q0-9C50zGCeyFrE^FyVTnI9^x%>2;Y%A7BZHqXLM#fOdf&v1{m zaoTT%o5+n%ue;$zPp=!EfNyoq!=jAt&4x~UXYtnJ*3ROch4I1i!JY8G<%5-jot>`@ z?|gpe^M%hBzqK@6nEY<(TT2h_9N!r~`!bw+zZ<{hr`NwZzG|Mf!u{yZ?}e`!-wxjl zwD>Z7kI>rS`b8(a)CxB(2N8@r@s4!(*X>8=z0204S2u4?&mYCxoxjxTzHaw>-OF&g zN8$U6hJ#1(+m0r`ZCp0H*UgK@>2<%~KE1xUzI=FfGamii^L{ftpwrgdNjz~?#@ zW?ZB}qY;h`zCC^spQa6iJXoAeKD-eNN)ZpYuJccEstt=V@9^Q;+x7KH0$Nw^2CVv%v z?ffAAV{#vUq4PK4_u*aM$=g^SPA?%moSu+*I6XDH1ustLoZdTFOixrQ9$>=h0eUz+ zKo6$}cne-C%z5^%M8%vFPILBfn)5Aqxz02a_NK8?=gg~h@LC!YVIdBQL@#FTyG>!YVIdBQL`0x+bjh0ygp@>`lYS z3uiX+0ygpjHu3^?^ICnsHyrlx-b4DDqze1_Qw0 zd40D|UW8R%gjHUIRbIeGUW8R%gjHUIRbIeGUWC-piA zb@C#t@*=GABCPTPHu55@@*=GABCPTPHu55@u4}?7FJL1t!rnBDyl`eCFJL1tU?VSJ zH?Otl;WK^Bde0I6xlUq)RbqryVuV#WwBnE6G25ck->?XFlbKV|=PgB--j`%-y3L~rvBdiJ|tO^4*3L~rvBdiJ|tO^4* z3L~s8ZNjQBV52a?-ZYHDaAu=0V52Z#qcC8%u$80hZoL;_zp`56HV0u97-1C{VHFs# z5g1_=7-1C{VHFs#5g1{0WfN9`0ULo4_NHM3hBF(10ULn<8-W4afo=EW4_>J873{M* zfe}`L5mtc_R)GNIwU>(;ti|ARV_5mu2AR*?}_kpUZ#5mu2AR*?}_kpUZ#5mr|>VHFv$5gB1` z8b)L|vk@7v5gD)%8L%7K>hoUuaEkqQ*niY1jIb(uWV$i4j(X5mto}R)ql@ zg%MVT5mto}R)ql@g%MVlHepp5uu&LcZyH8nII~e0uu&MWQ5bNxu=ru_-OsMuSNOzO zhd$*s|I^lHpIQ0s5LNt8VF?$1tk~>xO%=HKbKGX1KB~aQAI)(K&i`nRiZkbbG)D!_ z|7ea1od3}r6*&8&IXVT5k4G#NetcVYQ-F<40X8-T*w_?cV^e^QO#wDG1=!dWU}ID8 z9rebh02`YEY-|d!u_?gDrT`n80&Hvwu(2t?#-`xAu#HUtHZ}#=*c4!6Q-F<40X8-T z*w_?cV^e^QO~Lo>8k+)aYznZkDZs|202`YEY-|d!u_?gDrT`n8f^YaVHU-$&6kuah zfQ?N7HZ}#=*c4!6Q-F<40X8-T-zI2m3b3&$z{aKk8=C@bYznZkDZs|202`YEY-|d? zv&`5OU}IB&jZFbIHU-$&6kuahfQ?N7HZ}#=*c5y}lCdek#-;!pn*wZX3b3&$z{aKk z8=C@bYznZkDfkW+V^e^QO#wDG1=!dWU}IB&jZFbIHU-$&6kuah@J$cKrT`n80&Hvw zu(2t?#-;!pn*wZX3b3&$z{aNFjdEjCfQ?N7HZ}#=*c4!6Q-F<40X8-T*w_?cV^i?v zsUu ze^aIc=iij6z}YutIt7e(aEz=7tE>pCtO%>D2&=4sjjRZ(tO%>D2&=4sjjRZ(>zT01 z3fRbsus01OE1cQL3fRaB*vJYvn^pYMwHN+V0^#SfmcMl2Hg>+TCmVBLx(JsV)40@_ z#-+wIE;Uwx^Nnd-YE0u&V;YwltHAljG%j7-8kZWY!1=~B_NI|l85Fh%mAutg<4kvLdXqBCN6kHnJkDvLdXqBCN6kHnJkDu4lq3D_|ol!rnBD ztZ-%{D_|olU?VGFBP+a>WMoBHWkpzJMObA;SY-ulWJOqIMObA;SY-ulWJOqA&xBQ0 zz(!Vty=fR(;mk%>z(!WUMpnQ^R=7uKWJOqIMObA;SY<_6Wd&?xMObA;SY<_6Wd&?x zMOaen^R=`G9z(!WUMppE`RO!DYP`JNA zSY<_6WkpzJMObA8Y-B}PWkpzJMObA8Y-B}PUC)G7R=`G9guQ7PS>en^R=`G9z(!WU zMpn2rZ+x{Ptg<4kvLdXqBCN6kHnJkDvLdXqBCN6kHnJkDu4lq3D_|ol!rnBDtZ-%{ zD_|olU?VHwY*z6u2uHy@N%#6qv@Pz%(uerg15-3Y-s2<5FN6 zmjctc6j%k$2c~i9%GS6PSOv}prm;7Td|(x4&IeY3^MO_1d|(wg8(61+@kzk^*DZ}z zR&-{S6=9VXVU-oIkriQ;6=9VXVU-oIkriQeJrh=00UKEn_NHNEg)dysMplGXR)kengjH6ARaU@8R)kengjH6ARaU@8R)p2{Oju*Dh-a3d^WBP?JeEPQ*0@%4(Z3X8A`i?9ldunG&<2#c@^i?9ld zunG&<2#c_~q6w?8fQ_&Sd($w&!kIZN$_m)X3fRaB*vJaso?&D~SY<_6WkpzJMObA8 zY-B}PWkpzJMObA8Y-B}PUC)G7R=`G9guQ7PS>en^R=`G9z(!WUMppQ#21ZtdRaS&m zR)kengjH6+MplGXR)kengjH6+MplH?^-Nf01#DzR*qert70zsA1#DymY-9y&WQCtg zVPr*EWkpzJMObA;SY-ulWJOqIMObA;SY-ulWJOqA&xBQ0z(!Vty=fR(;mk%>z(!WU zMpnQ^R`|&xMplGXR)kengjH6ARaU@8R)kengjH6ARaU@8R)p2{Ojuen^R=`G9z(!WUMppPliIEjyl@(!? z6=9VXVU-oIkriQ;6=9VXVU-oIkriQeJrh=00UKEn_NHNEg)Y2U zMplGXR)kengjH6ARaU@8R)kengjH6ARaU@8R)p2{Ojuv7I@Y$=i_pfitKRl2=O;!4?+wc8UjgMYwTimo99LOMO+~d|w)S)5!N#aprtq6*%8l1 zI4#}m$4P6fvZ86ItO%>DfQ_sOtE>pCtO%>DfQ_sOtLvGt$_m)Xim*2gBP*QQ$O_oV z3fRaB*vJYWC^51ktg<4kvLdXqBCN6kHnJkDvLdXqBCN6kHnJkDu4lq3D_|ol!rnBD ztZ-%{D_|olU?VGFBP)EI#K?-U%8Ibcim=Lxu*wS9$cnJaim=Lxu*wS9$cnJKo(Ze0 zfQ_sOd($wo!kLY%fQ_txjjVu;tnhIXBP+rxE5a%(!YV7mDl1?kE5a%(!YV7mDl1?k zE5hn}CakgoHnJk@O~c3vXEw3|HnIXXvH~`;!pBLBtO%>D2&=3JtE>pCtbmQI2&=3J ztE>pCtbmQI2&?Ovu*wS9$cnHx4I?X@*~kjm$O_oV3fRaBA15)gBCN6^tg<4kvLdXq z0yeTDtg<4kvLdXq0yeTDtgdImDl1?kE5hD1jI3~GBP(DdD_|olU?VGhoW#hAu*!`lYS3THO50yeS& zHnIXXvcktnjI0Q&tO%>D2&=3JtE_;HtO%>D2&=3JtE_;HtO%>?nXt+V*vN{oHw_~z zoY}|<*vJal$O_oV3LhsivLdXqBCN6^tg<4kvH~`;BCN6^tg<4kvH~`;BCM`w!YV6Z zBP+t*G>oipW+N+LBP(DdD_|ole4NC{im=Lxu*!Ut)uvH~`;BJ54W$O>mRvH~`;0yeS&HnPIUNsO!rtE>pCtO%>D2&=4s zjjRZ(tO%>D2&=4sjjRZ(>zT013fRbsus01OE1cQL3fRaB*vJal$O<1PF|s18vLdXq zBCN6^tg-?&vLdXqBCN6^tg-?&vLdXmXTmBgU?VHS-ZYG?aAqSbU?VGFBP(DdD}0>9 z$cnJaim=Lxu*! diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index 3b25ee84f..d1271da7e 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -7,6 +7,7 @@ from nmmo.systems import skill from nmmo.task import predicate_api as p from nmmo.task import task_api as t +from nmmo.task import task_spec as ts from nmmo.task import base_predicates as bp from nmmo.task.game_state import GameState from nmmo.task.group import Group @@ -231,10 +232,14 @@ def test_task_spec_based_curriculum(self): If not provided, the standard Task is used. """ task_spec = [ # (reward_to, predicate function, kwargs) - ('team', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 1}), # one task - ('agent', bp.CountEvent, {'event': 'PLAYER_KILL', 'N': 2}), - ('agent', bp.AllDead, {'target': 'left_team'}), - ('team', bp.CanSeeAgent, {'target': 'right_team_leader'}, {'task_cls': t.OngoingTask}), + ts.TaskSpec(eval_fn=bp.CountEvent, eval_fn_kwargs={'event': 'PLAYER_KILL', 'N': 1}, + reward_to='team'), + ts.TaskSpec(eval_fn=bp.CountEvent, eval_fn_kwargs={'event': 'PLAYER_KILL', 'N': 2}, + reward_to='agent'), + ts.TaskSpec(eval_fn=bp.AllDead, eval_fn_kwargs={'target': 'left_team'}, + reward_to='agent'), + ts.TaskSpec(eval_fn=bp.CanSeeAgent, eval_fn_kwargs={'target': 'right_team_leader'}, + task_cls=t.OngoingTask, reward_to='team'), ] # NOTE: len(teams) and len(task_spec) don't need to match @@ -243,7 +248,7 @@ def test_task_spec_based_curriculum(self): config = ScriptedAgentTestConfig() env = Env(config) - env.reset(make_task_fn=lambda: t.make_team_tasks(teams, task_spec)) + env.reset(make_task_fn=lambda: ts.make_task_from_spec(teams, task_spec)) self.assertEqual(len(env.tasks), 6) # 6 tasks were created self.assertEqual(env.tasks[0].name, # team 0 task assigned to agents 1,2,3 diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index cefbb218e..f95d92348 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -1,13 +1,14 @@ '''Manual test for creating learning curriculum manually''' # pylint: disable=invalid-name,redefined-outer-name,bad-builtin # pylint: disable=wildcard-import,unused-wildcard-import +from typing import List import nmmo import nmmo.lib.material as m from nmmo.task.base_predicates import * -from nmmo.task.task_api import OngoingTask, make_team_tasks +from nmmo.task.task_api import OngoingTask from nmmo.task import constraint as c - +from nmmo.task.task_spec import TaskSpec, make_task_from_spec EVENT_NUMBER_GOAL = [3, 4, 5, 7, 9, 12, 15, 20, 30, 50] INFREQUENT_GOAL = list(range(1, 10)) @@ -23,25 +24,7 @@ EQUIP_ITEM = c.armour + c.weapons + c.tools + c.ammunition HARVEST_ITEM = c.weapons + c.ammunition + c.consumables -""" task_spec is a list of tuple (reward_to, evaluation function, eval_fn_kwargs, task_kwargs) - - each tuple in the task_spec will create tasks for a team in teams - - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded - - evaluation functions from the base_predicates.py or could be custom functions like above - - eval_fn_kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] - these str will be translated into the actual agent ids - - task_kwargs are the optional, additional args that go into the task. - * 'task_cls' specifies the task class to be used. If not provided, the standard Task is used. - * `sampling_weight` specifies the weight of the task in the curriculum sampling. Default is 1 - """ -task_spec = [] +task_spec: List[TaskSpec] = [] # explore, eat, drink, attack any agent, harvest any item, level up any skill # which can happen frequently @@ -49,129 +32,150 @@ 'SCORE_HIT', 'HARVEST_ITEM', 'LEVEL_UP'] for event_code in essential_skills: for cnt in EVENT_NUMBER_GOAL: - task_spec += [('agent', CountEvent, {'event': event_code, 'N': cnt}, - {'sampling_weight': 30})] + task_spec.append(TaskSpec(eval_fn=CountEvent, + eval_fn_kwargs={'event': event_code, 'N': cnt}, + sampling_weight=30)) # item/market skills, which happen less frequently or should not do too much item_skills = ['CONSUME_ITEM', 'GIVE_ITEM', 'DESTROY_ITEM', 'EQUIP_ITEM', 'GIVE_GOLD', 'LIST_ITEM', 'EARN_GOLD', 'BUY_ITEM'] for event_code in item_skills: - task_spec += [('agent', CountEvent, {'event': event_code, 'N': cnt}) + task_spec += [TaskSpec(eval_fn=CountEvent, eval_fn_kwargs={'event': event_code, 'N': cnt}) for cnt in INFREQUENT_GOAL] # less than 10 # find resource tiles for resource in m.Harvestable: for reward_to in ['agent', 'team']: - task_spec.append((reward_to, CanSeeTile, {'tile_type': resource}, - {'sampling_weight': 10})) # sample this more + task_spec.append(TaskSpec(eval_fn=CanSeeTile, eval_fn_kwargs={'tile_type': resource}, + reward_to=reward_to, sampling_weight=10)) # stay alive ... like ... for 300 ticks # i.e., getting incremental reward for each tick alive as an individual or a team for reward_to in ['agent', 'team']: for num_tick in STAY_ALIVE_GOAL: - task_spec.append((reward_to, TickGE, {'num_tick': num_tick})) + task_spec.append(TaskSpec(eval_fn=TickGE, eval_fn_kwargs={'num_tick': num_tick}, + reward_to=reward_to)) # protect the leader: get reward for each tick the leader is alive # NOTE: a tuple of length four, to pass in the task_kwargs -task_spec.append(('team', StayAlive, {'target': 'my_team_leader'}, {'task_cls': OngoingTask})) +task_spec.append(TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={'target': 'my_team_leader'}, + reward_to='team', task_cls=OngoingTask)) # want the other team or team leader to die for target in ['left_team', 'left_team_leader', 'right_team', 'right_team_leader']: - task_spec.append(('team', AllDead, {'target': target})) + task_spec.append(TaskSpec(eval_fn=AllDead, eval_fn_kwargs={'target': target}, + reward_to='team')) # occupy the center tile, assuming the Medium map size # TODO: it'd be better to have some intermediate targets toward the center for reward_to in ['agent', 'team']: - task_spec.append((reward_to, OccupyTile, {'row': 80, 'col': 80})) # TODO: get config + task_spec.append(TaskSpec(eval_fn=OccupyTile, eval_fn_kwargs={'row': 80, 'col': 80}, + reward_to=reward_to)) # TODO: get config for map size # form a tight formation, for a certain number of ticks def PracticeFormation(gs, subject, dist, num_tick): return AllMembersWithinRange(gs, subject, dist) * TickGE(gs, subject, num_tick) for dist in [1, 3, 5, 10]: - task_spec += [('team', PracticeFormation, {'dist': dist, 'num_tick': num_tick}) - for num_tick in STAY_ALIVE_GOAL] + task_spec += [TaskSpec(eval_fn=PracticeFormation, + eval_fn_kwargs={'dist': dist, 'num_tick': num_tick}, + reward_to='team') for num_tick in STAY_ALIVE_GOAL] # find the other team leader for reward_to in ['agent', 'team']: for target in ['left_team_leader', 'right_team_leader']: - task_spec.append((reward_to, CanSeeAgent, {'target': target})) + task_spec.append(TaskSpec(eval_fn=CanSeeAgent, eval_fn_kwargs={'target': target}, + reward_to=reward_to)) # find the other team (any agent) for reward_to in ['agent']: #, 'team']: for target in ['left_team', 'right_team']: - task_spec.append((reward_to, CanSeeGroup, {'target': target})) + task_spec.append(TaskSpec(eval_fn=CanSeeGroup, eval_fn_kwargs={'target': target}, + reward_to=reward_to)) # explore the map -- sum the l-inf distance traveled by all subjects for dist in [10, 20, 30, 50, 100]: # each agent - task_spec.append(('agent', DistanceTraveled, {'dist': dist})) + task_spec.append(TaskSpec(eval_fn=DistanceTraveled, eval_fn_kwargs={'dist': dist})) for dist in [30, 50, 70, 100, 150, 200, 300, 500]: # summed over all team members - task_spec.append(('team', DistanceTraveled, {'dist': dist})) + task_spec.append(TaskSpec(eval_fn=DistanceTraveled, eval_fn_kwargs={'dist': dist}, + reward_to='team')) # level up a skill for skill in SKILLS: for level in LEVEL_GOAL[1:]: # since this is an agent task, num_agent must be 1 - task_spec.append(('agent', AttainSkill, {'skill': skill, 'level': level, 'num_agent': 1}, - {'sampling_weight': 10*(5-level) if level < 5 else 1})) + task_spec.append(TaskSpec(eval_fn=AttainSkill, + eval_fn_kwargs={'skill': skill, 'level': level, 'num_agent': 1}, + reward_to='agent', + sampling_weight=10*(5-level) if level < 5 else 1)) # make attain skill a team task by varying the number of agents for skill in SKILLS: for level in LEVEL_GOAL[1:]: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', AttainSkill, - {'skill': skill, 'level': level,'num_agent': num_agent})) + task_spec.append( + TaskSpec(eval_fn=AttainSkill, + eval_fn_kwargs={'skill': skill, 'level': level, 'num_agent': num_agent}, + reward_to='team')) # practice specific combat style for style in COMBAT_STYLE: for cnt in EVENT_NUMBER_GOAL: - task_spec.append(('agent', ScoreHit, {'combat_style': style, 'N': cnt}, - {'sampling_weight': 5})) + task_spec.append(TaskSpec(eval_fn=ScoreHit, eval_fn_kwargs={'combat_style': style, 'N': cnt}, + sampling_weight=5)) for cnt in TEAM_NUMBER_GOAL: - task_spec.append(('team', ScoreHit, {'combat_style': style, 'N': cnt})) + task_spec.append(TaskSpec(eval_fn=ScoreHit, eval_fn_kwargs={'combat_style': style, 'N': cnt}, + reward_to='team')) # defeat agents of a certain level as a team for agent_type in ['player', 'npc']: # c.AGENT_TYPE_CONSTRAINT for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', DefeatEntity, - {'agent_type': agent_type, 'level': level, 'num_agent': num_agent})) + task_spec.append(TaskSpec(eval_fn=DefeatEntity, + eval_fn_kwargs={'agent_type': agent_type, 'level': level, + 'num_agent': num_agent}, + reward_to='team')) # hoarding gold -- evaluated on the current gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', HoardGold, {'amount': amount}, - {'sampling_weight': 3})) + task_spec.append(TaskSpec(eval_fn=HoardGold, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', HoardGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=HoardGold, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # earning gold -- evaluated on the total gold earned by selling items # does NOT include looted gold for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', EarnGold, {'amount': amount}, - {'sampling_weight': 3})) + task_spec.append(TaskSpec(eval_fn=EarnGold, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', EarnGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=EarnGold, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # spending gold, by buying items for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', SpendGold, {'amount': amount}, - {'sampling_weight': 3})) + task_spec.append(TaskSpec(eval_fn=SpendGold, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', SpendGold, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=SpendGold, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # making profits by trading -- only buying and selling are counted for amount in EVENT_NUMBER_GOAL: - task_spec.append(('agent', MakeProfit, {'amount': amount}, - {'sampling_weight': 3})) + task_spec.append(TaskSpec(eval_fn=MakeProfit, eval_fn_kwargs={'amount': amount}, + sampling_weight=3)) for amount in TEAM_NUMBER_GOAL: - task_spec.append(('team', MakeProfit, {'amount': amount})) + task_spec.append(TaskSpec(eval_fn=MakeProfit, eval_fn_kwargs={'amount': amount}, + reward_to='team')) # managing inventory space def PracticeInventoryManagement(gs, subject, space, num_tick): return InventorySpaceGE(gs, subject, space) * TickGE(gs, subject, num_tick) for space in [2, 4, 8]: - task_spec += [('agent', PracticeInventoryManagement, {'space': space, 'num_tick': num_tick}) + task_spec += [TaskSpec(eval_fn=PracticeInventoryManagement, + eval_fn_kwargs={'space': space, 'num_tick': num_tick}) for num_tick in STAY_ALIVE_GOAL] # own item, evaluated on the current inventory @@ -180,29 +184,32 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', OwnItem, - {'item': item, 'level': level, 'quantity': quantity}, - {'sampling_weight': 4-level if level < 4 else 1})) - + task_spec.append(TaskSpec(eval_fn=OwnItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', OwnItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=OwnItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # equip item, evaluated on the current inventory and equipment status for item in EQUIP_ITEM: for level in LEVEL_GOAL: # agent task - task_spec.append(('agent', EquipItem, - {'item': item, 'level': level, 'num_agent': 1}, - {'sampling_weight': 4-level if level < 4 else 1})) - + task_spec.append(TaskSpec(eval_fn=EquipItem, + eval_fn_kwargs={'item': item, 'level': level, 'num_agent': 1}, + sampling_weight=4-level if level < 4 else 1)) # team task for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', EquipItem, - {'item': item, 'level': level, 'num_agent': num_agent})) + task_spec.append(TaskSpec(eval_fn=EquipItem, + eval_fn_kwargs={'item': item, 'level': level, + 'num_agent': num_agent}, + reward_to='team')) # consume items (ration, potion), evaluated based on the event log for item in c.consumables: @@ -210,15 +217,17 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', ConsumeItem, - {'item': item, 'level': level, 'quantity': quantity}, - {'sampling_weight': 4-level if level < 4 else 1})) - + task_spec.append(TaskSpec(eval_fn=ConsumeItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', ConsumeItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=ConsumeItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # harvest items, evaluated based on the event log for item in HARVEST_ITEM: @@ -226,15 +235,17 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', HarvestItem, - {'item': item, 'level': level, 'quantity': quantity}, - {'sampling_weight': 4-level if level < 4 else 1})) - + task_spec.append(TaskSpec(eval_fn=HarvestItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', HarvestItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=HarvestItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # list items, evaluated based on the event log for item in ALL_ITEM: @@ -242,15 +253,17 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', ListItem, - {'item': item, 'level': level, 'quantity': quantity}, - {'sampling_weight': 4-level if level < 4 else 1})) - + task_spec.append(TaskSpec(eval_fn=ListItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', ListItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=ListItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # buy items, evaluated based on the event log for item in ALL_ITEM: @@ -258,23 +271,27 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): # agent task for quantity in ITEM_NUM_GOAL: if level + quantity <= 6 or quantity == 1: # heuristic prune - task_spec.append(('agent', BuyItem, - {'item': item, 'level': level, 'quantity': quantity}, - {'sampling_weight': 4-level if level < 4 else 1})) - + task_spec.append(TaskSpec(eval_fn=BuyItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + sampling_weight=4-level if level < 4 else 1)) # team task for quantity in TEAM_ITEM_GOAL: if level + quantity <= 10 or quantity == 1: # heuristic prune - task_spec.append(('team', BuyItem, - {'item': item, 'level': level, 'quantity': quantity})) + task_spec.append(TaskSpec(eval_fn=BuyItem, + eval_fn_kwargs={'item': item, 'level': level, + 'quantity': quantity}, + reward_to='team')) # fully armed, evaluated based on the current player/inventory status for style in COMBAT_STYLE: for level in LEVEL_GOAL: for num_agent in AGENT_NUM_GOAL: if level + num_agent <= 6 or num_agent == 1: # heuristic prune - task_spec.append(('team', FullyArmed, - {'combat_style': style, 'level': level, 'num_agent': num_agent})) + task_spec.append(TaskSpec(eval_fn=FullyArmed, + eval_fn_kwargs={'combat_style': style, 'level': level, + 'num_agent': num_agent}, + reward_to='team')) if __name__ == '__main__': @@ -298,7 +315,7 @@ def check_task_spec(spec_list): env = nmmo.Env(config) for idx, single_spec in enumerate(spec_list): # pylint: disable=cell-var-from-loop - test_task = make_team_tasks(teams, [single_spec]) + test_task = make_task_from_spec(teams, [single_spec]) try: env.reset(make_task_fn=lambda: test_task) for _ in range(3): @@ -320,7 +337,7 @@ def check_task_spec(spec_list): # print(sample_task[-1].name) # for now, we only use the 1535 tasks with reward_to=agent - flt_spec = [spec for spec in task_spec if spec[0] == 'agent'] + flt_spec = [spec for spec in task_spec if spec.reward_to == 'agent'] # test if the task spec is pickalable with open('sample_curriculum.pkl', 'wb') as f: diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 525ccf9da..1b2bb720e 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -6,7 +6,8 @@ import nmmo from nmmo.core.env import Env from nmmo.task.predicate_api import make_predicate, Predicate -from nmmo.task.task_api import Task, make_team_tasks, OngoingTask +from nmmo.task.task_api import Task, OngoingTask +from nmmo.task.task_spec import TaskSpec, make_task_from_spec from nmmo.task.group import Group from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive @@ -227,7 +228,9 @@ def PracticeFormation(gs, subject, dist, num_tick): # team should stay together within 1 tile for 10 ticks goal_tick = 10 - task_spec = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}) + task_spec = TaskSpec(eval_fn=PracticeFormation, + eval_fn_kwargs={'dist': 1, 'num_tick': goal_tick}, + reward_to='team') # create the test task from the task spec teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} @@ -237,7 +240,7 @@ def PracticeFormation(gs, subject, dist, num_tick): config.IMMORTAL = True env = Env(config) - env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec])) + env.reset(make_task_fn=lambda: make_task_from_spec(teams, [task_spec])) # check the task information task = env.tasks[0] @@ -250,7 +253,7 @@ def PracticeFormation(gs, subject, dist, num_tick): 'TickGE(gs, subject, num_tick)') self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) self.assertEqual(task.subject, tuple(teams[0])) - self.assertEqual(task.kwargs, task_spec[2]) + self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) self.assertEqual(task.assignee, tuple(teams[0])) # check the agent-task map @@ -278,9 +281,11 @@ def PracticeFormation(gs, subject, dist, num_tick): # test the task_spec_with_embedding task_embedding = np.ones(config.TASK_EMBED_DIM, dtype=np.float32) - task_spec_with_embedding = ('team', PracticeFormation, {'dist': 1, 'num_tick': goal_tick}, - {'embedding': task_embedding}) - env.reset(make_task_fn=lambda: make_team_tasks(teams, [task_spec_with_embedding])) + task_spec_with_embedding = TaskSpec(eval_fn=PracticeFormation, + eval_fn_kwargs={'dist': 1, 'num_tick': goal_tick}, + reward_to='team', + embedding=task_embedding) + env.reset(make_task_fn=lambda: make_task_from_spec(teams, [task_spec_with_embedding])) task = env.tasks[0] self.assertEqual(task.name, @@ -292,7 +297,7 @@ def PracticeFormation(gs, subject, dist, num_tick): 'TickGE(gs, subject, num_tick)') self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) self.assertEqual(task.subject, tuple(teams[0])) - self.assertEqual(task.kwargs, task_spec[2]) + self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) self.assertEqual(task.assignee, tuple(teams[0])) self.assertTrue(np.array_equal(task.embedding, task_embedding)) @@ -347,38 +352,23 @@ def test_completed_tasks_in_info(self): # DONE - def test_make_tasks_with_task_spec(self): - """ - task_spec is a list of tuple (reward_to, evaluation function, eval_fn_kwargs, task_kwargs) - each tuple in the task_spec will create tasks for a team in teams - - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded - - evaluation functions from the base_predicates.py or could be custom functions like above - - eval_fn_kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] - these str will be translated into the actual agent ids - - task_kwargs are the optional, additional args that go into the task. - * 'task_cls' specifies the task class to be used. - If not provided, the standard Task is used. - """ + def test_make_task_from_spec(self): teams = {0:[1,2,3], 1:[4,5,6]} + test_embedding = np.array([1,2,3]) task_spec = [ - ('agent', TickGE, {'num_tick': 20}), - ('agent', StayAlive, {}, {'task_cls': OngoingTask}), - ('team', StayAlive, {'target': 'my_team_leader'}, {'task_cls': OngoingTask}), - ('team', StayAlive, {'target': 'left_team'}, - {'task_cls': OngoingTask, 'reward_multiplier': 2, 'embedding': np.array([1,2,3])}), + TaskSpec(eval_fn=TickGE, eval_fn_kwargs={'num_tick': 20}), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={}, task_cls=OngoingTask), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={'target': 'my_team_leader'}, + task_cls=OngoingTask, reward_to='team'), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={'target': 'left_team'}, + task_cls=OngoingTask, task_kwargs={'reward_multiplier': 2}, + reward_to='team', embedding=test_embedding), ] task_list = [] # testing each task spec, individually for single_spec in task_spec: - task_list.append(make_team_tasks(teams, [single_spec])) + task_list.append(make_task_from_spec(teams, [single_spec])) # check the task names self.assertEqual(task_list[0][0].name, From 528ed3479b2d5a46cb11483605b06cb23fff8a35 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Fri, 14 Jul 2023 13:44:26 -0700 Subject: [PATCH 062/113] remove some no-longer needed stats --- nmmo/core/env.py | 12 ------------ nmmo/render/replay_helper.py | 3 ++- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 208fa6094..e23de6da7 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -41,7 +41,6 @@ def __init__(self, self.possible_agents = list(range(1, config.PLAYER_N + 1)) self._agents = None self._dead_agents = set() - self._episode_stats = defaultdict(lambda: defaultdict(float)) self._dead_this_tick = None self.scripted_agents = set() @@ -178,7 +177,6 @@ def reset(self, map_id=None, seed=None, options=None, self.realm.reset(self._np_random, map_id) self._agents = list(self.realm.players.keys()) self._dead_agents = set() - self._episode_stats.clear() self._dead_this_tick = {} # check if there are scripted agents @@ -323,7 +321,6 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): self.realm.tick >= self.config.HORIZON or \ (self.config.RESET_ON_DEATH and len(self._dead_agents) > 0): self._dead_agents.add(agent_id) - self._episode_stats[agent_id]["death_tick"] = self.realm.tick dones[agent_id] = True else: dones[agent_id] = False @@ -333,15 +330,6 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): gym_obs = {a: o.to_gym() for a,o in self.obs.items()} rewards, infos = self._compute_rewards() - for k,r in rewards.items(): - self._episode_stats[k]['reward'] += r - - # When the episode ends, add the episode stats to the info of the last agents - if len(self._dead_agents) == len(self.possible_agents): - for agent_id, stats in self._episode_stats.items(): - if agent_id not in infos: - infos[agent_id] = {} - infos[agent_id]["episode_stats"] = stats # NOTE: all obs, rewards, dones, infos have data for each agent in self.agents return gym_obs, rewards, dones, infos diff --git a/nmmo/render/replay_helper.py b/nmmo/render/replay_helper.py index a16564e7b..119ee4ba8 100644 --- a/nmmo/render/replay_helper.py +++ b/nmmo/render/replay_helper.py @@ -81,7 +81,6 @@ def save(self, filename_prefix, compress=True): replay_file = f'{filename_prefix}.replay.json' metadata_file = f'{filename_prefix}.metadata.pkl' - logging.info('Saving replay to %s ...', replay_file) data = json.dumps({ 'map': self.map, @@ -92,6 +91,8 @@ def save(self, filename_prefix, compress=True): replay_file = f'{filename_prefix}.replay.lzma' data = lzma.compress(data, format=lzma.FORMAT_ALONE) + logging.info('Saving replay to %s ...', replay_file) + with open(replay_file, 'wb') as out: out.write(data) From 5d71913548dce97c3749cfeada6d30b2ac2be6d1 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Sat, 15 Jul 2023 14:22:50 -0700 Subject: [PATCH 063/113] fix task embedding observation space --- nmmo/core/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index d0703b03c..70e5b39e7 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -75,7 +75,7 @@ def mask_box(length): "AgentId": gym.spaces.Discrete(self.config.PLAYER_N+1), "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes), "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes), - "Task": gym.spaces.Box(low=-np.inf, high=-np.inf, shape=(self.config.TASK_EMBED_DIM,)), + "Task": gym.spaces.Box(low=-2**15, high=2**15-1, shape=(self.config.TASK_EMBED_DIM,)), } if self.config.ITEM_SYSTEM_ENABLED: From f814c20a95c159a0f84ad6667be0525015aca53a Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Wed, 19 Jul 2023 15:20:04 -0700 Subject: [PATCH 064/113] cp --- nmmo/core/env.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 70e5b39e7..e39348cdf 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -163,8 +163,7 @@ def action_space(self, agent: AgentID): # TODO: This doesn't conform to the PettingZoo API # pylint: disable=arguments-renamed def reset(self, map_id=None, seed=None, options=None, - make_task_fn: Callable=None, - sample_training_tasks=False): + make_task_fn: Callable=None): '''OpenAI Gym API reset function Loads a new game map and returns initial observations @@ -198,7 +197,7 @@ def reset(self, map_id=None, seed=None, options=None, self.scripted_agents.add(eid) ent.agent.set_rng(self._np_random) - if self.curriculum_file_path is not None and sample_training_tasks is True: + if self.curriculum_file_path is not None: self.tasks = self._sample_training_tasks() elif make_task_fn is not None: self.tasks = make_task_fn() @@ -219,7 +218,6 @@ def _sample_training_tasks(self): with open(self.curriculum_file_path, 'rb') as f: # curriculum file may have been changed, so read the file when sampling curriculum = dill.load(f) # a list of TaskSpec - f.close() sampling_weights = [spec.sampling_weight for spec in curriculum] sampled_spec = self._np_random.choice(curriculum, size=len(self.possible_agents), @@ -503,6 +501,8 @@ def _compute_rewards(self): task_rewards, task_infos = task.compute_rewards(self.game_state) for agent_id, reward in task_rewards.items(): if agent_id in agents: + if reward > 0: + print("task reward", agent_id, task.name, reward) rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress From 75fc43c9e9a8e319bc0182adbc810b42dd66df81 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Wed, 19 Jul 2023 15:42:27 -0700 Subject: [PATCH 065/113] cp --- nmmo/core/env.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index e39348cdf..5efde650d 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -501,8 +501,6 @@ def _compute_rewards(self): task_rewards, task_infos = task.compute_rewards(self.game_state) for agent_id, reward in task_rewards.items(): if agent_id in agents: - if reward > 0: - print("task reward", agent_id, task.name, reward) rewards[agent_id] = rewards.get(agent_id,0) + reward infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress From 90423d6b91fdda09fc6a1de398f06745b47c7230 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 21 Jul 2023 12:47:17 +0900 Subject: [PATCH 066/113] added no-op, checked tests --- nmmo/core/action.py | 33 ++++++++++++++++++++++++++------ nmmo/core/env.py | 36 ++++++----------------------------- nmmo/core/observation.py | 18 ++++++++++++------ scripted/baselines.py | 6 +++--- tests/action/test_ammo_use.py | 7 ++++--- tests/testhelpers.py | 5 +++-- 6 files changed, 55 insertions(+), 50 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 1bf4e4b1e..9aa785a38 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -7,8 +7,10 @@ from nmmo.lib import utils from nmmo.lib.utils import staticproperty from nmmo.systems.item import Item, Stack +from nmmo.entity.entity import Entity from nmmo.lib.log import EventCode + class NodeType(Enum): #Tree edges STATIC = auto() #Traverses all edges without decisions @@ -304,12 +306,25 @@ class Target(Node): @classmethod def N(cls, config): - return config.PLAYER_N_OBS + return config.PLAYER_N_OBS + 1 # +1 for the "None" target def deserialize(realm, entity, index: int): - # NOTE: index is the entity id - # CHECK ME: should index be renamed to ent_id? - return realm.entity_or_none(index) + # NOTE: index is from the entity obs, NOT the entity id + if index >= realm.config.PLAYER_N_OBS or index < 0: # checking for the "None" target + return None + + radius = realm.config.PLAYER_VISION_RADIUS + entity_obs = Entity.Query.window( + realm.datastore, + entity.row.val, entity.col.val, + radius + ) + + if index >= entity_obs.shape[0]: + return None + + entity_id = entity_obs[index, Entity.State.attr_name_to_col["id"]] + return realm.entity_or_none(entity_id) def args(stim, entity, config): #Should pass max range? @@ -351,7 +366,7 @@ class InventoryItem(Node): @classmethod def N(cls, config): - return config.INVENTORY_N_OBS + return config.INVENTORY_N_OBS + 1 # +1 for the "None" item # TODO(kywch): What does args do? def args(stim, entity, config): @@ -359,6 +374,9 @@ def args(stim, entity, config): def deserialize(realm, entity, index: int): # NOTE: index is from the inventory, NOT item id + if index >= realm.config.INVENTORY_N_OBS or index < 0: # checking for the "None" item + return None + inventory = Item.Query.owned_by(realm.datastore, entity.id.val) if index >= inventory.shape[0]: @@ -543,7 +561,7 @@ class MarketItem(Node): @classmethod def N(cls, config): - return config.MARKET_N_OBS + return config.MARKET_N_OBS + 1 # +1 for the "None" item # TODO(kywch): What does args do? def args(stim, entity, config): @@ -551,6 +569,9 @@ def args(stim, entity, config): def deserialize(realm, entity, index: int): # NOTE: index is from the market, NOT item id + if index >= realm.config.MARKET_N_OBS or index < 0: # checking for the "None" item + return None + market = Item.Query.for_sale(realm.datastore) if index >= market.shape[0]: diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 70e5b39e7..77d0af831 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,7 +1,7 @@ import functools from typing import Any, Dict, List, Callable from collections import defaultdict -from copy import copy +from copy import copy, deepcopy import dill import gym @@ -13,7 +13,6 @@ from nmmo.core.config import Default from nmmo.core.observation import Observation from nmmo.core.tile import Tile -from nmmo.core import action as Action from nmmo.entity.entity import Entity from nmmo.systems.item import Item from nmmo.task import task_api, task_spec @@ -85,34 +84,11 @@ def mask_box(length): obs_space["Market"] = box(self.config.MARKET_N_OBS, Item.State.num_attributes) if self.config.PROVIDE_ACTION_TARGETS: - mask_spec = {} - mask_spec[Action.Move] = gym.spaces.Dict( - {Action.Direction: mask_box(len(Action.Direction.edges))}) - if self.config.COMBAT_SYSTEM_ENABLED: - mask_spec[Action.Attack] = gym.spaces.Dict({ - Action.Style: mask_box(3), - Action.Target: mask_box(self.config.PLAYER_N_OBS)}) - if self.config.ITEM_SYSTEM_ENABLED: - mask_spec[Action.Use] = gym.spaces.Dict( - {Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS)}) - mask_spec[Action.Destroy] = gym.spaces.Dict( - {Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS)}) - mask_spec[Action.Give] = gym.spaces.Dict({ - Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS), - Action.Target: mask_box(self.config.PLAYER_N_OBS)}) - if self.config.EXCHANGE_SYSTEM_ENABLED: - mask_spec[Action.Buy] = gym.spaces.Dict( - {Action.MarketItem: mask_box(self.config.MARKET_N_OBS)}) - mask_spec[Action.Sell] = gym.spaces.Dict({ - Action.InventoryItem: mask_box(self.config.INVENTORY_N_OBS), - Action.Price: mask_box(self.config.PRICE_N_OBS)}) - mask_spec[Action.GiveGold] = gym.spaces.Dict({ - Action.Price: mask_box(self.config.PRICE_N_OBS), - Action.Target: mask_box(self.config.PLAYER_N_OBS)}) - if self.config.COMMUNICATION_SYSTEM_ENABLED: - mask_spec[Action.Comm] = gym.spaces.Dict( - {Action.Token: mask_box(self.config.COMMUNICATION_NUM_TOKENS)}) - obs_space['ActionTargets'] = gym.spaces.Dict(mask_spec) + mask_spec = deepcopy(self._atn_space) + for atn in mask_spec: + for arg in atn.edges: + mask_spec[atn][arg] = mask_box(mask_spec[atn][arg].n) + obs_space['ActionTargets'] = mask_spec return gym.spaces.Dict(obs_space) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 1d8a4a43f..d5546d836 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -220,7 +220,8 @@ def _make_attack_mask(self): assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_RANGE_REACH assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_MAGE_REACH assert self.config.COMBAT_RANGE_REACH == self.config.COMBAT_MAGE_REACH - attack_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) + attack_mask = np.zeros(self.config.PLAYER_N_OBS + 1, dtype=np.int8) # +1 for No action + attack_mask[-1] = 1 # No action if self.dummy_obs: return attack_mask @@ -246,7 +247,8 @@ def _make_attack_mask(self): def _make_use_mask(self): # empty inventory -- nothing to use - use_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) + use_mask = np.zeros(self.config.INVENTORY_N_OBS + 1, dtype=np.int8) # +1 for No action + use_mask[-1] = 1 # No action if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ or self.dummy_obs or self.agent_in_combat: return use_mask @@ -294,7 +296,8 @@ def _item_skill(self): } def _make_destroy_item_mask(self): - destroy_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) + destroy_mask = np.zeros(self.config.INVENTORY_N_OBS + 1, dtype=np.int8) # +1 for No action + destroy_mask[-1] = 1 # No action # empty inventory -- nothing to destroy if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ or self.dummy_obs or self.agent_in_combat: @@ -307,7 +310,8 @@ def _make_destroy_item_mask(self): return destroy_mask def _make_give_target_mask(self): - give_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) + give_mask = np.zeros(self.config.PLAYER_N_OBS + 1, dtype=np.int8) # +1 for No action + give_mask[-1] = 1 # No action # empty inventory -- nothing to give if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ or self.dummy_obs or self.agent_in_combat: @@ -335,7 +339,8 @@ def _make_give_gold_mask(self): return mask def _make_sell_mask(self): - sell_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) + sell_mask = np.zeros(self.config.INVENTORY_N_OBS + 1, dtype=np.int8) # +1 for No action + sell_mask[-1] = 1 # No action # empty inventory -- nothing to sell if not (self.config.EXCHANGE_SYSTEM_ENABLED and self.inventory.len > 0) \ or self.dummy_obs or self.agent_in_combat: @@ -348,7 +353,8 @@ def _make_sell_mask(self): return sell_mask def _make_buy_mask(self): - buy_mask = np.zeros(self.config.MARKET_N_OBS, dtype=np.int8) + buy_mask = np.zeros(self.config.MARKET_N_OBS + 1, dtype=np.int8) # +1 for No action + buy_mask[-1] = 1 # No action if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: return buy_mask diff --git a/scripted/baselines.py b/scripted/baselines.py index f2fd358bc..4bf2add75 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -2,8 +2,8 @@ # pylint: disable=all from typing import Dict - from collections import defaultdict +import numpy as np import nmmo from nmmo import material @@ -105,11 +105,11 @@ def scan_agents(self): self.closestID = None if self.closest is not None: - self.closestID = self.closest.id + self.closestID = self.ob.entities.index(self.closest.id) self.attackerID = None if self.attacker is not None: - self.attackerID = self.attacker.id + self.attackerID = self.ob.entities.index(self.attacker.id) self.target = None self.targetID = None diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 9adf770eb..372b98287 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -29,7 +29,8 @@ def _assert_action_targets_zero(self, gym_obs): + np.sum(gym_obs['ActionTargets'][action.Buy][action.MarketItem]) for atn in [action.Use, action.Give, action.Destroy, action.Sell]: mask += np.sum(gym_obs['ActionTargets'][atn][action.InventoryItem]) - self.assertEqual(mask, 0) + # MarketItem and InventoryTarget have no-action flags, which sum up to 5 + self.assertEqual(mask, 5) def test_ammo_fire_all(self): env = self._setup_env(random_seed=RANDOM_SEED) @@ -58,7 +59,7 @@ def test_ammo_fire_all(self): # NOTE that agents 1 & 3's attack are invalid due to out-of-range env.step({ ent_id: { action.Attack: { action.Style: env.realm.players[ent_id].agent.style[0], - action.Target: (ent_id+1)%3+1 } } + action.Target: env.obs[ent_id].entities.index((ent_id+1)%3+1) } } for ent_id in self.ammo }) # check combat status: agents 2 (attacker) and 1 (target) are in combat @@ -87,7 +88,7 @@ def test_ammo_fire_all(self): # NOTE that agent 3's attack command is invalid due to out-of-range env.step({ ent_id: { action.Attack: { action.Style: env.realm.players[ent_id].agent.style[0], - action.Target: (ent_id+1)%3+1 } } + action.Target: env.obs[ent_id].entities.index((ent_id+1)%3+1) } } for ent_id in self.ammo }) # agents 1 and 2's latest_combat_tick should be updated diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 8fd9ef837..6461832cf 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -365,11 +365,12 @@ def _check_assert_make_action(self, env, atn, test_cond): if atn == action.Give: actions[ent_id] = { action.Give: { action.InventoryItem: env.obs[ent_id].inventory.sig(*cond['item_sig']), - action.Target: cond['tgt_id'] } } + action.Target: env.obs[ent_id].entities.index(cond['tgt_id']) } } elif atn == action.GiveGold: actions[ent_id] = { action.GiveGold: - { action.Target: cond['tgt_id'], action.Price: cond['gold'] } } + { action.Target: env.obs[ent_id].entities.index(cond['tgt_id']), + action.Price: cond['gold'] } } elif atn == action.Buy: mkt_idx = ent_obs.market.index(cond['item_id']) From 17428257d8603206f77f72abb232a56db94e5f9e Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 22 Jul 2023 00:32:43 +0900 Subject: [PATCH 067/113] some clean ups --- nmmo/core/action.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 9aa785a38..24e866998 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -109,7 +109,6 @@ def edges(cls, config): def args(stim, entity, config): raise NotImplementedError - class Move(Node): priority = 60 nodeType = NodeType.SELECTION @@ -204,7 +203,6 @@ class West(Node): class Stay(Node): delta = (0, 0) - class Attack(Node): priority = 50 nodeType = NodeType.SELECTION @@ -236,14 +234,6 @@ def in_range(entity, stim, config, N): rets = list(rets) return rets - # CHECK ME: do we need l1 distance function? - # systems/ai/utils.py also has various distance functions - # which we may want to clean up - # def l1(pos, cent): - # r, c = pos - # r_cent, c_cent = cent - # return abs(r - r_cent) + abs(c - c_cent) - def call(realm, entity, style, target): if style is None or target is None: return None @@ -300,7 +290,6 @@ def args(stim, entity, config): def deserialize(realm, entity, index): return deserialize_fixed_arg(Style, index) - class Target(Node): argType = None @@ -313,11 +302,10 @@ def deserialize(realm, entity, index: int): if index >= realm.config.PLAYER_N_OBS or index < 0: # checking for the "None" target return None - radius = realm.config.PLAYER_VISION_RADIUS entity_obs = Entity.Query.window( realm.datastore, entity.row.val, entity.col.val, - radius + realm.config.PLAYER_VISION_RADIUS ) if index >= entity_obs.shape[0]: @@ -360,7 +348,6 @@ def attack_range(config): def skill(entity): return entity.skills.mage - class InventoryItem(Node): argType = None @@ -368,7 +355,6 @@ class InventoryItem(Node): def N(cls, config): return config.INVENTORY_N_OBS + 1 # +1 for the "None" item - # TODO(kywch): What does args do? def args(stim, entity, config): return stim.exchange.items() @@ -507,7 +493,6 @@ def call(realm, entity, item, target): realm.event_log.record(EventCode.GIVE_ITEM, entity) - class GiveGold(Node): priority = 30 @@ -555,7 +540,6 @@ def call(realm, entity, amount, target): realm.event_log.record(EventCode.GIVE_GOLD, entity) - class MarketItem(Node): argType = None @@ -563,7 +547,6 @@ class MarketItem(Node): def N(cls, config): return config.MARKET_N_OBS + 1 # +1 for the "None" item - # TODO(kywch): What does args do? def args(stim, entity, config): return stim.exchange.items() @@ -691,7 +674,6 @@ def args(stim, entity, config): def deserialize(realm, entity, index): return deserialize_fixed_arg(Price, index) - class Token(Node): argType = Fixed @@ -709,7 +691,6 @@ def args(stim, entity, config): def deserialize(realm, entity, index): return deserialize_fixed_arg(Token, index) - class Comm(Node): argType = Fixed priority = 99 From b6406f1bbdb95c6bec8aa040b4cdb2c0c3d36ad6 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 22 Jul 2023 00:58:05 +0900 Subject: [PATCH 068/113] removed all args() --- nmmo/core/action.py | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 24e866998..e74ee81e1 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -50,9 +50,6 @@ def N(cls, config): def deserialize(realm, entity, index): return index - def args(stim, entity, config): - return [] - class Fixed: pass @@ -76,7 +73,7 @@ def hook(config): arguments = [] for action in Action.edges(config): action.init(config) - for args in action.edges: + for args in action.edges: # pylint: disable=not-an-iterable args.init(config) if not 'edges' in args.__dict__: continue @@ -106,9 +103,6 @@ def edges(cls, config): edges.append(Comm) return edges - def args(stim, entity, config): - raise NotImplementedError - class Move(Node): priority = 60 nodeType = NodeType.SELECTION @@ -169,9 +163,6 @@ class Direction(Node): def edges(): return [North, South, East, West, Stay] - def args(stim, entity, config): - return Direction.edges - def deserialize(realm, entity, index): return deserialize_fixed_arg(Direction, index) @@ -284,9 +275,6 @@ class Style(Node): def edges(): return [Melee, Range, Mage] - def args(stim, entity, config): - return Style.edges - def deserialize(realm, entity, index): return deserialize_fixed_arg(Style, index) @@ -314,10 +302,6 @@ def deserialize(realm, entity, index: int): entity_id = entity_obs[index, Entity.State.attr_name_to_col["id"]] return realm.entity_or_none(entity_id) - def args(stim, entity, config): - #Should pass max range? - return Attack.in_range(entity, stim, config, None) - class Melee(Node): nodeType = NodeType.ACTION freeze=False @@ -355,9 +339,6 @@ class InventoryItem(Node): def N(cls, config): return config.INVENTORY_N_OBS + 1 # +1 for the "None" item - def args(stim, entity, config): - return stim.exchange.items() - def deserialize(realm, entity, index: int): # NOTE: index is from the inventory, NOT item id if index >= realm.config.INVENTORY_N_OBS or index < 0: # checking for the "None" item @@ -547,9 +528,6 @@ class MarketItem(Node): def N(cls, config): return config.MARKET_N_OBS + 1 # +1 for the "None" item - def args(stim, entity, config): - return stim.exchange.items() - def deserialize(realm, entity, index: int): # NOTE: index is from the market, NOT item id if index >= realm.config.MARKET_N_OBS or index < 0: # checking for the "None" item @@ -668,9 +646,6 @@ def init(cls, config): def edges(): return Price.classes - def args(stim, entity, config): - return Price.edges - def deserialize(realm, entity, index): return deserialize_fixed_arg(Price, index) @@ -685,9 +660,6 @@ def init(cls, config): def edges(): return Token.classes - def args(stim, entity, config): - return Token.edges - def deserialize(realm, entity, index): return deserialize_fixed_arg(Token, index) From d98263f37d2c67ca12d73a90534545c9594797d2 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Fri, 21 Jul 2023 12:26:00 -0700 Subject: [PATCH 069/113] cp --- nmmo/core/action.py | 45 ++++++++++++++++++++------------------------- nmmo/core/env.py | 2 +- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 1bf4e4b1e..728af8217 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -3,6 +3,7 @@ from enum import Enum, auto import numpy as np +from nmmo.core.observation import Observation from nmmo.lib import utils from nmmo.lib.utils import staticproperty @@ -45,7 +46,7 @@ def leaf(): def N(cls, config): return len(cls.edges) - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return index def args(stim, entity, config): @@ -126,6 +127,9 @@ def call(realm, entity, direction): if not realm.map.is_valid_pos(r_new, c_new) or \ realm.map.tiles[r_new, c_new].impassible: + if ent_id == 1: + print(f'Invalid move: {direction}') + return if entity.status.freeze > 0: @@ -171,7 +175,7 @@ def edges(): def args(stim, entity, config): return Direction.edges - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Direction, index) # a quick helper function @@ -179,7 +183,7 @@ def deserialize_fixed_arg(arg, index): if isinstance(index, (int, np.int64)): if index < 0: return None # so that the action will be discarded - val = min(index-1, len(arg.edges)-1) + val = min(index, len(arg.edges)-1) return arg.edges[val] # if index is not int, it's probably already deserialized @@ -295,7 +299,7 @@ def edges(): def args(stim, entity, config): return Style.edges - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Style, index) @@ -306,10 +310,10 @@ class Target(Node): def N(cls, config): return config.PLAYER_N_OBS - def deserialize(realm, entity, index: int): - # NOTE: index is the entity id - # CHECK ME: should index be renamed to ent_id? - return realm.entity_or_none(index) + def deserialize(realm, entity, index: int, obs: Observation): + if index >= len(obs.entities.ids): + return None + return realm.entity_or_none(obs.entities.ids[index]) def args(stim, entity, config): #Should pass max range? @@ -357,15 +361,10 @@ def N(cls, config): def args(stim, entity, config): return stim.exchange.items() - def deserialize(realm, entity, index: int): - # NOTE: index is from the inventory, NOT item id - inventory = Item.Query.owned_by(realm.datastore, entity.id.val) - - if index >= inventory.shape[0]: + def deserialize(realm, entity, index: int, obs: Observation): + if index >= len(obs.inventory.ids): return None - - item_id = inventory[index, Item.State.attr_name_to_col["id"]] - return realm.items[item_id] + return realm.items.get(obs.inventory.ids[index]) class Use(Node): priority = 10 @@ -549,15 +548,11 @@ def N(cls, config): def args(stim, entity, config): return stim.exchange.items() - def deserialize(realm, entity, index: int): - # NOTE: index is from the market, NOT item id - market = Item.Query.for_sale(realm.datastore) - - if index >= market.shape[0]: + def deserialize(realm, entity, index: int, obs: Observation): + if index >= len(obs.market.ids): return None - item_id = market[index, Item.State.attr_name_to_col["id"]] - return realm.items[item_id] + return realm.items.get(obs.market.ids[index]) class Buy(Node): priority = 20 @@ -667,7 +662,7 @@ def edges(): def args(stim, entity, config): return Price.edges - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Price, index) @@ -685,7 +680,7 @@ def edges(): def args(stim, entity, config): return Token.edges - def deserialize(realm, entity, index): + def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Token, index) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 5efde650d..74e718af2 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -398,7 +398,7 @@ def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): break for arg, val in sorted(args.items()): - obj = arg.deserialize(self.realm, entity, val) + obj = arg.deserialize(self.realm, entity, val, self.obs[ent_id]) if obj is None: action_valid = False break From ea3ae00a7441ee4da189b2e8db0b8e46c3edef97 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Fri, 21 Jul 2023 13:39:54 -0700 Subject: [PATCH 070/113] cp --- nmmo/core/action.py | 3 --- nmmo/core/config.py | 5 ++++- nmmo/core/observation.py | 38 ++++++++++++++++++++++++++------------ 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index e71d2973f..3740ea3cc 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -122,9 +122,6 @@ def call(realm, entity, direction): if not realm.map.is_valid_pos(r_new, c_new) or \ realm.map.tiles[r_new, c_new].impassible: - if ent_id == 1: - print(f'Invalid move: {direction}') - return if entity.status.freeze > 0: diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 964a62757..4efa9133d 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -145,7 +145,10 @@ def game_system_enabled(self, name) -> bool: return hasattr(self, name) PROVIDE_ACTION_TARGETS = True - '''Flag used to provide action targets mask''' + '''Provide action targets mask''' + + PROVIDE_NOOP_ACTION_TARGET = False + '''Provide a no-op option for each action''' PLAYERS = [Agent] '''Player classes from which to spawn''' diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index f2b06dbc6..7bf931a4b 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -77,6 +77,8 @@ def __init__(self, else: assert market.size == 0 + self._noop_action = 1 if config.PROVIDE_NOOP_ACTION_TARGET else 0 + # pylint: disable=method-cache-max-size-none @lru_cache(maxsize=None) def tile(self, r_delta, c_delta): @@ -220,8 +222,11 @@ def _make_attack_mask(self): assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_RANGE_REACH assert self.config.COMBAT_MELEE_REACH == self.config.COMBAT_MAGE_REACH assert self.config.COMBAT_RANGE_REACH == self.config.COMBAT_MAGE_REACH - attack_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) # +1 for No action xcxc - # attack_mask[-1] = 1 # No action (xcxc) + + attack_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + attack_mask[-1] = 1 + if self.dummy_obs: return attack_mask @@ -247,8 +252,10 @@ def _make_attack_mask(self): def _make_use_mask(self): # empty inventory -- nothing to use - use_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) # +1 for No action xcxc - # use_mask[-1] = 1 # No action (xcxc) + use_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + use_mask[-1] = 1 + if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ or self.dummy_obs or self.agent_in_combat: return use_mask @@ -296,8 +303,10 @@ def _item_skill(self): } def _make_destroy_item_mask(self): - destroy_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) # +1 for No action xcxc - # destroy_mask[-1] = 1 # No action (xcxc) + destroy_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + destroy_mask[-1] = 1 + # empty inventory -- nothing to destroy if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ or self.dummy_obs or self.agent_in_combat: @@ -310,8 +319,9 @@ def _make_destroy_item_mask(self): return destroy_mask def _make_give_target_mask(self): - give_mask = np.zeros(self.config.PLAYER_N_OBS, dtype=np.int8) # +1 for No action xcxc - # give_mask[-1] = 1 # No action (xcxc) + give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + give_mask[-1] = 1 # empty inventory -- nothing to give if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ or self.dummy_obs or self.agent_in_combat: @@ -339,8 +349,10 @@ def _make_give_gold_mask(self): return mask def _make_sell_mask(self): - sell_mask = np.zeros(self.config.INVENTORY_N_OBS, dtype=np.int8) # +1 for No action xcxc - # sell_mask[-1] = 1 # No action (xcxc) + sell_mask = np.zeros(self.config.INVENTORY_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + sell_mask[-1] = 1 + # empty inventory -- nothing to sell if not (self.config.EXCHANGE_SYSTEM_ENABLED and self.inventory.len > 0) \ or self.dummy_obs or self.agent_in_combat: @@ -353,8 +365,10 @@ def _make_sell_mask(self): return sell_mask def _make_buy_mask(self): - buy_mask = np.zeros(self.config.MARKET_N_OBS, dtype=np.int8) # +1 for No action xcxc - # buy_mask[-1] = 1 # No action (xcxc) + buy_mask = np.zeros(self.config.MARKET_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + buy_mask[-1] = 1 + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: return buy_mask From bdf78611a2e2dbb589da7849648b382d543df8f1 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Fri, 21 Jul 2023 13:43:51 -0700 Subject: [PATCH 071/113] cp --- nmmo/core/action.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 3740ea3cc..14eb21bb6 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -7,8 +7,7 @@ from nmmo.lib import utils from nmmo.lib.utils import staticproperty -from nmmo.systems.item import Item, Stack -from nmmo.entity.entity import Entity +from nmmo.systems.item import Stack from nmmo.lib.log import EventCode From 3a163ebe4c17d708265c539cc2fa9f62556f60df Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 22 Jul 2023 07:15:37 +0900 Subject: [PATCH 072/113] added a test for noop action on/off, fixed tests --- nmmo/core/action.py | 27 ++++++++++------- tests/action/test_ammo_use.py | 6 ++-- tests/action/test_destroy_give_gold.py | 4 +-- tests/core/test_gym_obs_spaces.py | 37 +++++++++++++++--------- tests/task/test_sample_task_from_file.py | 2 +- tests/testhelpers.py | 2 +- 6 files changed, 47 insertions(+), 31 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 14eb21bb6..9c68db1cc 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -24,7 +24,8 @@ class NodeType(Enum): class Node(metaclass=utils.IterableNameComparable): @classmethod def init(cls, config): - pass + # noop_action is used in some of the N() methods + cls.noop_action = 1 if config.PROVIDE_NOOP_ACTION_TARGET else 0 @staticproperty def edges(): @@ -283,7 +284,7 @@ class Target(Node): @classmethod def N(cls, config): - return config.PLAYER_N_OBS # +1 for the "None" target + return config.PLAYER_N_OBS + cls.noop_action def deserialize(realm, entity, index: int, obs: Observation): if index >= len(obs.entities.ids): @@ -325,7 +326,7 @@ class InventoryItem(Node): @classmethod def N(cls, config): - return config.INVENTORY_N_OBS # +1 for the "None" item + return config.INVENTORY_N_OBS + cls.noop_action def deserialize(realm, entity, index: int, obs: Observation): if index >= len(obs.inventory.ids): @@ -491,11 +492,9 @@ def call(realm, entity, amount, target): if not isinstance(amount, int): amount = amount.val - if not (amount > 0 and entity.gold.val > 0): # no gold to give + if amount > entity.gold.val: # no gold to give return - amount = min(amount, entity.gold.val) - entity.gold.decrement(amount) target.gold.increment(amount) @@ -506,7 +505,7 @@ class MarketItem(Node): @classmethod def N(cls, config): - return config.MARKET_N_OBS # +1 for the "None" item + return config.MARKET_N_OBS + cls.noop_action def deserialize(realm, entity, index: int, obs: Observation): if index >= len(obs.market.ids): @@ -613,15 +612,21 @@ class Price(Node): @classmethod def init(cls, config): # gold should be > 0 - Price.classes = init_discrete(range(1, config.PRICE_N_OBS+1)) + cls.price_range = range(1, config.PRICE_N_OBS+1) + Price.classes = init_discrete(cls.price_range) + + @classmethod + def index(cls, price): + try: + return cls.price_range.index(price) + except ValueError: + # use the max price, which is config.PRICE_N_OBS + return len(cls.price_range) - 1 @staticproperty def edges(): return Price.classes - def args(stim, entity, config): - return Price.edges - def deserialize(realm, entity, index, obs: Observation): return deserialize_fixed_arg(Price, index) diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 372b98287..58b45c9bc 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -29,8 +29,8 @@ def _assert_action_targets_zero(self, gym_obs): + np.sum(gym_obs['ActionTargets'][action.Buy][action.MarketItem]) for atn in [action.Use, action.Give, action.Destroy, action.Sell]: mask += np.sum(gym_obs['ActionTargets'][atn][action.InventoryItem]) - # MarketItem and InventoryTarget have no-action flags, which sum up to 5 - self.assertEqual(mask, 5) + # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 + self.assertEqual(mask, 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) def test_ammo_fire_all(self): env = self._setup_env(random_seed=RANDOM_SEED) @@ -141,7 +141,7 @@ def test_cannot_use_listed_items(self): # First tick actions: SELL level-0 ammo env.step({ ent_id: { action.Sell: { action.InventoryItem: env.obs[ent_id].inventory.sig(ent_ammo, 0), - action.Price: sell_price } } + action.Price: action.Price.index(sell_price) } } for ent_id, ent_ammo in self.ammo.items() }) # check if the ammos were listed diff --git a/tests/action/test_destroy_give_gold.py b/tests/action/test_destroy_give_gold.py index cea62bda5..fa9f10b1a 100644 --- a/tests/action/test_destroy_give_gold.py +++ b/tests/action/test_destroy_give_gold.py @@ -153,7 +153,7 @@ def test_give_equipped_listed(self): self._check_inv_mask(env.obs[ent_id], action.Sell, item_sig)) actions[ent_id] = { action.Sell: { action.InventoryItem: env.obs[ent_id].inventory.sig(*item_sig), - action.Price: price } } + action.Price: action.Price.index(price) } } env.step(actions) @@ -257,7 +257,7 @@ def test_give_gold(self): test_cond[1] = { 'tgt_id': 3, 'gold': 1, 'ent_mask': True, 'ent_gold': self.init_gold-1, 'tgt_gold': self.init_gold+1 } # agent 2: give gold to agent 4 (valid: same tile) - test_cond[2] = { 'tgt_id': 4, 'gold': 100, 'ent_mask': True, + test_cond[2] = { 'tgt_id': 4, 'gold': self.init_gold, 'ent_mask': True, 'ent_gold': 0, 'tgt_gold': 2*self.init_gold } # agent 3: give gold to npc -1 (invalid: cannot give to npc) # ent_gold is self.init_gold+1 because (3) got 1 gold from (1) diff --git a/tests/core/test_gym_obs_spaces.py b/tests/core/test_gym_obs_spaces.py index 79610af09..919bf513f 100644 --- a/tests/core/test_gym_obs_spaces.py +++ b/tests/core/test_gym_obs_spaces.py @@ -3,17 +3,9 @@ import nmmo class TestGymObsSpaces(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.config = nmmo.config.Default() - cls.env = nmmo.Env(cls.config) - cls.env.reset(seed=1) - for _ in range(3): - cls.env.step({}) - - def test_gym_obs_space(self): - obs_spec = self.env.observation_space(1) - obs, _, _, _ = self.env.step({}) + def _test_gym_obs_space(self, env): + obs_spec = env.observation_space(1) + obs, _, _, _ = env.step({}) for agent_obs in obs.values(): for key, val in agent_obs.items(): @@ -23,12 +15,31 @@ def test_gym_obs_space(self): if 'ActionTargets' in agent_obs: val = agent_obs['ActionTargets'] - for atn in nmmo.Action.edges(self.config): - if atn.enabled(self.config): + for atn in nmmo.Action.edges(env.config): + if atn.enabled(env.config): for arg in atn.edges: # pylint: disable=not-an-iterable self.assertTrue(obs_spec['ActionTargets'][atn][arg].contains(val[atn][arg]), f"Invalid obs format -- key: {atn}/{arg}, val: {val[atn][arg]}") + def test_env_without_noop(self): + config = nmmo.config.Default() + config.PROVIDE_NOOP_ACTION_TARGET = False + env = nmmo.Env(config) + env.reset(seed=1) + for _ in range(3): + env.step({}) + + self._test_gym_obs_space(env) + + def test_env_with_noop(self): + config = nmmo.config.Default() + config.PROVIDE_NOOP_ACTION_TARGET = True + env = nmmo.Env(config) + env.reset(seed=1) + for _ in range(3): + env.step({}) + + self._test_gym_obs_space(env) if __name__ == '__main__': unittest.main() diff --git a/tests/task/test_sample_task_from_file.py b/tests/task/test_sample_task_from_file.py index 20e497887..3b5053d6b 100644 --- a/tests/task/test_sample_task_from_file.py +++ b/tests/task/test_sample_task_from_file.py @@ -12,7 +12,7 @@ def test_sample_task_from_file(self): # env.reset() samples and instantiates a task for each agent # when sample_traning_tasks is set True - env.reset(sample_training_tasks=True) + env.reset() self.assertEqual(len(env.possible_agents), len(env.tasks)) # for the training tasks, the task assignee and subject should be the same diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 6461832cf..e4bd7e0d0 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -370,7 +370,7 @@ def _check_assert_make_action(self, env, atn, test_cond): elif atn == action.GiveGold: actions[ent_id] = { action.GiveGold: { action.Target: env.obs[ent_id].entities.index(cond['tgt_id']), - action.Price: cond['gold'] } } + action.Price: action.Price.index(cond['gold']) } } elif atn == action.Buy: mkt_idx = ent_obs.market.index(cond['item_id']) From 273e81f3ea5b2a6804eb3b540c5767ae204a42af Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 22 Jul 2023 09:19:45 +0900 Subject: [PATCH 073/113] support non-continous team ids --- nmmo/lib/team_helper.py | 14 ++++++++++---- nmmo/task/base_predicates.py | 2 ++ tests/task/test_demo_task_creation.py | 22 ++-------------------- tests/task/test_task_api.py | 11 ++++++----- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index e624dca5a..6adc0aba2 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -24,14 +24,20 @@ def is_agent_in_team(self, agent_id:int , team_id: int) -> bool: return agent_id in self.teams[team_id] def get_target_agent(self, team_id: int, target: str): + team_ids = list(self.teams.keys()) + idx = team_ids.index(team_id) if target == 'left_team': - return self.teams[(team_id+1) % self.num_teams] + target_id = team_ids[(idx+1) % self.num_teams] + return self.teams[target_id] if target == 'left_team_leader': - return self.teams[(team_id+1) % self.num_teams][0] + target_id = team_ids[(idx+1) % self.num_teams] + return self.teams[target_id][0] if target == 'right_team': - return self.teams[(team_id-1) % self.num_teams] + target_id = team_ids[(idx-1) % self.num_teams] + return self.teams[target_id] if target == 'right_team_leader': - return self.teams[(team_id-1) % self.num_teams][0] + target_id = team_ids[(idx-1) % self.num_teams] + return self.teams[target_id][0] if target == 'my_team_leader': return self.teams[team_id][0] return None diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index 3fba63874..b619b336b 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -64,6 +64,8 @@ def CanSeeAgent(gs: GameState, subject: Group, target: int): def CanSeeGroup(gs: GameState, subject: Group, target: Iterable[int]): """ Returns True if subject can see any of target """ + if target is None: + return False return any(CanSeeAgent(gs, subject, agent) for agent in target) def DistanceTraveled(gs: GameState, subject: Group, dist: int): diff --git a/tests/task/test_demo_task_creation.py b/tests/task/test_demo_task_creation.py index d1271da7e..f022def37 100644 --- a/tests/task/test_demo_task_creation.py +++ b/tests/task/test_demo_task_creation.py @@ -213,25 +213,7 @@ def PredicateMath(gs, subject): # DONE def test_task_spec_based_curriculum(self): - """ - task_spec is a list of tuple (reward_to, evaluation function, eval_fn_kwargs, task_kwargs) - each tuple in the task_spec will create tasks for a team in teams - - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded - - evaluation functions from the base_predicates.py or could be custom functions like above - - eval_fn_kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] - these str will be translated into the actual agent ids - - task_kwargs are the optional, additional args that go into the task. - * 'task_cls' specifies the task class to be used. - If not provided, the standard Task is used. - """ - task_spec = [ # (reward_to, predicate function, kwargs) + task_spec = [ ts.TaskSpec(eval_fn=bp.CountEvent, eval_fn_kwargs={'event': 'PLAYER_KILL', 'N': 1}, reward_to='team'), ts.TaskSpec(eval_fn=bp.CountEvent, eval_fn_kwargs={'event': 'PLAYER_KILL', 'N': 2}, @@ -243,7 +225,7 @@ def test_task_spec_based_curriculum(self): ] # NOTE: len(teams) and len(task_spec) don't need to match - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + teams = {1:[1,2,3], 3:[4,5], 6:[6,7], 9:[8,9], 14:[10,11]} config = ScriptedAgentTestConfig() env = Env(config) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 1b2bb720e..8f4f7b326 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -233,7 +233,8 @@ def PracticeFormation(gs, subject, dist, num_tick): reward_to='team') # create the test task from the task spec - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} + teams = {1:[1,2,3], 3:[4,5], 6:[6,7], 9:[8,9], 14:[10,11]} + team_ids= list(teams.keys()) config = ScriptedAgentTestConfig() config.PLAYERS =[Sleeper] @@ -252,9 +253,9 @@ def PracticeFormation(gs, subject, dist, num_tick): 'return AllMembersWithinRange(gs, subject, dist) * '+ 'TickGE(gs, subject, num_tick)') self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) - self.assertEqual(task.subject, tuple(teams[0])) + self.assertEqual(task.subject, tuple(teams[team_ids[0]])) self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) - self.assertEqual(task.assignee, tuple(teams[0])) + self.assertEqual(task.assignee, tuple(teams[team_ids[0]])) # check the agent-task map for agent_id, agent_tasks in env.agent_task_map.items(): @@ -296,9 +297,9 @@ def PracticeFormation(gs, subject, dist, num_tick): 'return AllMembersWithinRange(gs, subject, dist) * '+ 'TickGE(gs, subject, num_tick)') self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) - self.assertEqual(task.subject, tuple(teams[0])) + self.assertEqual(task.subject, tuple(teams[team_ids[0]])) self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) - self.assertEqual(task.assignee, tuple(teams[0])) + self.assertEqual(task.assignee, tuple(teams[team_ids[0]])) self.assertTrue(np.array_equal(task.embedding, task_embedding)) obs_spec = env.observation_space(1) From 9fcc5627a528c82986bdd4eba1de360f4fe201b2 Mon Sep 17 00:00:00 2001 From: jsuarez5341 Date: Sat, 22 Jul 2023 04:35:24 +0000 Subject: [PATCH 074/113] Remove websocket and websocket deps; breaks some python installs --- nmmo/render/websocket.py | 163 --------------------------------------- setup.py | 2 - 2 files changed, 165 deletions(-) delete mode 100644 nmmo/render/websocket.py diff --git a/nmmo/render/websocket.py b/nmmo/render/websocket.py deleted file mode 100644 index 3647f51e1..000000000 --- a/nmmo/render/websocket.py +++ /dev/null @@ -1,163 +0,0 @@ -# pylint: disable=all - -import numpy as np - -from signal import signal, SIGINT -import json -import os -import sys -import time -import threading - -from twisted.internet import reactor -from twisted.python import log -from twisted.web.server import Site -from twisted.web.static import File - -from autobahn.twisted.websocket import WebSocketServerFactory, \ - WebSocketServerProtocol -from autobahn.twisted.resource import WebSocketResource - -from .render_utils import np_encoder - -class GodswordServerProtocol(WebSocketServerProtocol): - def __init__(self): - super().__init__() - print("Created a server") - self.frame = 0 - - #"connected" is already used by WSSP - self.sent_environment = False - self.isConnected = False - - self.pos = [0, 0] - self.cmd = None - - def onOpen(self): - print("Opened connection to server") - - def onClose(self, wasClean, code=None, reason=None): - self.isConnected = False - print('Connection closed') - - def connectionMade(self): - super().connectionMade() - self.factory.clientConnectionMade(self) - - def connectionLost(self, reason): - super().connectionLost(reason) - self.factory.clientConnectionLost(self) - self.sent_environment = False - - #Not used without player interaction - def onMessage(self, packet, isBinary): - print("Server packet", packet) - packet = packet.decode() - _, packet = packet.split(';') #Strip headeer - r, c, cmd = packet.split(' ') #Split camera coords - if len(cmd) == 0 or cmd == '\t': - cmd = None - - self.pos = [int(r), int(c)] - self.cmd = cmd - - self.isConnected = True - - def onConnect(self, request): - print("WebSocket connection request: {}".format(request)) - realm = self.factory.realm - self.realm = realm - self.frame += 1 - - def serverPacket(self): - data = self.realm.packet - return data - - def sendUpdate(self, data): - packet = {} - packet['resource'] = data['resource'] - packet['player'] = data['player'] - packet['npc'] = data['npc'] - packet['pos'] = data['pos'] - packet['wilderness'] = data['wilderness'] - packet['market'] = data['market'] - - print('Is Connected? : {}'.format(self.isConnected)) - if not self.sent_environment: - packet['map'] = data['environment'] - packet['border'] = data['border'] - packet['size'] = data['size'] - self.sent_environment=True - - if 'overlay' in data: - packet['overlay'] = data['overlay'] - print('SENDING OVERLAY: ', len(packet['overlay'])) - - packet = json.dumps(packet, default=np_encoder).encode('utf8') - self.sendMessage(packet, False) - - -class WSServerFactory(WebSocketServerFactory): - def __init__(self, ip, realm): - super().__init__(ip) - self.realm = realm - self.time = time.time() - self.clients = [] - - self.pos = [0, 0] - self.cmd = None - self.tickRate = 0.6 - self.tick = 0 - - def update(self, packet): - self.tick += 1 - uptime = np.round(self.tickRate*self.tick, 1) - delta = time.time() - self.time - print('Wall Clock: ', str(delta)[:5], 'Uptime: ', uptime, ', Tick: ', self.tick) - delta = self.tickRate - delta - if delta > 0: - time.sleep(delta) - self.time = time.time() - - for client in self.clients: - client.sendUpdate(packet) - if client.pos is not None: - self.pos = client.pos - self.cmd = client.cmd - - return self.pos, self.cmd - - def clientConnectionMade(self, client): - self.clients.append(client) - - def clientConnectionLost(self, client): - self.clients.remove(client) - -class Application: - def __init__(self, realm): - signal(SIGINT, self.kill) - log.startLogging(sys.stdout) - - port = 8080 - self.factory = WSServerFactory(u'ws://localhost:{}'.format(port), realm) - self.factory.protocol = GodswordServerProtocol - resource = WebSocketResource(self.factory) - - root = File(".") - root.putChild(b"ws", resource) - site = Site(root) - - reactor.listenTCP(port, site) - - def run(): - reactor.run(installSignalHandlers=0) - - threading.Thread(target=run).start() - - def update(self, packet): - return self.factory.update(packet) - - def kill(*args): - print("Killed by user") - reactor.stop() - os._exit(0) diff --git a/setup.py b/setup.py index 7a49a6569..ba4362fad 100644 --- a/setup.py +++ b/setup.py @@ -36,8 +36,6 @@ 'pytest==7.3.0', 'pytest-benchmark==3.4.1', 'fire==0.4.0', - 'autobahn==19.3.3', - 'Twisted==19.2.0', 'vec-noise==1.1.4', 'imageio==2.23.0', 'ordered-set==4.1.0', From 703a12a1c4a823578cfd23483422defd85c81dd6 Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 24 Jul 2023 09:32:35 +0900 Subject: [PATCH 075/113] add back websocket --- nmmo/render/websocket.py | 163 +++++++++++++++++++++++++++++++ setup.py | 3 +- tests/render/test_load_replay.py | 2 +- tests/render/test_render_save.py | 23 ++--- 4 files changed, 172 insertions(+), 19 deletions(-) create mode 100644 nmmo/render/websocket.py diff --git a/nmmo/render/websocket.py b/nmmo/render/websocket.py new file mode 100644 index 000000000..b3b70123c --- /dev/null +++ b/nmmo/render/websocket.py @@ -0,0 +1,163 @@ +# pylint: disable=all + +import numpy as np + +from signal import signal, SIGINT +import json +import os +import sys +import time +import threading + +from twisted.internet import reactor +from twisted.python import log +from twisted.web.server import Site +from twisted.web.static import File + +from autobahn.twisted.websocket import WebSocketServerFactory, \ + WebSocketServerProtocol +from autobahn.twisted.resource import WebSocketResource + +from .render_utils import np_encoder + +class GodswordServerProtocol(WebSocketServerProtocol): + def __init__(self): + super().__init__() + print("Created a server") + self.frame = 0 + + #"connected" is already used by WSSP + self.sent_environment = False + self.isConnected = False + + self.pos = [0, 0] + self.cmd = None + + def onOpen(self): + print("Opened connection to server") + + def onClose(self, wasClean, code=None, reason=None): + self.isConnected = False + print('Connection closed') + + def connectionMade(self): + super().connectionMade() + self.factory.clientConnectionMade(self) + + def connectionLost(self, reason): + super().connectionLost(reason) + self.factory.clientConnectionLost(self) + self.sent_environment = False + + #Not used without player interaction + def onMessage(self, packet, isBinary): + print("Server packet", packet) + packet = packet.decode() + _, packet = packet.split(';') #Strip headeer + r, c, cmd = packet.split(' ') #Split camera coords + if len(cmd) == 0 or cmd == '\t': + cmd = None + + self.pos = [int(r), int(c)] + self.cmd = cmd + + self.isConnected = True + + def onConnect(self, request): + print("WebSocket connection request: {}".format(request)) + realm = self.factory.realm + self.realm = realm + self.frame += 1 + + def serverPacket(self): + data = self.realm.packet + return data + + def sendUpdate(self, data): + packet = {} + packet['resource'] = data['resource'] + packet['player'] = data['player'] + packet['npc'] = data['npc'] + packet['pos'] = data['pos'] + packet['wilderness'] = data['wilderness'] + packet['market'] = data['market'] + + print('Is Connected? : {}'.format(self.isConnected)) + if not self.sent_environment: + packet['map'] = data['environment'] + packet['border'] = data['border'] + packet['size'] = data['size'] + self.sent_environment=True + + if 'overlay' in data: + packet['overlay'] = data['overlay'] + print('SENDING OVERLAY: ', len(packet['overlay'])) + + packet = json.dumps(packet, default=np_encoder).encode('utf8') + self.sendMessage(packet, False) + + +class WSServerFactory(WebSocketServerFactory): + def __init__(self, ip, realm): + super().__init__(ip) + self.realm = realm + self.time = time.time() + self.clients = [] + + self.pos = [0, 0] + self.cmd = None + self.tickRate = 0.6 + self.tick = 0 + + def update(self, packet): + self.tick += 1 + uptime = np.round(self.tickRate*self.tick, 1) + delta = time.time() - self.time + print('Wall Clock: ', str(delta)[:5], 'Uptime: ', uptime, ', Tick: ', self.tick) + delta = self.tickRate - delta + if delta > 0: + time.sleep(delta) + self.time = time.time() + + for client in self.clients: + client.sendUpdate(packet) + if client.pos is not None: + self.pos = client.pos + self.cmd = client.cmd + + return self.pos, self.cmd + + def clientConnectionMade(self, client): + self.clients.append(client) + + def clientConnectionLost(self, client): + self.clients.remove(client) + +class Application: + def __init__(self, realm): + signal(SIGINT, self.kill) + log.startLogging(sys.stdout) + + port = 8080 + self.factory = WSServerFactory(u'ws://localhost:{}'.format(port), realm) + self.factory.protocol = GodswordServerProtocol + resource = WebSocketResource(self.factory) + + root = File(".") + root.putChild(b"ws", resource) + site = Site(root) + + reactor.listenTCP(port, site) + + def run(): + reactor.run(installSignalHandlers=0) + + threading.Thread(target=run).start() + + def update(self, packet): + return self.factory.update(packet) + + def kill(*args): + print("Killed by user") + reactor.stop() + os._exit(0) \ No newline at end of file diff --git a/setup.py b/setup.py index ba4362fad..d9d534bc5 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,8 @@ 'scipy==1.10.0', 'pytest==7.3.0', 'pytest-benchmark==3.4.1', - 'fire==0.4.0', + 'autobahn==19.3.3', + 'Twisted==19.2.0', 'vec-noise==1.1.4', 'imageio==2.23.0', 'ordered-set==4.1.0', diff --git a/tests/render/test_load_replay.py b/tests/render/test_load_replay.py index 9f6a57fe4..17ecdd258 100644 --- a/tests/render/test_load_replay.py +++ b/tests/render/test_load_replay.py @@ -11,7 +11,7 @@ renderer = WebsocketRenderer() time.sleep(3) - # load a replay + # load a replay: replace 'replay_dev.json' with your replay file replay = FileReplayHelper.load('replay_dev.json', decompress=False) # run the replay diff --git a/tests/render/test_render_save.py b/tests/render/test_render_save.py index f1f3801ef..a35d8905f 100644 --- a/tests/render/test_render_save.py +++ b/tests/render/test_render_save.py @@ -3,24 +3,19 @@ from nmmo.core.config import (AllGameSystems, Combat, Communication, Equipment, Exchange, Item, Medium, Profession, Progression, Resource, Small, Terrain) -from nmmo.task.task_api import nmmo_default_task from nmmo.render.render_client import WebsocketRenderer from nmmo.render.replay_helper import FileReplayHelper from scripted import baselines def create_config(base, nent, *systems): - # pylint: disable=redefined-outer-name - systems = (base, *systems) - name = '_'.join(cls.__name__ for cls in systems) - - conf = type(name, systems, {})() - + systems = (base, *systems) + name = '_'.join(cls.__name__ for cls in systems) + conf = type(name, systems, {})() conf.TERRAIN_TRAIN_MAPS = 1 conf.TERRAIN_EVAL_MAPS = 1 conf.IMMORTAL = True conf.PLAYER_N = nent conf.PLAYERS = [baselines.Random] - return conf no_npc_small_1_pop_conf = create_config(Small, 1, Terrain, Resource, @@ -51,25 +46,19 @@ def create_config(base, nent, *systems): import random from tqdm import tqdm - from tests.testhelpers import ScriptedAgentTestConfig - TEST_HORIZON = 100 RANDOM_SEED = random.randint(0, 9999) - config = ScriptedAgentTestConfig() - config.NPC_SPAWN_ATTEMPTS = 8 - replay_helper = FileReplayHelper() - for name, config in conf_dict.items(): + for conf_name, config in conf_dict.items(): env = nmmo.Env(config) # to make replay, one should create replay_helper # and run the below line env.realm.record_replay(replay_helper) - tasks = nmmo_default_task(env.possible_agents, 'no_task') - env.reset(seed=RANDOM_SEED, new_tasks=tasks) + env.reset(seed=RANDOM_SEED) # the renderer is external to the env, so need to manually initiate it renderer = WebsocketRenderer(env.realm) @@ -79,4 +68,4 @@ def create_config(base, nent, *systems): renderer.render_realm() # NOTE: the web client has trouble loading the compressed replay file - replay_helper.save(f'replay_{name}_seed_{RANDOM_SEED:04d}.json', compress=False) + replay_helper.save(f'replay_{conf_name}_seed_{RANDOM_SEED:04d}.json', compress=False) From 3387919e1140ab4dd5650d50f6e71beab025de3b Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 24 Jul 2023 10:02:37 +0900 Subject: [PATCH 076/113] made json the default for saving replay --- nmmo/render/render_client.py | 4 ++++ nmmo/render/replay_helper.py | 10 +++++----- tests/render/test_render_save.py | 12 +++++++----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/nmmo/render/render_client.py b/nmmo/render/render_client.py index e61d88083..d861c3fed 100644 --- a/nmmo/render/render_client.py +++ b/nmmo/render/render_client.py @@ -19,6 +19,10 @@ def __init__(self, realm=None) -> None: self.packet = None + def set_realm(self, realm) -> None: + self._realm = realm + self.registry = OverlayRegistry(realm, renderer=self) if realm else None + def render_packet(self, packet) -> None: packet = { 'pos': self.overlay_pos, diff --git a/nmmo/render/replay_helper.py b/nmmo/render/replay_helper.py index 119ee4ba8..1a1bab1e6 100644 --- a/nmmo/render/replay_helper.py +++ b/nmmo/render/replay_helper.py @@ -1,3 +1,4 @@ +import os import json import logging import lzma @@ -77,11 +78,10 @@ def _metadata(self) -> Dict: def update(self): self.packets.append(self._packet()) - def save(self, filename_prefix, compress=True): + def save(self, filename_prefix, compress=False): replay_file = f'{filename_prefix}.replay.json' metadata_file = f'{filename_prefix}.metadata.pkl' - data = json.dumps({ 'map': self.map, 'packets': self.packets @@ -99,13 +99,13 @@ def save(self, filename_prefix, compress=True): with open(metadata_file, 'wb') as out: pickle.dump(self._metadata(), out) - @classmethod - def load(cls, replay_file, decompress=True): + def load(cls, replay_file): + extension = os.path.splitext(replay_file)[1] with open(replay_file, 'rb') as fp: data = fp.read() - if decompress: + if extension != '.json': data = lzma.decompress(data, format=lzma.FORMAT_ALONE) data = json.loads(data.decode('utf-8')) diff --git a/tests/render/test_render_save.py b/tests/render/test_render_save.py index a35d8905f..d63599e0c 100644 --- a/tests/render/test_render_save.py +++ b/tests/render/test_render_save.py @@ -51,6 +51,9 @@ def create_config(base, nent, *systems): replay_helper = FileReplayHelper() + # the renderer is external to the env, so need to manually initiate it + renderer = WebsocketRenderer() + for conf_name, config in conf_dict.items(): env = nmmo.Env(config) @@ -59,13 +62,12 @@ def create_config(base, nent, *systems): env.realm.record_replay(replay_helper) env.reset(seed=RANDOM_SEED) - - # the renderer is external to the env, so need to manually initiate it - renderer = WebsocketRenderer(env.realm) + renderer.set_realm(env.realm) for tick in tqdm(range(TEST_HORIZON)): env.step({}) renderer.render_realm() - # NOTE: the web client has trouble loading the compressed replay file - replay_helper.save(f'replay_{conf_name}_seed_{RANDOM_SEED:04d}.json', compress=False) + # NOTE: save the data in uncompressed json format, since + # the web client has trouble loading the compressed replay file + replay_helper.save(f'replay_{conf_name}_seed_{RANDOM_SEED:04d}.json') From ae666aaaf86e2a5cf0769132668656fd0c234beb Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 24 Jul 2023 10:12:08 +0900 Subject: [PATCH 077/113] removed decompress flag --- tests/render/test_load_replay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/render/test_load_replay.py b/tests/render/test_load_replay.py index 17ecdd258..87904cbf4 100644 --- a/tests/render/test_load_replay.py +++ b/tests/render/test_load_replay.py @@ -12,7 +12,7 @@ time.sleep(3) # load a replay: replace 'replay_dev.json' with your replay file - replay = FileReplayHelper.load('replay_dev.json', decompress=False) + replay = FileReplayHelper.load('replay_dev.json') # run the replay for packet in replay: From 82a3b1ef19f81cdb9d68ed8595b4ecabc0f59ac7 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 27 Jul 2023 10:48:39 +0900 Subject: [PATCH 078/113] added task_spec to info, to know which tasks got completed, added HoldDurationTask --- .gitignore | 3 +- nmmo/lib/team_helper.py | 16 ++- nmmo/task/task_api.py | 61 ++++++++++-- nmmo/task/task_spec.py | 52 ++++++---- tests/task/test_task_api.py | 192 ++++++++++++++++++++++++------------ 5 files changed, 222 insertions(+), 102 deletions(-) diff --git a/.gitignore b/.gitignore index 1407169c1..011a34c5c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,9 +4,10 @@ maps/ runs/* wandb/* -# local replay file from tests/test_deterministic_replay.py, test_render_save.py +# local replay file from test_render_save.py tests/replay_local*.pickle replay* +eval* .vscode diff --git a/nmmo/lib/team_helper.py b/nmmo/lib/team_helper.py index 6adc0aba2..9256cd036 100644 --- a/nmmo/lib/team_helper.py +++ b/nmmo/lib/team_helper.py @@ -26,18 +26,24 @@ def is_agent_in_team(self, agent_id:int , team_id: int) -> bool: def get_target_agent(self, team_id: int, target: str): team_ids = list(self.teams.keys()) idx = team_ids.index(team_id) - if target == 'left_team': + if target == "left_team": target_id = team_ids[(idx+1) % self.num_teams] return self.teams[target_id] - if target == 'left_team_leader': + if target == "left_team_leader": target_id = team_ids[(idx+1) % self.num_teams] return self.teams[target_id][0] - if target == 'right_team': + if target == "right_team": target_id = team_ids[(idx-1) % self.num_teams] return self.teams[target_id] - if target == 'right_team_leader': + if target == "right_team_leader": target_id = team_ids[(idx-1) % self.num_teams] return self.teams[target_id][0] - if target == 'my_team_leader': + if target == "my_team_leader": return self.teams[team_id][0] + if target == "all_foes": + all_foes = [] + for foe_team_id in team_ids: + if foe_team_id != team_id: + all_foes += self.teams[foe_team_id] + return all_foes return None diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 540a8cd02..e0c9c89cb 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -5,6 +5,7 @@ import inspect from nmmo.task.group import Group +from nmmo.task.game_state import GameState from nmmo.task.predicate_api import Predicate, make_predicate, arg_to_string from nmmo.task import base_predicates as bp @@ -16,7 +17,8 @@ def __init__(self, eval_fn: Callable, assignee: Union[Iterable[int], int], reward_multiplier = 1.0, - embedding = None): + embedding = None, + spec_name: str = None): if isinstance(assignee, int): self._assignee = (assignee,) else: @@ -27,6 +29,7 @@ def __init__(self, self._completed = False self._reward_multiplier = reward_multiplier self._embedding = embedding + self.spec_name = spec_name # None if not created using TaskSpec self.name = self._make_name(self.__class__.__name__, eval_fn=eval_fn, assignee=self._assignee) @@ -53,7 +56,7 @@ def embedding(self): def set_embedding(self, embedding): self._embedding = embedding - def _map_progress_to_reward(self, gs) -> float: + def _map_progress_to_reward(self, gs: GameState) -> float: """ The default reward is the diff between the old and new progress. Once the task is completed, no more reward is provided. @@ -70,16 +73,17 @@ def _map_progress_to_reward(self, gs) -> float: return diff - def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: + def compute_rewards(self, gs: GameState) -> Tuple[Dict[int, float], Dict[int, Dict]]: """ Environment facing API Returns rewards and infos for all agents in subject """ reward = self._map_progress_to_reward(gs) * self._reward_multiplier rewards = {int(ent_id): reward for ent_id in self._assignee} - infos = {int(ent_id): {'reward': reward, - 'progress': self._progress, - 'completed': self._completed} + infos = {int(ent_id): {"task_spec": self.spec_name, + "reward": reward, + "progress": self._progress, + "completed": self._completed} for ent_id in self._assignee} # NOTE: tasks do not know whether assignee agents are alive or dead @@ -89,7 +93,7 @@ def compute_rewards(self, gs) -> Tuple[Dict[int, float], Dict[int, Dict]]: def _make_name(self, class_name, **kwargs) -> str: name = [class_name] + \ [f"{arg_to_string(key)}:{arg_to_string(arg)}" for key, arg in kwargs.items()] - name = "("+'_'.join(name).replace(' ', '')+")" + name = "("+"_".join(name).replace(" ", "")+")" return name def __str__(self): @@ -127,7 +131,7 @@ def kwargs(self): return {} class OngoingTask(Task): - def _map_progress_to_reward(self, gs) -> float: + def _map_progress_to_reward(self, gs: GameState) -> float: """Keep returning the progress reward after the task is completed. However, this task tracks the completion status in the same manner. """ @@ -136,6 +140,43 @@ def _map_progress_to_reward(self, gs) -> float: self._completed = True return self._progress +class HoldDurationTask(Task): + def __init__(self, + eval_fn: Callable, + assignee: Union[Iterable[int], int], + hold_duration: int, + **kwargs): + super().__init__(eval_fn, assignee, **kwargs) + self._hold_duration = hold_duration + self._reset_timer() + + def _reset_timer(self): + self._timer = 0 + self._last_success_tick = 0 + + def reset(self): + super().reset() + self._reset_timer() + + def _map_progress_to_reward(self, gs: GameState) -> float: + # pylint: disable=attribute-defined-outside-init + if self._completed: + return 0.0 + + curr_eval = max(min(self._eval_fn(gs)*1.0,1.0),0.0) + if curr_eval < 1: + self._reset_timer() + else: + self._timer += 1 + self._last_success_tick = gs.current_tick + + new_progress = self._timer / self._hold_duration + diff = new_progress - self._progress + self._progress = new_progress + if self._progress >= 1: + self._completed = True + + return diff ###################################################################### @@ -162,11 +203,11 @@ def make_same_task(pred_cls: Union[Type[Predicate], Callable], def nmmo_default_task(agent_list: Iterable[int], test_mode=None) -> List[Task]: # (almost) no overhead in env._compute_rewards() - if test_mode == 'no_task': + if test_mode == "no_task": return [] # eval function on Predicate class, but does not use Group during eval - if test_mode == 'dummy_eval_fn': + if test_mode == "dummy_eval_fn": # pylint: disable=unused-argument return make_same_task(lambda gs, subject: True, agent_list, task_cls=OngoingTask) diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py index 837ad5de9..38bedfd8b 100644 --- a/nmmo/task/task_spec.py +++ b/nmmo/task/task_spec.py @@ -1,3 +1,4 @@ +import functools from dataclasses import dataclass, field from typing import Iterable, Dict, List, Union, Type from types import FunctionType @@ -14,23 +15,23 @@ eval_fn can come from the base_predicates.py or could be custom functions like above eval_fn_kwargs are the additional args that go into predicate. There are also special keys - * 'target' must be ['left_team', 'right_team', 'left_team_leader', 'right_team_leader'] + * "target" must be ["left_team", "right_team", "left_team_leader", "right_team_leader"] these str will be translated into the actual agent ids task_cls specifies the task class to be used. Default is Task. task_kwargs are the optional, additional args that go into the task. - reward_to: must be in ['team', 'agent'] - * 'team' create a single team task, in which all team members get rewarded - * 'agent' create a task for each agent, in which only the agent gets rewarded + reward_to: must be in ["team", "agent"] + * "team" create a single team task, in which all team members get rewarded + * "agent" create a task for each agent, in which only the agent gets rewarded sampling_weight specifies the weight of the task in the curriculum sampling. Default is 1 """ -REWARD_TO = ['agent', 'team'] -VALID_TARGET = ['left_team', 'left_team_leader', - 'right_team', 'right_team_leader', - 'my_team_leader'] +REWARD_TO = ["agent", "team"] +VALID_TARGET = ["left_team", "left_team_leader", + "right_team", "right_team_leader", + "my_team_leader", "all_foes"] @dataclass class TaskSpec: @@ -38,7 +39,7 @@ class TaskSpec: eval_fn_kwargs: Dict task_cls: Type[Task] = Task task_kwargs: Dict = field(default_factory=dict) - reward_to: str = 'agent' + reward_to: str = "agent" sampling_weight: float = 1.0 embedding: np.ndarray = None @@ -47,10 +48,18 @@ def __post_init__(self): "eval_fn must be a function" assert self.reward_to in REWARD_TO, \ f"reward_to must be in {REWARD_TO}" - if 'target' in self.eval_fn_kwargs: - assert self.eval_fn_kwargs['target'] in VALID_TARGET, \ + if "target" in self.eval_fn_kwargs: + assert self.eval_fn_kwargs["target"] in VALID_TARGET, \ f"target must be in {VALID_TARGET}" + @functools.cached_property + def name(self): + """ Generate a name for the task spec + """ + kwargs_str = "".join([f"{key}={str(val)}_" for key, val in self.eval_fn_kwargs.items()]) + kwargs_str = "(" + kwargs_str[:-1] + ")" # remove the last _ + return "_".join([self.task_cls.__name__, self.eval_fn.__name__, # pylint: disable=no-member + kwargs_str, "reward_to=" + self.reward_to]) def make_task_from_spec(assign_to: Union[Iterable[int], Dict], task_spec: List[TaskSpec]) -> List[Task]: @@ -78,15 +87,16 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], pred_fn_kwargs = task_spec[idx].eval_fn_kwargs task_cls = task_spec[idx].task_cls task_kwargs = task_spec[idx].task_kwargs - task_kwargs['embedding'] = task_spec[idx].embedding # to pass to task_cls + task_kwargs["embedding"] = task_spec[idx].embedding # to pass to task_cls + task_kwargs["spec_name"] = task_spec[idx].name - # reserve 'target' for relative agent mapping - if 'target' in pred_fn_kwargs: - target = pred_fn_kwargs.pop('target') - assert target in VALID_TARGET, 'Invalid target' + # reserve "target" for relative agent mapping + if "target" in pred_fn_kwargs: + target = pred_fn_kwargs.pop("target") + assert target in VALID_TARGET, "Invalid target" # translate target to specific agent ids using team_helper target = team_helper.get_target_agent(team_id, target) - pred_fn_kwargs['target'] = target + pred_fn_kwargs["target"] = target # handle some special cases and instantiate the predicate first predicate = None @@ -96,13 +106,13 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], # TODO: should create a test for these if (pred_fn in [bp.AllDead]) or \ - (pred_fn in [bp.StayAlive] and 'target' in pred_fn_kwargs): + (pred_fn in [bp.StayAlive] and "target" in pred_fn_kwargs): # use the target as the predicate subject - pred_fn_kwargs.pop('target') # remove target + pred_fn_kwargs.pop("target") # remove target predicate = pred_cls(Group(target), **pred_fn_kwargs) # create the task - if reward_to == 'team': + if reward_to == "team": assignee = team_helper.teams[team_id] if predicate is None: predicate = pred_cls(Group(assignee), **pred_fn_kwargs) @@ -112,7 +122,7 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], tasks.append(predicate.create_task(assignee=assignee, task_cls=task_cls, **task_kwargs)) - elif reward_to == 'agent': + elif reward_to == "agent": agent_list = team_helper.teams[team_id] if predicate is None: tasks += make_same_task(pred_cls, agent_list, pred_kwargs=pred_fn_kwargs, diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 8f4f7b326..50c4d855a 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -6,11 +6,13 @@ import nmmo from nmmo.core.env import Env from nmmo.task.predicate_api import make_predicate, Predicate -from nmmo.task.task_api import Task, OngoingTask +from nmmo.task.task_api import Task, OngoingTask, HoldDurationTask from nmmo.task.task_spec import TaskSpec, make_task_from_spec from nmmo.task.group import Group from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT -from nmmo.task.base_predicates import TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive +from nmmo.task.base_predicates import ( + TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive, HoardGold +) from nmmo.systems import item as Item from nmmo.core import action as Action @@ -60,28 +62,28 @@ def test_predicate_operators(self): # NOTE: only the instantiated predicate can be used with operators like below mock_gs = MockGameState() - # get the individual predicate's source code + # get the individual predicate"s source code self.assertEqual(SUCCESS.get_source_code(), - 'def Success(gs, subject: Group):\n return True') + "def Success(gs, subject: Group):\n return True") self.assertEqual(FAILURE.get_source_code(), - 'def Failure(gs, subject: Group):\n return False') + "def Failure(gs, subject: Group):\n return False") # AND (&), OR (|), NOT (~) pred1 = SUCCESS & FAILURE self.assertFalse(pred1(mock_gs)) # NOTE: get_source_code() of the combined predicates returns the joined str - # of each predicate's source code, which may NOT represent what the actual + # of each predicate"s source code, which may NOT represent what the actual # predicate is doing self.assertEqual(pred1.get_source_code(), - 'def Success(gs, subject: Group):\n return True\n\n'+ - 'def Failure(gs, subject: Group):\n return False') + "def Success(gs, subject: Group):\n return True\n\n"+ + "def Failure(gs, subject: Group):\n return False") pred2 = SUCCESS | FAILURE | SUCCESS self.assertTrue(pred2(mock_gs)) self.assertEqual(pred2.get_source_code(), - 'def Success(gs, subject: Group):\n return True\n\n'+ - 'def Failure(gs, subject: Group):\n return False\n\n'+ - 'def Success(gs, subject: Group):\n return True') + "def Success(gs, subject: Group):\n return True\n\n"+ + "def Failure(gs, subject: Group):\n return False\n\n"+ + "def Success(gs, subject: Group):\n return True") pred3 = SUCCESS & ~ FAILURE & SUCCESS self.assertTrue(pred3(mock_gs)) @@ -98,7 +100,7 @@ def test_predicate_operators(self): # NOTE: demonstrating the above point again, -- it just returns the functions # NOT what this predicate actually evaluates. self.assertEqual(pred4.get_source_code(), - 'def Success(gs, subject: Group):\n return True') + "def Success(gs, subject: Group):\n return True") pred5 = 0.3 * SUCCESS - 1 self.assertEqual(pred5(mock_gs), 0.0) # cannot go below 0 @@ -109,11 +111,11 @@ def test_predicate_operators(self): def test_team_assignment(self): team = Group([1, 2, 8, 9], "TeamFoo") - self.assertEqual(team.name, 'TeamFoo') + self.assertEqual(team.name, "TeamFoo") self.assertEqual(team[2].name, "TeamFoo.2") self.assertEqual(team[2], (8,)) - # don't allow member of one-member team + # don"t allow member of one-member team self.assertEqual(team[2][0].name, team[2].name) def test_predicate_name(self): @@ -174,10 +176,10 @@ def test_task_api_with_predicate(self): action = Action.Melee predicate = fake_pred_cls(group, a=1, b=item, c=action) self.assertEqual(predicate.get_source_code(), - 'def Fake(gs, subject, a,b,c):\n return False') - self.assertEqual(predicate.get_signature(), ['gs', 'subject', 'a', 'b', 'c']) + "def Fake(gs, subject, a,b,c):\n return False") + self.assertEqual(predicate.get_signature(), ["gs", "subject", "a", "b", "c"]) self.assertEqual(predicate.args, [group]) - self.assertDictEqual(predicate.kwargs, {'a': 1, 'b': item, 'c': action}) + self.assertDictEqual(predicate.kwargs, {"a": 1, "b": item, "c": action}) assignee = [1,2,3] # list of agent ids task = predicate.create_task(assignee=assignee) @@ -186,13 +188,13 @@ def test_task_api_with_predicate(self): self.assertEqual(task.name, # contains predicate name and assignee list "(Task_eval_fn:(Fake_(2,)_a:1_b:Hat_c:Melee)_assignee:(1,2,3))") self.assertEqual(task.get_source_code(), - 'def Fake(gs, subject, a,b,c):\n return False') - self.assertEqual(task.get_signature(), ['gs', 'subject', 'a', 'b', 'c']) + "def Fake(gs, subject, a,b,c):\n return False") + self.assertEqual(task.get_signature(), ["gs", "subject", "a", "b", "c"]) self.assertEqual(task.args, [group]) - self.assertDictEqual(task.kwargs, {'a': 1, 'b': item, 'c': action}) + self.assertDictEqual(task.kwargs, {"a": 1, "b": item, "c": action}) for agent_id in assignee: self.assertEqual(rewards[agent_id], 0) - self.assertEqual(infos[agent_id]['progress'], 0) # progress (False -> 0) + self.assertEqual(infos[agent_id]["progress"], 0) # progress (False -> 0) self.assertFalse(task.completed) def test_task_api_with_function(self): @@ -209,16 +211,16 @@ def is_agent_1(gs): self.assertEqual(task.name, # contains predicate name and assignee list "(Task_eval_fn:is_agent_1_assignee:(1,2,3))") self.assertEqual(task.get_source_code(), - 'def is_agent_1(gs):\n ' + - 'return any(agent_id == 1 for agent_id in subject.agents)') - self.assertEqual(task.get_signature(), ['gs']) + "def is_agent_1(gs):\n " + + "return any(agent_id == 1 for agent_id in subject.agents)") + self.assertEqual(task.get_signature(), ["gs"]) self.assertEqual(task.args, []) self.assertDictEqual(task.kwargs, {}) self.assertEqual(task.subject, tuple(assignee)) self.assertEqual(task.assignee, tuple(assignee)) for agent_id in assignee: self.assertEqual(rewards[agent_id], 1) - self.assertEqual(infos[agent_id]['progress'], 1) # progress (True -> 1) + self.assertEqual(infos[agent_id]["progress"], 1) # progress (True -> 1) self.assertTrue(task.completed) def test_predicate_fn_using_other_predicate_fn(self): @@ -229,8 +231,8 @@ def PracticeFormation(gs, subject, dist, num_tick): # team should stay together within 1 tile for 10 ticks goal_tick = 10 task_spec = TaskSpec(eval_fn=PracticeFormation, - eval_fn_kwargs={'dist': 1, 'num_tick': goal_tick}, - reward_to='team') + eval_fn_kwargs={"dist": 1, "num_tick": goal_tick}, + reward_to="team") # create the test task from the task spec teams = {1:[1,2,3], 3:[4,5], 6:[6,7], 9:[8,9], 14:[10,11]} @@ -246,13 +248,13 @@ def PracticeFormation(gs, subject, dist, num_tick): # check the task information task = env.tasks[0] self.assertEqual(task.name, - '(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)'+ - '_assignee:(1,2,3))') + "(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)"+ + "_assignee:(1,2,3))") self.assertEqual(task.get_source_code(), - 'def PracticeFormation(gs, subject, dist, num_tick):\n '+ - 'return AllMembersWithinRange(gs, subject, dist) * '+ - 'TickGE(gs, subject, num_tick)') - self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) + "def PracticeFormation(gs, subject, dist, num_tick):\n "+ + "return AllMembersWithinRange(gs, subject, dist) * "+ + "TickGE(gs, subject, num_tick)") + self.assertEqual(task.get_signature(), ["gs", "subject", "dist", "num_tick"]) self.assertEqual(task.subject, tuple(teams[team_ids[0]])) self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) self.assertEqual(task.assignee, tuple(teams[team_ids[0]])) @@ -262,7 +264,7 @@ def PracticeFormation(gs, subject, dist, num_tick): for task in agent_tasks: self.assertTrue(agent_id in task.assignee) - # move agent 2, 3 to agent 1's pos + # move agent 2, 3 to agent 1"s pos for agent_id in [2,3]: change_spawn_pos(env.realm, agent_id, env.realm.players[1].pos) @@ -273,37 +275,39 @@ def PracticeFormation(gs, subject, dist, num_tick): if tick < 10: self.assertAlmostEqual(rewards[1], 1/goal_tick) self.assertAlmostEqual((1+tick)/goal_tick, - infos[1]['task'][env.tasks[0].name]['progress']) + infos[1]["task"][env.tasks[0].name]["progress"]) else: # tick 11, task should be completed self.assertEqual(rewards[1], 0) - self.assertEqual(infos[1]['task'][env.tasks[0].name]['progress'], 1) - self.assertEqual(infos[1]['task'][env.tasks[0].name]['completed'], True) + self.assertEqual(infos[1]["task"][env.tasks[0].name]["progress"], 1) + self.assertEqual(infos[1]["task"][env.tasks[0].name]["completed"], True) # test the task_spec_with_embedding task_embedding = np.ones(config.TASK_EMBED_DIM, dtype=np.float32) task_spec_with_embedding = TaskSpec(eval_fn=PracticeFormation, - eval_fn_kwargs={'dist': 1, 'num_tick': goal_tick}, - reward_to='team', + eval_fn_kwargs={"dist": 1, "num_tick": goal_tick}, + reward_to="team", embedding=task_embedding) env.reset(make_task_fn=lambda: make_task_from_spec(teams, [task_spec_with_embedding])) task = env.tasks[0] + self.assertEqual(task.spec_name, # without the subject and assignee agent ids + "Task_PracticeFormation_(dist=1_num_tick=10)_reward_to=team") self.assertEqual(task.name, - '(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)'+ - '_assignee:(1,2,3))') + "(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)"+ + "_assignee:(1,2,3))") self.assertEqual(task.get_source_code(), - 'def PracticeFormation(gs, subject, dist, num_tick):\n '+ - 'return AllMembersWithinRange(gs, subject, dist) * '+ - 'TickGE(gs, subject, num_tick)') - self.assertEqual(task.get_signature(), ['gs', 'subject', 'dist', 'num_tick']) + "def PracticeFormation(gs, subject, dist, num_tick):\n "+ + "return AllMembersWithinRange(gs, subject, dist) * "+ + "TickGE(gs, subject, num_tick)") + self.assertEqual(task.get_signature(), ["gs", "subject", "dist", "num_tick"]) self.assertEqual(task.subject, tuple(teams[team_ids[0]])) self.assertEqual(task.kwargs, task_spec.eval_fn_kwargs) self.assertEqual(task.assignee, tuple(teams[team_ids[0]])) self.assertTrue(np.array_equal(task.embedding, task_embedding)) obs_spec = env.observation_space(1) - self.assertTrue(obs_spec['Task'].contains(task.embedding)) + self.assertTrue(obs_spec["Task"].contains(task.embedding)) def test_completed_tasks_in_info(self): # pylint: disable=no-value-for-parameter,no-member @@ -332,24 +336,24 @@ def test_completed_tasks_in_info(self): _, _, _, infos = env.step({}) # agent 1: assigned only task 1, which is always True - self.assertEqual(infos[1]['task'][env.tasks[0].name]['reward'], 1.0) + self.assertEqual(infos[1]["task"][env.tasks[0].name]["reward"], 1.0) for i in [1, 2]: # task 2 and 3 - self.assertTrue(env.tasks[i].name not in infos[1]['task']) + self.assertTrue(env.tasks[i].name not in infos[1]["task"]) # agent 2: assigned task 2 (Failure) and task 4 (Success) - self.assertEqual(infos[2]['task'][env.tasks[1].name]['reward'], 0.0) # task 2 - self.assertEqual(infos[2]['task'][env.tasks[3].name]['reward'], 1.0) # task 4 + self.assertEqual(infos[2]["task"][env.tasks[1].name]["reward"], 0.0) # task 2 + self.assertEqual(infos[2]["task"][env.tasks[3].name]["reward"], 1.0) # task 4 # agent 3 assigned task 3, Fake(), which is always False (0) - self.assertEqual(infos[3]['task'][env.tasks[2].name]['reward'], 0.0) # task 3 + self.assertEqual(infos[3]["task"][env.tasks[2].name]["reward"], 0.0) # task 3 # all agents in the same team with agent 2 have SUCCESS - # other agents don't have any tasks assigned + # other agents don"t have any tasks assigned for ent_id in env.possible_agents: if ent_id in same_team: - self.assertEqual(infos[ent_id]['task'][env.tasks[3].name]['reward'], 1.0) + self.assertEqual(infos[ent_id]["task"][env.tasks[3].name]["reward"], 1.0) else: - self.assertTrue(env.tasks[3].name not in infos[ent_id]['task']) + self.assertTrue(env.tasks[3].name not in infos[ent_id]["task"]) # DONE @@ -357,13 +361,13 @@ def test_make_task_from_spec(self): teams = {0:[1,2,3], 1:[4,5,6]} test_embedding = np.array([1,2,3]) task_spec = [ - TaskSpec(eval_fn=TickGE, eval_fn_kwargs={'num_tick': 20}), + TaskSpec(eval_fn=TickGE, eval_fn_kwargs={"num_tick": 20}), TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={}, task_cls=OngoingTask), - TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={'target': 'my_team_leader'}, - task_cls=OngoingTask, reward_to='team'), - TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={'target': 'left_team'}, - task_cls=OngoingTask, task_kwargs={'reward_multiplier': 2}, - reward_to='team', embedding=test_embedding), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={"target": "my_team_leader"}, + task_cls=OngoingTask, reward_to="team"), + TaskSpec(eval_fn=StayAlive, eval_fn_kwargs={"target": "left_team"}, + task_cls=OngoingTask, task_kwargs={"reward_multiplier": 2}, + reward_to="team", embedding=test_embedding), ] task_list = [] @@ -371,17 +375,75 @@ def test_make_task_from_spec(self): for single_spec in task_spec: task_list.append(make_task_from_spec(teams, [single_spec])) + # check the task spec names + self.assertEqual(task_list[0][0].spec_name, + "Task_TickGE_(num_tick=20)_reward_to=agent") + self.assertEqual(task_list[1][0].spec_name, + "OngoingTask_StayAlive_()_reward_to=agent") + self.assertEqual(task_list[2][0].spec_name, + "OngoingTask_StayAlive_(target=my_team_leader)_reward_to=team") + self.assertEqual(task_list[3][0].spec_name, + "OngoingTask_StayAlive_(target=left_team)_reward_to=team") + # check the task names self.assertEqual(task_list[0][0].name, - '(Task_eval_fn:(TickGE_(1,)_num_tick:20)_assignee:(1,))') + "(Task_eval_fn:(TickGE_(1,)_num_tick:20)_assignee:(1,))") self.assertEqual(task_list[1][0].name, - '(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,))') + "(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,))") self.assertEqual(task_list[2][0].name, - '(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,2,3))') + "(OngoingTask_eval_fn:(StayAlive_(1,))_assignee:(1,2,3))") self.assertEqual(task_list[3][0].name, - '(OngoingTask_eval_fn:(StayAlive_(4,5,6))_assignee:(1,2,3))') + "(OngoingTask_eval_fn:(StayAlive_(4,5,6))_assignee:(1,2,3))") self.assertEqual(task_list[3][0].reward_multiplier, 2) self.assertTrue(np.array_equal(task_list[3][0].embedding, np.array([1,2,3]))) -if __name__ == '__main__': + def test_hold_duration_task(self): + # pylint: disable=protected-access + # each agent should hoard gold for 10 ticks + goal_tick = goal_gold = 10 + task_spec = [TaskSpec(eval_fn=HoardGold, + eval_fn_kwargs={"amount": goal_gold}, + task_cls=HoldDurationTask, + task_kwargs={"hold_duration": goal_tick})] * 3 + + config = ScriptedAgentTestConfig() + config.PLAYERS =[Sleeper] + config.IMMORTAL = True + + teams = {id: [id] for id in range(1,4)} + env = Env(config) + env.reset(make_task_fn=lambda: make_task_from_spec(teams, task_spec)) + + # give agent 1, 2 enough gold + for agent_id in [1,2]: + env.realm.players[agent_id].gold.update(goal_gold+1) + + for _ in range(5): + env.step({}) + + # check the task information + self.assertEqual(env.tasks[0].spec_name, + "HoldDurationTask_HoardGold_(amount=10)_reward_to=agent") + self.assertTrue(env.tasks[0]._progress == 0.5) # agent 1 has enough gold + self.assertTrue(env.tasks[1]._progress == 0.5) # agent 2 has enough gold + self.assertTrue(env.tasks[2]._progress == 0.0) # agent 3 has no gold + for task in env.tasks: + self.assertTrue(task.completed is False) # not completed yet + + # take away gold from agent 2 + env.realm.players[2].gold.update(goal_gold-1) + + env.step({}) + self.assertTrue(env.tasks[0]._progress == 0.6) # agent 1 has enough gold + self.assertTrue(env.tasks[1]._progress == 0) # agent 2 has not enough gold + + for _ in range(4): + env.step({}) + + # only agent 1 successfully held 10 gold for 10 ticks + self.assertTrue(env.tasks[0].completed is True) + self.assertTrue(env.tasks[1].completed is False) + self.assertTrue(env.tasks[2].completed is False) + +if __name__ == "__main__": unittest.main() From c11fba76fbea518f2e204f055724d5a62f8f1baf Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 27 Jul 2023 17:12:22 +0900 Subject: [PATCH 079/113] can define task spec with instantiated predicates --- nmmo/task/task_spec.py | 20 ++++++++++++-------- tests/task/test_task_api.py | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py index 38bedfd8b..714cd8fde 100644 --- a/nmmo/task/task_spec.py +++ b/nmmo/task/task_spec.py @@ -6,7 +6,7 @@ import numpy as np from nmmo.task.task_api import Task, make_same_task -from nmmo.task.predicate_api import make_predicate +from nmmo.task.predicate_api import Predicate, make_predicate from nmmo.task.group import Group from nmmo.task import base_predicates as bp from nmmo.lib.team_helper import TeamHelper @@ -42,10 +42,14 @@ class TaskSpec: reward_to: str = "agent" sampling_weight: float = 1.0 embedding: np.ndarray = None + predicate: Predicate = None def __post_init__(self): - assert isinstance(self.eval_fn, FunctionType), \ - "eval_fn must be a function" + if self.predicate is None: + assert isinstance(self.eval_fn, FunctionType), \ + "eval_fn must be a function" + else: + assert self.eval_fn is None, "Cannot specify both eval_fn and predicate" assert self.reward_to in REWARD_TO, \ f"reward_to must be in {REWARD_TO}" if "target" in self.eval_fn_kwargs: @@ -54,11 +58,11 @@ def __post_init__(self): @functools.cached_property def name(self): - """ Generate a name for the task spec - """ + # pylint: disable=no-member kwargs_str = "".join([f"{key}={str(val)}_" for key, val in self.eval_fn_kwargs.items()]) kwargs_str = "(" + kwargs_str[:-1] + ")" # remove the last _ - return "_".join([self.task_cls.__name__, self.eval_fn.__name__, # pylint: disable=no-member + pred_name = self.eval_fn.__name__ if self.predicate is None else self.predicate.name + return "_".join([self.task_cls.__name__, pred_name, kwargs_str, "reward_to=" + self.reward_to]) def make_task_from_spec(assign_to: Union[Iterable[int], Dict], @@ -89,6 +93,7 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], task_kwargs = task_spec[idx].task_kwargs task_kwargs["embedding"] = task_spec[idx].embedding # to pass to task_cls task_kwargs["spec_name"] = task_spec[idx].name + predicate = task_spec[idx].predicate # reserve "target" for relative agent mapping if "target" in pred_fn_kwargs: @@ -99,8 +104,7 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], pred_fn_kwargs["target"] = target # handle some special cases and instantiate the predicate first - predicate = None - if isinstance(pred_fn, FunctionType): + if pred_fn is not None and isinstance(pred_fn, FunctionType): # if a function is provided as a predicate pred_cls = make_predicate(pred_fn) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 50c4d855a..18f9299ab 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -445,5 +445,25 @@ def test_hold_duration_task(self): self.assertTrue(env.tasks[1].completed is False) self.assertTrue(env.tasks[2].completed is False) + def test_task_spec_with_predicate(self): + teams = {0:[1,2,3], 1:[4,5,6]} + SUCCESS = make_predicate(Success)(Group(1)) + FAILURE = make_predicate(Failure)(Group([2,3])) + predicate = SUCCESS & FAILURE + predicate.name = "SuccessAndFailure" + + # make task spec + task_spec = [TaskSpec(predicate=predicate, + eval_fn=None, eval_fn_kwargs={"success_target": 1})] + tasks = make_task_from_spec(teams, task_spec) + + env = Env(ScriptedAgentTestConfig()) + env.reset(make_task_fn=lambda: tasks) + env.step({}) + + # check the task information + self.assertEqual(env.tasks[0].spec_name, + "Task_SuccessAndFailure_(success_target=1)_reward_to=agent") + if __name__ == "__main__": unittest.main() From 67be912c7c724106a8e1d796807abbb22c1b186f Mon Sep 17 00:00:00 2001 From: jsuarez5341 Date: Thu, 27 Jul 2023 19:38:58 +0000 Subject: [PATCH 080/113] Re-add websocket deps and separate scripted base class --- nmmo/__init__.py | 4 ++-- nmmo/core/agent.py | 5 ++++- scripted/baselines.py | 6 +++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/nmmo/__init__.py b/nmmo/__init__.py index eec648f32..22813666b 100644 --- a/nmmo/__init__.py +++ b/nmmo/__init__.py @@ -6,7 +6,7 @@ from .render.overlay import Overlay, OverlayRegistry from .core import config, agent, action from .core.action import Action -from .core.agent import Agent +from .core.agent import Agent, Scripted from .core.env import Env from .core.terrain import MapGenerator, Terrain @@ -22,7 +22,7 @@ \ \:\ \ \:\ \ \:\ \ \::/ maintained at MIT in \__\/ \__\/ \__\/ \__\/ Phillip Isola's lab ''' -__all__ = ['Env', 'config', 'agent', 'Agent', 'MapGenerator', 'Terrain', +__all__ = ['Env', 'config', 'agent', 'Agent', 'Scripted', 'MapGenerator', 'Terrain', 'action', 'Action', 'material', 'spawn', 'Overlay', 'OverlayRegistry'] diff --git a/nmmo/core/agent.py b/nmmo/core/agent.py index 04fdd5500..3b674aad5 100644 --- a/nmmo/core/agent.py +++ b/nmmo/core/agent.py @@ -1,4 +1,3 @@ - class Agent: policy = 'Neural' @@ -18,3 +17,7 @@ def __call__(self, obs): Args: obs: Agent observation provided by the environment ''' + +class Scripted(Agent): + '''Base class for scripted agents''' + policy = 'Scripted' \ No newline at end of file diff --git a/scripted/baselines.py b/scripted/baselines.py index 4bf2add75..c08c22a5b 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -15,8 +15,8 @@ from scripted import attack, move -class Scripted(nmmo.Agent): - '''Template class for scripted models. +class Scripted(nmmo.Scripted): + '''Template class for baseline scripted models. You may either subclass directly or mirror the __call__ function''' scripted = True @@ -522,4 +522,4 @@ def __init__(self, config, idx): if config.SPECIALIZE: self.style = [action.Mage] self.weapon = item_system.Wand - self.ammo = item_system.Runes + self.ammo = item_system.Runes \ No newline at end of file From 5a951a04014f9984d48287fd3c2a418c64a5a950 Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 28 Jul 2023 10:05:51 +0900 Subject: [PATCH 081/113] clean up scripted code --- nmmo/core/agent.py | 11 +- nmmo/core/env.py | 3 +- scripted/attack.py | 28 ++- scripted/baselines.py | 38 ++-- scripted/behavior.py | 62 ------ scripted/move.py | 446 ++++++++++++++++++++---------------------- scripted/utils.py | 30 --- 7 files changed, 248 insertions(+), 370 deletions(-) delete mode 100644 scripted/behavior.py delete mode 100644 scripted/utils.py diff --git a/nmmo/core/agent.py b/nmmo/core/agent.py index 3b674aad5..0bdfa6b10 100644 --- a/nmmo/core/agent.py +++ b/nmmo/core/agent.py @@ -10,6 +10,7 @@ def __init__(self, config, idx): ''' self.config = config self.iden = idx + self._np_random = None def __call__(self, obs): '''Used by scripted agents to compute actions. Override in subclasses. @@ -18,6 +19,14 @@ def __call__(self, obs): obs: Agent observation provided by the environment ''' + def set_rng(self, np_random): + '''Set the random number generator for the agent for reproducibility + + Args: + np_random: A numpy random.Generator object + ''' + self._np_random = np_random + class Scripted(Agent): '''Base class for scripted agents''' - policy = 'Scripted' \ No newline at end of file + policy = 'Scripted' diff --git a/nmmo/core/env.py b/nmmo/core/env.py index dfca26dc9..94b9b9e31 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -18,7 +18,6 @@ from nmmo.task import task_api, task_spec from nmmo.task.game_state import GameStateGenerator from nmmo.lib import seeding -from scripted.baselines import Scripted class Env(ParallelEnv): # Environment wrapper for Neural MMO using the Parallel PettingZoo API @@ -169,7 +168,7 @@ def reset(self, map_id=None, seed=None, options=None, # check if there are scripted agents for eid, ent in self.realm.players.items(): - if isinstance(ent.agent, Scripted): + if isinstance(ent.agent, nmmo.Scripted): self.scripted_agents.add(eid) ent.agent.set_rng(self._np_random) diff --git a/scripted/attack.py b/scripted/attack.py index 0f2c916c0..9b62089bd 100644 --- a/scripted/attack.py +++ b/scripted/attack.py @@ -1,31 +1,28 @@ -# pylint: disable=all - +# pylint: disable=invalid-name, unused-argument import numpy as np import nmmo from nmmo.core.observation import Observation from nmmo.entity.entity import EntityState +from nmmo.lib import utils -from scripted import utils def closestTarget(config, ob: Observation): shortestDist = np.inf closestAgent = None agent = ob.agent() - start = (agent.row, agent.col) - for target in ob.entities.values: - target = EntityState.parse_array(target) - if target.id == agent.id: + for target_ent in ob.entities.values: + target_ent = EntityState.parse_array(target_ent) + if target_ent.id == agent.id: continue - dist = utils.l1(start, (target.row, target.col)) - + dist = utils.linf_single(start, (target_ent.row, target_ent.col)) if dist < shortestDist and dist != 0: shortestDist = dist - closestAgent = target + closestAgent = target_ent if closestAgent is None: return None, None @@ -36,18 +33,17 @@ def attacker(config, ob: Observation): agent = ob.agent() attacker_id = agent.attacker_id - if attacker_id == 0: return None, None - target = ob.entity(attacker_id) - if target == None: + target_ent = ob.entity(attacker_id) + if target_ent is None: return None, None - - return target, utils.l1((agent.row, agent.col), (target.row, target.col)) + + return target_ent,\ + utils.linf_single((agent.row, agent.col), (target_ent.row, target_ent.col)) def target(config, actions, style, targetID): actions[nmmo.action.Attack] = { nmmo.action.Style: style, nmmo.action.Target: targetID} - diff --git a/scripted/baselines.py b/scripted/baselines.py index c08c22a5b..adfb5ca88 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -1,9 +1,6 @@ -# TODO: try to remove the below line -# pylint: disable=all - +# pylint: disable=invalid-name, attribute-defined-outside-init, no-member from typing import Dict from collections import defaultdict -import numpy as np import nmmo from nmmo import material @@ -15,6 +12,7 @@ from scripted import attack, move + class Scripted(nmmo.Scripted): '''Template class for baseline scripted models. @@ -27,19 +25,15 @@ def __init__(self, config, idx): config : A forge.blade.core.Config object or subclass object ''' super().__init__(config, idx) - self._np_random = None self.health_max = config.PLAYER_BASE_HEALTH if config.RESOURCE_SYSTEM_ENABLED: - self.food_max = config.RESOURCE_BASE - self.water_max = config.RESOURCE_BASE + self.food_max = config.RESOURCE_BASE + self.water_max = config.RESOURCE_BASE self.spawnR = None self.spawnC = None - def set_rng(self, np_random): - self._np_random = np_random - @property def policy(self): return self.__class__.__name__ @@ -83,7 +77,7 @@ def attack(self): style = self._np_random.choice(self.style) attack.target(self.config, self.actions, style, self.targetID) - def target_weak(self): + def target_weak(self): # pylint: disable=inconsistent-return-statements '''Target the nearest agent if it is weak''' if self.closest is None: return False @@ -232,14 +226,16 @@ def equip(self, items: set): # InventoryItem needs where the item is (index) in the inventory self.actions[action.Use] = { - action.InventoryItem: self.ob.inventory.index(itm.id)} # list(self.ob.inventory.ids).index(itm.id) + action.InventoryItem: self.ob.inventory.index(itm.id)} return True def consume(self): - if self.me.health <= self.health_max // 2 and item_system.Potion.ITEM_TYPE_ID in self.best_items: + if self.me.health <= self.health_max // 2 \ + and item_system.Potion.ITEM_TYPE_ID in self.best_items: itm = self.best_items[item_system.Potion.ITEM_TYPE_ID] - elif (self.me.food == 0 or self.me.water == 0) and item_system.Ration.ITEM_TYPE_ID in self.best_items: + elif (self.me.food == 0 or self.me.water == 0) \ + and item_system.Ration.ITEM_TYPE_ID in self.best_items: itm = self.best_items[item_system.Ration.ITEM_TYPE_ID] else: return @@ -249,7 +245,7 @@ def consume(self): # InventoryItem needs where the item is (index) in the inventory self.actions[action.Use] = { - action.InventoryItem: self.ob.inventory.index(itm.id)} # list(self.ob.inventory.ids).index(itm.id) + action.InventoryItem: self.ob.inventory.index(itm.id)} def sell(self, keep_k: dict, keep_best: set): for itm in self.inventory.values(): @@ -272,8 +268,8 @@ def sell(self, keep_k: dict, keep_best: set): continue self.actions[action.Sell] = { - action.InventoryItem: self.ob.inventory.index(itm.id), # list(self.ob.inventory.ids).index(itm.id) - action.Price: action.Price.edges[price-1] } # Price starts from 1 + action.InventoryItem: self.ob.inventory.index(itm.id), + action.Price: action.Price.index(price) } return itm @@ -299,7 +295,7 @@ def buy(self, buy_k: dict, buy_upgrade: set): # Buy best heuristic upgrade if purchase: self.actions[action.Buy] = { - action.MarketItem: self.ob.market.index(purchase.id)} #list(self.ob.market.ids).index(purchase.id)} + action.MarketItem: self.ob.market.index(purchase.id)} return def exchange(self): @@ -391,9 +387,9 @@ def __call__(self, obs): super().__call__(obs) if self.forage_criterion: - self.forage() + self.forage() else: - self.explore() + self.explore() return self.actions @@ -522,4 +518,4 @@ def __init__(self, config, idx): if config.SPECIALIZE: self.style = [action.Mage] self.weapon = item_system.Wand - self.ammo = item_system.Runes \ No newline at end of file + self.ammo = item_system.Runes diff --git a/scripted/behavior.py b/scripted/behavior.py deleted file mode 100644 index c2d8753c2..000000000 --- a/scripted/behavior.py +++ /dev/null @@ -1,62 +0,0 @@ -# pylint: disable=all - -import nmmo -from nmmo.systems.ai import move, attack, utils - -def update(entity): - '''Update validity of tracked entities''' - if not utils.validTarget(entity, entity.attacker, entity.vision): - entity.attacker = None - if not utils.validTarget(entity, entity.target, entity.vision): - entity.target = None - if not utils.validTarget(entity, entity.closest, entity.vision): - entity.closest = None - - if entity.__class__.__name__ != 'Player': - return - - if not utils.validResource(entity, entity.food, entity.vision): - entity.food = None - if not utils.validResource(entity, entity.water, entity.vision): - entity.water = None - -def pathfind(config, ob, actions, rr, cc): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.pathfind(config, ob, actions, rr, cc)} - -def meander(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.habitable(realm.map.tiles, entity)} - -def evade(realm, actions, entity): - actions[nmmo.action.Move] = {nmmo.action.Direction: move.antipathfind(realm.map.tiles, entity, entity.attacker)} - -def hunt(realm, actions, entity): - #Move args - distance = utils.distance(entity, entity.target) - - direction = None - if distance == 0: - direction = move.random_direction() - elif distance > 1: - direction = move.pathfind(realm.map.tiles, entity, entity.target) - - if direction is not None: - actions[nmmo.action.Move] = {nmmo.action.Direction: direction} - - attack(realm, actions, entity) - -def attack(realm, actions, entity): - distance = utils.distance(entity, entity.target) - if distance > entity.skills.style.attack_range(realm.config): - return - - actions[nmmo.action.Attack] = {nmmo.action.Style: entity.skills.style, - nmmo.action.Target: entity.target} - -def forageDP(realm, actions, entity): - direction = utils.forageDP(realm.map.tiles, entity) - actions[nmmo.action.Move] = {nmmo.action.Direction: move.towards(direction)} - -#def forageDijkstra(realm, actions, entity): -def forageDijkstra(config, ob, actions, food_max, water_max): - direction = utils.forageDijkstra(config, ob, food_max, water_max) - actions[nmmo.action.Move] = {nmmo.action.Direction: move.towards(direction)} diff --git a/scripted/move.py b/scripted/move.py index 488a8f0c7..0c80f9d6b 100644 --- a/scripted/move.py +++ b/scripted/move.py @@ -1,321 +1,291 @@ -# pylint: disable=all - -import numpy as np -import random - +# pylint: disable=invalid-name, unused-argument import heapq +import numpy as np from nmmo.core import action from nmmo.core.observation import Observation from nmmo.lib import material +from nmmo.systems.ai import utils -from scripted import utils - -def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] def inSight(dr, dc, vision): - return ( - dr >= -vision and - dc >= -vision and - dr <= vision and - dc <= vision) + return (-vision <= dr <= vision and + -vision <= dc <= vision) def rand(config, ob, actions, np_random): - direction = np_random.choice(action.Direction.edges) - actions[action.Move] = {action.Direction: direction} + direction = np_random.choice(action.Direction.edges) + actions[action.Move] = {action.Direction: direction} def towards(direction, np_random): - if direction == (-1, 0): - return action.North - elif direction == (1, 0): - return action.South - elif direction == (0, -1): - return action.West - elif direction == (0, 1): - return action.East - else: - return np_random.choice(action.Direction.edges) + if direction == (-1, 0): + return action.North + if direction == (1, 0): + return action.South + if direction == (0, -1): + return action.West + if direction == (0, 1): + return action.East + + return np_random.choice(action.Direction.edges) def pathfind(config, ob, actions, rr, cc, np_random): - direction = aStar(config, ob, actions, rr, cc) - direction = towards(direction, np_random) - actions[action.Move] = {action.Direction: direction} + direction = aStar(config, ob, actions, rr, cc) + direction = towards(direction, np_random) + actions[action.Move] = {action.Direction: direction} def meander(config, ob, actions, np_random): - cands = [] - if ob.tile(-1, 0).material_id in material.Habitable.indices: - cands.append((-1, 0)) - if ob.tile(1, 0).material_id in material.Habitable.indices: - cands.append((1, 0)) - if ob.tile(0, -1).material_id in material.Habitable.indices: - cands.append((0, -1)) - if ob.tile(0, 1).material_id in material.Habitable.indices: - cands.append((0, 1)) - if not cands: - return (-1, 0) - - direction = np_random.choices(cands)[0] - direction = towards(direction, np_random) - actions[action.Move] = {action.Direction: direction} + cands = [] + if ob.tile(-1, 0).material_id in material.Habitable.indices: + cands.append((-1, 0)) + if ob.tile(1, 0).material_id in material.Habitable.indices: + cands.append((1, 0)) + if ob.tile(0, -1).material_id in material.Habitable.indices: + cands.append((0, -1)) + if ob.tile(0, 1).material_id in material.Habitable.indices: + cands.append((0, 1)) + + if len(cands) > 0: + direction = np_random.choices(cands)[0] + direction = towards(direction, np_random) + actions[action.Move] = {action.Direction: direction} def explore(config, ob, actions, r, c, np_random): - vision = config.PLAYER_VISION_RADIUS - sz = config.MAP_SIZE - - centR, centC = sz//2, sz//2 - - vR, vC = centR-r, centC-c - - mmag = max(1, abs(vR), abs(vC)) - rr = int(np.round(vision*vR/mmag)) - cc = int(np.round(vision*vC/mmag)) - pathfind(config, ob, actions, rr, cc, np_random) + vision = config.PLAYER_VISION_RADIUS + sz = config.MAP_SIZE + centR, centC = sz//2, sz//2 + vR, vC = centR-r, centC-c + mmag = max(1, abs(vR), abs(vC)) + rr = int(np.round(vision*vR/mmag)) + cc = int(np.round(vision*vC/mmag)) + pathfind(config, ob, actions, rr, cc, np_random) def evade(config, ob: Observation, actions, attacker, np_random): - agent = ob.agent() - - rr, cc = (2*agent.row - attacker.row, 2*agent.col - attacker.col) - - pathfind(config, ob, actions, rr, cc, np_random) + agent = ob.agent() + rr, cc = (2*agent.row - attacker.row, 2*agent.col - attacker.col) + pathfind(config, ob, actions, rr, cc, np_random) def forageDijkstra(config, ob: Observation, actions, food_max, water_max, np_random, cutoff=100): - vision = config.PLAYER_VISION_RADIUS - - agent = ob.agent() - food = agent.food - water = agent.water + vision = config.PLAYER_VISION_RADIUS - best = -1000 - start = (0, 0) - goal = (0, 0) + agent = ob.agent() + food = agent.food + water = agent.water - reward = {start: (food, water)} - backtrace = {start: None} + best = -1000 + start = (0, 0) + goal = (0, 0) - queue = [start] + reward = {start: (food, water)} + backtrace = {start: None} - while queue: - cutoff -= 1 - if cutoff <= 0: - break + queue = [start] - cur = queue.pop(0) - for nxt in adjacentPos(cur): - if nxt in backtrace: - continue + while queue: + cutoff -= 1 + if cutoff <= 0: + break - if not inSight(*nxt, vision): - continue + cur = queue.pop(0) + for nxt in utils.adjacentPos(cur): + if nxt in backtrace: + continue - tile = ob.tile(*nxt) - matl = tile.material_id + if not inSight(*nxt, vision): + continue - if not matl in material.Habitable.indices: - continue + tile = ob.tile(*nxt) + matl = tile.material_id - food, water = reward[cur] - food = max(0, food - 1) - water = max(0, water - 1) + if not matl in material.Habitable.indices: + continue - if matl == material.Foilage.index: - food = min(food+food_max//2, food_max) - for pos in adjacentPos(nxt): - if not inSight(*pos, vision): - continue + food, water = reward[cur] + water = max(0, water - 1) + food = max(0, food - 1) + if matl == material.Foilage.index: + food = min(food+food_max//2, food_max) - tile = ob.tile(*pos) - matl = tile.material_id + for pos in utils.adjacentPos(nxt): + if not inSight(*pos, vision): + continue - if matl == material.Water.index: - water = min(water+water_max//2, water_max) - break + tile = ob.tile(*pos) + matl = tile.material_id + if matl == material.Water.index: + water = min(water+water_max//2, water_max) + break - reward[nxt] = (food, water) + reward[nxt] = (food, water) - total = min(food, water) - if total > best or ( - total == best and max(food, water) > max(reward[goal])): - best = total - goal = nxt + total = min(food, water) + if total > best \ + or (total == best and max(food, water) > max(reward[goal])): + best = total + goal = nxt - queue.append(nxt) - backtrace[nxt] = cur + queue.append(nxt) + backtrace[nxt] = cur - while goal in backtrace and backtrace[goal] != start: - goal = backtrace[goal] - direction = towards(goal, np_random) - actions[action.Move] = {action.Direction: direction} + while goal in backtrace and backtrace[goal] != start: + goal = backtrace[goal] + direction = towards(goal, np_random) + actions[action.Move] = {action.Direction: direction} def findResource(config, ob: Observation, resource): - vision = config.PLAYER_VISION_RADIUS - - resource_index = resource.index - - for r in range(-vision, vision+1): - for c in range(-vision, vision+1): - tile = ob.tile(r, c) - material_id = tile.material_id - - if material_id == resource_index: - return (r, c) - - return False + vision = config.PLAYER_VISION_RADIUS + resource_index = resource.index + for r in range(-vision, vision+1): + for c in range(-vision, vision+1): + tile = ob.tile(r, c) + material_id = tile.material_id + if material_id == resource_index: + return (r, c) + return False def gatherAStar(config, ob, actions, resource, np_random, cutoff=100): - resource_pos = findResource(config, ob, resource) - if not resource_pos: - return + resource_pos = findResource(config, ob, resource) + if not resource_pos: + return False - rr, cc = resource_pos - next_pos = aStar(config, ob, actions, rr, cc, cutoff=cutoff) - if not next_pos or next_pos == (0, 0): - return + rr, cc = resource_pos + next_pos = aStar(config, ob, actions, rr, cc, cutoff=cutoff) + if not next_pos or next_pos == (0, 0): + return False - direction = towards(next_pos, np_random) - actions[action.Move] = {action.Direction: direction} - return True + direction = towards(next_pos, np_random) + actions[action.Move] = {action.Direction: direction} + return True def gatherBFS(config, ob: Observation, actions, resource, np_random, cutoff=100): - vision = config.PLAYER_VISION_RADIUS + vision = config.PLAYER_VISION_RADIUS - start = (0, 0) + start = (0, 0) + backtrace = {start: None} + queue = [start] + found = False - backtrace = {start: None} + while queue: + cutoff -= 1 + if cutoff <= 0: + return False - queue = [start] + cur = queue.pop(0) + for nxt in utils.adjacentPos(cur): + if found: + break - found = False - while queue: - cutoff -= 1 - if cutoff <= 0: - return False + if nxt in backtrace: + continue - cur = queue.pop(0) - for nxt in adjacentPos(cur): - if found: - break + if not inSight(*nxt, vision): + continue - if nxt in backtrace: - continue + tile = ob.tile(*nxt) + matl = tile.material_id - if not inSight(*nxt, vision): - continue + if material.Fish in resource and material.Fish.index == matl: + found = nxt + backtrace[nxt] = cur + break - tile = ob.tile(*nxt) - matl = tile.material_id + if not tile.material_id in material.Habitable.indices: + continue - if material.Fish in resource and material.Fish.index == matl: - found = nxt - backtrace[nxt] = cur - break + if matl in (e.index for e in resource): + found = nxt + backtrace[nxt] = cur + break - if not tile.material_id in material.Habitable.indices: - continue + for pos in utils.adjacentPos(nxt): + if not inSight(*pos, vision): + continue - if matl in (e.index for e in resource): - found = nxt - backtrace[nxt] = cur - break + tile = ob.tile(*pos) + matl = tile.material_id - for pos in adjacentPos(nxt): - if not inSight(*pos, vision): - continue + if matl == material.Fish.index: + backtrace[nxt] = cur + break - tile = ob.tile(*pos) - matl = tile.material_id + queue.append(nxt) + backtrace[nxt] = cur - if matl == material.Fish.index: - backtrace[nxt] = cur - break - - queue.append(nxt) - backtrace[nxt] = cur - - #Ran out of tiles - if not found: - return False + #Ran out of tiles + if not found: + return False - found_orig = found - while found in backtrace and backtrace[found] != start: - found = backtrace[found] + while found in backtrace and backtrace[found] != start: + found = backtrace[found] - direction = towards(found, np_random) - actions[action.Move] = {action.Direction: direction} + direction = towards(found, np_random) + actions[action.Move] = {action.Direction: direction} - return True + return True def aStar(config, ob: Observation, actions, rr, cc, cutoff=100): - vision = config.PLAYER_VISION_RADIUS - - start = (0, 0) - goal = (rr, cc) - - if start == goal: - return (0, 0) - - pq = [(0, start)] + vision = config.PLAYER_VISION_RADIUS - backtrace = {} - cost = {start: 0} + start = (0, 0) + goal = (rr, cc) + if start == goal: + return (0, 0) - closestPos = start - closestHeuristic = utils.l1(start, goal) - closestCost = closestHeuristic + pq = [(0, start)] - while pq: - # Use approximate solution if budget exhausted - cutoff -= 1 - if cutoff <= 0: - if goal not in backtrace: - goal = closestPos - break + backtrace = {} + cost = {start: 0} - priority, cur = heapq.heappop(pq) + closestPos = start + closestHeuristic = utils.l1(start, goal) + closestCost = closestHeuristic - if cur == goal: - break + while pq: + # Use approximate solution if budget exhausted + cutoff -= 1 + if cutoff <= 0: + if goal not in backtrace: + goal = closestPos + break - for nxt in adjacentPos(cur): - if not inSight(*nxt, vision): - continue + priority, cur = heapq.heappop(pq) - tile = ob.tile(*nxt) - matl = tile.material_id + if cur == goal: + break - if not matl in material.Habitable.indices: - continue + for nxt in utils.adjacentPos(cur): + if not inSight(*nxt, vision): + continue - #Omitted water from the original implementation. Seems key - if matl in material.Impassible.indices: - continue + tile = ob.tile(*nxt) + matl = tile.material_id - newCost = cost[cur] + 1 - if nxt not in cost or newCost < cost[nxt]: - cost[nxt] = newCost - heuristic = utils.lInfty(goal, nxt) - priority = newCost + heuristic + if not matl in material.Habitable.indices: + continue - # Compute approximate solution - if heuristic < closestHeuristic or ( - heuristic == closestHeuristic and priority < closestCost): - closestPos = nxt - closestHeuristic = heuristic - closestCost = priority + #Omitted water from the original implementation. Seems key + if matl in material.Impassible.indices: + continue - heapq.heappush(pq, (priority, nxt)) - backtrace[nxt] = cur + newCost = cost[cur] + 1 + if nxt not in cost or newCost < cost[nxt]: + cost[nxt] = newCost + heuristic = utils.lInfty(goal, nxt) + priority = newCost + heuristic - #Not needed with scuffed material list above - #if goal not in backtrace: - # goal = closestPos + # Compute approximate solution + if heuristic < closestHeuristic \ + or (heuristic == closestHeuristic and priority < closestCost): + closestPos = nxt + closestHeuristic = heuristic + closestCost = priority - goal = closestPos - while goal in backtrace and backtrace[goal] != start: - goal = backtrace[goal] + heapq.heappush(pq, (priority, nxt)) + backtrace[nxt] = cur - return goal + goal = closestPos + while goal in backtrace and backtrace[goal] != start: + goal = backtrace[goal] + return goal diff --git a/scripted/utils.py b/scripted/utils.py deleted file mode 100644 index 0c7f2af85..000000000 --- a/scripted/utils.py +++ /dev/null @@ -1,30 +0,0 @@ - - -def l1(start, goal): - sr, sc = start - gr, gc = goal - return abs(gr - sr) + abs(gc - sc) - -def l2(start, goal): - sr, sc = start - gr, gc = goal - return 0.5*((gr - sr)**2 + (gc - sc)**2)**0.5 - -def lInfty(start, goal): - sr, sc = start - gr, gc = goal - return max(abs(gr - sr), abs(gc - sc)) - -def adjacentPos(pos): - r, c = pos - return [(r - 1, c), (r, c - 1), (r + 1, c), (r, c + 1)] - -def adjacentDeltas(): - return [(-1, 0), (1, 0), (0, 1), (0, -1)] - -def inSight(dr, dc, vision): - return ( - dr >= -vision and - dc >= -vision and - dr <= vision and - dc <= vision) \ No newline at end of file From 2063c3f32dc5e13e2a1358a650abae5314bd2db4 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 29 Jul 2023 14:12:14 +0900 Subject: [PATCH 082/113] tweaked task spec name --- nmmo/task/task_spec.py | 11 ++++++++--- tests/task/test_task_api.py | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py index 714cd8fde..f1d0c9ad4 100644 --- a/nmmo/task/task_spec.py +++ b/nmmo/task/task_spec.py @@ -59,11 +59,16 @@ def __post_init__(self): @functools.cached_property def name(self): # pylint: disable=no-member - kwargs_str = "".join([f"{key}={str(val)}_" for key, val in self.eval_fn_kwargs.items()]) - kwargs_str = "(" + kwargs_str[:-1] + ")" # remove the last _ + kwargs_str = [] + for key, val in self.eval_fn_kwargs.items(): + val_str = str(val) + if isinstance(val, type): + val_str = val.__name__ + kwargs_str.append(f"{key}:{val_str}_") + kwargs_str = "(" + "".join(kwargs_str)[:-1] + ")" # remove the last _ pred_name = self.eval_fn.__name__ if self.predicate is None else self.predicate.name return "_".join([self.task_cls.__name__, pred_name, - kwargs_str, "reward_to=" + self.reward_to]) + kwargs_str, "reward_to:" + self.reward_to]) def make_task_from_spec(assign_to: Union[Iterable[int], Dict], task_spec: List[TaskSpec]) -> List[Task]: diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 18f9299ab..8b971dcd2 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -292,7 +292,7 @@ def PracticeFormation(gs, subject, dist, num_tick): task = env.tasks[0] self.assertEqual(task.spec_name, # without the subject and assignee agent ids - "Task_PracticeFormation_(dist=1_num_tick=10)_reward_to=team") + "Task_PracticeFormation_(dist:1_num_tick:10)_reward_to:team") self.assertEqual(task.name, "(Task_eval_fn:(PracticeFormation_(1,2,3)_dist:1_num_tick:10)"+ "_assignee:(1,2,3))") @@ -377,13 +377,13 @@ def test_make_task_from_spec(self): # check the task spec names self.assertEqual(task_list[0][0].spec_name, - "Task_TickGE_(num_tick=20)_reward_to=agent") + "Task_TickGE_(num_tick:20)_reward_to:agent") self.assertEqual(task_list[1][0].spec_name, - "OngoingTask_StayAlive_()_reward_to=agent") + "OngoingTask_StayAlive_()_reward_to:agent") self.assertEqual(task_list[2][0].spec_name, - "OngoingTask_StayAlive_(target=my_team_leader)_reward_to=team") + "OngoingTask_StayAlive_(target:my_team_leader)_reward_to:team") self.assertEqual(task_list[3][0].spec_name, - "OngoingTask_StayAlive_(target=left_team)_reward_to=team") + "OngoingTask_StayAlive_(target:left_team)_reward_to:team") # check the task names self.assertEqual(task_list[0][0].name, @@ -423,7 +423,7 @@ def test_hold_duration_task(self): # check the task information self.assertEqual(env.tasks[0].spec_name, - "HoldDurationTask_HoardGold_(amount=10)_reward_to=agent") + "HoldDurationTask_HoardGold_(amount:10)_reward_to:agent") self.assertTrue(env.tasks[0]._progress == 0.5) # agent 1 has enough gold self.assertTrue(env.tasks[1]._progress == 0.5) # agent 2 has enough gold self.assertTrue(env.tasks[2]._progress == 0.0) # agent 3 has no gold @@ -454,7 +454,9 @@ def test_task_spec_with_predicate(self): # make task spec task_spec = [TaskSpec(predicate=predicate, - eval_fn=None, eval_fn_kwargs={"success_target": 1})] + eval_fn=None, + eval_fn_kwargs={"success_target": 1, + "test_item": Item.Hat})] tasks = make_task_from_spec(teams, task_spec) env = Env(ScriptedAgentTestConfig()) @@ -463,7 +465,7 @@ def test_task_spec_with_predicate(self): # check the task information self.assertEqual(env.tasks[0].spec_name, - "Task_SuccessAndFailure_(success_target=1)_reward_to=agent") + "Task_SuccessAndFailure_(success_target:1_test_item:Hat)_reward_to:agent") if __name__ == "__main__": unittest.main() From 1f23c6114e52918b2ac41a5ee147852e43e2d902 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 29 Jul 2023 21:09:15 +0900 Subject: [PATCH 083/113] added task progress reward tracking --- nmmo/task/task_api.py | 7 +++++++ tests/task/test_task_api.py | 14 ++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index e0c9c89cb..a0e615e68 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -27,6 +27,9 @@ def __init__(self, self._eval_fn = eval_fn self._progress = 0.0 self._completed = False + self._max_progress = 0.0 + self._reward_count = 0 + self._reward_multiplier = reward_multiplier self._embedding = embedding self.spec_name = spec_name # None if not created using TaskSpec @@ -36,6 +39,8 @@ def __init__(self, def reset(self): self._progress = 0.0 self._completed = False + self._max_progress = 0.0 + self._reward_count = 0 @property def assignee(self) -> Tuple[int]: @@ -79,6 +84,8 @@ def compute_rewards(self, gs: GameState) -> Tuple[Dict[int, float], Dict[int, Di Returns rewards and infos for all agents in subject """ reward = self._map_progress_to_reward(gs) * self._reward_multiplier + self._max_progress = max(self._max_progress, self._progress) + self._reward_count += int(reward > 0) rewards = {int(ent_id): reward for ent_id in self._assignee} infos = {int(ent_id): {"task_spec": self.spec_name, "reward": reward, diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 8b971dcd2..de8587169 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -424,8 +424,10 @@ def test_hold_duration_task(self): # check the task information self.assertEqual(env.tasks[0].spec_name, "HoldDurationTask_HoardGold_(amount:10)_reward_to:agent") - self.assertTrue(env.tasks[0]._progress == 0.5) # agent 1 has enough gold - self.assertTrue(env.tasks[1]._progress == 0.5) # agent 2 has enough gold + for idx in [0, 1]: + self.assertEqual(env.tasks[idx]._progress, 0.5) # agent 1 & 2 has enough gold + self.assertEqual(env.tasks[idx]._max_progress, 0.5) + self.assertEqual(env.tasks[idx]._reward_count, 5) self.assertTrue(env.tasks[2]._progress == 0.0) # agent 3 has no gold for task in env.tasks: self.assertTrue(task.completed is False) # not completed yet @@ -434,8 +436,12 @@ def test_hold_duration_task(self): env.realm.players[2].gold.update(goal_gold-1) env.step({}) - self.assertTrue(env.tasks[0]._progress == 0.6) # agent 1 has enough gold - self.assertTrue(env.tasks[1]._progress == 0) # agent 2 has not enough gold + self.assertEqual(env.tasks[0]._progress, 0.6) # agent 1 has enough gold + self.assertEqual(env.tasks[0]._max_progress, 0.6) + self.assertEqual(env.tasks[0]._reward_count, 6) + self.assertEqual(env.tasks[1]._progress, 0) # agent 2 has not enough gold + self.assertEqual(env.tasks[1]._max_progress, 0.5) # max values are preserved + self.assertEqual(env.tasks[1]._reward_count, 5) for _ in range(4): env.step({}) From 424c30563d3bfaf9fee21850cb70c2fda38e9e89 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 29 Jul 2023 22:02:05 +0900 Subject: [PATCH 084/113] deepcopy kwargs that are being changed --- nmmo/task/task_spec.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py index f1d0c9ad4..370fbff07 100644 --- a/nmmo/task/task_spec.py +++ b/nmmo/task/task_spec.py @@ -2,6 +2,7 @@ from dataclasses import dataclass, field from typing import Iterable, Dict, List, Union, Type from types import FunctionType +from copy import deepcopy import numpy as np @@ -93,9 +94,9 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], # map local vars to spec attributes reward_to = task_spec[idx].reward_to pred_fn = task_spec[idx].eval_fn - pred_fn_kwargs = task_spec[idx].eval_fn_kwargs + pred_fn_kwargs = deepcopy(task_spec[idx].eval_fn_kwargs) task_cls = task_spec[idx].task_cls - task_kwargs = task_spec[idx].task_kwargs + task_kwargs = deepcopy(task_spec[idx].task_kwargs) task_kwargs["embedding"] = task_spec[idx].embedding # to pass to task_cls task_kwargs["spec_name"] = task_spec[idx].name predicate = task_spec[idx].predicate From 8f0ab122232d938a9f0243cd2298d6a1785f3852 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 8 Aug 2023 23:31:02 -0700 Subject: [PATCH 085/113] fixed task embedding type, added curriculum-related vars --- nmmo/core/env.py | 6 ++++-- nmmo/task/task_api.py | 41 ++++++++++++++++++++----------------- tests/task/test_task_api.py | 7 ++++--- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 94b9b9e31..bc3a2fca6 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -48,7 +48,7 @@ def __init__(self, # Default task: rewards 1 each turn agent is alive self.tasks = task_api.nmmo_default_task(self.possible_agents) self.agent_task_map = None - self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float32) + self._dummy_task_embedding = np.zeros(self.config.TASK_EMBED_DIM, dtype=np.float16) # curriculum file path, if provided, should exist self.curriculum_file_path = config.CURRICULUM_FILE_PATH @@ -73,7 +73,9 @@ def mask_box(length): "AgentId": gym.spaces.Discrete(self.config.PLAYER_N+1), "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes), "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes), - "Task": gym.spaces.Box(low=-2**15, high=2**15-1, shape=(self.config.TASK_EMBED_DIM,)), + "Task": gym.spaces.Box(low=-2**15, high=2**15-1, + shape=(self.config.TASK_EMBED_DIM,), + dtype=np.float16), } if self.config.ITEM_SYSTEM_ENABLED: diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index a0e615e68..174d9dd60 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -1,8 +1,9 @@ -# pylint: disable=unused-import +# pylint: disable=unused-import,attribute-defined-outside-init from typing import Callable, Iterable, Dict, List, Union, Tuple, Type from types import FunctionType from abc import ABC import inspect +import numpy as np from nmmo.task.group import Group from nmmo.task.game_state import GameState @@ -25,22 +26,19 @@ def __init__(self, assert len(assignee) > 0, "Assignee cannot be empty" self._assignee = tuple(set(assignee)) # dedup self._eval_fn = eval_fn - self._progress = 0.0 - self._completed = False - self._max_progress = 0.0 - self._reward_count = 0 - self._reward_multiplier = reward_multiplier - self._embedding = embedding + self._embedding = None if embedding is None else np.array(embedding, dtype=np.float16) self.spec_name = spec_name # None if not created using TaskSpec self.name = self._make_name(self.__class__.__name__, eval_fn=eval_fn, assignee=self._assignee) + self.reset() def reset(self): self._progress = 0.0 - self._completed = False + self._completed_tick = None self._max_progress = 0.0 - self._reward_count = 0 + self._positive_reward_count = 0 + self._negative_reward_count = 0 @property def assignee(self) -> Tuple[int]: @@ -48,12 +46,16 @@ def assignee(self) -> Tuple[int]: @property def completed(self) -> bool: - return self._completed + return self._completed_tick is not None @property def reward_multiplier(self) -> float: return self._reward_multiplier + @property + def reward_signal_count(self) -> int: + return self._positive_reward_count + self._negative_reward_count + @property def embedding(self): return self._embedding @@ -67,14 +69,14 @@ def _map_progress_to_reward(self, gs: GameState) -> float: Override this function to create a custom reward function """ - if self._completed: + if self.completed: return 0.0 new_progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) diff = new_progress - self._progress self._progress = new_progress if self._progress >= 1: - self._completed = True + self._completed_tick = gs.current_tick return diff @@ -85,12 +87,13 @@ def compute_rewards(self, gs: GameState) -> Tuple[Dict[int, float], Dict[int, Di """ reward = self._map_progress_to_reward(gs) * self._reward_multiplier self._max_progress = max(self._max_progress, self._progress) - self._reward_count += int(reward > 0) + self._positive_reward_count += int(reward > 0) + self._negative_reward_count += int(reward < 0) rewards = {int(ent_id): reward for ent_id in self._assignee} infos = {int(ent_id): {"task_spec": self.spec_name, "reward": reward, "progress": self._progress, - "completed": self._completed} + "completed": self.completed} for ent_id in self._assignee} # NOTE: tasks do not know whether assignee agents are alive or dead @@ -143,8 +146,8 @@ def _map_progress_to_reward(self, gs: GameState) -> float: However, this task tracks the completion status in the same manner. """ self._progress = max(min(self._eval_fn(gs)*1.0,1.0),0.0) - if self._progress >= 1: - self._completed = True + if self._progress >= 1 and self._completed_tick is None: + self._completed_tick = gs.current_tick return self._progress class HoldDurationTask(Task): @@ -167,7 +170,7 @@ def reset(self): def _map_progress_to_reward(self, gs: GameState) -> float: # pylint: disable=attribute-defined-outside-init - if self._completed: + if self.completed: return 0.0 curr_eval = max(min(self._eval_fn(gs)*1.0,1.0),0.0) @@ -180,8 +183,8 @@ def _map_progress_to_reward(self, gs: GameState) -> float: new_progress = self._timer / self._hold_duration diff = new_progress - self._progress self._progress = new_progress - if self._progress >= 1: - self._completed = True + if self._progress >= 1 and self._completed_tick is None: + self._completed_tick = gs.current_tick return diff diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index de8587169..e2f61572b 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -427,7 +427,7 @@ def test_hold_duration_task(self): for idx in [0, 1]: self.assertEqual(env.tasks[idx]._progress, 0.5) # agent 1 & 2 has enough gold self.assertEqual(env.tasks[idx]._max_progress, 0.5) - self.assertEqual(env.tasks[idx]._reward_count, 5) + self.assertEqual(env.tasks[idx].reward_signal_count, 5) self.assertTrue(env.tasks[2]._progress == 0.0) # agent 3 has no gold for task in env.tasks: self.assertTrue(task.completed is False) # not completed yet @@ -438,10 +438,11 @@ def test_hold_duration_task(self): env.step({}) self.assertEqual(env.tasks[0]._progress, 0.6) # agent 1 has enough gold self.assertEqual(env.tasks[0]._max_progress, 0.6) - self.assertEqual(env.tasks[0]._reward_count, 6) + self.assertEqual(env.tasks[0].reward_signal_count, 6) self.assertEqual(env.tasks[1]._progress, 0) # agent 2 has not enough gold self.assertEqual(env.tasks[1]._max_progress, 0.5) # max values are preserved - self.assertEqual(env.tasks[1]._reward_count, 5) + self.assertEqual(env.tasks[1]._positive_reward_count, 5) + self.assertEqual(env.tasks[1].reward_signal_count, 6) # 5 positive + 1 negative for _ in range(4): env.step({}) From ee80177399cba04b815b087ebb98c46cd0746b21 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 9 Aug 2023 15:28:49 -0700 Subject: [PATCH 086/113] fixed potential 1-off obs bug --- nmmo/core/observation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 7bf931a4b..640a693d3 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -381,7 +381,7 @@ def _make_buy_mask(self): if self.inventory.len >= self.config.ITEM_INVENTORY_CAPACITY: exist_ammo_listings = self._existing_ammo_listings() if not np.any(exist_ammo_listings): - return np.zeros(self.config.MARKET_N_OBS, dtype=np.int8) + return buy_mask not_mine &= exist_ammo_listings enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold From f3225fd098a844552af83c37343cb72dc2cfded2 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 9 Aug 2023 22:54:46 -0700 Subject: [PATCH 087/113] comment out the ItemState empty assert for now --- nmmo/core/realm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index 6cefb19f5..ef662c40d 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -87,8 +87,9 @@ def reset(self, np_random, map_id: int = None): self.npcs.reset(self._np_random) assert EntityState.State.table(self.datastore).is_empty(), \ "EntityState table is not empty" - assert ItemState.State.table(self.datastore).is_empty(), \ - "ItemState table is not empty" + # TODO: fix the item leak, then uncomment the below -- print out the table? + # assert ItemState.State.table(self.datastore).is_empty(), \ + # "ItemState table is not empty" # DataStore id allocator must be reset to be deterministic EntityState.State.table(self.datastore).reset() From e1d318bdfdabf869265547b8688351281d831271 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 10 Aug 2023 00:04:22 -0700 Subject: [PATCH 088/113] added task progress info --- nmmo/core/env.py | 11 ++++++----- nmmo/task/task_api.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index bc3a2fca6..370a90688 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -475,11 +475,12 @@ def _compute_rewards(self): # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: - task_rewards, task_infos = task.compute_rewards(self.game_state) - for agent_id, reward in task_rewards.items(): - if agent_id in agents: - rewards[agent_id] = rewards.get(agent_id,0) + reward - infos[agent_id]['task'][task.name] = task_infos[agent_id] # progress + if agents.intersection(task.assignee): # evaluate only if the agents are current + task_rewards, task_infos = task.compute_rewards(self.game_state) + for agent_id, reward in task_rewards.items(): + if agent_id in agents: + rewards[agent_id] = rewards.get(agent_id,0) + reward + infos[agent_id]['task'][task.name] = task_infos[agent_id] # include progress, etc # Make sure the dead agents return the rewards of -1 for agent_id in self._dead_this_tick: diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 174d9dd60..20d1e6ca2 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -34,6 +34,7 @@ def __init__(self, self.reset() def reset(self): + self._last_eval_tick = None self._progress = 0.0 self._completed_tick = None self._max_progress = 0.0 @@ -86,6 +87,7 @@ def compute_rewards(self, gs: GameState) -> Tuple[Dict[int, float], Dict[int, Di Returns rewards and infos for all agents in subject """ reward = self._map_progress_to_reward(gs) * self._reward_multiplier + self._last_eval_tick = gs.current_tick self._max_progress = max(self._max_progress, self._progress) self._positive_reward_count += int(reward > 0) self._negative_reward_count += int(reward < 0) @@ -140,6 +142,19 @@ def kwargs(self): # the function _eval_fn must only take gs return {} + @property + def progress_info(self): + return { + "task_spec_name": self.spec_name, + "last_eval_tick": self._last_eval_tick, + "completed": self.completed, + "completed_tick": self._completed_tick, + "max_progress": self._max_progress, + "positive_reward_count": self._positive_reward_count, + "negative_reward_count": self._negative_reward_count, + "reward_signal_count": self.reward_signal_count, + } + class OngoingTask(Task): def _map_progress_to_reward(self, gs: GameState) -> float: """Keep returning the progress reward after the task is completed. From 4fe3b7833ae6aafd5aa189e727661779ee12aee5 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 10 Aug 2023 00:05:21 -0700 Subject: [PATCH 089/113] removed config.SPECIALIZE for scripted agents --- scripted/baselines.py | 16 ++++++++-------- tests/core/test_env.py | 1 - tests/testhelpers.py | 1 - 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scripted/baselines.py b/scripted/baselines.py index adfb5ca88..860c105e0 100644 --- a/scripted/baselines.py +++ b/scripted/baselines.py @@ -464,42 +464,42 @@ def __call__(self, obs): class Fisher(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Fish] self.tool = item_system.Rod class Herbalist(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Herb] self.tool = item_system.Gloves class Prospector(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Ore] self.tool = item_system.Pickaxe class Carver(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Tree] self.tool = item_system.Axe class Alchemist(Gather): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.PROFESSION_SYSTEM_ENABLED: self.resource = [material.Crystal] self.tool = item_system.Chisel class Melee(Combat): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.COMBAT_SYSTEM_ENABLED: self.style = [action.Melee] self.weapon = item_system.Spear self.ammo = item_system.Whetstone @@ -507,7 +507,7 @@ def __init__(self, config, idx): class Range(Combat): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.COMBAT_SYSTEM_ENABLED: self.style = [action.Range] self.weapon = item_system.Bow self.ammo = item_system.Arrow @@ -515,7 +515,7 @@ def __init__(self, config, idx): class Mage(Combat): def __init__(self, config, idx): super().__init__(config, idx) - if config.SPECIALIZE: + if config.COMBAT_SYSTEM_ENABLED: self.style = [action.Mage] self.weapon = item_system.Wand self.ammo = item_system.Runes diff --git a/tests/core/test_env.py b/tests/core/test_env.py index 2c647f39a..dccca541b 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -21,7 +21,6 @@ RANDOM_SEED = random.randint(0, 10000) class Config(nmmo.config.Small, nmmo.config.AllGameSystems): - SPECIALIZE = True PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector, baselines.Carver, baselines.Alchemist, diff --git a/tests/testhelpers.py b/tests/testhelpers.py index e4bd7e0d0..9fdb20877 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -119,7 +119,6 @@ class ScriptedAgentTestConfig(nmmo.config.Small, nmmo.config.AllGameSystems): PLAYER_DEATH_FOG = 5 - SPECIALIZE = True PLAYERS = [ baselines.Fisher, baselines.Herbalist, baselines.Prospector,baselines.Carver, baselines.Alchemist, From 4585870ddcb7a47d12cefa43eacdd009548bbab8 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 10 Aug 2023 16:47:34 -0700 Subject: [PATCH 090/113] moved check_task_spec to nmmo.task.task_spec --- nmmo/lib/log.py | 3 -- nmmo/task/task_spec.py | 20 ++++++++++++ tests/task/test_manual_curriculum.py | 46 +++++++++------------------- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/nmmo/lib/log.py b/nmmo/lib/log.py index 6ee72296e..8ee6c77d7 100644 --- a/nmmo/lib/log.py +++ b/nmmo/lib/log.py @@ -35,9 +35,6 @@ def log_max(self, key, val): return True -# CHECK ME: Is this a good place to put here? -# EventCode is used in many places, and I(kywch)'m putting it here -# to avoid a circular import, which happened a few times with event_log.py class EventCode: # Move EAT_FOOD = 1 diff --git a/nmmo/task/task_spec.py b/nmmo/task/task_spec.py index 370fbff07..0c33267fd 100644 --- a/nmmo/task/task_spec.py +++ b/nmmo/task/task_spec.py @@ -6,6 +6,7 @@ import numpy as np +import nmmo from nmmo.task.task_api import Task, make_same_task from nmmo.task.predicate_api import Predicate, make_predicate from nmmo.task.group import Group @@ -143,3 +144,22 @@ def make_task_from_spec(assign_to: Union[Iterable[int], Dict], for agent_id in agent_list] return tasks + +# pylint: disable=bare-except,cell-var-from-loop +def check_task_spec(spec_list: List[TaskSpec]) -> List[Dict]: + teams = {0: [1, 2, 3], 3: [4, 5], 7: [6, 7], 11: [8, 9], 14: [10, 11]} + config = nmmo.config.Default() + env = nmmo.Env(config) + results = [] + for single_spec in spec_list: + result = {"spec_name": single_spec.name} + try: + env.reset(make_task_fn=lambda: make_task_from_spec(teams, [single_spec])) + for _ in range(3): + env.step({}) + result["runnable"] = True + except: + result["runnable"] = False + + results.append(result) + return results diff --git a/tests/task/test_manual_curriculum.py b/tests/task/test_manual_curriculum.py index f95d92348..c8d3563ab 100644 --- a/tests/task/test_manual_curriculum.py +++ b/tests/task/test_manual_curriculum.py @@ -3,12 +3,11 @@ # pylint: disable=wildcard-import,unused-wildcard-import from typing import List -import nmmo import nmmo.lib.material as m from nmmo.task.base_predicates import * from nmmo.task.task_api import OngoingTask from nmmo.task import constraint as c -from nmmo.task.task_spec import TaskSpec, make_task_from_spec +from nmmo.task.task_spec import TaskSpec, check_task_spec EVENT_NUMBER_GOAL = [3, 4, 5, 7, 9, 12, 15, 20, 30, 50] INFREQUENT_GOAL = list(range(1, 10)) @@ -295,7 +294,6 @@ def PracticeInventoryManagement(gs, subject, space, num_tick): if __name__ == '__main__': - # pylint: disable=bare-except import psutil from contextlib import contextmanager import multiprocessing as mp @@ -309,36 +307,20 @@ def create_pool(num_proc): pool.close() pool.join() - def check_task_spec(spec_list): - teams = {0:[1,2,3], 1:[4,5], 2:[6,7], 3:[8,9], 4:[10,11]} - config = nmmo.config.Default() - env = nmmo.Env(config) - for idx, single_spec in enumerate(spec_list): - # pylint: disable=cell-var-from-loop - test_task = make_task_from_spec(teams, [single_spec]) - try: - env.reset(make_task_fn=lambda: test_task) - for _ in range(3): - env.step({}) - except: - print('invalid task spec:', single_spec) - - if idx > 0 and idx % 50 == 0: - print(idx, 'task specs checked.') - # 3495 task specs: divide the specs into chunks - num_cores = psutil.cpu_count(logical=False) - spec_chunks = np.array_split(task_spec, num_cores) - with create_pool(num_cores) as pool: - pool.map(check_task_spec, spec_chunks) - - # print(sample_task[0].name) - # if len(sample_task) > 1: - # print(sample_task[-1].name) - - # for now, we only use the 1535 tasks with reward_to=agent - flt_spec = [spec for spec in task_spec if spec.reward_to == 'agent'] + num_workers = round(psutil.cpu_count(logical=False)*0.7) + spec_chunks = np.array_split(task_spec, num_workers) + with create_pool(num_workers) as pool: + chunk_results = pool.map(check_task_spec, spec_chunks) + + num_error = 0 + for results in chunk_results: + for result in results: + if result["runnable"] is False: + print("ERROR: ", result["spec_name"]) + num_error += 1 + print("Total number of errors: ", num_error) # test if the task spec is pickalable with open('sample_curriculum.pkl', 'wb') as f: - dill.dump(flt_spec, f) + dill.dump(task_spec, f) From 0144877aaf626a3ad44bd50ef9fd82141f17ba5e Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 11 Aug 2023 09:53:22 -0700 Subject: [PATCH 091/113] give out the max reward when the task is accomplished --- nmmo/task/task_api.py | 2 ++ tests/task/test_task_api.py | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index 20d1e6ca2..bcef741e8 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -78,6 +78,7 @@ def _map_progress_to_reward(self, gs: GameState) -> float: self._progress = new_progress if self._progress >= 1: self._completed_tick = gs.current_tick + diff = 1.0 # give out the max reward when task is completed return diff @@ -200,6 +201,7 @@ def _map_progress_to_reward(self, gs: GameState) -> float: self._progress = new_progress if self._progress >= 1 and self._completed_tick is None: self._completed_tick = gs.current_tick + diff = 1.0 # give out the max reward when task is completed return diff diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index e2f61572b..a85450b99 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -273,7 +273,8 @@ def PracticeFormation(gs, subject, dist, num_tick): _, rewards, _, infos = env.step({}) if tick < 10: - self.assertAlmostEqual(rewards[1], 1/goal_tick) + target_reward = 1.0 if env.realm.tick == goal_tick else 1/goal_tick + self.assertAlmostEqual(rewards[1], target_reward) self.assertAlmostEqual((1+tick)/goal_tick, infos[1]["task"][env.tasks[0].name]["progress"]) else: @@ -283,7 +284,7 @@ def PracticeFormation(gs, subject, dist, num_tick): self.assertEqual(infos[1]["task"][env.tasks[0].name]["completed"], True) # test the task_spec_with_embedding - task_embedding = np.ones(config.TASK_EMBED_DIM, dtype=np.float32) + task_embedding = np.ones(config.TASK_EMBED_DIM, dtype=np.float16) task_spec_with_embedding = TaskSpec(eval_fn=PracticeFormation, eval_fn_kwargs={"dist": 1, "num_tick": goal_tick}, reward_to="team", From c28b9c42675cf2ae2c9a12e932948fe4ad520129 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 16 Aug 2023 11:31:32 -0700 Subject: [PATCH 092/113] changed ActionTarget key type to str --- nmmo/core/env.py | 12 ++++---- nmmo/core/observation.py | 48 +++++++++++++++--------------- tests/action/test_ammo_use.py | 16 +++++----- tests/action/test_monkey_action.py | 2 +- tests/core/test_env.py | 13 ++++---- tests/core/test_gym_obs_spaces.py | 7 +++-- tests/testhelpers.py | 6 ++-- 7 files changed, 54 insertions(+), 50 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 370a90688..387665c51 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,7 +1,7 @@ import functools from typing import Any, Dict, List, Callable from collections import defaultdict -from copy import copy, deepcopy +from copy import copy import dill import gym @@ -85,11 +85,13 @@ def mask_box(length): obs_space["Market"] = box(self.config.MARKET_N_OBS, Item.State.num_attributes) if self.config.PROVIDE_ACTION_TARGETS: - mask_spec = deepcopy(self._atn_space) - for atn in mask_spec: + mask_spec = {} # deepcopy(self._atn_space) + for atn in self._atn_space: + tmp_mask = {} for arg in atn.edges: - mask_spec[atn][arg] = mask_box(mask_spec[atn][arg].n) - obs_space['ActionTargets'] = mask_spec + tmp_mask[arg.__name__] = mask_box(self._atn_space[atn][arg].n) + mask_spec[atn.__name__] = gym.spaces.Dict(tmp_mask) + obs_space['ActionTargets'] = gym.spaces.Dict(mask_spec) return gym.spaces.Dict(obs_space) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 640a693d3..29d24b304 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -160,48 +160,48 @@ def to_gym(self): def _make_action_targets(self): masks = {} - masks[action.Move] = { - action.Direction: self._make_move_mask() + masks["Move"] = { + "Direction": self._make_move_mask() } if self.config.COMBAT_SYSTEM_ENABLED: # Test below. see tests/core/test_observation_tile.py, test_action_target_consts() # assert len(action.Style.edges) == 3 - masks[action.Attack] = { - action.Style: np.zeros(3, dtype=np.int8) if self.dummy_obs\ - else np.ones(3, dtype=np.int8), - action.Target: self._make_attack_mask() + masks["Attack"] = { + "Style": np.zeros(3, dtype=np.int8) if self.dummy_obs\ + else np.ones(3, dtype=np.int8), + "Target": self._make_attack_mask() } if self.config.ITEM_SYSTEM_ENABLED: - masks[action.Use] = { - action.InventoryItem: self._make_use_mask() + masks["Use"] = { + "InventoryItem": self._make_use_mask() } - masks[action.Give] = { - action.InventoryItem: self._make_sell_mask(), - action.Target: self._make_give_target_mask() + masks["Give"] = { + "InventoryItem": self._make_sell_mask(), + "Target": self._make_give_target_mask() } - masks[action.Destroy] = { - action.InventoryItem: self._make_destroy_item_mask() + masks["Destroy"] = { + "InventoryItem": self._make_destroy_item_mask() } if self.config.EXCHANGE_SYSTEM_ENABLED: - masks[action.Sell] = { - action.InventoryItem: self._make_sell_mask(), - action.Price: np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) if self.dummy_obs\ - else np.ones(self.config.PRICE_N_OBS, dtype=np.int8) + masks["Sell"] = { + "InventoryItem": self._make_sell_mask(), + "Price": np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) if self.dummy_obs\ + else np.ones(self.config.PRICE_N_OBS, dtype=np.int8) } - masks[action.Buy] = { - action.MarketItem: self._make_buy_mask() + masks["Buy"] = { + "MarketItem": self._make_buy_mask() } - masks[action.GiveGold] = { - action.Target: self._make_give_target_mask(), - action.Price: self._make_give_gold_mask() # reusing Price + masks["GiveGold"] = { + "Target": self._make_give_target_mask(), + "Price": self._make_give_gold_mask() # reusing Price } if self.config.COMMUNICATION_SYSTEM_ENABLED: - masks[action.Comm] = { - action.Token:\ + masks["Comm"] = { + "Token":\ np.zeros(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) if self.dummy_obs\ else np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) } diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 58b45c9bc..6e85b7d55 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -25,10 +25,10 @@ def setUpClass(cls): logging.basicConfig(filename=LOGFILE, level=logging.INFO) def _assert_action_targets_zero(self, gym_obs): - mask = np.sum(gym_obs['ActionTargets'][action.GiveGold][action.Price]) \ - + np.sum(gym_obs['ActionTargets'][action.Buy][action.MarketItem]) + mask = np.sum(gym_obs["ActionTargets"]["GiveGold"]["Price"]) \ + + np.sum(gym_obs["ActionTargets"]["Buy"]["MarketItem"]) for atn in [action.Use, action.Give, action.Destroy, action.Sell]: - mask += np.sum(gym_obs['ActionTargets'][atn][action.InventoryItem]) + mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 self.assertEqual(mask, 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) @@ -49,7 +49,7 @@ def test_ammo_fire_all(self): ItemState.parse_array(inventory.values[inv_idx]).equipped) # check SELL InventoryItem mask -- one cannot sell equipped item - mask = gym_obs['ActionTargets'][action.Sell][action.InventoryItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Sell"]["InventoryItem"][:inventory.len] > 0 self.assertTrue(inventory.id(inv_idx) not in inventory.ids[mask]) # the agents must not be in combat status @@ -157,15 +157,15 @@ def test_cannot_use_listed_items(self): self.assertTrue(item_info.id in env.obs[ent_id].market.ids) # check SELL InventoryItem mask -- one cannot sell listed item - mask = gym_obs['ActionTargets'][action.Sell][action.InventoryItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Sell"]["InventoryItem"][:inventory.len] > 0 self.assertTrue(inventory.id(inv_idx) not in inventory.ids[mask]) # check USE InventoryItem mask -- one cannot use listed item - mask = gym_obs['ActionTargets'][action.Use][action.InventoryItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Use"]["InventoryItem"][:inventory.len] > 0 self.assertTrue(inventory.id(inv_idx) not in inventory.ids[mask]) # check BUY MarketItem mask -- there should be two ammo items in the market - mask = gym_obs['ActionTargets'][action.Buy][action.MarketItem][:inventory.len] > 0 + mask = gym_obs["ActionTargets"]["Buy"]["MarketItem"][:inventory.len] > 0 # agent 1 has inventory space if ent_id == 1: self.assertTrue(sum(mask) == 2) # agent 2's inventory is full but can buy level-0 whetstone (existing ammo) @@ -243,7 +243,7 @@ def sig_int_tuple(sig): if ent_id == 1: gym_obs = env.obs[ent_id].to_gym() # check USE InventoryItem mask - mask = gym_obs['ActionTargets'][action.Use][action.InventoryItem][:inv_obs.len] > 0 + mask = gym_obs["ActionTargets"]["Use"]["InventoryItem"][:inv_obs.len] > 0 # level-2 melee should be able to use level-0, level-1 whetstone but not level-3 self.assertTrue(inv_obs.id(inv_obs.sig(*wstone_lvl0)) in inv_obs.ids[mask]) self.assertTrue(inv_obs.id(inv_obs.sig(*wstone_lvl1)) in inv_obs.ids[mask]) diff --git a/tests/action/test_monkey_action.py b/tests/action/test_monkey_action.py index 9b5d2e2c3..df85ced49 100644 --- a/tests/action/test_monkey_action.py +++ b/tests/action/test_monkey_action.py @@ -21,7 +21,7 @@ def make_random_actions(config, ent_obs): for atn in sorted(nmmo.Action.edges(config)): actions[atn] = {} for arg in sorted(atn.edges, reverse=True): # intentionally doing wrong - mask = ent_obs['ActionTargets'][atn][arg] + mask = ent_obs["ActionTargets"][atn.__name__][arg.__name__] actions[atn][arg] = 0 if np.any(mask): actions[atn][arg] += int(np.random.choice(np.where(mask)[0])) diff --git a/tests/core/test_env.py b/tests/core/test_env.py index dccca541b..c232df1bc 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -10,7 +10,6 @@ from nmmo.core.tile import TileState from nmmo.entity.entity import Entity, EntityState from nmmo.systems.item import ItemState -from nmmo.core import action from scripted import baselines # Allow private access for testing @@ -60,12 +59,12 @@ def test_observations(self): self._validate_market(player_obs, self.env.realm) else: # the obs of dead agents are dummy, all zeros - self.assertEqual(np.sum(player_obs['Tile']), 0) - self.assertEqual(np.sum(player_obs['Entity']), 0) - self.assertEqual(np.sum(player_obs['Inventory']), 0) - self.assertEqual(np.sum(player_obs['Market']), 0) - self.assertEqual(np.sum(player_obs['ActionTargets'][action.Move][action.Direction]), 0) - self.assertEqual(np.sum(player_obs['ActionTargets'][action.Attack][action.Style]), 0) + self.assertEqual(np.sum(player_obs["Tile"]), 0) + self.assertEqual(np.sum(player_obs["Entity"]), 0) + self.assertEqual(np.sum(player_obs["Inventory"]), 0) + self.assertEqual(np.sum(player_obs["Market"]), 0) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Move"]["Direction"]), 0) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Attack"]["Style"]), 0) obs, rewards, dones, infos = self.env.step({}) diff --git a/tests/core/test_gym_obs_spaces.py b/tests/core/test_gym_obs_spaces.py index 919bf513f..50638bbfe 100644 --- a/tests/core/test_gym_obs_spaces.py +++ b/tests/core/test_gym_obs_spaces.py @@ -18,8 +18,11 @@ def _test_gym_obs_space(self, env): for atn in nmmo.Action.edges(env.config): if atn.enabled(env.config): for arg in atn.edges: # pylint: disable=not-an-iterable - self.assertTrue(obs_spec['ActionTargets'][atn][arg].contains(val[atn][arg]), - f"Invalid obs format -- key: {atn}/{arg}, val: {val[atn][arg]}") + mask_spec = obs_spec['ActionTargets'][atn.__name__][arg.__name__] + mask_val = val[atn.__name__][arg.__name__] + self.assertTrue(mask_spec.contains(mask_val), + "Invalid obs format -- " + \ + f"key: {atn.__name__}/{arg.__name__}, val: {mask_val}") def test_env_without_noop(self): config = nmmo.config.Default() diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 9fdb20877..2ff45c3fe 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -266,21 +266,21 @@ def _setup_env(self, random_seed, check_assert=True): def _check_ent_mask(self, ent_obs, atn, target_id): assert atn in [action.Give, action.GiveGold], "Invalid action" gym_obs = ent_obs.to_gym() - mask = gym_obs['ActionTargets'][atn][action.Target][:ent_obs.entities.len] > 0 + mask = gym_obs["ActionTargets"][atn.__name__]["Target"][:ent_obs.entities.len] > 0 return target_id in ent_obs.entities.ids[mask] def _check_inv_mask(self, ent_obs, atn, item_sig): assert atn in [action.Destroy, action.Give, action.Sell, action.Use], "Invalid action" gym_obs = ent_obs.to_gym() - mask = gym_obs['ActionTargets'][atn][action.InventoryItem][:ent_obs.inventory.len] > 0 + mask = gym_obs["ActionTargets"][atn.__name__]["InventoryItem"][:ent_obs.inventory.len] > 0 inv_idx = ent_obs.inventory.sig(*item_sig) return ent_obs.inventory.id(inv_idx) in ent_obs.inventory.ids[mask] def _check_mkt_mask(self, ent_obs, item_id): gym_obs = ent_obs.to_gym() - mask = gym_obs['ActionTargets'][action.Buy][action.MarketItem][:ent_obs.market.len] > 0 + mask = gym_obs["ActionTargets"]["Buy"]["MarketItem"][:ent_obs.market.len] > 0 return item_id in ent_obs.market.ids[mask] From cc2632cf6805c2e868cfca2d05b73644f870345d Mon Sep 17 00:00:00 2001 From: kywch Date: Fri, 18 Aug 2023 15:15:52 -0700 Subject: [PATCH 093/113] actions support both str and obj keys --- nmmo/core/env.py | 37 +++++++++++++++++++----------- tests/action/test_monkey_action.py | 17 +++++++++++--- tests/core/test_env.py | 3 ++- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 387665c51..4f1cf7152 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -1,7 +1,7 @@ import functools from typing import Any, Dict, List, Callable from collections import defaultdict -from copy import copy +from copy import copy, deepcopy import dill import gym @@ -85,13 +85,11 @@ def mask_box(length): obs_space["Market"] = box(self.config.MARKET_N_OBS, Item.State.num_attributes) if self.config.PROVIDE_ACTION_TARGETS: - mask_spec = {} # deepcopy(self._atn_space) - for atn in self._atn_space: - tmp_mask = {} - for arg in atn.edges: - tmp_mask[arg.__name__] = mask_box(self._atn_space[atn][arg].n) - mask_spec[atn.__name__] = gym.spaces.Dict(tmp_mask) - obs_space['ActionTargets'] = gym.spaces.Dict(mask_spec) + mask_spec = deepcopy(self._atn_space) + for atn_str in mask_spec: + for arg_str in mask_spec[atn_str]: + mask_spec[atn_str][arg_str] = mask_box(self._atn_space[atn_str][arg_str].n) + obs_space["ActionTargets"] = mask_spec return gym.spaces.Dict(obs_space) @@ -113,13 +111,23 @@ def _atn_space(self): actions = {} for atn in sorted(nmmo.Action.edges(self.config)): if atn.enabled(self.config): - actions[atn] = {} + actions[atn.__name__] = {} # use the string key for arg in sorted(atn.edges): n = arg.N(self.config) - actions[atn][arg] = gym.spaces.Discrete(n) - actions[atn] = gym.spaces.Dict(actions[atn]) + actions[atn.__name__][arg.__name__] = gym.spaces.Discrete(n) + actions[atn.__name__] = gym.spaces.Dict(actions[atn.__name__]) return gym.spaces.Dict(actions) + @functools.cached_property + def _str_atn_map(self): + '''Map action and argument names to their corresponding objects''' + str_map = {} + for atn in nmmo.Action.edges(self.config): + str_map[atn.__name__] = atn + for arg in atn.edges: + str_map[arg.__name__] = arg + return str_map + # pylint: disable=method-cache-max-size-none @functools.lru_cache(maxsize=None) def action_space(self, agent: AgentID): @@ -368,15 +376,16 @@ def _validate_actions(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): validated_actions[ent_id] = {} - for atn, args in sorted(atns.items()): + for atn_key, args in sorted(atns.items()): action_valid = True deserialized_action = {} - + atn = self._str_atn_map[atn_key] if isinstance(atn_key, str) else atn_key if not atn.enabled(self.config): action_valid = False break - for arg, val in sorted(args.items()): + for arg_key, val in sorted(args.items()): + arg = self._str_atn_map[arg_key] if isinstance(arg_key, str) else arg_key obj = arg.deserialize(self.realm, entity, val, self.obs[ent_id]) if obj is None: action_valid = False diff --git a/tests/action/test_monkey_action.py b/tests/action/test_monkey_action.py index df85ced49..2c2f6da92 100644 --- a/tests/action/test_monkey_action.py +++ b/tests/action/test_monkey_action.py @@ -29,7 +29,7 @@ def make_random_actions(config, ent_obs): return actions # CHECK ME: this would be nice to include in the env._validate_actions() -def filter_item_actions(actions): +def filter_item_actions(actions, use_str_key=False): # when there are multiple actions on the same item, select one flt_atns = {} inventory_atn = {} # key: inventory idx, val: action @@ -52,6 +52,15 @@ def filter_item_actions(actions): else: flt_atns[atns[0][0]] = atns[0][1] + # convert action keys to str + if use_str_key: + str_atns = {} + for atn, args in flt_atns.items(): + str_atns[atn.__name__] = {} + for arg, val in args.items(): + str_atns[atn.__name__][arg.__name__] = val + flt_atns = str_atns + return flt_atns @@ -63,7 +72,7 @@ def setUpClass(cls): @staticmethod # NOTE: this can also be used for sweeping random seeds - def rollout_with_seed(config, seed): + def rollout_with_seed(config, seed, use_str_key=False): env = ScriptedAgentTestEnv(config) obs = env.reset(seed=seed) @@ -72,7 +81,7 @@ def rollout_with_seed(config, seed): actions = {} for ent_id in env.realm.players: ent_atns = make_random_actions(config, obs[ent_id]) - actions[ent_id] = filter_item_actions(ent_atns) + actions[ent_id] = filter_item_actions(ent_atns, use_str_key) obs, _, _, _ = env.step(actions) def test_monkey_action(self): @@ -81,6 +90,8 @@ def test_monkey_action(self): except: # pylint: disable=bare-except assert False, f"Monkey action failed. seed: {RANDOM_SEED}" + def test_monkey_action_with_str_key(self): + self.rollout_with_seed(self.config, RANDOM_SEED, use_str_key=True) if __name__ == '__main__': unittest.main() diff --git a/tests/core/test_env.py b/tests/core/test_env.py index c232df1bc..bb81a0eca 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -33,9 +33,10 @@ def setUpClass(cls): def test_action_space(self): action_space = self.env.action_space(0) + atn_str_keys = set(atn.__name__ for atn in nmmo.Action.edges(self.config)) self.assertSetEqual( set(action_space.keys()), - set(nmmo.Action.edges(self.config))) + atn_str_keys) def test_observations(self): obs = self.env.reset() From f3798001e6987c1d623d8199509184ebed395027 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 19 Aug 2023 20:51:22 -0700 Subject: [PATCH 094/113] clear obs cache to allow garbage collection --- nmmo/core/env.py | 5 +++++ nmmo/core/observation.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 4f1cf7152..1ff1dfba1 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -349,6 +349,11 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): else: dones[agent_id] = False + # Clean up unnecessary observations, which cause memory leaks + for agent_id in self.obs: + # pylint: disable=unnecessary-dunder-call + self.obs[agent_id].__del__() # clear the lru_cache + # Store the observations, since actions reference them self.obs = self._compute_observations() gym_obs = {a: o.to_gym() for a,o in self.obs.items()} diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 29d24b304..c306fca92 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -117,6 +117,12 @@ def entity(self, entity_id): def agent(self): return self.entity(self.agent_id) + def __del__(self): + # clear the cache, so that this object can be garbage collected + self.agent.cache_clear() + self.entity.cache_clear() + self.tile.cache_clear() + def get_empty_obs(self): gym_obs = { "CurrentTick": self.current_tick, From e9ec8f4cffa0462c143f710ec7a54c630edd03d0 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 19 Aug 2023 21:20:23 -0700 Subject: [PATCH 095/113] clean up obs during reset too --- nmmo/core/env.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index 1ff1dfba1..fdb3944f3 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -349,11 +349,6 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]): else: dones[agent_id] = False - # Clean up unnecessary observations, which cause memory leaks - for agent_id in self.obs: - # pylint: disable=unnecessary-dunder-call - self.obs[agent_id].__del__() # clear the lru_cache - # Store the observations, since actions reference them self.obs = self._compute_observations() gym_obs = {a: o.to_gym() for a,o in self.obs.items()} @@ -426,6 +421,12 @@ def _make_dummy_obs(self): dummy_tiles, dummy_entities, dummy_inventory, dummy_market) def _compute_observations(self): + # Clean up unnecessary observations, which cause memory leaks + if self.obs is not None: + for agent_obs in self.obs.values(): + # pylint: disable=unnecessary-dunder-call + agent_obs.__del__() # clear the lru_cache + obs = {} market = Item.Query.for_sale(self.realm.datastore) From fb59d16c39ecbce28c316e7996c9f0ecd2896df6 Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 20 Aug 2023 00:35:46 -0700 Subject: [PATCH 096/113] caught task-related obj leaks --- nmmo/core/env.py | 20 +++++++++++--- nmmo/core/observation.py | 2 +- nmmo/task/game_state.py | 54 +++++++++++++++++++++++++++++-------- nmmo/task/group.py | 10 +++++++ nmmo/task/predicate_api.py | 5 ++++ nmmo/task/task_api.py | 7 +++++ tests/task/test_task_api.py | 3 +++ tests/testhelpers.py | 3 ++- 8 files changed, 87 insertions(+), 17 deletions(-) diff --git a/nmmo/core/env.py b/nmmo/core/env.py index fdb3944f3..db0340335 100644 --- a/nmmo/core/env.py +++ b/nmmo/core/env.py @@ -196,6 +196,9 @@ def reset(self, map_id=None, seed=None, options=None, self._dummy_obs = self._make_dummy_obs() self.obs = self._compute_observations() self._gamestate_generator = GameStateGenerator(self.realm, self.config) + if self.game_state is not None: + self.game_state.clear_cache() + self.game_state = None self._reset_required = False @@ -423,9 +426,11 @@ def _make_dummy_obs(self): def _compute_observations(self): # Clean up unnecessary observations, which cause memory leaks if self.obs is not None: - for agent_obs in self.obs.values(): - # pylint: disable=unnecessary-dunder-call - agent_obs.__del__() # clear the lru_cache + for agent_id, agent_obs in self.obs.items(): + agent_obs.clear_cache() # clear the lru_cache + self.obs[agent_id] = None + del agent_obs + self.obs = None obs = {} market = Item.Query.for_sale(self.realm.datastore) @@ -489,6 +494,11 @@ def _compute_rewards(self): infos = {agent_id: {'task': {}} for agent_id in agents} rewards = defaultdict(int) + # Clean up unnecessary game state, which cause memory leaks + if self.game_state is not None: + self.game_state.clear_cache() + self.game_state = None + # Compute Rewards and infos self.game_state = self._gamestate_generator.generate(self.realm, self.obs) for task in self.tasks: @@ -497,7 +507,9 @@ def _compute_rewards(self): for agent_id, reward in task_rewards.items(): if agent_id in agents: rewards[agent_id] = rewards.get(agent_id,0) + reward - infos[agent_id]['task'][task.name] = task_infos[agent_id] # include progress, etc + infos[agent_id]['task'][task.name] = task_infos[agent_id] # include progress, etc. + else: + task.close() # To prevent memory leak # Make sure the dead agents return the rewards of -1 for agent_id in self._dead_this_tick: diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index c306fca92..cd28c97fd 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -117,7 +117,7 @@ def entity(self, entity_id): def agent(self): return self.entity(self.agent_id) - def __del__(self): + def clear_cache(self): # clear the cache, so that this object can be garbage collected self.agent.cache_clear() self.entity.cache_clear() diff --git a/nmmo/task/game_state.py b/nmmo/task/game_state.py index 753b023cb..e5e743d05 100644 --- a/nmmo/task/game_state.py +++ b/nmmo/task/game_state.py @@ -1,8 +1,9 @@ from __future__ import annotations -from typing import Dict, Iterable, Tuple, MutableMapping, Set -from dataclasses import dataclass +from typing import Dict, Iterable, Tuple, MutableMapping, Set, List +from dataclasses import dataclass, field from copy import deepcopy from collections import defaultdict +import weakref from abc import ABC, abstractmethod import functools @@ -44,6 +45,7 @@ class GameState: event_index: Dict[int, Iterable] cache_result: MutableMapping # cache for general memoization + _group_view: List[GroupView] = field(default_factory=list) # cache for GroupView # add helper functions below @functools.lru_cache @@ -73,9 +75,33 @@ def where_in_id(self, data_type, subject: Iterable[int]): raise ValueError("data_type must be in entity, item, event") def get_subject_view(self, subject: Group): - return GroupView(self, subject) + new_group_view = GroupView(self, subject) + self._group_view.append(new_group_view) + return new_group_view + + def clear_cache(self): + # clear the cache, so that this object can be garbage collected + self.entity_or_none.cache_clear() # pylint: disable=no-member + self.cache_result.clear() + self.alive_agents.clear() + while self._group_view: + weakref.ref(self._group_view.pop()) # clear the cache # Wrapper around an iterable datastore +class CachedProperty: + def __init__(self, func): + self.func = func + # Allows the instance keys to be garbage collected + # when they are no longer referenced elsewhere + self.cache = weakref.WeakKeyDictionary() + + def __get__(self, instance, owner): + if instance is None: + return self + if instance not in self.cache: + self.cache[instance] = self.func(instance) + return self.cache[instance] + class ArrayView(ABC): def __init__(self, mapping, @@ -157,7 +183,10 @@ def __init__(self, gs: GameState, subject: Group): valid_agents = filter(lambda eid: eid in gs.env_obs,subject.agents) self._obs = [gs.env_obs[ent_id] for ent_id in valid_agents] self._subject = subject - self.tile = TileView(gs, subject, [o.tiles for o in self._obs]) + + @CachedProperty + def tile(self): + return TileView(self._gs, self._subject, [o.tiles for o in self._obs]) def __getattr__(self, attr): return [getattr(o, attr) for o in self._obs] @@ -167,29 +196,32 @@ def __init__(self, gs: GameState, subject: Group): self._gs = gs self._subject = subject self._subject_hash = hash(subject) - self.obs = GroupObsView(gs, subject) - @functools.cached_property + @CachedProperty + def obs(self): + return GroupObsView(self._gs, self._subject) + + @CachedProperty def _sbj_ent(self): return self._gs.where_in_id('entity', self._subject.agents) - @functools.cached_property + @CachedProperty def entity(self): return EntityView(self._gs, self._subject, self._sbj_ent) - @functools.cached_property + @CachedProperty def _sbj_item(self): return self._gs.where_in_id('item', self._subject.agents) - @functools.cached_property + @CachedProperty def item(self): return ItemView(self._gs, self._subject, self._sbj_item) - @functools.cached_property + @CachedProperty def _sbj_event(self): return self._gs.where_in_id('event', self._subject.agents) - @functools.cached_property + @CachedProperty def event(self): return EventView(self._gs, self._subject, self._sbj_event) diff --git a/nmmo/task/group.py b/nmmo/task/group.py index 39020f99b..4d319f081 100644 --- a/nmmo/task/group.py +++ b/nmmo/task/group.py @@ -2,6 +2,7 @@ from typing import Dict, Union, Iterable, TYPE_CHECKING from collections import OrderedDict from collections.abc import Set, Sequence +import weakref if TYPE_CHECKING: from nmmo.task.game_state import GameState, GroupView @@ -75,7 +76,16 @@ def description(self) -> Dict: "agents": self._agents } + def clear_prev_state(self) -> None: + if self._gs is not None: + self._gs.clear_cache() # prevent memory leak + self._gs = None + if self._sd is not None: + weakref.ref(self._sd) # prevent memory leak + self._sd = None + def update(self, gs: GameState) -> None: + self.clear_prev_state() self._gs = gs self._sd = gs.get_subject_view(self) diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py index dd74fb5ba..6381f2be3 100644 --- a/nmmo/task/predicate_api.py +++ b/nmmo/task/predicate_api.py @@ -66,6 +66,11 @@ def _reset(self, config: Config): if not self.check(self._config): raise InvalidConstraint() + def close(self): + # To prevent memory leak, clear all refs to old game state + for group in self._groups: + group.clear_prev_state() + def check(self, config: Config): """ Checks whether the predicate is valid diff --git a/nmmo/task/task_api.py b/nmmo/task/task_api.py index bcef741e8..be293c6a6 100644 --- a/nmmo/task/task_api.py +++ b/nmmo/task/task_api.py @@ -34,6 +34,7 @@ def __init__(self, self.reset() def reset(self): + self._stop_eval = False self._last_eval_tick = None self._progress = 0.0 self._completed_tick = None @@ -41,6 +42,12 @@ def reset(self): self._positive_reward_count = 0 self._negative_reward_count = 0 + def close(self): + if self._stop_eval is False: + if isinstance(self._eval_fn, Predicate): + self._eval_fn.close() + self._stop_eval = True + @property def assignee(self) -> Tuple[int]: return self._assignee diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index a85450b99..8f8322a4b 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -39,6 +39,9 @@ def __init__(self): self.cache_result = {} self.get_subject_view = lambda _: None + def clear_cache(self): + pass + class TestTaskAPI(unittest.TestCase): def test_predicate_operators(self): # pylint: disable=unsupported-binary-operation,invalid-unary-operand-type diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 2ff45c3fe..46cc04144 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -390,7 +390,8 @@ def profile_env_step(action_target=True, tasks=None, condition=None): for _ in range(3): env.step({}) - obs = env._compute_observations() + env.obs = env._compute_observations() + obs = deepcopy(env.obs) test_func = [ ('env.step({}):', lambda: env.step({})), From 0e1166d419311410955ee3344714ac06283d8476 Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 21 Aug 2023 10:54:00 -0700 Subject: [PATCH 097/113] added eventlog for loot item/gold --- nmmo/entity/npc.py | 13 +++++++++---- nmmo/entity/player.py | 11 ++++++++--- nmmo/lib/event_log.py | 4 +++- nmmo/lib/log.py | 1 + nmmo/systems/inventory.py | 10 ++++++---- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py index e19fd94e1..211eb4076 100644 --- a/nmmo/entity/npc.py +++ b/nmmo/entity/npc.py @@ -5,7 +5,7 @@ from nmmo.systems import item as Item from nmmo.systems import skill from nmmo.systems.inventory import EquipmentSlot - +from nmmo.lib.log import EventCode class Equipment: def __init__(self, total, @@ -71,12 +71,17 @@ def receive_damage(self, source, dmg): # run the next lines if the npc is killed # source receive gold & items in the droptable # pylint: disable=no-member - source.gold.increment(self.gold.val) - self.gold.update(0) + if self.gold.val > 0: + source.gold.increment(self.gold.val) + self.realm.event_log.record(EventCode.EARN_GOLD, source, amount=self.gold.val) + self.gold.update(0) for item in self.droptable.roll(self.realm, self.attack_level): if source.is_player and source.inventory.space: - source.inventory.receive(item) + # inventory.receive() returns True if the item is received + # if source doesn't have space, inventory.receive() destroys the item + if source.inventory.receive(item): + self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item) else: item.destroy() diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py index 8cac01629..b635810d0 100644 --- a/nmmo/entity/player.py +++ b/nmmo/entity/player.py @@ -1,5 +1,6 @@ from nmmo.systems.skill import Skills from nmmo.entity import entity +from nmmo.lib.log import EventCode # pylint: disable=no-member class Player(entity.Entity): @@ -65,8 +66,10 @@ def receive_damage(self, source, dmg): # starting from here, source receive gold & inventory items if self.config.EXCHANGE_SYSTEM_ENABLED and source is not None: - source.gold.increment(self.gold.val) - self.gold.update(0) + if self.gold.val > 0: + source.gold.increment(self.gold.val) + self.realm.event_log.record(EventCode.EARN_GOLD, source, amount=self.gold.val) + self.gold.update(0) # TODO: make source receive the highest-level items first # because source cannot take it if the inventory is full @@ -77,8 +80,10 @@ def receive_damage(self, source, dmg): # if source is None or NPC, destroy the item if source.is_player: + # inventory.receive() returns True if the item is received # if source doesn't have space, inventory.receive() destroys the item - source.inventory.receive(item) + if source.inventory.receive(item): + self.realm.event_log.record(EventCode.LOOT_ITEM, source, item=item) else: item.destroy() diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index 48772d840..a5aac246a 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -110,7 +110,8 @@ def record(self, event_code: int, entity: Entity, **kwargs): log.level.update(target.attack_level) return - if event_code in [EventCode.CONSUME_ITEM, EventCode.HARVEST_ITEM, EventCode.EQUIP_ITEM]: + if event_code in [EventCode.CONSUME_ITEM, EventCode.HARVEST_ITEM, EventCode.EQUIP_ITEM, + EventCode.LOOT_ITEM]: # CHECK ME: item types should be checked. For example, # Only Ration and Potion can be consumed # Only Ration, Potion, Whetstone, Arrow, Runes can be produced @@ -134,6 +135,7 @@ def record(self, event_code: int, entity: Entity, **kwargs): log.gold.update(kwargs['price']) return + # NOTE: do we want to separate the source of income? from selling vs looting if event_code == EventCode.EARN_GOLD: if ('amount' in kwargs and kwargs['amount'] > 0): log = self._create_event(entity, event_code) diff --git a/nmmo/lib/log.py b/nmmo/lib/log.py index 8ee6c77d7..8ced7f7a4 100644 --- a/nmmo/lib/log.py +++ b/nmmo/lib/log.py @@ -51,6 +51,7 @@ class EventCode: DESTROY_ITEM = 23 HARVEST_ITEM = 24 EQUIP_ITEM = 25 + LOOT_ITEM = 26 # Exchange GIVE_GOLD = 31 diff --git a/nmmo/systems/inventory.py b/nmmo/systems/inventory.py index 446840a85..ae2024500 100644 --- a/nmmo/systems/inventory.py +++ b/nmmo/systems/inventory.py @@ -125,7 +125,8 @@ def __iter__(self): for item in self.items: yield item - def receive(self, item: Item.Item): + def receive(self, item: Item.Item) -> bool: + # Return True if the item is received assert isinstance(item, Item.Item), f'{item} received is not an Item instance' assert item not in self.items, f'{item} object received already in inventory' assert not item.equipped.val, f'Received equipped item {item}' @@ -140,19 +141,19 @@ def receive(self, item: Item.Item): stack.quantity.increment(item.quantity.val) # destroy the original item instance after the transfer is complete item.destroy() - return + return False if not self.space: # if no space thus cannot receive, just destroy the item item.destroy() - return + return False self._item_stacks[signature] = item if not self.space: # if no space thus cannot receive, just destroy the item item.destroy() - return + return False self.realm.log_milestone(f'Receive_{item.__class__.__name__}', item.level.val, f'INVENTORY: Received level {item.level.val} {item.__class__.__name__}', @@ -160,6 +161,7 @@ def receive(self, item: Item.Item): item.owner_id.update(self.entity.id.val) self.items.add(item) + return True # pylint: disable=protected-access def remove(self, item, quantity=None): From 4f5b24db7eda606e29474d5ff47b2d54851f235a Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 22 Aug 2023 10:57:28 -0700 Subject: [PATCH 098/113] tweaked combat cfg, tool use --- nmmo/core/config.py | 10 +++++----- nmmo/systems/skill.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 4efa9133d..9cc7cccf9 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -437,19 +437,19 @@ class Progression: PROGRESSION_LEVEL_MAX = 10 '''Max skill level''' - PROGRESSION_MELEE_BASE_DAMAGE = 0 + PROGRESSION_MELEE_BASE_DAMAGE = 20 '''Base Melee attack damage''' PROGRESSION_MELEE_LEVEL_DAMAGE = 5 '''Bonus Melee attack damage per level''' - PROGRESSION_RANGE_BASE_DAMAGE = 0 + PROGRESSION_RANGE_BASE_DAMAGE = 20 '''Base Range attack damage''' PROGRESSION_RANGE_LEVEL_DAMAGE = 5 '''Bonus Range attack damage per level''' - PROGRESSION_MAGE_BASE_DAMAGE = 0 + PROGRESSION_MAGE_BASE_DAMAGE = 20 '''Base Mage attack damage ''' PROGRESSION_MAGE_LEVEL_DAMAGE = 5 @@ -492,13 +492,13 @@ class NPC: NPC_BASE_DEFENSE = 0 '''Base NPC defense''' - NPC_LEVEL_DEFENSE = 30 + NPC_LEVEL_DEFENSE = 15 '''Bonus NPC defense per level''' NPC_BASE_DAMAGE = 15 '''Base NPC damage''' - NPC_LEVEL_DAMAGE = 30 + NPC_LEVEL_DAMAGE = 15 '''Bonus NPC damage per level''' diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index a2de62928..db0a6d746 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -98,7 +98,7 @@ def process_drops(self, matl, drop_table): level = 1 tool = entity.equipment.held if matl.tool is not None and isinstance(tool, matl.tool): - level = tool.level.val + level = min(1+tool.level.val, self.config.PROGRESSION_LEVEL_MAX) #TODO: double-check drop table quantity for drop in drop_table.roll(self.realm, level): From f52231e1db59c29208e284ce61f3ead31e1f33c1 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 22 Aug 2023 17:44:15 -0700 Subject: [PATCH 099/113] added support for get event log data by tick --- nmmo/core/realm.py | 1 + nmmo/lib/event_log.py | 33 ++++++++++++++++++++++++--------- tests/test_eventlog.py | 24 +++++++++++++++++++++++- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py index ef662c40d..eb8723ff1 100644 --- a/nmmo/core/realm.py +++ b/nmmo/core/realm.py @@ -188,6 +188,7 @@ def step(self, actions): self.map.step() self.exchange.step(self.tick) self.log_helper.update(dead) + self.event_log.update() if self._replay_helper is not None: self._replay_helper.update() diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index a5aac246a..d72aefa7e 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -30,6 +30,8 @@ table=lambda ds: ds.table("Event").where_eq(EventAttr["recorded"], 1), by_event=lambda ds, event_code: ds.table("Event").where_eq( EventAttr["event"], event_code), + by_tick=lambda ds, tick: ds.table("Event").where_eq( + EventAttr["tick"], tick), ) # defining col synoyms for different event types @@ -55,6 +57,8 @@ def __init__(self, realm): self.valid_events = { val: evt for evt, val in EventCode.__dict__.items() if isinstance(val, int) } + self._data_by_tick = {} + self._last_tick = 0 # add synonyms to the attributes self.attr_to_col = deepcopy(EventAttr) @@ -155,16 +159,27 @@ def record(self, event_code: int, entity: Entity, **kwargs): # CHECK ME: The below should be commented out after debugging raise ValueError(f"Event code: {event_code}", kwargs) - def get_data(self, event_code=None, agents: List[int]=None): - if event_code is None: - event_data = EventState.Query.table(self.datastore) - elif event_code in self.valid_events: - event_data = EventState.Query.by_event(self.datastore, event_code) + def update(self): + curr_tick = self.realm.tick + 1 # update happens before the tick update + if curr_tick > self._last_tick: + self._data_by_tick[curr_tick] = EventState.Query.by_tick(self.datastore, curr_tick) + self._last_tick = curr_tick + + def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> np.ndarray: + if tick is not None: + if tick not in self._data_by_tick: + return np.array([]) + event_data = self._data_by_tick[tick] else: - return None + event_data = EventState.Query.table(self.datastore) - if agents: - flt_idx = np.in1d(event_data[:, EventAttr['ent_id']], agents) + if event_data.shape[0] > 0: + if event_code is None: + flt_idx = event_data[:, EventAttr["event"]] > 0 + else: + flt_idx = event_data[:, EventAttr["event"]] == event_code + if agents: + flt_idx &= np.in1d(event_data[:, EventAttr["ent_id"]], agents) return event_data[flt_idx] - return event_data + return np.array([]) \ No newline at end of file diff --git a/tests/test_eventlog.py b/tests/test_eventlog.py index d8b0405e1..c6322c52f 100644 --- a/tests/test_eventlog.py +++ b/tests/test_eventlog.py @@ -48,6 +48,7 @@ def test_event_logging(self): combat_style=Skill.Melee, damage=50) event_log.record(EventCode.PLAYER_KILL, MockEntity(3), target=MockEntity(5, attack_level=5)) + event_log.update() mock_realm.tick = 1 event_log.record(EventCode.CONSUME_ITEM, MockEntity(4), @@ -56,6 +57,7 @@ def test_event_logging(self): event_log.record(EventCode.DESTROY_ITEM, MockEntity(5)) event_log.record(EventCode.HARVEST_ITEM, MockEntity(6), item=Whetstone(mock_realm, 3)) + event_log.update() mock_realm.tick = 2 event_log.record(EventCode.GIVE_GOLD, MockEntity(7)) @@ -65,18 +67,20 @@ def test_event_logging(self): event_log.record(EventCode.BUY_ITEM, MockEntity(10), item=Whetstone(mock_realm, 7), price=21) #event_log.record(EventCode.SPEND_GOLD, env.realm.players[11], amount=25) + event_log.update() mock_realm.tick = 3 event_log.record(EventCode.LEVEL_UP, MockEntity(12), skill=Skill.Fishing, level=3) + event_log.update() mock_realm.tick = 4 event_log.record(EventCode.GO_FARTHEST, MockEntity(12), distance=6) event_log.record(EventCode.EQUIP_ITEM, MockEntity(12), item=Hat(mock_realm, 4)) + event_log.update() log_data = [list(row) for row in event_log.get_data()] - self.assertListEqual(log_data, [ [1, 1, 1, EventCode.EAT_FOOD, 0, 0, 0, 0, 0], [1, 2, 1, EventCode.DRINK_WATER, 0, 0, 0, 0, 0], @@ -94,6 +98,24 @@ def test_event_logging(self): [1, 12, 5, EventCode.GO_FARTHEST, 0, 0, 6, 0, 0], [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + log_by_tick = [list(row) for row in event_log.get_data(tick = 4)] + self.assertListEqual(log_by_tick, [ + [1, 12, 4, EventCode.LEVEL_UP, 4, 3, 0, 0, 0]]) + + log_by_event = [list(row) for row in event_log.get_data(event_code = EventCode.CONSUME_ITEM)] + self.assertListEqual(log_by_event, [ + [1, 4, 2, EventCode.CONSUME_ITEM, 16, 8, 1, 0, 0]]) + + log_by_tick_agent = [list(row) for row in \ + event_log.get_data(tick = 5, + agents = [12], + event_code = EventCode.EQUIP_ITEM)] + self.assertListEqual(log_by_tick_agent, [ + [1, 12, 5, EventCode.EQUIP_ITEM, 2, 4, 1, 0, 0]]) + + empty_log = event_log.get_data(tick = 10) + self.assertTrue(empty_log.shape[0] == 0) + if __name__ == '__main__': unittest.main() From e83923f10ec1932f212992b19a2c01d16829c052 Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 22 Aug 2023 17:57:09 -0700 Subject: [PATCH 100/113] fixed pylint error --- nmmo/lib/event_log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index d72aefa7e..dc0f8872f 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -182,4 +182,4 @@ def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> n flt_idx &= np.in1d(event_data[:, EventAttr["ent_id"]], agents) return event_data[flt_idx] - return np.array([]) \ No newline at end of file + return np.array([]) From 693802c2562546b087d13cdc95300737eee6157c Mon Sep 17 00:00:00 2001 From: kywch Date: Tue, 22 Aug 2023 23:27:07 -0700 Subject: [PATCH 101/113] fixed spawn immunity bug/mask --- nmmo/core/action.py | 2 +- nmmo/core/observation.py | 11 +++++------ tests/action/test_ammo_use.py | 22 +++++++++++++++++++++- tests/testhelpers.py | 9 +++++++-- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index 9c68db1cc..c6048ca50 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -239,7 +239,7 @@ def call(realm, entity, style, target): # Testing a spawn immunity against old agents to avoid spawn camping immunity = config.COMBAT_SPAWN_IMMUNITY if entity.is_player and target.is_player and \ - target.history.time_alive < immunity < entity.history.time_alive.val: + target.history.time_alive < immunity: return None #Check if self targeted diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index cd28c97fd..cf3444b45 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -243,17 +243,16 @@ def _make_attack_mask(self): ) <= self.config.COMBAT_MELEE_REACH immunity = self.config.COMBAT_SPAWN_IMMUNITY - if 0 < immunity < agent.time_alive: - # ids > 0 equals entity.is_player - spawn_immunity = (self.entities.ids > 0) & \ - (self.entities.values[:,EntityState.State.attr_name_to_col["time_alive"]] < immunity) + if agent.time_alive < immunity: + # NOTE: CANNOT attack players during immunity, thus mask should set to 0 + no_spawn_immunity = ~(self.entities.ids > 0) # ids > 0 equals entity.is_player else: - spawn_immunity = np.ones(self.entities.len, dtype=bool) + no_spawn_immunity = np.ones(self.entities.len, dtype=bool) # allow friendly fire but no self shooting not_me = self.entities.ids != agent.id - attack_mask[:self.entities.len] = within_range & not_me & spawn_immunity + attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity return attack_mask def _make_use_mask(self): diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 6e85b7d55..ebaab3e12 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -32,9 +32,29 @@ def _assert_action_targets_zero(self, gym_obs): # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 self.assertEqual(mask, 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) - def test_ammo_fire_all(self): + def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) + # Check spawn immunity in the action targets + for ent_obs in env.obs.values(): + gym_obs = ent_obs.to_gym() + target_mask = gym_obs["ActionTargets"]["Attack"]["Target"][:len(ent_obs.entities.ids)] + # cannot target other agents + self.assertTrue(np.sum(target_mask[ent_obs.entities.ids > 0]) == 0) + + # Test attack during spawn immunity, which should be ignored + env.step({ ent_id: { action.Attack: + { action.Style: env.realm.players[ent_id].agent.style[0], + action.Target: env.obs[ent_id].entities.index((ent_id+1)%3+1) } } + for ent_id in self.ammo }) + + for ent_id in [1, 2, 3]: + # in_combat status is set when attack is executed + self.assertFalse(env.realm.players[ent_id].in_combat) + + def test_ammo_fire_all(self): + env = self._setup_env(random_seed=RANDOM_SEED, remove_immunity=True) + # First tick actions: USE (equip) level-0 ammo env.step({ ent_id: { action.Use: { action.InventoryItem: env.obs[ent_id].inventory.sig(ent_ammo, 0) } diff --git a/tests/testhelpers.py b/tests/testhelpers.py index 46cc04144..111bf2621 100644 --- a/tests/testhelpers.py +++ b/tests/testhelpers.py @@ -225,9 +225,14 @@ def _make_item_sig(self): return item_sig - def _setup_env(self, random_seed, check_assert=True): + def _setup_env(self, random_seed, check_assert=True, remove_immunity=False): """ set up a new env and perform initial checks """ - env = ScriptedAgentTestEnv(self.config, seed=random_seed) + config = deepcopy(self.config) + + if remove_immunity: + config.COMBAT_SPAWN_IMMUNITY = 0 + + env = ScriptedAgentTestEnv(config, seed=random_seed) env.reset() # provide money for all From 72c40b065fdb7e99287d604e1682d305cb66ff35 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 23 Aug 2023 02:23:33 -0700 Subject: [PATCH 102/113] tweaked agent damages, added health_restore val for reward --- nmmo/core/config.py | 6 +++--- nmmo/entity/entity.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 4efa9133d..d0f2bd404 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -437,19 +437,19 @@ class Progression: PROGRESSION_LEVEL_MAX = 10 '''Max skill level''' - PROGRESSION_MELEE_BASE_DAMAGE = 0 + PROGRESSION_MELEE_BASE_DAMAGE = 15 '''Base Melee attack damage''' PROGRESSION_MELEE_LEVEL_DAMAGE = 5 '''Bonus Melee attack damage per level''' - PROGRESSION_RANGE_BASE_DAMAGE = 0 + PROGRESSION_RANGE_BASE_DAMAGE = 15 '''Base Range attack damage''' PROGRESSION_RANGE_LEVEL_DAMAGE = 5 '''Bonus Range attack damage per level''' - PROGRESSION_MAGE_BASE_DAMAGE = 0 + PROGRESSION_MAGE_BASE_DAMAGE = 15 '''Base Mage attack damage ''' PROGRESSION_MAGE_LEVEL_DAMAGE = 5 diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index 3c943dd17..a9b410287 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -105,6 +105,7 @@ def __init__(self, ent, config): self.health = ent.health self.water = ent.water self.food = ent.food + self.health_restore = 0 self.health.update(config.PLAYER_BASE_HEALTH) if config.RESOURCE_SYSTEM_ENABLED: @@ -121,9 +122,10 @@ def update(self): food_thresh = self.food > thresh * self.config.RESOURCE_BASE water_thresh = self.water > thresh * self.config.RESOURCE_BASE + self.health_restore = 0 # for "healing" bonus if food_thresh and water_thresh: - restore = np.floor(self.health.max * regen) - self.health.increment(restore) + self.health_restore = np.floor(self.health.max * regen) + self.health.increment(self.health_restore) if self.food.empty: self.health.decrement(self.config.RESOURCE_STARVATION_RATE) From 906afb2a7803e88ea4fb905b40f17b7fca6edef1 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 23 Aug 2023 03:07:21 -0700 Subject: [PATCH 103/113] tweaked player damage --- nmmo/core/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nmmo/core/config.py b/nmmo/core/config.py index d0f2bd404..87a03da1f 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -437,19 +437,19 @@ class Progression: PROGRESSION_LEVEL_MAX = 10 '''Max skill level''' - PROGRESSION_MELEE_BASE_DAMAGE = 15 + PROGRESSION_MELEE_BASE_DAMAGE = 10 '''Base Melee attack damage''' PROGRESSION_MELEE_LEVEL_DAMAGE = 5 '''Bonus Melee attack damage per level''' - PROGRESSION_RANGE_BASE_DAMAGE = 15 + PROGRESSION_RANGE_BASE_DAMAGE = 10 '''Base Range attack damage''' PROGRESSION_RANGE_LEVEL_DAMAGE = 5 '''Bonus Range attack damage per level''' - PROGRESSION_MAGE_BASE_DAMAGE = 15 + PROGRESSION_MAGE_BASE_DAMAGE = 10 '''Base Mage attack damage ''' PROGRESSION_MAGE_LEVEL_DAMAGE = 5 From f2f96720c478b60e46908f8fafad5e4f3f3bda1e Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 23 Aug 2023 08:17:59 -0700 Subject: [PATCH 104/113] set health restore only when health increases --- nmmo/entity/entity.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index a9b410287..b26b3b306 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -124,8 +124,10 @@ def update(self): self.health_restore = 0 # for "healing" bonus if food_thresh and water_thresh: - self.health_restore = np.floor(self.health.max * regen) - self.health.increment(self.health_restore) + self.health_restore = -self.health.val # before incrementing + restore = np.floor(self.health.max * regen) + self.health.increment(restore) + self.health_restore += self.health.val # after incrementing if self.food.empty: self.health.decrement(self.config.RESOURCE_STARVATION_RATE) From b9aca7ceca05dd40b76c6c7f1f687ad387f915e4 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 23 Aug 2023 08:56:36 -0700 Subject: [PATCH 105/113] record both health inc and dec --- nmmo/entity/entity.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index b26b3b306..2229c227c 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -122,12 +122,10 @@ def update(self): food_thresh = self.food > thresh * self.config.RESOURCE_BASE water_thresh = self.water > thresh * self.config.RESOURCE_BASE - self.health_restore = 0 # for "healing" bonus + org_health = self.health.val if food_thresh and water_thresh: - self.health_restore = -self.health.val # before incrementing restore = np.floor(self.health.max * regen) self.health.increment(restore) - self.health_restore += self.health.val # after incrementing if self.food.empty: self.health.decrement(self.config.RESOURCE_STARVATION_RATE) @@ -135,6 +133,9 @@ def update(self): if self.water.empty: self.health.decrement(self.config.RESOURCE_DEHYDRATION_RATE) + # records both increase and decrease in health due to food and water + self.health_restore = self.health.val - org_health + def packet(self): data = {} data['health'] = { 'val': self.health.val, 'max': self.config.PLAYER_BASE_HEALTH } From 5edd767a2ee0e0985227229a08c19b38fcc47eb1 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 23 Aug 2023 10:33:39 -0700 Subject: [PATCH 106/113] tweaked exploration distance as progress toward center --- nmmo/core/action.py | 9 +++++---- nmmo/core/map.py | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/nmmo/core/action.py b/nmmo/core/action.py index c6048ca50..5a136be3e 100644 --- a/nmmo/core/action.py +++ b/nmmo/core/action.py @@ -134,12 +134,13 @@ def call(realm, entity, direction): realm.map.tiles[r_new, c_new].add_entity(entity) # exploration record keeping. moved from entity.py, History.update() - dist_from_spawn = utils.linf_single(entity.spawn_pos, (r_new, c_new)) - if dist_from_spawn > entity.history.exploration: - entity.history.exploration = dist_from_spawn + progress_to_center = realm.map.dist_border_center -\ + utils.linf_single(realm.map.center_coord, (r_new, c_new)) + if progress_to_center > entity.history.exploration: + entity.history.exploration = progress_to_center if entity.is_player: realm.event_log.record(EventCode.GO_FARTHEST, entity, - distance=dist_from_spawn) + distance=progress_to_center) # CHECK ME: material.Impassible includes void, so this line is not reachable # Does this belong to Entity/Player.update()? diff --git a/nmmo/core/map.py b/nmmo/core/map.py index 6bb23045d..870ddabe5 100644 --- a/nmmo/core/map.py +++ b/nmmo/core/map.py @@ -27,6 +27,10 @@ def __init__(self, config, realm, np_random): for c in range(sz): self.tiles[r, c] = Tile(realm, r, c, np_random) + self.dist_border_center = config.MAP_CENTER // 2 + self.center_coord = (config.MAP_BORDER + self.dist_border_center, + config.MAP_BORDER + self.dist_border_center) + @property def packet(self): '''Packet of degenerate resource states''' From 8b3e093097e614d77cde664fa84102e7df6c3d74 Mon Sep 17 00:00:00 2001 From: David Bloomin Date: Thu, 24 Aug 2023 14:35:38 -0700 Subject: [PATCH 107/113] cp --- nmmo/lib/event_log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index dc0f8872f..edeaa5125 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -168,7 +168,7 @@ def update(self): def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> np.ndarray: if tick is not None: if tick not in self._data_by_tick: - return np.array([]) + return np.empty((2, 0)) event_data = self._data_by_tick[tick] else: event_data = EventState.Query.table(self.datastore) @@ -182,4 +182,4 @@ def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> n flt_idx &= np.in1d(event_data[:, EventAttr["ent_id"]], agents) return event_data[flt_idx] - return np.array([]) + return np.empty((2, 0)) From 037fbcc777ba58f2be30b457e818a2db9ef5fda3 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 24 Aug 2023 17:02:22 -0700 Subject: [PATCH 108/113] fixed harvest tool bug, event log empty array --- nmmo/lib/event_log.py | 5 +++-- nmmo/systems/skill.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/nmmo/lib/event_log.py b/nmmo/lib/event_log.py index edeaa5125..e11b1bee1 100644 --- a/nmmo/lib/event_log.py +++ b/nmmo/lib/event_log.py @@ -59,6 +59,7 @@ def __init__(self, realm): if isinstance(val, int) } self._data_by_tick = {} self._last_tick = 0 + self._empty_data = np.empty((0, len(EventAttr))) # add synonyms to the attributes self.attr_to_col = deepcopy(EventAttr) @@ -168,7 +169,7 @@ def update(self): def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> np.ndarray: if tick is not None: if tick not in self._data_by_tick: - return np.empty((2, 0)) + return self._empty_data event_data = self._data_by_tick[tick] else: event_data = EventState.Query.table(self.datastore) @@ -182,4 +183,4 @@ def get_data(self, event_code=None, agents: List[int]=None, tick: int=None) -> n flt_idx &= np.in1d(event_data[:, EventAttr["ent_id"]], agents) return event_data[flt_idx] - return np.empty((2, 0)) + return self._empty_data diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index db0a6d746..2978bb429 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -97,8 +97,8 @@ def process_drops(self, matl, drop_table): # for example, fishing level=5 without rod will only yield level-1 ration level = 1 tool = entity.equipment.held - if matl.tool is not None and isinstance(tool, matl.tool): - level = min(1+tool.level.val, self.config.PROGRESSION_LEVEL_MAX) + if matl.tool is not None and isinstance(tool.item, matl.tool): + level = min(1+tool.item.level.val, self.config.PROGRESSION_LEVEL_MAX) #TODO: double-check drop table quantity for drop in drop_table.roll(self.realm, level): From 10228756f91f13ac74fb95cf06a176fd0cd5a8a9 Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 27 Aug 2023 11:06:08 -0700 Subject: [PATCH 109/113] sort obs dict by key to match gym.spaces.Dict --- nmmo/core/observation.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index cf3444b45..c08fa8dda 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -1,4 +1,5 @@ from functools import lru_cache, cached_property +from collections import OrderedDict import numpy as np @@ -10,6 +11,9 @@ from nmmo.lib import material, utils +def sort_dict_by_key(d): + return OrderedDict(sorted(d.items(), key=lambda t: t[0])) + class BasicObs: def __init__(self, values, id_col): self.values = values @@ -140,14 +144,16 @@ def get_empty_obs(self): return gym_obs def to_gym(self): - '''Convert the observation to a format that can be used by OpenAI Gym''' + '''Convert the observation to a format that can be used by OpenAI Gym + The dictionary keys must be sorted alphabetically to work with gym.spaces.Dict. + ''' gym_obs = self.get_empty_obs() if self.dummy_obs: # return empty obs for the dead agents gym_obs['Tile'] = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1]), dtype=np.int16) if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() - return gym_obs + return sort_dict_by_key(gym_obs) # NOTE: assume that all len(self.tiles) == self.config.MAP_N_OBS gym_obs['Tile'] = self.tiles @@ -162,7 +168,7 @@ def to_gym(self): if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() - return gym_obs + return sort_dict_by_key(gym_obs) def _make_action_targets(self): masks = {} @@ -212,7 +218,8 @@ def _make_action_targets(self): else np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) } - return masks + # NOTE: the order of the keys are important to work with gym.spaces.Dict + return sort_dict_by_key(masks) def _make_move_mask(self): if self.dummy_obs: From d6dd35144e3cb928ff24422347910b656fac5b7f Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 27 Aug 2023 12:47:15 -0700 Subject: [PATCH 110/113] fixed give gold mask key order --- nmmo/core/observation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index c08fa8dda..1b3669a69 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -207,8 +207,8 @@ def _make_action_targets(self): "MarketItem": self._make_buy_mask() } masks["GiveGold"] = { - "Target": self._make_give_target_mask(), - "Price": self._make_give_gold_mask() # reusing Price + "Price": self._make_give_gold_mask(), # reusing Price + "Target": self._make_give_target_mask() } if self.config.COMMUNICATION_SYSTEM_ENABLED: From 1219029a219a07c75cf2dd589a351640efa4b18b Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 27 Aug 2023 15:26:48 -0700 Subject: [PATCH 111/113] undo sort dict, which is covered by pufferlib --- nmmo/core/observation.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 1b3669a69..d72f8d69b 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -1,5 +1,4 @@ from functools import lru_cache, cached_property -from collections import OrderedDict import numpy as np @@ -11,9 +10,6 @@ from nmmo.lib import material, utils -def sort_dict_by_key(d): - return OrderedDict(sorted(d.items(), key=lambda t: t[0])) - class BasicObs: def __init__(self, values, id_col): self.values = values @@ -144,16 +140,14 @@ def get_empty_obs(self): return gym_obs def to_gym(self): - '''Convert the observation to a format that can be used by OpenAI Gym - The dictionary keys must be sorted alphabetically to work with gym.spaces.Dict. - ''' + '''Convert the observation to a format that can be used by OpenAI Gym''' gym_obs = self.get_empty_obs() if self.dummy_obs: # return empty obs for the dead agents gym_obs['Tile'] = np.zeros((self.config.MAP_N_OBS, self.tiles.shape[1]), dtype=np.int16) if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() - return sort_dict_by_key(gym_obs) + return gym_obs # NOTE: assume that all len(self.tiles) == self.config.MAP_N_OBS gym_obs['Tile'] = self.tiles @@ -168,7 +162,7 @@ def to_gym(self): if self.config.PROVIDE_ACTION_TARGETS: gym_obs["ActionTargets"] = self._make_action_targets() - return sort_dict_by_key(gym_obs) + return gym_obs def _make_action_targets(self): masks = {} @@ -218,8 +212,7 @@ def _make_action_targets(self): else np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) } - # NOTE: the order of the keys are important to work with gym.spaces.Dict - return sort_dict_by_key(masks) + return masks def _make_move_mask(self): if self.dummy_obs: From 35ef317643829abec7bdcaa86bcf3f09a3a1c31a Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 28 Aug 2023 14:06:54 -0700 Subject: [PATCH 112/113] removed all all-zero masks --- nmmo/core/observation.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index d72f8d69b..55f6dfb91 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -174,8 +174,7 @@ def _make_action_targets(self): # Test below. see tests/core/test_observation_tile.py, test_action_target_consts() # assert len(action.Style.edges) == 3 masks["Attack"] = { - "Style": np.zeros(3, dtype=np.int8) if self.dummy_obs\ - else np.ones(3, dtype=np.int8), + "Style": np.ones(3, dtype=np.int8), "Target": self._make_attack_mask() } @@ -194,8 +193,7 @@ def _make_action_targets(self): if self.config.EXCHANGE_SYSTEM_ENABLED: masks["Sell"] = { "InventoryItem": self._make_sell_mask(), - "Price": np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) if self.dummy_obs\ - else np.ones(self.config.PRICE_N_OBS, dtype=np.int8) + "Price": np.ones(self.config.PRICE_N_OBS, dtype=np.int8) } masks["Buy"] = { "MarketItem": self._make_buy_mask() @@ -207,9 +205,7 @@ def _make_action_targets(self): if self.config.COMMUNICATION_SYSTEM_ENABLED: masks["Comm"] = { - "Token":\ - np.zeros(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) if self.dummy_obs\ - else np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) + "Token":np.ones(self.config.COMMUNICATION_NUM_TOKENS, dtype=np.int8) } return masks @@ -344,6 +340,7 @@ def _make_give_target_mask(self): def _make_give_gold_mask(self): mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) + mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored. if self.dummy_obs: return mask From 7977437a6e2d4f7d0699ed90e9389595d3649fb6 Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 28 Aug 2023 14:32:21 -0700 Subject: [PATCH 113/113] fixed the tests --- nmmo/core/observation.py | 4 +++- tests/action/test_ammo_use.py | 2 +- tests/core/test_env.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 55f6dfb91..d0e52c55c 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -212,7 +212,9 @@ def _make_action_targets(self): def _make_move_mask(self): if self.dummy_obs: - return np.zeros(len(action.Direction.edges), dtype=np.int8) + mask = np.zeros(len(action.Direction.edges), dtype=np.int8) + mask[-1] = 1 # make sure the noop action is available + return mask # pylint: disable=not-an-iterable return np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index ebaab3e12..4cd356138 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -30,7 +30,7 @@ def _assert_action_targets_zero(self, gym_obs): for atn in [action.Use, action.Give, action.Destroy, action.Sell]: mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 - self.assertEqual(mask, 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) + self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) diff --git a/tests/core/test_env.py b/tests/core/test_env.py index bb81a0eca..ca7e8cc1a 100644 --- a/tests/core/test_env.py +++ b/tests/core/test_env.py @@ -64,8 +64,8 @@ def test_observations(self): self.assertEqual(np.sum(player_obs["Entity"]), 0) self.assertEqual(np.sum(player_obs["Inventory"]), 0) self.assertEqual(np.sum(player_obs["Market"]), 0) - self.assertEqual(np.sum(player_obs["ActionTargets"]["Move"]["Direction"]), 0) - self.assertEqual(np.sum(player_obs["ActionTargets"]["Attack"]["Style"]), 0) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Move"]["Direction"]), 1) + self.assertEqual(np.sum(player_obs["ActionTargets"]["Attack"]["Style"]), 3) obs, rewards, dones, infos = self.env.step({})