From c58809e3f2e2ae409fe8cfe5ef382c0a957b3d90 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 12:29:56 -0700 Subject: [PATCH 01/18] add some noise to masks to prevent entropy collapse --- nmmo/core/observation.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index d0e52c55..03c52a14 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -199,8 +199,8 @@ def _make_action_targets(self): "MarketItem": self._make_buy_mask() } masks["GiveGold"] = { - "Price": self._make_give_gold_mask(), # reusing Price - "Target": self._make_give_target_mask() + "Price": self._make_give_gold_mask(), # reusing Price + "Target": self._make_give_gold_target_mask() } if self.config.COMMUNICATION_SYSTEM_ENABLED: @@ -325,9 +325,32 @@ def _make_give_target_mask(self): give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) if self.config.PROVIDE_NOOP_ACTION_TARGET: give_mask[-1] = 1 - # empty inventory -- nothing to give - if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\ - or self.dummy_obs or self.agent_in_combat: + + if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: + return give_mask + + # To prevent entropy collapse, allow agents to issue random give actions during early training + if self.inventory.len == 0: + give_mask[self.config.PLAYER_N_OBS//2:] = 1 + return give_mask + + agent = self.agent() + entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"], + EntityState.State.attr_name_to_col["col"]]] + same_tile = utils.linf(entities_pos, (agent.row, agent.col)) == 0 + not_me = self.entities.ids != self.agent_id + player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) + + give_mask[:self.entities.len] = same_tile & player & not_me + return give_mask + + def _make_give_gold_target_mask(self): + give_mask = 
np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8) + if self.config.PROVIDE_NOOP_ACTION_TARGET: + give_mask[-1] = 1 + + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ + or int(self.agent().gold) == 0: return give_mask agent = self.agent() @@ -376,6 +399,11 @@ def _make_buy_mask(self): if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: return buy_mask + # To prevent entropy collapse, allow agents to issue random buy actions during early training + if self.market.len == 0: # nothing in the market + buy_mask[self.config.MARKET_N_OBS//10:] = 1 + return buy_mask + agent = self.agent() market_items = self.market.values not_mine = market_items[:,ItemState.State.attr_name_to_col["owner_id"]] != self.agent_id From 64c53f040cac1ef350458c3ed820a113778dec69 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 12:45:06 -0700 Subject: [PATCH 02/18] tweaked give gold target mask to prevent entropy collapse --- nmmo/core/observation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 03c52a14..68dde302 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -361,6 +361,11 @@ def _make_give_gold_target_mask(self): player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) give_mask[:self.entities.len] = same_tile & player & not_me + + # To prevent entropy collapse, allow agents to issue random give actions during early training + if sum(give_mask[:self.entities.len]) == 0: + give_mask[self.config.PLAYER_N_OBS//2:] = 1 + return give_mask def _make_give_gold_mask(self): From d3febd2c76e92d1827a4dd1b3591117409719b21 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 13:21:06 -0700 Subject: [PATCH 03/18] tweaked action masks during combat --- nmmo/core/observation.py | 17 +++++++++-------- tests/action/test_ammo_use.py | 3 ++- 2 files changed, 11 insertions(+), 9 
deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 68dde302..4b445aa6 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -349,8 +349,10 @@ def _make_give_gold_target_mask(self): if self.config.PROVIDE_NOOP_ACTION_TARGET: give_mask[-1] = 1 + # To prevent entropy collapse, allow agents to issue random give actions during early training if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ or int(self.agent().gold) == 0: + give_mask[self.config.PLAYER_N_OBS//2:] = 1 return give_mask agent = self.agent() @@ -371,12 +373,13 @@ def _make_give_gold_target_mask(self): def _make_give_gold_mask(self): mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored. - if self.dummy_obs: + if self.dummy_obs or self.agent_in_combat: + # To prevent entropy collapse, allow agents to issue random give actions during early training + mask[:] = 1 return mask gold = int(self.agent().gold) - if gold and not self.agent_in_combat: - mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1 + mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1 return mask @@ -401,12 +404,10 @@ def _make_buy_mask(self): if self.config.PROVIDE_NOOP_ACTION_TARGET: buy_mask[-1] = 1 - if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: - return buy_mask - # To prevent entropy collapse, allow agents to issue random buy actions during early training - if self.market.len == 0: # nothing in the market - buy_mask[self.config.MARKET_N_OBS//10:] = 1 + if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \ + or self.market.len == 0: + buy_mask[self.config.MARKET_N_OBS//2:] = 1 return buy_mask agent = self.agent() diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 4cd35613..68aff5cc 100644 --- a/tests/action/test_ammo_use.py +++ 
b/tests/action/test_ammo_use.py @@ -30,7 +30,8 @@ def _assert_action_targets_zero(self, gym_obs): for atn in [action.Use, action.Give, action.Destroy, action.Sell]: mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 - self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) + # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked + self.assertEqual(mask, 99 + 512 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) From ec2297049a856dee9e0ae434a874a22563f86a0b Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 13:30:45 -0700 Subject: [PATCH 04/18] mask tweak --- nmmo/core/observation.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 4b445aa6..d3ff8013 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -326,11 +326,9 @@ def _make_give_target_mask(self): if self.config.PROVIDE_NOOP_ACTION_TARGET: give_mask[-1] = 1 - if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat: - return give_mask - # To prevent entropy collapse, allow agents to issue random give actions during early training - if self.inventory.len == 0: + if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ + or self.inventory.len == 0: give_mask[self.config.PLAYER_N_OBS//2:] = 1 return give_mask @@ -342,6 +340,11 @@ def _make_give_target_mask(self): player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) give_mask[:self.entities.len] = same_tile & player & not_me + + # To prevent entropy collapse, allow agents to issue random give actions during early training + if sum(give_mask[:self.entities.len]) == 0: + give_mask[self.config.PLAYER_N_OBS//2:] = 1 + return give_mask def 
_make_give_gold_target_mask(self): From 3614560d9aad8dab5e7f70dd2023c4acb6190888 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 17:41:00 -0700 Subject: [PATCH 05/18] disallow no-op for attack and buy to jitter --- nmmo/core/observation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index d3ff8013..769d3431 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -251,6 +251,12 @@ def _make_attack_mask(self): not_me = self.entities.ids != agent.id attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity + + # To prevent entropy collapse, allow agents to issue random give actions during early training + if sum(attack_mask[:self.entities.len]) == 0: + attack_mask[self.config.PLAYER_N_OBS//2:] = 1 + attack_mask[-1] = 0 # do not allow noop action in this case + return attack_mask def _make_use_mask(self): @@ -411,6 +417,7 @@ def _make_buy_mask(self): if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \ or self.market.len == 0: buy_mask[self.config.MARKET_N_OBS//2:] = 1 + buy_mask[-1] = 0 # do not allow noop action in this case return buy_mask agent = self.agent() From 9851ed354ef0cea48d540d4272a92aedf9db6543 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 17:48:01 -0700 Subject: [PATCH 06/18] fixed tests --- tests/action/test_ammo_use.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index 68aff5cc..f88dc669 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -31,7 +31,7 @@ def _assert_action_targets_zero(self, gym_obs): mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked - self.assertEqual(mask, 99 + 512 + 
5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) + self.assertEqual(mask, 99 + 511 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) From eecee94721afb71c048d4a58dd446e360e8e17a5 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 18:07:34 -0700 Subject: [PATCH 07/18] quick mask tweak --- nmmo/core/observation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 769d3431..577542c9 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -434,6 +434,12 @@ def _make_buy_mask(self): enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold buy_mask[:self.market.len] = not_mine & enough_gold + + # To prevent entropy collapse, allow agents to issue random give actions during early training + if sum(buy_mask[:self.market.len]) == 0: + buy_mask[self.config.MARKET_N_OBS//2:] = 1 + buy_mask[-1] = 0 # do not allow noop action in this case + return buy_mask def _existing_ammo_listings(self): From 240dc0ecf2b72edd08a838aaffcb179b00233563 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 19:22:17 -0700 Subject: [PATCH 08/18] move mask tweak --- nmmo/core/observation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 577542c9..3524ef90 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -215,9 +215,15 @@ def _make_move_mask(self): mask = np.zeros(len(action.Direction.edges), dtype=np.int8) mask[-1] = 1 # make sure the noop action is available return mask + # pylint: disable=not-an-iterable - return np.array([self.tile(*d.delta).material_id in material.Habitable.indices + mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) + if sum(mask) == 1: # only the stay is available + mask[:] = 1 + mask[-1] = 0 
# do not allow noop action + + return mask def _make_attack_mask(self): # NOTE: Currently, all attacks have the same range From 377ff221e46de893ac07255806296ee4bda01e02 Mon Sep 17 00:00:00 2001 From: kywch Date: Wed, 30 Aug 2023 20:20:45 -0700 Subject: [PATCH 09/18] removed mask jitter other than attack and move --- nmmo/core/observation.py | 32 +++----------------------------- tests/action/test_ammo_use.py | 2 +- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 3524ef90..ac6cc041 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -219,9 +219,9 @@ def _make_move_mask(self): # pylint: disable=not-an-iterable mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) - if sum(mask) == 1: # only the stay is available + if sum(mask) == 1: # only the stay (no-op) is available mask[:] = 1 - mask[-1] = 0 # do not allow noop action + mask[-1] = 0 # do not allow no-op action return mask @@ -261,7 +261,7 @@ def _make_attack_mask(self): # To prevent entropy collapse, allow agents to issue random give actions during early training if sum(attack_mask[:self.entities.len]) == 0: attack_mask[self.config.PLAYER_N_OBS//2:] = 1 - attack_mask[-1] = 0 # do not allow noop action in this case + attack_mask[-1] = 0 # do not allow no-op action in this case return attack_mask @@ -338,10 +338,8 @@ def _make_give_target_mask(self): if self.config.PROVIDE_NOOP_ACTION_TARGET: give_mask[-1] = 1 - # To prevent entropy collapse, allow agents to issue random give actions during early training if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ or self.inventory.len == 0: - give_mask[self.config.PLAYER_N_OBS//2:] = 1 return give_mask agent = self.agent() @@ -352,11 +350,6 @@ def _make_give_target_mask(self): player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) 
give_mask[:self.entities.len] = same_tile & player & not_me - - # To prevent entropy collapse, allow agents to issue random give actions during early training - if sum(give_mask[:self.entities.len]) == 0: - give_mask[self.config.PLAYER_N_OBS//2:] = 1 - return give_mask def _make_give_gold_target_mask(self): @@ -364,10 +357,8 @@ def _make_give_gold_target_mask(self): if self.config.PROVIDE_NOOP_ACTION_TARGET: give_mask[-1] = 1 - # To prevent entropy collapse, allow agents to issue random give actions during early training if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\ or int(self.agent().gold) == 0: - give_mask[self.config.PLAYER_N_OBS//2:] = 1 return give_mask agent = self.agent() @@ -378,24 +369,16 @@ def _make_give_gold_target_mask(self): player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0) give_mask[:self.entities.len] = same_tile & player & not_me - - # To prevent entropy collapse, allow agents to issue random give actions during early training - if sum(give_mask[:self.entities.len]) == 0: - give_mask[self.config.PLAYER_N_OBS//2:] = 1 - return give_mask def _make_give_gold_mask(self): mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8) mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored. 
if self.dummy_obs or self.agent_in_combat: - # To prevent entropy collapse, allow agents to issue random give actions during early training - mask[:] = 1 return mask gold = int(self.agent().gold) mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1 - return mask def _make_sell_mask(self): @@ -419,11 +402,8 @@ def _make_buy_mask(self): if self.config.PROVIDE_NOOP_ACTION_TARGET: buy_mask[-1] = 1 - # To prevent entropy collapse, allow agents to issue random buy actions during early training if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \ or self.market.len == 0: - buy_mask[self.config.MARKET_N_OBS//2:] = 1 - buy_mask[-1] = 0 # do not allow noop action in this case return buy_mask agent = self.agent() @@ -440,12 +420,6 @@ def _make_buy_mask(self): enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold buy_mask[:self.market.len] = not_mine & enough_gold - - # To prevent entropy collapse, allow agents to issue random give actions during early training - if sum(buy_mask[:self.market.len]) == 0: - buy_mask[self.config.MARKET_N_OBS//2:] = 1 - buy_mask[-1] = 0 # do not allow noop action in this case - return buy_mask def _existing_ammo_listings(self): diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py index f88dc669..33c2890a 100644 --- a/tests/action/test_ammo_use.py +++ b/tests/action/test_ammo_use.py @@ -31,7 +31,7 @@ def _assert_action_targets_zero(self, gym_obs): mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"]) # If MarketItem and InventoryTarget have no-action flags, these sum up to 5 # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked - self.assertEqual(mask, 99 + 511 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) + self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET)) def test_spawn_immunity(self): env = self._setup_env(random_seed=RANDOM_SEED) From 
41ac7318018d506ec3acabea017dc47accbda333 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 31 Aug 2023 15:50:41 -0700 Subject: [PATCH 10/18] mask no-op in move, attack when there are valid options --- nmmo/core/observation.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index ac6cc041..93e405c0 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -213,15 +213,19 @@ def _make_action_targets(self): def _make_move_mask(self): if self.dummy_obs: mask = np.zeros(len(action.Direction.edges), dtype=np.int8) - mask[-1] = 1 # make sure the noop action is available + mask[-1] = 1 # for no-op return mask # pylint: disable=not-an-iterable mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) - if sum(mask) == 1: # only the stay (no-op) is available + + # To prevent entropy collapse, do NOT allow no-op action + if sum(mask) <= 1: + # if only the stay (no-op) is possible, then allow all actions mask[:] = 1 - mask[-1] = 0 # do not allow no-op action + # Mask the no-op option, since there should be at least one allowed move + mask[-1] = 0 return mask @@ -261,7 +265,10 @@ def _make_attack_mask(self): # To prevent entropy collapse, allow agents to issue random give actions during early training if sum(attack_mask[:self.entities.len]) == 0: attack_mask[self.config.PLAYER_N_OBS//2:] = 1 - attack_mask[-1] = 0 # do not allow no-op action in this case + + # Mask the no-op option, since there should be at least one allowed move + # NOTE: this will make agents always attack if there is a valid target + attack_mask[-1] = 0 return attack_mask From 7aabad25ae1a35a471405efdf2a8ad3687d03744 Mon Sep 17 00:00:00 2001 From: kywch Date: Thu, 31 Aug 2023 23:10:42 -0700 Subject: [PATCH 11/18] removed attack mask jitter --- nmmo/core/observation.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff 
--git a/nmmo/core/observation.py b/nmmo/core/observation.py index 93e405c0..5e43322c 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -220,7 +220,6 @@ def _make_move_mask(self): mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) - # To prevent entropy collapse, do NOT allow no-op action if sum(mask) <= 1: # if only the stay (no-op) is possible, then allow all actions mask[:] = 1 @@ -261,14 +260,10 @@ def _make_attack_mask(self): not_me = self.entities.ids != agent.id attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity - - # To prevent entropy collapse, allow agents to issue random give actions during early training - if sum(attack_mask[:self.entities.len]) == 0: - attack_mask[self.config.PLAYER_N_OBS//2:] = 1 - - # Mask the no-op option, since there should be at least one allowed move - # NOTE: this will make agents always attack if there is a valid target - attack_mask[-1] = 0 + if sum(attack_mask[:self.entities.len]) > 0: + # Mask the no-op option, since there should be at least one allowed move + # NOTE: this will make agents always attack if there is a valid target + attack_mask[-1] = 0 return attack_mask From 04026a1a3fa71e8e560cf38e35a3b22671d2fcc9 Mon Sep 17 00:00:00 2001 From: kywch Date: Sat, 2 Sep 2023 03:03:37 -0700 Subject: [PATCH 12/18] put back move no-op --- nmmo/core/observation.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py index 5e43322c..9205ef98 100644 --- a/nmmo/core/observation.py +++ b/nmmo/core/observation.py @@ -217,17 +217,9 @@ def _make_move_mask(self): return mask # pylint: disable=not-an-iterable - mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices + return np.array([self.tile(*d.delta).material_id in material.Habitable.indices for d in action.Direction.edges], dtype=np.int8) - if sum(mask) <= 1: - # if 
only the stay (no-op) is possible, then allow all actions - mask[:] = 1 - # Mask the no-op option, since there should be at least one allowed move - mask[-1] = 0 - - return mask - def _make_attack_mask(self): # NOTE: Currently, all attacks have the same range # if we choose to make ranges different, the masks From 0769c4b2431a7aa5a09fd74e5c5e9c25d91f0c58 Mon Sep 17 00:00:00 2001 From: kywch Date: Sun, 3 Sep 2023 13:31:10 -0700 Subject: [PATCH 13/18] added exp to entity ds, tweaked exp threshold --- nmmo/core/config.py | 34 +++++++------ nmmo/entity/entity.py | 20 +++++++- nmmo/systems/experience.py | 13 ----- nmmo/systems/skill.py | 80 +++++++++++++++++++++++------ tests/systems/test_skill_level.py | 84 +++++++++++++++++++++++++++++++ 5 files changed, 188 insertions(+), 43 deletions(-) delete mode 100644 nmmo/systems/experience.py create mode 100644 tests/systems/test_skill_level.py diff --git a/nmmo/core/config.py b/nmmo/core/config.py index 9cc7cccf..0a7fc973 100644 --- a/nmmo/core/config.py +++ b/nmmo/core/config.py @@ -413,30 +413,36 @@ def COMBAT_DAMAGE_FORMULA(self, offense, defense, multiplier): '''Reach of attacks using the Mage skill''' +def default_exp_threshold(max_level): + import math + additional_exp_per_level = [round(90*math.sqrt(lvl)) + for lvl in range(1, max_level+1)] + return [sum(additional_exp_per_level[:lvl]) for lvl in range(max_level)] + class Progression: '''Progression Game System''' PROGRESSION_SYSTEM_ENABLED = True '''Game system flag''' - PROGRESSION_BASE_XP_SCALE = 1 - '''Base XP awarded for each skill usage -- multiplied by skill level''' - - PROGRESSION_COMBAT_XP_SCALE = 1 - '''Multiplier on top of XP_SCALE for Melee, Range, and Mage''' - - PROGRESSION_AMMUNITION_XP_SCALE = 1 - '''Multiplier on top of XP_SCALE for Prospecting, Carving, and Alchemy''' - - PROGRESSION_CONSUMABLE_XP_SCALE = 5 - '''Multiplier on top of XP_SCALE for Fishing and Herbalism''' - PROGRESSION_BASE_LEVEL = 1 '''Initial skill level''' PROGRESSION_LEVEL_MAX = 10 
'''Max skill level''' + PROGRESSION_EXP_THRESHOLD = default_exp_threshold(PROGRESSION_LEVEL_MAX) + '''A list of experience thresholds for each level''' + + PROGRESSION_COMBAT_XP_SCALE = 3 + '''Additional XP for each attack for skills Melee, Range, and Mage''' + + PROGRESSION_AMMUNITION_XP_SCALE = 15 + '''Additional XP for each harvest for Prospecting, Carving, and Alchemy''' + + PROGRESSION_CONSUMABLE_XP_SCALE = 30 + '''Multiplier XP for each harvest for Fishing and Herbalism''' + PROGRESSION_MELEE_BASE_DAMAGE = 20 '''Base Melee attack damage''' @@ -585,13 +591,13 @@ class Profession: PROFESSION_HERB_CAPACITY = 1 '''Maximum number of harvests before an herb tile decays''' - PROFESSION_HERB_RESPAWN = 0.01 + PROFESSION_HERB_RESPAWN = 0.02 '''Probability that a harvested herb tile will regenerate each tick''' PROFESSION_FISH_CAPACITY = 1 '''Maximum number of harvests before a fish tile decays''' - PROFESSION_FISH_RESPAWN = 0.01 + PROFESSION_FISH_RESPAWN = 0.02 '''Probability that a harvested fish tile will regenerate each tick''' @staticmethod diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index 2229c227..a6f044a0 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -32,17 +32,25 @@ "food", "water", - # Combat + # Combat Skills "melee_level", + "melee_exp", "range_level", + "range_exp", "mage_level", + "mage_exp", - # Skills + # Harvest Skills "fishing_level", + "fishing_exp", "herbalism_level", + "herbalism_exp", "prospecting_level", + "prospecting_exp", "carving_level", + "carving_exp", "alchemy_level", + "alchemy_exp", ]) EntityState.Limits = lambda config: { @@ -69,13 +77,21 @@ } if config.RESOURCE_SYSTEM_ENABLED else {}), **({ "melee_level": (0, config.PROGRESSION_LEVEL_MAX), + "melee_exp": (0, math.inf), "range_level": (0, config.PROGRESSION_LEVEL_MAX), + "range_exp": (0, math.inf), "mage_level": (0, config.PROGRESSION_LEVEL_MAX), + "mage_exp": (0, math.inf), "fishing_level": (0, config.PROGRESSION_LEVEL_MAX), + "fishing_exp": (0, 
math.inf), "herbalism_level": (0, config.PROGRESSION_LEVEL_MAX), + "herbalism_exp": (0, math.inf), "prospecting_level": (0, config.PROGRESSION_LEVEL_MAX), + "prospecting_exp": (0, math.inf), "carving_level": (0, config.PROGRESSION_LEVEL_MAX), + "carving_exp": (0, math.inf), "alchemy_level": (0, config.PROGRESSION_LEVEL_MAX), + "alchemy_exp": (0, math.inf), } if config.PROGRESSION_SYSTEM_ENABLED else {}), } diff --git a/nmmo/systems/experience.py b/nmmo/systems/experience.py deleted file mode 100644 index 25be029f..00000000 --- a/nmmo/systems/experience.py +++ /dev/null @@ -1,13 +0,0 @@ -import numpy as np - -class ExperienceCalculator: - def __init__(self, num_levels=15): - self.exp = np.array([0] + [10*2**i for i in range(num_levels)]) - - def exp_at_level(self, level): - return int(self.exp[level - 1]) - - def level_at_exp(self, exp): - if exp >= self.exp[-1]: - return len(self.exp) - return np.argmin(exp >= self.exp) diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py index 2978bb42..eb555e11 100644 --- a/nmmo/systems/skill.py +++ b/nmmo/systems/skill.py @@ -6,17 +6,37 @@ from ordered_set import OrderedSet from nmmo.lib import material -from nmmo.systems import combat, experience +from nmmo.systems import combat from nmmo.lib.log import EventCode ### Infrastructure ### +class ExperienceCalculator: + def __init__(self, config): + if not config.PROGRESSION_SYSTEM_ENABLED: + return + self.config = config + self.exp_threshold = np.array(config.PROGRESSION_EXP_THRESHOLD) + assert len(self.exp_threshold) >= config.PROGRESSION_LEVEL_MAX,\ + "PROGRESSION_LEVEL_BY_EXP must have at least PROGRESSION_LEVEL_MAX entries" + self.max_exp = self.exp_threshold[self.config.PROGRESSION_LEVEL_MAX - 1] + + def exp_at_level(self, level): + level = min(max(level, self.config.PROGRESSION_BASE_LEVEL), + self.config.PROGRESSION_LEVEL_MAX) + return int(self.exp_threshold[level - 1]) + + def level_at_exp(self, exp): + if exp >= self.max_exp: + return 
self.config.PROGRESSION_LEVEL_MAX + return np.argmin(exp >= self.exp_threshold) + class SkillGroup: def __init__(self, realm, entity): self.config = realm.config self.realm = realm self.entity = entity - self.experience_calculator = experience.ExperienceCalculator() + self.experience_calculator = ExperienceCalculator(self.config) self.skills = OrderedSet() # critical for determinism def update(self): @@ -38,21 +58,17 @@ def __init__(self, skill_group: SkillGroup): self.experience_calculator = skill_group.experience_calculator self.skill_group = skill_group - self.exp = 0 - skill_group.skills.add(self) def packet(self): data = {} - - data['exp'] = self.exp + data['exp'] = self.exp.val data['level'] = self.level.val - return data def add_xp(self, xp): - self.exp += xp * self.config.PROGRESSION_BASE_XP_SCALE - new_level = int(self.experience_calculator.level_at_exp(self.exp)) + self.exp.increment(xp) + new_level = int(self.experience_calculator.level_at_exp(self.exp.val)) if new_level > self.level.val: self.level.update(new_level) @@ -64,7 +80,7 @@ def add_xp(self, xp): tags={"player_id": self.entity.ent_id}) def set_experience_by_level(self, level): - self.exp = self.experience_calculator.level_at_exp(level) + self.exp.update(self.experience_calculator.level_at_exp(level)) self.level.update(int(level)) @property @@ -72,6 +88,11 @@ def level(self): raise NotImplementedError(f"Skill {self.__class__.__name__} "\ "does not implement 'level' property") + @property + def exp(self): + raise NotImplementedError(f"Skill {self.__class__.__name__} "\ + "does not implement 'exp' property") + ### Skill Bases ### class CombatSkill(Skill): def update(self): @@ -224,7 +245,7 @@ def receive_damage(self, dmg): class Skills(Basic, Harvest, Combat): pass -### Skills ### +### Combat Skills ### class Melee(CombatSkill): SKILL_ID = 1 @@ -232,6 +253,10 @@ class Melee(CombatSkill): def level(self): return self.entity.melee_level + @property + def exp(self): + return self.entity.melee_exp + 
class Range(CombatSkill): SKILL_ID = 2 @@ -239,6 +264,10 @@ class Range(CombatSkill): def level(self): return self.entity.range_level + @property + def exp(self): + return self.entity.range_exp + class Mage(CombatSkill): SKILL_ID = 3 @@ -246,11 +275,16 @@ class Mage(CombatSkill): def level(self): return self.entity.mage_level + @property + def exp(self): + return self.entity.mage_exp + Melee.weakness = Mage Range.weakness = Melee Mage.weakness = Range -### Individual Skills ### + +### Basic/Harvest Skills ### class DummyLevel: def __init__(self, val=0): @@ -281,7 +315,6 @@ def update(self): self.realm.event_log.record(EventCode.DRINK_WATER, self.entity) - class Food(HarvestSkill): def update(self): config = self.config @@ -304,7 +337,6 @@ def update(self): self.realm.event_log.record(EventCode.EAT_FOOD, self.entity) - class Fishing(ConsumableSkill): SKILL_ID = 4 @@ -312,6 +344,10 @@ class Fishing(ConsumableSkill): def level(self): return self.entity.fishing_level + @property + def exp(self): + return self.entity.fishing_exp + def update(self): self.harvest_adjacent(material.Fish) @@ -322,6 +358,10 @@ class Herbalism(ConsumableSkill): def level(self): return self.entity.herbalism_level + @property + def exp(self): + return self.entity.herbalism_exp + def update(self): self.harvest(material.Herb) @@ -332,6 +372,10 @@ class Prospecting(AmmunitionSkill): def level(self): return self.entity.prospecting_level + @property + def exp(self): + return self.entity.prospecting_exp + def update(self): self.harvest(material.Ore) @@ -342,6 +386,10 @@ class Carving(AmmunitionSkill): def level(self): return self.entity.carving_level + @property + def exp(self): + return self.entity.carving_exp + def update(self,): self.harvest(material.Tree) @@ -352,5 +400,9 @@ class Alchemy(AmmunitionSkill): def level(self): return self.entity.alchemy_level + @property + def exp(self): + return self.entity.alchemy_exp + def update(self): self.harvest(material.Crystal) diff --git 
class TestSkillLevel(unittest.TestCase):
  """Exercises the exp <-> level conversion and per-skill exp bookkeeping."""

  # Every skill that carries its own level/exp attributes on the player
  SKILL_NAMES = ("melee", "range", "mage",
                 "fishing", "herbalism", "prospecting", "carving", "alchemy")

  @classmethod
  def setUpClass(cls):
    # A six-entry table, so the level cap equals the table length
    thresholds = [0, 10, 20, 30, 40, 50]
    cls.config = ScriptedAgentTestConfig()
    cls.config.PROGRESSION_EXP_THRESHOLD = thresholds
    cls.config.PROGRESSION_LEVEL_MAX = len(thresholds)
    cls.env = ScriptedAgentTestEnv(cls.config)

  def _check_skills(self, player, expected):
    """Assert (level, exp) of every skill; skills missing from expected must be (1, 0)."""
    for name in self.SKILL_NAMES:
      want_level, want_exp = expected.get(name, (1, 0))
      self.assertEqual(getattr(player.skills, name).level.val, want_level)
      self.assertEqual(getattr(player.skills, name).exp.val, want_exp)

  def test_experience_calculator(self):
    calc = nmmo.systems.skill.ExperienceCalculator(self.config)
    thresholds = self.config.PROGRESSION_EXP_THRESHOLD

    self.assertTrue(np.array_equal(thresholds, calc.exp_threshold))

    # level -> exp -> level must round-trip for every valid level
    for level in range(1, self.config.PROGRESSION_LEVEL_MAX + 1):
      self.assertEqual(calc.level_at_exp(calc.exp_at_level(level)), level)

    # out-of-range levels fall back to the lowest / highest threshold
    self.assertEqual(calc.exp_at_level(-1), min(thresholds))  # invalid level
    self.assertEqual(calc.exp_at_level(30), max(thresholds))  # level above the max

    for exp, want_level in ((0, 1), (5, 1), (45, 5), (50, 6), (100, 6)):
      self.assertEqual(calc.level_at_exp(exp), want_level)

  def test_add_xp(self):
    self.env.reset()
    player = self.env.realm.players[1]

    # fresh player: every skill at level 1 with 0 exp
    self._check_skills(player, {})

    # 1 exp in melee is below the level-2 threshold (10): no level up
    player.skills.melee.add_xp(1)
    self._check_skills(player, {"melee": (1, 1)})

    # 30 exp in fishing reaches the fourth threshold: level 4
    player.skills.fishing.add_xp(30)
    self._check_skills(player, {"melee": (1, 1), "fishing": (4, 30)})
PROVIDE_NOOP_ACTION_TARGET = False + PROVIDE_NOOP_ACTION_TARGET = True '''Provide a no-op option for each action''' PLAYERS = [Agent] @@ -159,7 +159,7 @@ def game_system_enabled(self, name) -> bool: CURRICULUM_FILE_PATH = None '''Path to a curriculum task file containing a list of task specs for training''' - TASK_EMBED_DIM = 1024 + TASK_EMBED_DIM = 4096 '''Dimensionality of task embeddings''' ALLOW_MULTI_TASKS_PER_AGENT = False @@ -188,7 +188,7 @@ def game_system_enabled(self, name) -> bool: PLAYER_N = None '''Maximum number of players spawnable in the environment''' - # TODO(kywch): CHECK if there could be 100+ entities within one's vision + # TODO: CHECK if there could be 100+ entities within one's vision PLAYER_N_OBS = 100 '''Number of distinct agent observations''' @@ -211,18 +211,6 @@ def PLAYER_VISION_DIAMETER(self): PLAYER_DEATH_FOG = None '''How long before spawning death fog. None for no death fog''' - - ############################################################################ - ### Agent Parameters - IMMORTAL = False - '''Debug parameter: prevents agents from dying except by void''' - - RESET_ON_DEATH = False - '''Whether to reset the environment whenever an agent dies''' - - BASE_HEALTH = 10 - '''Initial Constitution level and agent health''' - PLAYER_DEATH_FOG_SPEED = 1 '''Number of tiles per tick that the fog moves in''' @@ -241,6 +229,14 @@ def PLAYER_TEAM_SIZE(self): assert not self.PLAYER_N % len(self.PLAYERS) return self.PLAYER_N // len(self.PLAYERS) + ############################################################################ + ### Debug Parameters + IMMORTAL = False + '''Debug parameter: prevents agents from dying except by void''' + + RESET_ON_DEATH = False + '''Debug parameter: whether to reset the environment whenever an agent dies''' + ############################################################################ ### Map Parameters MAP_N = 1 @@ -358,10 +354,18 @@ class Resource: RESOURCE_DEHYDRATION_RATE = 10 '''Damage per tick without 
water''' - RESOURCE_FOILAGE_CAPACITY = 1 + RESOURCE_RESILIENT_POPULATION = 0 + '''Training helper: proportion of population that is resilient to starvation and dehydration + (e.g. 0.1 means 10% of the population is resilient to starvation and dehydration) + This is to make some agents live longer during training to sample from "advanced" agents.''' + + RESOURCE_DAMAGE_REDUCTION = 0.5 + '''Training helper: damage reduction from starvation and dehydration for resilient agents''' + + RESOURCE_FOILAGE_CAPACITY = 1 '''Maximum number of harvests before a foilage tile decays''' - RESOURCE_FOILAGE_RESPAWN = 0.025 + RESOURCE_FOILAGE_RESPAWN = 0.025 '''Probability that a harvested foilage tile will regenerate each tick''' RESOURCE_HARVEST_RESTORE_FRACTION = 1.0 @@ -529,7 +533,6 @@ def INVENTORY_N_OBS(self): return self.ITEM_INVENTORY_CAPACITY - class Equipment: '''Equipment Game System''' diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py index a6f044a0..41620f44 100644 --- a/nmmo/entity/entity.py +++ b/nmmo/entity/entity.py @@ -122,6 +122,7 @@ def __init__(self, ent, config): self.water = ent.water self.food = ent.food self.health_restore = 0 + self.resilient = False self.health.update(config.PLAYER_BASE_HEALTH) if config.RESOURCE_SYSTEM_ENABLED: @@ -144,10 +145,16 @@ def update(self): self.health.increment(restore) if self.food.empty: - self.health.decrement(self.config.RESOURCE_STARVATION_RATE) + starvation_damage = self.config.RESOURCE_STARVATION_RATE + if self.resilient: + starvation_damage *= self.config.RESOURCE_DAMAGE_REDUCTION + self.health.decrement(int(starvation_damage)) if self.water.empty: - self.health.decrement(self.config.RESOURCE_DEHYDRATION_RATE) + dehydration_damage = self.config.RESOURCE_DEHYDRATION_RATE + if self.resilient: + dehydration_damage *= self.config.RESOURCE_DAMAGE_REDUCTION + self.health.decrement(int(dehydration_damage)) # records both increase and decrease in health due to food and water self.health_restore = self.health.val - 
org_health @@ -273,7 +280,6 @@ def ent_id(self): def packet(self): data = {} - data['status'] = self.status.packet() data['history'] = self.history.packet() data['inventory'] = self.inventory.packet() diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py index 9d24be2d..95636323 100644 --- a/nmmo/entity/entity_manager.py +++ b/nmmo/entity/entity_manager.py @@ -148,14 +148,23 @@ def reset(self, np_random): self._agent_loader = self.loader_class(self.config, self._np_random) self.spawned = set() - def spawn_individual(self, r, c, idx): + def spawn_individual(self, r, c, idx, resilient=False): agent = next(self._agent_loader) - agent = agent(self.config, idx) - player = Player(self.realm, (r, c), agent) + agent = agent(self.config, idx) + player = Player(self.realm, (r, c), agent, resilient) super().spawn(player) self.spawned.add(idx) def spawn(self): + # Check and assign the constant heal flag + resilient_flag = [False] * self.config.PLAYER_N + if self.config.RESOURCE_SYSTEM_ENABLED: + num_resilient = round(self.config.RESOURCE_RESILIENT_POPULATION * self.config.PLAYER_N) + for idx in range(num_resilient): + resilient_flag[idx] = self.config.RESOURCE_DAMAGE_REDUCTION > 0 + self._np_random.shuffle(resilient_flag) + + # Spawn the players idx = 0 while idx < self.config.PLAYER_N: idx += 1 @@ -167,4 +176,4 @@ def spawn(self): if idx in self.spawned: continue - self.spawn_individual(r, c, idx) + self.spawn_individual(r, c, idx, resilient_flag[idx-1]) diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py index b635810d..73c9b4bc 100644 --- a/nmmo/entity/player.py +++ b/nmmo/entity/player.py @@ -4,11 +4,12 @@ # pylint: disable=no-member class Player(entity.Entity): - def __init__(self, realm, pos, agent): + def __init__(self, realm, pos, agent, resilient=False): super().__init__(realm, pos, agent.iden, agent.policy) self.agent = agent self.immortal = realm.config.IMMORTAL + self.resources.resilient = resilient # Scripted hooks self.target = None @@ 
-97,9 +98,7 @@ def equipment(self): def packet(self): data = super().packet() - data['entID'] = self.ent_id - data['resource'] = self.resources.packet() data['skills'] = self.skills.packet() data['inventory'] = self.inventory.packet() diff --git a/tests/test_determinism.py b/tests/test_determinism.py index e84b0bc4..fdfcfab9 100644 --- a/tests/test_determinism.py +++ b/tests/test_determinism.py @@ -63,9 +63,11 @@ def test_env_level_rng(self): config1 = ScriptedAgentTestConfig() setattr(config1, 'MAP_FORCE_GENERATION', True) setattr(config1, 'PATH_MAPS', 'maps/det1') + setattr(config1, 'RESOURCE_RESILIENT_POPULATION', 0.2) # uses np_random config2 = ScriptedAgentTestConfig() setattr(config2, 'MAP_FORCE_GENERATION', True) setattr(config2, 'PATH_MAPS', 'maps/det2') + setattr(config2, 'RESOURCE_RESILIENT_POPULATION', 0.2) # to create the same maps, seed must be provided env1 = ScriptedAgentTestEnv(config1, seed=RANDOM_SEED) From e771e895acc40f011cf2de51ef174a580c7a7cef Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 4 Sep 2023 00:33:17 -0700 Subject: [PATCH 15/18] added gain exp predicate --- nmmo/task/base_predicates.py | 11 +++++++++-- tests/task/test_predicates.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py index b619b336..a9f0c468 100644 --- a/nmmo/task/base_predicates.py +++ b/nmmo/task/base_predicates.py @@ -83,8 +83,15 @@ def AttainSkill(gs: GameState, subject: Group, skill: Skill, level: int, num_age """True if the number of agents having skill level GE level is greather than or equal to num_agent """ - skill_level = getattr(subject,skill.__name__.lower() + '_level') - return norm(sum(skill_level >= level) / num_agent) + if level <= 1: + return 1.0 + skill_level = getattr(subject,skill.__name__.lower() + '_level') - 1 # base level is 1 + return norm(sum(skill_level) / (num_agent * (level-1))) + +def GainExperience(gs: GameState, subject: 
def test_gain_experience(self):
  """GainExperience is met only when the subject's summed exp reaches goal_exp * num_agent."""
  gain_exp_pred_cls = make_predicate(bp.GainExperience)
  goal_exp = 5

  # (subject agent ids, skill, num_agent); list position is the predicate index
  pred_specs = [
    ([1], Skill.Melee, 1),         # 0: not met
    ([2], Skill.Melee, 1),         # 1: not met
    ([1], Skill.Range, 1),         # 2: met
    ([1, 3], Skill.Fishing, 1),    # 3: met
    ([1, 2, 3], Skill.Carving, 3), # 4: not met
    ([2, 4], Skill.Carving, 2),    # 5: met
  ]
  # (Predicate, Team) pairs, the reward is 1 by default
  test_preds = [(gain_exp_pred_cls(Group(ids), skill, goal_exp, num_agent), ALL_AGENT)
                for ids, skill, num_agent in pred_specs]

  env = self._get_taskenv(test_preds)
  players = env.realm.players

  # preds 0 and 1: agent 1 stays one point short of the melee goal, agent 2 gets none
  players[1].skills.melee.exp.update(goal_exp - 1)
  # pred 2: agent 1 holds exactly goal_exp range exp
  players[1].skills.range.exp.update(goal_exp)
  # pred 3: agent 1 alone supplies goal_exp fishing exp (num_agent is 1)
  players[1].skills.fishing.exp.update(goal_exp)
  # pred 4: agents 1 and 2 add up to 2 * goal_exp carving exp, short of 3 * goal_exp
  players[1].skills.carving.exp.update(goal_exp)
  players[2].skills.carving.exp.update(goal_exp)
  # pred 5: agents 2 and 4 add up to 2 * goal_exp + 2 carving exp
  players[4].skills.carving.exp.update(goal_exp + 2)
  env.obs = env._compute_observations()

  _, _, _, infos = env.step({})

  self._check_result(env, test_preds, infos, [2, 3, 5])

  # DONE
InvalidConstraint() - def close(self): # To prevent memory leak, clear all refs to old game state for group in self._groups: group.clear_prev_state() - def check(self, config: Config): - """ Checks whether the predicate is valid - - A satisfiable predicate "makes sense" given a config - ie. Not trying to reach target off the map - """ - if not GroupConstraint().check(config, self._subject): - return False - for i, (name, constraint) in enumerate(self._constraints): - if constraint is None: - continue - if i < len(self._args): - if not constraint.check(config, self._args[i]): - return False - elif not constraint.check(config, self._kwargs[name]): - return False - return True - - def sample(self, config: Config, **overload): - """ Samples a concrete instance of a given task. - - Allows overloading of previous parameters. - """ - # Sample Constraint - nargs = [arg.sample(config) if isinstance(arg, Constraint) else arg - for arg in self._args] - nkwargs = {k : v.sample(config) if isinstance(v, Constraint) else v - for k,v in self._kwargs.items()} - for i, (name, _) in enumerate(self._constraints): - if i < len(nargs): - if name in nkwargs: - raise InvalidPredicateDefinition("Constraints should match arguments.") - nkwargs[name] = nargs[i] - else: - break - - for k, v in overload.items(): - nkwargs[k] = v - # Result - return self.__class__(**nkwargs) - @abstractmethod def _evaluate(self, gs: GameState) -> float: """ A mapping from a game state to the desirability/progress of that state. 
@@ -209,24 +160,8 @@ def make_predicate(fn: Callable) -> Type[Predicate]: class FunctionPredicate(Predicate): def __init__(self, *args, **kwargs) -> None: - constraints = [] self._signature = signature - args = list(args) - for i, param in enumerate(self._signature.parameters.values()): - if i == 0: - continue - # Calculate list of constraints - if isinstance(param.default, Constraint): - constraints.append((param.name,param.default)) - else: - constraints.append((param.name,None)) - # Insert default values from function definition - if not param.name in kwargs and i-1 >= len(args): - if param.default == inspect.Parameter.empty: - args.append(param.default) - else: - kwargs[param.name] = param.default - super().__init__(*args, **kwargs, constraints=constraints) + super().__init__(*args, **kwargs) self._args = args self._kwargs = kwargs self.name = self._make_name(fn.__name__, args, kwargs) diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py index 8f8322a4..1156c69e 100644 --- a/tests/task/test_task_api.py +++ b/tests/task/test_task_api.py @@ -9,9 +9,9 @@ from nmmo.task.task_api import Task, OngoingTask, HoldDurationTask from nmmo.task.task_spec import TaskSpec, make_task_from_spec from nmmo.task.group import Group -from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT +from nmmo.task.constraint import ScalarConstraint from nmmo.task.base_predicates import ( - TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive, HoardGold + TickGE, AllMembersWithinRange, StayAlive, HoardGold ) from nmmo.systems import item as Item @@ -144,30 +144,6 @@ def test_constraint(self): self.assertTrue(scalar.sample(mock_gs.config)<10) self.assertTrue(scalar.sample(mock_gs.config)>=-10) - def test_sample_predicate(self): - # pylint: disable=no-value-for-parameter,expression-not-assigned - # make predicate class from function - canseegrp_pred_cls = make_predicate(CanSeeGroup) - tickge_pred_cls = make_predicate(TickGE) - - # if the 
predicate class is instantiated without the subject, - mock_gs = MockGameState() - predicate = canseegrp_pred_cls(subject=GroupConstraint, target=AGENT_LIST_CONSTRAINT) &\ - tickge_pred_cls(subject=GroupConstraint, num_tick=ScalarConstraint) - self.assertEqual(predicate.name, - "(AND_(CanSeeGroup_subject:GroupConstraint_target:AgentListConstraint)_"+\ - "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))") - - # this predicate cannot calculate progress becuase it has no subject - with self.assertRaises(AttributeError): - predicate(mock_gs) - - # this predicate supports sampling with valid arguments - config = nmmo.config.Default() - tickge_pred_cls().sample(config) - predicate.sample(config).name - - # DONE def test_task_api_with_predicate(self): # pylint: disable=no-value-for-parameter,no-member @@ -181,7 +157,7 @@ def test_task_api_with_predicate(self): self.assertEqual(predicate.get_source_code(), "def Fake(gs, subject, a,b,c):\n return False") self.assertEqual(predicate.get_signature(), ["gs", "subject", "a", "b", "c"]) - self.assertEqual(predicate.args, [group]) + self.assertEqual(predicate.args, tuple(group,)) self.assertDictEqual(predicate.kwargs, {"a": 1, "b": item, "c": action}) assignee = [1,2,3] # list of agent ids @@ -193,7 +169,7 @@ def test_task_api_with_predicate(self): self.assertEqual(task.get_source_code(), "def Fake(gs, subject, a,b,c):\n return False") self.assertEqual(task.get_signature(), ["gs", "subject", "a", "b", "c"]) - self.assertEqual(task.args, [group]) + self.assertEqual(task.args, tuple(group,)) self.assertDictEqual(task.kwargs, {"a": 1, "b": item, "c": action}) for agent_id in assignee: self.assertEqual(rewards[agent_id], 0) From 1b864fe8f0d653fc50e92727f05d16666201938b Mon Sep 17 00:00:00 2001 From: kywch Date: Mon, 4 Sep 2023 13:37:49 -0700 Subject: [PATCH 17/18] fix water exp error --- nmmo/systems/skill.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nmmo/systems/skill.py 
def GainExperience(gs: GameState, subject: Group,
                   skill: type[Skill], experience: int, num_agent: int):
  """Progress of ``subject`` toward a shared experience goal in ``skill``.

  Sums the ``<skill>_exp`` values of the subject's agents and divides by
  ``experience * num_agent``; the ratio is passed through ``norm``
  (defined elsewhere in this module; presumably clamps to [0, 1] -- confirm).
  A total of at least ``experience * num_agent`` therefore counts as done,
  however it is distributed among the agents.

  A non-positive goal (``experience * num_agent <= 0``) is treated as already
  met and returns 1.0, mirroring the ``level <= 1`` shortcut in AttainSkill and
  avoiding a division by zero.
  """
  target_exp = experience * num_agent
  if target_exp <= 0:
    return 1.0
  skill_exp = getattr(subject, skill.__name__.lower() + '_exp')
  return norm(sum(skill_exp) / target_exp)