From c58809e3f2e2ae409fe8cfe5ef382c0a957b3d90 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 12:29:56 -0700
Subject: [PATCH 01/18] add some noise to masks to prevent entropy collapse

---
 nmmo/core/observation.py | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index d0e52c55..03c52a14 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -199,8 +199,8 @@ def _make_action_targets(self):
         "MarketItem": self._make_buy_mask()
       }
       masks["GiveGold"] = {
-        "Price": self._make_give_gold_mask(), # reusing Price
-        "Target": self._make_give_target_mask()
+        "Price": self._make_give_gold_mask(),  # reusing Price
+        "Target": self._make_give_gold_target_mask()
       }
 
     if self.config.COMMUNICATION_SYSTEM_ENABLED:
@@ -325,9 +325,32 @@ def _make_give_target_mask(self):
     give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8)
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       give_mask[-1] = 1
-    # empty inventory -- nothing to give
-    if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\
-        or self.dummy_obs or self.agent_in_combat:
+
+    if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
+      return give_mask
+
+    # To prevent entropy collapse, allow agents to issue random give actions during early training
+    if self.inventory.len == 0:
+      give_mask[self.config.PLAYER_N_OBS//2:] = 1
+      return give_mask
+
+    agent = self.agent()
+    entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"],
+                                           EntityState.State.attr_name_to_col["col"]]]
+    same_tile = utils.linf(entities_pos, (agent.row, agent.col)) == 0
+    not_me = self.entities.ids != self.agent_id
+    player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)
+
+    give_mask[:self.entities.len] = same_tile & player & not_me
+    return give_mask
+
+  def _make_give_gold_target_mask(self):
+    give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8)
+    if self.config.PROVIDE_NOOP_ACTION_TARGET:
+      give_mask[-1] = 1
+
+    if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
+       or int(self.agent().gold) == 0:
       return give_mask
 
     agent = self.agent()
@@ -376,6 +399,11 @@ def _make_buy_mask(self):
     if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
       return buy_mask
 
+    # To prevent entropy collapse, allow agents to issue random buy actions during early training
+    if self.market.len == 0:  # nothing in the market
+      buy_mask[self.config.MARKET_N_OBS//10:] = 1
+      return buy_mask
+
     agent = self.agent()
     market_items = self.market.values
     not_mine = market_items[:,ItemState.State.attr_name_to_col["owner_id"]] != self.agent_id

From 64c53f040cac1ef350458c3ed820a113778dec69 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 12:45:06 -0700
Subject: [PATCH 02/18] tweaked give gold target mask to prevent entropy
 collapse

---
 nmmo/core/observation.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 03c52a14..68dde302 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -361,6 +361,11 @@ def _make_give_gold_target_mask(self):
     player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)
 
     give_mask[:self.entities.len] = same_tile & player & not_me
+
+    # To prevent entropy collapse, allow agents to issue random give actions during early training
+    if sum(give_mask[:self.entities.len]) == 0:
+      give_mask[self.config.PLAYER_N_OBS//2:] = 1
+
     return give_mask
 
   def _make_give_gold_mask(self):

From d3febd2c76e92d1827a4dd1b3591117409719b21 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 13:21:06 -0700
Subject: [PATCH 03/18] tweaked action masks during combat

---
 nmmo/core/observation.py      | 17 +++++++++--------
 tests/action/test_ammo_use.py |  3 ++-
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 68dde302..4b445aa6 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -349,8 +349,10 @@ def _make_give_gold_target_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       give_mask[-1] = 1
 
+    # To prevent entropy collapse, allow agents to issue random give actions during early training
     if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
        or int(self.agent().gold) == 0:
+      give_mask[self.config.PLAYER_N_OBS//2:] = 1
       return give_mask
 
     agent = self.agent()
@@ -371,12 +373,13 @@ def _make_give_gold_target_mask(self):
   def _make_give_gold_mask(self):
     mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8)
     mask[0] = 1  # To avoid all-0 masks. If the agent has no gold, this action will be ignored.
-    if self.dummy_obs:
+    if self.dummy_obs or self.agent_in_combat:
+      # To prevent entropy collapse, allow agents to issue random give actions during early training
+      mask[:] = 1
       return mask
 
     gold = int(self.agent().gold)
-    if gold and not self.agent_in_combat:
-      mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
+    mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
 
     return mask
 
@@ -401,12 +404,10 @@ def _make_buy_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       buy_mask[-1] = 1
 
-    if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
-      return buy_mask
-
     # To prevent entropy collapse, allow agents to issue random buy actions during early training
-    if self.market.len == 0:  # nothing in the market
-      buy_mask[self.config.MARKET_N_OBS//10:] = 1
+    if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \
+       or self.market.len == 0:
+      buy_mask[self.config.MARKET_N_OBS//2:] = 1
       return buy_mask
 
     agent = self.agent()
diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py
index 4cd35613..68aff5cc 100644
--- a/tests/action/test_ammo_use.py
+++ b/tests/action/test_ammo_use.py
@@ -30,7 +30,8 @@ def _assert_action_targets_zero(self, gym_obs):
     for atn in [action.Use, action.Give, action.Destroy, action.Sell]:
       mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"])
     # If MarketItem and InventoryTarget have no-action flags, these sum up to 5
-    self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
+    # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked
+    self.assertEqual(mask, 99 + 512 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
 
   def test_spawn_immunity(self):
     env = self._setup_env(random_seed=RANDOM_SEED)

From ec2297049a856dee9e0ae434a874a22563f86a0b Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 13:30:45 -0700
Subject: [PATCH 04/18] mask tweak

---
 nmmo/core/observation.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 4b445aa6..d3ff8013 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -326,11 +326,9 @@ def _make_give_target_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       give_mask[-1] = 1
 
-    if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
-      return give_mask
-
     # To prevent entropy collapse, allow agents to issue random give actions during early training
-    if self.inventory.len == 0:
+    if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
+       or self.inventory.len == 0:
       give_mask[self.config.PLAYER_N_OBS//2:] = 1
       return give_mask
 
@@ -342,6 +340,11 @@ def _make_give_target_mask(self):
     player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)
 
     give_mask[:self.entities.len] = same_tile & player & not_me
+
+    # To prevent entropy collapse, allow agents to issue random give actions during early training
+    if sum(give_mask[:self.entities.len]) == 0:
+      give_mask[self.config.PLAYER_N_OBS//2:] = 1
+
     return give_mask
 
   def _make_give_gold_target_mask(self):

From 3614560d9aad8dab5e7f70dd2023c4acb6190888 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 17:41:00 -0700
Subject: [PATCH 05/18] disallow no-op for attack and buy to jitter

---
 nmmo/core/observation.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index d3ff8013..769d3431 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -251,6 +251,12 @@ def _make_attack_mask(self):
     not_me = self.entities.ids != agent.id
 
     attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity
+
+    # To prevent entropy collapse, allow agents to issue random give actions during early training
+    if sum(attack_mask[:self.entities.len]) == 0:
+      attack_mask[self.config.PLAYER_N_OBS//2:] = 1
+      attack_mask[-1] = 0  # do not allow noop action in this case
+
     return attack_mask
 
   def _make_use_mask(self):
@@ -411,6 +417,7 @@ def _make_buy_mask(self):
     if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \
        or self.market.len == 0:
       buy_mask[self.config.MARKET_N_OBS//2:] = 1
+      buy_mask[-1] = 0  # do not allow noop action in this case
       return buy_mask
 
     agent = self.agent()

From 9851ed354ef0cea48d540d4272a92aedf9db6543 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 17:48:01 -0700
Subject: [PATCH 06/18] fixed tests

---
 tests/action/test_ammo_use.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py
index 68aff5cc..f88dc669 100644
--- a/tests/action/test_ammo_use.py
+++ b/tests/action/test_ammo_use.py
@@ -31,7 +31,7 @@ def _assert_action_targets_zero(self, gym_obs):
       mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"])
     # If MarketItem and InventoryTarget have no-action flags, these sum up to 5
     # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked
-    self.assertEqual(mask, 99 + 512 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
+    self.assertEqual(mask, 99 + 511 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
 
   def test_spawn_immunity(self):
     env = self._setup_env(random_seed=RANDOM_SEED)

From eecee94721afb71c048d4a58dd446e360e8e17a5 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 18:07:34 -0700
Subject: [PATCH 07/18] quick mask tweak

---
 nmmo/core/observation.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 769d3431..577542c9 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -434,6 +434,12 @@ def _make_buy_mask(self):
 
     enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold
     buy_mask[:self.market.len] = not_mine & enough_gold
+
+    # To prevent entropy collapse, allow agents to issue random give actions during early training
+    if sum(buy_mask[:self.market.len]) == 0:
+      buy_mask[self.config.MARKET_N_OBS//2:] = 1
+      buy_mask[-1] = 0  # do not allow noop action in this case
+
     return buy_mask
 
   def _existing_ammo_listings(self):

From 240dc0ecf2b72edd08a838aaffcb179b00233563 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 19:22:17 -0700
Subject: [PATCH 08/18] move mask tweak

---
 nmmo/core/observation.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 577542c9..3524ef90 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -215,9 +215,15 @@ def _make_move_mask(self):
       mask = np.zeros(len(action.Direction.edges), dtype=np.int8)
       mask[-1] = 1  # make sure the noop action is available
       return mask
+
     # pylint: disable=not-an-iterable
-    return np.array([self.tile(*d.delta).material_id in material.Habitable.indices
+    mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices
                      for d in action.Direction.edges], dtype=np.int8)
+    if sum(mask) == 1:  # only the stay is available
+      mask[:] = 1
+      mask[-1] = 0  # do not allow noop action
+
+    return mask
 
   def _make_attack_mask(self):
     # NOTE: Currently, all attacks have the same range

From 377ff221e46de893ac07255806296ee4bda01e02 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Wed, 30 Aug 2023 20:20:45 -0700
Subject: [PATCH 09/18] removed mask jitter other than attack and move

---
 nmmo/core/observation.py      | 32 +++-----------------------------
 tests/action/test_ammo_use.py |  2 +-
 2 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 3524ef90..ac6cc041 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -219,9 +219,9 @@ def _make_move_mask(self):
     # pylint: disable=not-an-iterable
     mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices
                      for d in action.Direction.edges], dtype=np.int8)
-    if sum(mask) == 1:  # only the stay is available
+    if sum(mask) == 1:  # only the stay (no-op) is available
       mask[:] = 1
-      mask[-1] = 0  # do not allow noop action
+      mask[-1] = 0  # do not allow no-op action
 
     return mask
 
@@ -261,7 +261,7 @@ def _make_attack_mask(self):
     # To prevent entropy collapse, allow agents to issue random give actions during early training
     if sum(attack_mask[:self.entities.len]) == 0:
       attack_mask[self.config.PLAYER_N_OBS//2:] = 1
-      attack_mask[-1] = 0  # do not allow noop action in this case
+      attack_mask[-1] = 0  # do not allow no-op action in this case
 
     return attack_mask
 
@@ -338,10 +338,8 @@ def _make_give_target_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       give_mask[-1] = 1
 
-    # To prevent entropy collapse, allow agents to issue random give actions during early training
     if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
        or self.inventory.len == 0:
-      give_mask[self.config.PLAYER_N_OBS//2:] = 1
       return give_mask
 
     agent = self.agent()
@@ -352,11 +350,6 @@ def _make_give_target_mask(self):
     player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)
 
     give_mask[:self.entities.len] = same_tile & player & not_me
-
-    # To prevent entropy collapse, allow agents to issue random give actions during early training
-    if sum(give_mask[:self.entities.len]) == 0:
-      give_mask[self.config.PLAYER_N_OBS//2:] = 1
-
     return give_mask
 
   def _make_give_gold_target_mask(self):
@@ -364,10 +357,8 @@ def _make_give_gold_target_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       give_mask[-1] = 1
 
-    # To prevent entropy collapse, allow agents to issue random give actions during early training
     if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
        or int(self.agent().gold) == 0:
-      give_mask[self.config.PLAYER_N_OBS//2:] = 1
       return give_mask
 
     agent = self.agent()
@@ -378,24 +369,16 @@ def _make_give_gold_target_mask(self):
     player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)
 
     give_mask[:self.entities.len] = same_tile & player & not_me
-
-    # To prevent entropy collapse, allow agents to issue random give actions during early training
-    if sum(give_mask[:self.entities.len]) == 0:
-      give_mask[self.config.PLAYER_N_OBS//2:] = 1
-
     return give_mask
 
   def _make_give_gold_mask(self):
     mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8)
     mask[0] = 1  # To avoid all-0 masks. If the agent has no gold, this action will be ignored.
     if self.dummy_obs or self.agent_in_combat:
-      # To prevent entropy collapse, allow agents to issue random give actions during early training
-      mask[:] = 1
       return mask
 
     gold = int(self.agent().gold)
     mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
-
     return mask
 
   def _make_sell_mask(self):
@@ -419,11 +402,8 @@ def _make_buy_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       buy_mask[-1] = 1
 
-    # To prevent entropy collapse, allow agents to issue random buy actions during early training
     if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \
        or self.market.len == 0:
-      buy_mask[self.config.MARKET_N_OBS//2:] = 1
-      buy_mask[-1] = 0  # do not allow noop action in this case
       return buy_mask
 
     agent = self.agent()
@@ -440,12 +420,6 @@ def _make_buy_mask(self):
 
     enough_gold = market_items[:,ItemState.State.attr_name_to_col["listed_price"]] <= agent.gold
     buy_mask[:self.market.len] = not_mine & enough_gold
-
-    # To prevent entropy collapse, allow agents to issue random give actions during early training
-    if sum(buy_mask[:self.market.len]) == 0:
-      buy_mask[self.config.MARKET_N_OBS//2:] = 1
-      buy_mask[-1] = 0  # do not allow noop action in this case
-
     return buy_mask
 
   def _existing_ammo_listings(self):
diff --git a/tests/action/test_ammo_use.py b/tests/action/test_ammo_use.py
index f88dc669..33c2890a 100644
--- a/tests/action/test_ammo_use.py
+++ b/tests/action/test_ammo_use.py
@@ -31,7 +31,7 @@ def _assert_action_targets_zero(self, gym_obs):
       mask += np.sum(gym_obs["ActionTargets"][atn.__name__]["InventoryItem"])
     # If MarketItem and InventoryTarget have no-action flags, these sum up to 5
     # To prevent entropy collapse, GiveGold/Price and Buy/MarketItem masks are tweaked
-    self.assertEqual(mask, 99 + 511 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
+    self.assertEqual(mask, 1 + 5*int(self.config.PROVIDE_NOOP_ACTION_TARGET))
 
   def test_spawn_immunity(self):
     env = self._setup_env(random_seed=RANDOM_SEED)

From 41ac7318018d506ec3acabea017dc47accbda333 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Thu, 31 Aug 2023 15:50:41 -0700
Subject: [PATCH 10/18] mask no-op in move, attack when there are valid options

---
 nmmo/core/observation.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index ac6cc041..93e405c0 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -213,15 +213,19 @@ def _make_action_targets(self):
   def _make_move_mask(self):
     if self.dummy_obs:
       mask = np.zeros(len(action.Direction.edges), dtype=np.int8)
-      mask[-1] = 1  # make sure the noop action is available
+      mask[-1] = 1  # for no-op
       return mask
 
     # pylint: disable=not-an-iterable
     mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices
                      for d in action.Direction.edges], dtype=np.int8)
-    if sum(mask) == 1:  # only the stay (no-op) is available
+
+    # To prevent entropy collapse, do NOT allow no-op action
+    if sum(mask) <= 1:
+      # if only the stay (no-op) is possible, then allow all actions
       mask[:] = 1
-      mask[-1] = 0  # do not allow no-op action
+    # Mask the no-op option, since there should be at least one allowed move
+    mask[-1] = 0
 
     return mask
 
@@ -261,7 +265,10 @@ def _make_attack_mask(self):
     # To prevent entropy collapse, allow agents to issue random give actions during early training
     if sum(attack_mask[:self.entities.len]) == 0:
       attack_mask[self.config.PLAYER_N_OBS//2:] = 1
-      attack_mask[-1] = 0  # do not allow no-op action in this case
+
+    # Mask the no-op option, since there should be at least one allowed move
+    # NOTE: this will make agents always attack if there is a valid target
+    attack_mask[-1] = 0
 
     return attack_mask
 

From 7aabad25ae1a35a471405efdf2a8ad3687d03744 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Thu, 31 Aug 2023 23:10:42 -0700
Subject: [PATCH 11/18] removed attack mask jitter

---
 nmmo/core/observation.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 93e405c0..5e43322c 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -220,7 +220,6 @@ def _make_move_mask(self):
     mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices
                      for d in action.Direction.edges], dtype=np.int8)
 
-    # To prevent entropy collapse, do NOT allow no-op action
     if sum(mask) <= 1:
       # if only the stay (no-op) is possible, then allow all actions
       mask[:] = 1
@@ -261,14 +260,10 @@ def _make_attack_mask(self):
     not_me = self.entities.ids != agent.id
 
     attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity
-
-    # To prevent entropy collapse, allow agents to issue random give actions during early training
-    if sum(attack_mask[:self.entities.len]) == 0:
-      attack_mask[self.config.PLAYER_N_OBS//2:] = 1
-
-    # Mask the no-op option, since there should be at least one allowed move
-    # NOTE: this will make agents always attack if there is a valid target
-    attack_mask[-1] = 0
+    if sum(attack_mask[:self.entities.len]) > 0:
+      # Mask the no-op option, since there should be at least one allowed move
+      # NOTE: this will make agents always attack if there is a valid target
+      attack_mask[-1] = 0
 
     return attack_mask
 

From 04026a1a3fa71e8e560cf38e35a3b22671d2fcc9 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Sat, 2 Sep 2023 03:03:37 -0700
Subject: [PATCH 12/18] put back move no-op

---
 nmmo/core/observation.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
index 5e43322c..9205ef98 100644
--- a/nmmo/core/observation.py
+++ b/nmmo/core/observation.py
@@ -217,17 +217,9 @@ def _make_move_mask(self):
       return mask
 
     # pylint: disable=not-an-iterable
-    mask = np.array([self.tile(*d.delta).material_id in material.Habitable.indices
+    return np.array([self.tile(*d.delta).material_id in material.Habitable.indices
                      for d in action.Direction.edges], dtype=np.int8)
 
-    if sum(mask) <= 1:
-      # if only the stay (no-op) is possible, then allow all actions
-      mask[:] = 1
-    # Mask the no-op option, since there should be at least one allowed move
-    mask[-1] = 0
-
-    return mask
-
   def _make_attack_mask(self):
     # NOTE: Currently, all attacks have the same range
     #   if we choose to make ranges different, the masks

From 0769c4b2431a7aa5a09fd74e5c5e9c25d91f0c58 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Sun, 3 Sep 2023 13:31:10 -0700
Subject: [PATCH 13/18] added exp to entity ds, tweaked exp threshold

---
 nmmo/core/config.py               | 34 +++++++------
 nmmo/entity/entity.py             | 20 +++++++-
 nmmo/systems/experience.py        | 13 -----
 nmmo/systems/skill.py             | 80 +++++++++++++++++++++++------
 tests/systems/test_skill_level.py | 84 +++++++++++++++++++++++++++++++
 5 files changed, 188 insertions(+), 43 deletions(-)
 delete mode 100644 nmmo/systems/experience.py
 create mode 100644 tests/systems/test_skill_level.py

diff --git a/nmmo/core/config.py b/nmmo/core/config.py
index 9cc7cccf..0a7fc973 100644
--- a/nmmo/core/config.py
+++ b/nmmo/core/config.py
@@ -413,30 +413,36 @@ def COMBAT_DAMAGE_FORMULA(self, offense, defense, multiplier):
   '''Reach of attacks using the Mage skill'''
 
 
+def default_exp_threshold(max_level):
+  import math
+  additional_exp_per_level = [round(90*math.sqrt(lvl))
+                              for lvl in range(1, max_level+1)]
+  return [sum(additional_exp_per_level[:lvl]) for lvl in range(max_level)]
+
 class Progression:
   '''Progression Game System'''
 
   PROGRESSION_SYSTEM_ENABLED        = True
   '''Game system flag'''
 
-  PROGRESSION_BASE_XP_SCALE         = 1
-  '''Base XP awarded for each skill usage -- multiplied by skill level'''
-
-  PROGRESSION_COMBAT_XP_SCALE       = 1
-  '''Multiplier on top of XP_SCALE for Melee, Range, and Mage'''
-
-  PROGRESSION_AMMUNITION_XP_SCALE   = 1
-  '''Multiplier on top of XP_SCALE for Prospecting, Carving, and Alchemy'''
-
-  PROGRESSION_CONSUMABLE_XP_SCALE   = 5
-  '''Multiplier on top of XP_SCALE for Fishing and Herbalism'''
-
   PROGRESSION_BASE_LEVEL            = 1
   '''Initial skill level'''
 
   PROGRESSION_LEVEL_MAX             = 10
   '''Max skill level'''
 
+  PROGRESSION_EXP_THRESHOLD         = default_exp_threshold(PROGRESSION_LEVEL_MAX)
+  '''A list of experience thresholds for each level'''
+
+  PROGRESSION_COMBAT_XP_SCALE       = 3
+  '''Additional XP for each attack for skills Melee, Range, and Mage'''
+
+  PROGRESSION_AMMUNITION_XP_SCALE   = 15
+  '''Additional XP for each harvest for Prospecting, Carving, and Alchemy'''
+
+  PROGRESSION_CONSUMABLE_XP_SCALE   = 30
+  '''Additional XP for each harvest for Fishing and Herbalism'''
+
   PROGRESSION_MELEE_BASE_DAMAGE     = 20
   '''Base Melee attack damage'''
 
@@ -585,13 +591,13 @@ class Profession:
   PROFESSION_HERB_CAPACITY            = 1
   '''Maximum number of harvests before an herb tile decays'''
 
-  PROFESSION_HERB_RESPAWN             = 0.01
+  PROFESSION_HERB_RESPAWN             = 0.02
   '''Probability that a harvested herb tile will regenerate each tick'''
 
   PROFESSION_FISH_CAPACITY            = 1
   '''Maximum number of harvests before a fish tile decays'''
 
-  PROFESSION_FISH_RESPAWN             = 0.01
+  PROFESSION_FISH_RESPAWN             = 0.02
   '''Probability that a harvested fish tile will regenerate each tick'''
 
   @staticmethod
diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py
index 2229c227..a6f044a0 100644
--- a/nmmo/entity/entity.py
+++ b/nmmo/entity/entity.py
@@ -32,17 +32,25 @@
     "food",
     "water",
 
-    # Combat
+    # Combat Skills
     "melee_level",
+    "melee_exp",
     "range_level",
+    "range_exp",
     "mage_level",
+    "mage_exp",
 
-    # Skills
+    # Harvest Skills
     "fishing_level",
+    "fishing_exp",
     "herbalism_level",
+    "herbalism_exp",
     "prospecting_level",
+    "prospecting_exp",
     "carving_level",
+    "carving_exp",
     "alchemy_level",
+    "alchemy_exp",
   ])
 
 EntityState.Limits = lambda config: {
@@ -69,13 +77,21 @@
   } if config.RESOURCE_SYSTEM_ENABLED else {}),
   **({
     "melee_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "melee_exp": (0, math.inf),
     "range_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "range_exp": (0, math.inf),
     "mage_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "mage_exp": (0, math.inf),
     "fishing_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "fishing_exp": (0, math.inf),
     "herbalism_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "herbalism_exp": (0, math.inf),
     "prospecting_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "prospecting_exp": (0, math.inf),
     "carving_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "carving_exp": (0, math.inf),
     "alchemy_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "alchemy_exp": (0, math.inf),
   } if config.PROGRESSION_SYSTEM_ENABLED else {}),
 }
 
diff --git a/nmmo/systems/experience.py b/nmmo/systems/experience.py
deleted file mode 100644
index 25be029f..00000000
--- a/nmmo/systems/experience.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import numpy as np
-
-class ExperienceCalculator:
-  def __init__(self, num_levels=15):
-    self.exp = np.array([0] + [10*2**i for i in range(num_levels)])
-
-  def exp_at_level(self, level):
-    return int(self.exp[level - 1])
-
-  def level_at_exp(self, exp):
-    if exp >= self.exp[-1]:
-      return len(self.exp)
-    return np.argmin(exp >= self.exp)
diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py
index 2978bb42..eb555e11 100644
--- a/nmmo/systems/skill.py
+++ b/nmmo/systems/skill.py
@@ -6,17 +6,37 @@
 from ordered_set import OrderedSet
 
 from nmmo.lib import material
-from nmmo.systems import combat, experience
+from nmmo.systems import combat
 from nmmo.lib.log import EventCode
 
 ### Infrastructure ###
+class ExperienceCalculator:
+  def __init__(self, config):
+    if not config.PROGRESSION_SYSTEM_ENABLED:
+      return
+    self.config = config
+    self.exp_threshold = np.array(config.PROGRESSION_EXP_THRESHOLD)
+    assert len(self.exp_threshold) >= config.PROGRESSION_LEVEL_MAX,\
+      "PROGRESSION_LEVEL_BY_EXP must have at least PROGRESSION_LEVEL_MAX entries"
+    self.max_exp = self.exp_threshold[self.config.PROGRESSION_LEVEL_MAX - 1]
+
+  def exp_at_level(self, level):
+    level = min(max(level, self.config.PROGRESSION_BASE_LEVEL),
+                self.config.PROGRESSION_LEVEL_MAX)
+    return int(self.exp_threshold[level - 1])
+
+  def level_at_exp(self, exp):
+    if exp >= self.max_exp:
+      return self.config.PROGRESSION_LEVEL_MAX
+    return np.argmin(exp >= self.exp_threshold)
+
 class SkillGroup:
   def __init__(self, realm, entity):
     self.config  = realm.config
     self.realm   = realm
     self.entity = entity
 
-    self.experience_calculator = experience.ExperienceCalculator()
+    self.experience_calculator = ExperienceCalculator(self.config)
     self.skills  = OrderedSet() # critical for determinism
 
   def update(self):
@@ -38,21 +58,17 @@ def __init__(self, skill_group: SkillGroup):
 
     self.experience_calculator = skill_group.experience_calculator
     self.skill_group = skill_group
-    self.exp = 0
-
     skill_group.skills.add(self)
 
   def packet(self):
     data = {}
-
-    data['exp']   = self.exp
+    data['exp']   = self.exp.val
     data['level'] = self.level.val
-
     return data
 
   def add_xp(self, xp):
-    self.exp += xp * self.config.PROGRESSION_BASE_XP_SCALE
-    new_level = int(self.experience_calculator.level_at_exp(self.exp))
+    self.exp.increment(xp)
+    new_level = int(self.experience_calculator.level_at_exp(self.exp.val))
 
     if new_level > self.level.val:
       self.level.update(new_level)
@@ -64,7 +80,7 @@ def add_xp(self, xp):
         tags={"player_id": self.entity.ent_id})
 
   def set_experience_by_level(self, level):
-    self.exp = self.experience_calculator.level_at_exp(level)
+    self.exp.update(self.experience_calculator.level_at_exp(level))
     self.level.update(int(level))
 
   @property
@@ -72,6 +88,11 @@ def level(self):
     raise NotImplementedError(f"Skill {self.__class__.__name__} "\
       "does not implement 'level' property")
 
+  @property
+  def exp(self):
+    raise NotImplementedError(f"Skill {self.__class__.__name__} "\
+      "does not implement 'exp' property")
+
 ### Skill Bases ###
 class CombatSkill(Skill):
   def update(self):
@@ -224,7 +245,7 @@ def receive_damage(self, dmg):
 class Skills(Basic, Harvest, Combat):
   pass
 
-### Skills ###
+### Combat Skills ###
 class Melee(CombatSkill):
   SKILL_ID = 1
 
@@ -232,6 +253,10 @@ class Melee(CombatSkill):
   def level(self):
     return self.entity.melee_level
 
+  @property
+  def exp(self):
+    return self.entity.melee_exp
+
 class Range(CombatSkill):
   SKILL_ID = 2
 
@@ -239,6 +264,10 @@ class Range(CombatSkill):
   def level(self):
     return self.entity.range_level
 
+  @property
+  def exp(self):
+    return self.entity.range_exp
+
 class Mage(CombatSkill):
   SKILL_ID = 3
 
@@ -246,11 +275,16 @@ class Mage(CombatSkill):
   def level(self):
     return self.entity.mage_level
 
+  @property
+  def exp(self):
+    return self.entity.mage_exp
+
 Melee.weakness = Mage
 Range.weakness = Melee
 Mage.weakness  = Range
 
-### Individual Skills ###
+
+### Basic/Harvest Skills ###
 
 class DummyLevel:
   def __init__(self, val=0):
@@ -281,7 +315,6 @@ def update(self):
 
     self.realm.event_log.record(EventCode.DRINK_WATER, self.entity)
 
-
 class Food(HarvestSkill):
   def update(self):
     config = self.config
@@ -304,7 +337,6 @@ def update(self):
 
     self.realm.event_log.record(EventCode.EAT_FOOD, self.entity)
 
-
 class Fishing(ConsumableSkill):
   SKILL_ID = 4
 
@@ -312,6 +344,10 @@ class Fishing(ConsumableSkill):
   def level(self):
     return self.entity.fishing_level
 
+  @property
+  def exp(self):
+    return self.entity.fishing_exp
+
   def update(self):
     self.harvest_adjacent(material.Fish)
 
@@ -322,6 +358,10 @@ class Herbalism(ConsumableSkill):
   def level(self):
     return self.entity.herbalism_level
 
+  @property
+  def exp(self):
+    return self.entity.herbalism_exp
+
   def update(self):
     self.harvest(material.Herb)
 
@@ -332,6 +372,10 @@ class Prospecting(AmmunitionSkill):
   def level(self):
     return self.entity.prospecting_level
 
+  @property
+  def exp(self):
+    return self.entity.prospecting_exp
+
   def update(self):
     self.harvest(material.Ore)
 
@@ -342,6 +386,10 @@ class Carving(AmmunitionSkill):
   def level(self):
     return self.entity.carving_level
 
+  @property
+  def exp(self):
+    return self.entity.carving_exp
+
   def update(self,):
     self.harvest(material.Tree)
 
@@ -352,5 +400,9 @@ class Alchemy(AmmunitionSkill):
   def level(self):
     return self.entity.alchemy_level
 
+  @property
+  def exp(self):
+    return self.entity.alchemy_exp
+
   def update(self):
     self.harvest(material.Crystal)
diff --git a/tests/systems/test_skill_level.py b/tests/systems/test_skill_level.py
new file mode 100644
index 00000000..496ecb7a
--- /dev/null
+++ b/tests/systems/test_skill_level.py
@@ -0,0 +1,84 @@
+import unittest
+
+import numpy as np
+
+import nmmo
+import nmmo.systems.skill
+from tests.testhelpers import ScriptedAgentTestConfig, ScriptedAgentTestEnv
+
+
+class TestSkillLevel(unittest.TestCase):
+  @classmethod
+  def setUpClass(cls):
+    cls.config = ScriptedAgentTestConfig()
+    cls.config.PROGRESSION_EXP_THRESHOLD = [0, 10, 20, 30, 40, 50]
+    cls.config.PROGRESSION_LEVEL_MAX = len(cls.config.PROGRESSION_EXP_THRESHOLD)
+    cls.env = ScriptedAgentTestEnv(cls.config)
+
+  def test_experience_calculator(self):
+    exp_calculator = nmmo.systems.skill.ExperienceCalculator(self.config)
+
+    self.assertTrue(np.array_equal(self.config.PROGRESSION_EXP_THRESHOLD,
+                                   exp_calculator.exp_threshold))
+
+    for level in range(1, self.config.PROGRESSION_LEVEL_MAX + 1):
+      self.assertEqual(exp_calculator.level_at_exp(exp_calculator.exp_at_level(level)), level)
+
+    self.assertEqual(exp_calculator.exp_at_level(-1),  # invalid level
+                     min(self.config.PROGRESSION_EXP_THRESHOLD))
+    self.assertEqual(exp_calculator.exp_at_level(30),  # level above the max
+                     max(self.config.PROGRESSION_EXP_THRESHOLD))
+
+    self.assertEqual(exp_calculator.level_at_exp(0), 1)
+    self.assertEqual(exp_calculator.level_at_exp(5), 1)
+    self.assertEqual(exp_calculator.level_at_exp(45), 5)
+    self.assertEqual(exp_calculator.level_at_exp(50), 6)
+    self.assertEqual(exp_calculator.level_at_exp(100), 6)
+
+  def test_add_xp(self):
+    self.env.reset()
+    player = self.env.realm.players[1]
+
+    skill_list = ["melee", "range", "mage",
+                  "fishing", "herbalism", "prospecting", "carving", "alchemy"]
+
+    # check the initial levels and exp
+    for skill in skill_list:
+      self.assertEqual(getattr(player.skills, skill).level.val, 1)
+      self.assertEqual(getattr(player.skills, skill).exp.val, 0)
+
+    # add 1 exp to melee, does NOT level up
+    player.skills.melee.add_xp(1)
+    for skill in skill_list:
+      if skill == "melee":
+        self.assertEqual(getattr(player.skills, skill).level.val, 1)
+        self.assertEqual(getattr(player.skills, skill).exp.val, 1)
+      else:
+        self.assertEqual(getattr(player.skills, skill).level.val, 1)
+        self.assertEqual(getattr(player.skills, skill).exp.val, 0)
+
+    # add 30 exp to fishing, levels up to 4
+    player.skills.fishing.add_xp(30)
+    for skill in skill_list:
+      if skill == "melee":
+        self.assertEqual(getattr(player.skills, skill).level.val, 1)
+        self.assertEqual(getattr(player.skills, skill).exp.val, 1)
+      elif skill == "fishing":
+        self.assertEqual(getattr(player.skills, skill).level.val, 4)
+        self.assertEqual(getattr(player.skills, skill).exp.val, 30)
+      else:
+        self.assertEqual(getattr(player.skills, skill).level.val, 1)
+        self.assertEqual(getattr(player.skills, skill).exp.val, 0)
+
+
+if __name__ == '__main__':
+  unittest.main()
+
+  # config = nmmo.config.Default()
+  # exp_calculator = nmmo.systems.skill.ExperienceCalculator(config)
+
+  # print(exp_calculator.exp_threshold)
+  # print(exp_calculator.exp_at_level(10))
+  # print(exp_calculator.level_at_exp(150)) # 2
+  # print(exp_calculator.level_at_exp(300)) # 3
+  # print(exp_calculator.level_at_exp(1000)) # 7

From dce5c0a420698739d0e9bf2a0a62c3927e7ae32f Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Sun, 3 Sep 2023 20:54:26 -0700
Subject: [PATCH 14/18] added resilient agents to help training

---
 nmmo/core/config.py           | 39 +++++++++++++++++++----------------
 nmmo/entity/entity.py         | 12 ++++++++---
 nmmo/entity/entity_manager.py | 17 +++++++++++----
 nmmo/entity/player.py         |  5 ++---
 tests/test_determinism.py     |  2 ++
 5 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/nmmo/core/config.py b/nmmo/core/config.py
index 0a7fc973..08b60c15 100644
--- a/nmmo/core/config.py
+++ b/nmmo/core/config.py
@@ -147,7 +147,7 @@ def game_system_enabled(self, name) -> bool:
   PROVIDE_ACTION_TARGETS       = True
   '''Provide action targets mask'''
 
-  PROVIDE_NOOP_ACTION_TARGET         = False
+  PROVIDE_NOOP_ACTION_TARGET   = True
   '''Provide a no-op option for each action'''
 
   PLAYERS                      = [Agent]
@@ -159,7 +159,7 @@ def game_system_enabled(self, name) -> bool:
   CURRICULUM_FILE_PATH = None
   '''Path to a curriculum task file containing a list of task specs for training'''
 
-  TASK_EMBED_DIM = 1024
+  TASK_EMBED_DIM = 4096
   '''Dimensionality of task embeddings'''
 
   ALLOW_MULTI_TASKS_PER_AGENT = False
@@ -188,7 +188,7 @@ def game_system_enabled(self, name) -> bool:
   PLAYER_N                     = None
   '''Maximum number of players spawnable in the environment'''
 
-  # TODO(kywch): CHECK if there could be 100+ entities within one's vision
+  # TODO: CHECK if there could be 100+ entities within one's vision
   PLAYER_N_OBS                 = 100
   '''Number of distinct agent observations'''
 
@@ -211,18 +211,6 @@ def PLAYER_VISION_DIAMETER(self):
   PLAYER_DEATH_FOG             = None
   '''How long before spawning death fog. None for no death fog'''
 
-
-  ############################################################################
-  ### Agent Parameters
-  IMMORTAL = False
-  '''Debug parameter: prevents agents from dying except by void'''
-
-  RESET_ON_DEATH = False
-  '''Whether to reset the environment whenever an agent dies'''
-
-  BASE_HEALTH                = 10
-  '''Initial Constitution level and agent health'''
-
   PLAYER_DEATH_FOG_SPEED       = 1
   '''Number of tiles per tick that the fog moves in'''
 
@@ -241,6 +229,14 @@ def PLAYER_TEAM_SIZE(self):
       assert not self.PLAYER_N % len(self.PLAYERS)
     return self.PLAYER_N // len(self.PLAYERS)
 
+  ############################################################################
+  ### Debug Parameters
+  IMMORTAL = False
+  '''Debug parameter: prevents agents from dying except by void'''
+
+  RESET_ON_DEATH = False
+  '''Debug parameter: whether to reset the environment whenever an agent dies'''
+
   ############################################################################
   ### Map Parameters
   MAP_N                        = 1
@@ -358,10 +354,18 @@ class Resource:
   RESOURCE_DEHYDRATION_RATE           = 10
   '''Damage per tick without water'''
 
-  RESOURCE_FOILAGE_CAPACITY            = 1
+  RESOURCE_RESILIENT_POPULATION       = 0
+  '''Training helper: proportion of population that is resilient to starvation and dehydration
+     (e.g. 0.1 means 10% of the population is resilient to starvation and dehydration)
+     This is to make some agents live longer during training to sample from "advanced" agents.'''
+
+  RESOURCE_DAMAGE_REDUCTION           = 0.5
+  '''Training helper: damage reduction from starvation and dehydration for resilient agents'''
+
+  RESOURCE_FOILAGE_CAPACITY           = 1
   '''Maximum number of harvests before a foilage tile decays'''
 
-  RESOURCE_FOILAGE_RESPAWN             = 0.025
+  RESOURCE_FOILAGE_RESPAWN            = 0.025
   '''Probability that a harvested foilage tile will regenerate each tick'''
 
   RESOURCE_HARVEST_RESTORE_FRACTION   = 1.0
@@ -529,7 +533,6 @@ def INVENTORY_N_OBS(self):
     return self.ITEM_INVENTORY_CAPACITY
 
 
-
 class Equipment:
   '''Equipment Game System'''
 
diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py
index a6f044a0..41620f44 100644
--- a/nmmo/entity/entity.py
+++ b/nmmo/entity/entity.py
@@ -122,6 +122,7 @@ def __init__(self, ent, config):
     self.water = ent.water
     self.food = ent.food
     self.health_restore = 0
+    self.resilient = False
 
     self.health.update(config.PLAYER_BASE_HEALTH)
     if config.RESOURCE_SYSTEM_ENABLED:
@@ -144,10 +145,16 @@ def update(self):
       self.health.increment(restore)
 
     if self.food.empty:
-      self.health.decrement(self.config.RESOURCE_STARVATION_RATE)
+      starvation_damage = self.config.RESOURCE_STARVATION_RATE
+      if self.resilient:
+        starvation_damage *= self.config.RESOURCE_DAMAGE_REDUCTION
+      self.health.decrement(int(starvation_damage))
 
     if self.water.empty:
-      self.health.decrement(self.config.RESOURCE_DEHYDRATION_RATE)
+      dehydration_damage = self.config.RESOURCE_DEHYDRATION_RATE
+      if self.resilient:
+        dehydration_damage *= self.config.RESOURCE_DAMAGE_REDUCTION
+      self.health.decrement(int(dehydration_damage))
 
     # records both increase and decrease in health due to food and water
     self.health_restore = self.health.val - org_health
@@ -273,7 +280,6 @@ def ent_id(self):
 
   def packet(self):
     data = {}
-
     data['status'] = self.status.packet()
     data['history'] = self.history.packet()
     data['inventory'] = self.inventory.packet()
diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py
index 9d24be2d..95636323 100644
--- a/nmmo/entity/entity_manager.py
+++ b/nmmo/entity/entity_manager.py
@@ -148,14 +148,23 @@ def reset(self, np_random):
     self._agent_loader = self.loader_class(self.config, self._np_random)
     self.spawned = set()
 
-  def spawn_individual(self, r, c, idx):
+  def spawn_individual(self, r, c, idx, resilient=False):
     agent = next(self._agent_loader)
-    agent      = agent(self.config, idx)
-    player     = Player(self.realm, (r, c), agent)
+    agent = agent(self.config, idx)
+    player = Player(self.realm, (r, c), agent, resilient)
     super().spawn(player)
     self.spawned.add(idx)
 
   def spawn(self):
+    # Check and assign the resilient flag
+    resilient_flag = [False] * self.config.PLAYER_N
+    if self.config.RESOURCE_SYSTEM_ENABLED:
+      num_resilient = round(self.config.RESOURCE_RESILIENT_POPULATION * self.config.PLAYER_N)
+      for idx in range(num_resilient):
+        resilient_flag[idx] = self.config.RESOURCE_DAMAGE_REDUCTION > 0
+      self._np_random.shuffle(resilient_flag)
+
+    # Spawn the players
     idx = 0
     while idx < self.config.PLAYER_N:
       idx += 1
@@ -167,4 +176,4 @@ def spawn(self):
       if idx in self.spawned:
         continue
 
-      self.spawn_individual(r, c, idx)
+      self.spawn_individual(r, c, idx, resilient_flag[idx-1])
diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py
index b635810d..73c9b4bc 100644
--- a/nmmo/entity/player.py
+++ b/nmmo/entity/player.py
@@ -4,11 +4,12 @@
 
 # pylint: disable=no-member
 class Player(entity.Entity):
-  def __init__(self, realm, pos, agent):
+  def __init__(self, realm, pos, agent, resilient=False):
     super().__init__(realm, pos, agent.iden, agent.policy)
 
     self.agent    = agent
     self.immortal = realm.config.IMMORTAL
+    self.resources.resilient = resilient
 
     # Scripted hooks
     self.target = None
@@ -97,9 +98,7 @@ def equipment(self):
 
   def packet(self):
     data = super().packet()
-
     data['entID']     = self.ent_id
-
     data['resource']  = self.resources.packet()
     data['skills']    = self.skills.packet()
     data['inventory'] = self.inventory.packet()
diff --git a/tests/test_determinism.py b/tests/test_determinism.py
index e84b0bc4..fdfcfab9 100644
--- a/tests/test_determinism.py
+++ b/tests/test_determinism.py
@@ -63,9 +63,11 @@ def test_env_level_rng(self):
     config1 = ScriptedAgentTestConfig()
     setattr(config1, 'MAP_FORCE_GENERATION', True)
     setattr(config1, 'PATH_MAPS', 'maps/det1')
+    setattr(config1, 'RESOURCE_RESILIENT_POPULATION', 0.2)  # uses np_random
     config2 = ScriptedAgentTestConfig()
     setattr(config2, 'MAP_FORCE_GENERATION', True)
     setattr(config2, 'PATH_MAPS', 'maps/det2')
+    setattr(config2, 'RESOURCE_RESILIENT_POPULATION', 0.2)
 
     # to create the same maps, seed must be provided
     env1 = ScriptedAgentTestEnv(config1, seed=RANDOM_SEED)

From e771e895acc40f011cf2de51ef174a580c7a7cef Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Mon, 4 Sep 2023 00:33:17 -0700
Subject: [PATCH 15/18] added gain exp predicate

---
 nmmo/task/base_predicates.py  | 11 +++++++++--
 tests/task/test_predicates.py | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py
index b619b336..a9f0c468 100644
--- a/nmmo/task/base_predicates.py
+++ b/nmmo/task/base_predicates.py
@@ -83,8 +83,15 @@ def AttainSkill(gs: GameState, subject: Group, skill: Skill, level: int, num_age
   """True if the number of agents having skill level GE level
         is greather than or equal to num_agent
   """
-  skill_level = getattr(subject,skill.__name__.lower() + '_level')
-  return norm(sum(skill_level >= level) / num_agent)
+  if level <= 1:
+    return 1.0
+  skill_level = getattr(subject,skill.__name__.lower() + '_level') - 1  # base level is 1
+  return norm(sum(skill_level) / (num_agent * (level-1)))
+
+def GainExperience(gs: GameState, subject: Group, skill: Skill, experience: int, num_agent: int):
+  """True if the experience gained for the skill is greater than or equal to experience."""
+  skill_exp = getattr(subject,skill.__name__.lower() + '_exp')
+  return norm(sum(skill_exp) / (experience*num_agent))
 
 def CountEvent(gs: GameState, subject: Group, event: str, N: int):
   """True if the number of events occured in subject corresponding
diff --git a/tests/task/test_predicates.py b/tests/task/test_predicates.py
index 90bff5a9..b85715f2 100644
--- a/tests/task/test_predicates.py
+++ b/tests/task/test_predicates.py
@@ -388,6 +388,41 @@ def test_attain_skill(self):
 
     # DONE
 
+  def test_gain_experience(self):
+    attain_gain_exp_cls = make_predicate(bp.GainExperience)
+
+    goal_exp = 5
+    test_preds = [ # (Predicate, Team), the reward is 1 by default
+      (attain_gain_exp_cls(Group([1]), Skill.Melee, goal_exp, 1), ALL_AGENT), # False
+      (attain_gain_exp_cls(Group([2]), Skill.Melee, goal_exp, 1), ALL_AGENT), # False
+      (attain_gain_exp_cls(Group([1]), Skill.Range, goal_exp, 1), ALL_AGENT), # True
+      (attain_gain_exp_cls(Group([1,3]), Skill.Fishing, goal_exp, 1), ALL_AGENT), # True
+      (attain_gain_exp_cls(Group([1,2,3]), Skill.Carving, goal_exp, 3), ALL_AGENT), # False
+      (attain_gain_exp_cls(Group([2,4]), Skill.Carving, goal_exp, 2), ALL_AGENT)] # True
+
+    env = self._get_taskenv(test_preds)
+
+    # GainExperience(Group([1]), Skill.Melee, goal_exp, 1) is false
+    # GainExperience(Group([2]), Skill.Melee, goal_exp, 1) is false
+    env.realm.players[1].skills.melee.exp.update(goal_exp-1)
+    # GainExperience(Group([1]), Skill.Range, goal_exp, 1) is true
+    env.realm.players[1].skills.range.exp.update(goal_exp)
+    # GainExperience(Group([1,3]), Skill.Fishing, goal_exp, 1) is true
+    env.realm.players[1].skills.fishing.exp.update(goal_exp)
+    # GainExperience(Group([1,2,3]), Skill.Carving, goal_exp, 3) is false
+    env.realm.players[1].skills.carving.exp.update(goal_exp)
+    env.realm.players[2].skills.carving.exp.update(goal_exp)
+    # GainExperience(Group([2,4]), Skill.Carving, goal_exp, 2) is true
+    env.realm.players[4].skills.carving.exp.update(goal_exp+2)
+    env.obs = env._compute_observations()
+
+    _, _, _, infos = env.step({})
+
+    true_task = [2, 3, 5]
+    self._check_result(env, test_preds, infos, true_task)
+
+    # DONE
+
   def test_inventory_space_ge_not(self):
     inv_space_ge_pred_cls = make_predicate(bp.InventorySpaceGE)
 

From f35c9eb2d995cda72486aa22332e93eb1657b30a Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Mon, 4 Sep 2023 10:20:45 -0700
Subject: [PATCH 16/18] fixed error with custom pred arg

---
 nmmo/task/predicate_api.py  | 71 ++-----------------------------------
 tests/task/test_task_api.py | 32 +++--------------
 2 files changed, 7 insertions(+), 96 deletions(-)

diff --git a/nmmo/task/predicate_api.py b/nmmo/task/predicate_api.py
index 6381f2be..a1511a02 100644
--- a/nmmo/task/predicate_api.py
+++ b/nmmo/task/predicate_api.py
@@ -8,7 +8,7 @@
 from nmmo.core.config import Config
 from nmmo.task.group import Group, union
 from nmmo.task.game_state import GameState
-from nmmo.task.constraint import Constraint, InvalidConstraint, GroupConstraint
+from nmmo.task.constraint import Constraint, GroupConstraint
 
 if TYPE_CHECKING:
   from nmmo.task.task_api import Task
@@ -33,7 +33,7 @@ def __init__(self,
 
     self._args = args
     self._kwargs = kwargs
-    self._constraints = constraints
+    self._constraints = constraints  # NOTE: not used
     self._config = None
     self._subject = subject
 
@@ -46,9 +46,6 @@ def __call__(self, gs: GameState) -> float:
     Returns:
       progress: float bounded between [0, 1], 1 is considered to be true
     """
-    if not self._config == gs.config:
-      # TODO(mark) should we make this explicitly called by environment
-      self._reset(gs.config)
     # Update views
     for group in self._groups:
       group.update(gs)
@@ -61,57 +58,11 @@ def __call__(self, gs: GameState) -> float:
       cache[self.name] = progress
     return progress
 
-  def _reset(self, config: Config):
-    self._config = config
-    if not self.check(self._config):
-      raise InvalidConstraint()
-
   def close(self):
     # To prevent memory leak, clear all refs to old game state
     for group in self._groups:
       group.clear_prev_state()
 
-  def check(self, config: Config):
-    """ Checks whether the predicate is valid
-
-    A satisfiable predicate "makes sense" given a config
-    ie. Not trying to reach target off the map
-    """
-    if not GroupConstraint().check(config, self._subject):
-      return False
-    for i, (name, constraint) in enumerate(self._constraints):
-      if constraint is None:
-        continue
-      if i < len(self._args):
-        if not constraint.check(config, self._args[i]):
-          return False
-      elif not constraint.check(config, self._kwargs[name]):
-        return False
-    return True
-
-  def sample(self, config: Config, **overload):
-    """ Samples a concrete instance of a given task.
-    
-    Allows overloading of previous parameters.
-    """
-    # Sample Constraint
-    nargs = [arg.sample(config) if isinstance(arg, Constraint) else arg
-              for arg in self._args]
-    nkwargs = {k : v.sample(config) if isinstance(v, Constraint) else v
-                for k,v in self._kwargs.items()}
-    for i, (name, _) in enumerate(self._constraints):
-      if i < len(nargs):
-        if name in nkwargs:
-          raise InvalidPredicateDefinition("Constraints should match arguments.")
-        nkwargs[name] = nargs[i]
-      else:
-        break
-
-    for k, v in overload.items():
-      nkwargs[k] = v
-     # Result
-    return self.__class__(**nkwargs)
-
   @abstractmethod
   def _evaluate(self, gs: GameState) -> float:
     """ A mapping from a game state to the desirability/progress of that state.
@@ -209,24 +160,8 @@ def make_predicate(fn: Callable) -> Type[Predicate]:
 
   class FunctionPredicate(Predicate):
     def __init__(self, *args, **kwargs) -> None:
-      constraints = []
       self._signature = signature
-      args = list(args)
-      for i, param in enumerate(self._signature.parameters.values()):
-        if i == 0:
-          continue
-        # Calculate list of constraints
-        if isinstance(param.default, Constraint):
-          constraints.append((param.name,param.default))
-        else:
-          constraints.append((param.name,None))
-        # Insert default values from function definition
-        if not param.name in kwargs and i-1 >= len(args):
-          if param.default == inspect.Parameter.empty:
-            args.append(param.default)
-          else:
-            kwargs[param.name] = param.default
-      super().__init__(*args, **kwargs, constraints=constraints)
+      super().__init__(*args, **kwargs)
       self._args = args
       self._kwargs = kwargs
       self.name = self._make_name(fn.__name__, args, kwargs)
diff --git a/tests/task/test_task_api.py b/tests/task/test_task_api.py
index 8f8322a4..1156c69e 100644
--- a/tests/task/test_task_api.py
+++ b/tests/task/test_task_api.py
@@ -9,9 +9,9 @@
 from nmmo.task.task_api import Task, OngoingTask, HoldDurationTask
 from nmmo.task.task_spec import TaskSpec, make_task_from_spec
 from nmmo.task.group import Group
-from nmmo.task.constraint import ScalarConstraint, GroupConstraint, AGENT_LIST_CONSTRAINT
+from nmmo.task.constraint import ScalarConstraint
 from nmmo.task.base_predicates import (
-    TickGE, CanSeeGroup, AllMembersWithinRange, StayAlive, HoardGold
+    TickGE, AllMembersWithinRange, StayAlive, HoardGold
 )
 
 from nmmo.systems import item as Item
@@ -144,30 +144,6 @@ def test_constraint(self):
       self.assertTrue(scalar.sample(mock_gs.config)<10)
       self.assertTrue(scalar.sample(mock_gs.config)>=-10)
 
-  def test_sample_predicate(self):
-    # pylint: disable=no-value-for-parameter,expression-not-assigned
-    # make predicate class from function
-    canseegrp_pred_cls = make_predicate(CanSeeGroup)
-    tickge_pred_cls = make_predicate(TickGE)
-
-    # if the predicate class is instantiated without the subject,
-    mock_gs = MockGameState()
-    predicate = canseegrp_pred_cls(subject=GroupConstraint, target=AGENT_LIST_CONSTRAINT) &\
-                tickge_pred_cls(subject=GroupConstraint, num_tick=ScalarConstraint)
-    self.assertEqual(predicate.name,
-                     "(AND_(CanSeeGroup_subject:GroupConstraint_target:AgentListConstraint)_"+\
-                     "(TickGE_subject:GroupConstraint_num_tick:ScalarConstraint))")
-
-    # this predicate cannot calculate progress becuase it has no subject
-    with self.assertRaises(AttributeError):
-      predicate(mock_gs)
-
-    # this predicate supports sampling with valid arguments
-    config = nmmo.config.Default()
-    tickge_pred_cls().sample(config)
-    predicate.sample(config).name
-
-    # DONE
 
   def test_task_api_with_predicate(self):
     # pylint: disable=no-value-for-parameter,no-member
@@ -181,7 +157,7 @@ def test_task_api_with_predicate(self):
     self.assertEqual(predicate.get_source_code(),
                      "def Fake(gs, subject, a,b,c):\n  return False")
     self.assertEqual(predicate.get_signature(), ["gs", "subject", "a", "b", "c"])
-    self.assertEqual(predicate.args, [group])
+    self.assertEqual(predicate.args, tuple(group,))
     self.assertDictEqual(predicate.kwargs, {"a": 1, "b": item, "c": action})
 
     assignee = [1,2,3] # list of agent ids
@@ -193,7 +169,7 @@ def test_task_api_with_predicate(self):
     self.assertEqual(task.get_source_code(),
                      "def Fake(gs, subject, a,b,c):\n  return False")
     self.assertEqual(task.get_signature(), ["gs", "subject", "a", "b", "c"])
-    self.assertEqual(task.args, [group])
+    self.assertEqual(task.args, tuple(group,))
     self.assertDictEqual(task.kwargs, {"a": 1, "b": item, "c": action})
     for agent_id in assignee:
       self.assertEqual(rewards[agent_id], 0)

From 1b864fe8f0d653fc50e92727f05d16666201938b Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Mon, 4 Sep 2023 13:37:49 -0700
Subject: [PATCH 17/18] fix water exp error

---
 nmmo/systems/skill.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/nmmo/systems/skill.py b/nmmo/systems/skill.py
index eb555e11..f0d60dde 100644
--- a/nmmo/systems/skill.py
+++ b/nmmo/systems/skill.py
@@ -101,11 +101,15 @@ def update(self):
 class NonCombatSkill(Skill):
   def __init__(self, skill_group: SkillGroup):
     super().__init__(skill_group)
-    self._level = DummyLevel()
+    self._dummy_value = DummyValue()  # for water and food
 
   @property
   def level(self):
-    return self._level
+    return self._dummy_value
+
+  @property
+  def exp(self):
+    return self._dummy_value
 
 class HarvestSkill(NonCombatSkill):
   def process_drops(self, matl, drop_table):
@@ -286,7 +290,7 @@ def exp(self):
 
 ### Basic/Harvest Skills ###
 
-class DummyLevel:
+class DummyValue:
   def __init__(self, val=0):
     self.val = val
 

From 6a92799f5232c179e804fd6095a391994f6b1203 Mon Sep 17 00:00:00 2001
From: kywch <choe.kyoung@gmail.com>
Date: Mon, 4 Sep 2023 15:36:33 -0700
Subject: [PATCH 18/18] correct skill type hint

---
 nmmo/task/base_predicates.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nmmo/task/base_predicates.py b/nmmo/task/base_predicates.py
index a9f0c468..cce7d97f 100644
--- a/nmmo/task/base_predicates.py
+++ b/nmmo/task/base_predicates.py
@@ -79,7 +79,8 @@ def DistanceTraveled(gs: GameState, subject: Group, dist: int):
   dists = utils.linf(list(zip(r,c)),[gs.spawn_pos[id_] for id_ in subject.entity.id])
   return norm(dists.sum() / dist)
 
-def AttainSkill(gs: GameState, subject: Group, skill: Skill, level: int, num_agent: int):
+def AttainSkill(gs: GameState, subject: Group,
+                skill: type[Skill], level: int, num_agent: int):
   """True if the number of agents having skill level GE level
         is greather than or equal to num_agent
   """
@@ -88,7 +89,8 @@ def AttainSkill(gs: GameState, subject: Group, skill: Skill, level: int, num_age
   skill_level = getattr(subject,skill.__name__.lower() + '_level') - 1  # base level is 1
   return norm(sum(skill_level) / (num_agent * (level-1)))
 
-def GainExperience(gs: GameState, subject: Group, skill: Skill, experience: int, num_agent: int):
+def GainExperience(gs: GameState, subject: Group,
+                   skill: type[Skill], experience: int, num_agent: int):
   """True if the experience gained for the skill is greater than or equal to experience."""
   skill_exp = getattr(subject,skill.__name__.lower() + '_exp')
   return norm(sum(skill_exp) / (experience*num_agent))