 
 from .utils import CollaborativeBAIAgent, CollaborativeBAIMaster
 
+
 class LilUCBHeuristicCollaborative(MABFixedConfidenceBAILearner):
   """LilUCB heuristic policy :cite:`jamieson2014lil`
   Modified implementation to supplement CollaborativeAgent
@@ -25,18 +26,20 @@ class LilUCBHeuristicCollaborative(MABFixedConfidenceBAILearner):
   :param np.ndarray assigned_arms: arm indices the learner has to work with
   :param str name: alias name
   """
-  def __init__(self, arm_num: int, confidence: float,
-               assigned_arms: np.ndarray = None, name: str = None):
-    assert np.max(assigned_arms) < arm_num and len(assigned_arms) <= arm_num, (
-        "assigned arms should be a subset of [arm_num]\nReceived: "
-        + str(assigned_arms))
+  def __init__(self,
+               arm_num: int,
+               confidence: float,
+               assigned_arms: np.ndarray = None,
+               name: str = None):
+    assert np.max(assigned_arms) < arm_num and len(assigned_arms) <= arm_num, (
+        "assigned arms should be a subset of [arm_num]\nReceived: " +
+        str(assigned_arms))
     super().__init__(arm_num=arm_num, confidence=confidence, name=name)
     if assigned_arms is not None:
       self.__assigned_arms = assigned_arms
     else:
       self.__assigned_arms = np.arange(arm_num)
 
-
   def _name(self) -> str:
     return 'lilUCB_heur_collaborative'
 
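# A rough usage sketch for the learner above, assuming the usual banditpylib
# learner protocol (reset / actions / update) and illustrative arm indices:
#
#   learner = LilUCBHeuristicCollaborative(arm_num=10,
#                                          confidence=0.99,
#                                          assigned_arms=np.array([2, 5, 7]))
#   learner.reset()
#   actions = learner.actions()   # initialization: one pull per assigned arm
#   ...                           # play the pulls, then learner.update(feedback)
#   best = learner.best_arm       # reported as a global bandit index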
@@ -80,7 +83,7 @@ def __ucb(self) -> np.ndarray:
   def actions(self, context=None) -> Actions:
     del context
     if self.__stage == 'initialization':
-      actions = Actions() # default state is normal
+      actions = Actions()  # default state is normal
 
       # 1 pull each for every assigned arm
       for arm_id in self.__assigned_arms:
@@ -109,7 +112,7 @@ def update(self, feedback: Feedback):
     for arm_feedback in feedback.arm_feedbacks:
       # reverse map from bandit index to local index
       pseudo_arm_index = np.where(
-        self.__assigned_arms == arm_feedback.arm.id)[0][0]
+          self.__assigned_arms == arm_feedback.arm.id)[0][0]
       self.__pseudo_arms[pseudo_arm_index].update(
           np.array(arm_feedback.rewards))
       self.__total_pulls += len(arm_feedback.rewards)
@@ -120,12 +123,10 @@ def update(self, feedback: Feedback):
   @property
   def best_arm(self) -> int:
     # map best arm local index to actual bandit index
-    return self.__assigned_arms[
-        argmax_or_min_tuple([
+    return self.__assigned_arms[argmax_or_min_tuple([
         (pseudo_arm.total_pulls, arm_id)
         for (arm_id, pseudo_arm) in enumerate(self.__pseudo_arms)
-    ])
-    ]
+    ])]
 
   def get_total_pulls(self) -> int:
     return self.__total_pulls
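# Note on best_arm above: each pseudo arm contributes a (total_pulls, local id)
# tuple, and argmax_or_min_tuple presumably returns the id with the largest
# pull count, so the recommended arm is the most-pulled one (the usual
# recommendation rule for the lil'UCB heuristic), mapped back to its global
# bandit index through __assigned_arms.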
@@ -142,9 +143,11 @@ class LilUCBHeuristicCollaborativeBAIAgent(CollaborativeBAIAgent):
     (over all rounds combined)
   :param Optional[str] name: alias name
   """
-
-  def __init__(self, arm_num: int, rounds: int,
-               horizon: int, name: Optional[str] = None):
+  def __init__(self,
+               arm_num: int,
+               rounds: int,
+               horizon: int,
+               name: Optional[str] = None):
     super().__init__(name)
     if arm_num <= 1:
       raise ValueError('Number of arms is expected at least 2. Got %d.' %
@@ -183,8 +186,8 @@ def set_input_arms(self, arms: List[int]):
 
     self.__assigned_arms = np.array(arms)
     # confidence of 0.01 suggested in the paper
-    self.__central_algo = LilUCBHeuristicCollaborative(self.__arm_num,
-        0.99, self.__assigned_arms)
+    self.__central_algo = LilUCBHeuristicCollaborative(self.__arm_num, 0.99,
+                                                       self.__assigned_arms)
     self.__central_algo.reset()
     if self.__stage == "unassigned":
       self.__stage = "preparation"
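# Reading of the confidence argument above: the central algorithm is created
# with confidence=0.99, which matches the "0.01 suggested in the paper" note
# if that 0.01 is the allowed failure probability delta, i.e. confidence is
# taken as 1 - delta (an interpretation, not stated explicitly in the code).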
@@ -207,7 +210,7 @@ def actions(self, context=None) -> Actions:
         self.__stage = "learning"
         self.__learning_arm = self.__assigned_arms[0]
         return self.actions()
-      if self.__central_algo.get_total_pulls() >= self.__horizon // 2:
+      if self.__central_algo.get_total_pulls() >= self.__horizon // 2:
         self.__stage = "learning"
         # use whatever best_arm the central algo outputs
         self.__learning_arm = self.__central_algo.best_arm
@@ -233,7 +236,7 @@ def actions(self, context=None) -> Actions:
         return actions
       else:
         arm_pull = actions.arm_pulls.add()
-        arm_pull.arm.id = self.__learning_arm # pylint: disable=protobuf-type-error
+        arm_pull.arm.id = self.__learning_arm  # pylint: disable=protobuf-type-error
         arm_pull.times = self.__num_pulls_learning
         return actions
 
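# In the learning stage above the agent appears to batch its budget on a
# single arm: it requests __num_pulls_learning pulls of __learning_arm in one
# Actions message (the pylint disable suggests arm ids live in a protobuf).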
@@ -249,16 +252,16 @@ def actions(self, context=None) -> Actions:
 
     else:
       raise Exception(self.name + ": " + self.__stage +
-          " does not allow actions to be played")
+                      " does not allow actions to be played")
 
   def update(self, feedback: Feedback):
-    self.__learning_mean = None # default in case learning_arm is None
+    self.__learning_mean = None  # default in case learning_arm is None
     num_pulls = 0
     for arm_feedback in feedback.arm_feedbacks:
       num_pulls += len(arm_feedback.rewards)
     if self.__central_algo_action_taken:
       self.__central_algo.update(feedback)
-    elif num_pulls > 0:
+    elif num_pulls > 0:
       # non-zero pulls not by central_algo => learning step was done
       for arm_feedback in feedback.arm_feedbacks:
         if arm_feedback.arm.id == self.__learning_arm:
@@ -282,10 +285,11 @@ def broadcast(self) -> Dict[int, Tuple[float, int]]:
     return_dict = {}
     if self.__learning_arm:
       return_dict[self.__learning_arm] = (self.__learning_mean,
-          self.__pulls_used)
+                                          self.__pulls_used)
     self.__complete_round()
     return return_dict
 
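# broadcast() above appears to hand the master a dict of
# {learning arm id: (empirical mean, pulls used this round)}; the master's
# elimination() below aggregates these per-arm across agents before pruning.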
+
 class LilUCBHeuristicCollaborativeBAIMaster(CollaborativeBAIMaster):
   r"""Implementation of master in Collaborative Learning Algorithm
 
@@ -297,9 +301,12 @@ class LilUCBHeuristicCollaborativeBAIMaster(CollaborativeBAIMaster):
   :param int num_agents: number of agents
   :param Optional[str] name: alias name
   """
-
-  def __init__(self, arm_num:int, rounds: int,
-               horizon: int, num_agents: int, name: Optional[str] = None):
+  def __init__(self,
+               arm_num: int,
+               rounds: int,
+               horizon: int,
+               num_agents: int,
+               name: Optional[str] = None):
     super().__init__(name)
     if arm_num <= 1:
       raise ValueError('Number of arms is expected at least 2. Got %d.' %
@@ -371,8 +378,8 @@ def random_round(x: float) -> int:
       if i >= len(active_arms_copy):
         break
       num_arms = random_round(num_arms_per_agent)
-      agent_arm_assignment[agent_id] += active_arms_copy[i: i + num_arms]
-      i += num_arms
+      agent_arm_assignment[agent_id] += active_arms_copy[i:i + num_arms]
+      i += num_arms
     if i < len(active_arms_copy):
       agent_arm_assignment[agent_ids[-1]] += active_arms_copy[i:]
 
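# The assignment loop above splits the currently active arms across agents:
# random_round() presumably rounds the fractional per-agent quota up or down
# at random, and any arms left over once every agent has been served are
# handed to the last agent in agent_ids.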
@@ -381,8 +388,10 @@ def random_round(x: float) -> int:
   def initial_arm_assignment(self) -> Dict[int, List[int]]:
     return self.__assign_arms(list(range(self.__num_agents)))
 
-  def elimination(self, agent_ids: List[int],
-      messages: Dict[int, Dict[int, Tuple[float, int]]]) -> Dict[int, List[int]]:
+  def elimination(
+      self, agent_ids: List[int],
+      messages: Dict[int, Dict[int, Tuple[float,
+                                          int]]]) -> Dict[int, List[int]]:
 
     aggregate_messages: Dict[int, Tuple[float, int]] = {}
     for agent_id in messages.keys():
@@ -398,17 +407,17 @@ def elimination(self, agent_ids: List[int],
 
     accumulated_arm_ids = np.array(list(aggregate_messages.keys()))
     accumulated_em_mean_rewards = np.array(
-      list(map(lambda x: aggregate_messages[x][0], aggregate_messages.keys())))
+        list(map(lambda x: aggregate_messages[x][0],
+                 aggregate_messages.keys())))
 
     # elimination
     confidence_radius = np.sqrt(
-      self.__comm_rounds * np.log(200 * self.__num_agents * self.__comm_rounds)
-      / (self.__T * max(1, self.__num_agents / len(self.__active_arms)))
-    )
+        self.__comm_rounds *
+        np.log(200 * self.__num_agents * self.__comm_rounds) /
+        (self.__T * max(1, self.__num_agents / len(self.__active_arms))))
     highest_em_reward = np.max(accumulated_em_mean_rewards)
     self.__active_arms = list(
-      accumulated_arm_ids[accumulated_em_mean_rewards >=
-                          highest_em_reward - 2 * confidence_radius]
-    )
+        accumulated_arm_ids[accumulated_em_mean_rewards >= highest_em_reward -
+                            2 * confidence_radius])
 
     return self.__assign_arms(agent_ids)
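# Reading of the elimination step above (an interpretation of the code, with
# R = self.__comm_rounds, K = self.__num_agents, and self.__T presumably the
# per-agent pull budget):
#
#   confidence_radius = sqrt(R * log(200 * K * R) / (T * max(1, K / |active|)))
#
# An arm survives the round only if its aggregated empirical mean is at least
# highest_em_reward - 2 * confidence_radius; the surviving arms are then
# re-assigned to the requesting agents via __assign_arms().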