
Commit e3cbe2e

put game running parameters in play method
1 parent 861d803 commit e3cbe2e

11 files changed (+188, -214 lines)

README.md

Lines changed: 6 additions & 7 deletions
@@ -132,15 +132,14 @@ learners = [EpsGreedy(arm_num=len(arms)),
 ```Python
 # Horizon of the game
 horizon = 2000
-# Record intermediate regrets for each trial
-intermediate_regrets = list(range(0, horizon+1, 50))
 # Set up simulator using single-player protocol
-game = SinglePlayerProtocol(bandit=bandit,
-                            learners=learners,
-                            intermediate_regrets=intermediate_regrets,
-                            horizon=horizon)
+game = SinglePlayerProtocol(bandit=bandit, learners=learners)
+# Record intermediate regrets after these horizons
+intermediate_horizons = list(range(0, horizon+1, 50))
 # Start playing the game and for each setup we run 200 trials
-game.play(trials=200)
+game.play(trials=200,
+          intermediate_horizons=intermediate_horizons,
+          horizon=horizon)
 ```
 
 The following figure shows the simulation results.

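For context, the README snippet above corresponds to the following end-to-end sketch of the updated API. The arm and learner import paths are assumptions for illustration; the protocol construction and the `play` arguments follow the diff above, and an `output_filename` is included because the base `Protocol.play` signature in `utils.py` below still requires one.

```Python
# Minimal sketch of the updated API; import paths marked below are assumptions.
from banditpylib.arms import BernoulliArm                # assumed import path
from banditpylib.bandits import MultiArmedBandit
from banditpylib.learners import EpsGreedy               # assumed import path
from banditpylib.protocols import SinglePlayerProtocol

# Three-armed Bernoulli bandit
arms = [BernoulliArm(mu) for mu in [0.3, 0.5, 0.7]]
bandit = MultiArmedBandit(arms)
learners = [EpsGreedy(arm_num=len(arms))]

# Horizon of the game
horizon = 2000
# Record intermediate regrets after these horizons
intermediate_horizons = list(range(0, horizon + 1, 50))

# Set up simulator using single-player protocol
game = SinglePlayerProtocol(bandit=bandit, learners=learners)
# Run 200 trials per learner; results are appended to the output file as
# serialized `Trial` records
game.play(trials=200,
          output_filename='ordinary_mab.data',
          intermediate_horizons=intermediate_horizons,
          horizon=horizon)
```
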
banditpylib/protocols/collaborative_learning_protocol.py

Lines changed: 17 additions & 16 deletions
@@ -13,31 +13,32 @@
 class CollaborativeLearningProtocol(Protocol):
   """Collaborative learning protocol :cite:`tao2019collaborative`
 
-  This protocol is used to simulate the multi-agent game
-  as discussed in the paper. It runs in rounds. During each round,
-  the protocol runs the following steps in sequence:
+  This class defines the communication protocol for the collaborative learning
+  multi-agent game as discussed in the reference paper. The game runs in
+  rounds. During each round, the protocol runs the following steps in sequence:
 
   - For each agent,
 
-    * fetch the state of the corresponding environment and ask the agent for
-      actions;
-    * send the actions to the enviroment for execution;
-    * update the agent with the feedback of the environment;
+    * fetch the state of the corresponding bandit environment and ask the agent
+      for actions;
+    * send the actions to the bandit environment for execution;
+    * update the agent with the feedback of the bandit environment;
     * repeat the above steps until the agent enters the `WAIT` or `STOP` state.
 
-  - If there is at least one agent in `WAIT` state, then receive information
+  - If there is at least one agent in `WAIT` state, then fetch information
     broadcasted from every waiting agent and send them to master to decide
-    arm assignment of next round. Otherwise, stop the simulaiton.
+    arm assignment of next round. Otherwise, stop the game.
 
   :param Bandit bandit: bandit environment
   :param List[CollaborativeLearner] learners: learners that will be compared
+    with
 
   .. note::
     Each agent interacts with an independent bandit environment.
 
   .. note::
     Each action counts as a timestep. The time (or sample) complexity equals to
-    the maximum number of pulls used by the agents.
+    the maximum number of pulls across different agents.
 
   .. note::
     According to the protocol, number of rounds always equals to number of
@@ -50,23 +51,23 @@ def __init__(self, bandit: Bandit, learners: List[CollaborativeLearner]):
   def name(self) -> str:
     return 'collaborative_learning_protocol'
 
-  def _one_trial(self, random_seed: int, debug: bool) -> bytes:
-    if debug:
+  def _one_trial(self, random_seed: int) -> bytes:
+    if self._debug:
       logging.set_verbosity(logging.DEBUG)
     np.random.seed(random_seed)
 
     # Initialization
-    current_learner = cast(CollaborativeLearner, self.current_learner)
+    current_learner = cast(CollaborativeLearner, self._current_learner)
     current_learner.reset()
     agents = current_learner.agents
     bandits = []
     master = current_learner.master
     for _ in range(len(agents)):
-      bandits.append(dcopy(self.bandit))
+      bandits.append(dcopy(self._bandit))
       bandits[-1].reset()
 
     trial = Trial()
-    trial.bandit = self.bandit.name
+    trial.bandit = self._bandit.name
     trial.learner = current_learner.name
 
     communication_rounds, total_pulls = 0, 0
@@ -124,6 +125,6 @@ def _one_trial(self, random_seed: int, debug: bool) -> bytes:
     result = trial.results.add()
     result.rounds = communication_rounds
     result.total_actions = total_pulls
-    result.regret = self.bandit.regret(current_learner.goal)
+    result.regret = self._bandit.regret(current_learner.goal)
 
     return trial.SerializeToString()

banditpylib/protocols/collaborative_learning_protocol_test.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ def test_simple_run(self):
     collaborative_learner = CollaborativeLearningProtocol(
         bandit=bandit, learners=[lil_ucb_collaborative_learner])
     temp_file = tempfile.NamedTemporaryFile()
-    collaborative_learner.play(trials=3, output_filename=temp_file.name)
+    collaborative_learner.play(3, temp_file.name)
 
     with open(temp_file.name, 'rb') as f:
       # Check number of records is 3

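Both test files count the serialized records written by `play`. As a rough sketch of how those records could be read back, assuming the compiled protobuf module lives at `banditpylib.data_pb2` and that each `Trial` message is written with a varint length prefix (which the `_VarintBytes` import in `utils.py` suggests):

```Python
# Hypothetical reader for the dumped trial data; the proto module path is an
# assumption, and the varint-delimited framing mirrors `_VarintBytes` usage
# in utils.py.
from google.protobuf.internal.decoder import _DecodeVarint32

from banditpylib.data_pb2 import Trial  # assumed import path


def count_trials(filename: str) -> int:
  """Count the `Trial` records dumped by `Protocol.play`"""
  with open(filename, 'rb') as f:
    data = f.read()
  count, pos = 0, 0
  while pos < len(data):
    # Each record is prefixed with its varint-encoded length
    msg_len, pos = _DecodeVarint32(data, pos)
    trial = Trial()
    trial.ParseFromString(data[pos:pos + msg_len])
    pos += msg_len
    count += 1
  return count
```
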
banditpylib/protocols/single_player_protocol.py

Lines changed: 19 additions & 32 deletions
@@ -13,58 +13,45 @@
 class SinglePlayerProtocol(Protocol):
   """Single player protocol
 
-  This protocol is used to simulate the ordinary single-player game. It runs in
-  rounds. During each round, the protocol runs the following steps in sequence:
+  This class defines the communication protocol for the ordinary single-player
+  game. The game runs in rounds and during each round, the protocol runs the
+  following steps in sequence:
 
-  * fetch the state of the environment and ask the learner for actions;
-  * send the actions to the enviroment for execution;
-  * update the learner with the feedback of the environment.
+  * fetch the state of the bandit environment and ask the learner for actions;
+  * send the actions to the bandit environment for execution;
+  * update the learner with the feedback of the bandit environment.
 
-  The simulation stopping criteria is one of the following two:
+  The game runs until one of the following two stopping conditions is satisfied:
 
   * no actions are returned by the learner;
   * total number of actions achieve `horizon`.
 
-
   :param Bandit bandit: bandit environment
   :param List[SinglePlayerLearner] learners: learners to be compared with
-  :param List[int] intermediate_regrets: a list of rounds. If set, the regrets
-    after these rounds will be recorded
-  :param int horizon: horizon of the game (i.e., total number of actions a
-    leaner can make)
 
   .. note::
     During a round, a learner may want to perform multiple actions, which is
-    so-called batched learner. The total number of rounds shows how often the
-    learner wants to communicate with the bandit environment which is at most
-    `horizon`.
+    so-called batched learner.
   """
-  def __init__(self,
-               bandit: Bandit,
-               learners: List[SinglePlayerLearner],
-               intermediate_regrets: List[int] = None,
-               horizon: int = np.inf):  # type: ignore
+  def __init__(self, bandit: Bandit, learners: List[SinglePlayerLearner]):
     super().__init__(bandit=bandit, learners=cast(List[Learner], learners))
-    self.__intermediate_regrets = \
-        intermediate_regrets if intermediate_regrets is not None else []
-    self.__horizon = horizon
 
   @property
   def name(self) -> str:
     return 'single_player_protocol'
 
-  def _one_trial(self, random_seed: int, debug: bool) -> bytes:
-    if debug:
+  def _one_trial(self, random_seed: int) -> bytes:
+    if self._debug:
      logging.set_verbosity(logging.DEBUG)
     np.random.seed(random_seed)
 
     # Reset the bandit environment and the learner
-    self.bandit.reset()
-    current_learner = cast(SinglePlayerLearner, self.current_learner)
+    self._bandit.reset()
+    current_learner = cast(SinglePlayerLearner, self._current_learner)
     current_learner.reset()
 
     trial = Trial()
-    trial.bandit = self.bandit.name
+    trial.bandit = self._bandit.name
     trial.learner = current_learner.name
     rounds = 0
     # Number of actions the learner has made
@@ -74,20 +61,20 @@ def add_result():
       result = trial.results.add()
       result.rounds = rounds
       result.total_actions = total_actions
-      result.regret = self.bandit.regret(current_learner.goal)
+      result.regret = self._bandit.regret(current_learner.goal)
 
-    while total_actions < self.__horizon:
-      actions = current_learner.actions(self.bandit.context)
+    while total_actions < self._horizon:
+      actions = current_learner.actions(self._bandit.context)
 
       # Stop the game if no actions are returned by the learner
       if not actions.arm_pulls:
         break
 
       # Record intermediate regrets
-      if rounds in self.__intermediate_regrets:
+      if rounds in self._intermediate_horizons:
         add_result()
 
-      feedback = self.bandit.feed(actions)
+      feedback = self._bandit.feed(actions)
      current_learner.update(feedback)
 
       for arm_pull in actions.arm_pulls:

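One practical consequence of moving `horizon` and `intermediate_horizons` into `play` is that a single protocol object can be replayed under different horizons without being rebuilt. A minimal sketch (the output file names are placeholders, and `bandit` and `learners` are assumed to be set up as in the README snippet above):

```Python
# Reuse the same protocol instance across several horizons; the running
# parameters now travel with each `play` call instead of the constructor.
game = SinglePlayerProtocol(bandit=bandit, learners=learners)
for h in [500, 1000, 2000]:
  game.play(trials=50,
            output_filename='ordinary_mab_h%d.data' % h,
            intermediate_horizons=list(range(0, h + 1, 50)),
            horizon=h)
```
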
banditpylib/protocols/single_player_protocol_test.py

Lines changed: 2 additions & 3 deletions
@@ -15,10 +15,9 @@ def test_simple_run(self):
     ordinary_bandit = MultiArmedBandit(arms)
     eps_greedy_learner = EpsGreedy(arm_num=3)
     single_player = SinglePlayerProtocol(bandit=ordinary_bandit,
-                                         learners=[eps_greedy_learner],
-                                         horizon=10)
+                                         learners=[eps_greedy_learner])
     temp_file = tempfile.NamedTemporaryFile()
-    single_player.play(trials=3, output_filename=temp_file.name)
+    single_player.play(3, temp_file.name, horizon=10)
 
     with open(temp_file.name, 'rb') as f:
       # check number of records is 3

banditpylib/protocols/utils.py

Lines changed: 41 additions & 12 deletions
@@ -7,6 +7,7 @@
 from absl import logging
 
 from google.protobuf.internal.encoder import _VarintBytes  # type: ignore
+import numpy as np
 
 from banditpylib.bandits import Bandit
 from banditpylib.learners import Learner
@@ -23,8 +24,8 @@ def time_seed() -> int:
 
 
 class Protocol(ABC):
-  """Abstract class for a protocol which is used to coordinate the interactions
-  between the learner and the bandit environment.
+  """Abstract class for a communication protocol which defines the principles of
+  the interactions between the learner and the bandit environment.
 
   :param Bandit bandit: bandit environment
   :param List[Learner] learners: learners used to run simulations
@@ -48,26 +49,39 @@ def __init__(self, bandit: Bandit, learners: List[Learner]):
 
     self.__bandit = bandit
     self.__learners = learners
-    # The learner simulated currently
-    self.__current_learner: Learner = None
 
   @property
   @abstractmethod
   def name(self) -> str:
     """Protocol name"""
 
   @property
-  def bandit(self) -> Bandit:
-    """Bandit environment the simulator is using the learners to play with"""
+  def _bandit(self) -> Bandit:
+    """Bandit environment"""
     return self.__bandit
 
   @property
-  def current_learner(self) -> Learner:
-    """The learner used by the simulator currently"""
+  def _current_learner(self) -> Learner:
+    """The learner in simulation currently"""
     return self.__current_learner
 
+  @property
+  def _horizon(self) -> int:
+    """Horizon of the game"""
+    return self.__horizon
+
+  @property
+  def _intermediate_horizons(self) -> List[int]:
+    """Horizons used to report intermediate regrets"""
+    return self.__intermediate_horizons
+
+  @property
+  def _debug(self) -> bool:
+    """Debug mode"""
+    return self.__debug
+
   @abstractmethod
-  def _one_trial(self, random_seed: int, debug: bool) -> bytes:
+  def _one_trial(self, random_seed: int) -> bytes:
     """One trial of the game
 
     This method defines how to run one trial of the game.
@@ -91,23 +105,38 @@ def __write_to_file(self, data: bytes):
       f.write(data)
       f.flush()
 
-  def play(self, trials: int, output_filename: str, processes=-1, debug=False):
+  def play(
+      self,
+      trials: int,
+      output_filename: str,
+      processes: int = -1,
+      debug: bool = False,
+      # pylint: disable=dangerous-default-value
+      intermediate_horizons: List[int] = [],
+      horizon: int = np.inf):  # type: ignore
     """Start playing the game
 
     Args:
       trials: number of repetitions
-      output_filename: name of the file used to dump the results
+      output_filename: name of the file used to dump the simulation results
       processes: maximum number of processes to run. -1 means no limit
       debug: debug mode. When it is set to `True`, `trials` will be
         automatically set to 1 and debug information of the trial will be
        printed out.
+      intermediate_horizons: report intermediate regrets after these horizons
+      horizon: horizon of the game. Different protocols may have different
+        interpretations.
 
     .. warning::
       By default, `output_filename` will be opened with mode `a`.
     """
     if debug:
       trials = 1
 
+    self.__debug = debug
+    self.__horizon = horizon
+    self.__intermediate_horizons = intermediate_horizons
+
     for learner in self.__learners:
       # Set current learner
       self.__current_learner = learner
@@ -123,7 +152,7 @@ def play(self, trials: int, output_filename: str, processes=-1, debug=False):
       trial_results = []
       for _ in range(trials):
         result = pool.apply_async(self._one_trial,
-                                  args=[time_seed(), debug],
+                                  args=[time_seed()],
                                   callback=self.__write_to_file)
 
         trial_results.append(result)

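The base class changes above also fix the shape of custom protocols: `_one_trial` now takes only a random seed, and the running parameters are read from the new protected properties set inside `play`. A minimal, hypothetical subclass sketch (not part of the library; the import paths for `Protocol`, `SinglePlayerLearner`, and `Trial` are assumptions) might look like:

```Python
from typing import cast

import numpy as np
from absl import logging

from banditpylib.data_pb2 import Trial                 # assumed import path
from banditpylib.learners import SinglePlayerLearner   # assumed import path
from banditpylib.protocols import Protocol             # assumed import path


class OneRoundProtocol(Protocol):
  """Toy protocol where the learner acts exactly once per trial"""
  @property
  def name(self) -> str:
    return 'one_round_protocol'

  def _one_trial(self, random_seed: int) -> bytes:
    # Running parameters come from the protected properties set by `play`
    if self._debug:
      logging.set_verbosity(logging.DEBUG)
    np.random.seed(random_seed)

    self._bandit.reset()
    learner = cast(SinglePlayerLearner, self._current_learner)
    learner.reset()

    # Single round of interaction: fetch context, act, update
    actions = learner.actions(self._bandit.context)
    feedback = self._bandit.feed(actions)
    learner.update(feedback)

    trial = Trial()
    trial.bandit = self._bandit.name
    trial.learner = learner.name
    result = trial.results.add()
    result.rounds = 1
    # Count one action per requested arm pull (a simplification)
    result.total_actions = len(actions.arm_pulls)
    result.regret = self._bandit.regret(learner.goal)
    return trial.SerializeToString()
```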