# 📚 Case Study 101b: Reinforcement Learning Based Control, Multi-Agent

Before you start:
- Make sure you have completed [](TUT000_setup.ipynb) through [](TUT003_variables.ipynb), 
as well as [](TUT101a_app-rllib-monoagent.ipynb).

What you will learn in this chapter:
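- How to declare per-agent action spaces, observation spaces, and rewards in a `MultiAgentEnv` subclass (one agent per thermal zone).
- How to configure RLlib's PPO with one policy per agent and a policy mapping function.
- How to train the policies and run an evaluation.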

In [1]:
import numpy as _numpy_

from controllables.core.tools.gymnasium import BoxSpace, DictSpace
from controllables.core.tools.rllib import MultiAgentEnv
from controllables.energyplus import Actuator, OutputMeter
from controllables.energyplus import examples


class UserMultiAgentEnv(MultiAgentEnv):
    """Multi-agent environment with one heating-setpoint agent per zone.

    Every agent is identified by an agent ID (e.g. ``'zone_1-1'``) and gets:

    - an action space with a single ``'thermostat'`` entry, a scalar in
      ``[15, 20]`` bound to the ``Heating Setpoint`` actuator of its zone;
    - an observation space with two scalar entries: ``'energy-transfer'``
      (bound to the ``EnergyTransfer:Zone:<zone>`` meter of its zone) and
      ``'energy-consumption'`` (bound to the ``Electricity:HVAC`` meter,
      which is the same meter for all agents);
    - a reward equal to ``energy-transfer / energy-consumption``, or ``0.``
      when the consumption reading is zero.
    """

    # Agent ID -> zone key used in the actuator/meter references.
    # Single source of truth for the spaces and rewards declared below.
    # NOTE only the *outermost* iterable of a comprehension is evaluated in
    # class scope, so this name can be iterated over below but cannot be
    # referenced from inside the comprehension bodies.
    _agent_zone_keys = {
        'zone_1-1': 'SPACE1-1',
        'zone_2-1': 'SPACE2-1',
    }

    # Per-agent action spaces: one thermostat (heating setpoint) per zone.
    action_spaces = {
        space_key: DictSpace({
            'thermostat': BoxSpace(
                low=15., high=20.,
                dtype=_numpy_.float32,
                shape=(),
            ).bind(
                Actuator.Ref(
                    type='Zone Temperature Control',
                    control_type='Heating Setpoint',
                    key=ref_key,
                )
            ),
        })
        for space_key, ref_key in _agent_zone_keys.items()
    }

    # Per-agent observation spaces: a zone-specific meter plus the
    # HVAC electricity meter.
    observation_spaces = {
        space_key: DictSpace({
            'energy-transfer': BoxSpace(
                low=-_numpy_.inf, high=+_numpy_.inf,
                dtype=_numpy_.float32,
                shape=(),
            ).bind(
                OutputMeter.Ref(
                    type=f'EnergyTransfer:Zone:{ref_key}',
                )
            ),
            'energy-consumption': BoxSpace(
                low=-_numpy_.inf, high=+_numpy_.inf,
                dtype=_numpy_.float32,
                shape=(),
            ).bind(
                OutputMeter.Ref(
                    type='Electricity:HVAC',
                )
            ),
        })
        for space_key, ref_key in _agent_zone_keys.items()
    }

    # Per-agent reward functions. Each one only reads the observation of the
    # agent it is called with; the zero check guards the division.
    rewards = {
        space_key: lambda agent: (
            agent.observation.value['energy-transfer']
            / agent.observation.value['energy-consumption']
        ) if agent.observation.value['energy-consumption'] != 0. else 0.
        for space_key in _agent_zone_keys
    }

    def __init__(self, config: dict | None = None):
        """Initialize the env from the class-level spaces and rewards.

        :param config:
            Optional extra configuration. Its entries are merged last, so
            they override the class-level ``'action_spaces'``,
            ``'observation_spaces'`` and ``'rewards'`` defaults.
            ``None`` (the default) means no extra configuration.
        """
        # `None` instead of a mutable `dict()` default, which would be one
        # shared object across all calls.
        super().__init__({
            'action_spaces': self.__class__.action_spaces,
            'observation_spaces': self.__class__.observation_spaces,
            'rewards': self.__class__.rewards,
            **(config or {}),
        })

    def run(self):
        """Create the example system, attach this env to it and run it.

        Blocks until the system finishes, which keeps this env alive.
        """
        system = examples.systems.X5ZoneAirCooled(repeat=True)
        # uncomment to enable progress logging
        #system.add('logging:progress')
        # schedule a (recurring) episode associated with the system
        self.__attach__(system).schedule_episode()
        # run the system in background
        system.start()
        # wait for the system to finish to keep this env alive
        system.wait()

In [2]:
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.algorithms.ppo import PPO, PPOConfig

# Build a PPO algorithm that trains one independent policy per agent (zone).
algo = PPO(
    PPOConfig()
    .environment(UserMultiAgentEnv)
    .rollouts(
        # NOTE this env (an `ExternalEnv`) does not support connectors
        enable_connectors=False,
        # TODO confirm: per the original note, the local-worker env is
        # needed for evaluation (`algo.evaluate()`)
        create_env_on_local_worker=True,
    )
    .multi_agent(
        # One policy per agent, named after the agent ID. The IDs come from
        # the env class itself so they cannot drift from its declared spaces.
        policies={
            space_key: PolicySpec(
                action_space=UserMultiAgentEnv.action_spaces[space_key],
                observation_space=UserMultiAgentEnv.observation_spaces[space_key],
            )
            for space_key in UserMultiAgentEnv.action_spaces
        },
        # Each agent is mapped to the policy that carries its own ID.
        policy_mapping_fn=lambda agent_id, *args, **kwargs: str(agent_id),
    )
    # TODO requests one GPU; adjust to the resources available on your machine
    .resources(num_gpus=1.)
)

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
2024-10-05 18:08:34,191	INFO worker.py:1786 -- Started a local Ray instance.


In [3]:
# Run 20 training iterations, printing the full result dict of each one
# next to its iteration index.
for i in range(20):
    result = algo.train()
    print(i, result)



0 {'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'zone_2-1': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.2494422703360517, 'cur_kl_coeff': 0.20000000000000004, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.96875596344471, 'policy_loss': -0.004068968684684175, 'vf_loss': 9.97220127383868, 'vf_explained_var': -0.0020351000751058263, 'kl': 0.0031182732980596485, 'entropy': 1.4382414517303308, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 480.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'zone_1-1': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.315661608427763, 'cur_kl_coeff': 0.20000000000000004, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.976139344771703, 'policy_loss': -0.0059144112446423, 'vf_loss': 9.981092673540115, 'vf_explained_var': 0.0003855065753062566, 'kl': 0.004805310630308668, 'entropy': 1.3991320552925268, 'entropy_coeff': 0.0}, 'mod

[36m(RolloutWorker pid=292217)[0m Exception in thread Thread-3:
[36m(RolloutWorker pid=292217)[0m Traceback (most recent call last):
[36m(RolloutWorker pid=292217)[0m   File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 136, in cb_
[36m(RolloutWorker pid=292217)[0m     return cb(*args, **kwargs)
[36m(RolloutWorker pid=292217)[0m            ^^^^^^^^^^^^^^^^^^^
[36m(RolloutWorker pid=292217)[0m   File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 166, in _state
[36m(RolloutWorker pid=292217)[0m     self._event.__call__(
[36m(RolloutWorker pid=292217)[0m   File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 105, in __call__
[36m(RolloutWorker pid=292217)[0m     return super().__call__(context)
[36m(RolloutWorker pid=292217)[0m            ^^^^^^^^^^^^^^^^^^^^^^^^^
[36m(RolloutWorker pid=292217)[0m   File "/home/AD/user/lab/EnergyPlus-OOEP/packages/con

In [4]:
# Evaluate the trained policies. The call is the last expression of the cell,
# so the notebook displays the returned metrics dict.
# TODO plot the evaluation results
algo.evaluate()

{'env_runners': {'episode_reward_max': nan,
  'episode_reward_min': nan,
  'episode_reward_mean': nan,
  'episode_len_mean': nan,
  'episode_media': {},
  'episodes_timesteps_total': 0,
  'policy_reward_min': {},
  'policy_reward_max': {},
  'policy_reward_mean': {},
  'custom_metrics': {},
  'hist_stats': {'episode_reward': [], 'episode_lengths': []},
  'sampler_perf': {},
  'num_faulty_episodes': 0,
  'connector_metrics': {},
  'num_episodes': 0,
  'episode_return_max': nan,
  'episode_return_min': nan,
  'episode_return_mean': nan,
  'episodes_this_iter': 0},
 'num_agent_steps_sampled_this_iter': 40000,
 'num_env_steps_sampled_this_iter': 20000,
 'timesteps_this_iter': 20000}

Exception in thread Thread-8:
Traceback (most recent call last):
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 136, in cb_
    return cb(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 166, in _state
    self._event.__call__(
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 105, in __call__
    return super().__call__(context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/core/callbacks.py", line 414, in __call__
    return self._callables.__call__(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/core/callables.py", line 81, in __call__
    res[f] = f(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/co