# 📚 Case Study 101: Reinforcement Learning Based Control

Before you start:
- Make sure you have the following packages installed:
    - ~~`controllables-energyplus`~~ (`pip`): see 000_intro.ipynb (TODO hyperlink)

What you will learn in this chapter:
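- How to expose an EnergyPlus actuator and output variable as Gymnasium-style action and observation spaces.
- How to configure an RLlib PPO controller that interacts with the simulation through `ExternalEnv`.
- How to train and evaluate the controller, and how to stop the simulation afterwards.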

In [1]:
# Example EnergyPlus system used throughout this chapter
# (a 1-zone evaporative cooler model, going by the module name).
from controllables.energyplus.examples import system_1zone_evapcooler

# Build the simulation system that the controller configured below refers to.
# NOTE(review): `repeat=True` presumably restarts the simulation whenever a run
# finishes, so training can span many episodes — confirm against `make_system`.
system = system_1zone_evapcooler.make_system(repeat=True)

In [2]:
# Attach the 'logging:progress' component (it renders the progress bar shown
# below) and call `start()` on the result.
# The chained expression evaluates to the `System` object, which is why the
# cell echoes its repr.
# NOTE(review): presumably `start()` launches the simulation in the background
# so that later cells can run while it is active — confirm.
system.add('logging:progress').start()

  0%|          | 0/100 [00:00<?, ?it/s]

<controllables.energyplus.systems.System at 0x7f1ec7124710>

Configure the controller (powered by [RLlib](https://docs.ray.io/en/latest/rllib/index.html)).

In [3]:
import numpy as _numpy_
import gymnasium as _gymnasium_

from controllables.energyplus import (
    Actuator,
    OutputVariable,
)
from controllables.core.tools.gymnasium import (
    BoxSpace,
    DictSpace,
)
from controllables.core.tools.ray import ExternalEnv

from ray.rllib.algorithms.ppo import PPO, PPOConfig
from ray.rllib.algorithms.callbacks import DefaultCallbacks


# Action: one scalar "thermostat" value in [15, 20], bound to the
# 'Heating Setpoint' actuator of 'MAIN ZONE'.
thermostat_space = BoxSpace(
    low=15.,
    high=20.,
    dtype=_numpy_.float32,
    shape=(),
).bind(
    Actuator.Ref(
        type='Zone Temperature Control',
        control_type='Heating Setpoint',
        key='MAIN ZONE',
    )
)

# Observation: one unbounded scalar "temperature" value, bound to the
# 'Zone Air Temperature' output variable of 'MAIN ZONE'.
temperature_space = BoxSpace(
    low=-_numpy_.inf,
    high=+_numpy_.inf,
    dtype=_numpy_.float32,
    shape=(),
).bind(
    OutputVariable.Ref(
        type='Zone Air Temperature',
        key='MAIN ZONE',
    )
)

# Everything the external environment needs to talk to the simulation.
env_config = ExternalEnv.Config(
    action_space=DictSpace({'thermostat': thermostat_space}),
    observation_space=DictSpace({'temperature': temperature_space}),
    # Resolved lazily: the environment calls this to obtain the shared `system`.
    system=lambda: system,
    # Placeholder reward: a constant 1, whatever the agent's state is.
    reward_function=lambda agent: 1,
    # NOTE(review): presumably one agent step is taken each time this
    # simulation event fires — confirm against `ExternalEnv`.
    episode_events={
        'step': 'begin_zone_timestep_after_init_heat_balance',
    },
)

# PPO configuration: sample on the local worker only (no remote rollout
# workers), use PyTorch, and request one GPU.
config = (
    PPOConfig()
    .environment(ExternalEnv, env_config=env_config)
    .rollouts(
        create_env_on_local_worker=True,
        # Alternative tried by the author: num_rollout_workers=10
        num_rollout_workers=0,
        enable_connectors=False,
    )
    .framework('torch')
    # Evaluation settings are left at their defaults
    # (evaluation_interval / evaluation_num_workers are not set).
    .evaluation()
    .resources(num_gpus=1.)
)

# Instantiate the PPO algorithm from the configuration assembled above.
algo = PPO(config=config)

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))


Train the algorithm.

In [4]:
# Run 20 training iterations and print the raw result dict of each one.
for _iteration in range(20):
    training_result = algo.train()
    print(training_result)



{'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'default_policy': {'custom_metrics': {}, 'learner_stats': {'cur_kl_coeff': 0.20000000000000004, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.938514394657586, 'policy_loss': -0.06153002535143206, 'vf_loss': 10.0, 'vf_explained_var': 0.7428363177084154, 'kl': 0.00022207255682079322, 'entropy': 1.4278103168292713, 'entropy_coeff': 0.0}, 'model': {}, 'num_grad_updates_lifetime': 465.5, 'diff_num_grad_updates_vs_sampler_policy': 464.5}}, 'num_env_steps_sampled': 4000, 'num_env_steps_trained': 4000, 'num_agent_steps_sampled': 4000, 'num_agent_steps_trained': 4000}, 'sampler_results': {'episode_reward_max': nan, 'episode_reward_min': nan, 'episode_reward_mean': nan, 'episode_len_mean': nan, 'episode_media': {}, 'episodes_this_iter': 0, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [], 'episode_lengths': []}, 'sampler_perf': {}, 'num_faulty_

Evaluate the algorithm.

In [5]:
# Run an evaluation pass; the returned metrics dict is shown as the cell output.
algo.evaluate()

{'evaluation': {'sampler_results': {'episode_reward_max': 52847.0,
   'episode_reward_min': 52847.0,
   'episode_reward_mean': 52847.0,
   'episode_len_mean': 52848.0,
   'episode_media': {},
   'episodes_this_iter': 1,
   'policy_reward_min': {},
   'policy_reward_max': {},
   'policy_reward_mean': {},
   'custom_metrics': {},
   'hist_stats': {'episode_reward': [52847.0], 'episode_lengths': [52848]},
   'sampler_perf': {'mean_raw_obs_processing_ms': 0.10564736058028298,
    'mean_inference_ms': 0.9130532167985475,
    'mean_action_processing_ms': 0.08103630806459351,
    'mean_env_wait_ms': 0.26409723736386065,
    'mean_env_render_ms': 0.0},
   'num_faulty_episodes': 0,
   'connector_metrics': {}},
  'episode_reward_max': 52847.0,
  'episode_reward_min': 52847.0,
  'episode_reward_mean': 52847.0,
  'episode_len_mean': 52848.0,
  'episode_media': {},
  'episodes_this_iter': 1,
  'policy_reward_min': {},
  'policy_reward_max': {},
  'policy_reward_mean': {},
  'custom_metrics': {},
  

Stop the simulation environment when we are done!

In [6]:
# Stop the simulation started earlier in the notebook; the call evaluates to
# the `System` object, which the cell echoes.
# NOTE(review): the saved output of this cell shows an exception raised in a
# background thread after stopping — investigate whether shutdown races with
# an in-flight event callback.
system.stop()

<controllables.energyplus.systems.System at 0x7f1ec7124710>

Exception in thread Thread-6:
Traceback (most recent call last):
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 136, in cb_
    return cb(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 166, in _state
    self._event.__call__(
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/energyplus/events.py", line 105, in __call__
    return super().__call__(context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/core/callbacks.py", line 415, in __call__
    return self._callables.__call__(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/core/callables.py", line 81, in __call__
    res[f] = f(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/EnergyPlus-OOEP/packages/controllables/co