In [8]:
import numpy as _numpy_
from ray.tune.schedulers import PopulationBasedTraining
from ray import tune , air
from controllables.energyplus import System
from controllables.core import TemporaryUnavailableError
from controllables.core.tools.gymnasium import DiscreteSpace, BoxSpace, DictSpace
from controllables.core.tools.rllib import Env
from controllables.energyplus import Actuator, OutputVariable, OutputMeter
from controllables.energyplus import examples
import pythermalcomfort as pytc
import pandas as pd
import numpy as np
from collections import deque
from typing import Optional, TypedDict


class ComfortFunction:
    """Comfort score computed as ``pmv_limit - |PMV|``.

    The PMV index is obtained from ``pythermalcomfort``; the score is
    positive while ``|PMV|`` stays below ``pmv_limit`` and negative beyond it.

    Args:
        metab_rate: Metabolic rate forwarded to ``pmv_ppd`` as ``met``.
        clothing: Clothing insulation forwarded to ``clo_dynamic`` as ``clo``.
        pmv_limit: Absolute PMV value at which the score crosses zero.
    """

    def __init__(self, metab_rate=1.5, clothing=.5, pmv_limit=.5):
        self._metab_rate = _numpy_.asarray(metab_rate)
        self._clothing = _numpy_.asarray(clothing)
        self._pmv_limit = _numpy_.asarray(pmv_limit)

    class Inputs(TypedDict):
        temperature_drybulb: float
        temperature_radiant: float
        humidity: float
        # May be absent or ``None``; both fall back to 0.1 in ``__call__``.
        airspeed: Optional[float]

    def __call__(self, inputs: Inputs) -> float:
        """Return ``pmv_limit - |PMV|`` for the given indoor conditions.

        Args:
            inputs: Mapping with ``temperature_drybulb``,
                ``temperature_radiant`` and ``humidity``; ``airspeed`` is
                optional.

        Returns:
            The comfort score (a NumPy value, since ``pmv_limit`` is stored
            as an array).
        """
        # `airspeed` is declared Optional: an explicit None must behave like
        # a missing key instead of being passed on to `v_relative`.
        airspeed = inputs.get('airspeed')
        if airspeed is None:
            airspeed = .1

        pmv = pytc.models.pmv_ppd(
            tdb=inputs['temperature_drybulb'],
            tr=inputs['temperature_radiant'],
            # calculate relative air speed
            vr=pytc.utilities.v_relative(v=airspeed, met=self._metab_rate),
            rh=inputs['humidity'],
            met=self._metab_rate,
            # calculate dynamic clothing
            clo=pytc.utilities.clo_dynamic(clo=self._clothing, met=self._metab_rate),
            limit_inputs=False,
        )['pmv']
        return self._pmv_limit - _numpy_.abs(pmv)


class ComfortElecSavingRewardFunction:
    """Reward trading off comfort change against HVAC electricity change.

    For every zone the two most recent occupied-time samples of comfort and
    HVAC electricity are kept.  Once a zone has two samples, its reward is the
    relative comfort change divided by the relative electricity change,
    clipped to ``[-10, 10]``.  The value returned by a call is the sum of the
    per-zone rewards.

    Args:
        zones: Zone keys to score.  Defaults to ``DEFAULT_ZONES``.
        comfort_function: Callable mapping a dict with ``temperature_drybulb``,
            ``temperature_radiant``, ``humidity`` and ``airspeed`` to a
            comfort score.  Defaults to ``ComfortFunction()``.
    """

    DEFAULT_ZONES = (
        '1FFIRSTFLOORWEST:OPENOFFICE',
        '1FFIRSTFLOOREAST:OPENOFFICE',
        '0FGROUNDFLOORWEST:OPENOFFICE',
        '0FGROUNDFLOOREAST:OPENOFFICE',
        '1FFIRSTFLOORWEST1:OPENOFFICE',
        '1FFIRSTFLOOREAST1:OPENOFFICE',
        '0FGROUNDFLOORWEST1:OPENOFFICE',
        '0FGROUNDFLOOREAST1:OPENOFFICE',
    )

    def __init__(self, zones=None, comfort_function=None):
        self._zones = tuple(self.DEFAULT_ZONES if zones is None else zones)
        # The histories must outlive a single call: they are created once
        # here so that two consecutive samples can actually be compared.
        self._comfort_history = {zone: deque(maxlen=2) for zone in self._zones}
        self._elec_history = {zone: deque(maxlen=2) for zone in self._zones}
        self._comfort_values = {}
        self._comfort_function = (
            ComfortFunction() if comfort_function is None else comfort_function
        )

    class Inputs(TypedDict):
        # NOTE: the three zone measurements are looked up with a zone suffix,
        # i.e. ``temperature_drybulb_<zone>``, ``temperature_radiant_<zone>``
        # and ``humidity_<zone>``; a TypedDict cannot express those dynamic
        # keys, so the un-suffixed names below only document the value types.
        hvac_elec: float
        office_occupancy: float
        temperature_drybulb: float
        temperature_radiant: float
        humidity: float
        airspeed: Optional[float]

    @staticmethod
    def _relative_tradeoff(comfort_history, elec_history) -> float:
        """Return the clipped comfort/electricity ratio for one zone.

        Both histories must hold exactly two samples (previous, current).
        """
        comfort_prev, comfort_now = (
            np.asarray(value, dtype=float) for value in comfort_history
        )
        elec_prev, elec_now = (
            np.asarray(value, dtype=float) for value in elec_history
        )
        # Zero denominators yield inf/nan instead of raising or warning;
        # nan is mapped to 0 and inf is clipped below.
        with np.errstate(divide='ignore', invalid='ignore'):
            comfort_diff = (comfort_now - comfort_prev) / comfort_prev
            # TODO: relative to the *current* electricity reading (kept from
            # the original formula).
            elec_diff_saving = (elec_now - elec_prev) / elec_now
            if elec_diff_saving == 0:
                return 0.
            ratio = comfort_diff / elec_diff_saving
        if np.isnan(ratio):
            return 0.
        return float(np.clip(ratio, -10, 10))

    def __call__(self, inputs: Inputs) -> float:
        """Update the per-zone histories and return the summed reward.

        Args:
            inputs: Mapping with ``hvac_elec``, ``office_occupancy``, optional
                ``airspeed`` and, for every zone, the suffixed keys described
                on ``Inputs``.

        Returns:
            Sum of per-zone rewards.  A zone contributes ``0`` while the
            office is unoccupied or until it has two recorded samples.

        Raises:
            KeyError: If a required key is missing from ``inputs``.
        """
        hvac_elec = inputs['hvac_elec']
        occupied = inputs['office_occupancy'] != 0
        airspeed = inputs.get('airspeed', .1)

        total = 0.
        for zone in self._zones:
            comfort = self._comfort_function({
                'temperature_drybulb': inputs[f'temperature_drybulb_{zone}'],
                'temperature_radiant': inputs[f'temperature_radiant_{zone}'],
                'humidity': inputs[f'humidity_{zone}'],
                'airspeed': airspeed,
            })
            self._comfort_values[zone] = comfort
            if not occupied:
                # Samples are only recorded while the office is occupied.
                continue

            comfort_history = self._comfort_history[zone]
            elec_history = self._elec_history[zone]
            comfort_history.append(comfort)
            elec_history.append(hvac_elec)
            if len(comfort_history) < 2 or len(elec_history) < 2:
                # Not enough history for this zone yet; keep scoring the
                # remaining zones instead of aborting the whole call.
                continue

            total += self._relative_tradeoff(comfort_history, elec_history)
        return float(total)


class ComfortElecSavingVectorRewardFunction:
    """Reward based on the direction of movement in (electricity, comfort) space.

    The two most recent occupied-time samples are min-max normalised and the
    step between them is treated as a 2-D vector.  The reward is the length
    of that vector scaled by ``cos(angle - 3*pi/4)``, so a step towards less
    electricity and more comfort scores highest.

    Args:
        comfort_range: ``(min, max)`` used to normalise comfort scores.
        elec_range: ``(min, max)`` used to normalise HVAC electricity readings.
        comfort_function: Callable mapping a dict with ``temperature_drybulb``,
            ``temperature_radiant``, ``humidity`` and ``airspeed`` to a
            comfort score.  Defaults to ``ComfortFunction()``.
    """

    def __init__(
        self,
        comfort_range=(-0.5, 0.5),
        elec_range=(8720597, 14000000),
        comfort_function=None,
    ):
        self._comfort_history, self._elec_history = deque(maxlen=2), deque(maxlen=2)
        self._comfort_range = tuple(comfort_range)
        self._elec_range = tuple(elec_range)
        self._comfort_function = (
            ComfortFunction() if comfort_function is None else comfort_function
        )

    class Inputs(TypedDict):
        hvac_elec: float
        office_occupancy: float
        temperature_drybulb: float
        temperature_radiant: float
        humidity: float
        airspeed: Optional[float]

    @staticmethod
    def _normalize(value, bounds):
        """Min-max normalise ``value`` with ``bounds = (low, high)``."""
        low, high = bounds
        return (value - low) / (high - low)

    def __call__(self, inputs: Inputs) -> float:
        """Record the current sample and return the directional reward.

        Args:
            inputs: Mapping with ``hvac_elec``, ``office_occupancy``,
                ``temperature_drybulb``, ``temperature_radiant``,
                ``humidity`` and optionally ``airspeed``.

        Returns:
            ``0`` while the office is unoccupied or fewer than two samples
            have been recorded; otherwise ``cos(angle - 3*pi/4) * distance``
            of the normalised step.
        """
        hvac_elec = inputs['hvac_elec']
        office_occupancy = inputs['office_occupancy']
        comfort = self._comfort_function({
            'temperature_drybulb': inputs['temperature_drybulb'],
            'temperature_radiant': inputs['temperature_radiant'],
            'humidity': inputs['humidity'],
            'airspeed': inputs.get('airspeed', .1),
        })
        if office_occupancy == 0:
            # Samples are only recorded while the office is occupied.
            return 0.

        self._comfort_history.append(comfort)
        self._elec_history.append(hvac_elec)
        if len(self._comfort_history) < 2:
            return 0.

        comfort_prev, comfort_now = (
            self._normalize(value, self._comfort_range)
            for value in self._comfort_history
        )
        elec_prev, elec_now = (
            self._normalize(value, self._elec_range)
            for value in self._elec_history
        )
        delta_comfort = comfort_now - comfort_prev
        delta_elec = elec_now - elec_prev

        angle = _numpy_.arctan2(delta_comfort, delta_elec)
        distance = _numpy_.sqrt(delta_elec ** 2 + delta_comfort ** 2)
        # 3*pi/4 is the direction (-electricity, +comfort).
        reward_angle = _numpy_.cos(angle - _numpy_.pi * 3 / 4)
        return float(reward_angle * distance)


class UserEnv(Env):
    """Environment exposing the cooling setpoints of all open-office zones.

    The action space holds one cooling-setpoint actuator per zone in
    ``ZONES``; the observation space holds each zone's air temperature,
    radiant temperature and relative humidity plus the HVAC electricity meter
    and the office occupancy schedule value.
    """

    # Zone keys controlled and observed by this environment.  They must match
    # the zones scored by `ComfortElecSavingRewardFunction`.
    ZONES = (
        '1FFIRSTFLOORWEST:OPENOFFICE',
        '1FFIRSTFLOOREAST:OPENOFFICE',
        '0FGROUNDFLOORWEST:OPENOFFICE',
        '0FGROUNDFLOOREAST:OPENOFFICE',
        '1FFIRSTFLOORWEST1:OPENOFFICE',
        '1FFIRSTFLOOREAST1:OPENOFFICE',
        '0FGROUNDFLOORWEST1:OPENOFFICE',
        '0FGROUNDFLOOREAST1:OPENOFFICE',
    )

    # room agents
    class AgentRewardFunction:
        """Adapt the agent's observation to `ComfortElecSavingRewardFunction`.

        Args:
            zones: Zone keys whose observations are forwarded.  Defaults to
                ``UserEnv.ZONES``.
        """

        def __init__(self, zones=None):
            # `UserEnv` is only resolvable once the outer class exists, so the
            # class body below passes the zones explicitly.
            self._zones = tuple(UserEnv.ZONES if zones is None else zones)
            self._comfort_elec_saving_reward_function = ComfortElecSavingRewardFunction()
            #self._comfort_elec_saving_vector_reward_function = ComfortElecSavingVectorRewardFunction()

        def __call__(self, agent) -> float:
            """Return the reward for ``agent``, or ``0.`` if data is unavailable."""
            try:
                observation = agent.observation.value
                inputs = {
                    'hvac_elec': observation['energy-consumption'],
                    'office_occupancy': observation['Office Occupancy'],
                }
                # Observation keys use `temperature:drybulb_<zone>` while the
                # reward function expects `temperature_drybulb_<zone>`.
                for zone in self._zones:
                    inputs[f'temperature_drybulb_{zone}'] = observation[f'temperature:drybulb_{zone}']
                    inputs[f'temperature_radiant_{zone}'] = observation[f'temperature:radiant_{zone}']
                    inputs[f'humidity_{zone}'] = observation[f'humidity_{zone}']
                return self._comfort_elec_saving_reward_function(inputs)
            except TemporaryUnavailableError:
                return 0.

    def _unbounded_scalar():
        # Class-body helper (deleted below): a fresh unbounded float32 scalar space.
        return BoxSpace(
            low=-_numpy_.inf, high=+_numpy_.inf,
            dtype=_numpy_.float32,
            shape=(),
        )

    # Accumulate the entries of *all* zones; rebinding `config` inside the
    # loop would keep only the last zone.
    _action_spaces = {}
    _observation_spaces = {}
    for _zone in ZONES:
        _action_spaces[f'thermostat_{_zone}'] = BoxSpace(
            low=20., high=30.,
            dtype=_numpy_.float32,
            shape=(),
        ).bind(Actuator.Ref(
            type='Zone Temperature Control',
            control_type='Cooling Setpoint',
            key=_zone,
        ))
        _observation_spaces[f'temperature:drybulb_{_zone}'] = _unbounded_scalar().bind(
            OutputVariable.Ref(
                type='Zone Mean Air Temperature',
                key=_zone,
            )
        )
        _observation_spaces[f'temperature:radiant_{_zone}'] = _unbounded_scalar().bind(
            OutputVariable.Ref(
                type='Zone Mean Radiant Temperature',
                key=_zone,
            )
        )
        _observation_spaces[f'humidity_{_zone}'] = _unbounded_scalar().bind(
            OutputVariable.Ref(
                type='Zone Air Relative Humidity',
                key=_zone,
            )
        )

    # Disabled observations, kept for reference:
    #   'AHU COOLING COIL'     -> OutputVariable 'Cooling Coil Total Cooling Rate',
    #                             key 'AIR LOOP AHU COOLING COIL'
    #   'Fan Electricity Rate' -> OutputVariable 'Fan Electricity Rate',
    #                             key 'AIR LOOP AHU SUPPLY FAN'
    _observation_spaces['energy-consumption'] = _unbounded_scalar().bind(
        OutputMeter.Ref(
            type='Electricity:HVAC',
        )
    )
    _observation_spaces['Office Occupancy'] = _unbounded_scalar().bind(
        OutputVariable.Ref(
            type='Schedule Value',
            key='Office_OpenOff_Occ',
        )
    )

    config: Env.Config = {
        'action_space': DictSpace(_action_spaces),
        'observation_space': DictSpace(_observation_spaces),
        'reward': AgentRewardFunction(ZONES),
    }

    # Do not leave construction helpers behind as class attributes.
    del _zone, _action_spaces, _observation_spaces, _unbounded_scalar

    def __init__(self, config: Optional[dict] = None):
        """Create the environment.

        Args:
            config: Optional overrides merged on top of the class-level
                ``config``.
        """
        super().__init__({
            **self.__class__.config,
            **(config or {}),
        })

    def run(self):
        """Create the EnergyPlus system, attach this env and run it to completion."""
        system = System(
            building='all_room_have_hvac.idf',
            weather='SGP_Singapore_486980_IWEC.epw',
            # TODO
            #report='tmp/',
            repeat=True,
        )
        # system.add('logging:progress')
        self.__attach__(system).schedule_episode(errors='warn')
        system.start().wait()

In [9]:
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.algorithms.ppo import PPO, PPOConfig

def get_config():
    """Build the PPO configuration used to train on ``UserEnv`` (CPU only)."""
    ppo_config = PPOConfig()
    ppo_config = ppo_config.environment(UserEnv)
    # NOTE this env (an `ExternalEnv`) does not support connectors
    ppo_config = ppo_config.env_runners(enable_connectors=False)
    # Multi-agent setup, currently disabled:
    # ppo_config = ppo_config.multi_agent(
    #     policies={
    #         policy_id: PolicySpec(
    #             action_space=agent_config['action_space'],
    #             observation_space=agent_config['observation_space'],
    #         )
    #         for policy_id, agent_config in UserMultiAgentEnv.config['agents'].items()
    #     },
    #     policy_mapping_fn=lambda agent_id, *args, **kwargs: str(agent_id),
    # )
    # TODO
    ppo_config = ppo_config.resources(num_gpus=0)
    return ppo_config

In [10]:
# Population Based Training scheduler: measured in training iterations, with
# a perturbation interval of 4 and a resample probability of 0.25.
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    perturbation_interval=4,
    resample_probability=0.25,
    # Hyperparameters PBT may mutate: distributions for continuous values,
    # lists for categorical choices (single-element lists pin the value).
    hyperparam_mutations={
        "lr": tune.uniform(1e-5, 0.1),
        "batch_mode": ["complete_episodes"],
        "train_batch_size": [4000],
        "sgd_minibatch_size": [32, 64, 128, 256, 512],
        "num_sgd_iter": [10, 20, 30],
        "clip_param": tune.uniform(0.1, 0.3),
    },
    #require_attrs=False,
)

# Tune two PPO trials under the PBT scheduler, maximising the mean episode
# reward, stopping at 200 training iterations and checkpointing at the end.
tuner = tune.Tuner(
    "PPO",
    param_space=get_config().to_dict(),
    tune_config=tune.TuneConfig(
        scheduler=pbt,
        num_samples=2,
        metric="env_runners/episode_reward_mean",
        mode="max",
    ),
    run_config=air.RunConfig(
        stop={"training_iteration": 200},
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=True
        ),
        verbose=2,
    )
)

# Runs the experiment; the recorded output below shows both trials in ERROR.
results = tuner.fit()


2024-11-16 04:38:02,611	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-11-16 04:38:29
Running for:,00:00:26.54
Memory:,14.9/31.0 GiB

Trial name,# failures,error file
PPO_UserEnv_90994_00000,1,/tmp/ray/session_2024-11-16_04-18-27_354745_1621478/artifacts/2024-11-16_04-38-02/PPO_2024-11-16_04-38-02/driver_artifacts/PPO_UserEnv_90994_00000_0_2024-11-16_04-38-02/error.txt
PPO_UserEnv_90994_00001,1,/tmp/ray/session_2024-11-16_04-18-27_354745_1621478/artifacts/2024-11-16_04-38-02/PPO_2024-11-16_04-38-02/driver_artifacts/PPO_UserEnv_90994_00001_1_2024-11-16_04-38-02/error.txt

Trial name,status,loc
PPO_UserEnv_90994_00000,ERROR,192.168.200.249:1625628
PPO_UserEnv_90994_00001,ERROR,192.168.200.249:1625629


2024-11-16 04:38:28,617	ERROR tune_controller.py:1331 -- Trial task failed for trial PPO_UserEnv_90994_00001
Traceback (most recent call last):
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/_private/worker.py", line 2691, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^

Trial name
PPO_UserEnv_90994_00000
PPO_UserEnv_90994_00001


2024-11-16 04:38:29,140	ERROR tune_controller.py:1331 -- Trial task failed for trial PPO_UserEnv_90994_00000
Traceback (most recent call last):
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/AD/user/lab/reports/2024xxxx/.venv/lib/python3.11/site-packages/ray/_private/worker.py", line 2691, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^

In [8]:
# Inspect the current action value of the local worker's environment.
# NOTE(review): `algo` is not defined in the visible cells — presumably a
# trained/restored algorithm instance from another session; confirm.
algo.workers.local_worker().env.action.value

{'thermostat_1FWEST': array(27.501886, dtype=float32),
 'thermostat_1FEAST': array(22., dtype=float32)}

In [5]:
# NOTE(review): `c` is not defined in any visible cell — presumably a stray
# keystroke left in this cell; confirm and remove if so.
c

# varlogger.plot({
#     'traces': [
#         dict(
#             x='wallclock:calendar', 
#             y=Actuator.Ref(
#                 type='Fan',
#                 control_type='Fan Air Mass Flow Rate',
#                 key='AIR LOOP AHU SUPPLY FAN',
#             ),
#         ),        
#     ],
# })


In [6]:
# Register the variables to record with `varlogger`.
# NOTE(review): the 'clock' track below is commented out, yet the plot cell
# uses x='clock' for every trace — confirm whether it should be enabled.
# varlogger.track('clock',
#     'wallclock:calendar'
# )

varlogger.track('Fan Air Mass Flow Rate', Actuator.Ref(
    type='Fan',
    control_type='Fan Air Mass Flow Rate',
    key='AIR LOOP AHU SUPPLY FAN',
),
)

# NOTE(review): this key is a bare zone name, whereas the 1FWEST track below
# uses '<zone> COOLING SETPOINT SCHEDULE' — presumably one of them is wrong;
# verify against the schedule names in the IDF.
varlogger.track('thermostat_1FEAST',Actuator.Ref(
    type='Schedule:Compact',
    control_type='Schedule Value',
    key='1FFIRSTFLOOREAST:OPENOFFICE',
)
)

varlogger.track('thermostat_1FWEST',Actuator.Ref(
    type='Schedule:Compact',
    control_type='Schedule Value',
    key='1FFIRSTFLOORWEST:OPENOFFICE COOLING SETPOINT SCHEDULE',
)
)

varlogger.track('Zone Mean Air Temperature',OutputVariable.Ref(
    type='Zone Mean Air Temperature',
    key='1FFIRSTFLOORWEST:OPENOFFICE',
)
)

# Tracked without a label: the recorded `_data` output shows this entry keyed
# by the reference object itself.
varlogger.track(OutputVariable.Ref(
    type='Zone Air Relative Humidity',
    key='1FFIRSTFLOORWEST:OPENOFFICE',
)
)

varlogger.track('Zone Mean Radiant Temperature',OutputVariable.Ref(
    type='Zone Mean Radiant Temperature',
    key='1FFIRSTFLOORWEST:OPENOFFICE',
)
)

varlogger.track('Cooling Coil Total Cooling Rate',OutputVariable.Ref(
    type='Cooling Coil Total Cooling Rate',
    key='AIR LOOP AHU COOLING COIL',
)
)

varlogger.track('Fan Electricity Rate',OutputVariable.Ref(
    type='Fan Electricity Rate',
    key='AIR LOOP AHU SUPPLY FAN',
)
)

varlogger.track('Office Occupancy',OutputVariable.Ref(
    type='Schedule Value',
    key='Office_OpenOff_Occ',
)
)


In [8]:
# Inspect the raw per-track records (private attribute; debugging only).
varlogger._data

{'Fan Air Mass Flow Rate': History.Record(ref=Actuator.Ref(type='Fan', control_type='Fan Air Mass Flow Rate', key='AIR LOOP AHU SUPPLY FAN'), values=deque([0.0], maxlen=10000)),
 'thermostat_1FEAST': History.Record(ref=Actuator.Ref(type='Schedule:Compact', control_type='Schedule Value', key='1FFIRSTFLOOREAST:OPENOFFICE'), values=deque([], maxlen=10000)),
 'thermostat_1FWEST': History.Record(ref=Actuator.Ref(type='Schedule:Compact', control_type='Schedule Value', key='1FFIRSTFLOORWEST:OPENOFFICE COOLING SETPOINT SCHEDULE'), values=deque([], maxlen=10000)),
 'Zone Mean Air Temperature': History.Record(ref=OutputVariable.Ref(type='Zone Mean Air Temperature', key='1FFIRSTFLOORWEST:OPENOFFICE'), values=deque([], maxlen=10000)),
 OutputVariable.Ref(type='Zone Air Relative Humidity', key='1FFIRSTFLOORWEST:OPENOFFICE'): History.Record(ref=OutputVariable.Ref(type='Zone Air Relative Humidity', key='1FFIRSTFLOORWEST:OPENOFFICE'), values=deque([], maxlen=10000)),
 'Zone Mean Radiant Temperature': 

In [10]:
# Plot every labelled track against the 'clock' track, with `autoupdate=1_000`.
# NOTE(review): the 'clock' track is commented out in the tracking cell —
# confirm it is registered before running this cell.
varlogger.plot({
    'traces': [
        dict(
            x='clock', 
            y='Fan Air Mass Flow Rate',
        ),
        dict(
            x='clock', 
            y='thermostat_1FEAST',
        ),
        dict(
            x='clock', 
            y='thermostat_1FWEST',
        ),
        dict(
            x='clock', 
            y='Zone Mean Air Temperature',
        ),
        dict(
            x='clock', 
            y='Zone Mean Radiant Temperature',
        ),
        dict(
            x='clock', 
            y='Cooling Coil Total Cooling Rate',
        ),
        dict(
            x='clock', 
            y='Fan Electricity Rate',
        ),
        dict(
            x='clock', 
            y='Office Occupancy',
        ),
    ],
}, autoupdate=1_000)


<energyplus.ooep.specs.tools.history.History.Plot at 0x7f7bb0d77650>

In [7]:
# Show the recorded tracks as an interactive table.
import itables as _itables_
# NOTE(review): the recorded output of this cell is "ValueError: All arrays
# must be of the same length"; the `_data` dump shows tracks holding different
# numbers of samples, which presumably causes it — confirm.
df = varlogger.dataframe()
_itables_.show(df)
# df.to_csv('datasave/data.csv', index=False, sep=';')


ValueError: All arrays must be of the same length