In [2]:
# %env KERAS_BACKEND=torch

# import tensorflow as tf
# tf.compat.v1.enable_eager_execution()

import numpy as _numpy_
from ray.tune.schedulers import PopulationBasedTraining
from ray import tune , air
from controllables.energyplus import System
from controllables.core import TemporaryUnavailableError
from controllables.core.tools.gymnasium import DiscreteSpace, BoxSpace, DictSpace
from controllables.core.tools.rllib import MultiAgentEnv
from controllables.energyplus import Actuator, OutputVariable, OutputMeter
from controllables.energyplus import examples
import pythermalcomfort as pytc
from predict_next import PVPredictor


class DelayedPVPredictor:
    """Wrap a ``PVPredictor`` so every call is answered from the previous input.

    The first call only stores its input and returns ``None``; each later call
    feeds the input stored by the call before it to the wrapped predictor.
    """

    def __init__(self, **kwargs):
        # No sample has been seen yet.
        self.x_prev = None
        self.predictor = PVPredictor(**kwargs)

    def __call__(self, x, **kwargs):
        """Return the predictor's output for the previously stored input.

        Args:
            x: The current sample; it is stored and used on the next call.
            **kwargs: Forwarded unchanged to the wrapped predictor.

        Returns:
            ``None`` while no earlier sample exists, otherwise whatever the
            wrapped predictor returns for the earlier sample.
        """
        previous = self.x_prev
        prediction = (
            None if previous is None
            else self.predictor(previous, **kwargs)
        )
        # Only remember the new sample once the predictor call has completed.
        self.x_prev = x
        return prediction


class PVRewardFunction:
    """Produce a PV-output estimate for an agent's system via a delayed predictor."""

    # (feature name handed to the predictor, EnergyPlus output variable type);
    # every variable is read with the key 'ENVIRONMENT'.
    _WEATHER_VARIABLES = (
        ('Drybulb_Temperature', 'Site Outdoor Air Drybulb Temperature'),
        ('Air_Pressure', 'Site Outdoor Air Barometric Pressure'),
        ('Wind_Speed', 'Site Wind Speed'),
        ('Wind_Direction', 'Site Wind Direction'),
        ('Diffuse_Solar_Radiation', 'Site Diffuse Solar Radiation Rate per Area'),
        ('Direct_Solar_Radiation', 'Site Direct Solar Radiation Rate per Area'),
        ('Solar_Azimuth_Angle', 'Site Solar Azimuth Angle'),
        ('Solar_Altitude_Angle', 'Site Solar Altitude Angle'),
    )

    def __init__(self, **kwargs):
        self.predictor = DelayedPVPredictor(**kwargs)

    def __call__(self, agent, **kwargs):
        """Return the predictor's ``'out_w'`` entry, or ``0.`` when unavailable.

        Args:
            agent: Object exposing ``.system``, from which the PV meter and the
                site weather variables are read.
            **kwargs: Forwarded unchanged to the delayed predictor.

        Returns:
            ``0.`` if any variable is temporarily unavailable or the predictor
            returned ``None``; otherwise ``target['out_w']``.
        """
        system = agent.system

        try:
            features = {
                # The PV meter reading is scaled by 1/4 before use.
                'out_w': system[OutputMeter.Ref('Photovoltaic:ElectricityProduced')].value / 4,
                **{
                    feature: system[OutputVariable.Ref(variable, 'ENVIRONMENT')].value
                    for feature, variable in self._WEATHER_VARIABLES
                },
            }
            target = self.predictor(features, **kwargs)
        except TemporaryUnavailableError:
            return 0.

        return 0. if target is None else target['out_w']
    

class RewardFunction:
    """Room-agent reward combining thermal comfort (PMV) and PV/consumption matching.

    The reward is ``comfort - penalty + pv_bonus``, capped at 10, where

    * ``comfort = -2 * log(max(2 * |pmv|, 1e-2))`` (always applied),
    * ``penalty = 100 * (|pmv| - pmv_limit)`` when the office is occupied and
      ``|pmv|`` exceeds ``pmv_limit``, else 0,
    * ``pv_bonus = max(2 - 2 * |ratio - 1|, 0)`` with ``ratio`` the value of
      ``pv_reward_fn`` divided by the observed ``'energy-consumption'`` (0 when
      the office is unoccupied or consumption is 0).

    Args:
        metab_rate: Metabolic rate passed to the PMV model as ``met``.
        clothing: Clothing insulation fed to ``clo_dynamic``.
        pmv_limit: ``|pmv|`` threshold above which the penalty applies.
    """

    def __init__(self, metab_rate=1.5, clothing=.5, pmv_limit=.5):
        self._metab_rate = _numpy_.asarray(metab_rate)
        self._clothing = _numpy_.asarray(clothing)
        self._pmv_limit = _numpy_.asarray(pmv_limit)
        self.pv_reward_fn = PVRewardFunction(lookback=1000)

    def __call__(self, agent):
        """Compute the reward for ``agent`` from a single observation snapshot.

        Returns ``0.`` when the observation is temporarily unavailable.
        """
        try:
            # Read the observation exactly once: every term below uses this
            # snapshot, so no later read can raise or return different values.
            observation = agent.observation.value
        except TemporaryUnavailableError:
            return 0.

        occupied = observation['Office Occupancy'] != 0

        pmv = pytc.models.pmv_ppd(
            tdb=observation['temperature:drybulb'],
            tr=observation['temperature:radiant'],
            # calculate relative air speed
            vr=pytc.utilities.v_relative(
                v=observation.get('airspeed', .1),
                met=self._metab_rate,
            ),
            rh=observation['humidity'],
            met=self._metab_rate,
            # calculate dynamic clothing
            clo=pytc.utilities.clo_dynamic(clo=self._clothing, met=self._metab_rate),
            limit_inputs=False,
        )['pmv']
        abs_pmv = _numpy_.abs(pmv)

        # Discomfort penalty only counts while the office is occupied.
        penalty = 0
        if occupied and abs_pmv > self._pmv_limit:
            penalty = 100 * (abs_pmv - self._pmv_limit)

        # Clamp the log argument so pmv == 0 does not produce log(0).
        reward_pmv = 2 * (-_numpy_.log(max(2 * abs_pmv, 1e-2)))

        reward_ratio = 0
        if occupied:
            energy = observation['energy-consumption']
            if energy != 0:
                # NOTE(review): the predictor is only invoked on occupied steps
                # with non-zero consumption, so its "previous sample" may not
                # be the previous timestep - confirm this is intended.
                reward_ratio = self.pv_reward_fn(
                    agent,
                    experience_len=100,
                    fit_options=dict(verbose=0),
                    predict_options=dict(verbose=0),
                ) / energy

        reward = (
            reward_pmv
            - penalty
            + max(2 - abs(reward_ratio - 1) * 2, 0)
        )
        return min(reward, 10)

class RewardFunction_for_PV:
    """Reward how closely PV production matches HVAC electricity consumption."""

    def __call__(self, agent):
        """Return ``max(2 - 2 * |ratio - 1|, 0)`` for the agent's observation.

        ``ratio`` is ``(PV / 4) / energy-consumption`` while the office is
        occupied and consumption is non-zero, and ``0`` otherwise. Returns
        ``0.`` when the observation is temporarily unavailable.
        """
        try:
            # Read the observation exactly once; all values below come from
            # this snapshot so no second read can raise outside the handler.
            observation = agent.observation.value
        except TemporaryUnavailableError:
            return 0.

        occupied = observation['Office Occupancy'] != 0
        pv = observation['PV'] / 4

        reward_ratio = 0
        if occupied:
            energy = observation['energy-consumption']
            if energy != 0:
                reward_ratio = pv / energy

        return max(2 - abs(reward_ratio - 1) * 2, 0)


class UserMultiAgentEnv(MultiAgentEnv):
    """Multi-agent EnergyPlus environment for the PV-equipped office model.

    ``config['agents']`` declares eight open-office room agents (one per
    ``(agent_id, zone key)`` pair listed in the comprehension below) plus three
    equipment agents (``CHILLER``, ``AHU``, ``PV``). Each equipment agent sets
    ``participation`` to a callable that always returns ``False``
    (presumably this keeps them out of training - confirm against
    ``MultiAgentEnv``).
    """

    config: MultiAgentEnv.Config = {
        'agents': {
            # room agents
            **{
                agent_id: {
                    'action_space': DictSpace({
                        # Cooling setpoint of the zone thermostat, 20..30.
                        'thermostat': BoxSpace(
                            low=20., high=30.,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(
                            Actuator.Ref(
                                type='Zone Temperature Control',
                                control_type='Cooling Setpoint',
                                key=var_key,
                            )
                        ),
                        # 'air flow rate': BoxSpace(
                        #     low=0., high=100.,
                        #     dtype=_numpy_.float32,
                        #     shape=(),
                        # ).bind(Actuator.Ref(
                        #     type='Fan',
                        #     control_type='Fan Air Mass Flow Rate',
                        #     key='AIR LOOP AHU SUPPLY FAN'
                        # )),
                        # 'air-flow-rate': BoxSpace(
                        #     low=0., high=20.,
                        #     dtype=_numpy_.float32,
                        #     shape=(),
                        # ).bind(
                        #     Actuator.Ref(
                        #         type='System Node Setpoint',
                        #         control_type='Mass Flow Rate Setpoint',
                        #         key=f'{var_key} SINGLE DUCT VAV NO REHEAT SUPPLY OUTLET',
                        #     )
                        # ),
                    }),
                    'observation_space': DictSpace({
                        # 'temperature': BoxSpace(
                        #     low=-_numpy_.inf, high=+_numpy_.inf,
                        #     dtype=_numpy_.float32,
                        #     shape=(),
                        # ).bind(
                        #     OutputVariable.Ref(
                        #         type='Zone Air Temperature',
                        #         key=var_key,
                        #     )
                        # ),
                        'temperature:drybulb': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputVariable.Ref(
                            type='Zone Mean Air Temperature',
                            key=var_key,
                        )),
                        'temperature:radiant': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputVariable.Ref(
                            type='Zone Mean Radiant Temperature',
                            key=var_key,
                        )),
                        'humidity': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputVariable.Ref(
                            type='Zone Air Relative Humidity',
                            key=var_key,
                        )),
                        'PV': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(
                            OutputMeter.Ref(
                                type='Photovoltaic:ElectricityProduced',
                            )
                        ),
                        'energy-consumption': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(
                            OutputMeter.Ref(
                                type='Electricity:HVAC',
                            )
                        ),
                        'Office Occupancy': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputVariable.Ref(
                            type='Schedule Value',
                            key='Office_OpenOff_Occ',
                        )),
                    }),
                    # Evaluated once per room, so every room agent owns its
                    # own RewardFunction instance.
                    'reward': RewardFunction(),
                }
                for agent_id, var_key in [
                    ('1FWEST', '1FFIRSTFLOORWEST:OPENOFFICE'),
                    ('1FEAST', '1FFIRSTFLOOREAST:OPENOFFICE'),
                    ('0FWEST', '0FGROUNDFLOORWEST:OPENOFFICE'),
                    ('0FEAST', '0FGROUNDFLOOREAST:OPENOFFICE'),
                    ('1FWEST1', '1FFIRSTFLOORWEST1:OPENOFFICE'),
                    ('1FEAST1', '1FFIRSTFLOOREAST1:OPENOFFICE'),
                    ('0FWEST1', '0FGROUNDFLOORWEST1:OPENOFFICE'),
                    ('0FEAST1', '0FGROUNDFLOOREAST1:OPENOFFICE'),
                ]
            },
            # HVAC equipment agents
            'CHILLER': {
                'participation': lambda agent: False,
                'action_space': DictSpace({
                    'temperature': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(Actuator.Ref(
                        'System Node Setpoint',
                        'Temperature Setpoint',
                        'CHILLER CHW OUTLET NODE',
                    )),
                }),
                'observation_space': DictSpace({
                    'cooling-rate': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        'Chiller Evaporator Cooling Rate',
                        'CHILLER',
                    )),
                    'elec-rate': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        'Chiller Electricity Rate',
                        'CHILLER',
                    )),
                }),
                # Cooling delivered per unit of electricity; 0 when idle.
                'reward': (
                    lambda agent: (
                        agent.observation.value['cooling-rate']
                        / agent.observation.value['elec-rate']
                    ) if agent.observation.value['elec-rate'] != 0. else 0.
                ),
            },
            'AHU': {
                # TODO disabled
                'participation': lambda agent: False,
                'action_space': DictSpace({
                    'air flow rate': BoxSpace(
                        low=0., high=10.,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(Actuator.Ref(
                        type='Fan',
                        control_type='Fan Air Mass Flow Rate',
                        key='AIR LOOP AHU SUPPLY FAN'
                    )),
                }),
                'observation_space': DictSpace({
                    'energy-consumption': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(
                        OutputMeter.Ref(
                            type='Electricity:HVAC',
                        )
                    ),
                    'AHU COOLING COIL': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        type='Cooling Coil Total Cooling Rate',
                        key='AIR LOOP AHU COOLING COIL',
                    )),
                }),
                # Coil cooling rate per unit of HVAC electricity; 0 when idle.
                'reward': (
                    lambda agent: (
                        agent.observation.value['AHU COOLING COIL']
                        / agent.observation.value['energy-consumption']
                    ) if agent.observation.value['energy-consumption'] != 0. else 0.
                ),
            },
            'PV': {
                'participation': lambda agent: False,
                'action_space': DictSpace({
                    'air flow rate': BoxSpace(
                        low=0., high=100.,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(Actuator.Ref(
                        type='Fan',
                        control_type='Fan Air Mass Flow Rate',
                        key='AIR LOOP AHU SUPPLY FAN'
                    )),
                }),
                'observation_space': DictSpace({
                    'energy-consumption': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(
                        OutputMeter.Ref(
                            type='Electricity:HVAC',
                        )
                    ),
                    'PV': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(
                        OutputVariable.Ref(
                            type='Generator Produced DC Electricity Energy',
                            key='SOLAR COLLECTOR 1',
                        )
                    ),
                    'Office Occupancy': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        type='Schedule Value',
                        key='Office_OpenOff_Occ',
                    )),
                }),
                'reward': RewardFunction_for_PV(),
            }
        }
    }

    def __init__(self, config: 'dict | None' = None):
        """Create the environment from the class-level config plus overrides.

        Args:
            config: Optional mapping whose top-level keys override those of
                ``UserMultiAgentEnv.config``. ``None`` (the default) and an
                empty mapping both mean "no overrides".
        """
        # ``None`` sentinel instead of a shared mutable ``dict()`` default.
        super().__init__({
            **self.__class__.config,
            **(config or {}),
        })

    def run(self):
        """Build the EnergyPlus system, attach this env to it and run it.

        Blocks until ``system.start().wait()`` returns.
        """
        system = System(
            building='model_with_photovoltaic.idf',
            weather='SGP_Singapore_486980_IWEC.epw',
            # TODO
            #report='tmp/',
            repeat=True,
        )
        system.add('logging:progress')
        self.__attach__(system).schedule_episode()
        system.start().wait()

E0000 00:00:1732182049.456194   10002 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732182049.459512   10002 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.algorithms.ppo import PPO, PPOConfig

def get_config():
    """Build the PPO configuration used for both tuning and evaluation.

    One policy is declared per entry of ``UserMultiAgentEnv.config['agents']``
    (using that agent's action/observation spaces) and every agent id is
    mapped to the policy with the same name.

    Returns:
        A ``PPOConfig`` with two env runners, a rollout fragment length of 250,
        connectors disabled and ``num_gpus=1``.
    """
    return (
        PPOConfig()
        .environment(UserMultiAgentEnv)
        # All sampling options live in one `env_runners` call; this replaces
        # the deprecated `.rollouts(num_rollout_workers=...)` spelling.
        .env_runners(
            num_env_runners=2,
            #rollout_fragment_length='auto',
            rollout_fragment_length=250,
            # NOTE this env (an `ExternalEnv`) does not support connectors
            enable_connectors=False,
        )
        .multi_agent(
            policies={
                policy_id: PolicySpec(
                    action_space=agent_config['action_space'],
                    observation_space=agent_config['observation_space'],
                )
                for policy_id, agent_config in UserMultiAgentEnv.config['agents'].items()
            },
            policy_mapping_fn=lambda agent_id, *args, **kwargs: str(agent_id),
        )
        # TODO
        # A single `resources` call: the earlier `num_gpus=0.25` was always
        # overridden by this value, so the effective setting is unchanged.
        .resources(num_gpus=1.)
        #.framework('tf2')
    )

In [None]:
# Population-based-training schedule: every 4 training iterations the listed
# hyperparameters may be perturbed or (with probability 0.25) resampled.
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    perturbation_interval=4,
    resample_probability=0.25,
    hyperparam_mutations=dict(
        lr=tune.uniform(1e-5, 0.1),
        # Single-element lists: these two settings are effectively fixed.
        batch_mode=["complete_episodes"],
        train_batch_size=[4000],
        sgd_minibatch_size=[32, 64, 128, 256, 512],
        num_sgd_iter=[10, 20, 30],
        clip_param=tune.uniform(0.1, 0.3),
    ),
    #require_attrs=False,
)

# NOTE(review): `num_samples=1` gives the scheduler a population of one trial;
# PBT presumably needs several trials to exploit/explore - confirm.
# NOTE(review): the captured run log reports `episode_reward_mean: nan`
# (0 finished episodes), so the metric below may never be populated - confirm.
tuner = tune.Tuner(
    "PPO",
    param_space=get_config().to_dict(),
    tune_config=tune.TuneConfig(
        metric="env_runners/episode_reward_mean",
        mode="max",
        num_samples=1,
        scheduler=pbt,
    ),
    run_config=air.RunConfig(
        verbose=2,
        stop=dict(training_iteration=200),
        checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True),
    ),
)

# Runs the whole tuning job; later cells read `results`.
results = tuner.fit()


  self.start_gcs_server()
  self.start_gcs_server()
  self.start_monitor()
  self.start_monitor()
  self.start_api_server(
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_log_monitor()
2024-11-21 09:40:55,532	INFO worker.py:1786 -- Started a local Ray instance.
2024-11-21 09:40:56,097	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-11-21 09:40:56,098	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


0,1
Current time:,2024-11-21 09:52:13
Running for:,00:11:17.68
Memory:,35.7/62.8 GiB

Trial name,status,loc,iter,total time (s),ts,num_healthy_workers,num_in_flight_async_ sample_reqs,num_remote_worker_re starts
PPO_UserMultiAgentEnv_b4ea7_00000,RUNNING,192.168.200.249:23531,5,545.785,6500,2,0,0


[36m(pid=23531)[0m E0000 00:00:1732182056.942320   23531 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=23531)[0m E0000 00:00:1732182056.945599   23531 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(pid=23642)[0m E0000 00:00:1732182063.703860   23642 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=23642)[0m E0000 00:00:1732182063.707173   23642 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(pid=23643)[0m E0000 00:00:1732182063.704480   23643 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=2364

Trial name,agent_timesteps_total,counters,custom_metrics,env_runners,episode_media,info,num_agent_steps_sampled,num_agent_steps_sampled_lifetime,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_lifetime,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_healthy_workers,num_in_flight_async_sample_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,timers
PPO_UserMultiAgentEnv_b4ea7_00000,52000,"{'num_env_steps_sampled': 6500, 'num_env_steps_trained': 6500, 'num_agent_steps_sampled': 52000, 'num_agent_steps_trained': 52000}",{},"{'episode_reward_max': nan, 'episode_reward_min': nan, 'episode_reward_mean': nan, 'episode_len_mean': nan, 'episode_media': {}, 'episodes_timesteps_total': 0, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [], 'episode_lengths': []}, 'sampler_perf': {}, 'num_faulty_episodes': 0, 'connector_metrics': {}, 'num_episodes': 0, 'episode_return_max': nan, 'episode_return_min': nan, 'episode_return_mean': nan, 'episodes_this_iter': 0}",{},"{'learner': {}, 'num_env_steps_sampled': 6500, 'num_env_steps_trained': 6500, 'num_agent_steps_sampled': 52000, 'num_agent_steps_trained': 52000}",52000,52000,52000,6500,6500,0,0,6500,0,0,2,0,0,0,"{'cpu_util_percent': 9.119883040935672, 'ram_util_percent': 45.16023391812866}","{'training_iteration_time_ms': 78692.743, 'restore_workers_time_ms': 0.01, 'training_step_time_ms': 78692.708, 'sample_time_ms': 68868.33, 'learn_time_ms': 16366.8, 'learn_throughput': 132.382, 'synch_weights_time_ms': 6.894}"


 44%|████▍     | 44.0/100 [01:05<03:20,  3.58s/it, Starting Simulation at 07/01/2002 for SITE (01-01:31-12)][32m [repeated 6x across cluster][0m
 56%|█████▌    | 56.00000000000001/100 [01:11<00:54,  1.25s/it, Starting Simulation at 07/01/2002 for SITE (01-01:31-12)][32m [repeated 8x across cluster][0m
 62%|██████▏   | 62.0/100 [01:14<00:32,  1.16it/s, Updating Shadowing Calculations, Start Date=07/21/2002]  
 65%|██████▌   | 65.0/100 [01:15<00:26,  1.33it/s, Continuing Simulation at 07/21/2002 for SITE (01-01:31-12)]
 59%|█████▉    | 59.0/100 [01:12<00:41,  1.02s/it, Starting Simulation at 07/01/2002 for SITE (01-01:31-12)]             [32m [repeated 3x across cluster][0m
 62%|██████▏   | 62.0/100 [01:14<00:32,  1.16it/s, Updating Shadowing Calculations, Start Date=07/21/2002]  
 65%|██████▌   | 65.0/100 [01:15<00:26,  1.33it/s, Continuing Simulation at 07/21/2002 for SITE (01-01:31-12)]
 68%|██████▊   | 68.0/100 [01:28<00:57,  1.79s/it, Continuing Simulation at 07/21/2002 for SI

In [None]:
# Requires `results` from the `tuner.fit()` cell; running this cell without it
# raises NameError (see the captured output of this cell).
best_result = results.get_best_result()
# NOTE(review): the label says "Hyperparameters" but the whole result object
# is printed - confirm whether `best_result.config` was intended.
print("Best Hyperparameters found: ", best_result)

NameError: name 'results' is not defined

In [None]:
# Display the 'env_runners' entry of the best trial's reported metrics.
best_result.metrics['env_runners']

In [None]:
from ray.rllib.algorithms.callbacks import DefaultCallbacks
from controllables.core import BaseVariable
from controllables.core.tools.records import VariableRecords

class PMVVariable(BaseVariable):
    """Derived variable whose value is the PMV computed from three other variables.

    Args:
        tdb: Variable providing the dry-bulb air temperature.
        tr: Variable providing the mean radiant temperature.
        rh: Variable providing the relative humidity.
        metab_rate: Metabolic rate passed to the PMV model as ``met``.
        clothing: Clothing insulation fed to ``clo_dynamic``.
        pmv_limit: Stored as ``_pmv_limit``; not used by ``value``.
    """

    def __init__(
        self,
        tdb: BaseVariable,
        tr: BaseVariable,
        rh: BaseVariable,
        metab_rate=1.5, clothing=.5, pmv_limit=.5,
    ):
        self.tdb, self.tr, self.rh = tdb, tr, rh
        self._metab_rate = _numpy_.asarray(metab_rate)
        self._clothing = _numpy_.asarray(clothing)
        self._pmv_limit = _numpy_.asarray(pmv_limit)

    @property
    def value(self):
        """Return the ``'pmv'`` entry of ``pmv_ppd`` for the current inputs."""
        met = self._metab_rate
        air_temperature = self.tdb.value
        radiant_temperature = self.tr.value
        # Relative air speed for a fixed 0.1 ambient air speed.
        relative_air_speed = pytc.utilities.v_relative(v=0.1, met=met)
        relative_humidity = self.rh.value
        # Clothing insulation adjusted for the metabolic rate.
        dynamic_clothing = pytc.utilities.clo_dynamic(clo=self._clothing, met=met)

        comfort = pytc.models.pmv_ppd(
            tdb=air_temperature,
            tr=radiant_temperature,
            vr=relative_air_speed,
            rh=relative_humidity,
            met=met,
            clo=dynamic_clothing,
            limit_inputs=False,
        )
        return comfort['pmv']

class PlottingCallbacks(DefaultCallbacks):
    """RLlib callbacks that record per-room variables and dump them to CSV.

    Records are created lazily on the first episode start, polled on every
    episode step and written to one CSV file per room agent at episode end.
    """

    # Agents for which no per-room records are kept.
    _EQUIPMENT_AGENTS = ('CHILLER', 'AHU', 'PV')

    def __init__(self):
        # Let the RLlib base class run its own initialisation.
        super().__init__()
        # agent ref -> records; stays ``None`` until the first episode starts.
        self.env_records: dict[object, VariableRecords] | None = None

    def on_episode_start(self, *, episode, worker, **kwargs):
        """Create the per-room ``VariableRecords`` the first time an episode starts."""
        env: UserMultiAgentEnv = worker.env
        system = env.system
        if self.env_records is not None:
            return

        system.add('logging:progress')
        self.env_records = dict()
        for agent_ref in env.agents:
            if any(agent_ref == name for name in self._EQUIPMENT_AGENTS):
                # self.env_records[agent_ref] = records = VariableRecords({
                # 'AHU COOLING COIL': system[OutputVariable.Ref('Cooling Coil Total Cooling Rate', 'AIR LOOP AHU COOLING COIL')],
                # 'Electricity': system[OutputMeter.Ref('Electricity:HVAC')]
                # }, maxlen=10_000)
                continue

            agent = env.agents[agent_ref]
            tdb = agent.observation['temperature:drybulb']
            tr = agent.observation['temperature:radiant']
            rh = agent.observation['humidity']
            pv = system[OutputMeter.Ref('Photovoltaic:ElectricityProduced')]/4
            pmv = PMVVariable(tdb=tdb, tr=tr, rh=rh)
            self.env_records[agent_ref] = records = VariableRecords({
                '🕰️': system['time'],
                '🍩': agent.reward,
                'pmv': pmv,
                'occupancy': agent.observation['Office Occupancy'],
                # 'tstat': env.agents[agent_ref].action['thermostat'],
                'temp': tdb,
                # 'AHU COOLING COIL': system[OutputVariable.Ref('Cooling Coil Total Cooling Rate', 'AIR LOOP AHU COOLING COIL')],
                'elec': system[OutputMeter.Ref('Electricity:HVAC')],
                'pv': pv,
            }, maxlen=10_000)
            # display(
            #     records.plot.scatter(x='🕰️', y='pv', label=repr(agent_ref))
            #     .watch(records.events['change'] % 1_000)
            # )

    def on_episode_step(self, *, episode, **kwargs):
        """Poll every record set once per environment step."""
        if not self.env_records:
            # Nothing to poll if records were never created.
            return
        for env_records in self.env_records.values():
            env_records.poll()

    def on_episode_end(self, *, episode, **kwargs):
        """Write each room's records to its CSV file, creating the folder if needed."""
        if not self.env_records:
            return

        import os

        for agent_ref, env_records in self.env_records.items():
            csv_path = f'datasave/20241102/records_train_pv_change_reward-{agent_ref}.csv'
            # Writing into a missing directory fails, so create it up front.
            os.makedirs(os.path.dirname(csv_path), exist_ok=True)
            env_records.dataframe().to_csv(csv_path, index=False)


In [None]:
# Evaluation variant of the training config: no remote env runners (the env is
# created on the local worker), one episode per evaluation call, and the
# PlottingCallbacks attached for recording.
config_eval = (
    get_config()
    .env_runners(
        num_env_runners=0,
        create_env_on_local_worker=True,
    )
    .evaluation(
        evaluation_duration=1,
        evaluation_duration_unit='episodes',
        #evaluation_interval=1,
        evaluation_num_env_runners=0,
    )
    .callbacks(PlottingCallbacks)
)

algo_eval = PPO(config_eval)
# Restore trained weights. NOTE(review): the checkpoint path is hard-coded to
# one user's home directory - confirm it exists on the machine running this.
#algo_eval.restore(best_result.checkpoint)
algo_eval.restore('/home/AD/user/ray_results/PPO_2024-11-02_06-02-37/PPO_UserMultiAgentEnv_1068a_00000_0_2024-11-02_06-02-38/checkpoint_000016')
#algo_eval.restore("/home/AD/user/ray_results/PPO_2024-10-18_15-32-34/PPO_UserMultiAgentEnv_331fb_00000_0_2024-10-18_15-32-35/checkpoint_000013")

In [None]:
# Run two evaluation rounds with the restored algorithm; the return value of
# `evaluate()` is discarded here.
for _ in range(2):
    algo_eval.evaluate()