In [1]:
import numpy as _numpy_
from ray.tune.schedulers import PopulationBasedTraining
from ray import tune , air
from controllables.energyplus import System
from controllables.core import TemporaryUnavailableError
from controllables.core.tools.gymnasium import DiscreteSpace, BoxSpace, DictSpace
from controllables.core.tools.rllib import MultiAgentEnv
from controllables.energyplus import Actuator, OutputVariable, OutputMeter
from controllables.energyplus import examples
import pythermalcomfort as pytc
import pandas as pd



class RewardFunction:
    def __init__(self, metab_rate=1.5, clothing=.5, pmv_limit=.5):
        self._metab_rate = _numpy_.asarray(metab_rate)
        self._clothing = _numpy_.asarray(clothing)
        self._pmv_limit = _numpy_.asarray(pmv_limit)
        self.df_baseline = pd.read_csv("datasave/records_-1FWEST.csv")
        self.df_baseline['time'] = pd.to_datetime(self.df_baseline['time'])
    
   
    def __call__(self, agent):
        try:
            system = agent.system
            observation = agent.observation.value    
        except TemporaryUnavailableError:
            return 0. 
        AHU_COOLING_COIL = observation['AHU COOLING COIL']
        HVAC_Electricity = observation['energy-consumption']
        # Fan_Electricity_Rate = observation['Fan Electricity Rate']
        Office_Occupancy = observation['Office Occupancy']
        pmv = pytc.models.pmv_ppd(
            tdb=(tdb := observation['temperature:drybulb']), 
            tr=observation['temperature:radiant'], 
            # calculate relative air speed
            vr=pytc.utilities.v_relative(v=observation.get('airspeed', .1), met=self._metab_rate), 
            rh=observation['humidity'], 
            met=self._metab_rate, 
            # calculate dynamic clothing
            clo=pytc.utilities.clo_dynamic(clo=self._clothing, met=self._metab_rate),
            limit_inputs=False ,
        )['pmv']
        current_time = system['time'].value
        penalty = 0
        if Office_Occupancy != 0:  
            if HVAC_Electricity > self.df_baseline[self.df_baseline['time'] == current_time]['elec'].max():
                penalty += 10
            # if _numpy_.abs(pmv) > _numpy_.abs(self.df_baseline[self.df_baseline['time'] == current_time]['pmv']).max():
            #     penalty += 10

        log_term = max(2 * _numpy_.abs(pmv), 1e-2) 
        
        reward = (
            2 * Office_Occupancy * (-_numpy_.log(log_term))  
            - penalty
        )
        reward = min(reward, 10)
        return reward


class UserMultiAgentEnv(MultiAgentEnv):
    config: MultiAgentEnv.Config = {
        'agents': {
            # room agents
            **{
                agent_id: {
                    'action_space': DictSpace({
                        'thermostat': BoxSpace(
                            low=20., high=30.,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(
                            Actuator.Ref(
                                type='Zone Temperature Control',
                                control_type='Cooling Setpoint',
                                key=var_key,
                            ) 
                        ),
                        # 'air-flow-rate': BoxSpace(
                        #     low=0., high=20.,
                        #     dtype=_numpy_.float32,
                        #     shape=(),
                        # ).bind(
                        #     Actuator.Ref(
                        #         type='System Node Setpoint',
                        #         control_type='Mass Flow Rate Setpoint',
                        #         key=f'{var_key} SINGLE DUCT VAV NO REHEAT SUPPLY OUTLET',
                        #     )
                        # ),                       
                    }),
                    'observation_space': DictSpace({
                        # 'temperature': BoxSpace(
                        #     low=-_numpy_.inf, high=+_numpy_.inf,
                        #     dtype=_numpy_.float32,
                        #     shape=(),
                        # ).bind(
                        #     OutputVariable.Ref(
                        #         type='Zone Air Temperature',
                        #         key=var_key,
                        #     )
                        # ),
                        'temperature:drybulb': BoxSpace(
                                low=-_numpy_.inf, high=+_numpy_.inf,
                                dtype=_numpy_.float32,
                                shape=(),
                            ).bind(OutputVariable.Ref(
                                type='Zone Mean Air Temperature',
                                key=var_key,
                            )),
                        'temperature:radiant': BoxSpace(
                                low=-_numpy_.inf, high=+_numpy_.inf,
                                dtype=_numpy_.float32,
                                shape=(),
                            ).bind(OutputVariable.Ref(
                                type='Zone Mean Radiant Temperature',
                                key=var_key,
                            )),
                        'humidity': BoxSpace(
                                low=-_numpy_.inf, high=+_numpy_.inf,
                                dtype=_numpy_.float32,
                                shape=(),
                            ).bind(OutputVariable.Ref(
                                type='Zone Air Relative Humidity',
                                key=var_key,
                            )),
                        'AHU COOLING COIL': BoxSpace(
                                low=-_numpy_.inf, high=+_numpy_.inf,
                                dtype=_numpy_.float32,
                                shape=(),
                            ).bind(OutputVariable.Ref(
                                type='Cooling Coil Total Cooling Rate',
                                key='AIR LOOP AHU COOLING COIL',
                            )),
                        'Fan Electricity Rate': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputVariable.Ref(
                            type='Fan Electricity Rate',
                            key='AIR LOOP AHU SUPPLY FAN',
                        )),
                        'energy-consumption': BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputMeter.Ref(
                            type='Electricity:HVAC',
                        )
                        ),
                        'Office Occupancy':BoxSpace(
                            low=-_numpy_.inf, high=+_numpy_.inf,
                            dtype=_numpy_.float32,
                            shape=(),
                        ).bind(OutputVariable.Ref(
                            type='Schedule Value',
                            key='Office_OpenOff_Occ',
                        )),
                    }),
                    'reward': RewardFunction(),
                }
                for agent_id, var_key in [
                    ('1FWEST', '1FFIRSTFLOORWEST:OPENOFFICE'),
                    ('1FEAST', '1FFIRSTFLOOREAST:OPENOFFICE'),
                    ('0FWEST', '0FGROUNDFLOORWEST:OPENOFFICE'),
                    ('0FEAST', '0FGROUNDFLOOREAST:OPENOFFICE'),
                    ('1FWEST1', '1FFIRSTFLOORWEST1:OPENOFFICE'),
                    ('1FEAST1', '1FFIRSTFLOOREAST1:OPENOFFICE'),
                    ('0FWEST1', '0FGROUNDFLOORWEST1:OPENOFFICE'),
                    ('0FEAST1', '0FGROUNDFLOOREAST1:OPENOFFICE'),
                ]
            },
            # HVAC equipment agents
            'CHILLER': {
                'participation': lambda agent: False,
                'action_space': DictSpace({
                    'temperature': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(Actuator.Ref(
                        'System Node Setpoint',
                        'Temperature Setpoint',
                        'CHILLER CHW OUTLET NODE',
                    )),
                }),
                'observation_space': DictSpace({
                    'cooling-rate': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        'Chiller Evaporator Cooling Rate',
                        'CHILLER',
                    )),
                    'elec-rate': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        'Chiller Electricity Rate',
                        'CHILLER',
                    )),
                }),
                'reward': (
                    lambda agent: (
                        agent.observation.value['cooling-rate'] 
                        / agent.observation.value['elec-rate']
                    ) if agent.observation.value['elec-rate'] != 0. else 0.
                ),
            },
            'AHU': {
                # TODO disabled
                'participation': lambda agent: False,
                'action_space': DictSpace({
                    'air flow rate': BoxSpace(
                        low=0., high=10.,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(Actuator.Ref(
                        type='Fan',
                        control_type='Fan Air Mass Flow Rate',
                        key='AIR LOOP AHU SUPPLY FAN'
                    )),
                }),
                'observation_space': DictSpace({
                    'energy-consumption': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(
                        OutputMeter.Ref(
                            type='Electricity:HVAC',
                        )
                    ),
                    'AHU COOLING COIL': BoxSpace(
                        low=-_numpy_.inf, high=+_numpy_.inf,
                        dtype=_numpy_.float32,
                        shape=(),
                    ).bind(OutputVariable.Ref(
                        type='Cooling Coil Total Cooling Rate',
                        key='AIR LOOP AHU COOLING COIL',
                    )),
                }),
                'reward': (
                    lambda agent: (
                        agent.observation.value['AHU COOLING COIL'] 
                        / agent.observation.value['energy-consumption']
                    ) if agent.observation.value['energy-consumption'] != 0. else 0.
                ),
            },            
        }
    }

    def __init__(self, config: dict = dict()):
        super().__init__({
            **self.__class__.config,
            **config,
        })

    def run(self):
        system = System(
            building='all_room_have_hvac.idf',
            weather='SGP_Singapore_486980_IWEC.epw',
            # TODO
            report='tmp/',
            repeat=True,
        )
        # system.add('logging:progress')
        self.__attach__(system).schedule_episode()
        system.start().wait()

E0000 00:00:1731316989.611473 1149134 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731316989.614829 1149134 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.algorithms.ppo import PPO, PPOConfig

def get_config():
    return (
        PPOConfig()
        .environment(UserMultiAgentEnv)
        .env_runners(
            # NOTE this env (an `ExternalEnv`) does not support connectors
            enable_connectors=False,
        )
        .multi_agent(
            policies={
                policy_id: PolicySpec(
                    action_space=agent_config['action_space'],
                    observation_space=agent_config['observation_space'],
                )
                for policy_id, agent_config in UserMultiAgentEnv.config['agents'].items()
            },
            policy_mapping_fn=lambda agent_id, *args, **kwargs: str(agent_id),
        )
        # TODO
        .resources(num_gpus=1)
    )

In [3]:
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    perturbation_interval=4,
    resample_probability=0.25,
    hyperparam_mutations={
        "lr": tune.uniform(1e-5, 0.1),
        "batch_mode": ["complete_episodes"],
        "train_batch_size": [4000],
        "sgd_minibatch_size": [32, 64, 128, 256, 512],
        "num_sgd_iter": [10, 20, 30],
        "clip_param": tune.uniform(0.1, 0.3),
    },
    #require_attrs=False,
)

tuner = tune.Tuner(
    "PPO",
    param_space=get_config().to_dict(),
    tune_config=tune.TuneConfig(
        scheduler=pbt,
        num_samples=1,
        metric="env_runners/episode_reward_mean",
        mode="max",
    ),
    run_config=air.RunConfig(
        stop={"training_iteration": 200},
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=True
        ),
        verbose=2,
    )
)

results = tuner.fit()


  self.start_gcs_server()
  self.start_gcs_server()
  self.start_monitor()
  self.start_monitor()
  self.start_api_server(
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_log_monitor()
2024-11-11 09:23:16,132	INFO worker.py:1786 -- Started a local Ray instance.
2024-11-11 09:23:16,710	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2024-11-11 09:23:16,711	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


0,1
Current time:,2024-11-11 13:02:05
Running for:,03:38:49.06
Memory:,19.3/31.7 GiB

Trial name,status,loc,iter,total time (s),ts,num_healthy_workers,num_in_flight_async_ sample_reqs,num_remote_worker_re starts
PPO_UserMultiAgentEnv_95577_00000,TERMINATED,192.168.200.249:1150781,200,13104.3,800000,2,0,0


[36m(pid=1150781)[0m E0000 00:00:1731316997.606674 1150781 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=1150781)[0m E0000 00:00:1731316997.610106 1150781 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(pid=1150898)[0m E0000 00:00:1731317004.682765 1150898 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=1150898)[0m E0000 00:00:1731317004.686129 1150898 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(pid=1150899)[0m E0000 00:00:1731317004.690776 1150899 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36

Trial name,agent_timesteps_total,counters,custom_metrics,env_runners,episode_media,info,num_agent_steps_sampled,num_agent_steps_sampled_lifetime,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_lifetime,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_healthy_workers,num_in_flight_async_sample_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,timers
PPO_UserMultiAgentEnv_95577_00000,6400000,"{'num_env_steps_sampled': 800000, 'num_env_steps_trained': 800000, 'num_agent_steps_sampled': 6400000, 'num_agent_steps_trained': 6400000}",{},"{'episode_reward_max': 21523.912350273145, 'episode_reward_min': -1294.441328966933, 'episode_reward_mean': 12376.505387689154, 'episode_len_mean': 4608.0, 'episode_media': {}, 'episodes_timesteps_total': 460800, 'policy_reward_min': {'1FWEST': -9.702664659765468, '0FEAST': 697.3971073260618, '0FWEST1': 243.07977132230377, '0FEAST1': -96.43125592674394, '1FEAST': -2991.564806040925, '1FEAST1': -291.9107205427739, '1FWEST1': 394.1520125029376, '0FWEST': 377.13293249579203}, 'policy_reward_max': {'1FWEST': 2835.0856095371905, '0FEAST': 3574.5898217238105, '0FWEST1': 2844.9992687501103, '0FEAST1': 2584.008911014819, '1FEAST': 766.9039536534499, '1FEAST1': 3181.2578708332107, '1FWEST1': 2995.341146290805, '0FWEST': 3425.820775257298}, 'policy_reward_mean': {'1FWEST': 1762.0650430236308, '0FEAST': 2264.443549494356, '0FWEST1': 1906.4401812636506, '0FEAST1': 1546.90042288662, '1FEAST': -859.3827858157906, '1FEAST1': 1585.3603888527848, '1FWEST1': 1947.3697782533272, '0FWEST': 2223.3088097304703}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [19356.388474519186, 21523.912350273145, 19382.329050939345, 20262.126907876765, 19480.88268891991, 20308.68636633254, 20834.441816805676, 20859.91832068693, 21008.52427233833, 20058.244385063437, 20781.66455547808, 19917.242466969703, 19979.472818680122, 20166.2913634102, 20449.278423139476, 19588.95911832442, 20592.574278698492, 20585.02402147616, 19819.314644943057, 19228.169026860716, 20336.6022534972, 20531.995006841047, 19770.95183572659, 19504.379327704806, 19533.419915518247, 19457.95312517416, 18425.52683283527, 17991.906585617697, 19008.846209937605, 19224.66280250741, 18666.498300301315, 19094.144642560914, 17176.448687630425, 17184.010867739496, 15621.990687950503, 16811.202810008002, 15434.542549019176, 16539.01438286176, 14617.039731754101, 14889.467988875385, 16061.302587659367, 14718.21042836946, 14593.458156827981, 14790.271499028438, 14567.89411667108, 13414.277148314752, 14302.214951306738, 13132.984717075728, 12665.849685117104, 13463.878355564768, 14087.628369414468, 14377.829277094177, 12520.89712291389, 11410.330025265033, 11721.850223667821, 11355.843645888146, 11090.018426695295, 11607.74440675556, 9558.416416327662, 10246.17542918068, 10604.961511155052, 7947.888450678331, 7787.084315370463, 8622.861214555618, 10049.370273388748, 8999.48246765506, 10524.112808613305, 9022.81461405557, 9245.821938802877, 9874.967053994822, 10170.726064459426, 7912.172526298933, 9789.235154468277, 9541.35302483809, 6925.002554134044, 6338.645030137196, 7808.558422971493, 6957.155982770855, 6891.052343136905, 2852.1417138491347, 5158.176218136755, 5023.617072659842, 3588.9299554745035, 4383.841708600497, 2978.296762257601, 4713.82388455862, 5023.122154160024, 2531.0873766406826, 2089.500432473673, 1379.4164579946134, 2610.317927141324, 1270.4142294387973, 946.9794266678266, 2245.91193983177, -1294.441328966933, 1945.7444599118307, -103.78417280063746, 2057.6653499845856, 526.4681473717994, -981.0815828909591], 'episode_lengths': [4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608, 4608], 'policy_1FWEST_reward': [2504.9918783419057, 2776.4931730419667, 2576.0272609555977, 2718.0816054668753, 2538.8542803506652, 2596.372757903572, 2485.9777732231505, 2428.6693966659163, 2662.30299483586, 2378.362626548332, 2835.0856095371905, 2663.444884625876, 2584.711294030578, 2559.247562557759, 2619.3937546967704, 2590.4120644974546, 2713.4322395884983, 2576.5478040045855, 2605.6866444922293, 2589.1536307747842, 2690.8336211130827, 2617.3857250735814, 2521.6785308951426, 2394.2655386579386, 2454.3625200829083, 2505.698674718308, 2323.792335284627, 2368.574789753391, 2528.8604984332896, 2382.944208281283, 2316.0292542900042, 2454.0941915353474, 2121.1611873745987, 2217.828388859722, 2097.520213706068, 2135.933416097077, 2110.9669947107654, 2098.351230383027, 2058.0259029027143, 2147.146236794274, 2329.422546839665, 2191.4208837369165, 2242.261332415363, 2147.4029161706976, 2297.67690934436, 2060.7691090905023, 2062.846955395462, 1964.807353823538, 1970.9496989702752, 2085.462283973725, 2162.322390091705, 2154.5891215728698, 2012.5834424832203, 1809.6869395658184, 1913.7365050039746, 1743.3199239068763, 1816.1476986150785, 1860.6311774640626, 1559.0551713229652, 1667.916661787359, 1594.9364342389617, 1479.2654234318686, 1370.460502239102, 1502.34537288014, 1603.645908007534, 1400.2163430204673, 1661.2791763010182, 1404.4453907743762, 1490.3320549981204, 1656.3782769776058, 1530.6981395633013, 1179.251280729778, 1510.0567282668183, 1577.8453843443504, 1236.3528015613806, 1079.3566585535534, 1253.6706589097892, 1028.5083615986755, 1067.1696294730725, 593.2975834802976, 927.8792250882351, 896.1018678405624, 804.9151730029441, 855.9851864185022, 726.9622276522546, 891.7005888585843, 909.855104996516, 594.1984035467582, 576.8147584964778, 494.6471893617449, 606.3148297441913, 247.9192580583018, 294.6236654530895, 455.91407928609596, -9.702664659765468, 441.51176432767795, 101.58730901458352, 380.1012832284611, 115.64538747731144, 72.30583915919124], 'policy_0FEAST_reward': [3226.237900975419, 3427.3764209801484, 3311.6779638245084, 3390.897010382466, 3348.360440835867, 3358.488453676886, 3467.5997009684547, 3336.9920748826057, 3375.322701687802, 3315.801933540923, 3316.236711286987, 3267.2649004921427, 3379.2749737336535, 3354.753160079605, 3510.7743414922375, 3337.9239227399553, 3444.0817614848365, 3574.5898217238105, 3509.5484050002733, 3335.399657572837, 3493.6046700132692, 3376.4234283643436, 3398.3170423975193, 3350.316831715216, 3367.464252551852, 3457.494458892292, 3168.3029566072164, 3046.9020662486705, 3038.427808626338, 3246.0985696514604, 3174.709210649836, 3286.0750501257885, 2921.568990642217, 2854.7822192643926, 2796.203710484079, 2929.800394969783, 2910.6217311546798, 2941.764861841292, 2732.849248019889, 2788.913835137856, 2900.9168855392, 2802.6888640553857, 2489.76386513332, 2498.6071812135024, 2276.498890324605, 2174.453638753169, 2292.9905329579956, 2096.242330462001, 1957.0629788325314, 2107.8755871279445, 2090.41069838819, 2220.6284267797546, 1932.4662423309344, 1704.7939736415904, 1800.0984006460906, 1870.983393939833, 1632.5607621166828, 1717.094546959688, 1471.5298984203762, 1585.0089324925018, 1560.1789563833897, 1167.9192613884618, 1293.040685866804, 1490.8587438518605, 1664.7011959615436, 1666.76194425132, 2033.597132659079, 1678.691831925477, 1949.090654639939, 1962.545009216906, 2076.693184779726, 1858.958096110541, 2082.6603753872787, 1952.9765293231237, 1493.4795904714094, 1396.4247323739444, 1554.8563248907528, 1370.7113176561138, 1409.786159370746, 876.9443985785651, 1295.5344019751292, 1370.6829480971428, 1207.106473320984, 1381.7397996831967, 1211.625295997717, 1429.9572460188672, 1561.0474711936554, 1188.178541804355, 1082.6041514500312, 992.9789315984065, 1253.0631934247872, 1032.2568764577986, 929.621153162775, 1168.4531823630698, 697.3971073260618, 1020.7158958408079, 935.4340276043029, 1049.0147260244867, 877.373139989883, 726.7746361804747], 'policy_0FWEST1_reward': [2537.555291731013, 2804.9635042957375, 2508.5407340725064, 2707.556401827917, 2562.2786419527442, 2758.042484857792, 2824.177358462665, 2844.9992687501103, 2748.9272517616987, 2409.0681786270193, 2659.5224175747526, 2545.8377536662065, 2551.2464146558427, 2512.3834258014595, 2525.4259767605204, 2372.6182666812083, 2593.7080981277945, 2506.98430319901, 2492.3711974244943, 2480.603079950728, 2532.7255106858297, 2598.3315775389096, 2558.7178331548744, 2461.4336356624794, 2765.0376041754294, 2582.428929875924, 2722.4893001402843, 2498.9389300298385, 2607.292858666021, 2688.4388914732585, 2734.7277825075616, 2750.721068959193, 2635.0791085906785, 2581.638496726152, 2375.799384875565, 2543.9754638899935, 2204.551976961376, 2367.0446943353113, 2135.2219038166927, 2158.1760271880294, 2127.8407842986603, 1995.4957020624188, 2155.6718439430324, 2280.2904847350546, 2226.453357192202, 2118.400971090695, 2292.269881572588, 1992.6363970474324, 2113.870597842434, 2199.3004437893333, 2337.494221727139, 2453.584074405112, 2110.1208496036797, 2077.8193305013297, 2019.8070552022416, 2013.9071481754036, 1943.2520942513606, 2221.877841801858, 1818.665709609714, 1862.2178553927467, 1816.8648091618993, 1513.167703107258, 1434.747899128911, 1487.3936098257752, 1687.071217349308, 1567.7054661936595, 1846.959158697441, 1731.0492709680366, 1535.2266925744866, 1614.4332113104467, 1702.0570736279024, 1581.211116955743, 1729.758197298842, 1574.002002036433, 1475.1841801625549, 1288.5063817115335, 1384.93912095222, 1470.1399543320383, 1155.3500497303114, 769.832711710575, 1018.6465032598105, 1050.0190651911848, 874.5709532457007, 946.100812488163, 822.7100839129246, 1065.7516661316117, 1246.5736472046997, 780.0289389861164, 764.5365107966612, 735.4827202777049, 786.7851927176209, 708.3129975408384, 648.5800548109479, 802.8193783882858, 243.07977132230377, 747.3182608098405, 340.93301662467786, 831.1677809876988, 734.5835869928356, 319.85968416099354], 'policy_0FEAST1_reward': [1998.6606786017535, 2364.47191187183, 2068.149101942693, 2115.574044117465, 2155.452234908247, 2390.0867105210627, 2466.082398919362, 2584.008911014819, 2501.6877475151296, 2530.481966145559, 2553.8290641102217, 2472.3211460960533, 2439.2803958333175, 2448.4286329027886, 2462.7031937872257, 2258.713777789062, 2433.3816001800647, 2424.663849815373, 2072.9438917482726, 2097.120966892187, 2353.450191959492, 2392.26596784091, 2490.5084179279033, 2312.1708522807166, 2182.125527249549, 2220.9742451528577, 2297.868856437076, 2272.4958256535056, 2320.993445784101, 2490.3953455127057, 2280.5034175909414, 2257.88395355472, 2138.5264093826186, 2198.058720394077, 1883.9219761182358, 2046.307397300687, 1845.4342747462051, 2066.472709659119, 1690.3518773146209, 1747.5410044590387, 1929.4629383329511, 1773.7404106710896, 1773.7648543816024, 1809.5213077306166, 1913.1439441974476, 1870.1583348363642, 1813.8261279809174, 1727.2718609588599, 1685.3342920113419, 1755.59760296998, 1724.201108633475, 1873.179601532172, 1682.7751343151165, 1449.5753674621133, 1636.462222321018, 1620.1103227840492, 1490.1694943926575, 1517.2228192083667, 1284.4545666398228, 1233.5463735038597, 1674.3949956080962, 1354.7965721874134, 1062.7964686172238, 1180.9227135451522, 1349.8244986126797, 1245.7698479676524, 1381.3031406109876, 1220.336607722811, 1320.0463612640663, 1454.7056396762907, 1376.1603462074681, 935.3339633129402, 1123.0732988451095, 1168.590134577923, 777.2527202149943, 839.3480133695294, 962.4483774287548, 1006.0289360130629, 974.2586105537364, 451.55883930965683, 725.488198689304, 568.1906030730854, 519.8884005759111, 670.7613743301504, 449.63054095433546, 703.4387538361049, 774.8590024630572, 429.01285470983044, 485.32965295596307, 344.58382919163796, 481.7661624111392, 464.14983748175194, 288.85426929622884, 319.4178684712387, -68.47305630048955, 386.8558649404874, 103.85394695658249, 322.61913079181073, 67.48489821971131, -96.43125592674394], 'policy_1FEAST_reward': [577.6080763264606, 766.9039536534499, 521.1817677594862, 594.3521196197971, 500.6978207250728, 592.0584576742248, 616.663683575549, 649.6958501116993, 633.4699052118851, 467.0332514262571, 598.525214211337, 468.2440080608271, 478.38845573681067, 541.2187503600625, 478.27449179460496, 406.7063644042932, 457.66520957719507, 482.7941462188944, 389.52391283207834, 323.3397271719732, 385.2953594410684, 394.52475754697787, 251.55165785888352, 203.86649213613384, 159.69296950266684, 141.9953009610769, 14.241733291714835, -41.30608083034088, 57.41142334665041, 41.58910720808417, -15.441615684391714, 5.959199283943293, -212.79691669315437, -231.59802555588803, -395.84755490032666, -244.46206178021987, -416.4660676501204, -282.9981307295795, -501.1919277929131, -513.56338565959, -367.92411373130045, -540.2300825131474, -553.2878203465941, -555.6253888202059, -506.91495509488124, -671.6705934456849, -633.5453643679715, -744.4889312034849, -859.9366385837222, -730.9518279317938, -620.6400535436928, -619.036105530929, -813.3539411367683, -985.5448024854059, -953.8865235431703, -987.3690114128492, -997.1678060200657, -951.3249109832096, -1222.663749710014, -1126.985056630163, -1091.975410153152, -1447.665151412898, -1418.5837699263966, -1361.1026651618495, -1217.074561060347, -1352.576736826413, -1273.538683188412, -1409.126175042114, -1462.4867263418748, -1378.5329301383738, -1356.6943800203198, -1644.6048391497543, -1419.0534203642014, -1455.025938226253, -1739.4314280952883, -1794.5250460081327, -1670.453555440467, -1779.2561912584824, -1811.8017651359864, -2315.1991399613153, -2041.145989949016, -2059.5967387355863, -2290.2702371338873, -2183.1089815440814, -2355.666784345629, -2098.4650773164076, -2072.716944628717, -2346.3945277078165, -2440.7605854560265, -2569.157959140404, -2437.8067474549694, -2625.9382999911095, -2693.4110066086196, -2511.423139629387, -2991.564806040925, -2641.249201462219, -2838.1257910448435, -2575.4148231649133, -2745.3495453939026, -2924.2566046361394], 'policy_1FEAST1_reward': [2670.0304090540058, 2987.330514332179, 2682.1645247788674, 2713.337161883294, 2900.1019168909065, 2813.502518099894, 3025.701937584481, 2888.733618444052, 3114.0583482541097, 2889.1070229809134, 2903.1809909020726, 2880.0622989292283, 2971.0966927666923, 3119.009059611818, 3035.85419026645, 3130.0674671501547, 3181.2578708332107, 3091.789739442091, 3026.855741991023, 2950.2710083223465, 2977.3504690753334, 3108.2892456443137, 2855.585526880955, 2955.8227363457177, 2856.4711553671823, 2949.596298874328, 2526.463950209484, 2451.699345979356, 2688.8804519816763, 2661.064013936375, 2557.9068793907195, 2739.912853783662, 2475.9598302408563, 2530.0063061951573, 2294.0275512588983, 2416.6502422821045, 1964.5948671176036, 2255.1228839899627, 2014.1622979032375, 2068.6874735673237, 2129.2082365100996, 2009.3259582036455, 1794.403309683469, 1937.4953544461184, 1758.2835150817987, 1542.012998220689, 1818.8791627510154, 1601.6350259800063, 1448.896208384518, 1528.0154735643182, 1468.9038434521995, 1555.9079653669676, 1129.0327103034033, 985.6629490878075, 965.5149482230185, 933.8641570301373, 952.4842932329715, 1012.6559099103184, 825.7952428564984, 935.1913767189797, 916.7417350383191, 599.9955144156025, 501.2916702467407, 719.2675290803811, 996.3592911278416, 885.23555184386, 900.3363404873481, 755.2206230409914, 758.4775130645994, 843.9014841650986, 996.1286459799807, 706.1026956734446, 999.3891055514541, 1038.0835318446923, 675.9747377688553, 621.1871788761034, 1039.2445489057282, 925.2561370065287, 1093.6386278747386, 508.00691641392274, 608.1279692978086, 619.4838892138594, 316.0878643948107, 431.412371914753, 76.73522107568807, 328.0439682250942, 107.70168577719315, -151.12695658306023, -151.06442850893364, -291.9107205427739, 58.54095377959624, -70.89954922779172, 72.31146022726004, 267.7129103300989, 63.53737438708716, 362.70787695192325, 186.52009636588878, 395.63538279183507, 162.08144987170687, 9.624635580169828], 'policy_1FWEST1_reward': [2679.408708265127, 2970.5520968403043, 2623.556690494879, 2884.5414623066667, 2601.692671846926, 2717.3908052742877, 2817.9431435984798, 2995.341146290805, 2824.6885297846875, 2852.852697163549, 2840.8855526245507, 2559.657583303956, 2495.516670250971, 2507.1243920832553, 2685.9240489043486, 2542.35996339358, 2717.79894543519, 2812.078762177102, 2797.0920290615118, 2563.9899561377524, 2783.6243353441287, 2931.8682888731264, 2690.7454122719632, 2813.162145717668, 2841.106701238265, 2561.208592475889, 2461.603868815465, 2531.1798133783873, 2663.3901813178354, 2684.693904017917, 2679.093738609073, 2650.1850379405796, 2420.0258834119472, 2393.4658283021504, 2212.4259419145083, 2394.0676347715703, 2372.0487345819124, 2545.4950324988426, 2168.7544951675573, 2166.724534594533, 2400.1223747859553, 2099.922671717542, 2281.6120405643765, 2223.7563415695226, 2212.664759830623, 2078.279425582357, 2246.58121214144, 2131.686813923445, 2034.0798126800007, 2105.375088029539, 2315.8087568124806, 2280.155022563193, 1997.920106390246, 2025.0961888310896, 1988.9965262521234, 1831.384178568216, 1904.3854063176398, 1917.797880839651, 1642.5982619029312, 1757.7591162889432, 1790.2751557774982, 1345.8026516693978, 1546.412436246942, 1516.8294419616775, 1810.6650911274305, 1552.981131828483, 1852.6195173849296, 1629.8761692253388, 1713.070650537982, 1769.4410647928264, 1862.718473768667, 1504.861459788936, 1749.4898214484951, 1718.50228070832, 1386.0893251991888, 1356.4325070507925, 1542.841286316721, 1355.2076672217777, 1348.951946986537, 881.9257617573661, 1192.6236827121609, 1136.065596297157, 969.0014009862065, 994.3111259417034, 906.5856925352109, 1094.6262979217838, 1124.6525232349534, 920.3469316143936, 772.9604445212822, 728.0852524947542, 870.384069444434, 672.851511802115, 666.4182277639067, 798.1771046538823, 394.1520125029376, 811.0708028245432, 577.3697237829709, 842.5381990968365, 646.1112382444321, 452.404200085213], 'policy_0FWEST_reward': [3161.895531223337, 3425.820775257298, 3091.0310071106933, 3137.787102272101, 2873.4446814093335, 3082.744178324731, 3130.2958204733873, 3131.478054526654, 3148.066793286908, 3215.5367086307956, 3074.3989952308357, 3060.409891795273, 3079.95792167215, 3124.126380013234, 3130.928425437124, 2950.157291668638, 3051.2485534714515, 3115.575594895122, 2925.2928223930235, 2888.2910000379625, 3119.7180958648396, 3112.906015958807, 3003.8474143391986, 3013.341095188835, 2907.1591853503423, 3038.556624223348, 2910.7638320493006, 2863.4218954048542, 3103.5895417814754, 3029.438762426149, 2938.969632947472, 2949.3132873775276, 2676.924194680582, 2639.828933553692, 2357.939464493447, 2588.930322476961, 2442.7900373967054, 2547.7611008836843, 2318.8659344222547, 2325.8422627938157, 2612.2529350840728, 2385.846020435516, 2409.268731053351, 2448.823301983068, 2390.087695794906, 2241.873264186591, 2408.3664428752554, 2363.193866083901, 2315.5927349796134, 2413.2037040416726, 2609.127403852907, 2458.821170405021, 2469.352578623949, 2343.240078660566, 2351.1210895623667, 2329.643532896389, 2348.1864837888284, 2311.789141554729, 2178.981315285229, 2331.5201696263, 2343.544835099916, 1934.6064758911193, 1996.9184229510352, 2086.3464685723943, 2154.1776322626265, 2033.3889193759378, 2121.557025660802, 2012.3208954405666, 1942.0647380653913, 1952.0952979938988, 1982.9645805526243, 1791.058752877137, 2013.8610480343605, 1966.379100229403, 1620.1006268508636, 1551.9146042097764, 1741.0116610078824, 1580.559800201048, 1653.6990842836365, 1085.7746425600126, 1431.0222270632278, 1442.6698416823133, 1187.629927081767, 1286.6400193680195, 1139.7144844750128, 1298.7704408828663, 1371.1496639185614, 1116.843190270066, 999.0799282181601, 944.7072147535224, 991.2702730743799, 841.7615973167831, 739.9816025622486, 944.8405559683928, 377.13293249579203, 816.8131956787468, 488.6434978952058, 812.0036702282596, 668.5379919698395, 458.6372825058605]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.89002530000189, 'mean_inference_ms': 5.126073727077315, 'mean_action_processing_ms': 0.5491815541431333, 'mean_env_wait_ms': 8.423381205064157, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {}, 'num_episodes': 0, 'episode_return_max': 21523.912350273145, 'episode_return_min': -1294.441328966933, 'episode_return_mean': 12376.505387689154, 'episodes_this_iter': 0}",{},"{'learner': {'1FWEST': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 15.585890248945605, 'cur_kl_coeff': 9.262305278225771e-20, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.133694656193256, 'policy_loss': -0.0004515602010845517, 'vf_loss': 8.134146212538083, 'vf_explained_var': -4.121921956539154e-05, 'kl': 0.010957721841077964, 'entropy': -1.7783307994405428, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '0FWEST': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 32.88093289838483, 'cur_kl_coeff': 3.469446951953615e-19, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.731304128964743, 'policy_loss': 0.003165417871787213, 'vf_loss': 8.72813870559136, 'vf_explained_var': -6.829698880513509e-10, 'kl': 0.008857792762683244, 'entropy': -2.439697431027889, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '1FWEST1': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 26.129323599797985, 'cur_kl_coeff': 1.7347234759768074e-19, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.448007849852244, 'policy_loss': -0.0007037108346897488, 'vf_loss': 8.448711543281872, 'vf_explained_var': -6.07222318649292e-08, 'kl': 0.011219161814400103, 'entropy': -2.053095563004414, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '1FEAST': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 29.29169760545095, 'cur_kl_coeff': 6.098637220230964e-21, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.249176223079363, 'policy_loss': 0.0009908958008357635, 'vf_loss': 9.248185348510741, 'vf_explained_var': 0.001598258875310421, 'kl': 0.010516965188668398, 'entropy': -2.261194821447134, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '0FEAST': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 15.40239530969411, 'cur_kl_coeff': 6.352747104407251e-23, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.903997975587846, 'policy_loss': 0.00023792995683227975, 'vf_loss': 8.903760070602099, 'vf_explained_var': 0.00018682715793450673, 'kl': 0.0072154351218463465, 'entropy': -1.650610850006342, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '0FEAST1': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 5.643188134759354, 'cur_kl_coeff': 3.993608332681371e-31, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.374669187764328, 'policy_loss': -0.0002259420587506611, 'vf_loss': 8.374895123640696, 'vf_explained_var': 0.0004347667098045349, 'kl': 0.005138272792661761, 'entropy': -0.6544906464715798, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '0FWEST1': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 6.481838798088332, 'cur_kl_coeff': 3.94430452610506e-32, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.004426185786723, 'policy_loss': 0.0007836303314737355, 'vf_loss': 8.003642535209655, 'vf_explained_var': 0.0015106026704112688, 'kl': 0.00365569557169092, 'entropy': -0.702934006601572, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '1FEAST1': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 24.85555084031075, 'cur_kl_coeff': 1.0231815394945445e-13, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.477653539180755, 'policy_loss': 0.0014592426271216633, 'vf_loss': 8.476194338003795, 'vf_explained_var': -5.774199962615967e-09, 'kl': 0.011032536034690565, 'entropy': -1.9453019631405672, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}}, 'num_env_steps_sampled': 800000, 'num_env_steps_trained': 800000, 'num_agent_steps_sampled': 6400000, 'num_agent_steps_trained': 6400000}",6400000,6400000,6400000,800000,800000,4000,68.2054,800000,4000,68.2054,2,0,0,4000,"{'cpu_util_percent': 7.025, 'ram_util_percent': 60.83333333333332}","{'training_iteration_time_ms': 59350.0, 'restore_workers_time_ms': 0.012, 'training_step_time_ms': 59349.971, 'sample_time_ms': 28158.075, 'learn_time_ms': 31185.322, 'learn_throughput': 128.265, 'synch_weights_time_ms': 6.278}"


[36m(PPO pid=1150781)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/AD/user/ray_results/PPO_2024-11-11_09-23-15/PPO_UserMultiAgentEnv_95577_00000_0_2024-11-11_09-23-16/checkpoint_000000)
2024-11-11 13:02:05,801	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/AD/user/ray_results/PPO_2024-11-11_09-23-15' in 0.0911s.
2024-11-11 13:02:06,114	INFO tune.py:1041 -- Total run time: 13129.40 seconds (13128.97 seconds for the tuning loop).


In [4]:
best_result = results.get_best_result()
print("Best Hyperparameters found: ", best_result)

Best Hyperparameters found:  Result(
  metrics={'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'1FWEST': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 15.585890248945605, 'cur_kl_coeff': 9.262305278225771e-20, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.133694656193256, 'policy_loss': -0.0004515602010845517, 'vf_loss': 8.134146212538083, 'vf_explained_var': -4.121921956539154e-05, 'kl': 0.010957721841077964, 'entropy': -1.7783307994405428, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 191520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, '0FWEST': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 32.88093289838483, 'cur_kl_coeff': 3.469446951953615e-19, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 8.731304128964743, 'policy_loss': 0.003165417871787213, 'vf_loss': 8.72813870559136, 'vf_explained_var': -6.829698880513509e-10, 'kl': 0.008857792762683244, 'entro

In [6]:
from ray.rllib.algorithms.callbacks import DefaultCallbacks
from controllables.core import BaseVariable
from controllables.core.tools.records import VariableRecords

class PMVVariable(BaseVariable):
    def __init__(
        self, 
        tdb: BaseVariable,
        tr: BaseVariable,
        rh: BaseVariable,
        metab_rate=1.5, clothing=.5, pmv_limit=.5,
    ):
        self.tdb = tdb
        self.tr = tr
        self.rh = rh
        self._metab_rate = _numpy_.asarray(metab_rate)
        self._clothing = _numpy_.asarray(clothing)
        self._pmv_limit = _numpy_.asarray(pmv_limit)
    
    @property
    def value(self):
        res = pytc.models.pmv_ppd(
            tdb=self.tdb.value, 
            tr=self.tr.value, 
            # calculate relative air speed
            vr=pytc.utilities.v_relative(v=0.1, met=self._metab_rate), 
            rh=self.rh.value, 
            met=self._metab_rate, 
            # calculate dynamic clothing
            clo=pytc.utilities.clo_dynamic(clo=self._clothing, met=self._metab_rate),
            limit_inputs=False,
        )['pmv']

        return res

class PlottingCallbacks(DefaultCallbacks):
    def __init__(self):
        self.env_records: dict[object, VariableRecords] = None

    def on_episode_start(self, *, episode, worker, **kwargs):
        env: UserMultiAgentEnv = worker.env
        system = env.system
        if self.env_records is None:
            system.add('logging:progress')
            self.env_records = dict()
            for agent_ref in env.agents:
                if agent_ref == 'CHILLER' or agent_ref == 'AHU':
                    # self.env_records[agent_ref] = records = VariableRecords({
                    # 'AHU COOLING COIL': system[OutputVariable.Ref('Cooling Coil Total Cooling Rate', 'AIR LOOP AHU COOLING COIL')],
                    # 'Electricity': system[OutputMeter.Ref('Electricity:HVAC')]
                    # }, maxlen=10_000)
                    pass
                else:
                    tdb = env.agents[agent_ref].observation['temperature:drybulb']
                    tr = env.agents[agent_ref].observation['temperature:radiant']
                    rh = env.agents[agent_ref].observation['humidity']
                    pmv = PMVVariable(tdb=tdb, tr=tr, rh=rh)
                    self.env_records[agent_ref] = records = VariableRecords({
                        '🕰️': system['time'],
                        '🍩': env.agents[agent_ref].reward,
                        'pmv': pmv,
                        'occupancy': env.agents[agent_ref].observation['Office Occupancy'],
                        # 'tstat': env.agents[agent_ref].action['thermostat'],
                        'temp': tdb,
                        # 'AHU COOLING COIL': system[OutputVariable.Ref('Cooling Coil Total Cooling Rate', 'AIR LOOP AHU COOLING COIL')],
                        'elec': system[OutputMeter.Ref('Electricity:HVAC')],
                    }, maxlen=10_000)
                    # display(
                    #     records.plot.scatter(x='🕰️', y='pv', label=repr(agent_ref))
                    #     .watch(records.events['change'] % 1_000)
                    # )

    def on_episode_step(self, *, episode, **kwargs):
        for _, env_records in self.env_records.items():
            env_records.poll()

    def on_episode_end(self, *, episode, **kwargs):
        for agent_ref, env_records in self.env_records.items():
            env_records.dataframe().to_csv(f'datasave/20241110/records_train_tradiction-{agent_ref}.csv', index=False)


In [7]:
config_eval = (
    get_config()
    .env_runners(
        num_env_runners=0,
        create_env_on_local_worker=True,
    )
    .evaluation(
        evaluation_duration=1,
        evaluation_duration_unit='episodes',
        #evaluation_interval=1,
        evaluation_num_env_runners=0,
    )
    .callbacks(PlottingCallbacks)
)

algo_eval = PPO(config_eval)
#algo_eval.restore(best_result.checkpoint)
algo_eval.restore('/home/AD/user/ray_results/PPO_2024-11-10_04-06-03/PPO_UserMultiAgentEnv_1bb96_00001_1_2024-11-10_04-06-06/checkpoint_000004')
#algo_eval.restore("/home/AD/user/ray_results/PPO_2024-10-18_15-32-34/PPO_UserMultiAgentEnv_331fb_00000_0_2024-10-18_15-32-35/checkpoint_000013")

`UnifiedLogger` will be removed in Ray 2.7.
  return UnifiedLogger(config, logdir, loggers=None)
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
  self._loggers.append(cls(self.config, self.logdir, self.trial))
2024-11-10 05:21:06,345	INFO trainable.py:583 -- Restored on 192.168.200.249 from checkpoint: Checkpoint(filesystem=local, path=/home/AD/user/ray_results/PPO_2024-11-10_04-06-03/PPO_UserMultiAgentEnv_1bb96_00001_1_2024-11-10_04-06-06/checkpoint_000004)


In [8]:
for _ in range(2):
    algo_eval.evaluate()



  0%|          | 0/100 [00:00<?, ?it/s]