In [None]:
from ray.tune.schedulers import PopulationBasedTraining
from ray import tune, air
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.algorithms.ppo import PPO, PPOConfig

from bmstestbedc2f2.envs import MultiAgentBuildingEnv
from bmstestbedc2f2.utils import resolve_path


# Population Based Training scheduler handed to the Tuner below.
# Trials are compared on `training_iteration`; every 4 iterations a trial may
# be perturbed using the search space in `hyperparam_mutations`.
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    perturbation_interval=4,
    resample_probability=0.25,
    hyperparam_mutations={
        # Log-uniform over the same bounds as before: a plain uniform draw on
        # [1e-5, 0.1] lands above 1e-3 about 99% of the time, so the small
        # learning rates would almost never be explored.
        "lr": tune.loguniform(1e-5, 0.1),
        # Single-element lists: a perturbation can only ever pick this value.
        "batch_mode": ["complete_episodes"],
        "train_batch_size": [4000],
        # `minibatch_size` / `num_epochs` replace the legacy
        # `sgd_minibatch_size` / `num_sgd_iter` keys. The recorded run reported
        # num_sgd_iter=20 while still doing 960 gradient updates per iteration
        # (30 epochs x 32 minibatches), i.e. the legacy keys were not applied.
        # NOTE(review): confirm these names against the installed Ray version.
        "minibatch_size": [32, 64, 128, 256, 512],
        "num_epochs": [10, 20, 30],
        "clip_param": tune.uniform(0.1, 0.3),
    },
)

# Build the PPO algorithm config on the old RLlib API stack (RLModule/Learner
# and EnvRunner/ConnectorV2 both disabled), then let the project helper
# `MultiAgentBuildingEnv.get_algo_config` finish it for the EnergyPlus backend.
ppo_config = (
    PPOConfig()
    .api_stack(
        enable_rl_module_and_learner=False,
        enable_env_runner_and_connector_v2=False,
    )
    # `env_runners()` is the builder that owns `num_env_runners`;
    # `rollouts()` is its deprecated predecessor.
    .env_runners(
        sample_timeout_s=60,
        num_env_runners=4,
        rollout_fragment_length='auto',
    )
    .resources(num_gpus=1.)
)

tuner = tune.Tuner(
    PPO,
    param_space=MultiAgentBuildingEnv.get_algo_config(
        ppo_config,
        env_config=dict(
            bms_system='energyplus',
        ),
    ),
    tune_config=tune.TuneConfig(
        scheduler=pbt,
        # NOTE(review): PBT exploits/explores across a population of trials;
        # with a single sample there is nothing to compare against, so the
        # scheduler never perturbs. Raise this once resources allow
        # (each trial requests one GPU and four env runners).
        num_samples=1,
        metric="env_runners/episode_reward_mean",
        mode="max",
    ),
    run_config=air.RunConfig(
        stop={"training_iteration": 200},
        checkpoint_config=air.CheckpointConfig(
            # Checkpoint on the same cadence as the scheduler's
            # perturbation_interval (4): PBT clones trials from checkpoints,
            # and a trial that fails or is interrupted before iteration 200
            # still leaves a usable checkpoint behind.
            checkpoint_frequency=4,
            checkpoint_at_end=True,
        ),
        verbose=2,
    ),
)

# Runs the experiment to completion; this is the long-running step.
results = tuner.fit()

  from .autonotebook import tqdm as notebook_tqdm
2025-01-22 16:38:16,573	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-01-22 16:38:16,900	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
  self.start_gcs_server()
  self.start_gcs_server()
  self.start_monitor()
  self.start_monitor()
  self.start_api_server(
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_log_monitor()
2025-01-22 16:38:23,032	INFO worker.py:1816 -- Started a local Ray instance.
2025-01-22 16:38:23,595	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2025-01-22 16:38:23,597	INFO tune.py:616 -- [output] This uses the legacy output and progress repor

0,1
Current time:,2025-01-22 16:44:00
Running for:,00:05:36.81
Memory:,51.7/62.8 GiB

Trial name,status,loc,num_sgd_iter,iter,total time (s),ts,num_healthy_workers,num_in_flight_async_ sample_reqs,num_remote_worker_re starts
PPO_MultiAgentBuildingEnv_4bfe2_00000,RUNNING,192.168.200.249:60173,20,4,253.184,16000,4,0,0


[36m(PPO pid=60173)[0m Install gputil for GPU system monitoring.


Trial name,agent_timesteps_total,counters,custom_metrics,env_runners,episode_media,info,num_agent_steps_sampled,num_agent_steps_sampled_lifetime,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_lifetime,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_healthy_workers,num_in_flight_async_sample_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,timers
PPO_MultiAgentBuildingEnv_4bfe2_00000,160000,"{'num_env_steps_sampled': 20000, 'num_env_steps_trained': 20000, 'num_agent_steps_sampled': 160000, 'num_agent_steps_trained': 160000}",{},"{'episode_reward_max': np.float64(4.558956759103771), 'episode_reward_min': np.float64(3.5911330384479125), 'episode_reward_mean': np.float64(4.00295894558848), 'episode_len_mean': np.float64(4608.0), 'episode_media': {}, 'episodes_timesteps_total': 18432, 'policy_reward_min': {'ART-01-12': np.float64(0.37167016719436274), 'ART-01-07': np.float64(0.3575280315706255), 'ART-01-11a': np.float64(0.3858123028180921), 'ART-01-10': np.float64(0.4648015323826841), 'ART-01-13': np.float64(0.3375223117691081), 'ART-01-09': np.float64(0.4648015323826889), 'ART-01-08': np.float64(0.4989493878079416), 'ART-01-14': np.float64(0.3246698345253709)}, 'policy_reward_max': {'ART-01-12': np.float64(0.49308580363014587), 'ART-01-07': np.float64(0.6640810894949116), 'ART-01-11a': np.float64(0.6216546826237189), 'ART-01-10': np.float64(0.6388125060536645), 'ART-01-13': np.float64(0.596386099182461), 'ART-01-09': np.float64(0.6529546416773826), 'ART-01-08': np.float64(0.5792282757525274), 'ART-01-14': np.float64(0.6105282348062041)}, 'policy_reward_mean': {'ART-01-12': np.float64(0.4499885100390165), 'ART-01-07': np.float64(0.4641306456627473), 'ART-01-11a': np.float64(0.4994859847220785), 'ART-01-10': np.float64(0.5454479254992023), 'ART-01-13': np.float64(0.4641306456627471), 'ART-01-09': np.float64(0.538376857687336), 'ART-01-08': np.float64(0.5348413237814075), 'ART-01-14': np.float64(0.5065570525339449)}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [np.float64(4.558956759103771), np.float64(3.722894525612614), np.float64(4.138851459189625), np.float64(3.5911330384479125)], 'episode_lengths': [4608, 4608, 4608, 4608], 'policy_ART-01-12_reward': [np.float64(0.4691068785688819), np.float64(0.37167016719436274), np.float64(0.4660911907626757), np.float64(0.49308580363014587)], 
'policy_ART-01-07_reward': [np.float64(0.4408226073214212), np.float64(0.3575280315706255), np.float64(0.6640810894949116), np.float64(0.3940908542640308)], 'policy_ART-01-11a_reward': [np.float64(0.5963860991824699), np.float64(0.3858123028180921), np.float64(0.6216546826237189), np.float64(0.3940908542640331)], 'policy_ART-01-10_reward': [np.float64(0.6388125060536645), np.float64(0.4989493878079373), np.float64(0.5792282757525233), np.float64(0.4648015323826841)], 'policy_ART-01-13_reward': [np.float64(0.596386099182461), np.float64(0.5838022015503221), np.float64(0.3388119701490969), np.float64(0.3375223117691081)], 'policy_ART-01-09_reward': [np.float64(0.6529546416773826), np.float64(0.4706651165604797), np.float64(0.5650861401287924), np.float64(0.4648015323826889)], 'policy_ART-01-08_reward': [np.float64(0.5539596923112767), np.float64(0.4989493878079416), np.float64(0.5792282757525274), np.float64(0.5072279392538839)], 'policy_ART-01-14_reward': [np.float64(0.6105282348062041), np.float64(0.5555179303028613), np.float64(0.3246698345253709), np.float64(0.5355122105013433)]}, 'sampler_perf': {'mean_raw_obs_processing_ms': np.float64(1.0305325396773055), 'mean_inference_ms': np.float64(6.540494947517361), 'mean_action_processing_ms': np.float64(0.6689045701871533), 'mean_env_wait_ms': np.float64(15.430571555328674), 'mean_env_render_ms': np.float64(0.0)}, 'num_faulty_episodes': 0, 'connector_metrics': {}, 'num_episodes': 4, 'episode_return_max': np.float64(4.558956759103771), 'episode_return_min': np.float64(3.5911330384479125), 'episode_return_mean': np.float64(4.00295894558848), 'episodes_this_iter': 4}",{},"{'learner': {'ART-01-12': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(4.7929893), 'cur_kl_coeff': np.float64(0.025000000000000005), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.02324621305654849), 'policy_loss': np.float64(-0.0015414080994863373), 'vf_loss': 
np.float64(0.02464271943899803), 'vf_explained_var': np.float64(0.6771598181376854), 'kl': np.float64(0.005796045886218812), 'entropy': np.float64(1.4458251502364874), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-10': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(2.8303492), 'cur_kl_coeff': np.float64(0.05000000000000001), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.03615045921192935), 'policy_loss': np.float64(-0.0035087897849734873), 'vf_loss': np.float64(0.03938844410004094), 'vf_explained_var': np.float64(0.5268643969049056), 'kl': np.float64(0.005416101908318185), 'entropy': np.float64(1.3522593535482883), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-09': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(3.9600205), 'cur_kl_coeff': np.float64(0.025000000000000005), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.0409047505752369), 'policy_loss': np.float64(-0.00252810136929232), 'vf_loss': np.float64(0.04329691142775118), 'vf_explained_var': np.float64(0.44377506064871947), 'kl': np.float64(0.0054376091257808), 'entropy': np.float64(1.3477832765628894), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-08': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(3.6315522), 'cur_kl_coeff': np.float64(0.05000000000000001), 'cur_lr': 
np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.038545850205431026), 'policy_loss': np.float64(-0.00398709509948579), 'vf_loss': np.float64(0.04225110129142801), 'vf_explained_var': np.float64(0.5638547503078978), 'kl': np.float64(0.005636890790151488), 'entropy': np.float64(1.4397114124149084), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-14': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(4.9424896), 'cur_kl_coeff': np.float64(0.05000000000000001), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.03212064906259305), 'policy_loss': np.float64(-0.0044324951032952715), 'vf_loss': np.float64(0.03626562864519656), 'vf_explained_var': np.float64(0.43128875984499854), 'kl': np.float64(0.00575031687126237), 'entropy': np.float64(1.2618089934190115), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-11a': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(3.6728013), 'cur_kl_coeff': np.float64(0.10000000000000002), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.03632908400465264), 'policy_loss': np.float64(-0.0035892732989547464), 'vf_loss': np.float64(0.03938137197207349), 'vf_explained_var': np.float64(0.5106781883786122), 'kl': np.float64(0.00536985435699104), 'entropy': np.float64(1.341229240472118), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-07': {'learner_stats': 
{'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(2.648308), 'cur_kl_coeff': np.float64(0.05000000000000001), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.041479344680465144), 'policy_loss': np.float64(-0.0009378355801648771), 'vf_loss': np.float64(0.04212490142478297), 'vf_explained_var': np.float64(0.5236458928634723), 'kl': np.float64(0.00584558087314413), 'entropy': np.float64(1.4514542040725549), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}, 'ART-01-13': {'learner_stats': {'allreduce_latency': np.float64(0.0), 'grad_gnorm': np.float32(3.383266), 'cur_kl_coeff': np.float64(0.05000000000000001), 'cur_lr': np.float64(5.0000000000000016e-05), 'total_loss': np.float64(0.027216074999220533), 'policy_loss': np.float64(-0.0045781071433642255), 'vf_loss': np.float64(0.03151584446701842), 'vf_explained_var': np.float64(0.49333726465702055), 'kl': np.float64(0.005566746422908868), 'entropy': np.float64(1.3360934479782978), 'entropy_coeff': np.float64(0.0)}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': np.float64(125.0), 'num_grad_updates_lifetime': np.float64(4320.5), 'diff_num_grad_updates_vs_sampler_policy': np.float64(479.5)}}, 'num_env_steps_sampled': 20000, 'num_env_steps_trained': 20000, 'num_agent_steps_sampled': 160000, 'num_agent_steps_trained': 160000}",160000,160000,160000,20000,20000,4000,56.324,20000,4000,56.324,4,0,0,4000,"{'cpu_util_percent': np.float64(37.780392156862746), 'ram_util_percent': np.float64(82.34215686274507)}","{'training_iteration_time_ms': 64836.375, 'restore_workers_time_ms': 0.019, 'training_step_time_ms': 64836.327, 'sample_time_ms': 24341.462, 'learn_time_ms': 40484.91, 'learn_throughput': 98.802, 'synch_weights_time_ms': 9.626}"


In [None]:
# Pick the best trial according to the metric/mode configured in TuneConfig
# and export its checkpoint for later use.
best_result = results.get_best_result()

# A trial that errored or was stopped before writing any checkpoint reports
# `checkpoint=None`; fail with an actionable message instead of an
# AttributeError on `None.to_directory`.
if best_result.checkpoint is None:
    raise RuntimeError(
        "Best trial has no checkpoint to export "
        f"(trial error: {best_result.error!r}). "
        "Check the training logs and re-run the tuning cell."
    )

best_result.checkpoint.to_directory(resolve_path('algo_checkpoint'))
best_result

[36m(PPO pid=57755)[0m 2025-01-22 16:35:37,921	ERROR actor_manager.py:804 -- Ray error ([36mray::RolloutWorker.apply()[39m (pid=57874, ip=192.168.200.249, actor_id=dedd1d90b621e1f07dd735e701000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f241765e7d0>)
[36m(PPO pid=57755)[0m            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[36m(PPO pid=57755)[0m            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[36m(PPO pid=57755)[0m   File "/home/AD/user/lab/bmstestbedc2f2/.venv/lib/python3.11/site-packages/ray/rllib/utils/actor_manager.py", line 194, in apply
[36m(PPO pid=57755)[0m     raise e
[36m(PPO pid=57755)[0m   File "/home/AD/user/lab/bmstestbedc2f2/.venv/lib/python3.11/site-packages/ray/rllib/utils/actor_manager.py", line 183, in apply
[36m(PPO pid=57755)[0m     return func(self, *args, **kwargs)
[36m(PPO pid=57755)[0m            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
[36m(PPO pid=57755)[0m   File "/home/AD/user/lab/bmstestbedc2f2/.venv/lib/python3.11/site-package

AttributeError: 'NoneType' object has no attribute 'to_directory'