In [1]:
from ray.tune.schedulers import PopulationBasedTraining
from ray import tune, air
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.algorithms.ppo import PPO, PPOConfig
from ray.rllib.algorithms.callbacks import DefaultCallbacks
from bmstestbedc2f2.envs import _TODO_MultiAgentBuildingEnv, MultiAgentBuildingEnv
from bmstestbedc2f2.utils import resolve_path
import numpy as np

pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    perturbation_interval=4,
    resample_probability=0.25,
    hyperparam_mutations={
        "lr": tune.uniform(1e-5, 0.1),
        "batch_mode": ["complete_episodes"],
        "train_batch_size": [4608],
        "sgd_minibatch_size": [32, 64, 128, 256, 512],
        "num_sgd_iter": [10, 20, 30],
        "clip_param": tune.uniform(0.1, 0.3),
    },
)

tuner = tune.Tuner(
    PPO,
    param_space=(
        _TODO_MultiAgentBuildingEnv.get_algo_config(
            PPOConfig()
            .api_stack(
                enable_rl_module_and_learner=False,
                enable_env_runner_and_connector_v2=False,
            )
            .rollouts(
                sample_timeout_s=60,
                #num_env_runners=1, 
                rollout_fragment_length='auto',
                # rollout_fragment_length=200,
            )
            .resources(num_gpus=.25),
            env_config=dict(
                #bms_system='energyplus',
                bms_system='neural_v2',
            ),
        )
    ),
    tune_config=tune.TuneConfig(
        #reuse_actors=True,
        scheduler=pbt,
        num_samples=1,
        metric="env_runners/episode_reward_mean",
        mode="max",
    ),
    run_config=air.RunConfig(
        stop={"training_iteration": 100},
        checkpoint_config=air.CheckpointConfig(
            checkpoint_at_end=True
        ),
        verbose=2,
    )
)
results = tuner.fit()

  from .autonotebook import tqdm as notebook_tqdm
2025-03-15 17:47:47,639	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-03-15 17:47:48,128	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
  self.start_gcs_server()
  self.start_gcs_server()
  self.start_monitor()
  self.start_monitor()
  self.start_api_server(
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_log_monitor()
2025-03-15 17:48:01,601	INFO worker.py:1816 -- Started a local Ray instance.
2025-03-15 17:48:02,563	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
2025-03-15 17:48:02,566	INFO tune.py:616 -- [output] This uses the legacy output and progress repor

0,1
Current time:,2025-03-15 20:33:12
Running for:,02:45:10.14
Memory:,33.8/251.7 GiB

Trial name,status,loc,num_sgd_iter,iter,total time (s),ts,num_healthy_workers,num_in_flight_async_ sample_reqs,num_remote_worker_re starts
PPO__TODO_MultiAgentBuildingEnv_a4554_00000,TERMINATED,192.168.200.249:1577594,30,100,9876.45,400000,2,0,0


[36m(PPO pid=1577594)[0m Trainable.setup took 12.998 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[36m(PPO pid=1577594)[0m Install gputil for GPU system monitoring.
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")
[36m(RolloutWorker pid=1577789)[0

Trial name,agent_timesteps_total,counters,custom_metrics,env_runners,episode_media,info,num_agent_steps_sampled,num_agent_steps_sampled_lifetime,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_lifetime,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_healthy_workers,num_in_flight_async_sample_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,timers
PPO__TODO_MultiAgentBuildingEnv_a4554_00000,3200000,"{'num_env_steps_sampled': 400000, 'num_env_steps_trained': 400000, 'num_agent_steps_sampled': 3200000, 'num_agent_steps_trained': 3200000}",{},"{'episode_reward_max': 809.8706512192474, 'episode_reward_min': 734.1581639625548, 'episode_reward_mean': 768.6047794690238, 'episode_len_mean': 1000.0, 'episode_media': {}, 'episodes_timesteps_total': 100000, 'policy_reward_min': {'ART-01-12': 35.367281421271834, 'ART-01-10': 104.08052100783752, 'ART-01-08': 103.20550929079909, 'ART-01-11a': 104.44760916383451, 'ART-01-07': 103.19001954576464, 'ART-01-14': 104.37678836597786, 'ART-01-13': 103.32099515750562, 'ART-01-09': 63.68459223907213}, 'policy_reward_max': {'ART-01-12': 75.2876970447222, 'ART-01-10': 107.59912523631449, 'ART-01-08': 107.22831576350401, 'ART-01-11a': 107.91756299212747, 'ART-01-07': 106.65983022021437, 'ART-01-14': 107.83598513356107, 'ART-01-13': 107.2064056785184, 'ART-01-09': 92.14188966527816}, 'policy_reward_mean': {'ART-01-12': 53.38047170401089, 'ART-01-10': 105.96667042163885, 'ART-01-08': 105.32961941731024, 'ART-01-11a': 106.43713869673624, 'ART-01-07': 104.90365483610651, 'ART-01-14': 106.33192620868093, 'ART-01-13': 105.38619644373095, 'ART-01-09': 80.86910174080947}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [740.2014297222865, 752.8107581395728, 749.7641706480733, 750.0312684137812, 734.1581639625548, 737.6475555677458, 737.2000107224072, 740.0953942513869, 739.1776264255703, 737.0623247272275, 747.3589949485809, 749.2321155118879, 760.6336899029104, 752.970949973286, 747.5681943078168, 756.0089131162698, 761.7023187459073, 761.7887511182275, 768.2502709055434, 770.3855510930696, 766.3119903137314, 763.5348318349406, 765.6039314458958, 763.993681654103, 762.6773462509443, 763.6547356689207, 766.2863241625535, 757.6105369123252, 762.5724505390892, 759.1970779057323, 766.8199316478574, 758.2968110814365, 761.7876763259981, 757.8089424908195, 756.1068535677196, 763.365891676049, 765.1949510070455, 755.482779205287, 761.8070510157238, 762.0696542302778, 762.4084549486172, 759.6754380640314, 762.1235267280558, 758.0897583932365, 740.8708250770545, 744.4462764472556, 740.5949493600764, 744.3324107245269, 744.0470954870427, 747.7541968485293, 746.2332454163022, 739.7716804395676, 748.3393458982537, 745.6303613046769, 744.0081415056828, 744.9738461879068, 762.1279668144701, 760.0097209948054, 766.5079642864416, 764.8805556200865, 763.0708887411901, 762.9005279131792, 762.2193525022756, 769.3783837906175, 777.5187832790612, 772.4508776446172, 770.9827814460187, 776.0792269050522, 780.2041652767665, 771.275112872558, 775.1652723462781, 774.4747254453915, 779.7530514930405, 783.5128272453856, 788.1590656286781, 786.7605244041067, 787.8689896159311, 795.7073974824328, 788.9125148584258, 791.7406056004966, 799.0493895998572, 795.1335726239299, 794.7531437675963, 802.8047594015005, 801.2129231324219, 800.6665055462804, 802.3289469906532, 799.2169136542217, 806.5614107313905, 809.8706512192474, 806.3875411179029, 807.9590164776249, 799.8503609814235, 802.660949250057, 802.5484191608211, 808.5645761239127, 801.6919515791254, 796.8100373518677, 802.4407305498827, 800.7734114639803], 'episode_lengths': [1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000], 'policy_ART-01-12_reward': [46.86796478518707, 48.10202019175193, 49.45267180396959, 47.72454963669129, 38.52106767783844, 42.301028947460324, 41.06515338967481, 42.66842489558366, 37.74464864980662, 35.367281421271834, 44.82655952858931, 44.23300059533598, 51.691795015581235, 45.88991226308896, 45.239171132242895, 50.93574105725385, 51.594555492145766, 49.50310398788387, 55.545167724520674, 53.62699528810725, 54.792182616032186, 53.571960896987555, 55.72207699270979, 55.379553039453704, 49.28115412969014, 48.77027716982229, 53.28863432188476, 49.92940897958651, 50.911969439404956, 51.91182479673889, 54.58418484139158, 49.6342982481825, 51.372552140543064, 48.9522266243231, 45.76594767450717, 51.771273383297924, 53.477690629336976, 47.037069758157436, 48.93161810340046, 48.475982549216695, 52.720135697796216, 48.33929576251602, 52.5274343674791, 48.482905732710634, 38.302835886383434, 41.258861317800346, 37.65846928088693, 41.92180306284336, 39.570283722951785, 42.24210365001346, 41.591046792644, 36.75877275959718, 39.79065496770055, 39.69679601114872, 38.71946371548508, 41.64530251396714, 48.505667600626694, 50.82995890322103, 53.60407056572949, 50.78399704642377, 47.96268847230059, 47.96752090513174, 47.174154049333325, 51.03984265106776, 56.085123642786264, 52.79846313427623, 51.98977470239799, 55.165367255410715, 56.837472314651265, 49.566198730429264, 54.092845936120085, 55.24642308506103, 57.41452026303535, 61.56642108884428, 62.184723323300176, 62.110276785918145, 60.44176285837911, 67.71397796081484, 60.74961673832089, 63.582332990286666, 66.42363205266432, 63.79423304962819, 63.7684272209215, 69.44836460865369, 69.0540646994639, 67.63008921406528, 69.75005446877586, 68.00529560047, 71.55165228102204, 75.2876970447222, 73.28513528656232, 74.30432376897897, 67.81697195474258, 68.95835176449027, 68.35775704271737, 72.52032050952104, 69.59639507673603, 66.05011146802687, 70.0585659210746, 69.28366130340756], 'policy_ART-01-10_reward': [104.80612961585625, 105.81091938607761, 105.45922919809529, 105.61702681766809, 105.54532256003145, 105.54084921606122, 105.58416387430967, 105.44519419633993, 106.19803376581696, 106.0395153389028, 105.99612429432909, 106.24150928179957, 106.1742865127812, 106.19773879442741, 105.8710960473091, 105.84704001262787, 106.19114341387758, 106.05951139701887, 106.27659655952206, 106.59708524143541, 106.0613729817112, 106.08773192106493, 106.11988745958907, 105.79245926453949, 105.9067356788269, 106.0410198490704, 105.95380644228334, 105.70163226391288, 105.31370759890149, 105.18086294639546, 105.33971905780555, 105.21769775532341, 105.42252264345707, 105.30436517511251, 105.24399998386063, 105.49872737563518, 105.63647718817309, 105.14499420755018, 105.37453885595691, 105.78835782949058, 104.8077424891979, 105.0891501092214, 105.1385120619774, 104.90965549368212, 104.26797351016762, 104.28488317209738, 104.13949419274886, 104.23782495570988, 104.43081946493699, 104.29916856552057, 104.28738647043025, 104.08052100783752, 104.95060589313955, 104.60663866540885, 104.68566599241022, 104.49776944204649, 105.41870909010231, 105.0838350610115, 105.30270449349807, 105.30410412883589, 105.61014504252354, 105.61127566154154, 105.42556917206448, 105.86517448301579, 106.27968260951384, 106.1680658611179, 106.2165745436338, 106.37078826660046, 106.36276442726057, 106.41033313472032, 106.37188514290014, 106.30593014213161, 106.41063386372004, 106.1948005540909, 106.40593083303123, 106.32762966008498, 107.10748944928673, 107.12534473837145, 107.13969236112355, 106.96043027233146, 107.48572884440375, 107.36980612045694, 107.35175032444229, 107.51490599813496, 107.18845901801106, 107.42406924862846, 107.21423662415539, 107.1211178115425, 107.59912523631449, 107.48193200196506, 107.3674186839373, 107.38624438207889, 107.28307514851107, 107.43276049505505, 107.45737005766215, 107.50966274903324, 107.10933935694175, 107.0495450076351, 107.0963228300272, 107.10173574295278], 'policy_ART-01-08_reward': [103.20550929079909, 104.64358906525706, 104.17568793894546, 104.47323440902613, 104.34748432915767, 104.36176211464078, 104.48490578641048, 104.30599859186049, 105.09651680284877, 105.04861492113903, 105.00138790134997, 105.2654369135228, 105.31904182457318, 105.26632319256787, 104.84332166547844, 104.77033904405288, 105.33185552388822, 105.05756336916428, 105.35267502369695, 105.72704007851775, 105.0538609985482, 105.22544809643243, 105.13972575075306, 104.67501468444416, 104.94765629939452, 105.24194648178157, 105.15909457993074, 104.9048088138966, 104.43990653371876, 104.47047957949913, 104.47451696664909, 104.3673869084332, 104.76485977539105, 104.56361352542845, 104.5335683193092, 104.81637689710635, 104.90062467202831, 104.48907053047205, 104.73132584019027, 105.22935593338568, 104.18016350348125, 104.39803368120134, 104.44413155080795, 104.30945539879643, 103.67012031651085, 103.6924265943054, 103.50282545410914, 103.63323081795511, 103.85746823271025, 103.81143670309118, 103.78333590032602, 103.47297713079972, 104.37574889241806, 104.10754046341285, 104.17049020398271, 104.0097931665198, 104.89031597265607, 104.58312184283777, 104.75166299109542, 104.78561015176693, 105.24852582130698, 105.16728303056739, 104.93585762791358, 105.36849728106122, 105.84591685064943, 105.74022325010387, 105.79583681502722, 105.95944515413079, 105.95446667148497, 105.98473708088824, 105.97474431089107, 105.97274313017257, 106.00836480316414, 105.82268234638126, 105.952833594568, 105.9267688386836, 106.75924201624275, 106.76547313528533, 106.74817782354926, 106.55791379646676, 107.18470514644618, 106.979135647786, 106.9555946150514, 107.11705193256194, 106.86300887826816, 107.04631130943389, 106.88886744408249, 106.74385560712746, 107.22831576350401, 107.21303504648496, 107.0392277027777, 107.08366182753281, 106.97372539962383, 107.10589176791613, 107.14107581255323, 107.12794091389276, 106.81766064908926, 106.7267992733796, 106.7801171030944, 106.81940856940433], 'policy_ART-01-11a_reward': [105.74042299463336, 106.57964068919152, 106.31785740431242, 106.4023258209442, 106.48720975946198, 106.48190301484203, 106.50178050245718, 106.30160577881291, 107.07098281836865, 106.86280939944662, 106.82605554446249, 107.03638507389924, 106.86514773066425, 106.92823133485292, 106.6400358215464, 106.62346257392166, 106.8429986681597, 106.61334144273641, 106.92427265697748, 107.1621900897379, 106.68421976112177, 106.73992299391428, 106.8104217653084, 106.47430069611832, 106.44193424106149, 106.53923429826803, 106.48533364250508, 106.31051701909372, 105.7766993917874, 105.70544088217616, 105.81586129970088, 105.70816062005704, 105.88359619184136, 105.80007763680892, 105.6828500775992, 105.9602552351753, 106.15304754044749, 105.60680971762972, 105.78061181548198, 106.23036462365431, 105.19351125983154, 105.48881907690675, 105.61369286926663, 105.2952899915532, 104.6712346855362, 104.67569040513486, 104.49895404836032, 104.6355869550077, 104.80632347366246, 104.63447025465112, 104.63393841682729, 104.44760916383451, 105.34252296555196, 104.99227086382913, 105.09768545577562, 104.9134517292419, 105.7955002018952, 105.49603071691962, 105.70908056443196, 105.66162873955301, 105.96364511657958, 105.98290803834335, 105.7453398409886, 106.21631511429399, 106.64021051239374, 106.54335062984775, 106.58046898128826, 106.74981248950945, 106.67757892554381, 106.75795037717494, 106.74991904229717, 106.68974998817438, 106.77264087001149, 106.52263932329518, 106.69735416239406, 106.63492605420565, 107.47853917902118, 107.51312782241217, 107.48391292458909, 107.29728242698681, 107.79963821788161, 107.69828174742925, 107.68547472738895, 107.8524309813448, 107.48357936729171, 107.7310942308646, 107.51088267100025, 107.42877293332353, 107.91756299212747, 107.79527785703958, 107.69666498433784, 107.70384035044128, 107.58048317909281, 107.72272236913132, 107.73913503550962, 107.78932977124387, 107.39970175093293, 107.32966750625985, 107.3621200405162, 107.38792673416476], 'policy_ART-01-07_reward': [103.30012202304053, 104.56353576875668, 103.963731183731, 104.41410289150558, 104.13045109527023, 104.10313510133946, 104.2837523220695, 104.2686320171489, 104.94267762911151, 104.92741598285964, 104.74803941970413, 105.052481596743, 104.91179479040314, 104.99895347527435, 104.63698976872217, 104.48518296296488, 105.18456468704012, 105.09032135656253, 105.1800539473387, 105.56307196599978, 104.88241108689257, 104.81333472678482, 104.87993721771153, 104.72355297004714, 104.79591937021848, 105.04667326807032, 104.86392506734758, 104.66798576381154, 104.22643915903625, 104.00511632940893, 104.43599909110524, 104.20351885235529, 104.46408098934012, 104.18597453072299, 104.26143870453629, 104.42868567203244, 104.61401875538736, 104.08032123899358, 104.42567544980751, 104.6616897363293, 103.80295942375027, 104.0363374054989, 104.12416494951171, 103.80974062724269, 103.19001954576464, 103.34110770460595, 103.3177541360403, 103.22584844901513, 103.54349695415695, 103.42777606171033, 103.2279389044859, 103.22418119105743, 103.93214210835005, 103.57629052199033, 103.77067281951693, 103.44814388250751, 104.3629417845063, 104.03341480023677, 104.11121058250409, 104.32034939163951, 104.625461133122, 104.47091775973149, 104.51687539560604, 104.7666406465669, 105.29032356168341, 105.02844028420877, 105.15635727392385, 105.3251121371127, 105.27157911083174, 105.27894222952597, 105.31304212735344, 105.19985562868129, 105.37500231462269, 105.34461651228185, 105.56471365414174, 105.27554207861581, 105.92227522453673, 105.95741754069353, 106.27015366470874, 105.91482257295053, 106.46187932131518, 106.15405464476456, 106.39632806023563, 106.3610499371154, 106.24245491952561, 106.4413624082289, 106.1679156594307, 106.17825013576137, 106.65983022021437, 106.41930572121905, 106.25444270229548, 106.42583016297917, 106.39065515733854, 106.56525715228774, 106.51375067191883, 106.64538096141995, 106.21038687314096, 106.2275643550279, 106.26601180228732, 106.27348268362738], 'policy_ART-01-14_reward': [105.48212943291166, 106.37857691258421, 106.09226070248977, 106.2120042311334, 106.26524177000255, 106.26807367110088, 106.28918202679237, 106.12590848478713, 106.88313172796357, 106.68773668535407, 106.64369973587434, 106.84446633414953, 106.70707986986865, 106.75680805902508, 106.48199734633108, 106.41772933306112, 106.68641078469336, 106.45694831754209, 106.74361515802276, 107.01456214696574, 106.52179738322448, 106.60260307562787, 106.62825866348783, 106.29398527851627, 106.29095543611, 106.41508326005794, 106.36024997359428, 106.18020942912699, 105.64641894790158, 105.57864078102072, 105.66257009149022, 105.58065818055391, 105.79307769393344, 105.68054745152155, 105.5819621740429, 105.85960587815875, 106.02136035162097, 105.5010570110203, 105.67985040467757, 106.14785008814258, 105.11100316546211, 105.39044252825214, 105.4958513795646, 105.22044998868279, 104.59375035733336, 104.61224399010831, 104.42876058272374, 104.5707141823189, 104.74139865550741, 104.58586645100684, 104.58680375759712, 104.37678836597786, 105.27034467493046, 104.92861471491481, 105.02637353890027, 104.85707633964685, 105.71909441119266, 105.42572611860393, 105.61107850123406, 105.58378342915279, 105.91676262467075, 105.9094360768546, 105.69183134020251, 106.14266954915165, 106.56307325249149, 106.46964813127853, 106.52755841732478, 106.68020988915518, 106.61842193150434, 106.6894568613696, 106.6743313519796, 106.63746480434222, 106.69398363333792, 106.46560089868773, 106.62882662999743, 106.56788430294688, 107.40051725237058, 107.42000380600483, 107.42170707022123, 107.21970236183498, 107.7427819718755, 107.62532757837636, 107.61345556999397, 107.76325558594483, 107.41550884867279, 107.65805041076743, 107.43492665856523, 107.35602192783213, 107.83598513356107, 107.72050538863346, 107.60921257772563, 107.628698445045, 107.52661680283994, 107.66295827369939, 107.68358220284773, 107.71942756282785, 107.33310148708716, 107.28067707897785, 107.30913341515063, 107.33383434434766], 'policy_ART-01-13_reward': [104.4627238814808, 105.31469315652573, 104.98435504979516, 105.0477302348737, 104.89903915907637, 104.90621126322405, 104.9083921112408, 104.82010153884528, 105.46661415744316, 105.30316149678552, 105.40344484454552, 105.6191797469076, 105.60769087877583, 105.61307493621133, 105.24782670761235, 105.36868822275643, 105.60704385493258, 105.49586760898045, 105.77850166562631, 106.0187791929217, 105.61642862006003, 105.53093221259249, 105.67837986240365, 105.33377907916199, 105.37126140690903, 105.39488594037152, 105.41199926406263, 105.07032855811889, 104.80751768046478, 104.63524182956542, 104.90686411196877, 104.67886176686191, 104.77680250156564, 104.71392669374787, 104.6219380057667, 104.9462555632785, 105.12235237673022, 104.48829356511881, 104.75766095074425, 105.10602065743231, 104.23519417029206, 104.4610766499464, 104.586344146189, 104.26735026688331, 103.48402730062088, 103.51311243637483, 103.37846791208861, 103.45571239510907, 103.68783990339766, 103.57281338777811, 103.52440703468322, 103.32099515750562, 104.18716264568006, 103.85145096118075, 103.92503426417666, 103.7493598194296, 104.78058239011087, 104.40220316791952, 104.73367220773837, 104.7192581387754, 104.86714133127217, 104.93787009471991, 104.70332369984106, 105.18480373791236, 105.6727380456714, 105.52406388292975, 105.53255763821372, 105.7705173198395, 105.75012555758235, 105.73642940984631, 105.73967522641568, 105.61628586381713, 105.85270739491911, 105.63426984475468, 105.88647362474238, 105.80001608491251, 106.54953596872224, 106.67759350551131, 106.593456729106, 106.44638075861494, 106.93050344656889, 106.84018511359935, 106.83743578307535, 107.09678708801515, 106.74606359829659, 106.97071485520175, 106.78254586716575, 106.65015590582051, 107.2064056785184, 107.11727978421516, 106.98543417809431, 107.01098628252602, 106.78665508123436, 106.94763854926492, 106.99109435052159, 107.11062399069539, 106.66395418181465, 106.54753598837955, 106.5969142911521, 106.64584992822158], 'policy_ART-01-09_reward': [66.33642769838087, 71.41778296942849, 69.31837736673434, 70.1402943719364, 63.962347611717604, 63.68459223907213, 64.08268070945425, 66.15952874801113, 65.77502087421084, 66.82578948146913, 67.91368367972599, 68.93965596953007, 73.35685328026204, 71.31990791783882, 68.60775581857476, 71.56072990963533, 74.26374632116688, 77.51209363833975, 76.4493881698403, 78.67582708938228, 76.69971686614315, 74.96289791153559, 74.62524373392873, 75.32103664182426, 79.64172968873744, 80.2056154014775, 78.76328087094585, 74.84564608477832, 81.44979178787258, 77.70947076092861, 81.60021618774812, 78.90622874967119, 79.31018438992832, 78.60821085315382, 80.41514862809798, 80.08471167136452, 79.26937949332245, 79.13516317634304, 82.12576959546246, 80.43003281263088, 82.3577452388063, 82.4722828504922, 80.19339540325747, 81.79491089368535, 78.6908634747363, 79.06795082682784, 79.67022375312116, 78.65168990656686, 79.40946507971972, 81.18056177476164, 80.59838813930993, 80.08983566295706, 80.49016375048213, 79.8707591027886, 78.61275551543758, 77.85294929455135, 82.65515536338008, 80.15543038405596, 82.68448438020968, 83.72182459393882, 82.87651919941473, 82.85331634628841, 84.02640137632685, 84.79444032754441, 85.14171480387203, 84.17862247085398, 83.18365307420963, 84.05797439328968, 86.73175633790791, 84.85106504859937, 84.24882920832123, 82.80627280301005, 85.22519835023053, 85.96179667704965, 88.83820980650309, 88.1174805987397, 86.20962766737007, 86.53445897333779, 86.50579754680614, 87.7617404210267, 89.02052059870448, 88.67254872189216, 88.14467746648712, 89.65091326972976, 90.21978380289359, 89.76481386908927, 90.57951759748, 89.7334437323405, 90.56253342613341, 90.83561837496843, 90.15000500217047, 90.41543125804384, 89.49217825804025, 90.26536887821032, 90.66465398708836, 92.14188966527816, 90.56141220338162, 89.59813667418527, 90.97154514658058, 89.92751215785597]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.0801296183001698, 'mean_inference_ms': 7.452237263246761, 'mean_action_processing_ms': 0.7075700468353893, 'mean_env_wait_ms': 12.452511775785629, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {}, 'num_episodes': 4, 'episode_return_max': 809.8706512192474, 'episode_return_min': 734.1581639625548, 'episode_return_mean': 768.6047794690238, 'episodes_this_iter': 4}",{},"{'learner': {'ART-01-10': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 5.152261416831364, 'cur_kl_coeff': 1.4551915228366855e-12, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5892773783920953, 'policy_loss': 2.3913738550618292e-05, 'vf_loss': 1.5892534672282637, 'vf_explained_var': 2.2972623507181803e-07, 'kl': 0.003471759666374965, 'entropy': -0.42080681075652443, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-12': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 3.078672948728005, 'cur_kl_coeff': 1.7763568394002508e-16, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.3915500790501634, 'policy_loss': -0.0021695867229330665, 'vf_loss': 1.393719662974278, 'vf_explained_var': -9.370346864064535e-06, 'kl': 0.00893595194246853, 'entropy': 0.5408192223869264, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-08': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 4.28591611093531, 'cur_kl_coeff': 2.910383045673371e-12, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5804854882104944, 'policy_loss': -0.0039282897506685305, 'vf_loss': 1.58441378067558, 'vf_explained_var': 5.559995770454407e-07, 'kl': 0.009807144699053607, 'entropy': -0.1640133016121884, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-11a': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 6.195897662080824, 'cur_kl_coeff': 4.6566128730773935e-11, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5902124499203638, 'policy_loss': -0.0002469905215548351, 'vf_loss': 1.5904594428216419, 'vf_explained_var': 2.3953616619110106e-07, 'kl': 0.006395576856368814, 'entropy': -0.34800841885929307, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-14': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 5.6464877732718985, 'cur_kl_coeff': 3.6379788070917137e-13, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5906633885577321, 'policy_loss': -0.0012966135385795497, 'vf_loss': 1.5919600054000815, 'vf_explained_var': 2.2978832324345905e-07, 'kl': 0.0057140419501859165, 'entropy': -0.681518713136514, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-07': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 5.56339784435307, 'cur_kl_coeff': 2.273736754432321e-14, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5881005019570391, 'policy_loss': 0.0021044207552525524, 'vf_loss': 1.585996079693238, 'vf_explained_var': 2.5002906719843544e-07, 'kl': 0.005955794057172211, 'entropy': -0.027831771113172483, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-09': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 45.003384361354016, 'cur_kl_coeff': 0.004687499999999999, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.4766456215914028, 'policy_loss': 0.003704727019066922, 'vf_loss': 1.4728831870791812, 'vf_explained_var': 8.853773276011149e-08, 'kl': 0.012310783940377998, 'entropy': -2.4336926341056824, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-13': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 8.076037571734439, 'cur_kl_coeff': 1.1920928955078127e-08, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.582976449917381, 'policy_loss': 6.211603176780045e-05, 'vf_loss': 1.5829143371743462, 'vf_explained_var': 2.3264437913894654e-07, 'kl': 0.006170223985739303, 'entropy': -0.7492228706056873, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}}, 'num_env_steps_sampled': 400000, 'num_env_steps_trained': 400000, 'num_agent_steps_sampled': 3200000, 'num_agent_steps_trained': 3200000}",3200000,3200000,3200000,400000,400000,4000,45.4872,400000,4000,45.4872,2,0,0,4000,"{'cpu_util_percent': 7.703333333333334, 'ram_util_percent': 13.800000000000004}","{'training_iteration_time_ms': 88233.018, 'restore_workers_time_ms': 0.017, 'training_step_time_ms': 88232.972, 'sample_time_ms': 44001.728, 'learn_time_ms': 44221.227, 'learn_throughput': 90.454, 'synch_weights_time_ms': 9.526}"


[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 48x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 144x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 144x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 144x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 144x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 144x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[32m [repeated 144x across cluster][0m
[36m(RolloutWorker pid=1577789)[0m   gym.logger.warn("Casting input x to numpy array.")[

In [2]:
results

ResultGrid<[
  Result(
    metrics={'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'ART-01-10': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 5.152261416831364, 'cur_kl_coeff': 1.4551915228366855e-12, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5892773783920953, 'policy_loss': 2.3913738550618292e-05, 'vf_loss': 1.5892534672282637, 'vf_explained_var': 2.2972623507181803e-07, 'kl': 0.003471759666374965, 'entropy': -0.42080681075652443, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-12': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 3.078672948728005, 'cur_kl_coeff': 1.7763568394002508e-16, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.3915500790501634, 'policy_loss': -0.0021695867229330665, 'vf_loss': 1.393719662974278, 'vf_explained_var': -9.370346864064535e-06, 'kl': 0.00893595194246853, 'entro

In [6]:
import bmstestbedc2f2.checkpoints

best_result = results.get_best_result()
best_result.checkpoint.to_directory(
    resolve_path('ppo_neural', bmstestbedc2f2.checkpoints),
)
best_result

Result(
  metrics={'custom_metrics': {}, 'episode_media': {}, 'info': {'learner': {'ART-01-10': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 5.152261416831364, 'cur_kl_coeff': 1.4551915228366855e-12, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.5892773783920953, 'policy_loss': 2.3913738550618292e-05, 'vf_loss': 1.5892534672282637, 'vf_explained_var': 2.2972623507181803e-07, 'kl': 0.003471759666374965, 'entropy': -0.42080681075652443, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 125.0, 'num_grad_updates_lifetime': 95520.5, 'diff_num_grad_updates_vs_sampler_policy': 479.5}, 'ART-01-12': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 3.078672948728005, 'cur_kl_coeff': 1.7763568394002508e-16, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 1.3915500790501634, 'policy_loss': -0.0021695867229330665, 'vf_loss': 1.393719662974278, 'vf_explained_var': -9.370346864064535e-06, 'kl': 0.00893595194246853, 'entropy': 0.5408192223