In [1]:
from hvacmarl6e43.buildings import OfficeBuilding
from hvacmarl6e43.experiments import rl_experiments

# Training metrics of every registered RL experiment, keyed by experiment name.
metrics = {
    experiment_name: run.get_results().metrics_train
    for experiment_name, run in rl_experiments.items()
}

E0000 00:00:1737432714.595964  549794 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737432714.599266  549794 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import holoviews as hv

# Load both plotting backends with a single call. HoloViews activates the
# first backend listed, so 'bokeh' is the current backend while 'matplotlib'
# stays available -- the same end state the previous pair of calls reached,
# without initialising the extension twice.
hv.extension('bokeh', 'matplotlib')

import numpy as np

In [3]:
# Show which metric columns are available for the multiagent run.
metrics['multiagent'].columns.tolist()

['num_healthy_workers',
 'num_in_flight_async_sample_reqs',
 'num_remote_worker_restarts',
 'num_agent_steps_sampled',
 'num_agent_steps_trained',
 'num_env_steps_sampled',
 'num_env_steps_trained',
 'num_env_steps_sampled_this_iter',
 'num_env_steps_trained_this_iter',
 'num_env_steps_sampled_throughput_per_sec',
 'num_env_steps_trained_throughput_per_sec',
 'timesteps_total',
 'num_env_steps_sampled_lifetime',
 'num_agent_steps_sampled_lifetime',
 'num_steps_trained_this_iter',
 'agent_timesteps_total',
 'done',
 'training_iteration',
 'trial_id',
 'date',
 'timestamp',
 'time_this_iter_s',
 'time_total_s',
 'pid',
 'hostname',
 'node_ip',
 'time_since_restore',
 'iterations_since_restore',
 'info/learner/0FWEST1/learner_stats/allreduce_latency',
 'info/learner/0FWEST1/learner_stats/grad_gnorm',
 'info/learner/0FWEST1/learner_stats/cur_kl_coeff',
 'info/learner/0FWEST1/learner_stats/cur_lr',
 'info/learner/0FWEST1/learner_stats/total_loss',
 'info/learner/0FWEST1/learner_stats/policy

In [4]:
# Overlay the learner KL statistic of the single monoagent policy with the
# multiagent one, where the per-zone policies' values are averaged row-wise.
(
    hv.Curve(
        metrics['monoagent']['info/learner/default_policy/learner_stats/kl'],
        label='monoagent',
    )
    * hv.Curve(
        metrics['multiagent'][[
            f'info/learner/{zone}/learner_stats/kl'
            for zone in OfficeBuilding.zone_keys
        ]].mean(axis=1),
        label='multiagent',
    )
).opts(xlabel='Training Iteration', ylabel='KL Divergence')

In [5]:
# Overlay the learner total loss of the single monoagent policy with the
# multiagent one, where the per-zone policies' losses are averaged row-wise.
(
    hv.Curve(
        metrics['monoagent']['info/learner/default_policy/learner_stats/total_loss'],
        label='monoagent',
    )
    * hv.Curve(
        metrics['multiagent'][[
            f'info/learner/{zone}/learner_stats/total_loss'
            for zone in OfficeBuilding.zone_keys
        ]].mean(axis=1),
        label='multiagent',
    )
).opts(xlabel='Training Iteration', ylabel='Total Loss (Avg.)')

In [6]:
# One reward series per setup, keyed by the label used in the plots.
rewards = dict(
    # Monoagent: the logged mean reward of the default policy, NaN rows removed.
    monoagent=(
        metrics['monoagent']['env_runners/policy_reward_mean/default_policy']
        .dropna()
    ),
    # Multiagent: reduce every per-zone reward-history cell with np.mean, then
    # average across zones for each row. Rows with any NaN are dropped first.
    # NOTE(review): presumably each cell holds a list of episode rewards -- confirm.
    multiagent=(
        metrics['multiagent'][[
            f'env_runners/hist_stats/policy_{zone}_reward'
            for zone in OfficeBuilding.zone_keys
        ]]
        .dropna()
        .map(np.mean)
        .mean(axis=1)
    ),
)

In [7]:
# Two panels laid out together: the reward series of each setup, and their
# running sums. Axes are not shared because the two panels differ in scale.
(
    hv.Overlay([
        hv.Curve(series, label=setup)
        for setup, series in rewards.items()
    ]).opts(xlabel='Training Iteration', ylabel='Reward')
    + hv.Overlay([
        hv.Curve(series.cumsum(), label=setup)
        for setup, series in rewards.items()
    ]).opts(xlabel='Training Iteration', ylabel='Reward (Cumulative)')
).opts(shared_axes=False)
