In [1]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard
# Load autoreload and set it to mode 2, which (per the IPython docs) reloads
# modules before each cell runs, so edits to imported project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import time

from rmrl.experiments.configurations import *
from rmrl.experiments.with_transfer import WithTransferExperiment
from rmrl.experiments.runner import ExperimentsRunner
from tqdm.auto import tqdm

SEED = 42

In [3]:
def get_simple_exp(seed=SEED, mods=None):
    """Build a ``WithTransferExperiment`` for the small environment trained with PPO.

    Only the seed and the list of modifications vary between calls; every other
    setting (environment, context space, algorithm, kwargs, evaluation schedule)
    is fixed in the body below.

    Args:
        seed: Seed stored in the experiment configuration. Defaults to ``SEED``.
        mods: List of ``Mods`` members to enable. ``None`` (or any other falsy
            value) is replaced with an empty list.

    Returns:
        The constructed ``WithTransferExperiment``. Training is started
        separately by calling ``run`` on it.
    """
    return WithTransferExperiment(
        cfg=ExperimentConfiguration(
            env=SupportedEnvironments.SMALL,
            cspace=ContextSpaces.FIXED_ENTITIES,
            alg=Algos.PPO,
            mods=mods or [],
            rm_kwargs={
                'grid_resolution': (2, 2)
            },
            model_kwargs=dict(
                gnn_hidden_dims=[32, 32],
            ),
            alg_kwargs={
                # 'learning_starts': 0,
                # 'exploration_fraction': 0.5,
                'n_steps': 1024
            },
            seed=seed
        ),
        # Step budget as an int: the float literal 1e5 leaked through to the
        # training progress bar, which rendered its total as "100000.0".
        total_timesteps=100_000,
        dump_dir='single_experiment_dumps',
        verbose=1,
        log_interval=1,
        eval_freq=500,
        min_evals=10,
        n_eval_episodes=10,
    )

# Three default-seed experiments sharing the AS + RS modifications: one with
# nothing extra, one adding GECO and one adding GECOUPT (built in that order).
no_geco_exp, with_geco_exp, geco_upt_exp = (
    get_simple_exp(mods=[Mods.AS, Mods.RS, *extra_mods])
    for extra_mods in ([], [Mods.GECO], [Mods.GECOUPT])
)

In [4]:
# Context pairs used for every experiment run in this notebook; each element
# unpacks into a (source, target) pair. Judging by the helper's name, pairs are
# loaded when already available and sampled otherwise -- confirm in
# ExperimentsRunner.
context_pairs = ExperimentsRunner.load_or_sample_contexts(
    exp=no_geco_exp,
    num_samples=3,
    sample_seed=24,
)

In [5]:
# Four experiment variants (RS only, AS+RS, AS+RS+GECO, AS+RS+GECOUPT) for each
# of three seeds: SEED, 2 * SEED and 3 * SEED.
experiments = []
for multiplier in (1, 2, 3):
    seed = SEED * multiplier
    rs_only = get_simple_exp(seed, mods=[Mods.RS])
    rm_as = get_simple_exp(seed, mods=[Mods.AS, Mods.RS])
    rm_as_geco = get_simple_exp(seed, mods=[Mods.AS, Mods.RS, Mods.GECO])
    rm_as_gecoupt = get_simple_exp(seed, mods=[Mods.AS, Mods.RS, Mods.GECOUPT])
    experiments += [rs_only, rm_as, rm_as_geco, rm_as_gecoupt]

In [6]:
# Run every experiment on every (source, target) context pair, contexts in the
# outer loop. The progress bar is managed by a `with` block so it is closed
# even if one of the runs raises.
with tqdm(total=len(experiments) * len(context_pairs)) as pbar:
    for c_src, c_tgt in context_pairs:
        for exp in experiments:
            exp.run(c_src, c_tgt)
            pbar.update()

  0%|          | 0/36 [00:00<?, ?it/s]

training agent for task 8caa02ec824d4dd40d1f336941f3cc867029d31cc50f3690e28c241723778494
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to single_experiment_dumps/WithTransferExperiment/env-small/cspace-fixed_entities/alg-PPO/mods-(RS)/rm_kwargs-((grid_resolution-(2,2)))/alg_kwargs-((n_steps-1024))/model_kwargs-((gnn_hidden_dims-(32,32)))/seed-42/logs/tensorboard/8caa02ec824d4dd40d1f336941f3cc867029d31cc50f3690e28c241723778494_1


training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.5     |
|    ep_rew_mean      | -1.91    |
|    ep_true_rew_mean | -9.53    |
| time/               |          |
|    fps              | 211      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | -1.89    |
|    ep_true_rew_mean | -7.51    |
| time/               |          |
|    fps              | 217      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.8     |
|    ep_rew_mean      | -1.82    |
|    ep_true_rew_mean | -9.42    |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.2     |
|    ep_rew_mean      | -1.82    |
|    ep_true_rew_mean | -0.457   |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | -1.93    |
|    ep_true_rew_mean | -12.2    |
| time/               |          |
|    fps              | 222      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | -1.8     |
|    ep_true_rew_mean | -4.64    |
| time/               |          |
|    fps              | 219      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23       |
|    ep_rew_mean      | -1.87    |
|    ep_true_rew_mean | -4.82    |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=12.20 +/- 56.82
Episode length: 17.80 +/- 11.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 17.8     |
|    mean_reward     | 12.2     |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.2     |
|    ep_rew_mean      | -1.76    |
|    ep_true_rew_mean | -0.5     |
| time/               |          |
|    fps              | 230      |
|    iterations       | 1        |
|    time_elapsed     | 4 

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.4     |
|    ep_rew_mean      | -1.96    |
|    ep_true_rew_mean | -19.5    |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.7     |
|    ep_rew_mean      | -1.88    |
|    ep_true_rew_mean | -2.69    |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-0.20 +/- 49.60
Episode length: 20.20 +/- 9.60
---------------------------------
| eval/              |          |
|    mean_ep_length  | 20.2     |
|    mean_reward     | -0.2     |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.2     |
|    ep_rew_mean      | -1.8     |
|    ep_true_rew_mean | -2.67    |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.5     |
|    ep_rew_mean      | -1.72    |
|    ep_true_rew_mean | -0.255   |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.1     |
|    ep_rew_mean      | -1.89    |
|    ep_true_rew_mean | -7.16    |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.3     |
|    ep_rew_mean      | -1.63    |
|    ep_true_rew_mean | 3.69     |
| time/               |          |
|    fps              | 218      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | -2.05    |
|    ep_true_rew_mean | -11.9    |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.9     |
|    ep_rew_mean      | -1.7     |
|    ep_true_rew_mean | 6.17     |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.2     |
|    ep_rew_mean      | -1.75    |
|    ep_true_rew_mean | -0.478   |
| time/               |          |
|    fps              | 222      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.1     |
|    ep_rew_mean      | -1.58    |
|    ep_true_rew_mean | 13.9     |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.7     |
|    ep_rew_mean      | -1.77    |
|    ep_true_rew_mean | 4.35     |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.87    |
|    ep_true_rew_mean | -14.1    |
| time/               |          |
|    fps              | 222      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.6     |
|    ep_rew_mean      | -1.92    |
|    ep_true_rew_mean | -16.6    |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.9     |
|    ep_rew_mean      | -1.68    |
|    ep_true_rew_mean | 8.23     |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.88    |
|    ep_true_rew_mean | -11.8    |
| time/               |          |
|    fps              | 221      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-0.40 +/- 49.20
Episode length: 20.40 +/- 9.20
---------------------------------
| eval/              |          |
|    mean_ep_length  | 20.4     |
|    mean_reward     | -0.4     |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-12.70 +/- 36.90
Episode length: 22.70 +/- 6.90
---------------------------------
| eval/              |          |
|    mean_ep_length  | 22.7     |
|    mean_reward     | -12.7    |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.8     |
|    ep_rew_mean      | -1.46    |
|    ep_true_rew_mean | 16.4     |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_ti

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.1     |
|    ep_rew_mean      | -1.88    |
|    ep_true_rew_mean | -9.5     |
| time/               |          |
|    fps              | 228      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.3     |
|    ep_rew_mean      | -1.66    |
|    ep_true_rew_mean | 2.06     |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.9     |
|    ep_rew_mean      | -1.48    |
|    ep_true_rew_mean | 18.9     |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-12.60 +/- 37.20
Episode length: 22.60 +/- 7.20
---------------------------------
| eval/              |          |
|    mean_ep_length  | 22.6     |
|    mean_reward     | -12.6    |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
New best mean reward!
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.2     |
|    ep_rew_mean      | -1.91    |
|    ep_true_rew_mean | -9.59    |
| time/               |          |
|    fps              | 233      |
|    iterations       | 1        |
|    time_elapsed     | 4 

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.3     |
|    ep_rew_mean      | -1.8     |
|    ep_true_rew_mean | -2.27    |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.4     |
|    ep_rew_mean      | -1.14    |
|    ep_true_rew_mean | 30       |
| time/               |          |
|    fps              | 225      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-0.20 +/- 49.60
Episode length: 20.20 +/- 9.60
---------------------------------
| eval/              |          |
|    mean_ep_length  | 20.2     |
|    mean_reward     | -0.2     |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.3     |
|    ep_rew_mean      | -1.76    |
|    ep_true_rew_mean | 1.58     |
| time/               |          |
|    fps              | 236      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.1     |
|    ep_rew_mean      | -1.77    |
|    ep_true_rew_mean | -2.57    |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.2     |
|    ep_rew_mean      | -1.69    |
|    ep_true_rew_mean | 1.75     |
| time/               |          |
|    fps              | 220      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-12.60 +/- 37.20
Episode length: 22.60 +/- 7.20
---------------------------------
| eval/              |          |
|    mean_ep_length  | 22.6     |
|    mean_reward     | -12.6    |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-12.60 +/- 37.20
Episode length: 22.60 +/- 7.20
---------------------------------
| eval/              |          |
|    mean_ep_length  | 22.6     |
|    mean_reward     | -12.6    |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.92    |
|    ep_true_rew_mean | -11.8    |
| time/               |          |
|    fps              | 237      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_t

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.1     |
|    ep_rew_mean      | -1.91    |
|    ep_true_rew_mean | -12.2    |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | -1.79    |
|    ep_true_rew_mean | 4.07     |
| time/               |          |
|    fps              | 227      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24       |
|    ep_rew_mean      | -1.97    |
|    ep_true_rew_mean | -16.8    |
| time/               |          |
|    fps              | 231      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | -1.79    |
|    ep_true_rew_mean | -4.65    |
| time/               |          |
|    fps              | 230      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.19    |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 231      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.9     |
|    ep_rew_mean      | -1.9     |
|    ep_true_rew_mean | -11.5    |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | -1.84    |
|    ep_true_rew_mean | -9.07    |
| time/               |          |
|    fps              | 228      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.2     |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 230      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.3     |
|    ep_rew_mean      | -1.83    |
|    ep_true_rew_mean | -4.53    |
| time/               |          |
|    fps              | 227      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.3     |
|    ep_rew_mean      | -1.79    |
|    ep_true_rew_mean | -4.53    |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.1     |
|    ep_rew_mean      | -2.1     |
|    ep_true_rew_mean | -19.4    |
| time/               |          |
|    fps              | 230      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.93    |
|    ep_true_rew_mean | -11.8    |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | -1.81    |
|    ep_true_rew_mean | -6.89    |
| time/               |          |
|    fps              | 228      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.7     |
|    ep_rew_mean      | -2.18    |
|    ep_true_rew_mean | -22.3    |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.1     |
|    ep_rew_mean      | -1.87    |
|    ep_true_rew_mean | -11.8    |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.8     |
|    ep_rew_mean      | -1.88    |
|    ep_true_rew_mean | -6.89    |
| time/               |          |
|    fps              | 230      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.4     |
|    ep_rew_mean      | -2.12    |
|    ep_true_rew_mean | -19.5    |
| time/               |          |
|    fps              | 230      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.7     |
|    ep_rew_mean      | -1.83    |
|    ep_true_rew_mean | -4.91    |
| time/               |          |
|    fps              | 227      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.8     |
|    ep_rew_mean      | -1.76    |
|    ep_true_rew_mean | -0.0652  |
| time/               |          |
|    fps              | 219      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.2     |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 212      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.3     |
|    ep_rew_mean      | -1.75    |
|    ep_true_rew_mean | -0.458   |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.9     |
|    ep_rew_mean      | -2       |
|    ep_true_rew_mean | -9.6     |
| time/               |          |
|    fps              | 217      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.19    |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 208      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.2     |
|    ep_rew_mean      | -1.88    |
|    ep_true_rew_mean | -8.91    |
| time/               |          |
|    fps              | 215      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23       |
|    ep_rew_mean      | -1.88    |
|    ep_true_rew_mean | -7.14    |
| time/               |          |
|    fps              | 222      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.2     |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 205      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.3     |
|    ep_rew_mean      | -1.83    |
|    ep_true_rew_mean | -0.0667  |
| time/               |          |
|    fps              | 217      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.9     |
|    ep_rew_mean      | -1.85    |
|    ep_true_rew_mean | -7       |
| time/               |          |
|    fps              | 228      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21       |
|    ep_rew_mean      | -1.78    |
|    ep_true_rew_mean | 14.4     |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | -1.8     |
|    ep_true_rew_mean | 4.11     |
| time/               |          |
|    fps              | 213      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | -1.84    |
|    ep_true_rew_mean | -4.6     |
| time/               |          |
|    fps              | 207      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.6     |
|    ep_rew_mean      | -1.93    |
|    ep_true_rew_mean | -2.56    |
| time/               |          |
|    fps              | 214      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.5     |
|    ep_rew_mean      | -1.84    |
|    ep_true_rew_mean | 4.13     |
| time/               |          |
|    fps              | 208      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.2     |
|    ep_rew_mean      | -1.74    |
|    ep_true_rew_mean | 1.71     |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.1     |
|    ep_rew_mean      | -1.89    |
|    ep_true_rew_mean | 1.78     |
| time/               |          |
|    fps              | 215      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.3     |
|    ep_rew_mean      | -1.82    |
|    ep_true_rew_mean | 4.38     |
| time/               |          |
|    fps              | 205      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | -1.94    |
|    ep_true_rew_mean | -9.86    |
| time/               |          |
|    fps              | 213      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.1     |
|    ep_rew_mean      | -1.99    |
|    ep_true_rew_mean | -7.16    |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | -1.85    |
|    ep_true_rew_mean | -2.44    |
| time/               |          |
|    fps              | 215      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-0.20 +/- 49.60
Episode length: 20.20 +/- 9.60
---------------------------------
| eval/              |          |
|    mean_ep_length  | 20.2     |
|    mean_reward     | -0.2     |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-0.30 +/- 49.40
Episode length: 20.30 +/- 9.40
---------------------------------
| eval/              |          |
|    mean_ep_length  | 20.3     |
|    mean_reward     | -0.3     |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.5     |
|    ep_rew_mean      | -1.93    |
|    ep_true_rew_mean | -14.2    |
| time/               |          |
|    fps              | 226      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21       |
|    ep_rew_mean      | -1.72    |
|    ep_true_rew_mean | 8.12     |
| time/               |          |
|    fps              | 207      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.9     |
|    ep_rew_mean      | -1.79    |
|    ep_true_rew_mean | -13.8    |
| time/               |          |
|    fps              | 208      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.6     |
|    ep_rew_mean      | -1.83    |
|    ep_true_rew_mean | -0.378   |
| time/               |          |
|    fps              | 214      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.4     |
|    ep_rew_mean      | -1.92    |
|    ep_true_rew_mean | -22      |
| time/               |          |
|    fps              | 205      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | -1.8     |
|    ep_true_rew_mean | -0.304   |
| time/               |          |
|    fps              | 209      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -1.93    |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 214      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.8     |
|    ep_rew_mean      | -1.65    |
|    ep_true_rew_mean | 9.8      |
| time/               |          |
|    fps              | 228      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -1.97    |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.6     |
|    ep_rew_mean      | -1.81    |
|    ep_true_rew_mean | -0.356   |
| time/               |          |
|    fps              | 212      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.9     |
|    ep_rew_mean      | -1.89    |
|    ep_true_rew_mean | -19.1    |
| time/               |          |
|    fps              | 210      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23       |
|    ep_rew_mean      | -1.84    |
|    ep_true_rew_mean | -0.25    |
| time/               |          |
|    fps              | 208      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.9     |
|    ep_rew_mean      | -1.85    |
|    ep_true_rew_mean | -19.1    |
| time/               |          |
|    fps              | 212      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.2     |
|    ep_rew_mean      | -1.67    |
|    ep_true_rew_mean | 1.67     |
| time/               |          |
|    fps              | 208      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.84    |
|    ep_true_rew_mean | -16.4    |
| time/               |          |
|    fps              | 202      |
|    iterations       | 1        |
|    time_elapsed     | 5        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.92    |
|    ep_true_rew_mean | -9.49    |
| time/               |          |
|    fps              | 205      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.01    |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 208      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | -1.97    |
|    ep_true_rew_mean | -16.8    |
| time/               |          |
|    fps              | 228      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | -1.81    |
|    ep_true_rew_mean | -4.65    |
| time/               |          |
|    fps              | 216      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.91    |
|    ep_true_rew_mean | -11.8    |
| time/               |          |
|    fps              | 215      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.9     |
|    ep_rew_mean      | -1.87    |
|    ep_true_rew_mean | -19.2    |
| time/               |          |
|    fps              | 219      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.8     |
|    ep_rew_mean      | -2.03    |
|    ep_true_rew_mean | -11.9    |
| time/               |          |
|    fps              | 223      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24       |
|    ep_rew_mean      | -1.94    |
|    ep_true_rew_mean | -12      |
| time/               |          |
|    fps              | 218      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

In [7]:
# Close the tqdm progress bar that was created as `pbar` in the previous cell.
pbar.close()

In [5]:
# Time a single call to `no_geco_exp.run` (the AS + RS experiment, no GECO).
# NOTE(review): `context_pair` is not assigned in this cell; presumably it is
# left over from an earlier cell -- confirm, since a Restart & Run All would
# otherwise fail here with a NameError.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way a
# difference of time.time() readings can.
start = time.perf_counter()

no_geco_exp.run(*context_pair)

end = time.perf_counter()
print(f'execution time {end - start}')

training agent for task 53ad9c64ee168199805b6d596c9cd230efcabf1a9640eb7454ae65d6b1d7e256
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to single_experiment_dumps/WithTransferExperiment/env-small/cspace-fixed_entities/alg-PPO/mods-(AS,RS)/rm_kwargs-((grid_resolution-(2,2)))/alg_kwargs-((n_steps-1024))/model_kwargs-((gnn_hidden_dims-(32,32)))/seed-42/logs/tensorboard/53ad9c64ee168199805b6d596c9cd230efcabf1a9640eb7454ae65d6b1d7e256_1


training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.6     |
|    ep_rew_mean      | -1.96    |
|    ep_true_rew_mean | -12      |
| time/               |          |
|    fps              | 191      |
|    iterations       | 1        |
|    time_elapsed     | 5        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.3     |
|    ep_rew_mean      | -1.93    |
|    ep_true_rew_mean | -7.02    |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -2.2     |
|    ep_true_rew_mean | -25      |
| time/               |          |
|    fps              | 229      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

In [8]:
# Time a single call to `with_geco_exp.run` (the AS + RS + GECO experiment).
# NOTE(review): `context_pair` is not assigned in this cell; presumably it is
# left over from an earlier cell -- confirm, since a Restart & Run All would
# otherwise fail here with a NameError.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way a
# difference of time.time() readings can.
start = time.perf_counter()

with_geco_exp.run(*context_pair)

end = time.perf_counter()
print(f'execution time {end - start}')

training agent for task 53ad9c64ee168199805b6d596c9cd230efcabf1a9640eb7454ae65d6b1d7e256
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to single_experiment_dumps/WithTransferExperiment/env-small/cspace-fixed_entities/alg-PPO/mods-(AS,RS,GECO)/rm_kwargs-((grid_resolution-(2,2)))/alg_kwargs-((n_steps-1024))/model_kwargs-((gnn_hidden_dims-(32,32)))/seed-42/logs/tensorboard/53ad9c64ee168199805b6d596c9cd230efcabf1a9640eb7454ae65d6b1d7e256_1


training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.6     |
|    ep_rew_mean      | -1.78    |
|    ep_true_rew_mean | -4.57    |
| time/               |          |
|    fps              | 216      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -1.94    |
|    ep_true_rew_mean | -7.16    |
| time/               |          |
|    fps              | 205      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.7     |
|    ep_rew_mean      | -2.17    |
|    ep_true_rew_mean | -22.3    |
| time/               |          |
|    fps              | 161      |
|    iterations       | 1        |
|    time_elapsed     | 6        |
|    total_tim

In [9]:
# Time a single call to `geco_upt_exp.run` (the AS + RS + GECOUPT experiment).
# NOTE(review): `context_pair` is not assigned in this cell; presumably it is
# left over from an earlier cell -- confirm, since a Restart & Run All would
# otherwise fail here with a NameError.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments the way a
# difference of time.time() readings can.
start = time.perf_counter()

geco_upt_exp.run(*context_pair)

end = time.perf_counter()
print(f'execution time {end - start}')

training agent for task 53ad9c64ee168199805b6d596c9cd230efcabf1a9640eb7454ae65d6b1d7e256
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to single_experiment_dumps/WithTransferExperiment/env-small/cspace-fixed_entities/alg-PPO/mods-(AS,RS,GECOUPT)/rm_kwargs-((grid_resolution-(2,2)))/alg_kwargs-((n_steps-1024))/model_kwargs-((gnn_hidden_dims-(32,32)))/seed-42/logs/tensorboard/53ad9c64ee168199805b6d596c9cd230efcabf1a9640eb7454ae65d6b1d7e256_1


training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.4     |
|    ep_rew_mean      | -1.83    |
|    ep_true_rew_mean | -4.6     |
| time/               |          |
|    fps              | 204      |
|    iterations       | 1        |
|    time_elapsed     | 5        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.3     |
|    ep_rew_mean      | -1.92    |
|    ep_true_rew_mean | -2.37    |
| time/               |          |
|    fps              | 218      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim

training:   0%|          | 0/100000.0 [00:00<?, ?it/s]

Eval num_timesteps=500, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 500      |
---------------------------------
New best mean reward!
Eval num_timesteps=1000, episode_reward=-25.00 +/- 0.00
Episode length: 25.00 +/- 0.00
---------------------------------
| eval/              |          |
|    mean_ep_length  | 25       |
|    mean_reward     | -25      |
| time/              |          |
|    total_timesteps | 1000     |
---------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.1     |
|    ep_rew_mean      | -2.11    |
|    ep_true_rew_mean | -14.6    |
| time/               |          |
|    fps              | 224      |
|    iterations       | 1        |
|    time_elapsed     | 4        |
|    total_tim