The basic algorithm will match the provided masked string (e.g. a _ _ l e) to all possible words in the dictionary, tabulate the frequency of letters appearing in these possible words, and then guess the letter with the highest frequency of appearence that has not already been guessed. If there are no remaining words that match then it will default back to the character frequency distribution of the entire dictionary.

This benchmark strategy is successful approximately 18% of the time. Your task is to design an algorithm that significantly outperforms this benchmark.

In [1]:
import json
import requests
import random
import string
import secrets
import time
import re
import collections

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

In [2]:
# Read the words from the file
with open('words_250000_train.txt', 'r') as file:
    word_list = file.read().splitlines()
longest_word = max(word_list,key=len)
print("The longest word is:", longest_word)
print(len(longest_word))

The longest word is: cyclotrimethylenetrinitramine
29


### Thinking Process
I could think of 2 approaches for this problem, one was using reinforcement learning to learn using a given environment via some RL algorithm and the second was to use imitation learning (akin to supervised learning).
We first analyze the first approach.

#### Approach 1
I created a custom environment to play hangman (as written below) and trained the PPO algorithm with LSTM support. 
To check the feasibility of the approach, i trained the algorithm for 10000 timesteps while keeping the word to be guessed same (rabbit).

In [3]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from tqdm.notebook import tqdm

class HangmanEnv1(gym.Env):
    metadata = {"render_modes": ["human"], "render_fps": 30}
    def __init__(self, word_list,render_mode="human",rword=True):
        super(HangmanEnv1, self).__init__()
        self.word_list = word_list
        self.word = None
        self.obscured_word = None
        self.guessed_letters = set()
        self.tries_remaining = None
        self.rword = rword
        self.action_space = spaces.Discrete(26)  # Guessing a letter
        num_values = np.array([28 for _ in range(29)] + [2 for _ in range(26)] + [7])
        self.observation_space = spaces.MultiDiscrete(num_values)

    def reset(self,seed=None,options=None):
        if self.rword:
            self.word = np.random.choice(self.word_list)
        else:
            self.word = "rabbit"
        self.obscured_word = ['_' for _ in self.word]
        self.guessed_letters = set()
        self.tries_remaining = 6
        return self._get_observation(),{}

    def step(self, action):
        guess = chr(action + ord('a'))
        if guess not in self.guessed_letters:
            self.guessed_letters.add(guess)
        else:
            return self._get_observation(),-3,False,False,{}
        
        if guess in self.word:
            reward = 0
            for i in range(len(self.word)):
                if(self.word[i] == guess):
                    self.obscured_word[i] = guess
            if '_' not in self.obscured_word:
                return self._get_observation(),5,True,False,{}
            else:
                return self._get_observation(),2,False,False,{}
        else:
            self.tries_remaining -= 1
            if self.tries_remaining == 0:
                return self._get_observation(),-2,True,False,{}
            else:
                return self._get_observation(),-1,False,False,{}

    def _get_observation(self):
        return np.array([ord(c) - ord('a') if c != '_' else 26 for c in self.obscured_word]+[27 for _ in range(29-len(self.obscured_word))]
                +[1 if chr(c + ord('a')) in self.guessed_letters else 0 for c in range(26)]+
                [self.tries_remaining])
    def render(self):
        x = 10
    
def accuracy_fn(model,env,verbose=False):
    success = 0
    trr = 0
    progbar = tqdm(range(10000))
    for e in progbar:
        state,_ = env.reset()
        done = False
        tr = 0
        ep_len = 0
        while not done:
            action,_ = model.predict(state)
            next_state, reward, done,oo,bhoo = env.step(action)
            state = next_state
            tr += reward
            ep_len += 1
            if(reward > 0 and done):
                success+=1
            if verbose:
                print(env.word,env.guessed_letters)
        trr += tr/ep_len
    return success/100,trr

2024-06-29 19:34:24.287847: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-29 19:34:24.571390: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
env = HangmanEnv1(word_list,rword=False)
model = PPO("MlpPolicy",env,verbose=1)
model.learn(total_timesteps=10000)
model.save("ppo_rabbit")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


KeyboardInterrupt: 

In [65]:
model_rabbit = PPO.load("ppo_rabbit",env)
percentage,mean_reward = accuracy_fn(model,env)
print("Success rate:",percentage)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  0%|          | 0/10000 [00:00<?, ?it/s]

Success rate: 6.07


As you can see by means of the above cell the performance of the model is not admissible and is even lesser than the benchmark. So we conclude that this approach is not feasible.

In [9]:
# Create the Hangman environment
env1 = HangmanEnv1(word_list)
# Train the agent using PPO
model = PPO("MlpPolicy", env1,verbose=1)
model.learn(total_timesteps=5000000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 10.3     |
|    ep_rew_mean     | -7.58    |
| time/              |          |
|    fps             | 198      |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 10.9         |
|    ep_rew_mean          | -7.66        |
| time/                   |              |
|    fps                  | 170          |
|    iterations           | 2            |
|    time_elapsed         | 24           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0133686215 |
|    clip_fraction        | 0.127        |
|    clip_range           | 0.2          |
|    en

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.71        |
|    ep_rew_mean          | -4.23       |
| time/                   |             |
|    fps                  | 145         |
|    iterations           | 11          |
|    time_elapsed         | 154         |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.015414957 |
|    clip_fraction        | 0.186       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.1        |
|    explained_variance   | 0.153       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.95        |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0525     |
|    value_loss           | 6.86        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.99  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.9        |
|    ep_rew_mean          | -1.97       |
| time/                   |             |
|    fps                  | 147         |
|    iterations           | 21          |
|    time_elapsed         | 291         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.023664542 |
|    clip_fraction        | 0.289       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.79       |
|    explained_variance   | 0.104       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.97        |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.068      |
|    value_loss           | 4.99        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11          |
|    ep_rew_mean          | 0.03        |
| time/                   |             |
|    fps                  | 147         |
|    iterations           | 31          |
|    time_elapsed         | 431         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.027173044 |
|    clip_fraction        | 0.295       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.42       |
|    explained_variance   | 0.249       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.93        |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.0638     |
|    value_loss           | 4.49        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 0.95        |
| time/                   |             |
|    fps                  | 147         |
|    iterations           | 41          |
|    time_elapsed         | 569         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.025857233 |
|    clip_fraction        | 0.268       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.17       |
|    explained_variance   | 0.308       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.8         |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.0617     |
|    value_loss           | 5.21        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3       |
|    ep_rew_mean          | 1.09       |
| time/                   |            |
|    fps                  | 146        |
|    iterations           | 51         |
|    time_elapsed         | 714        |
|    total_timesteps      | 104448     |
| train/                  |            |
|    approx_kl            | 0.02663391 |
|    clip_fraction        | 0.278      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.94      |
|    explained_variance   | 0.324      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.36       |
|    n_updates            | 500        |
|    policy_gradient_loss | -0.06      |
|    value_loss           | 4.34       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.9        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 1.04       |
| time/                   |            |
|    fps                  | 143        |
|    iterations           | 61         |
|    time_elapsed         | 868        |
|    total_timesteps      | 124928     |
| train/                  |            |
|    approx_kl            | 0.03243557 |
|    clip_fraction        | 0.31       |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.78      |
|    explained_variance   | 0.377      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.57       |
|    n_updates            | 600        |
|    policy_gradient_loss | -0.0566    |
|    value_loss           | 4.93       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 1.33        |
| time/                   |             |
|    fps                  | 144         |
|    iterations           | 71          |
|    time_elapsed         | 1004        |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 0.035003427 |
|    clip_fraction        | 0.303       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.62       |
|    explained_variance   | 0.42        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.77        |
|    n_updates            | 700         |
|    policy_gradient_loss | -0.0552     |
|    value_loss           | 3.96        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 2.78        |
| time/                   |             |
|    fps                  | 154         |
|    iterations           | 81          |
|    time_elapsed         | 1075        |
|    total_timesteps      | 165888      |
| train/                  |             |
|    approx_kl            | 0.036627457 |
|    clip_fraction        | 0.312       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.5        |
|    explained_variance   | 0.232       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.95        |
|    n_updates            | 800         |
|    policy_gradient_loss | -0.059      |
|    value_loss           | 3.66        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 2.82        |
| time/                   |             |
|    fps                  | 162         |
|    iterations           | 91          |
|    time_elapsed         | 1147        |
|    total_timesteps      | 186368      |
| train/                  |             |
|    approx_kl            | 0.039203666 |
|    clip_fraction        | 0.293       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.35       |
|    explained_variance   | 0.421       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.62        |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.0492     |
|    value_loss           | 4.92        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean          | 1.99       |
| time/                   |            |
|    fps                  | 169        |
|    iterations           | 101        |
|    time_elapsed         | 1223       |
|    total_timesteps      | 206848     |
| train/                  |            |
|    approx_kl            | 0.03407006 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.23      |
|    explained_variance   | 0.519      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.58       |
|    n_updates            | 1000       |
|    policy_gradient_loss | -0.0465    |
|    value_loss           | 3.29       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean          | 2.72       |
| time/                   |            |
|    fps                  | 174        |
|    iterations           | 111        |
|    time_elapsed         | 1304       |
|    total_timesteps      | 227328     |
| train/                  |            |
|    approx_kl            | 0.02946819 |
|    clip_fraction        | 0.223      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.16      |
|    explained_variance   | 0.47       |
|    learning_rate        | 0.0003     |
|    loss                 | 1.41       |
|    n_updates            | 1100       |
|    policy_gradient_loss | -0.0488    |
|    value_loss           | 5.15       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 2.18       |
| time/                   |            |
|    fps                  | 179        |
|    iterations           | 121        |
|    time_elapsed         | 1376       |
|    total_timesteps      | 247808     |
| train/                  |            |
|    approx_kl            | 0.04103758 |
|    clip_fraction        | 0.258      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.02      |
|    explained_variance   | 0.396      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.17       |
|    n_updates            | 1200       |
|    policy_gradient_loss | -0.0444    |
|    value_loss           | 3.84       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 2.71        |
| time/                   |             |
|    fps                  | 185         |
|    iterations           | 131         |
|    time_elapsed         | 1450        |
|    total_timesteps      | 268288      |
| train/                  |             |
|    approx_kl            | 0.030831102 |
|    clip_fraction        | 0.212       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.998      |
|    explained_variance   | 0.542       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.17        |
|    n_updates            | 1300        |
|    policy_gradient_loss | -0.0454     |
|    value_loss           | 4.02        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.8      |
|    ep_rew_mean          | 2.41      |
| time/                   |           |
|    fps                  | 189       |
|    iterations           | 141       |
|    time_elapsed         | 1526      |
|    total_timesteps      | 288768    |
| train/                  |           |
|    approx_kl            | 0.0383839 |
|    clip_fraction        | 0.245     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.891    |
|    explained_variance   | 0.585     |
|    learning_rate        | 0.0003    |
|    loss                 | 2.1       |
|    n_updates            | 1400      |
|    policy_gradient_loss | -0.0402   |
|    value_loss           | 3.64      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 3.34  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 2.74        |
| time/                   |             |
|    fps                  | 193         |
|    iterations           | 151         |
|    time_elapsed         | 1599        |
|    total_timesteps      | 309248      |
| train/                  |             |
|    approx_kl            | 0.039425377 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.871      |
|    explained_variance   | 0.37        |
|    learning_rate        | 0.0003      |
|    loss                 | 3.17        |
|    n_updates            | 1500        |
|    policy_gradient_loss | -0.0386     |
|    value_loss           | 6.46        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.34       |
| time/                   |            |
|    fps                  | 197        |
|    iterations           | 161        |
|    time_elapsed         | 1672       |
|    total_timesteps      | 329728     |
| train/                  |            |
|    approx_kl            | 0.04005605 |
|    clip_fraction        | 0.254      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.839     |
|    explained_variance   | 0.336      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.34       |
|    n_updates            | 1600       |
|    policy_gradient_loss | -0.0422    |
|    value_loss           | 3.82       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.76       |
| time/                   |            |
|    fps                  | 200        |
|    iterations           | 171        |
|    time_elapsed         | 1750       |
|    total_timesteps      | 350208     |
| train/                  |            |
|    approx_kl            | 0.03571972 |
|    clip_fraction        | 0.226      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.815     |
|    explained_variance   | 0.505      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.48       |
|    n_updates            | 1700       |
|    policy_gradient_loss | -0.0391    |
|    value_loss           | 4.58       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.5        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 2.81       |
| time/                   |            |
|    fps                  | 202        |
|    iterations           | 181        |
|    time_elapsed         | 1827       |
|    total_timesteps      | 370688     |
| train/                  |            |
|    approx_kl            | 0.03787295 |
|    clip_fraction        | 0.24       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.758     |
|    explained_variance   | 0.559      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.941      |
|    n_updates            | 1800       |
|    policy_gradient_loss | -0.0409    |
|    value_loss           | 3.61       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 2.99        |
| time/                   |             |
|    fps                  | 205         |
|    iterations           | 191         |
|    time_elapsed         | 1907        |
|    total_timesteps      | 391168      |
| train/                  |             |
|    approx_kl            | 0.032849655 |
|    clip_fraction        | 0.197       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.76       |
|    explained_variance   | 0.377       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.21        |
|    n_updates            | 1900        |
|    policy_gradient_loss | -0.0296     |
|    value_loss           | 8.5         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.85       |
| time/                   |            |
|    fps                  | 208        |
|    iterations           | 201        |
|    time_elapsed         | 1978       |
|    total_timesteps      | 411648     |
| train/                  |            |
|    approx_kl            | 0.03777519 |
|    clip_fraction        | 0.232      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.793     |
|    explained_variance   | 0.407      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.56       |
|    n_updates            | 2000       |
|    policy_gradient_loss | -0.042     |
|    value_loss           | 5.77       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.5        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 2.72        |
| time/                   |             |
|    fps                  | 210         |
|    iterations           | 211         |
|    time_elapsed         | 2054        |
|    total_timesteps      | 432128      |
| train/                  |             |
|    approx_kl            | 0.055125505 |
|    clip_fraction        | 0.268       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.786      |
|    explained_variance   | 0.475       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.56        |
|    n_updates            | 2100        |
|    policy_gradient_loss | -0.0424     |
|    value_loss           | 3.92        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 2.51        |
| time/                   |             |
|    fps                  | 212         |
|    iterations           | 221         |
|    time_elapsed         | 2128        |
|    total_timesteps      | 452608      |
| train/                  |             |
|    approx_kl            | 0.033019267 |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.751      |
|    explained_variance   | 0.0592      |
|    learning_rate        | 0.0003      |
|    loss                 | 3.4         |
|    n_updates            | 2200        |
|    policy_gradient_loss | -0.0392     |
|    value_loss           | 19.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 3.32        |
| time/                   |             |
|    fps                  | 215         |
|    iterations           | 231         |
|    time_elapsed         | 2200        |
|    total_timesteps      | 473088      |
| train/                  |             |
|    approx_kl            | 0.042169854 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.662      |
|    explained_variance   | 0.54        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.29        |
|    n_updates            | 2300        |
|    policy_gradient_loss | -0.0349     |
|    value_loss           | 4.12        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 2.22        |
| time/                   |             |
|    fps                  | 217         |
|    iterations           | 241         |
|    time_elapsed         | 2272        |
|    total_timesteps      | 493568      |
| train/                  |             |
|    approx_kl            | 0.045711294 |
|    clip_fraction        | 0.174       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.696      |
|    explained_variance   | 0.329       |
|    learning_rate        | 0.0003      |
|    loss                 | 18.4        |
|    n_updates            | 2400        |
|    policy_gradient_loss | -0.0364     |
|    value_loss           | 12.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 2.77        |
| time/                   |             |
|    fps                  | 218         |
|    iterations           | 251         |
|    time_elapsed         | 2348        |
|    total_timesteps      | 514048      |
| train/                  |             |
|    approx_kl            | 0.040532243 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.576      |
|    explained_variance   | 0.483       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.22        |
|    n_updates            | 2500        |
|    policy_gradient_loss | -0.0354     |
|    value_loss           | 4.47        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.25        |
| time/                   |             |
|    fps                  | 220         |
|    iterations           | 261         |
|    time_elapsed         | 2425        |
|    total_timesteps      | 534528      |
| train/                  |             |
|    approx_kl            | 0.042614177 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.606      |
|    explained_variance   | 0.588       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.923       |
|    n_updates            | 2600        |
|    policy_gradient_loss | -0.0313     |
|    value_loss           | 3.05        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 3.73        |
| time/                   |             |
|    fps                  | 221         |
|    iterations           | 271         |
|    time_elapsed         | 2502        |
|    total_timesteps      | 555008      |
| train/                  |             |
|    approx_kl            | 0.015508184 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.653      |
|    explained_variance   | 0.147       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.42        |
|    n_updates            | 2700        |
|    policy_gradient_loss | -0.0372     |
|    value_loss           | 27.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 3.34       |
| time/                   |            |
|    fps                  | 223        |
|    iterations           | 281        |
|    time_elapsed         | 2577       |
|    total_timesteps      | 575488     |
| train/                  |            |
|    approx_kl            | 0.05129689 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.598     |
|    explained_variance   | 0.538      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.22       |
|    n_updates            | 2800       |
|    policy_gradient_loss | -0.0347    |
|    value_loss           | 3.47       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 3.28       |
| time/                   |            |
|    fps                  | 224        |
|    iterations           | 291        |
|    time_elapsed         | 2649       |
|    total_timesteps      | 595968     |
| train/                  |            |
|    approx_kl            | 0.05673404 |
|    clip_fraction        | 0.131      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.572     |
|    explained_variance   | 0.161      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.58       |
|    n_updates            | 2900       |
|    policy_gradient_loss | -0.0318    |
|    value_loss           | 19.4       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 2.24        |
| time/                   |             |
|    fps                  | 226         |
|    iterations           | 301         |
|    time_elapsed         | 2723        |
|    total_timesteps      | 616448      |
| train/                  |             |
|    approx_kl            | 0.047690384 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.589      |
|    explained_variance   | 0.612       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.1         |
|    n_updates            | 3000        |
|    policy_gradient_loss | -0.0336     |
|    value_loss           | 2.98        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 2.17        |
| time/                   |             |
|    fps                  | 227         |
|    iterations           | 311         |
|    time_elapsed         | 2798        |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.043390416 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.577      |
|    explained_variance   | 0.34        |
|    learning_rate        | 0.0003      |
|    loss                 | 2.63        |
|    n_updates            | 3100        |
|    policy_gradient_loss | -0.0304     |
|    value_loss           | 7.29        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.3    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1        |
|    ep_rew_mean          | 2.96        |
| time/                   |             |
|    fps                  | 228         |
|    iterations           | 321         |
|    time_elapsed         | 2874        |
|    total_timesteps      | 657408      |
| train/                  |             |
|    approx_kl            | 0.044330467 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.554      |
|    explained_variance   | 0.632       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.617       |
|    n_updates            | 3200        |
|    policy_gradient_loss | -0.0366     |
|    value_loss           | 3.09        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean          | 3.38       |
| time/                   |            |
|    fps                  | 229        |
|    iterations           | 331        |
|    time_elapsed         | 2952       |
|    total_timesteps      | 677888     |
| train/                  |            |
|    approx_kl            | 0.07659775 |
|    clip_fraction        | 0.151      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.558     |
|    explained_variance   | 0.34       |
|    learning_rate        | 0.0003     |
|    loss                 | 11.5       |
|    n_updates            | 3300       |
|    policy_gradient_loss | -0.0307    |
|    value_loss           | 11.6       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 2.62        |
| time/                   |             |
|    fps                  | 230         |
|    iterations           | 341         |
|    time_elapsed         | 3033        |
|    total_timesteps      | 698368      |
| train/                  |             |
|    approx_kl            | 0.051504686 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.508      |
|    explained_variance   | 0.467       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.5         |
|    n_updates            | 3400        |
|    policy_gradient_loss | -0.0303     |
|    value_loss           | 3.6         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 2.95       |
| time/                   |            |
|    fps                  | 231        |
|    iterations           | 351        |
|    time_elapsed         | 3111       |
|    total_timesteps      | 718848     |
| train/                  |            |
|    approx_kl            | 0.03482617 |
|    clip_fraction        | 0.194      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.542     |
|    explained_variance   | 0.485      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.75       |
|    n_updates            | 3500       |
|    policy_gradient_loss | -0.031     |
|    value_loss           | 4.76       |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 14.2      |
|    ep_rew_mean   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 3.94        |
| time/                   |             |
|    fps                  | 231         |
|    iterations           | 361         |
|    time_elapsed         | 3188        |
|    total_timesteps      | 739328      |
| train/                  |             |
|    approx_kl            | 0.054995157 |
|    clip_fraction        | 0.213       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.537      |
|    explained_variance   | 0.525       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.51        |
|    n_updates            | 3600        |
|    policy_gradient_loss | -0.0331     |
|    value_loss           | 4.23        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean          | 4.64       |
| time/                   |            |
|    fps                  | 232        |
|    iterations           | 371        |
|    time_elapsed         | 3267       |
|    total_timesteps      | 759808     |
| train/                  |            |
|    approx_kl            | 0.03773822 |
|    clip_fraction        | 0.194      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.543     |
|    explained_variance   | 0.565      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.53       |
|    n_updates            | 3700       |
|    policy_gradient_loss | -0.0328    |
|    value_loss           | 3.83       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean          | 2.9        |
| time/                   |            |
|    fps                  | 233        |
|    iterations           | 381        |
|    time_elapsed         | 3344       |
|    total_timesteps      | 780288     |
| train/                  |            |
|    approx_kl            | 0.03089808 |
|    clip_fraction        | 0.181      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.559     |
|    explained_variance   | 0.29       |
|    learning_rate        | 0.0003     |
|    loss                 | 4.11       |
|    n_updates            | 3800       |
|    policy_gradient_loss | -0.0322    |
|    value_loss           | 8.29       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.8        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3.26        |
| time/                   |             |
|    fps                  | 233         |
|    iterations           | 391         |
|    time_elapsed         | 3424        |
|    total_timesteps      | 800768      |
| train/                  |             |
|    approx_kl            | 0.050505318 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.526      |
|    explained_variance   | 0.215       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.88        |
|    n_updates            | 3900        |
|    policy_gradient_loss | -0.0316     |
|    value_loss           | 9.1         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.1        |
|    ep_rew_mean          | 0.36        |
| time/                   |             |
|    fps                  | 234         |
|    iterations           | 401         |
|    time_elapsed         | 3502        |
|    total_timesteps      | 821248      |
| train/                  |             |
|    approx_kl            | 0.052637793 |
|    clip_fraction        | 0.186       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.495      |
|    explained_variance   | 0.481       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.94        |
|    n_updates            | 4000        |
|    policy_gradient_loss | -0.034      |
|    value_loss           | 4.57        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.61        |
| time/                   |             |
|    fps                  | 235         |
|    iterations           | 411         |
|    time_elapsed         | 3581        |
|    total_timesteps      | 841728      |
| train/                  |             |
|    approx_kl            | 0.046361204 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.511      |
|    explained_variance   | 0.481       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.08        |
|    n_updates            | 4100        |
|    policy_gradient_loss | -0.0307     |
|    value_loss           | 4.23        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 3.11        |
| time/                   |             |
|    fps                  | 235         |
|    iterations           | 421         |
|    time_elapsed         | 3661        |
|    total_timesteps      | 862208      |
| train/                  |             |
|    approx_kl            | 0.046189647 |
|    clip_fraction        | 0.203       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.513      |
|    explained_variance   | 0.363       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.04        |
|    n_updates            | 4200        |
|    policy_gradient_loss | -0.0305     |
|    value_loss           | 3.18        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.8       |
|    ep_rew_mean          | -2.8       |
| time/                   |            |
|    fps                  | 236        |
|    iterations           | 431        |
|    time_elapsed         | 3738       |
|    total_timesteps      | 882688     |
| train/                  |            |
|    approx_kl            | 0.08398853 |
|    clip_fraction        | 0.131      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.481     |
|    explained_variance   | 0.0516     |
|    learning_rate        | 0.0003     |
|    loss                 | 17.9       |
|    n_updates            | 4300       |
|    policy_gradient_loss | -0.0315    |
|    value_loss           | 63.5       |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12        |
|    ep_rew_mean   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.3       |
|    ep_rew_mean          | 0.61       |
| time/                   |            |
|    fps                  | 236        |
|    iterations           | 441        |
|    time_elapsed         | 3816       |
|    total_timesteps      | 903168     |
| train/                  |            |
|    approx_kl            | 0.04697474 |
|    clip_fraction        | 0.2        |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.474     |
|    explained_variance   | 0.596      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.44       |
|    n_updates            | 4400       |
|    policy_gradient_loss | -0.037     |
|    value_loss           | 3.48       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.1       |
|    ep_rew_mean          | -0.14      |
| time/                   |            |
|    fps                  | 237        |
|    iterations           | 451        |
|    time_elapsed         | 3888       |
|    total_timesteps      | 923648     |
| train/                  |            |
|    approx_kl            | 0.04721887 |
|    clip_fraction        | 0.203      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.459     |
|    explained_variance   | 0.594      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.52       |
|    n_updates            | 4500       |
|    policy_gradient_loss | -0.0336    |
|    value_loss           | 3.38       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.02       |
| time/                   |            |
|    fps                  | 238        |
|    iterations           | 461        |
|    time_elapsed         | 3966       |
|    total_timesteps      | 944128     |
| train/                  |            |
|    approx_kl            | 0.08580052 |
|    clip_fraction        | 0.131      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.469     |
|    explained_variance   | 0.263      |
|    learning_rate        | 0.0003     |
|    loss                 | 3.54       |
|    n_updates            | 4600       |
|    policy_gradient_loss | -0.0324    |
|    value_loss           | 22.2       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1        |
|    ep_rew_mean          | 2.77        |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 471         |
|    time_elapsed         | 4044        |
|    total_timesteps      | 964608      |
| train/                  |             |
|    approx_kl            | 0.038697477 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.451      |
|    explained_variance   | 0.593       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.79        |
|    n_updates            | 4700        |
|    policy_gradient_loss | -0.0295     |
|    value_loss           | 3.67        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 2.82        |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 481         |
|    time_elapsed         | 4125        |
|    total_timesteps      | 985088      |
| train/                  |             |
|    approx_kl            | 0.052983064 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.436      |
|    explained_variance   | 0.376       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.73        |
|    n_updates            | 4800        |
|    policy_gradient_loss | -0.0344     |
|    value_loss           | 8           |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 2.62        |
| time/                   |             |
|    fps                  | 239         |
|    iterations           | 491         |
|    time_elapsed         | 4199        |
|    total_timesteps      | 1005568     |
| train/                  |             |
|    approx_kl            | 0.046970688 |
|    clip_fraction        | 0.139       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.443      |
|    explained_variance   | 0.155       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.59        |
|    n_updates            | 4900        |
|    policy_gradient_loss | -0.043      |
|    value_loss           | 17.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.3  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 2.84        |
| time/                   |             |
|    fps                  | 239         |
|    iterations           | 501         |
|    time_elapsed         | 4280        |
|    total_timesteps      | 1026048     |
| train/                  |             |
|    approx_kl            | 0.014107516 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.478      |
|    explained_variance   | 0.185       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.91        |
|    n_updates            | 5000        |
|    policy_gradient_loss | -0.0408     |
|    value_loss           | 22.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean          | 3.78       |
| time/                   |            |
|    fps                  | 240        |
|    iterations           | 511        |
|    time_elapsed         | 4358       |
|    total_timesteps      | 1046528    |
| train/                  |            |
|    approx_kl            | 0.06588832 |
|    clip_fraction        | 0.157      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.486     |
|    explained_variance   | 0.386      |
|    learning_rate        | 0.0003     |
|    loss                 | 4.05       |
|    n_updates            | 5100       |
|    policy_gradient_loss | 0.00558    |
|    value_loss           | 12         |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.82        |
| time/                   |             |
|    fps                  | 240         |
|    iterations           | 521         |
|    time_elapsed         | 4432        |
|    total_timesteps      | 1067008     |
| train/                  |             |
|    approx_kl            | 0.048021946 |
|    clip_fraction        | 0.187       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.441      |
|    explained_variance   | 0.512       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.96        |
|    n_updates            | 5200        |
|    policy_gradient_loss | -0.0307     |
|    value_loss           | 4.86        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3.63        |
| time/                   |             |
|    fps                  | 240         |
|    iterations           | 531         |
|    time_elapsed         | 4514        |
|    total_timesteps      | 1087488     |
| train/                  |             |
|    approx_kl            | 0.043923773 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.452      |
|    explained_variance   | 0.612       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.1         |
|    n_updates            | 5300        |
|    policy_gradient_loss | -0.0354     |
|    value_loss           | 3.08        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 4.13        |
| time/                   |             |
|    fps                  | 241         |
|    iterations           | 541         |
|    time_elapsed         | 4593        |
|    total_timesteps      | 1107968     |
| train/                  |             |
|    approx_kl            | 0.051822554 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.486      |
|    explained_variance   | 0.634       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.35        |
|    n_updates            | 5400        |
|    policy_gradient_loss | -0.032      |
|    value_loss           | 3.52        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 3.28        |
| time/                   |             |
|    fps                  | 241         |
|    iterations           | 551         |
|    time_elapsed         | 4669        |
|    total_timesteps      | 1128448     |
| train/                  |             |
|    approx_kl            | 0.026550438 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.498      |
|    explained_variance   | 0.355       |
|    learning_rate        | 0.0003      |
|    loss                 | 23.8        |
|    n_updates            | 5500        |
|    policy_gradient_loss | -0.0347     |
|    value_loss           | 37.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 4.64       |
| time/                   |            |
|    fps                  | 242        |
|    iterations           | 561        |
|    time_elapsed         | 4740       |
|    total_timesteps      | 1148928    |
| train/                  |            |
|    approx_kl            | 0.08418238 |
|    clip_fraction        | 0.26       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.525     |
|    explained_variance   | 0.5        |
|    learning_rate        | 0.0003     |
|    loss                 | 2.12       |
|    n_updates            | 5600       |
|    policy_gradient_loss | -0.0382    |
|    value_loss           | 5.17       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 4.01       |
| time/                   |            |
|    fps                  | 242        |
|    iterations           | 571        |
|    time_elapsed         | 4821       |
|    total_timesteps      | 1169408    |
| train/                  |            |
|    approx_kl            | 0.04638636 |
|    clip_fraction        | 0.207      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.515     |
|    explained_variance   | 0.512      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.76       |
|    n_updates            | 5700       |
|    policy_gradient_loss | -0.0333    |
|    value_loss           | 4.23       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.5        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 3.04        |
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 581         |
|    time_elapsed         | 4899        |
|    total_timesteps      | 1189888     |
| train/                  |             |
|    approx_kl            | 0.047040083 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.519      |
|    explained_variance   | 0.534       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.91        |
|    n_updates            | 5800        |
|    policy_gradient_loss | -0.0338     |
|    value_loss           | 4.54        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.1    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2        |
|    ep_rew_mean          | 4.33        |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 591         |
|    time_elapsed         | 4977        |
|    total_timesteps      | 1210368     |
| train/                  |             |
|    approx_kl            | 0.048550885 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.564      |
|    explained_variance   | 0.332       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.19        |
|    n_updates            | 5900        |
|    policy_gradient_loss | -0.0315     |
|    value_loss           | 7.19        |
-----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.5      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.81       |
| time/                   |            |
|    fps                  | 243        |
|    iterations           | 601        |
|    time_elapsed         | 5053       |
|    total_timesteps      | 1230848    |
| train/                  |            |
|    approx_kl            | 0.06836805 |
|    clip_fraction        | 0.258      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.537     |
|    explained_variance   | 0.459      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.49       |
|    n_updates            | 6000       |
|    policy_gradient_loss | -0.0384    |
|    value_loss           | 4.03       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.5        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.89       |
| time/                   |            |
|    fps                  | 243        |
|    iterations           | 611        |
|    time_elapsed         | 5128       |
|    total_timesteps      | 1251328    |
| train/                  |            |
|    approx_kl            | 0.04920214 |
|    clip_fraction        | 0.143      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.524     |
|    explained_variance   | 0.218      |
|    learning_rate        | 0.0003     |
|    loss                 | 3.67       |
|    n_updates            | 6100       |
|    policy_gradient_loss | -0.0301    |
|    value_loss           | 16.8       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean          | 3.07       |
| time/                   |            |
|    fps                  | 244        |
|    iterations           | 621        |
|    time_elapsed         | 5202       |
|    total_timesteps      | 1271808    |
| train/                  |            |
|    approx_kl            | 0.06566015 |
|    clip_fraction        | 0.246      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.521     |
|    explained_variance   | 0.498      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.57       |
|    n_updates            | 6200       |
|    policy_gradient_loss | -0.0358    |
|    value_loss           | 3.15       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 3.88        |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 631         |
|    time_elapsed         | 5276        |
|    total_timesteps      | 1292288     |
| train/                  |             |
|    approx_kl            | 0.058890425 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.478      |
|    explained_variance   | 0.6         |
|    learning_rate        | 0.0003      |
|    loss                 | 1.28        |
|    n_updates            | 6300        |
|    policy_gradient_loss | -0.0338     |
|    value_loss           | 3.34        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.2       |
|    ep_rew_mean          | 0.84       |
| time/                   |            |
|    fps                  | 245        |
|    iterations           | 641        |
|    time_elapsed         | 5352       |
|    total_timesteps      | 1312768    |
| train/                  |            |
|    approx_kl            | 0.04181625 |
|    clip_fraction        | 0.196      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.449     |
|    explained_variance   | 0.449      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.29       |
|    n_updates            | 6400       |
|    policy_gradient_loss | -0.0333    |
|    value_loss           | 5.23       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.1       |
|    ep_rew_mean          | 3.06       |
| time/                   |            |
|    fps                  | 245        |
|    iterations           | 651        |
|    time_elapsed         | 5422       |
|    total_timesteps      | 1333248    |
| train/                  |            |
|    approx_kl            | 0.08372828 |
|    clip_fraction        | 0.153      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.5       |
|    explained_variance   | 0.283      |
|    learning_rate        | 0.0003     |
|    loss                 | 4.04       |
|    n_updates            | 6500       |
|    policy_gradient_loss | -0.0356    |
|    value_loss           | 15.7       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.81        |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 661         |
|    time_elapsed         | 5495        |
|    total_timesteps      | 1353728     |
| train/                  |             |
|    approx_kl            | 0.069851175 |
|    clip_fraction        | 0.194       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.485      |
|    explained_variance   | 0.416       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.874       |
|    n_updates            | 6600        |
|    policy_gradient_loss | -0.0345     |
|    value_loss           | 5.24        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean          | 3.62       |
| time/                   |            |
|    fps                  | 246        |
|    iterations           | 671        |
|    time_elapsed         | 5577       |
|    total_timesteps      | 1374208    |
| train/                  |            |
|    approx_kl            | 0.17081559 |
|    clip_fraction        | 0.137      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.537     |
|    explained_variance   | 0.349      |
|    learning_rate        | 0.0003     |
|    loss                 | 16.7       |
|    n_updates            | 6700       |
|    policy_gradient_loss | -0.0408    |
|    value_loss           | 20.6       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3.56        |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 681         |
|    time_elapsed         | 5652        |
|    total_timesteps      | 1394688     |
| train/                  |             |
|    approx_kl            | 0.035256036 |
|    clip_fraction        | 0.197       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.521      |
|    explained_variance   | 0.423       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.53        |
|    n_updates            | 6800        |
|    policy_gradient_loss | -0.0351     |
|    value_loss           | 4.63        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.52        |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 691         |
|    time_elapsed         | 5736        |
|    total_timesteps      | 1415168     |
| train/                  |             |
|    approx_kl            | 0.040734254 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.531      |
|    explained_variance   | 0.543       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.16        |
|    n_updates            | 6900        |
|    policy_gradient_loss | -0.0353     |
|    value_loss           | 3.12        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 3.64       |
| time/                   |            |
|    fps                  | 246        |
|    iterations           | 701        |
|    time_elapsed         | 5816       |
|    total_timesteps      | 1435648    |
| train/                  |            |
|    approx_kl            | 0.04796047 |
|    clip_fraction        | 0.211      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.509     |
|    explained_variance   | 0.608      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.27       |
|    n_updates            | 7000       |
|    policy_gradient_loss | -0.0367    |
|    value_loss           | 3.25       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.2     |
|    ep_rew_mean          | 2.78     |
| time/                   |          |
|    fps                  | 247      |
|    iterations           | 711      |
|    time_elapsed         | 5894     |
|    total_timesteps      | 1456128  |
| train/                  |          |
|    approx_kl            | 0.060501 |
|    clip_fraction        | 0.238    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.501   |
|    explained_variance   | 0.587    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.62     |
|    n_updates            | 7100     |
|    policy_gradient_loss | -0.0353  |
|    value_loss           | 3.54     |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 3.87       |
| time/          

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 4.24       |
| time/                   |            |
|    fps                  | 247        |
|    iterations           | 721        |
|    time_elapsed         | 5973       |
|    total_timesteps      | 1476608    |
| train/                  |            |
|    approx_kl            | 0.08933288 |
|    clip_fraction        | 0.168      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.519     |
|    explained_variance   | 0.44       |
|    learning_rate        | 0.0003     |
|    loss                 | 3.79       |
|    n_updates            | 7200       |
|    policy_gradient_loss | -0.0295    |
|    value_loss           | 8.16       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 4.09        |
| time/                   |             |
|    fps                  | 247         |
|    iterations           | 731         |
|    time_elapsed         | 6050        |
|    total_timesteps      | 1497088     |
| train/                  |             |
|    approx_kl            | 0.052060347 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.513      |
|    explained_variance   | 0.67        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.17        |
|    n_updates            | 7300        |
|    policy_gradient_loss | -0.0353     |
|    value_loss           | 2.72        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.91        |
| time/                   |             |
|    fps                  | 247         |
|    iterations           | 741         |
|    time_elapsed         | 6130        |
|    total_timesteps      | 1517568     |
| train/                  |             |
|    approx_kl            | 0.055723336 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.547      |
|    explained_variance   | 0.341       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.79        |
|    n_updates            | 7400        |
|    policy_gradient_loss | -0.0114     |
|    value_loss           | 7.4         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.1  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 3.79        |
| time/                   |             |
|    fps                  | 247         |
|    iterations           | 751         |
|    time_elapsed         | 6209        |
|    total_timesteps      | 1538048     |
| train/                  |             |
|    approx_kl            | 0.040693603 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.528      |
|    explained_variance   | 0.524       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.48        |
|    n_updates            | 7500        |
|    policy_gradient_loss | -0.0332     |
|    value_loss           | 5.58        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4    

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 248       |
|    iterations           | 761       |
|    time_elapsed         | 6266      |
|    total_timesteps      | 1558528   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -9.57e-05 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.0003    |
|    loss                 | 284       |
|    n_updates            | 7600      |
|    policy_gradient_loss | -3.88e-05 |
|    value_loss           | 792       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 250       |
|    iterations           | 771       |
|    time_elapsed         | 6292      |
|    total_timesteps      | 1579008   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -9.57e-05 |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.0003    |
|    loss                 | 107       |
|    n_updates            | 7700      |
|    policy_gradient_loss | -0.000708 |
|    value_loss           | 318       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 253       |
|    iterations           | 781       |
|    time_elapsed         | 6318      |
|    total_timesteps      | 1599488   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -9.57e-05 |
|    explained_variance   | 0         |
|    learning_rate        | 0.0003    |
|    loss                 | 8.74      |
|    n_updates            | 7800      |
|    policy_gradient_loss | -0.000222 |
|    value_loss           | 56.5      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 255       |
|    iterations           | 791       |
|    time_elapsed         | 6344      |
|    total_timesteps      | 1619968   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -9.64e-05 |
|    explained_variance   | 0         |
|    learning_rate        | 0.0003    |
|    loss                 | 0.00991   |
|    n_updates            | 7900      |
|    policy_gradient_loss | -0.000127 |
|    value_loss           | 0.521     |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 257       |
|    iterations           | 801       |
|    time_elapsed         | 6381      |
|    total_timesteps      | 1640448   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -6.32e-05 |
|    explained_variance   | 2.98e-07  |
|    learning_rate        | 0.0003    |
|    loss                 | 7.21e-05  |
|    n_updates            | 8000      |
|    policy_gradient_loss | -0.000456 |
|    value_loss           | 0.00312   |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 12.2     |
|    ep_rew_mean          | 3.71     |
| time/                   |          |
|    fps                  | 257      |
|    iterations           | 811      |
|    time_elapsed         | 6448     |
|    total_timesteps      | 1660928  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -8e-05   |
|    explained_variance   | 0.000122 |
|    learning_rate        | 0.0003   |
|    loss                 | 5.77e-06 |
|    n_updates            | 8100     |
|    policy_gradient_loss | 3.1e-05  |
|    value_loss           | 5.86e-05 |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 258       |
|    iterations           | 821       |
|    time_elapsed         | 6498      |
|    total_timesteps      | 1681408   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.23e-05 |
|    explained_variance   | 0.002     |
|    learning_rate        | 0.0003    |
|    loss                 | 4.64e-06  |
|    n_updates            | 8200      |
|    policy_gradient_loss | 7.32e-05  |
|    value_loss           | 1.58e-06  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 259       |
|    iterations           | 831       |
|    time_elapsed         | 6549      |
|    total_timesteps      | 1701888   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.61e-05 |
|    explained_variance   | 0.383     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.32e-06  |
|    n_updates            | 8300      |
|    policy_gradient_loss | -6.09e-05 |
|    value_loss           | 1.21e-07  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 260       |
|    iterations           | 841       |
|    time_elapsed         | 6603      |
|    total_timesteps      | 1722368   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.78e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 1.19e-07  |
|    n_updates            | 8400      |
|    policy_gradient_loss | 0         |
|    value_loss           | 3.6e-07   |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 12.2     |
|    ep_rew_mean          | 3.71     |
| time/                   |          |
|    fps                  | 262      |
|    iterations           | 851      |
|    time_elapsed         | 6646     |
|    total_timesteps      | 1742848  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -8.8e-05 |
|    explained_variance   | 1        |
|    learning_rate        | 0.0003   |
|    loss                 | 3.39e-07 |
|    n_updates            | 8500     |
|    policy_gradient_loss | 0        |
|    value_loss           | 2.24e-07 |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 12.2     |
|    ep_rew_mean          | 3.71     |
| time/                  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 262       |
|    iterations           | 862       |
|    time_elapsed         | 6713      |
|    total_timesteps      | 1765376   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 1.51e-07  |
|    n_updates            | 8610      |
|    policy_gradient_loss | 0         |
|    value_loss           | 2.34e-07  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 263       |
|    iterations           | 872       |
|    time_elapsed         | 6774      |
|    total_timesteps      | 1785856   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 8710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 264       |
|    iterations           | 882       |
|    time_elapsed         | 6839      |
|    total_timesteps      | 1806336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 8810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 264       |
|    iterations           | 892       |
|    time_elapsed         | 6915      |
|    total_timesteps      | 1826816   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 8910      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 264       |
|    iterations           | 902       |
|    time_elapsed         | 6985      |
|    total_timesteps      | 1847296   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9010      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 265       |
|    iterations           | 912       |
|    time_elapsed         | 7033      |
|    total_timesteps      | 1867776   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9110      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 266       |
|    iterations           | 922       |
|    time_elapsed         | 7084      |
|    total_timesteps      | 1888256   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9210      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 268       |
|    iterations           | 932       |
|    time_elapsed         | 7113      |
|    total_timesteps      | 1908736   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9310      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 270       |
|    iterations           | 942       |
|    time_elapsed         | 7141      |
|    total_timesteps      | 1929216   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9410      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 271       |
|    iterations           | 952       |
|    time_elapsed         | 7170      |
|    total_timesteps      | 1949696   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9510      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 273       |
|    iterations           | 962       |
|    time_elapsed         | 7210      |
|    total_timesteps      | 1970176   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9610      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 274       |
|    iterations           | 972       |
|    time_elapsed         | 7239      |
|    total_timesteps      | 1990656   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9710      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 275       |
|    iterations           | 982       |
|    time_elapsed         | 7301      |
|    total_timesteps      | 2011136   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9810      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 276       |
|    iterations           | 992       |
|    time_elapsed         | 7350      |
|    total_timesteps      | 2031616   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 9910      |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 277       |
|    iterations           | 1002      |
|    time_elapsed         | 7400      |
|    total_timesteps      | 2052096   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 277       |
|    iterations           | 1012      |
|    time_elapsed         | 7456      |
|    total_timesteps      | 2072576   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 279       |
|    iterations           | 1022      |
|    time_elapsed         | 7498      |
|    total_timesteps      | 2093056   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 280       |
|    iterations           | 1032      |
|    time_elapsed         | 7527      |
|    total_timesteps      | 2113536   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 281       |
|    iterations           | 1042      |
|    time_elapsed         | 7581      |
|    total_timesteps      | 2134016   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 282       |
|    iterations           | 1052      |
|    time_elapsed         | 7623      |
|    total_timesteps      | 2154496   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 284       |
|    iterations           | 1062      |
|    time_elapsed         | 7652      |
|    total_timesteps      | 2174976   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 285       |
|    iterations           | 1072      |
|    time_elapsed         | 7681      |
|    total_timesteps      | 2195456   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 287       |
|    iterations           | 1082      |
|    time_elapsed         | 7709      |
|    total_timesteps      | 2215936   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 288       |
|    iterations           | 1092      |
|    time_elapsed         | 7738      |
|    total_timesteps      | 2236416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 10910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 290       |
|    iterations           | 1102      |
|    time_elapsed         | 7775      |
|    total_timesteps      | 2256896   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 290       |
|    iterations           | 1112      |
|    time_elapsed         | 7837      |
|    total_timesteps      | 2277376   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 291       |
|    iterations           | 1122      |
|    time_elapsed         | 7888      |
|    total_timesteps      | 2297856   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 292       |
|    iterations           | 1132      |
|    time_elapsed         | 7917      |
|    total_timesteps      | 2318336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 292       |
|    iterations           | 1142      |
|    time_elapsed         | 7985      |
|    total_timesteps      | 2338816   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 293       |
|    iterations           | 1152      |
|    time_elapsed         | 8030      |
|    total_timesteps      | 2359296   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 294       |
|    iterations           | 1162      |
|    time_elapsed         | 8093      |
|    total_timesteps      | 2379776   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 294       |
|    iterations           | 1172      |
|    time_elapsed         | 8157      |
|    total_timesteps      | 2400256   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 293       |
|    iterations           | 1182      |
|    time_elapsed         | 8240      |
|    total_timesteps      | 2420736   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 295       |
|    iterations           | 1192      |
|    time_elapsed         | 8268      |
|    total_timesteps      | 2441216   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 11910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1202      |
|    time_elapsed         | 8296      |
|    total_timesteps      | 2461696   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1212      |
|    time_elapsed         | 8369      |
|    total_timesteps      | 2482176   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1222      |
|    time_elapsed         | 8444      |
|    total_timesteps      | 2502656   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1232      |
|    time_elapsed         | 8523      |
|    total_timesteps      | 2523136   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 295       |
|    iterations           | 1242      |
|    time_elapsed         | 8601      |
|    total_timesteps      | 2543616   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1252      |
|    time_elapsed         | 8659      |
|    total_timesteps      | 2564096   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 295       |
|    iterations           | 1262      |
|    time_elapsed         | 8731      |
|    total_timesteps      | 2584576   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1272      |
|    time_elapsed         | 8786      |
|    total_timesteps      | 2605056   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1282      |
|    time_elapsed         | 8857      |
|    total_timesteps      | 2625536   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1292      |
|    time_elapsed         | 8925      |
|    total_timesteps      | 2646016   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 12910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 296       |
|    iterations           | 1302      |
|    time_elapsed         | 8985      |
|    total_timesteps      | 2666496   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 298       |
|    iterations           | 1312      |
|    time_elapsed         | 9014      |
|    total_timesteps      | 2686976   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 299       |
|    iterations           | 1322      |
|    time_elapsed         | 9049      |
|    total_timesteps      | 2707456   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 299       |
|    iterations           | 1332      |
|    time_elapsed         | 9116      |
|    total_timesteps      | 2727936   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 299       |
|    iterations           | 1342      |
|    time_elapsed         | 9163      |
|    total_timesteps      | 2748416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 300       |
|    iterations           | 1352      |
|    time_elapsed         | 9211      |
|    total_timesteps      | 2768896   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 300       |
|    iterations           | 1362      |
|    time_elapsed         | 9289      |
|    total_timesteps      | 2789376   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 300       |
|    iterations           | 1372      |
|    time_elapsed         | 9345      |
|    total_timesteps      | 2809856   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 301       |
|    iterations           | 1382      |
|    time_elapsed         | 9386      |
|    total_timesteps      | 2830336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 301       |
|    iterations           | 1392      |
|    time_elapsed         | 9445      |
|    total_timesteps      | 2850816   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 13910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 302       |
|    iterations           | 1402      |
|    time_elapsed         | 9494      |
|    total_timesteps      | 2871296   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 303       |
|    iterations           | 1412      |
|    time_elapsed         | 9531      |
|    total_timesteps      | 2891776   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 303       |
|    iterations           | 1422      |
|    time_elapsed         | 9581      |
|    total_timesteps      | 2912256   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 304       |
|    iterations           | 1432      |
|    time_elapsed         | 9645      |
|    total_timesteps      | 2932736   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 303       |
|    iterations           | 1442      |
|    time_elapsed         | 9726      |
|    total_timesteps      | 2953216   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 304       |
|    iterations           | 1452      |
|    time_elapsed         | 9779      |
|    total_timesteps      | 2973696   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 304       |
|    iterations           | 1462      |
|    time_elapsed         | 9826      |
|    total_timesteps      | 2994176   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 305       |
|    iterations           | 1472      |
|    time_elapsed         | 9880      |
|    total_timesteps      | 3014656   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 305       |
|    iterations           | 1482      |
|    time_elapsed         | 9943      |
|    total_timesteps      | 3035136   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 305       |
|    iterations           | 1492      |
|    time_elapsed         | 9991      |
|    total_timesteps      | 3055616   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 14910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 306       |
|    iterations           | 1502      |
|    time_elapsed         | 10032     |
|    total_timesteps      | 3076096   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 307       |
|    iterations           | 1512      |
|    time_elapsed         | 10079     |
|    total_timesteps      | 3096576   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 307       |
|    iterations           | 1522      |
|    time_elapsed         | 10126     |
|    total_timesteps      | 3117056   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 308       |
|    iterations           | 1532      |
|    time_elapsed         | 10173     |
|    total_timesteps      | 3137536   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 309       |
|    iterations           | 1542      |
|    time_elapsed         | 10214     |
|    total_timesteps      | 3158016   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 309       |
|    iterations           | 1552      |
|    time_elapsed         | 10253     |
|    total_timesteps      | 3178496   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1562      |
|    time_elapsed         | 10306     |
|    total_timesteps      | 3198976   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1572      |
|    time_elapsed         | 10358     |
|    total_timesteps      | 3219456   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1582      |
|    time_elapsed         | 10425     |
|    total_timesteps      | 3239936   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1592      |
|    time_elapsed         | 10491     |
|    total_timesteps      | 3260416   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 15910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1602      |
|    time_elapsed         | 10557     |
|    total_timesteps      | 3280896   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1612      |
|    time_elapsed         | 10611     |
|    total_timesteps      | 3301376   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1622      |
|    time_elapsed         | 10665     |
|    total_timesteps      | 3321856   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1632      |
|    time_elapsed         | 10722     |
|    total_timesteps      | 3342336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1642      |
|    time_elapsed         | 10781     |
|    total_timesteps      | 3362816   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16410     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1652      |
|    time_elapsed         | 10859     |
|    total_timesteps      | 3383296   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16510     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1662      |
|    time_elapsed         | 10934     |
|    total_timesteps      | 3403776   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16610     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1672      |
|    time_elapsed         | 10988     |
|    total_timesteps      | 3424256   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16710     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1682      |
|    time_elapsed         | 11048     |
|    total_timesteps      | 3444736   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16810     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1692      |
|    time_elapsed         | 11128     |
|    total_timesteps      | 3465216   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 16910     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1702      |
|    time_elapsed         | 11182     |
|    total_timesteps      | 3485696   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 17010     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1712      |
|    time_elapsed         | 11277     |
|    total_timesteps      | 3506176   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 17110     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 310       |
|    iterations           | 1722      |
|    time_elapsed         | 11342     |
|    total_timesteps      | 3526656   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 17210     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |
| time/                   |           |
|    fps                  | 311       |
|    iterations           | 1732      |
|    time_elapsed         | 11393     |
|    total_timesteps      | 3547136   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -8.83e-05 |
|    explained_variance   | 1         |
|    learning_rate        | 0.0003    |
|    loss                 | 0         |
|    n_updates            | 17310     |
|    policy_gradient_loss | 0         |
|    value_loss           | 0         |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.2      |
|    ep_rew_mean          | 3.71      |


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 3.48        |
| time/                   |             |
|    fps                  | 311         |
|    iterations           | 1742        |
|    time_elapsed         | 11471       |
|    total_timesteps      | 3567616     |
| train/                  |             |
|    approx_kl            | 0.108294815 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.54       |
|    explained_variance   | 0.235       |
|    learning_rate        | 0.0003      |
|    loss                 | 1           |
|    n_updates            | 17410       |
|    policy_gradient_loss | -0.0325     |
|    value_loss           | 4.08        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3    

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.3     |
|    ep_rew_mean          | 3.36     |
| time/                   |          |
|    fps                  | 310      |
|    iterations           | 1752     |
|    time_elapsed         | 11552    |
|    total_timesteps      | 3588096  |
| train/                  |          |
|    approx_kl            | 5.280429 |
|    clip_fraction        | 0.333    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.174   |
|    explained_variance   | 0.159    |
|    learning_rate        | 0.0003   |
|    loss                 | 2.96     |
|    n_updates            | 17510    |
|    policy_gradient_loss | -0.0296  |
|    value_loss           | 154      |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.3       |
|    ep_rew_mean          | 3.63       |
| time/          

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 3.73       |
| time/                   |            |
|    fps                  | 310        |
|    iterations           | 1762       |
|    time_elapsed         | 11639      |
|    total_timesteps      | 3608576    |
| train/                  |            |
|    approx_kl            | 0.06457423 |
|    clip_fraction        | 0.217      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.447     |
|    explained_variance   | 0.649      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.84       |
|    n_updates            | 17610      |
|    policy_gradient_loss | -0.0347    |
|    value_loss           | 3.38       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 3.64        |
| time/                   |             |
|    fps                  | 309         |
|    iterations           | 1772        |
|    time_elapsed         | 11723       |
|    total_timesteps      | 3629056     |
| train/                  |             |
|    approx_kl            | 0.019703645 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.451      |
|    explained_variance   | 0.139       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.21        |
|    n_updates            | 17710       |
|    policy_gradient_loss | -0.049      |
|    value_loss           | 32.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4       |
|    ep_rew_mean          | 3.54       |
| time/                   |            |
|    fps                  | 309        |
|    iterations           | 1782       |
|    time_elapsed         | 11806      |
|    total_timesteps      | 3649536    |
| train/                  |            |
|    approx_kl            | 0.04521876 |
|    clip_fraction        | 0.135      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.514     |
|    explained_variance   | 0.228      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.73       |
|    n_updates            | 17810      |
|    policy_gradient_loss | -0.0483    |
|    value_loss           | 19.2       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 17.4       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 19         |
|    ep_rew_mean          | -18.1      |
| time/                   |            |
|    fps                  | 308        |
|    iterations           | 1792       |
|    time_elapsed         | 11879      |
|    total_timesteps      | 3670016    |
| train/                  |            |
|    approx_kl            | 0.06931151 |
|    clip_fraction        | 0.187      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.488     |
|    explained_variance   | 0.457      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.8        |
|    n_updates            | 17910      |
|    policy_gradient_loss | -0.03      |
|    value_loss           | 6.6        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.9        |
|    ep_rew_mean          | 1.51        |
| time/                   |             |
|    fps                  | 308         |
|    iterations           | 1802        |
|    time_elapsed         | 11961       |
|    total_timesteps      | 3690496     |
| train/                  |             |
|    approx_kl            | 0.036235075 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.452      |
|    explained_variance   | 0.407       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.21        |
|    n_updates            | 18010       |
|    policy_gradient_loss | -0.0327     |
|    value_loss           | 6.72        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.76        |
| time/                   |             |
|    fps                  | 308         |
|    iterations           | 1812        |
|    time_elapsed         | 12039       |
|    total_timesteps      | 3710976     |
| train/                  |             |
|    approx_kl            | 0.055117227 |
|    clip_fraction        | 0.174       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.435      |
|    explained_variance   | 0.454       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.42        |
|    n_updates            | 18110       |
|    policy_gradient_loss | -0.031      |
|    value_loss           | 6.7         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.44        |
| time/                   |             |
|    fps                  | 307         |
|    iterations           | 1822        |
|    time_elapsed         | 12120       |
|    total_timesteps      | 3731456     |
| train/                  |             |
|    approx_kl            | 0.056971014 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.414      |
|    explained_variance   | 0.453       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.47        |
|    n_updates            | 18210       |
|    policy_gradient_loss | -0.0275     |
|    value_loss           | 4.56        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.84        |
| time/                   |             |
|    fps                  | 307         |
|    iterations           | 1832        |
|    time_elapsed         | 12198       |
|    total_timesteps      | 3751936     |
| train/                  |             |
|    approx_kl            | 0.031408507 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.443      |
|    explained_variance   | 0.467       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.11        |
|    n_updates            | 18310       |
|    policy_gradient_loss | -0.034      |
|    value_loss           | 6.81        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.7  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 3.54        |
| time/                   |             |
|    fps                  | 307         |
|    iterations           | 1842        |
|    time_elapsed         | 12269       |
|    total_timesteps      | 3772416     |
| train/                  |             |
|    approx_kl            | 0.059323676 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.407      |
|    explained_variance   | -0.181      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.2         |
|    n_updates            | 18410       |
|    policy_gradient_loss | -0.0225     |
|    value_loss           | 3.98        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1        |
|    ep_rew_mean          | 3.01        |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 1852        |
|    time_elapsed         | 12356       |
|    total_timesteps      | 3792896     |
| train/                  |             |
|    approx_kl            | 0.040935554 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.407      |
|    explained_variance   | 0.488       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.05        |
|    n_updates            | 18510       |
|    policy_gradient_loss | -0.0202     |
|    value_loss           | 5.48        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 4.04       |
| time/                   |            |
|    fps                  | 306        |
|    iterations           | 1862       |
|    time_elapsed         | 12425      |
|    total_timesteps      | 3813376    |
| train/                  |            |
|    approx_kl            | 0.04569357 |
|    clip_fraction        | 0.178      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.377     |
|    explained_variance   | 0.613      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.51       |
|    n_updates            | 18610      |
|    policy_gradient_loss | -0.0262    |
|    value_loss           | 3.53       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 2.75        |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 1872        |
|    time_elapsed         | 12496       |
|    total_timesteps      | 3833856     |
| train/                  |             |
|    approx_kl            | 0.050355844 |
|    clip_fraction        | 0.187       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.434      |
|    explained_variance   | 0.392       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.46        |
|    n_updates            | 18710       |
|    policy_gradient_loss | -0.0322     |
|    value_loss           | 6.77        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 2.41        |
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 1882        |
|    time_elapsed         | 12573       |
|    total_timesteps      | 3854336     |
| train/                  |             |
|    approx_kl            | 0.050462417 |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.441      |
|    explained_variance   | 0.558       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.7         |
|    n_updates            | 18810       |
|    policy_gradient_loss | -0.0289     |
|    value_loss           | 7.5         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3    

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.2      |
|    ep_rew_mean          | 3.22      |
| time/                   |           |
|    fps                  | 306       |
|    iterations           | 1892      |
|    time_elapsed         | 12656     |
|    total_timesteps      | 3874816   |
| train/                  |           |
|    approx_kl            | 0.3299641 |
|    clip_fraction        | 0.147     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.43     |
|    explained_variance   | 0.314     |
|    learning_rate        | 0.0003    |
|    loss                 | 2.91      |
|    n_updates            | 18910     |
|    policy_gradient_loss | -0.0293   |
|    value_loss           | 9.36      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 4.14  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 3.19        |
| time/                   |             |
|    fps                  | 305         |
|    iterations           | 1902        |
|    time_elapsed         | 12734       |
|    total_timesteps      | 3895296     |
| train/                  |             |
|    approx_kl            | 0.047239453 |
|    clip_fraction        | 0.204       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.446      |
|    explained_variance   | 0.551       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.63        |
|    n_updates            | 19010       |
|    policy_gradient_loss | -0.031      |
|    value_loss           | 4.71        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.1    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3.63        |
| time/                   |             |
|    fps                  | 305         |
|    iterations           | 1912        |
|    time_elapsed         | 12817       |
|    total_timesteps      | 3915776     |
| train/                  |             |
|    approx_kl            | 0.035967506 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.458      |
|    explained_variance   | 0.402       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.51        |
|    n_updates            | 19110       |
|    policy_gradient_loss | -0.0309     |
|    value_loss           | 5.76        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 4.16       |
| time/                   |            |
|    fps                  | 305        |
|    iterations           | 1922       |
|    time_elapsed         | 12901      |
|    total_timesteps      | 3936256    |
| train/                  |            |
|    approx_kl            | 0.30051586 |
|    clip_fraction        | 0.146      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.416     |
|    explained_variance   | 0.124      |
|    learning_rate        | 0.0003     |
|    loss                 | 4.98       |
|    n_updates            | 19210      |
|    policy_gradient_loss | -0.0383    |
|    value_loss           | 77.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean          | 4.27       |
| time/                   |            |
|    fps                  | 304        |
|    iterations           | 1932       |
|    time_elapsed         | 12982      |
|    total_timesteps      | 3956736    |
| train/                  |            |
|    approx_kl            | 0.02777314 |
|    clip_fraction        | 0.166      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.483     |
|    explained_variance   | -0.279     |
|    learning_rate        | 0.0003     |
|    loss                 | 1.43       |
|    n_updates            | 19310      |
|    policy_gradient_loss | -0.0353    |
|    value_loss           | 13.6       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.8       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 4.2         |
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 1942        |
|    time_elapsed         | 13051       |
|    total_timesteps      | 3977216     |
| train/                  |             |
|    approx_kl            | 0.020326637 |
|    clip_fraction        | 0.028       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0622     |
|    explained_variance   | 0.156       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.41        |
|    n_updates            | 19410       |
|    policy_gradient_loss | -0.0132     |
|    value_loss           | 140         |
-----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 60.2      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 3.17        |
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 1952        |
|    time_elapsed         | 13130       |
|    total_timesteps      | 3997696     |
| train/                  |             |
|    approx_kl            | 0.065819845 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.454      |
|    explained_variance   | 0.476       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.39        |
|    n_updates            | 19510       |
|    policy_gradient_loss | -0.0385     |
|    value_loss           | 4.01        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 12.4     |
|    ep_rew_mean          | 3.67     |
| time/                   |          |
|    fps                  | 304      |
|    iterations           | 1962     |
|    time_elapsed         | 13200    |
|    total_timesteps      | 4018176  |
| train/                  |          |
|    approx_kl            | 2.350192 |
|    clip_fraction        | 0.341    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.34    |
|    explained_variance   | 0.113    |
|    learning_rate        | 0.0003   |
|    loss                 | 22.9     |
|    n_updates            | 19610    |
|    policy_gradient_loss | -0.0465  |
|    value_loss           | 217      |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.2       |
|    ep_rew_mean          | 2.75       |
| time/          

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 3.54       |
| time/                   |            |
|    fps                  | 303        |
|    iterations           | 1972       |
|    time_elapsed         | 13287      |
|    total_timesteps      | 4038656    |
| train/                  |            |
|    approx_kl            | 0.01670219 |
|    clip_fraction        | 0.147      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.464     |
|    explained_variance   | 0.0758     |
|    learning_rate        | 0.0003     |
|    loss                 | 7.84       |
|    n_updates            | 19710      |
|    policy_gradient_loss | -0.05      |
|    value_loss           | 36.8       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 4.41        |
| time/                   |             |
|    fps                  | 303         |
|    iterations           | 1982        |
|    time_elapsed         | 13366       |
|    total_timesteps      | 4059136     |
| train/                  |             |
|    approx_kl            | 0.059467092 |
|    clip_fraction        | 0.207       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.434      |
|    explained_variance   | 0.474       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.85        |
|    n_updates            | 19810       |
|    policy_gradient_loss | -0.0335     |
|    value_loss           | 4.33        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 3.97        |
| time/                   |             |
|    fps                  | 303         |
|    iterations           | 1992        |
|    time_elapsed         | 13448       |
|    total_timesteps      | 4079616     |
| train/                  |             |
|    approx_kl            | 0.042150646 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.471      |
|    explained_variance   | 0.284       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.24        |
|    n_updates            | 19910       |
|    policy_gradient_loss | -0.0311     |
|    value_loss           | 8.05        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 4.12        |
| time/                   |             |
|    fps                  | 302         |
|    iterations           | 2002        |
|    time_elapsed         | 13534       |
|    total_timesteps      | 4100096     |
| train/                  |             |
|    approx_kl            | 0.062356558 |
|    clip_fraction        | 0.223       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.484      |
|    explained_variance   | 0.598       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.45        |
|    n_updates            | 20010       |
|    policy_gradient_loss | -0.0339     |
|    value_loss           | 3.28        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.9       |
|    ep_rew_mean          | 2.67       |
| time/                   |            |
|    fps                  | 302        |
|    iterations           | 2012       |
|    time_elapsed         | 13613      |
|    total_timesteps      | 4120576    |
| train/                  |            |
|    approx_kl            | 0.04858879 |
|    clip_fraction        | 0.195      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.5       |
|    explained_variance   | 0.653      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.984      |
|    n_updates            | 20110      |
|    policy_gradient_loss | -0.0353    |
|    value_loss           | 3.38       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 3.63        |
| time/                   |             |
|    fps                  | 302         |
|    iterations           | 2022        |
|    time_elapsed         | 13690       |
|    total_timesteps      | 4141056     |
| train/                  |             |
|    approx_kl            | 0.044669963 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.516      |
|    explained_variance   | 0.636       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.66        |
|    n_updates            | 20210       |
|    policy_gradient_loss | -0.031      |
|    value_loss           | 3.48        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.9       |
|    ep_rew_mean          | -0.55      |
| time/                   |            |
|    fps                  | 302        |
|    iterations           | 2032       |
|    time_elapsed         | 13769      |
|    total_timesteps      | 4161536    |
| train/                  |            |
|    approx_kl            | 0.06832617 |
|    clip_fraction        | 0.162      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.473     |
|    explained_variance   | 0.231      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.29       |
|    n_updates            | 20310      |
|    policy_gradient_loss | -0.033     |
|    value_loss           | 14.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean          | 4.03       |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 2042       |
|    time_elapsed         | 13854      |
|    total_timesteps      | 4182016    |
| train/                  |            |
|    approx_kl            | 0.03420283 |
|    clip_fraction        | 0.158      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.502     |
|    explained_variance   | 0.32       |
|    learning_rate        | 0.0003     |
|    loss                 | 1.2        |
|    n_updates            | 20410      |
|    policy_gradient_loss | -0.0177    |
|    value_loss           | 7.43       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 2.87       |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 2052       |
|    time_elapsed         | 13942      |
|    total_timesteps      | 4202496    |
| train/                  |            |
|    approx_kl            | 0.03296191 |
|    clip_fraction        | 0.163      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.534     |
|    explained_variance   | 0.394      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.06       |
|    n_updates            | 20510      |
|    policy_gradient_loss | -0.0326    |
|    value_loss           | 7.81       |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean          | 2.97       |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 2062       |
|    time_elapsed         | 14021      |
|    total_timesteps      | 4222976    |
| train/                  |            |
|    approx_kl            | 0.04987877 |
|    clip_fraction        | 0.183      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.5       |
|    explained_variance   | 0.475      |
|    learning_rate        | 0.0003     |
|    loss                 | 4.11       |
|    n_updates            | 20610      |
|    policy_gradient_loss | -0.0316    |
|    value_loss           | 5.89       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_m

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.6     |
|    ep_rew_mean          | 4.03     |
| time/                   |          |
|    fps                  | 300      |
|    iterations           | 2072     |
|    time_elapsed         | 14103    |
|    total_timesteps      | 4243456  |
| train/                  |          |
|    approx_kl            | 0.071099 |
|    clip_fraction        | 0.231    |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.421   |
|    explained_variance   | 0.474    |
|    learning_rate        | 0.0003   |
|    loss                 | 1.34     |
|    n_updates            | 20710    |
|    policy_gradient_loss | -0.0295  |
|    value_loss           | 3.74     |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 2.89       |
| time/          

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 2.83        |
| time/                   |             |
|    fps                  | 300         |
|    iterations           | 2082        |
|    time_elapsed         | 14173       |
|    total_timesteps      | 4263936     |
| train/                  |             |
|    approx_kl            | 0.000292012 |
|    clip_fraction        | 0.00186     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0298     |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 1.18        |
|    n_updates            | 20810       |
|    policy_gradient_loss | 0.000165    |
|    value_loss           | 27.1        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11.9

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.9     |
|    ep_rew_mean          | 2.83     |
| time/                   |          |
|    fps                  | 301      |
|    iterations           | 2092     |
|    time_elapsed         | 14218    |
|    total_timesteps      | 4284416  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.00199 |
|    explained_variance   | 1.19e-07 |
|    learning_rate        | 0.0003   |
|    loss                 | 0.000326 |
|    n_updates            | 20910    |
|    policy_gradient_loss | -0.00297 |
|    value_loss           | 0.352    |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.9     |
|    ep_rew_mean          | 2.83     |
| time/                  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |
| time/                   |           |
|    fps                  | 301       |
|    iterations           | 2102      |
|    time_elapsed         | 14266     |
|    total_timesteps      | 4304896   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.000777 |
|    explained_variance   | 2.38e-07  |
|    learning_rate        | 0.0003    |
|    loss                 | 0.000256  |
|    n_updates            | 21010     |
|    policy_gradient_loss | -0.00042  |
|    value_loss           | 0.00532   |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |
| time/                   |           |
|    fps                  | 301       |
|    iterations           | 2112      |
|    time_elapsed         | 14330     |
|    total_timesteps      | 4325376   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.000944 |
|    explained_variance   | 2.15e-06  |
|    learning_rate        | 0.0003    |
|    loss                 | 1.12e-05  |
|    n_updates            | 21110     |
|    policy_gradient_loss | 5.34e-05  |
|    value_loss           | 0.000125  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.9     |
|    ep_rew_mean          | 2.83     |
| time/                   |          |
|    fps                  | 302      |
|    iterations           | 2122     |
|    time_elapsed         | 14389    |
|    total_timesteps      | 4345856  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -5.2e-05 |
|    explained_variance   | 0.000195 |
|    learning_rate        | 0.0003   |
|    loss                 | 0.000189 |
|    n_updates            | 21210    |
|    policy_gradient_loss | 0.000561 |
|    value_loss           | 4.27e-06 |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |
| time/                   |           |
|    fps                  | 302       |
|    iterations           | 2132      |
|    time_elapsed         | 14418     |
|    total_timesteps      | 4366336   |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -5.19e-05 |
|    explained_variance   | 0.0148    |
|    learning_rate        | 0.0003    |
|    loss                 | 9.13e-07  |
|    n_updates            | 21310     |
|    policy_gradient_loss | -0.000118 |
|    value_loss           | 3.45e-07  |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.9      |
|    ep_rew_mean          | 2.83      |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.8      |
|    ep_rew_mean          | 3.6       |
| time/                   |           |
|    fps                  | 302       |
|    iterations           | 2142      |
|    time_elapsed         | 14486     |
|    total_timesteps      | 4386816   |
| train/                  |           |
|    approx_kl            | 0.0272311 |
|    clip_fraction        | 0.219     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.479    |
|    explained_variance   | 0.0136    |
|    learning_rate        | 0.0003    |
|    loss                 | 4.28      |
|    n_updates            | 21410     |
|    policy_gradient_loss | -0.0611   |
|    value_loss           | 52.7      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_mean          | 3.32  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 11.4     |
|    ep_rew_mean          | 3.58     |
| time/                   |          |
|    fps                  | 302      |
|    iterations           | 2152     |
|    time_elapsed         | 14560    |
|    total_timesteps      | 4407296  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.00104 |
|    explained_variance   | 0        |
|    learning_rate        | 0.0003   |
|    loss                 | 0.184    |
|    n_updates            | 21510    |
|    policy_gradient_loss | 2.29e-05 |
|    value_loss           | 69.8     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.4      |
|    ep_rew_mean          | 3.58      |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.7      |
|    ep_rew_mean          | 3.73      |
| time/                   |           |
|    fps                  | 302       |
|    iterations           | 2162      |
|    time_elapsed         | 14619     |
|    total_timesteps      | 4427776   |
| train/                  |           |
|    approx_kl            | 2.8626695 |
|    clip_fraction        | 0.204     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.495    |
|    explained_variance   | 0.975     |
|    learning_rate        | 0.0003    |
|    loss                 | 183       |
|    n_updates            | 21610     |
|    policy_gradient_loss | -0.0253   |
|    value_loss           | 303       |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 3.22     

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 2.28       |
| time/                   |            |
|    fps                  | 302        |
|    iterations           | 2172       |
|    time_elapsed         | 14701      |
|    total_timesteps      | 4448256    |
| train/                  |            |
|    approx_kl            | 0.09577763 |
|    clip_fraction        | 0.183      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.516     |
|    explained_variance   | 0.126      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.65       |
|    n_updates            | 21710      |
|    policy_gradient_loss | -0.0543    |
|    value_loss           | 57         |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.7       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.84        |
| time/                   |             |
|    fps                  | 302         |
|    iterations           | 2182        |
|    time_elapsed         | 14784       |
|    total_timesteps      | 4468736     |
| train/                  |             |
|    approx_kl            | 0.054871667 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.468      |
|    explained_variance   | -0.329      |
|    learning_rate        | 0.0003      |
|    loss                 | 11.1        |
|    n_updates            | 21810       |
|    policy_gradient_loss | -0.0443     |
|    value_loss           | 42.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 12       |
|    ep_rew_mean          | 3.71     |
| time/                   |          |
|    fps                  | 302      |
|    iterations           | 2192     |
|    time_elapsed         | 14846    |
|    total_timesteps      | 4489216  |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -0.00244 |
|    explained_variance   | 1.19e-07 |
|    learning_rate        | 0.0003   |
|    loss                 | 0.159    |
|    n_updates            | 21910    |
|    policy_gradient_loss | -0.00103 |
|    value_loss           | 27.8     |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3           |
| time/      

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean          | 1.84       |
| time/                   |            |
|    fps                  | 302        |
|    iterations           | 2202       |
|    time_elapsed         | 14932      |
|    total_timesteps      | 4509696    |
| train/                  |            |
|    approx_kl            | 0.41625258 |
|    clip_fraction        | 0.165      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.437     |
|    explained_variance   | 0.182      |
|    learning_rate        | 0.0003     |
|    loss                 | 19.8       |
|    n_updates            | 22010      |
|    policy_gradient_loss | -0.0508    |
|    value_loss           | 26.5       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.2       |
|    ep_rew_mean          | 3.25       |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 2212       |
|    time_elapsed         | 15019      |
|    total_timesteps      | 4530176    |
| train/                  |            |
|    approx_kl            | 0.04956068 |
|    clip_fraction        | 0.212      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.458     |
|    explained_variance   | 0.633      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.67       |
|    n_updates            | 22110      |
|    policy_gradient_loss | -0.0345    |
|    value_loss           | 3.85       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 3.41        |
| time/                   |             |
|    fps                  | 301         |
|    iterations           | 2222        |
|    time_elapsed         | 15097       |
|    total_timesteps      | 4550656     |
| train/                  |             |
|    approx_kl            | 0.052712586 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.48       |
|    explained_variance   | 0.198       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.5         |
|    n_updates            | 22210       |
|    policy_gradient_loss | -0.0336     |
|    value_loss           | 4.38        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9       |
|    ep_rew_mean          | 4.42       |
| time/                   |            |
|    fps                  | 301        |
|    iterations           | 2232       |
|    time_elapsed         | 15179      |
|    total_timesteps      | 4571136    |
| train/                  |            |
|    approx_kl            | 0.02690301 |
|    clip_fraction        | 0.175      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.534     |
|    explained_variance   | 0.355      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.49       |
|    n_updates            | 22310      |
|    policy_gradient_loss | -0.0339    |
|    value_loss           | 12.1       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 3.99        |
| time/                   |             |
|    fps                  | 300         |
|    iterations           | 2242        |
|    time_elapsed         | 15258       |
|    total_timesteps      | 4591616     |
| train/                  |             |
|    approx_kl            | 0.041660003 |
|    clip_fraction        | 0.168       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.531      |
|    explained_variance   | 0.271       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.06        |
|    n_updates            | 22410       |
|    policy_gradient_loss | -0.0237     |
|    value_loss           | 10.8        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3.65        |
| time/                   |             |
|    fps                  | 300         |
|    iterations           | 2252        |
|    time_elapsed         | 15339       |
|    total_timesteps      | 4612096     |
| train/                  |             |
|    approx_kl            | 0.056893267 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.485      |
|    explained_variance   | 0.58        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.54        |
|    n_updates            | 22510       |
|    policy_gradient_loss | -0.0332     |
|    value_loss           | 3.72        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 3.38       |
| time/                   |            |
|    fps                  | 300        |
|    iterations           | 2262       |
|    time_elapsed         | 15418      |
|    total_timesteps      | 4632576    |
| train/                  |            |
|    approx_kl            | 0.06755428 |
|    clip_fraction        | 0.182      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.453     |
|    explained_variance   | 0.327      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.897      |
|    n_updates            | 22610      |
|    policy_gradient_loss | -0.0282    |
|    value_loss           | 4.3        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.5       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.4       |
|    ep_rew_mean          | 2.68       |
| time/                   |            |
|    fps                  | 300        |
|    iterations           | 2272       |
|    time_elapsed         | 15502      |
|    total_timesteps      | 4653056    |
| train/                  |            |
|    approx_kl            | 0.05675859 |
|    clip_fraction        | 0.237      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.462     |
|    explained_variance   | 0.512      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.29       |
|    n_updates            | 22710      |
|    policy_gradient_loss | -0.033     |
|    value_loss           | 3.7        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.3       |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.8       |
|    ep_rew_mean          | -3.9       |
| time/                   |            |
|    fps                  | 300        |
|    iterations           | 2282       |
|    time_elapsed         | 15576      |
|    total_timesteps      | 4673536    |
| train/                  |            |
|    approx_kl            | 0.04648214 |
|    clip_fraction        | 0.129      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.506     |
|    explained_variance   | 0.43       |
|    learning_rate        | 0.0003     |
|    loss                 | 14.3       |
|    n_updates            | 22810      |
|    policy_gradient_loss | -0.0352    |
|    value_loss           | 19.2       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 3.45        |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 2292        |
|    time_elapsed         | 15659       |
|    total_timesteps      | 4694016     |
| train/                  |             |
|    approx_kl            | 0.063317835 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.445      |
|    explained_variance   | 0.596       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.25        |
|    n_updates            | 22910       |
|    policy_gradient_loss | -0.0362     |
|    value_loss           | 4.27        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.2  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.57        |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 2302        |
|    time_elapsed         | 15735       |
|    total_timesteps      | 4714496     |
| train/                  |             |
|    approx_kl            | 0.109514035 |
|    clip_fraction        | 0.278       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.506      |
|    explained_variance   | 0.217       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.4        |
|    n_updates            | 23010       |
|    policy_gradient_loss | -0.076      |
|    value_loss           | 57.2        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6    

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.7      |
|    ep_rew_mean          | 3.62      |
| time/                   |           |
|    fps                  | 299       |
|    iterations           | 2312      |
|    time_elapsed         | 15823     |
|    total_timesteps      | 4734976   |
| train/                  |           |
|    approx_kl            | 0.3895925 |
|    clip_fraction        | 0.17      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.467    |
|    explained_variance   | 0.181     |
|    learning_rate        | 0.0003    |
|    loss                 | 16.6      |
|    n_updates            | 23110     |
|    policy_gradient_loss | -0.0504   |
|    value_loss           | 33.9      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 1.84  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 3.91        |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 2322        |
|    time_elapsed         | 15895       |
|    total_timesteps      | 4755456     |
| train/                  |             |
|    approx_kl            | 0.050202597 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.398      |
|    explained_variance   | 0.615       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.892       |
|    n_updates            | 23210       |
|    policy_gradient_loss | -0.0288     |
|    value_loss           | 3.35        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 4.29        |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 2332        |
|    time_elapsed         | 15965       |
|    total_timesteps      | 4775936     |
| train/                  |             |
|    approx_kl            | 0.060091347 |
|    clip_fraction        | 0.212       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.401      |
|    explained_variance   | 0.573       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.34        |
|    n_updates            | 23310       |
|    policy_gradient_loss | -0.0336     |
|    value_loss           | 3.93        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.9    

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3       |
|    ep_rew_mean          | 3.48       |
| time/                   |            |
|    fps                  | 298        |
|    iterations           | 2342       |
|    time_elapsed         | 16045      |
|    total_timesteps      | 4796416    |
| train/                  |            |
|    approx_kl            | 0.05167114 |
|    clip_fraction        | 0.174      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.41      |
|    explained_variance   | 0.441      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.54       |
|    n_updates            | 23410      |
|    policy_gradient_loss | -0.0292    |
|    value_loss           | 5.17       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 3.33        |
| time/                   |             |
|    fps                  | 298         |
|    iterations           | 2352        |
|    time_elapsed         | 16130       |
|    total_timesteps      | 4816896     |
| train/                  |             |
|    approx_kl            | 0.063465804 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.423      |
|    explained_variance   | 0.608       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.21        |
|    n_updates            | 23510       |
|    policy_gradient_loss | -0.0328     |
|    value_loss           | 4.05        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 3.54        |
| time/                   |             |
|    fps                  | 298         |
|    iterations           | 2362        |
|    time_elapsed         | 16212       |
|    total_timesteps      | 4837376     |
| train/                  |             |
|    approx_kl            | 0.060376573 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.43       |
|    explained_variance   | 0.649       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.85        |
|    n_updates            | 23610       |
|    policy_gradient_loss | -0.032      |
|    value_loss           | 3.58        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.8       |
|    ep_rew_mean          | 3.17       |
| time/                   |            |
|    fps                  | 298        |
|    iterations           | 2372       |
|    time_elapsed         | 16291      |
|    total_timesteps      | 4857856    |
| train/                  |            |
|    approx_kl            | 0.06781397 |
|    clip_fraction        | 0.216      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.412     |
|    explained_variance   | 0.629      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.12       |
|    n_updates            | 23710      |
|    policy_gradient_loss | -0.0349    |
|    value_loss           | 3          |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 3.34       |
| time/                   |            |
|    fps                  | 297        |
|    iterations           | 2382       |
|    time_elapsed         | 16376      |
|    total_timesteps      | 4878336    |
| train/                  |            |
|    approx_kl            | 0.08009713 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.443     |
|    explained_variance   | 0.396      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.44       |
|    n_updates            | 23810      |
|    policy_gradient_loss | -0.0282    |
|    value_loss           | 3.64       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.4        |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.2       |
|    ep_rew_mean          | 2.91       |
| time/                   |            |
|    fps                  | 297        |
|    iterations           | 2392       |
|    time_elapsed         | 16463      |
|    total_timesteps      | 4898816    |
| train/                  |            |
|    approx_kl            | 0.11316824 |
|    clip_fraction        | 0.12       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.444     |
|    explained_variance   | 0.0154     |
|    learning_rate        | 0.0003     |
|    loss                 | 23.7       |
|    n_updates            | 23910      |
|    policy_gradient_loss | -0.0399    |
|    value_loss           | 21.1       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.2       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 2.03        |
| time/                   |             |
|    fps                  | 297         |
|    iterations           | 2402        |
|    time_elapsed         | 16547       |
|    total_timesteps      | 4919296     |
| train/                  |             |
|    approx_kl            | 0.061530545 |
|    clip_fraction        | 0.21        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.428      |
|    explained_variance   | 0.552       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.42        |
|    n_updates            | 24010       |
|    policy_gradient_loss | -0.0332     |
|    value_loss           | 3.72        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_mean          | 1.15        |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 2412        |
|    time_elapsed         | 16632       |
|    total_timesteps      | 4939776     |
| train/                  |             |
|    approx_kl            | 0.110925585 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.453      |
|    explained_variance   | 0.169       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.66        |
|    n_updates            | 24110       |
|    policy_gradient_loss | -0.0372     |
|    value_loss           | 18.8        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.4    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.6        |
|    ep_rew_mean          | 4.11        |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 2422        |
|    time_elapsed         | 16718       |
|    total_timesteps      | 4960256     |
| train/                  |             |
|    approx_kl            | 0.059543915 |
|    clip_fraction        | 0.201       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.447      |
|    explained_variance   | 0.124       |
|    learning_rate        | 0.0003      |
|    loss                 | 36.1        |
|    n_updates            | 24210       |
|    policy_gradient_loss | -0.057      |
|    value_loss           | 97.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 2.8        |
| time/                   |            |
|    fps                  | 296        |
|    iterations           | 2432       |
|    time_elapsed         | 16798      |
|    total_timesteps      | 4980736    |
| train/                  |            |
|    approx_kl            | 0.06294669 |
|    clip_fraction        | 0.213      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.444     |
|    explained_variance   | 0.634      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.941      |
|    n_updates            | 24310      |
|    policy_gradient_loss | -0.0347    |
|    value_loss           | 3.02       |
----------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 16.4     |
|    ep_rew_mean      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 3.43        |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 2442        |
|    time_elapsed         | 16877       |
|    total_timesteps      | 5001216     |
| train/                  |             |
|    approx_kl            | 0.028265957 |
|    clip_fraction        | 0.146       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.471      |
|    explained_variance   | 0.243       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.2        |
|    n_updates            | 24410       |
|    policy_gradient_loss | -0.0314     |
|    value_loss           | 12.4        |
-----------------------------------------


<stable_baselines3.ppo.ppo.PPO at 0x734ca15123d0>

In [17]:
model.save("ppo_5mil")

In [66]:
model_1 = PPO.load("ppo_5mil",env =env1)
percentage,mean_reward = accuracy_fn(model_1,env1)
print("Success rate:",percentage)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  0%|          | 0/10000 [00:00<?, ?it/s]

Success rate: 0.0


I actually trained it for 5 million timesteps and random words too. It didn't have the desired accuracy on even the training set either.

#### Approach 2
We now think about using imitation learning to guess the word. I have used the following [github repository](https://github.com/YAPhoa/HangmanKeras.git) for reference. The changes i made to the code given there is make the network in pytorch instead of keras which decreased the training time by more than 10 times. I also made minor changes to the classes to decrease the training time even further

In [113]:
class Hangman:
    def __init__(self, words, max_tries = 6 ) :
        self.words = words
        self.max_tries = max_tries
        
    def reset(self) :
        self.tries_remaining = self.max_tries
        self.word = np.random.choice(self.words)
        self.obscurred_word = [27 for i in range(len(self.word))]
        self.guessed = set()
        self.correct_guesses = 0
        return self.obscurred_word
        
    def step(self, letter) :
        if letter not in self.guessed :
            self.guessed.add(letter)
        else :
            return self.obscurred_word, False, {}

        if letter in self.word :
            for i in range(len(self.word)) :
                if letter == self.word[i] :
                    self.obscurred_word[i] = ord(letter)-ord('a')+1
                    self.correct_guesses += 1
            if self.correct_guesses == len(self.word):
                return self.obscurred_word,True, {'ans' : self.word}
            else :                
                return self.obscurred_word,False,{}
        else :
            self.tries_remaining -= 1
            if self.tries_remaining == 0 :
                return self.obscurred_word,True, {'ans' : self.word}
            else :
                return self.obscurred_word,False,{}

In [114]:
from warnings import filterwarnings

filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from tensorflow.keras.utils import pad_sequences

In [117]:
class StateEmbedding(nn.Module):
    def __init__(self, maxlen=29):
        super(StateEmbedding, self).__init__()
        self.embedding = nn.Embedding(30, 100, padding_idx=0)
        self.lstm1 = nn.LSTM(100, 100, num_layers=2,batch_first=True, bidirectional=True,dropout=0.2,bias=False)
        self.drp = nn.Dropout(0.2)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.dense = nn.Linear(200, 100)
    
    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm1(x)
        x = self.drp(x)
        x = x.permute(0, 2, 1)
        x = self.pool(x).squeeze(2)
        x = torch.tanh(self.dense(x))
        return x

class GuessedEmbedding(nn.Module):
    def __init__(self):
        super(GuessedEmbedding, self).__init__()
        self.dense1 = nn.Linear(26, 60)
        self.dense2 = nn.Linear(60, 60)
    
    def forward(self, x):
        x = torch.tanh(self.dense1(x))
        x = torch.tanh(self.dense2(x))
        return x

class Network(nn.Module):
    def __init__(self, maxlen=29):
        super(Network, self).__init__()
        self.state_embedding = StateEmbedding(maxlen)
        self.guessed_embedding = GuessedEmbedding()
        self.dense1 = nn.Linear(160, 100)
        self.dense2 = nn.Linear(100, 26)
        self.compile()
    
    def forward(self, state, guessed):
        state = torch.tensor(state, dtype=torch.long)
        guessed = torch.tensor(guessed, dtype=torch.float)
        state_emb = self.state_embedding(state)
        guessed_emb = self.guessed_embedding(guessed)
        x = torch.cat((state_emb, guessed_emb), dim=1)
        x = torch.tanh(self.dense1(x))
        x = self.dense2(x)
        return x

    def compile(self, lr=1e-3):
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss_fn = nn.CrossEntropyLoss()
    
    def save(self, path='model.pth'):
        torch.save(self.state_dict(), path)
    
    def load(self, path='model.pth'):
        self.load_state_dict(torch.load(path))

class Agent(object) :

    def __init__(self, model) :
        self.guessed = set()
        self.model = model

    def select_action(self,state) :            
        probs = self.get_probs(state)
        i = 1
        sorted_probs = probs.argsort()
        while chr(sorted_probs[-i]+ord('a')) in self.guessed :
            i+= 1
        idx = sorted_probs[-i]
        guess = chr(idx+ord('a'))
        if guess not in self.guessed :
            self.guessed.add(guess)
        return guess

class NNAgent(Agent) :
    def __init__(self, model) :
        super().__init__(model)
        self.episode_memory = []
        self.states_history = []

    def train_model(self):
        inp_1, inp_2, obj = zip(*self.states_history)
        inp_1 = np.vstack(list(inp_1)).astype(float)
        inp_2 = np.vstack(list(inp_2)).astype(float)
        obj = np.vstack(list(obj)).astype(float)
        self.model.train()
        inp_1 = torch.tensor(inp_1,dtype=torch.long)
        inp_2 = torch.tensor(inp_2,dtype=torch.float)
        obj = torch.tensor(obj,dtype=torch.float)
        y = self.model(inp_1,inp_2)
        loss = self.model.loss_fn(y,obj)
        self.model.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1)
        self.model.optimizer.step()
        self.states_history = []
        return loss

    def get_probs(self, state) :
        state = np.array(state).reshape(1,len(state))
        state = pad_sequences(state, maxlen = 29).reshape(1,29)
        guessed = np.array([1 if chr(i+ord('a')) in self.guessed else 0 for i in range(26)]).reshape(1,26)
        self.episode_memory.append((state,guessed))
        probs = self.model(state,guessed)
        probs = (torch.softmax(probs,dim=1)).squeeze()
        return probs

    def finalize_episode(self, answer) :
        inp_1, inp_2 = zip(*self.episode_memory)
        inp_1 = np.vstack(list(inp_1)).astype(float)      #stack the game state matrix
        inp_2 = np.vstack(list(inp_2)).astype(float)      #stack the one hot-encoded guessed matrix
        obj = 1.0 - inp_2                                 #compute the unused letters one-hot encoded
        len_ep = len(self.episode_memory)                 #length of episode
        correct_mask = np.array([[1 if chr(i+ord('a')) in answer else 0 for i in range(26)]]) # get mask from correct answer
        correct_mask = np.repeat(correct_mask, len_ep, axis = 0).astype(float)
        obj = correct_mask * obj  #the correct action is choosing the letters that are both unused AND exist in the word
        obj /= obj.sum(axis = 1).reshape(-1,1) #normalize so it sums to one
        self.states_history.append((inp_1, inp_2,obj))
        self.episode_memory = []
        self.guessed = set()

In [118]:
policy_net = Network()
player = NNAgent(policy_net)

save_episode = 500
view_episode = 100
update_episode = 5
avg_correct = 0
wins_avg = 0
n_trials = 20000
progbar = tqdm(range(n_trials))

""" 
Warmup Phase
Since the network can only collect data by interacting with the environment, 
to collect more data max_lives parameter or the maximum number of trial before game over is set 
slightly extra than the actual game parameter so episodes are longer. 
"""

game_params = {'max_tries' : 8}
env = Hangman(word_list, **game_params)

print('Training Start ...', end = '\n\n')

for episode_set in progbar :
    for _ in range(update_episode) :
        state = env.reset()
        done = False
        correct_count = 0
        while not done :
            guess = player.select_action(state)
            state, done, ans = env.step(guess)
        if env.correct_guesses == len(env.word):
            wins_avg += 1.0
        player.finalize_episode(ans['ans'])
        avg_correct += correct_count
    loss = player.train_model()
    progbar.set_description("Loss : {:.3f}              ".format(loss))

    if (episode_set +1) % view_episode == 0 :
        views = (episode_set + 1,avg_correct/(update_episode*view_episode), view_episode*update_episode, wins_avg/(update_episode*view_episode))
        print('Episode {} -------- Average Correct Count : {:.3f}     Last {} winrate : {:.3f}'.format(*views))
        if loss is not None :
            print('Loss :', loss)
            print()
            avg_correct = 0
            wins_avg = 0

    if (episode_set +1) % save_episode == 0 :
        player.model.save('model.pth')

print()
game_params = {'max_tries' : 6}
env = Hangman(word_list, **game_params)

n_trials = 70000
progbar = tqdm(range(n_trials))

for episode_set in progbar :
    for _ in range(update_episode) :
        total_reward = 0    
        state = env.reset()
        done = False
        correct_count = 0
        while not done :
            guess = player.select_action(state)
            state, done, ans = env.step(guess)
        if env.correct_guesses == len(env.word):
            wins_avg += 1.0
        player.finalize_episode(ans['ans'])
        avg_correct += correct_count
    loss = player.train_model()
    progbar.set_description("Loss : {:.3f}              ".format(loss))

    if (episode_set +1) % view_episode == 0 :
        views = (episode_set + 1,avg_correct/(update_episode*view_episode), view_episode, wins_avg/(update_episode*view_episode))
        print('Episode {} -------- Average Correct Count : {:.3f}     Last {} winrate : {:.3f}'.format(*views))
        if loss is not None :
            print('Loss :', loss)
            print()
            avg_correct = 0
            wins_avg = 0

    if (episode_set +1) % save_episode == 0 :
        player.model.save('model.pth')

  0%|          | 0/20000 [00:00<?, ?it/s]

Training Start ...



KeyboardInterrupt: 

In [87]:
guessed_letters = set()
policy = Network()
policy.load_state_dict(torch.load("model.pth"))
policy.eval()

def guess(self, word):
    len_word = len(word)
    state = np.array([ord(c)-ord('a')+1 if c != "_" else 27 for c in word])
    state = (pad_sequences([state],maxlen=29)).reshape(1,29)
    guessed = np.array([1 if chr(i+ord('a')) in self.guessed_letters else 0 for i in range(26)]).reshape(1,26)
    probs = torch.softmax(self.policy(state,guessed),dim=1).squeeze()
    i = 1
    sorted_probs = probs.argsort()
    while chr(sorted_probs[-i]+ord('a')) in self.guessed_letters :
        i+= 1
    idx = sorted_probs[-i]
    guess = chr(idx+ord('a'))
    guessed_letters.add(guess)
    return guess