# 1. Setup Mario

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py

Defaulting to user installation because normal site-packages is not writeable




In [2]:
# Game
import gym_super_mario_bros
# Joypad
from nes_py.wrappers import JoypadSpace
# Controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [3]:
SIMPLE_MOVEMENT

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

In [4]:
# Setup game
env = gym_super_mario_bros.make('SuperMarioBros-v0')
# Simplify movement variations (action_space)
env = JoypadSpace(env, SIMPLE_MOVEMENT)

In [5]:
# Flag
done = True

# Loop through each frame
for step in range(100):
    if done:
        # Start the game
        env.reset()
    # Do random actions
    state, reward, done, info = env.step(env.action_space.sample())
    # Show the game on the screen
    env.render()
env.close()



# 2. Preprocess Environment

In [6]:
!pip install torch torchvision torchaudio
# PPO for RL
!pip install stable-baselines3[extra]

Defaulting to user installation because normal site-packages is not writeable




Defaulting to user installation because normal site-packages is not writeable




In [7]:
# Frame Stacker Wrapper and Grayscaling Wrapper
# Framestack: Capture last couple of frames while Mario is playing
# GrayscaleObservation: Shave our data since it does not deal with no coloring, making our model faster
from gym.wrappers import GrayScaleObservation
# Vectorization Wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
# Plot the impact of framestacking
from matplotlib import pyplot as plt

In [8]:
# Create the base env 
env = gym_super_mario_bros.make('SuperMarioBros-v0')
# Simplify movement controls
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# Grayscale the env
env = GrayScaleObservation(env, keep_dim=True)
# Wrap inside the dummy env
env = DummyVecEnv([lambda: env])
# Stack the frames
# Find how many frames to take in as you desire that works best
env = VecFrameStack(env, 4, channels_order='last')

In [9]:
# state = env.reset()
# plt.imshow(state)

In [23]:
env.reset()

array([[[[  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         ...,
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140]],

        [[  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         ...,
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140]],

        [[  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         ...,
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140]],

        ...,

        [[  0,   0,   0, 214],
         [  0,   0,   0, 124],
         [  0,   0,   0, 124],
         ...,
         [  0,   0,   0, 124],
         [  0,   0,   0, 124],
         [  0,   0,   0,   0]],

        [[  0,   0,   0, 214],
         [  0,   0,   0, 124],
         [  0,   0,   0, 124],
         ...,
         [  0,   0,   0, 124],
         [  0,   0,   0,   0],
         

In [24]:
state, reward, done, info = env.step([5])

In [12]:
#plt.figure(figsize=(20,16))
#for idx in range(state.shape[3]):
 #   plt.subplot(1,4,idx+1)
 #   plt.imshow(state[0][:,:,idx])
#plt.show()

# 3. Train the RL Model

In [9]:
# AREA: Agent, Reward, Environment, Action
# PPO: Proximal Policy Optimization
# OS for file path management
import os
# PPO classifier
from stable_baselines3 import PPO
# Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [10]:
# Save our model every x number of steps
class TrainAndLoggingCallback(BaseCallback):
    
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        
    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
            
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)
            
        return True

In [11]:
CHECKPOINT_DIR = './train/'
LOG_DIR = './logs/'

In [12]:
callback = TrainAndLoggingCallback(check_freq=1000000, save_path=CHECKPOINT_DIR)

In [20]:
# RL requires a policy network (such as the CNN that is the brain of the AI)
# CnnPolicy is fast at processing images, which fits our use case
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, n_steps=512)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [18]:
# Train the model
# Every single game gets a million frames
#model.learn(total_timesteps=4000000, callback=callback)

In [19]:
# No callback: model.save('name_of_file')
# To view the logs on tensorboard
# Activate the env on this dir and go into log dir (go into most recent ppo model)
#tensorboard --logdir=.

# 4. Test the Model

In [25]:
# Load the model
model = PPO.load('./train/best_model_1000000', env)

Wrapping the env in a VecTransposeImage.


In [None]:
# Continue training the model
model.learn(total_timesteps=4000000, callback=callback)

Logging to ./logs/PPO_12
----------------------------
| time/              |     |
|    fps             | 94  |
|    iterations      | 1   |
|    time_elapsed    | 5   |
|    total_timesteps | 512 |
----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 14           |
|    iterations           | 2            |
|    time_elapsed         | 69           |
|    total_timesteps      | 1024         |
| train/                  |              |
|    approx_kl            | 0.0006724793 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.263       |
|    explained_variance   | 0.907        |
|    learning_rate        | 1e-06        |
|    loss                 | 65.3         |
|    n_updates            | 19540        |
|    policy_gradient_loss | -0.000268    |
|    value_loss           | 157          |
-----------------------------------------

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 13            |
|    time_elapsed         | 796           |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 1.0414282e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.329        |
|    explained_variance   | 0.732         |
|    learning_rate        | 1e-06         |
|    loss                 | 332           |
|    n_updates            | 19650         |
|    policy_gradient_loss | 0.000355      |
|    value_loss           | 630           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 14           |
|    time_elapsed         | 863     

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 24           |
|    time_elapsed         | 1498         |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 0.0012308394 |
|    clip_fraction        | 0.00117      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.279       |
|    explained_variance   | 0.949        |
|    learning_rate        | 1e-06        |
|    loss                 | 44.7         |
|    n_updates            | 19760        |
|    policy_gradient_loss | -0.000854    |
|    value_loss           | 121          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 25            |
|    time_elapsed         | 1560          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 35            |
|    time_elapsed         | 2175          |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 0.00064398337 |
|    clip_fraction        | 0.000781      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.629        |
|    explained_variance   | 0.794         |
|    learning_rate        | 1e-06         |
|    loss                 | 329           |
|    n_updates            | 19870         |
|    policy_gradient_loss | -0.000659     |
|    value_loss           | 837           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 36          |
|    time_elapsed         | 2232        

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 46            |
|    time_elapsed         | 2831          |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 0.00088433805 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.55         |
|    explained_variance   | -0.589        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.13          |
|    n_updates            | 19980         |
|    policy_gradient_loss | -0.00173      |
|    value_loss           | 0.321         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 47           |
|    time_elapsed         | 2897    

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 57            |
|    time_elapsed         | 3534          |
|    total_timesteps      | 29184         |
| train/                  |               |
|    approx_kl            | 8.6271204e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.238        |
|    explained_variance   | 0.575         |
|    learning_rate        | 1e-06         |
|    loss                 | 480           |
|    n_updates            | 20090         |
|    policy_gradient_loss | 9.19e-05      |
|    value_loss           | 519           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 58            |
|    time_elapsed         | 3598

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 68            |
|    time_elapsed         | 4238          |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 0.00021529733 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.219        |
|    explained_variance   | 0.393         |
|    learning_rate        | 1e-06         |
|    loss                 | 171           |
|    n_updates            | 20200         |
|    policy_gradient_loss | -0.00128      |
|    value_loss           | 754           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 69            |
|    time_elapsed         | 4304

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 79            |
|    time_elapsed         | 4946          |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 0.00029968773 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.317        |
|    explained_variance   | 0.716         |
|    learning_rate        | 1e-06         |
|    loss                 | 186           |
|    n_updates            | 20310         |
|    policy_gradient_loss | -0.000285     |
|    value_loss           | 659           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 80            |
|    time_elapsed         | 5009

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 90            |
|    time_elapsed         | 5665          |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 6.6404464e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.286        |
|    explained_variance   | 0.875         |
|    learning_rate        | 1e-06         |
|    loss                 | 134           |
|    n_updates            | 20420         |
|    policy_gradient_loss | 5.1e-06       |
|    value_loss           | 279           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 91           |
|    time_elapsed         | 5728    

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 101           |
|    time_elapsed         | 6377          |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 0.00026247406 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.184        |
|    explained_variance   | 0.748         |
|    learning_rate        | 1e-06         |
|    loss                 | 101           |
|    n_updates            | 20530         |
|    policy_gradient_loss | -0.00022      |
|    value_loss           | 226           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 102          |
|    time_elapsed         | 6443    

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 112           |
|    time_elapsed         | 7096          |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 2.4961191e-05 |
|    clip_fraction        | 0.000195      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.188        |
|    explained_variance   | 0.622         |
|    learning_rate        | 1e-06         |
|    loss                 | 762           |
|    n_updates            | 20640         |
|    policy_gradient_loss | 0.000209      |
|    value_loss           | 765           |
-------------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 8         |
|    iterations           | 113       |
|    time_elapsed         | 7158      |
|    tot

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 123          |
|    time_elapsed         | 7791         |
|    total_timesteps      | 62976        |
| train/                  |              |
|    approx_kl            | 7.768173e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.232       |
|    explained_variance   | 0.843        |
|    learning_rate        | 1e-06        |
|    loss                 | 288          |
|    n_updates            | 20750        |
|    policy_gradient_loss | 2.39e-06     |
|    value_loss           | 445          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 124           |
|    time_elapsed         | 7857          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 134          |
|    time_elapsed         | 8486         |
|    total_timesteps      | 68608        |
| train/                  |              |
|    approx_kl            | 0.0007664586 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.368       |
|    explained_variance   | 0.791        |
|    learning_rate        | 1e-06        |
|    loss                 | 156          |
|    n_updates            | 20860        |
|    policy_gradient_loss | -0.000312    |
|    value_loss           | 349          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 135          |
|    time_elapsed         | 8547         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 145          |
|    time_elapsed         | 9161         |
|    total_timesteps      | 74240        |
| train/                  |              |
|    approx_kl            | 0.0013127903 |
|    clip_fraction        | 0.0127       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.372       |
|    explained_variance   | 0.888        |
|    learning_rate        | 1e-06        |
|    loss                 | 117          |
|    n_updates            | 20970        |
|    policy_gradient_loss | -0.00214     |
|    value_loss           | 214          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 146           |
|    time_elapsed         | 9223          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 156           |
|    time_elapsed         | 9837          |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 0.00017941534 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.263        |
|    explained_variance   | 0.692         |
|    learning_rate        | 1e-06         |
|    loss                 | 138           |
|    n_updates            | 21080         |
|    policy_gradient_loss | -5.27e-06     |
|    value_loss           | 267           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 157           |
|    time_elapsed         | 9898

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 167          |
|    time_elapsed         | 10522        |
|    total_timesteps      | 85504        |
| train/                  |              |
|    approx_kl            | 0.0027281456 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.295       |
|    explained_variance   | 0.689        |
|    learning_rate        | 1e-06        |
|    loss                 | 171          |
|    n_updates            | 21190        |
|    policy_gradient_loss | -0.00192     |
|    value_loss           | 269          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 168          |
|    time_elapsed         | 10582        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 178           |
|    time_elapsed         | 11187         |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 0.00038782496 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.241        |
|    explained_variance   | 0.755         |
|    learning_rate        | 1e-06         |
|    loss                 | 237           |
|    n_updates            | 21300         |
|    policy_gradient_loss | -0.000393     |
|    value_loss           | 356           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 179          |
|    time_elapsed         | 11247   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 189           |
|    time_elapsed         | 11840         |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 0.00016859511 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.202        |
|    explained_variance   | 0.757         |
|    learning_rate        | 1e-06         |
|    loss                 | 172           |
|    n_updates            | 21410         |
|    policy_gradient_loss | -0.000228     |
|    value_loss           | 493           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 190           |
|    time_elapsed         | 1190

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 200           |
|    time_elapsed         | 12492         |
|    total_timesteps      | 102400        |
| train/                  |               |
|    approx_kl            | 0.00043125055 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.257        |
|    explained_variance   | 0.624         |
|    learning_rate        | 1e-06         |
|    loss                 | 231           |
|    n_updates            | 21520         |
|    policy_gradient_loss | -0.000325     |
|    value_loss           | 492           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 201           |
|    time_elapsed         | 1255

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 211           |
|    time_elapsed         | 13144         |
|    total_timesteps      | 108032        |
| train/                  |               |
|    approx_kl            | 0.00014650647 |
|    clip_fraction        | 0.000195      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.282        |
|    explained_variance   | 0.704         |
|    learning_rate        | 1e-06         |
|    loss                 | 119           |
|    n_updates            | 21630         |
|    policy_gradient_loss | -2.44e-05     |
|    value_loss           | 643           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 212          |
|    time_elapsed         | 13203   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 222           |
|    time_elapsed         | 13796         |
|    total_timesteps      | 113664        |
| train/                  |               |
|    approx_kl            | 0.00012817024 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.287        |
|    explained_variance   | 0.579         |
|    learning_rate        | 1e-06         |
|    loss                 | 482           |
|    n_updates            | 21740         |
|    policy_gradient_loss | 0.000123      |
|    value_loss           | 772           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 223          |
|    time_elapsed         | 13855   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 233           |
|    time_elapsed         | 14446         |
|    total_timesteps      | 119296        |
| train/                  |               |
|    approx_kl            | 0.00019459426 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.297        |
|    explained_variance   | 0.872         |
|    learning_rate        | 1e-06         |
|    loss                 | 88.5          |
|    n_updates            | 21850         |
|    policy_gradient_loss | -0.0002       |
|    value_loss           | 200           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 234          |
|    time_elapsed         | 14506   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 244          |
|    time_elapsed         | 15102        |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0012570795 |
|    clip_fraction        | 0.0152       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.418        |
|    learning_rate        | 1e-06        |
|    loss                 | 202          |
|    n_updates            | 21960        |
|    policy_gradient_loss | -0.00153     |
|    value_loss           | 797          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 245           |
|    time_elapsed         | 15162         |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 255           |
|    time_elapsed         | 15774         |
|    total_timesteps      | 130560        |
| train/                  |               |
|    approx_kl            | 1.3630488e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.224        |
|    explained_variance   | 0.744         |
|    learning_rate        | 1e-06         |
|    loss                 | 193           |
|    n_updates            | 22070         |
|    policy_gradient_loss | 0.000222      |
|    value_loss           | 391           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 256           |
|    time_elapsed         | 1583

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 266         |
|    time_elapsed         | 16447       |
|    total_timesteps      | 136192      |
| train/                  |             |
|    approx_kl            | 0.000586162 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.322      |
|    explained_variance   | 0.9         |
|    learning_rate        | 1e-06       |
|    loss                 | 64.9        |
|    n_updates            | 22180       |
|    policy_gradient_loss | -0.000418   |
|    value_loss           | 144         |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 267           |
|    time_elapsed         | 16509         |
|    total_timesteps    

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 277          |
|    time_elapsed         | 17121        |
|    total_timesteps      | 141824       |
| train/                  |              |
|    approx_kl            | 0.0021066125 |
|    clip_fraction        | 0.0234       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.346       |
|    explained_variance   | 0.54         |
|    learning_rate        | 1e-06        |
|    loss                 | 278          |
|    n_updates            | 22290        |
|    policy_gradient_loss | -0.00286     |
|    value_loss           | 703          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 278          |
|    time_elapsed         | 17182        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 288          |
|    time_elapsed         | 17794        |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0021932875 |
|    clip_fraction        | 0.00273      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.449       |
|    explained_variance   | 0.785        |
|    learning_rate        | 1e-06        |
|    loss                 | 148          |
|    n_updates            | 22400        |
|    policy_gradient_loss | -0.000853    |
|    value_loss           | 440          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 289         |
|    time_elapsed         | 17855       |
|    total_times

----------------------------------------
| time/                   |            |
|    fps                  | 8          |
|    iterations           | 299        |
|    time_elapsed         | 18464      |
|    total_timesteps      | 153088     |
| train/                  |            |
|    approx_kl            | 5.1604e-05 |
|    clip_fraction        | 0          |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.592     |
|    explained_variance   | 0.844      |
|    learning_rate        | 1e-06      |
|    loss                 | 141        |
|    n_updates            | 22510      |
|    policy_gradient_loss | -8.15e-05  |
|    value_loss           | 515        |
----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 300           |
|    time_elapsed         | 18523         |
|    total_timesteps      | 153600        

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 310          |
|    time_elapsed         | 19110        |
|    total_timesteps      | 158720       |
| train/                  |              |
|    approx_kl            | 0.0015324408 |
|    clip_fraction        | 0.0119       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.314       |
|    explained_variance   | 0.476        |
|    learning_rate        | 1e-06        |
|    loss                 | 222          |
|    n_updates            | 22620        |
|    policy_gradient_loss | -0.000614    |
|    value_loss           | 452          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 311           |
|    time_elapsed         | 19171         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 321          |
|    time_elapsed         | 19774        |
|    total_timesteps      | 164352       |
| train/                  |              |
|    approx_kl            | 0.0002968501 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.367       |
|    explained_variance   | 0.7          |
|    learning_rate        | 1e-06        |
|    loss                 | 221          |
|    n_updates            | 22730        |
|    policy_gradient_loss | 9.19e-05     |
|    value_loss           | 350          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 322          |
|    time_elapsed         | 19835        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 332          |
|    time_elapsed         | 20435        |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 0.0013032029 |
|    clip_fraction        | 0.00586      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.227       |
|    explained_variance   | 0.907        |
|    learning_rate        | 1e-06        |
|    loss                 | 105          |
|    n_updates            | 22840        |
|    policy_gradient_loss | -0.00151     |
|    value_loss           | 208          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 333          |
|    time_elapsed         | 20499        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 343           |
|    time_elapsed         | 21143         |
|    total_timesteps      | 175616        |
| train/                  |               |
|    approx_kl            | 0.00079592725 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.348        |
|    explained_variance   | 0.712         |
|    learning_rate        | 1e-06         |
|    loss                 | 378           |
|    n_updates            | 22950         |
|    policy_gradient_loss | -0.00232      |
|    value_loss           | 655           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 344          |
|    time_elapsed         | 21206   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 354          |
|    time_elapsed         | 21844        |
|    total_timesteps      | 181248       |
| train/                  |              |
|    approx_kl            | 0.0009914236 |
|    clip_fraction        | 0.0105       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.237       |
|    explained_variance   | 0.434        |
|    learning_rate        | 1e-06        |
|    loss                 | 737          |
|    n_updates            | 23060        |
|    policy_gradient_loss | -0.00184     |
|    value_loss           | 1.13e+03     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 355          |
|    time_elapsed         | 21911        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 365          |
|    time_elapsed         | 22588        |
|    total_timesteps      | 186880       |
| train/                  |              |
|    approx_kl            | 0.0023162048 |
|    clip_fraction        | 0.0207       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.473       |
|    explained_variance   | 0.907        |
|    learning_rate        | 1e-06        |
|    loss                 | 112          |
|    n_updates            | 23170        |
|    policy_gradient_loss | -0.00165     |
|    value_loss           | 277          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 366          |
|    time_elapsed         | 22653        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 376          |
|    time_elapsed         | 23301        |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0032434044 |
|    clip_fraction        | 0.0113       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.383       |
|    explained_variance   | 0.729        |
|    learning_rate        | 1e-06        |
|    loss                 | 157          |
|    n_updates            | 23280        |
|    policy_gradient_loss | -0.0021      |
|    value_loss           | 313          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 377          |
|    time_elapsed         | 23365        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 387          |
|    time_elapsed         | 23985        |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 0.0014114443 |
|    clip_fraction        | 0.018        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.441       |
|    explained_variance   | 0.449        |
|    learning_rate        | 1e-06        |
|    loss                 | 218          |
|    n_updates            | 23390        |
|    policy_gradient_loss | -0.00189     |
|    value_loss           | 1e+03        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 388          |
|    time_elapsed         | 24046        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 398          |
|    time_elapsed         | 24656        |
|    total_timesteps      | 203776       |
| train/                  |              |
|    approx_kl            | 0.0024047657 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.609       |
|    explained_variance   | 0.655        |
|    learning_rate        | 1e-06        |
|    loss                 | 295          |
|    n_updates            | 23500        |
|    policy_gradient_loss | -0.00132     |
|    value_loss           | 513          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 399         |
|    time_elapsed         | 24718       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 409         |
|    time_elapsed         | 25328       |
|    total_timesteps      | 209408      |
| train/                  |             |
|    approx_kl            | 0.004711329 |
|    clip_fraction        | 0.0709      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.627      |
|    explained_variance   | 0.64        |
|    learning_rate        | 1e-06       |
|    loss                 | 127         |
|    n_updates            | 23610       |
|    policy_gradient_loss | -0.00582    |
|    value_loss           | 338         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 410          |
|    time_elapsed         | 25389        |
|    total_timesteps      | 2

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 420          |
|    time_elapsed         | 25997        |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0009141484 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.849       |
|    explained_variance   | 0.845        |
|    learning_rate        | 1e-06        |
|    loss                 | 199          |
|    n_updates            | 23720        |
|    policy_gradient_loss | 0.000341     |
|    value_loss           | 384          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 421          |
|    time_elapsed         | 26058        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 431          |
|    time_elapsed         | 26667        |
|    total_timesteps      | 220672       |
| train/                  |              |
|    approx_kl            | 0.0051666456 |
|    clip_fraction        | 0.00254      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.876       |
|    explained_variance   | 0.445        |
|    learning_rate        | 1e-06        |
|    loss                 | 867          |
|    n_updates            | 23830        |
|    policy_gradient_loss | -0.00341     |
|    value_loss           | 842          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 432          |
|    time_elapsed         | 26728        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 442          |
|    time_elapsed         | 27340        |
|    total_timesteps      | 226304       |
| train/                  |              |
|    approx_kl            | 0.0005295548 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.755       |
|    explained_variance   | 0.633        |
|    learning_rate        | 1e-06        |
|    loss                 | 197          |
|    n_updates            | 23940        |
|    policy_gradient_loss | -0.000246    |
|    value_loss           | 638          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 443          |
|    time_elapsed         | 27401        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 453          |
|    time_elapsed         | 28007        |
|    total_timesteps      | 231936       |
| train/                  |              |
|    approx_kl            | 0.0040598484 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.828       |
|    explained_variance   | 0.862        |
|    learning_rate        | 1e-06        |
|    loss                 | 106          |
|    n_updates            | 24050        |
|    policy_gradient_loss | -0.00154     |
|    value_loss           | 200          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 454         |
|    time_elapsed         | 28069       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 464          |
|    time_elapsed         | 28684        |
|    total_timesteps      | 237568       |
| train/                  |              |
|    approx_kl            | 0.0030936399 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.819       |
|    explained_variance   | 0.83         |
|    learning_rate        | 1e-06        |
|    loss                 | 47.7         |
|    n_updates            | 24160        |
|    policy_gradient_loss | -0.000987    |
|    value_loss           | 143          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 465          |
|    time_elapsed         | 28745        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 475          |
|    time_elapsed         | 29357        |
|    total_timesteps      | 243200       |
| train/                  |              |
|    approx_kl            | 0.0015877946 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.843       |
|    explained_variance   | 0.854        |
|    learning_rate        | 1e-06        |
|    loss                 | 119          |
|    n_updates            | 24270        |
|    policy_gradient_loss | -0.0014      |
|    value_loss           | 413          |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 8          |
|    iterations           | 476        |
|    time_elapsed         | 29418      |
|    total_timesteps 

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 486          |
|    time_elapsed         | 30029        |
|    total_timesteps      | 248832       |
| train/                  |              |
|    approx_kl            | 0.0012726836 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.646       |
|    explained_variance   | 0.749        |
|    learning_rate        | 1e-06        |
|    loss                 | 244          |
|    n_updates            | 24380        |
|    policy_gradient_loss | -0.000126    |
|    value_loss           | 640          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 487         |
|    time_elapsed         | 30090       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 497          |
|    time_elapsed         | 30703        |
|    total_timesteps      | 254464       |
| train/                  |              |
|    approx_kl            | 0.0057396754 |
|    clip_fraction        | 0.0229       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.76        |
|    explained_variance   | 0.879        |
|    learning_rate        | 1e-06        |
|    loss                 | 61.8         |
|    n_updates            | 24490        |
|    policy_gradient_loss | -8.82e-05    |
|    value_loss           | 197          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 498          |
|    time_elapsed         | 30765        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 508         |
|    time_elapsed         | 31378       |
|    total_timesteps      | 260096      |
| train/                  |             |
|    approx_kl            | 0.007411372 |
|    clip_fraction        | 0.0373      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.864      |
|    explained_variance   | 0.878       |
|    learning_rate        | 1e-06       |
|    loss                 | 132         |
|    n_updates            | 24600       |
|    policy_gradient_loss | -0.00294    |
|    value_loss           | 440         |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 509           |
|    time_elapsed         | 31439         |
|    total_timesteps    

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 519          |
|    time_elapsed         | 32051        |
|    total_timesteps      | 265728       |
| train/                  |              |
|    approx_kl            | 6.496231e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.79        |
|    explained_variance   | 0.817        |
|    learning_rate        | 1e-06        |
|    loss                 | 172          |
|    n_updates            | 24710        |
|    policy_gradient_loss | -0.000225    |
|    value_loss           | 362          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 520          |
|    time_elapsed         | 32113        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 530          |
|    time_elapsed         | 32726        |
|    total_timesteps      | 271360       |
| train/                  |              |
|    approx_kl            | 0.0065507614 |
|    clip_fraction        | 0.0178       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.642       |
|    explained_variance   | 0.843        |
|    learning_rate        | 1e-06        |
|    loss                 | 203          |
|    n_updates            | 24820        |
|    policy_gradient_loss | -0.00362     |
|    value_loss           | 290          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 531          |
|    time_elapsed         | 32787        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 541           |
|    time_elapsed         | 33400         |
|    total_timesteps      | 276992        |
| train/                  |               |
|    approx_kl            | 0.00031459506 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.686        |
|    explained_variance   | 0.649         |
|    learning_rate        | 1e-06         |
|    loss                 | 651           |
|    n_updates            | 24930         |
|    policy_gradient_loss | -0.00031      |
|    value_loss           | 743           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 542         |
|    time_elapsed         | 33461       

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 552          |
|    time_elapsed         | 34074        |
|    total_timesteps      | 282624       |
| train/                  |              |
|    approx_kl            | 0.0023147576 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.857       |
|    explained_variance   | 0.572        |
|    learning_rate        | 1e-06        |
|    loss                 | 343          |
|    n_updates            | 25040        |
|    policy_gradient_loss | -0.000777    |
|    value_loss           | 717          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 553          |
|    time_elapsed         | 34135        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 563          |
|    time_elapsed         | 34749        |
|    total_timesteps      | 288256       |
| train/                  |              |
|    approx_kl            | 0.0017297624 |
|    clip_fraction        | 0.0141       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.859       |
|    explained_variance   | 0.528        |
|    learning_rate        | 1e-06        |
|    loss                 | 190          |
|    n_updates            | 25150        |
|    policy_gradient_loss | -0.0019      |
|    value_loss           | 771          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 564         |
|    time_elapsed         | 34811       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 574          |
|    time_elapsed         | 35430        |
|    total_timesteps      | 293888       |
| train/                  |              |
|    approx_kl            | 0.0035492904 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.913       |
|    explained_variance   | 0.92         |
|    learning_rate        | 1e-06        |
|    loss                 | 33.6         |
|    n_updates            | 25260        |
|    policy_gradient_loss | -0.00122     |
|    value_loss           | 222          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 575          |
|    time_elapsed         | 35492        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 585          |
|    time_elapsed         | 36114        |
|    total_timesteps      | 299520       |
| train/                  |              |
|    approx_kl            | 0.0027888084 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.03        |
|    explained_variance   | 0.898        |
|    learning_rate        | 1e-06        |
|    loss                 | 102          |
|    n_updates            | 25370        |
|    policy_gradient_loss | -0.00114     |
|    value_loss           | 200          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 586         |
|    time_elapsed         | 36176       |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 596           |
|    time_elapsed         | 36793         |
|    total_timesteps      | 305152        |
| train/                  |               |
|    approx_kl            | 0.00030121184 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.979        |
|    explained_variance   | 0.848         |
|    learning_rate        | 1e-06         |
|    loss                 | 76.3          |
|    n_updates            | 25480         |
|    policy_gradient_loss | -0.000405     |
|    value_loss           | 205           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 597           |
|    time_elapsed         | 3685

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 607         |
|    time_elapsed         | 37469       |
|    total_timesteps      | 310784      |
| train/                  |             |
|    approx_kl            | 0.001067367 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.863      |
|    explained_variance   | 0.755       |
|    learning_rate        | 1e-06       |
|    loss                 | 200         |
|    n_updates            | 25590       |
|    policy_gradient_loss | -0.000335   |
|    value_loss           | 317         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 608          |
|    time_elapsed         | 37530        |
|    total_timesteps      | 3

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 618         |
|    time_elapsed         | 38150       |
|    total_timesteps      | 316416      |
| train/                  |             |
|    approx_kl            | 0.002020122 |
|    clip_fraction        | 0.00332     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.836      |
|    explained_variance   | 0.806       |
|    learning_rate        | 1e-06       |
|    loss                 | 153         |
|    n_updates            | 25700       |
|    policy_gradient_loss | -0.00187    |
|    value_loss           | 243         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 619          |
|    time_elapsed         | 38212        |
|    total_timesteps      | 3

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 629          |
|    time_elapsed         | 38833        |
|    total_timesteps      | 322048       |
| train/                  |              |
|    approx_kl            | 0.0029415865 |
|    clip_fraction        | 0.00313      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.593       |
|    explained_variance   | 0.859        |
|    learning_rate        | 1e-06        |
|    loss                 | 98.9         |
|    n_updates            | 25810        |
|    policy_gradient_loss | -0.0018      |
|    value_loss           | 219          |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 8          |
|    iterations           | 630        |
|    time_elapsed         | 38896      |
|    total_timesteps 

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 640         |
|    time_elapsed         | 39518       |
|    total_timesteps      | 327680      |
| train/                  |             |
|    approx_kl            | 0.006063188 |
|    clip_fraction        | 0.000391    |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.9        |
|    explained_variance   | 0.751       |
|    learning_rate        | 1e-06       |
|    loss                 | 46.3        |
|    n_updates            | 25920       |
|    policy_gradient_loss | -0.0017     |
|    value_loss           | 118         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 641         |
|    time_elapsed         | 39580       |
|    total_timesteps      | 328192

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 651         |
|    time_elapsed         | 40202       |
|    total_timesteps      | 333312      |
| train/                  |             |
|    approx_kl            | 0.002620624 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.741      |
|    explained_variance   | 0.892       |
|    learning_rate        | 1e-06       |
|    loss                 | 69.4        |
|    n_updates            | 26030       |
|    policy_gradient_loss | -0.00133    |
|    value_loss           | 249         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 652          |
|    time_elapsed         | 40264        |
|    total_timesteps      | 3

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 662          |
|    time_elapsed         | 40889        |
|    total_timesteps      | 338944       |
| train/                  |              |
|    approx_kl            | 0.0016227114 |
|    clip_fraction        | 0.00469      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.706       |
|    explained_variance   | 0.497        |
|    learning_rate        | 1e-06        |
|    loss                 | 281          |
|    n_updates            | 26140        |
|    policy_gradient_loss | -0.000957    |
|    value_loss           | 707          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 663          |
|    time_elapsed         | 40951        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 673          |
|    time_elapsed         | 41579        |
|    total_timesteps      | 344576       |
| train/                  |              |
|    approx_kl            | 0.0018799922 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.849       |
|    explained_variance   | 0.533        |
|    learning_rate        | 1e-06        |
|    loss                 | 99.4         |
|    n_updates            | 26250        |
|    policy_gradient_loss | -0.00146     |
|    value_loss           | 226          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 674         |
|    time_elapsed         | 41642       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 684          |
|    time_elapsed         | 42242        |
|    total_timesteps      | 350208       |
| train/                  |              |
|    approx_kl            | 0.0059566777 |
|    clip_fraction        | 0.000586     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.945       |
|    explained_variance   | 0.785        |
|    learning_rate        | 1e-06        |
|    loss                 | 55.1         |
|    n_updates            | 26360        |
|    policy_gradient_loss | -0.00184     |
|    value_loss           | 423          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 685          |
|    time_elapsed         | 42297        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 695           |
|    time_elapsed         | 42882         |
|    total_timesteps      | 355840        |
| train/                  |               |
|    approx_kl            | 0.00012454728 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.887        |
|    explained_variance   | 0.836         |
|    learning_rate        | 1e-06         |
|    loss                 | 133           |
|    n_updates            | 26470         |
|    policy_gradient_loss | 3.67e-06      |
|    value_loss           | 365           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 696         |
|    time_elapsed         | 42945       

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 706           |
|    time_elapsed         | 43565         |
|    total_timesteps      | 361472        |
| train/                  |               |
|    approx_kl            | 0.00067213527 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.908        |
|    explained_variance   | 0.852         |
|    learning_rate        | 1e-06         |
|    loss                 | 67.4          |
|    n_updates            | 26580         |
|    policy_gradient_loss | -9.04e-05     |
|    value_loss           | 195           |
-------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 8          |
|    iterations           | 707        |
|    time_elapsed         | 43627      |
|  

-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 717         |
|    time_elapsed         | 44251       |
|    total_timesteps      | 367104      |
| train/                  |             |
|    approx_kl            | 0.011897487 |
|    clip_fraction        | 0.024       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.919      |
|    explained_variance   | 0.717       |
|    learning_rate        | 1e-06       |
|    loss                 | 81.2        |
|    n_updates            | 26690       |
|    policy_gradient_loss | -0.00588    |
|    value_loss           | 201         |
-----------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 718          |
|    time_elapsed         | 44314        |
|    total_timesteps      | 3

In [None]:
#state = env.reset()

#while True:
 #   action, _ = model.predict(state)
 #   state, reward, done, info = env.step(action)
 #   env.render()