# 1. Setup Mario

In [1]:
!pip install gym_super_mario_bros==7.3.0 nes_py

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# Game
import gym_super_mario_bros
# Joypad
from nes_py.wrappers import JoypadSpace
# Controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [3]:
SIMPLE_MOVEMENT

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

In [4]:
# Setup game
env = gym_super_mario_bros.make('SuperMarioBros-v0')
# Simplify movement variations (action_space)
env = JoypadSpace(env, SIMPLE_MOVEMENT)

In [5]:
# Flag
done = True

# Loop through each frame
for step in range(100):
    if done:
        # Start the game
        env.reset()
    # Do random actions
    state, reward, done, info = env.step(env.action_space.sample())
    # Show the game on the screen
    env.render()
env.close()



# 2. Preprocess Environment

In [6]:
!pip install torch torchvision torchaudio
# PPO for RL
!pip install stable-baselines3[extra]

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [7]:
# Frame Stacker Wrapper and Grayscaling Wrapper
# Framestack: Capture last couple of frames while Mario is playing
# GrayscaleObservation: Shave our data since it does not deal with no coloring, making our model faster
from gym.wrappers import GrayScaleObservation
# Vectorization Wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
# Plot the impact of framestacking
from matplotlib import pyplot as plt

In [8]:
# Create the base env 
env = gym_super_mario_bros.make('SuperMarioBros-v0')
# Simplify movement controls
env = JoypadSpace(env, SIMPLE_MOVEMENT)
# Grayscale the env
env = GrayScaleObservation(env, keep_dim=True)
# Wrap inside the dummy env
env = DummyVecEnv([lambda: env])
# Stack the frames
# Find how many frames to take in as you desire that works best
env = VecFrameStack(env, 4, channels_order='last')

In [9]:
# state = env.reset()
# plt.imshow(state)

In [10]:
env.reset()

array([[[[  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         ...,
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140]],

        [[  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         ...,
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140]],

        [[  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         ...,
         [  0,   0,   0, 140],
         [  0,   0,   0, 140],
         [  0,   0,   0, 140]],

        ...,

        [[  0,   0,   0, 214],
         [  0,   0,   0, 124],
         [  0,   0,   0, 124],
         ...,
         [  0,   0,   0, 124],
         [  0,   0,   0, 124],
         [  0,   0,   0,   0]],

        [[  0,   0,   0, 214],
         [  0,   0,   0, 124],
         [  0,   0,   0, 124],
         ...,
         [  0,   0,   0, 124],
         [  0,   0,   0,   0],
         

In [11]:
state, reward, done, info = env.step([5])

In [12]:
#plt.figure(figsize=(20,16))
#for idx in range(state.shape[3]):
 #   plt.subplot(1,4,idx+1)
 #   plt.imshow(state[0][:,:,idx])
#plt.show()

# 3. Train the RL Model

In [13]:
# AREA: Agent, Reward, Environment, Action
# PPO: Proximal Policy Optimization
# OS for file path management
import os
# PPO classifier
from stable_baselines3 import PPO
# Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [14]:
# Save our model every x number of steps
class TrainAndLoggingCallback(BaseCallback):
    
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        
    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
            
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)
            
        return True

In [15]:
CHECKPOINT_DIR = './train/'
LOG_DIR = './logs/'

In [16]:
callback = TrainAndLoggingCallback(check_freq=1000000, save_path=CHECKPOINT_DIR)

In [17]:
# RL requires a policy network (such as the CNN that is the brain of the AI)
# CnnPolicy is fast at processing images, which fits our use case
model = PPO('CnnPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.000001, n_steps=512)

Using cpu device
Wrapping the env in a VecTransposeImage.


In [None]:
# Train the model
# Every single game gets a million frames
model.learn(total_timesteps=4000000, callback=callback)

Logging to ./logs/PPO_8


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------
| time/              |     |
|    fps             | 94  |
|    iterations      | 1   |
|    time_elapsed    | 5   |
|    total_timesteps | 512 |
----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 17            |
|    iterations           | 2             |
|    time_elapsed         | 59            |
|    total_timesteps      | 1024          |
| train/                  |               |
|    approx_kl            | 3.6907964e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00358       |
|    learning_rate        | 1e-06         |
|    loss                 | 191           |
|    n_updates            | 10            |
|    policy_gradient_loss | -0.000509     |
|    value_loss           | 540           |
-------------------------------------------
-----

-------------------------------------------
| time/                   |               |
|    fps                  | 9             |
|    iterations           | 13            |
|    time_elapsed         | 721           |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 1.1670287e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.95         |
|    explained_variance   | 0.00778       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0764        |
|    n_updates            | 120           |
|    policy_gradient_loss | -0.000158     |
|    value_loss           | 0.169         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 9             |
|    iterations           | 14            |
|    time_elapsed         | 785 

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 24           |
|    time_elapsed         | 1404         |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 1.982029e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | -0.000867    |
|    learning_rate        | 1e-06        |
|    loss                 | 0.491        |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.000311    |
|    value_loss           | 1.43         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 25            |
|    time_elapsed         | 1470          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 35            |
|    time_elapsed         | 2121          |
|    total_timesteps      | 17920         |
| train/                  |               |
|    approx_kl            | 1.0914868e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.0128       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.303         |
|    n_updates            | 340           |
|    policy_gradient_loss | -8.91e-05     |
|    value_loss           | 0.806         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 36            |
|    time_elapsed         | 2183

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 46            |
|    time_elapsed         | 2835          |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 6.3874526e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.000822     |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0613        |
|    n_updates            | 450           |
|    policy_gradient_loss | -0.000558     |
|    value_loss           | 0.141         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 47            |
|    time_elapsed         | 2905

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 57           |
|    time_elapsed         | 3561         |
|    total_timesteps      | 29184        |
| train/                  |              |
|    approx_kl            | 6.762578e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.0359       |
|    learning_rate        | 1e-06        |
|    loss                 | 24           |
|    n_updates            | 560          |
|    policy_gradient_loss | -7.79e-05    |
|    value_loss           | 78.1         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 58           |
|    time_elapsed         | 3630         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 68            |
|    time_elapsed         | 4291          |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 9.3403156e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | 0.138         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.153         |
|    n_updates            | 670           |
|    policy_gradient_loss | -0.000537     |
|    value_loss           | 0.843         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 69            |
|    time_elapsed         | 4354

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 79            |
|    time_elapsed         | 5052          |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 0.00015006715 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | -0.0118       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.101         |
|    n_updates            | 780           |
|    policy_gradient_loss | -0.000845     |
|    value_loss           | 0.16          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 80           |
|    time_elapsed         | 5128    

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 90            |
|    time_elapsed         | 5820          |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 1.9676518e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.451         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0493        |
|    n_updates            | 890           |
|    policy_gradient_loss | -7.32e-05     |
|    value_loss           | 0.66          |
-------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 7              |
|    iterations           | 91             |
|    time_elapsed         | 

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 101           |
|    time_elapsed         | 6581          |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 5.7055382e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.9          |
|    explained_variance   | 0.165         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0436        |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000315     |
|    value_loss           | 0.32          |
-------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 7          |
|    iterations           | 102        |
|    time_elapsed         | 6649       |
|  

------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 112          |
|    time_elapsed         | 7362         |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 8.381868e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.89        |
|    explained_variance   | 0.00513      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.053        |
|    n_updates            | 1110         |
|    policy_gradient_loss | -0.000617    |
|    value_loss           | 0.126        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 113           |
|    time_elapsed         | 7437          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 123          |
|    time_elapsed         | 8133         |
|    total_timesteps      | 62976        |
| train/                  |              |
|    approx_kl            | 0.0013802014 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.86        |
|    explained_variance   | 0.21         |
|    learning_rate        | 1e-06        |
|    loss                 | 90.3         |
|    n_updates            | 1220         |
|    policy_gradient_loss | 0.000635     |
|    value_loss           | 250          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 124           |
|    time_elapsed         | 8201          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 134           |
|    time_elapsed         | 8904          |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 3.8965256e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.88         |
|    explained_variance   | 0.00744       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0553        |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000272     |
|    value_loss           | 0.163         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 135           |
|    time_elapsed         | 8968

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 145           |
|    time_elapsed         | 9632          |
|    total_timesteps      | 74240         |
| train/                  |               |
|    approx_kl            | 0.00015406136 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.87         |
|    explained_variance   | -0.0267       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0827        |
|    n_updates            | 1440          |
|    policy_gradient_loss | -0.000886     |
|    value_loss           | 0.599         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 146           |
|    time_elapsed         | 9705

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 156           |
|    time_elapsed         | 10394         |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 4.0163635e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | -0.0262       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0799        |
|    n_updates            | 1550          |
|    policy_gradient_loss | 7.15e-06      |
|    value_loss           | 0.751         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 157          |
|    time_elapsed         | 10461   

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 167           |
|    time_elapsed         | 11139         |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 0.00019746262 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | -0.026        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0465        |
|    n_updates            | 1660          |
|    policy_gradient_loss | -0.000788     |
|    value_loss           | 0.1           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 168           |
|    time_elapsed         | 1121

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 178           |
|    time_elapsed         | 11861         |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 3.1930977e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | -0.0103       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0435        |
|    n_updates            | 1770          |
|    policy_gradient_loss | -5.23e-05     |
|    value_loss           | 0.27          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 179          |
|    time_elapsed         | 11925   

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 189           |
|    time_elapsed         | 12576         |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 5.8056554e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | 0.0223        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.124         |
|    n_updates            | 1880          |
|    policy_gradient_loss | -0.000303     |
|    value_loss           | 0.493         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 190           |
|    time_elapsed         | 1263

------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 200          |
|    time_elapsed         | 13248        |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0001870814 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.84        |
|    explained_variance   | -0.0093      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0469       |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.00077     |
|    value_loss           | 0.133        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 201          |
|    time_elapsed         | 13313        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 211          |
|    time_elapsed         | 13927        |
|    total_timesteps      | 108032       |
| train/                  |              |
|    approx_kl            | 0.0002326417 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.84        |
|    explained_variance   | 0.00307      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0558       |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.00118     |
|    value_loss           | 0.15         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 212           |
|    time_elapsed         | 13985         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 222          |
|    time_elapsed         | 14526        |
|    total_timesteps      | 113664       |
| train/                  |              |
|    approx_kl            | 7.213652e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.82        |
|    explained_variance   | -0.0186      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0435       |
|    n_updates            | 2210         |
|    policy_gradient_loss | -0.000166    |
|    value_loss           | 0.429        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 223          |
|    time_elapsed         | 14579        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 233          |
|    time_elapsed         | 15138        |
|    total_timesteps      | 119296       |
| train/                  |              |
|    approx_kl            | 0.0002791197 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.8         |
|    explained_variance   | -0.0554      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.232        |
|    n_updates            | 2320         |
|    policy_gradient_loss | -0.00108     |
|    value_loss           | 1.59         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 7            |
|    iterations           | 234          |
|    time_elapsed         | 15193        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 244           |
|    time_elapsed         | 15752         |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 2.6175985e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | -0.00877      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0391        |
|    n_updates            | 2430          |
|    policy_gradient_loss | -6.43e-05     |
|    value_loss           | 0.155         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 245           |
|    time_elapsed         | 1580

-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 255           |
|    time_elapsed         | 16346         |
|    total_timesteps      | 130560        |
| train/                  |               |
|    approx_kl            | 0.00012927898 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.78         |
|    explained_variance   | -0.0129       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0485        |
|    n_updates            | 2540          |
|    policy_gradient_loss | -0.000391     |
|    value_loss           | 0.142         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 7             |
|    iterations           | 256           |
|    time_elapsed         | 1640

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 266          |
|    time_elapsed         | 16959        |
|    total_timesteps      | 136192       |
| train/                  |              |
|    approx_kl            | 7.729919e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.73        |
|    explained_variance   | 0.00185      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.209        |
|    n_updates            | 2650         |
|    policy_gradient_loss | -0.000392    |
|    value_loss           | 0.508        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 267           |
|    time_elapsed         | 17015         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 277          |
|    time_elapsed         | 17578        |
|    total_timesteps      | 141824       |
| train/                  |              |
|    approx_kl            | 7.295527e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.76        |
|    explained_variance   | -0.0382      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0397       |
|    n_updates            | 2760         |
|    policy_gradient_loss | -0.000195    |
|    value_loss           | 0.173        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 278           |
|    time_elapsed         | 17635         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 288          |
|    time_elapsed         | 18183        |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0006377748 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.67        |
|    explained_variance   | 0.0477       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0895       |
|    n_updates            | 2870         |
|    policy_gradient_loss | -0.00157     |
|    value_loss           | 0.568        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 289          |
|    time_elapsed         | 18239        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 299           |
|    time_elapsed         | 18809         |
|    total_timesteps      | 153088        |
| train/                  |               |
|    approx_kl            | 0.00047115365 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.7          |
|    explained_variance   | -0.0253       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.159         |
|    n_updates            | 2980          |
|    policy_gradient_loss | -0.00134      |
|    value_loss           | 0.95          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 300           |
|    time_elapsed         | 1886

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 310           |
|    time_elapsed         | 19433         |
|    total_timesteps      | 158720        |
| train/                  |               |
|    approx_kl            | 0.00049276894 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.76         |
|    explained_variance   | -0.0255       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0365        |
|    n_updates            | 3090          |
|    policy_gradient_loss | -0.00154      |
|    value_loss           | 0.105         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 311           |
|    time_elapsed         | 1949

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 321           |
|    time_elapsed         | 20052         |
|    total_timesteps      | 164352        |
| train/                  |               |
|    approx_kl            | 0.00024598697 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.76         |
|    explained_variance   | -0.0337       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0439        |
|    n_updates            | 3200          |
|    policy_gradient_loss | -0.00103      |
|    value_loss           | 0.132         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 322           |
|    time_elapsed         | 2010

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 332           |
|    time_elapsed         | 20660         |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00070886884 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.73         |
|    explained_variance   | -0.139        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.123         |
|    n_updates            | 3310          |
|    policy_gradient_loss | -0.00123      |
|    value_loss           | 1.12          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 333           |
|    time_elapsed         | 2071

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 343           |
|    time_elapsed         | 21279         |
|    total_timesteps      | 175616        |
| train/                  |               |
|    approx_kl            | 0.00020108745 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.73         |
|    explained_variance   | 0.871         |
|    learning_rate        | 1e-06         |
|    loss                 | 3.82          |
|    n_updates            | 3420          |
|    policy_gradient_loss | -0.00046      |
|    value_loss           | 14            |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 344         |
|    time_elapsed         | 21334       

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 354           |
|    time_elapsed         | 21888         |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 0.00022617751 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.72         |
|    explained_variance   | -0.0748       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0518        |
|    n_updates            | 3530          |
|    policy_gradient_loss | -0.000662     |
|    value_loss           | 0.189         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 355          |
|    time_elapsed         | 21944   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 365           |
|    time_elapsed         | 22504         |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 4.4717337e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.69         |
|    explained_variance   | 0.047         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.125         |
|    n_updates            | 3640          |
|    policy_gradient_loss | -0.000202     |
|    value_loss           | 0.365         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 366          |
|    time_elapsed         | 22560   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 376           |
|    time_elapsed         | 23125         |
|    total_timesteps      | 192512        |
| train/                  |               |
|    approx_kl            | 0.00046473532 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.6          |
|    explained_variance   | 0.746         |
|    learning_rate        | 1e-06         |
|    loss                 | 70.3          |
|    n_updates            | 3750          |
|    policy_gradient_loss | -0.000304     |
|    value_loss           | 113           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 377          |
|    time_elapsed         | 23181   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 387           |
|    time_elapsed         | 23742         |
|    total_timesteps      | 198144        |
| train/                  |               |
|    approx_kl            | 0.00035687082 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.69         |
|    explained_variance   | 0.151         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0782        |
|    n_updates            | 3860          |
|    policy_gradient_loss | -0.000703     |
|    value_loss           | 0.145         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 388          |
|    time_elapsed         | 23798   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 398          |
|    time_elapsed         | 24351        |
|    total_timesteps      | 203776       |
| train/                  |              |
|    approx_kl            | 8.090737e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.67        |
|    explained_variance   | 0.126        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0513       |
|    n_updates            | 3970         |
|    policy_gradient_loss | -0.000216    |
|    value_loss           | 0.144        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 399           |
|    time_elapsed         | 24409         |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 409           |
|    time_elapsed         | 24974         |
|    total_timesteps      | 209408        |
| train/                  |               |
|    approx_kl            | 0.00016043219 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.63         |
|    explained_variance   | 0.716         |
|    learning_rate        | 1e-06         |
|    loss                 | 37.9          |
|    n_updates            | 4080          |
|    policy_gradient_loss | -0.00101      |
|    value_loss           | 85.3          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 410           |
|    time_elapsed         | 2503

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 420           |
|    time_elapsed         | 25595         |
|    total_timesteps      | 215040        |
| train/                  |               |
|    approx_kl            | 0.00067709875 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.64         |
|    explained_variance   | -0.0517       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0593        |
|    n_updates            | 4190          |
|    policy_gradient_loss | -0.00172      |
|    value_loss           | 0.128         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 421           |
|    time_elapsed         | 2565

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 431          |
|    time_elapsed         | 26202        |
|    total_timesteps      | 220672       |
| train/                  |              |
|    approx_kl            | 0.0007343873 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.57        |
|    explained_variance   | -0.139       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0815       |
|    n_updates            | 4300         |
|    policy_gradient_loss | -0.0016      |
|    value_loss           | 0.238        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 432           |
|    time_elapsed         | 26259         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 442          |
|    time_elapsed         | 26823        |
|    total_timesteps      | 226304       |
| train/                  |              |
|    approx_kl            | 0.0011036734 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.53        |
|    explained_variance   | -0.151       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.14         |
|    n_updates            | 4410         |
|    policy_gradient_loss | -0.00172     |
|    value_loss           | 0.975        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 443          |
|    time_elapsed         | 26879        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 453          |
|    time_elapsed         | 27444        |
|    total_timesteps      | 231936       |
| train/                  |              |
|    approx_kl            | 0.0002024807 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.54        |
|    explained_variance   | 0.659        |
|    learning_rate        | 1e-06        |
|    loss                 | 51           |
|    n_updates            | 4520         |
|    policy_gradient_loss | -0.000183    |
|    value_loss           | 96.2         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 454          |
|    time_elapsed         | 27503        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 464          |
|    time_elapsed         | 28047        |
|    total_timesteps      | 237568       |
| train/                  |              |
|    approx_kl            | 0.0003667879 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.52        |
|    explained_variance   | -0.172       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.091        |
|    n_updates            | 4630         |
|    policy_gradient_loss | -0.000711    |
|    value_loss           | 0.137        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 465          |
|    time_elapsed         | 28102        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 475          |
|    time_elapsed         | 28654        |
|    total_timesteps      | 243200       |
| train/                  |              |
|    approx_kl            | 9.684765e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.44        |
|    explained_variance   | -0.0323      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.216        |
|    n_updates            | 4740         |
|    policy_gradient_loss | -0.000281    |
|    value_loss           | 0.496        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 476          |
|    time_elapsed         | 28709        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 486           |
|    time_elapsed         | 29261         |
|    total_timesteps      | 248832        |
| train/                  |               |
|    approx_kl            | 0.00039436354 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.36         |
|    explained_variance   | 0.648         |
|    learning_rate        | 1e-06         |
|    loss                 | 112           |
|    n_updates            | 4850          |
|    policy_gradient_loss | -0.00152      |
|    value_loss           | 257           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 487          |
|    time_elapsed         | 29315   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 497           |
|    time_elapsed         | 29860         |
|    total_timesteps      | 254464        |
| train/                  |               |
|    approx_kl            | 0.00043272844 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.41         |
|    explained_variance   | 0.133         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0505        |
|    n_updates            | 4960          |
|    policy_gradient_loss | -0.000627     |
|    value_loss           | 0.113         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 498          |
|    time_elapsed         | 29915   

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 508           |
|    time_elapsed         | 30465         |
|    total_timesteps      | 260096        |
| train/                  |               |
|    approx_kl            | 0.00046816678 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.38         |
|    explained_variance   | 0.0678        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.139         |
|    n_updates            | 5070          |
|    policy_gradient_loss | -0.000683     |
|    value_loss           | 0.353         |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 509         |
|    time_elapsed         | 30520       

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 519          |
|    time_elapsed         | 31071        |
|    total_timesteps      | 265728       |
| train/                  |              |
|    approx_kl            | 0.0025552842 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.26        |
|    explained_variance   | -0.179       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0582       |
|    n_updates            | 5180         |
|    policy_gradient_loss | -0.00285     |
|    value_loss           | 0.159        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 520         |
|    time_elapsed         | 31126       |
|    total_times

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 530          |
|    time_elapsed         | 31668        |
|    total_timesteps      | 271360       |
| train/                  |              |
|    approx_kl            | 0.0007043979 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.25        |
|    explained_variance   | 0.0638       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0452       |
|    n_updates            | 5290         |
|    policy_gradient_loss | -0.0017      |
|    value_loss           | 0.297        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 531          |
|    time_elapsed         | 31722        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 541          |
|    time_elapsed         | 32268        |
|    total_timesteps      | 276992       |
| train/                  |              |
|    approx_kl            | 0.0008428894 |
|    clip_fraction        | 0.000977     |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.22        |
|    explained_variance   | 0.902        |
|    learning_rate        | 1e-06        |
|    loss                 | 74.1         |
|    n_updates            | 5400         |
|    policy_gradient_loss | -0.000968    |
|    value_loss           | 167          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 542           |
|    time_elapsed         | 32322         |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 552           |
|    time_elapsed         | 32863         |
|    total_timesteps      | 282624        |
| train/                  |               |
|    approx_kl            | 0.00033799908 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.13         |
|    explained_variance   | 0.819         |
|    learning_rate        | 1e-06         |
|    loss                 | 78.5          |
|    n_updates            | 5510          |
|    policy_gradient_loss | -0.00108      |
|    value_loss           | 212           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 553          |
|    time_elapsed         | 32917   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 563          |
|    time_elapsed         | 33462        |
|    total_timesteps      | 288256       |
| train/                  |              |
|    approx_kl            | 0.0012044456 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.23        |
|    explained_variance   | 0.106        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.052        |
|    n_updates            | 5620         |
|    policy_gradient_loss | -0.00166     |
|    value_loss           | 0.248        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 564          |
|    time_elapsed         | 33516        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 574           |
|    time_elapsed         | 34060         |
|    total_timesteps      | 293888        |
| train/                  |               |
|    approx_kl            | 0.00040549273 |
|    clip_fraction        | 0.00156       |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.08         |
|    explained_variance   | 0.911         |
|    learning_rate        | 1e-06         |
|    loss                 | 25.1          |
|    n_updates            | 5730          |
|    policy_gradient_loss | 0.00127       |
|    value_loss           | 50            |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 575           |
|    time_elapsed         | 3411

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 585           |
|    time_elapsed         | 34653         |
|    total_timesteps      | 299520        |
| train/                  |               |
|    approx_kl            | 0.00015332643 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.98         |
|    explained_variance   | 0.792         |
|    learning_rate        | 1e-06         |
|    loss                 | 104           |
|    n_updates            | 5840          |
|    policy_gradient_loss | 0.000117      |
|    value_loss           | 250           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 586           |
|    time_elapsed         | 3470

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 596          |
|    time_elapsed         | 35250        |
|    total_timesteps      | 305152       |
| train/                  |              |
|    approx_kl            | 0.0008419958 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.874       |
|    explained_variance   | 0.503        |
|    learning_rate        | 1e-06        |
|    loss                 | 155          |
|    n_updates            | 5950         |
|    policy_gradient_loss | -0.00111     |
|    value_loss           | 327          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 597           |
|    time_elapsed         | 35303         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 607          |
|    time_elapsed         | 35843        |
|    total_timesteps      | 310784       |
| train/                  |              |
|    approx_kl            | 8.712802e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.954       |
|    explained_variance   | 0.584        |
|    learning_rate        | 1e-06        |
|    loss                 | 160          |
|    n_updates            | 6060         |
|    policy_gradient_loss | 3.32e-05     |
|    value_loss           | 383          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 608          |
|    time_elapsed         | 35897        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 618          |
|    time_elapsed         | 36439        |
|    total_timesteps      | 316416       |
| train/                  |              |
|    approx_kl            | 0.0012187863 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.884       |
|    explained_variance   | 0.175        |
|    learning_rate        | 1e-06        |
|    loss                 | 393          |
|    n_updates            | 6170         |
|    policy_gradient_loss | -0.000408    |
|    value_loss           | 693          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 619           |
|    time_elapsed         | 36493         |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 629           |
|    time_elapsed         | 37035         |
|    total_timesteps      | 322048        |
| train/                  |               |
|    approx_kl            | 0.00016105897 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.725        |
|    explained_variance   | 0.761         |
|    learning_rate        | 1e-06         |
|    loss                 | 154           |
|    n_updates            | 6280          |
|    policy_gradient_loss | -0.000604     |
|    value_loss           | 244           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 630           |
|    time_elapsed         | 3708

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 640           |
|    time_elapsed         | 37630         |
|    total_timesteps      | 327680        |
| train/                  |               |
|    approx_kl            | 0.00044552144 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.64         |
|    explained_variance   | 0.36          |
|    learning_rate        | 1e-06         |
|    loss                 | 568           |
|    n_updates            | 6390          |
|    policy_gradient_loss | -0.00116      |
|    value_loss           | 895           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 641           |
|    time_elapsed         | 3768

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 651          |
|    time_elapsed         | 38218        |
|    total_timesteps      | 333312       |
| train/                  |              |
|    approx_kl            | 0.0007901159 |
|    clip_fraction        | 0.00137      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.613       |
|    explained_variance   | 0.556        |
|    learning_rate        | 1e-06        |
|    loss                 | 259          |
|    n_updates            | 6500         |
|    policy_gradient_loss | -0.00123     |
|    value_loss           | 421          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 652          |
|    time_elapsed         | 38271        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 662           |
|    time_elapsed         | 38808         |
|    total_timesteps      | 338944        |
| train/                  |               |
|    approx_kl            | 0.00013624446 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.624        |
|    explained_variance   | 0.727         |
|    learning_rate        | 1e-06         |
|    loss                 | 113           |
|    n_updates            | 6610          |
|    policy_gradient_loss | -0.000319     |
|    value_loss           | 311           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 663          |
|    time_elapsed         | 38862   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 673          |
|    time_elapsed         | 39403        |
|    total_timesteps      | 344576       |
| train/                  |              |
|    approx_kl            | 0.0002877392 |
|    clip_fraction        | 0.000586     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.694       |
|    explained_variance   | 0.501        |
|    learning_rate        | 1e-06        |
|    loss                 | 141          |
|    n_updates            | 6720         |
|    policy_gradient_loss | -0.000697    |
|    value_loss           | 373          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 674           |
|    time_elapsed         | 39457         |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 684           |
|    time_elapsed         | 39999         |
|    total_timesteps      | 350208        |
| train/                  |               |
|    approx_kl            | 0.00060330983 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.497        |
|    explained_variance   | 0.454         |
|    learning_rate        | 1e-06         |
|    loss                 | 151           |
|    n_updates            | 6830          |
|    policy_gradient_loss | -0.000494     |
|    value_loss           | 578           |
-------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 685         |
|    time_elapsed         | 40053       

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 695           |
|    time_elapsed         | 40593         |
|    total_timesteps      | 355840        |
| train/                  |               |
|    approx_kl            | 0.00010044919 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.458        |
|    explained_variance   | 0.803         |
|    learning_rate        | 1e-06         |
|    loss                 | 101           |
|    n_updates            | 6940          |
|    policy_gradient_loss | -0.0001       |
|    value_loss           | 234           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 696          |
|    time_elapsed         | 40647   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 706          |
|    time_elapsed         | 41191        |
|    total_timesteps      | 361472       |
| train/                  |              |
|    approx_kl            | 8.057605e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.415       |
|    explained_variance   | 0.841        |
|    learning_rate        | 1e-06        |
|    loss                 | 115          |
|    n_updates            | 7050         |
|    policy_gradient_loss | -0.000188    |
|    value_loss           | 217          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 707          |
|    time_elapsed         | 41245        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 717          |
|    time_elapsed         | 41837        |
|    total_timesteps      | 367104       |
| train/                  |              |
|    approx_kl            | 0.0003183491 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.352       |
|    explained_variance   | 0.523        |
|    learning_rate        | 1e-06        |
|    loss                 | 84.4         |
|    n_updates            | 7160         |
|    policy_gradient_loss | -0.000279    |
|    value_loss           | 545          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 718           |
|    time_elapsed         | 41896         |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 728          |
|    time_elapsed         | 42524        |
|    total_timesteps      | 372736       |
| train/                  |              |
|    approx_kl            | 0.0011995871 |
|    clip_fraction        | 0.00586      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.402       |
|    explained_variance   | 0.0895       |
|    learning_rate        | 1e-06        |
|    loss                 | 571          |
|    n_updates            | 7270         |
|    policy_gradient_loss | -0.000834    |
|    value_loss           | 761          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 729          |
|    time_elapsed         | 42585        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 739           |
|    time_elapsed         | 43174         |
|    total_timesteps      | 378368        |
| train/                  |               |
|    approx_kl            | 0.00044955336 |
|    clip_fraction        | 0.00605       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.886        |
|    explained_variance   | -0.303        |
|    learning_rate        | 1e-06         |
|    loss                 | 1.77          |
|    n_updates            | 7380          |
|    policy_gradient_loss | 0.00125       |
|    value_loss           | 25.7          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 740          |
|    time_elapsed         | 43231   

------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 750          |
|    time_elapsed         | 43859        |
|    total_timesteps      | 384000       |
| train/                  |              |
|    approx_kl            | 0.0009872018 |
|    clip_fraction        | 0.00176      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.516       |
|    explained_variance   | 0.664        |
|    learning_rate        | 1e-06        |
|    loss                 | 412          |
|    n_updates            | 7490         |
|    policy_gradient_loss | 0.00157      |
|    value_loss           | 527          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 751          |
|    time_elapsed         | 43929        |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 761           |
|    time_elapsed         | 44544         |
|    total_timesteps      | 389632        |
| train/                  |               |
|    approx_kl            | 5.3216238e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.689        |
|    explained_variance   | 0.811         |
|    learning_rate        | 1e-06         |
|    loss                 | 104           |
|    n_updates            | 7600          |
|    policy_gradient_loss | 0.000141      |
|    value_loss           | 294           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 762           |
|    time_elapsed         | 4460

-------------------------------------------
| time/                   |               |
|    fps                  | 8             |
|    iterations           | 772           |
|    time_elapsed         | 45197         |
|    total_timesteps      | 395264        |
| train/                  |               |
|    approx_kl            | 0.00066157384 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.594        |
|    explained_variance   | -0.0252       |
|    learning_rate        | 1e-06         |
|    loss                 | 115           |
|    n_updates            | 7710          |
|    policy_gradient_loss | -0.000979     |
|    value_loss           | 303           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 8            |
|    iterations           | 773          |
|    time_elapsed         | 45255   

In [None]:
# No callback: model.save('name_of_file')
# To view the logs on tensorboard
# Activate the env on this dir and go into log dir (go into most recent ppo model)
#tensorboard --logdir=.

# 4. Test the Model

In [None]:
# Load the model
#model = PPO.load('./train/best_model_4000000')

In [None]:
#state = env.reset()

#while True:
 #   action, _ = model.predict(state)
 #   state, reward, done, info = env.step(action)
 #   env.render()