**1. Import Dependencies**

In [1]:
import os
import numpy as np
import pandas as pd
import time

In [2]:
import gym
from stable_baselines3 import PPO      #PPO -> Proximal Policy Optimization
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy  #to evaluate the model 
from stable_baselines3.common.callbacks import EvalCallback

In [None]:
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1024, 768))
display.start()


from matplotlib import pyplot as plt, animation
%matplotlib inline
from IPython import display

def create_anim(frames, dpi, fps):
    """Build a matplotlib animation that plays ``frames`` as an image sequence.

    Args:
        frames: Non-empty, indexable sequence of image arrays. The figure is
            sized from ``frames[0].shape`` so that its canvas is
            ``shape[1] x shape[0]`` pixels at the given ``dpi``.
        dpi: Figure resolution in dots per inch.
        fps: Playback rate in frames per second; must be positive.

    Returns:
        The ``animation.FuncAnimation`` object driving the figure.

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError("fps must be positive, got {!r}".format(fps))

    first_frame = frames[0]
    height, width = first_frame.shape[0], first_frame.shape[1]
    fig = plt.figure(figsize=(width / dpi, height / dpi), dpi=dpi)
    patch = plt.imshow(first_frame)

    def setup():
        # Hide ticks and frame so only the image is shown.
        plt.axis('off')

    def animate(i):
        # Swap the image data in place instead of redrawing the axes.
        patch.set_data(frames[i])

    # FuncAnimation's `interval` is the delay between frames in milliseconds,
    # not a frame rate, so convert frames-per-second to ms-per-frame.
    return animation.FuncAnimation(
        fig,
        animate,
        init_func=setup,
        frames=len(frames),
        interval=1000.0 / fps,
    )

def display_anim(frames, dpi=72, fps=60):
    """Return the animation of ``frames`` in its ``to_jshtml()`` form.

    Args:
        frames: Image sequence, forwarded unchanged to ``create_anim``.
        dpi: Forwarded unchanged to ``create_anim`` (default 72).
        fps: Forwarded unchanged to ``create_anim`` (default 60).

    Returns:
        Whatever ``to_jshtml()`` returns for the created animation.
    """
    return create_anim(frames, dpi, fps).to_jshtml()

def save_anim(frames, filename, dpi=72, fps=50):
    """Create the animation of ``frames`` and write it to ``filename``.

    Args:
        frames: Image sequence, forwarded unchanged to ``create_anim``.
        filename: Destination passed to the animation's ``save`` method.
        dpi: Forwarded unchanged to ``create_anim`` (default 72).
        fps: Forwarded unchanged to ``create_anim`` (default 50).
    """
    create_anim(frames, dpi, fps).save(filename)


class trigger:
    """Callable on/off flag.

    Calling an instance returns the currently stored value and ignores the
    argument it is called with. The stored value starts out as ``True`` and
    can be replaced through ``set``.
    """

    def __init__(self):
        # Enabled by default.
        self._trigger = True

    def set(self, t):
        """Store ``t`` as the value returned by subsequent calls."""
        self._trigger = t

    def __call__(self, e):
        """Return the stored value; ``e`` is accepted but not used."""
        return self._trigger

**2. Train Environment**

In [3]:
# Default environment
# environment_name = 'CarRacing-v0'    
# env = gym.make(environment_name)
# eval_env= env

# Imports for the curriculum (training) and evaluation environments.
# TimeLimit is imported once and shared by both.
from gym.wrappers.time_limit import TimeLimit
from TurnRates.car_racing_curriculum import CarRacingCurriculum
from TurnRates.car_racing_eval import CarRacingEval

# Episode cap applied to both the training and the evaluation environment.
MAX_EPISODE_STEPS = 1000

# Curriculum environment
# Best ranges chosen from BO
ranges = np.array([3429, 6858, 11421])
env = TimeLimit(CarRacingCurriculum(ranges), max_episode_steps=MAX_EPISODE_STEPS)

# Evaluation environment
eval_env = TimeLimit(CarRacingEval(), max_episode_steps=MAX_EPISODE_STEPS)

# Default obstacles environment
# from gym.wrappers.time_limit import TimeLimit
# from car_racing_obstacles import CarRacingObstacles
# env = TimeLimit(CarRacingObstacles(),max_episode_steps=1000)
# eval_env= env

# Curriculum obstacles environment
# from gym.wrappers.time_limit import TimeLimit
# from Obstacles.car_racing_obstacles_curriculum import CarRacingObstaclesCurriculum
# env = TimeLimit(CarRacingObstaclesCurriculum(),max_episode_steps=1000)

# Obstacles Evaluation environment
# from gym.wrappers.time_limit import TimeLimit
# from Obstacles.car_racing_obstacles_eval import CarRacingObstaclesEval
# eval_env = TimeLimit(CarRacingObstaclesEval(),max_episode_steps=1000)

# Curriculum both environment
# from gym.wrappers.time_limit import TimeLimit
# from Both.car_racing_obstacles_curriculum_both import CarRacingObstaclesCurriculumBoth
# env = TimeLimit(CarRacingObstaclesCurriculumBoth(),max_episode_steps=1000)

# Evaluation both environment
# from gym.wrappers.time_limit import TimeLimit
# from Both.car_racing_obstacles_eval_both import CarRacingObstaclesEvalBoth
# eval_env = TimeLimit(CarRacingObstaclesEvalBoth(),max_episode_steps=1000)


**3. Training Model**

In [14]:
# Random seed for this experiment; the training cell seeds PPO with this same value.
seed = 0

In [15]:
# Output locations. Plain string literals are used because a single-argument
# os.path.join() returns its argument unchanged.
log_path = './runs'  # passed to PPO as tensorboard_log
best_model_dir = './Training/Saved_Models/PPO_car_best_Model_curriculum'  # EvalCallback best-model folder

# Learning Rates: 
# 0.0005- PPO Default, 0.00025- PPO Curriculum
# 0.0005- PPO Obstacles, 0.000475- PPO Obstacles Curriculum
# 0.0002- PPO Both Curriculum

# Use the notebook-level `seed` rather than a second hard-coded literal, so
# changing the seed cell actually changes the run.
model = PPO(
    'CnnPolicy',
    env,
    learning_rate=0.00025,
    n_steps=1000,
    batch_size=1000,
    verbose=1,
    seed=seed,
    tensorboard_log=log_path,
)

# Evaluate on the separate evaluation environment every `eval_freq` steps
# (10 deterministic episodes each time); the callback is given
# `best_model_dir` as the place to save the best model.
eval_callback = EvalCallback(eval_env=eval_env, best_model_save_path=best_model_dir,
                             n_eval_episodes=10,
                             eval_freq=50000, verbose=1,
                             deterministic=True, render=False)
model.learn(total_timesteps=1000000, callback=eval_callback)

# Save the final model separately from the best-so-far checkpoint.
# `ppo_path` ends up holding the final-model path, as before.
ppo_path = './Training/Saved_Models/PPO_Model_final_curriculum.zip'
model.save(ppo_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./runs/PPO_1




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -59.7    |
| time/              |          |
|    fps             | 245      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 1000     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | -53.2        |
| time/                   |              |
|    fps                  | 91           |
|    iterations           | 2            |
|    time_elapsed         | 21           |
|    total_timesteps      | 2000         |
| train/                  |              |
|    approx_kl            | 0.0052174455 |
|    clip_fraction        | 0.0235       |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.26        |
|    explained_variance   | -0.000512    |
|    learning_r

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | -57.3        |
| time/                   |              |
|    fps                  | 60           |
|    iterations           | 11           |
|    time_elapsed         | 180          |
|    total_timesteps      | 11000        |
| train/                  |              |
|    approx_kl            | 0.0072619445 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.25        |
|    explained_variance   | 0.628        |
|    learning_rate        | 0.00025      |
|    loss                 | 0.293        |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.000981    |
|    std                  | 0.996        |
|    value_loss           | 0.614        |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -63.5       |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 20          |
|    time_elapsed         | 340         |
|    total_timesteps      | 20000       |
| train/                  |             |
|    approx_kl            | 0.015570443 |
|    clip_fraction        | 0.0566      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.24       |
|    explained_variance   | 0.946       |
|    learning_rate        | 0.00025     |
|    loss                 | 0.0857      |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.00383    |
|    std                  | 0.994       |
|    value_loss           | 0.28        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -59         |
| time/                   |             |
|    fps                  | 58          |
|    iterations           | 29          |
|    time_elapsed         | 499         |
|    total_timesteps      | 29000       |
| train/                  |             |
|    approx_kl            | 0.014723774 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.24       |
|    explained_variance   | 0.879       |
|    learning_rate        | 0.00025     |
|    loss                 | 0.753       |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.0033     |
|    std                  | 0.995       |
|    value_loss           | 1.91        |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -33.3       |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 38          |
|    time_elapsed         | 659         |
|    total_timesteps      | 38000       |
| train/                  |             |
|    approx_kl            | 0.018563066 |
|    clip_fraction        | 0.0854      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.24       |
|    explained_variance   | 0.895       |
|    learning_rate        | 0.00025     |
|    loss                 | 4.61        |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.00525    |
|    std                  | 0.993       |
|    value_loss           | 12.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 3.71        |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 47          |
|    time_elapsed         | 819         |
|    total_timesteps      | 47000       |
| train/                  |             |
|    approx_kl            | 0.011088518 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.942       |
|    learning_rate        | 0.00025     |
|    loss                 | 2.24        |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.00394    |
|    std                  | 0.992       |
|    value_loss           | 5.58        |
-----------------------------------------
-----------------------------------------
| rollout/                |       



Eval num_timesteps=50000, episode_reward=-20.93 +/- 47.46
Episode length: 1000.00 +/- 0.00
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 1e+03        |
|    mean_reward          | -20.9        |
| time/                   |              |
|    total_timesteps      | 50000        |
| train/                  |              |
|    approx_kl            | 0.0114768725 |
|    clip_fraction        | 0.134        |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.23        |
|    explained_variance   | 0.977        |
|    learning_rate        | 0.00025      |
|    loss                 | 3.37         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.00118     |
|    std                  | 0.992        |
|    value_loss           | 7.69         |
------------------------------------------
New best mean reward!
---------------------------------
| rollout/           |          |
| 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 57.9        |
| time/                   |             |
|    fps                  | 55          |
|    iterations           | 59          |
|    time_elapsed         | 1069        |
|    total_timesteps      | 59000       |
| train/                  |             |
|    approx_kl            | 0.013503713 |
|    clip_fraction        | 0.112       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.00025     |
|    loss                 | 3.82        |
|    n_updates            | 580         |
|    policy_gradient_loss | -0.000865   |
|    std                  | 0.991       |
|    value_loss           | 8.2         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 92.1         |
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 68           |
|    time_elapsed         | 1230         |
|    total_timesteps      | 68000        |
| train/                  |              |
|    approx_kl            | 0.0034563688 |
|    clip_fraction        | 0.083        |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.23        |
|    explained_variance   | 0.97         |
|    learning_rate        | 0.00025      |
|    loss                 | 2.58         |
|    n_updates            | 670          |
|    policy_gradient_loss | -0.00101     |
|    std                  | 0.99         |
|    value_loss           | 7.03         |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 104         |
| time/                   |             |
|    fps                  | 55          |
|    iterations           | 77          |
|    time_elapsed         | 1391        |
|    total_timesteps      | 77000       |
| train/                  |             |
|    approx_kl            | 0.010860515 |
|    clip_fraction        | 0.208       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.22       |
|    explained_variance   | 0.952       |
|    learning_rate        | 0.00025     |
|    loss                 | 3.77        |
|    n_updates            | 760         |
|    policy_gradient_loss | 0.00851     |
|    std                  | 0.989       |
|    value_loss           | 10.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 137        |
| time/                   |            |
|    fps                  | 55         |
|    iterations           | 86         |
|    time_elapsed         | 1552       |
|    total_timesteps      | 86000      |
| train/                  |            |
|    approx_kl            | 0.04105731 |
|    clip_fraction        | 0.247      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.22      |
|    explained_variance   | 0.114      |
|    learning_rate        | 0.00025    |
|    loss                 | 9.86       |
|    n_updates            | 850        |
|    policy_gradient_loss | 0.00905    |
|    std                  | 0.987      |
|    value_loss           | 20.4       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1e+03     |
|    ep_rew_mean          | 132       |
| time/                   |           |
|    fps                  | 55        |
|    iterations           | 95        |
|    time_elapsed         | 1713      |
|    total_timesteps      | 95000     |
| train/                  |           |
|    approx_kl            | 0.0524989 |
|    clip_fraction        | 0.199     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.22     |
|    explained_variance   | 0.674     |
|    learning_rate        | 0.00025   |
|    loss                 | 12.6      |
|    n_updates            | 940       |
|    policy_gradient_loss | 0.0131    |
|    std                  | 0.988     |
|    value_loss           | 30.9      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 127        |
| time/                   |            |
|    fps                  | 54         |
|    iterations           | 104        |
|    time_elapsed         | 1902       |
|    total_timesteps      | 104000     |
| train/                  |            |
|    approx_kl            | 0.06996864 |
|    clip_fraction        | 0.546      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.22      |
|    explained_variance   | 0.924      |
|    learning_rate        | 0.00025    |
|    loss                 | 1.88       |
|    n_updates            | 1030       |
|    policy_gradient_loss | 0.0232     |
|    std                  | 0.989      |
|    value_loss           | 5.4        |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 978        |
|    ep_rew_mean          | 133        |
| time/                   |            |
|    fps                  | 55         |
|    iterations           | 114        |
|    time_elapsed         | 2069       |
|    total_timesteps      | 114000     |
| train/                  |            |
|    approx_kl            | 0.45489055 |
|    clip_fraction        | 0.548      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.23      |
|    explained_variance   | 0.528      |
|    learning_rate        | 0.00025    |
|    loss                 | 26.1       |
|    n_updates            | 1130       |
|    policy_gradient_loss | 0.0817     |
|    std                  | 0.991      |
|    value_loss           | 69.4       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 969         |
|    ep_rew_mean          | 137         |
| time/                   |             |
|    fps                  | 55          |
|    iterations           | 123         |
|    time_elapsed         | 2219        |
|    total_timesteps      | 123000      |
| train/                  |             |
|    approx_kl            | 0.031387556 |
|    clip_fraction        | 0.359       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.406       |
|    learning_rate        | 0.00025     |
|    loss                 | 30.3        |
|    n_updates            | 1220        |
|    policy_gradient_loss | 0.0141      |
|    std                  | 0.992       |
|    value_loss           | 74.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 950        |
|    ep_rew_mean          | 139        |
| time/                   |            |
|    fps                  | 55         |
|    iterations           | 132        |
|    time_elapsed         | 2370       |
|    total_timesteps      | 132000     |
| train/                  |            |
|    approx_kl            | 0.49631426 |
|    clip_fraction        | 0.528      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.23      |
|    explained_variance   | 0.68       |
|    learning_rate        | 0.00025    |
|    loss                 | 59.5       |
|    n_updates            | 1310       |
|    policy_gradient_loss | 0.153      |
|    std                  | 0.992      |
|    value_loss           | 145        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 950        |
|    ep_rew_mean          | 137        |
| time/                   |            |
|    fps                  | 55         |
|    iterations           | 141        |
|    time_elapsed         | 2520       |
|    total_timesteps      | 141000     |
| train/                  |            |
|    approx_kl            | 0.04079236 |
|    clip_fraction        | 0.374      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.23      |
|    explained_variance   | 0.868      |
|    learning_rate        | 0.00025    |
|    loss                 | 10.5       |
|    n_updates            | 1400       |
|    policy_gradient_loss | 0.0288     |
|    std                  | 0.992      |
|    value_loss           | 24.5       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

Eval num_timesteps=150000, episode_reward=496.63 +/- 167.94
Episode length: 1000.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 1e+03       |
|    mean_reward          | 497         |
| time/                   |             |
|    total_timesteps      | 150000      |
| train/                  |             |
|    approx_kl            | 0.025244499 |
|    clip_fraction        | 0.281       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.874       |
|    learning_rate        | 0.00025     |
|    loss                 | 10.4        |
|    n_updates            | 1490        |
|    policy_gradient_loss | 0.00822     |
|    std                  | 0.991       |
|    value_loss           | 25.4        |
-----------------------------------------
New best mean reward!
---------------------------------
| rollout/           |          |
|    ep_len_mean   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 950         |
|    ep_rew_mean          | 166         |
| time/                   |             |
|    fps                  | 55          |
|    iterations           | 159         |
|    time_elapsed         | 2859        |
|    total_timesteps      | 159000      |
| train/                  |             |
|    approx_kl            | 0.083314694 |
|    clip_fraction        | 0.36        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.908       |
|    learning_rate        | 0.00025     |
|    loss                 | 11.5        |
|    n_updates            | 1580        |
|    policy_gradient_loss | 0.0152      |
|    std                  | 0.99        |
|    value_loss           | 27.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 950          |
|    ep_rew_mean          | 192          |
| time/                   |              |
|    fps                  | 55           |
|    iterations           | 168          |
|    time_elapsed         | 3009         |
|    total_timesteps      | 168000       |
| train/                  |              |
|    approx_kl            | 0.0038651056 |
|    clip_fraction        | 0.129        |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.22        |
|    explained_variance   | 0.642        |
|    learning_rate        | 0.00025      |
|    loss                 | 3.91         |
|    n_updates            | 1670         |
|    policy_gradient_loss | -0.00278     |
|    std                  | 0.989        |
|    value_loss           | 9.19         |
------------------------------------------
-----------------------------------------
| rollout/  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 950         |
|    ep_rew_mean          | 216         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 177         |
|    time_elapsed         | 3159        |
|    total_timesteps      | 177000      |
| train/                  |             |
|    approx_kl            | 0.027692525 |
|    clip_fraction        | 0.0786      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.23       |
|    explained_variance   | 0.66        |
|    learning_rate        | 0.00025     |
|    loss                 | 3.98        |
|    n_updates            | 1760        |
|    policy_gradient_loss | 0.000138    |
|    std                  | 0.989       |
|    value_loss           | 6.73        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 950         |
|    ep_rew_mean          | 240         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 186         |
|    time_elapsed         | 3308        |
|    total_timesteps      | 186000      |
| train/                  |             |
|    approx_kl            | 0.038917616 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.22       |
|    explained_variance   | 0.465       |
|    learning_rate        | 0.00025     |
|    loss                 | 5.53        |
|    n_updates            | 1850        |
|    policy_gradient_loss | 0.0032      |
|    std                  | 0.989       |
|    value_loss           | 12.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 950          |
|    ep_rew_mean          | 280          |
| time/                   |              |
|    fps                  | 56           |
|    iterations           | 195          |
|    time_elapsed         | 3459         |
|    total_timesteps      | 195000       |
| train/                  |              |
|    approx_kl            | 0.0077149183 |
|    clip_fraction        | 0.111        |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.22        |
|    explained_variance   | 0.48         |
|    learning_rate        | 0.00025      |
|    loss                 | 4.01         |
|    n_updates            | 1940         |
|    policy_gradient_loss | -0.00291     |
|    std                  | 0.988        |
|    value_loss           | 9            |
------------------------------------------
----------------------------------------
| rollout/   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 957        |
|    ep_rew_mean          | 326        |
| time/                   |            |
|    fps                  | 55         |
|    iterations           | 204        |
|    time_elapsed         | 3645       |
|    total_timesteps      | 204000     |
| train/                  |            |
|    approx_kl            | 0.03786489 |
|    clip_fraction        | 0.391      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.22      |
|    explained_variance   | 0.735      |
|    learning_rate        | 0.00025    |
|    loss                 | 18.1       |
|    n_updates            | 2030       |
|    policy_gradient_loss | 0.0264     |
|    std                  | 0.988      |
|    value_loss           | 42.6       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 973        |
|    ep_rew_mean          | 367        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 213        |
|    time_elapsed         | 3795       |
|    total_timesteps      | 213000     |
| train/                  |            |
|    approx_kl            | 0.08016837 |
|    clip_fraction        | 0.324      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.21      |
|    explained_variance   | 0.707      |
|    learning_rate        | 0.00025    |
|    loss                 | 28.7       |
|    n_updates            | 2120       |
|    policy_gradient_loss | 0.0262     |
|    std                  | 0.985      |
|    value_loss           | 60.5       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 981         |
|    ep_rew_mean          | 408         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 222         |
|    time_elapsed         | 3945        |
|    total_timesteps      | 222000      |
| train/                  |             |
|    approx_kl            | 0.018562421 |
|    clip_fraction        | 0.295       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.21       |
|    explained_variance   | 0.979       |
|    learning_rate        | 0.00025     |
|    loss                 | 5.58        |
|    n_updates            | 2210        |
|    policy_gradient_loss | 0.0239      |
|    std                  | 0.984       |
|    value_loss           | 14.3        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 461         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 231         |
|    time_elapsed         | 4095        |
|    total_timesteps      | 231000      |
| train/                  |             |
|    approx_kl            | 0.036196355 |
|    clip_fraction        | 0.43        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.21       |
|    explained_variance   | 0.575       |
|    learning_rate        | 0.00025     |
|    loss                 | 7.36        |
|    n_updates            | 2300        |
|    policy_gradient_loss | 0.0275      |
|    std                  | 0.983       |
|    value_loss           | 18.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 509         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 241         |
|    time_elapsed         | 4261        |
|    total_timesteps      | 241000      |
| train/                  |             |
|    approx_kl            | 0.013947659 |
|    clip_fraction        | 0.444       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.471       |
|    learning_rate        | 0.00025     |
|    loss                 | 4.76        |
|    n_updates            | 2400        |
|    policy_gradient_loss | 0.0294      |
|    std                  | 0.982       |
|    value_loss           | 10.5        |
-----------------------------------------
----------------------------------------
| rollout/                |        

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 525      |
| time/              |          |
|    fps             | 56       |
|    iterations      | 250      |
|    time_elapsed    | 4446     |
|    total_timesteps | 250000   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 525        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 251        |
|    time_elapsed         | 4463       |
|    total_timesteps      | 251000     |
| train/                  |            |
|    approx_kl            | 0.01972804 |
|    clip_fraction        | 0.203      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.856      |
|    learning_rate        | 0.00025    |
|   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 996        |
|    ep_rew_mean          | 509        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 260        |
|    time_elapsed         | 4612       |
|    total_timesteps      | 260000     |
| train/                  |            |
|    approx_kl            | 0.06345063 |
|    clip_fraction        | 0.321      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.885      |
|    learning_rate        | 0.00025    |
|    loss                 | 24.2       |
|    n_updates            | 2590       |
|    policy_gradient_loss | 0.0198     |
|    std                  | 0.982      |
|    value_loss           | 59.4       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 989       |
|    ep_rew_mean          | 484       |
| time/                   |           |
|    fps                  | 56        |
|    iterations           | 269       |
|    time_elapsed         | 4762      |
|    total_timesteps      | 269000    |
| train/                  |           |
|    approx_kl            | 0.1627238 |
|    clip_fraction        | 0.428     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.2      |
|    explained_variance   | 0.849     |
|    learning_rate        | 0.00025   |
|    loss                 | 37.6      |
|    n_updates            | 2680      |
|    policy_gradient_loss | 0.034     |
|    std                  | 0.981     |
|    value_loss           | 82.1      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 989     

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 982       |
|    ep_rew_mean          | 462       |
| time/                   |           |
|    fps                  | 56        |
|    iterations           | 279       |
|    time_elapsed         | 4928      |
|    total_timesteps      | 279000    |
| train/                  |           |
|    approx_kl            | 0.1342328 |
|    clip_fraction        | 0.561     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.2      |
|    explained_variance   | 0.95      |
|    learning_rate        | 0.00025   |
|    loss                 | 18.6      |
|    n_updates            | 2780      |
|    policy_gradient_loss | 0.065     |
|    std                  | 0.981     |
|    value_loss           | 40.7      |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 982       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 982        |
|    ep_rew_mean          | 455        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 289        |
|    time_elapsed         | 5093       |
|    total_timesteps      | 289000     |
| train/                  |            |
|    approx_kl            | 0.06363917 |
|    clip_fraction        | 0.233      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.814      |
|    learning_rate        | 0.00025    |
|    loss                 | 32.9       |
|    n_updates            | 2880       |
|    policy_gradient_loss | 0.00678    |
|    std                  | 0.981      |
|    value_loss           | 75.3       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 976         |
|    ep_rew_mean          | 464         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 298         |
|    time_elapsed         | 5242        |
|    total_timesteps      | 298000      |
| train/                  |             |
|    approx_kl            | 0.090503916 |
|    clip_fraction        | 0.324       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.897       |
|    learning_rate        | 0.00025     |
|    loss                 | 55.2        |
|    n_updates            | 2970        |
|    policy_gradient_loss | 0.0152      |
|    std                  | 0.98        |
|    value_loss           | 128         |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 974        |
|    ep_rew_mean          | 472        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 307        |
|    time_elapsed         | 5425       |
|    total_timesteps      | 307000     |
| train/                  |            |
|    approx_kl            | 0.02965652 |
|    clip_fraction        | 0.219      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.468      |
|    learning_rate        | 0.00025    |
|    loss                 | 30         |
|    n_updates            | 3060       |
|    policy_gradient_loss | 0.00384    |
|    std                  | 0.98       |
|    value_loss           | 77.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 974         |
|    ep_rew_mean          | 491         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 317         |
|    time_elapsed         | 5591        |
|    total_timesteps      | 317000      |
| train/                  |             |
|    approx_kl            | 0.028612724 |
|    clip_fraction        | 0.32        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.19       |
|    explained_variance   | 0.942       |
|    learning_rate        | 0.00025     |
|    loss                 | 13.3        |
|    n_updates            | 3160        |
|    policy_gradient_loss | 0.0175      |
|    std                  | 0.979       |
|    value_loss           | 32.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 974        |
|    ep_rew_mean          | 494        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 326        |
|    time_elapsed         | 5741       |
|    total_timesteps      | 326000     |
| train/                  |            |
|    approx_kl            | 0.03635069 |
|    clip_fraction        | 0.354      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.847      |
|    learning_rate        | 0.00025    |
|    loss                 | 39.6       |
|    n_updates            | 3250       |
|    policy_gradient_loss | 0.0183     |
|    std                  | 0.977      |
|    value_loss           | 95.7       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 974         |
|    ep_rew_mean          | 489         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 336         |
|    time_elapsed         | 5907        |
|    total_timesteps      | 336000      |
| train/                  |             |
|    approx_kl            | 0.078407526 |
|    clip_fraction        | 0.51        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.19       |
|    explained_variance   | 0.662       |
|    learning_rate        | 0.00025     |
|    loss                 | 41.5        |
|    n_updates            | 3350        |
|    policy_gradient_loss | 0.0556      |
|    std                  | 0.977       |
|    value_loss           | 85.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 974        |
|    ep_rew_mean          | 485        |
| time/                   |            |
|    fps                  | 56         |
|    iterations           | 345        |
|    time_elapsed         | 6056       |
|    total_timesteps      | 345000     |
| train/                  |            |
|    approx_kl            | 0.04441912 |
|    clip_fraction        | 0.164      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.769      |
|    learning_rate        | 0.00025    |
|    loss                 | 31.6       |
|    n_updates            | 3440       |
|    policy_gradient_loss | 0.00659    |
|    std                  | 0.977      |
|    value_loss           | 79.8       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 978         |
|    ep_rew_mean          | 497         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 354         |
|    time_elapsed         | 6242        |
|    total_timesteps      | 354000      |
| train/                  |             |
|    approx_kl            | 0.062288437 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.862       |
|    learning_rate        | 0.00025     |
|    loss                 | 36.5        |
|    n_updates            | 3530        |
|    policy_gradient_loss | 0.0157      |
|    std                  | 0.976       |
|    value_loss           | 86.7        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 978         |
|    ep_rew_mean          | 505         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 363         |
|    time_elapsed         | 6391        |
|    total_timesteps      | 363000      |
| train/                  |             |
|    approx_kl            | 0.030581675 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.84        |
|    learning_rate        | 0.00025     |
|    loss                 | 33.4        |
|    n_updates            | 3620        |
|    policy_gradient_loss | 0.00228     |
|    std                  | 0.976       |
|    value_loss           | 75.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 984         |
|    ep_rew_mean          | 523         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 372         |
|    time_elapsed         | 6540        |
|    total_timesteps      | 372000      |
| train/                  |             |
|    approx_kl            | 0.043835167 |
|    clip_fraction        | 0.319       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.929       |
|    learning_rate        | 0.00025     |
|    loss                 | 18.4        |
|    n_updates            | 3710        |
|    policy_gradient_loss | 0.0218      |
|    std                  | 0.976       |
|    value_loss           | 41.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 991         |
|    ep_rew_mean          | 531         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 382         |
|    time_elapsed         | 6706        |
|    total_timesteps      | 382000      |
| train/                  |             |
|    approx_kl            | 0.034156676 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.813       |
|    learning_rate        | 0.00025     |
|    loss                 | 50.8        |
|    n_updates            | 3810        |
|    policy_gradient_loss | 0.0128      |
|    std                  | 0.976       |
|    value_loss           | 119         |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 996         |
|    ep_rew_mean          | 524         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 391         |
|    time_elapsed         | 6855        |
|    total_timesteps      | 391000      |
| train/                  |             |
|    approx_kl            | 0.021719063 |
|    clip_fraction        | 0.239       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.91        |
|    learning_rate        | 0.00025     |
|    loss                 | 24.2        |
|    n_updates            | 3900        |
|    policy_gradient_loss | 0.00659     |
|    std                  | 0.976       |
|    value_loss           | 60.3        |
-----------------------------------------
----------------------------------------
| rollout/                |        

Eval num_timesteps=400000, episode_reward=117.12 +/- 135.04
Episode length: 826.80 +/- 235.16
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 827         |
|    mean_reward          | 117         |
| time/                   |             |
|    total_timesteps      | 400000      |
| train/                  |             |
|    approx_kl            | 0.022949532 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.00025     |
|    loss                 | 19.3        |
|    n_updates            | 3990        |
|    policy_gradient_loss | 0.00889     |
|    std                  | 0.975       |
|    value_loss           | 43.2        |
-----------------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 995      |
|    e

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 993         |
|    ep_rew_mean          | 502         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 409         |
|    time_elapsed         | 7182        |
|    total_timesteps      | 409000      |
| train/                  |             |
|    approx_kl            | 0.062870465 |
|    clip_fraction        | 0.45        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.872       |
|    learning_rate        | 0.00025     |
|    loss                 | 52.2        |
|    n_updates            | 4080        |
|    policy_gradient_loss | 0.038       |
|    std                  | 0.975       |
|    value_loss           | 116         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 993         |
|    ep_rew_mean          | 491         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 418         |
|    time_elapsed         | 7330        |
|    total_timesteps      | 418000      |
| train/                  |             |
|    approx_kl            | 0.059564322 |
|    clip_fraction        | 0.366       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.818       |
|    learning_rate        | 0.00025     |
|    loss                 | 55.3        |
|    n_updates            | 4170        |
|    policy_gradient_loss | 0.0245      |
|    std                  | 0.975       |
|    value_loss           | 121         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 994         |
|    ep_rew_mean          | 483         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 427         |
|    time_elapsed         | 7479        |
|    total_timesteps      | 427000      |
| train/                  |             |
|    approx_kl            | 0.048515055 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.882       |
|    learning_rate        | 0.00025     |
|    loss                 | 38.4        |
|    n_updates            | 4260        |
|    policy_gradient_loss | 0.00627     |
|    std                  | 0.975       |
|    value_loss           | 90.2        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 994         |
|    ep_rew_mean          | 477         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 436         |
|    time_elapsed         | 7627        |
|    total_timesteps      | 436000      |
| train/                  |             |
|    approx_kl            | 0.008592274 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.9         |
|    learning_rate        | 0.00025     |
|    loss                 | 27.8        |
|    n_updates            | 4350        |
|    policy_gradient_loss | 0.00637     |
|    std                  | 0.975       |
|    value_loss           | 66.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 994         |
|    ep_rew_mean          | 493         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 445         |
|    time_elapsed         | 7776        |
|    total_timesteps      | 445000      |
| train/                  |             |
|    approx_kl            | 0.061626345 |
|    clip_fraction        | 0.311       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.787       |
|    learning_rate        | 0.00025     |
|    loss                 | 44.2        |
|    n_updates            | 4440        |
|    policy_gradient_loss | 0.0232      |
|    std                  | 0.974       |
|    value_loss           | 101         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 994         |
|    ep_rew_mean          | 515         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 454         |
|    time_elapsed         | 7959        |
|    total_timesteps      | 454000      |
| train/                  |             |
|    approx_kl            | 0.015303755 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.17       |
|    explained_variance   | 0.739       |
|    learning_rate        | 0.00025     |
|    loss                 | 7.07        |
|    n_updates            | 4530        |
|    policy_gradient_loss | -0.00218    |
|    std                  | 0.973       |
|    value_loss           | 16.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 994        |
|    ep_rew_mean          | 527        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 463        |
|    time_elapsed         | 8108       |
|    total_timesteps      | 463000     |
| train/                  |            |
|    approx_kl            | 0.02436515 |
|    clip_fraction        | 0.333      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.17      |
|    explained_variance   | 0.926      |
|    learning_rate        | 0.00025    |
|    loss                 | 5.67       |
|    n_updates            | 4620       |
|    policy_gradient_loss | 0.0235     |
|    std                  | 0.97       |
|    value_loss           | 13         |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 995        |
|    ep_rew_mean          | 544        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 472        |
|    time_elapsed         | 8256       |
|    total_timesteps      | 472000     |
| train/                  |            |
|    approx_kl            | 0.47757262 |
|    clip_fraction        | 0.507      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.18      |
|    explained_variance   | 0.615      |
|    learning_rate        | 0.00025    |
|    loss                 | 7.29       |
|    n_updates            | 4710       |
|    policy_gradient_loss | 0.0782     |
|    std                  | 0.974      |
|    value_loss           | 14         |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 995        |
|    ep_rew_mean          | 531        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 481        |
|    time_elapsed         | 8404       |
|    total_timesteps      | 481000     |
| train/                  |            |
|    approx_kl            | 0.25616485 |
|    clip_fraction        | 0.358      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.17      |
|    explained_variance   | 0.185      |
|    learning_rate        | 0.00025    |
|    loss                 | 5.62       |
|    n_updates            | 4800       |
|    policy_gradient_loss | 0.044      |
|    std                  | 0.971      |
|    value_loss           | 20.2       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 990         |
|    ep_rew_mean          | 489         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 490         |
|    time_elapsed         | 8552        |
|    total_timesteps      | 490000      |
| train/                  |             |
|    approx_kl            | 0.010415409 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.798       |
|    learning_rate        | 0.00025     |
|    loss                 | 1.08        |
|    n_updates            | 4890        |
|    policy_gradient_loss | 0.00137     |
|    std                  | 0.973       |
|    value_loss           | 2.66        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 994         |
|    ep_rew_mean          | 445         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 499         |
|    time_elapsed         | 8700        |
|    total_timesteps      | 499000      |
| train/                  |             |
|    approx_kl            | 0.025901385 |
|    clip_fraction        | 0.179       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.18       |
|    explained_variance   | 0.828       |
|    learning_rate        | 0.00025     |
|    loss                 | 1.39        |
|    n_updates            | 4980        |
|    policy_gradient_loss | 0.0029      |
|    std                  | 0.974       |
|    value_loss           | 3.52        |
-----------------------------------------
Eval num_timesteps=500000, episode_reward=-34.52 +/- 5.55
Episode length: 10

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 981       |
|    ep_rew_mean          | 403       |
| time/                   |           |
|    fps                  | 57        |
|    iterations           | 508       |
|    time_elapsed         | 8883      |
|    total_timesteps      | 508000    |
| train/                  |           |
|    approx_kl            | 0.5535114 |
|    clip_fraction        | 0.412     |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.18     |
|    explained_variance   | 0.769     |
|    learning_rate        | 0.00025   |
|    loss                 | 41.2      |
|    n_updates            | 5070      |
|    policy_gradient_loss | 0.0726    |
|    std                  | 0.973     |
|    value_loss           | 125       |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 973       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 973        |
|    ep_rew_mean          | 383        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 518        |
|    time_elapsed         | 9047       |
|    total_timesteps      | 518000     |
| train/                  |            |
|    approx_kl            | 0.26482767 |
|    clip_fraction        | 0.41       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.18      |
|    explained_variance   | 0.951      |
|    learning_rate        | 0.00025    |
|    loss                 | 8.59       |
|    n_updates            | 5170       |
|    policy_gradient_loss | 0.0154     |
|    std                  | 0.975      |
|    value_loss           | 22.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 966        |
|    ep_rew_mean          | 366        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 528        |
|    time_elapsed         | 9212       |
|    total_timesteps      | 528000     |
| train/                  |            |
|    approx_kl            | 0.09585554 |
|    clip_fraction        | 0.453      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.18      |
|    explained_variance   | 0.865      |
|    learning_rate        | 0.00025    |
|    loss                 | 52         |
|    n_updates            | 5270       |
|    policy_gradient_loss | 0.0372     |
|    std                  | 0.976      |
|    value_loss           | 119        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 966        |
|    ep_rew_mean          | 345        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 538        |
|    time_elapsed         | 9376       |
|    total_timesteps      | 538000     |
| train/                  |            |
|    approx_kl            | 0.04698991 |
|    clip_fraction        | 0.431      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.865      |
|    learning_rate        | 0.00025    |
|    loss                 | 68.5       |
|    n_updates            | 5370       |
|    policy_gradient_loss | 0.0544     |
|    std                  | 0.978      |
|    value_loss           | 156        |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 966        |
|    ep_rew_mean          | 309        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 548        |
|    time_elapsed         | 9541       |
|    total_timesteps      | 548000     |
| train/                  |            |
|    approx_kl            | 0.08874175 |
|    clip_fraction        | 0.483      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.842      |
|    learning_rate        | 0.00025    |
|    loss                 | 62.7       |
|    n_updates            | 5470       |
|    policy_gradient_loss | 0.0632     |
|    std                  | 0.978      |
|    value_loss           | 138        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 966        |
|    ep_rew_mean          | 295        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 557        |
|    time_elapsed         | 9724       |
|    total_timesteps      | 557000     |
| train/                  |            |
|    approx_kl            | 0.06423821 |
|    clip_fraction        | 0.472      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.901      |
|    learning_rate        | 0.00025    |
|    loss                 | 35.5       |
|    n_updates            | 5560       |
|    policy_gradient_loss | 0.0624     |
|    std                  | 0.979      |
|    value_loss           | 83         |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 966        |
|    ep_rew_mean          | 272        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 567        |
|    time_elapsed         | 9890       |
|    total_timesteps      | 567000     |
| train/                  |            |
|    approx_kl            | 0.07779727 |
|    clip_fraction        | 0.325      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.969      |
|    learning_rate        | 0.00025    |
|    loss                 | 12.6       |
|    n_updates            | 5660       |
|    policy_gradient_loss | 0.0147     |
|    std                  | 0.979      |
|    value_loss           | 35.1       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 959        |
|    ep_rew_mean          | 282        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 577        |
|    time_elapsed         | 10055      |
|    total_timesteps      | 577000     |
| train/                  |            |
|    approx_kl            | 0.03575795 |
|    clip_fraction        | 0.43       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.944      |
|    learning_rate        | 0.00025    |
|    loss                 | 21.1       |
|    n_updates            | 5760       |
|    policy_gradient_loss | 0.0433     |
|    std                  | 0.979      |
|    value_loss           | 55.1       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 945        |
|    ep_rew_mean          | 338        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 587        |
|    time_elapsed         | 10220      |
|    total_timesteps      | 587000     |
| train/                  |            |
|    approx_kl            | 0.16394407 |
|    clip_fraction        | 0.5        |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.962      |
|    learning_rate        | 0.00025    |
|    loss                 | 23.1       |
|    n_updates            | 5860       |
|    policy_gradient_loss | 0.0365     |
|    std                  | 0.978      |
|    value_loss           | 59.9       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 936         |
|    ep_rew_mean          | 384         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 597         |
|    time_elapsed         | 10386       |
|    total_timesteps      | 597000      |
| train/                  |             |
|    approx_kl            | 0.060628604 |
|    clip_fraction        | 0.522       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.19       |
|    explained_variance   | 0.85        |
|    learning_rate        | 0.00025     |
|    loss                 | 55.8        |
|    n_updates            | 5960        |
|    policy_gradient_loss | 0.0889      |
|    std                  | 0.979       |
|    value_loss           | 123         |
-----------------------------------------
---------------------------------------
| rollout/                |         

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 946        |
|    ep_rew_mean          | 406        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 606        |
|    time_elapsed         | 10564      |
|    total_timesteps      | 606000     |
| train/                  |            |
|    approx_kl            | 0.33992758 |
|    clip_fraction        | 0.473      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.884      |
|    learning_rate        | 0.00025    |
|    loss                 | 35.8       |
|    n_updates            | 6050       |
|    policy_gradient_loss | 0.0627     |
|    std                  | 0.98       |
|    value_loss           | 81.7       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 946      |
|    ep_rew_mean          | 445      |
| time/                   |          |
|    fps                  | 57       |
|    iterations           | 615      |
|    time_elapsed         | 10714    |
|    total_timesteps      | 615000   |
| train/                  |          |
|    approx_kl            | 0.069574 |
|    clip_fraction        | 0.316    |
|    clip_range           | 0.2      |
|    entropy_loss         | -4.19    |
|    explained_variance   | 0.694    |
|    learning_rate        | 0.00025  |
|    loss                 | 45.4     |
|    n_updates            | 6140     |
|    policy_gradient_loss | 0.0267   |
|    std                  | 0.98     |
|    value_loss           | 96.8     |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 946         |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 947        |
|    ep_rew_mean          | 474        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 625        |
|    time_elapsed         | 10880      |
|    total_timesteps      | 625000     |
| train/                  |            |
|    approx_kl            | 0.07473934 |
|    clip_fraction        | 0.383      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.939      |
|    learning_rate        | 0.00025    |
|    loss                 | 21.5       |
|    n_updates            | 6240       |
|    policy_gradient_loss | 0.0362     |
|    std                  | 0.979      |
|    value_loss           | 57.8       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 942        |
|    ep_rew_mean          | 496        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 634        |
|    time_elapsed         | 11030      |
|    total_timesteps      | 634000     |
| train/                  |            |
|    approx_kl            | 0.19934256 |
|    clip_fraction        | 0.328      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.905      |
|    learning_rate        | 0.00025    |
|    loss                 | 25         |
|    n_updates            | 6330       |
|    policy_gradient_loss | 0.0291     |
|    std                  | 0.98       |
|    value_loss           | 54.3       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 927        |
|    ep_rew_mean          | 511        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 643        |
|    time_elapsed         | 11180      |
|    total_timesteps      | 643000     |
| train/                  |            |
|    approx_kl            | 0.06185115 |
|    clip_fraction        | 0.271      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.939      |
|    learning_rate        | 0.00025    |
|    loss                 | 32         |
|    n_updates            | 6420       |
|    policy_gradient_loss | 0.014      |
|    std                  | 0.979      |
|    value_loss           | 81.3       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 927         |
|    ep_rew_mean          | 534         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 652         |
|    time_elapsed         | 11360       |
|    total_timesteps      | 652000      |
| train/                  |             |
|    approx_kl            | 0.023286887 |
|    clip_fraction        | 0.299       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.19       |
|    explained_variance   | 0.74        |
|    learning_rate        | 0.00025     |
|    loss                 | 32.1        |
|    n_updates            | 6510        |
|    policy_gradient_loss | 0.00955     |
|    std                  | 0.979       |
|    value_loss           | 65.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 927        |
|    ep_rew_mean          | 549        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 661        |
|    time_elapsed         | 11510      |
|    total_timesteps      | 661000     |
| train/                  |            |
|    approx_kl            | 0.11882383 |
|    clip_fraction        | 0.361      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.84       |
|    learning_rate        | 0.00025    |
|    loss                 | 16.6       |
|    n_updates            | 6600       |
|    policy_gradient_loss | 0.0333     |
|    std                  | 0.979      |
|    value_loss           | 49.6       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 933         |
|    ep_rew_mean          | 568         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 671         |
|    time_elapsed         | 11676       |
|    total_timesteps      | 671000      |
| train/                  |             |
|    approx_kl            | 0.025543842 |
|    clip_fraction        | 0.369       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.19       |
|    explained_variance   | 0.921       |
|    learning_rate        | 0.00025     |
|    loss                 | 40          |
|    n_updates            | 6700        |
|    policy_gradient_loss | 0.0336      |
|    std                  | 0.979       |
|    value_loss           | 95.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 949        |
|    ep_rew_mean          | 578        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 681        |
|    time_elapsed         | 11842      |
|    total_timesteps      | 681000     |
| train/                  |            |
|    approx_kl            | 0.09156466 |
|    clip_fraction        | 0.42       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.859      |
|    learning_rate        | 0.00025    |
|    loss                 | 39.8       |
|    n_updates            | 6800       |
|    policy_gradient_loss | 0.0426     |
|    std                  | 0.979      |
|    value_loss           | 93.9       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 964        |
|    ep_rew_mean          | 591        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 691        |
|    time_elapsed         | 12006      |
|    total_timesteps      | 691000     |
| train/                  |            |
|    approx_kl            | 0.14475498 |
|    clip_fraction        | 0.364      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.19      |
|    explained_variance   | 0.889      |
|    learning_rate        | 0.00025    |
|    loss                 | 32.9       |
|    n_updates            | 6900       |
|    policy_gradient_loss | 0.0402     |
|    std                  | 0.98       |
|    value_loss           | 79.8       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 964      |
|    ep_rew_mean     | 589      |
| time/              |          |
|    fps             | 57       |
|    iterations      | 700      |
|    time_elapsed    | 12188    |
|    total_timesteps | 700000   |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 969        |
|    ep_rew_mean          | 593        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 701        |
|    time_elapsed         | 12204      |
|    total_timesteps      | 701000     |
| train/                  |            |
|    approx_kl            | 0.03544883 |
|    clip_fraction        | 0.33       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.00025    |
|   

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 966      |
|    ep_rew_mean          | 581      |
| time/                   |          |
|    fps                  | 57       |
|    iterations           | 710      |
|    time_elapsed         | 12352    |
|    total_timesteps      | 710000   |
| train/                  |          |
|    approx_kl            | 0.093929 |
|    clip_fraction        | 0.544    |
|    clip_range           | 0.2      |
|    entropy_loss         | -4.2     |
|    explained_variance   | 0.912    |
|    learning_rate        | 0.00025  |
|    loss                 | 55.6     |
|    n_updates            | 7090     |
|    policy_gradient_loss | 0.0804   |
|    std                  | 0.983    |
|    value_loss           | 150      |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 966         |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 970         |
|    ep_rew_mean          | 575         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 720         |
|    time_elapsed         | 12517       |
|    total_timesteps      | 720000      |
| train/                  |             |
|    approx_kl            | 0.015763937 |
|    clip_fraction        | 0.329       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.916       |
|    learning_rate        | 0.00025     |
|    loss                 | 31          |
|    n_updates            | 7190        |
|    policy_gradient_loss | 0.05        |
|    std                  | 0.983       |
|    value_loss           | 70.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 977         |
|    ep_rew_mean          | 579         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 730         |
|    time_elapsed         | 12682       |
|    total_timesteps      | 730000      |
| train/                  |             |
|    approx_kl            | 0.053863503 |
|    clip_fraction        | 0.553       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.21       |
|    explained_variance   | 0.883       |
|    learning_rate        | 0.00025     |
|    loss                 | 36.8        |
|    n_updates            | 7290        |
|    policy_gradient_loss | 0.0801      |
|    std                  | 0.984       |
|    value_loss           | 78.7        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 985         |
|    ep_rew_mean          | 584         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 739         |
|    time_elapsed         | 12830       |
|    total_timesteps      | 739000      |
| train/                  |             |
|    approx_kl            | 0.052607663 |
|    clip_fraction        | 0.453       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.21       |
|    explained_variance   | 0.856       |
|    learning_rate        | 0.00025     |
|    loss                 | 41.9        |
|    n_updates            | 7380        |
|    policy_gradient_loss | 0.0442      |
|    std                  | 0.984       |
|    value_loss           | 77.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 990        |
|    ep_rew_mean          | 599        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 748        |
|    time_elapsed         | 12978      |
|    total_timesteps      | 748000     |
| train/                  |            |
|    approx_kl            | 0.03641615 |
|    clip_fraction        | 0.158      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.21      |
|    explained_variance   | 0.606      |
|    learning_rate        | 0.00025    |
|    loss                 | 12.6       |
|    n_updates            | 7470       |
|    policy_gradient_loss | 0.00588    |
|    std                  | 0.985      |
|    value_loss           | 33.9       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 988         |
|    ep_rew_mean          | 605         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 757         |
|    time_elapsed         | 13162       |
|    total_timesteps      | 757000      |
| train/                  |             |
|    approx_kl            | 0.019772602 |
|    clip_fraction        | 0.316       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.718       |
|    learning_rate        | 0.00025     |
|    loss                 | 20.4        |
|    n_updates            | 7560        |
|    policy_gradient_loss | 0.0168      |
|    std                  | 0.983       |
|    value_loss           | 46.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 987        |
|    ep_rew_mean          | 606        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 766        |
|    time_elapsed         | 13310      |
|    total_timesteps      | 766000     |
| train/                  |            |
|    approx_kl            | 0.08240336 |
|    clip_fraction        | 0.476      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.839      |
|    learning_rate        | 0.00025    |
|    loss                 | 38.1       |
|    n_updates            | 7650       |
|    policy_gradient_loss | 0.0533     |
|    std                  | 0.983      |
|    value_loss           | 91.9       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 991         |
|    ep_rew_mean          | 627         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 775         |
|    time_elapsed         | 13458       |
|    total_timesteps      | 775000      |
| train/                  |             |
|    approx_kl            | 0.022248836 |
|    clip_fraction        | 0.178       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.897       |
|    learning_rate        | 0.00025     |
|    loss                 | 23.2        |
|    n_updates            | 7740        |
|    policy_gradient_loss | 0.0075      |
|    std                  | 0.982       |
|    value_loss           | 56.5        |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 991        |
|    ep_rew_mean          | 636        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 784        |
|    time_elapsed         | 13607      |
|    total_timesteps      | 784000     |
| train/                  |            |
|    approx_kl            | 0.12295924 |
|    clip_fraction        | 0.338      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.906      |
|    learning_rate        | 0.00025    |
|    loss                 | 32.8       |
|    n_updates            | 7830       |
|    policy_gradient_loss | 0.0231     |
|    std                  | 0.982      |
|    value_loss           | 75.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 991        |
|    ep_rew_mean          | 647        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 794        |
|    time_elapsed         | 13771      |
|    total_timesteps      | 794000     |
| train/                  |            |
|    approx_kl            | 0.08448947 |
|    clip_fraction        | 0.288      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.946      |
|    learning_rate        | 0.00025    |
|    loss                 | 19.5       |
|    n_updates            | 7930       |
|    policy_gradient_loss | 0.0167     |
|    std                  | 0.983      |
|    value_loss           | 51.3       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 991         |
|    ep_rew_mean          | 658         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 803         |
|    time_elapsed         | 13954       |
|    total_timesteps      | 803000      |
| train/                  |             |
|    approx_kl            | 0.036002785 |
|    clip_fraction        | 0.218       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.879       |
|    learning_rate        | 0.00025     |
|    loss                 | 36.2        |
|    n_updates            | 8020        |
|    policy_gradient_loss | 0.0191      |
|    std                  | 0.984       |
|    value_loss           | 85.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 995         |
|    ep_rew_mean          | 680         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 813         |
|    time_elapsed         | 14119       |
|    total_timesteps      | 813000      |
| train/                  |             |
|    approx_kl            | 0.021637166 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.918       |
|    learning_rate        | 0.00025     |
|    loss                 | 24.7        |
|    n_updates            | 8120        |
|    policy_gradient_loss | 0.00345     |
|    std                  | 0.984       |
|    value_loss           | 54.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 995        |
|    ep_rew_mean          | 680        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 822        |
|    time_elapsed         | 14267      |
|    total_timesteps      | 822000     |
| train/                  |            |
|    approx_kl            | 0.05492637 |
|    clip_fraction        | 0.434      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.91       |
|    learning_rate        | 0.00025    |
|    loss                 | 34.7       |
|    n_updates            | 8210       |
|    policy_gradient_loss | 0.0312     |
|    std                  | 0.983      |
|    value_loss           | 81.8       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 995         |
|    ep_rew_mean          | 683         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 831         |
|    time_elapsed         | 14415       |
|    total_timesteps      | 831000      |
| train/                  |             |
|    approx_kl            | 0.050647154 |
|    clip_fraction        | 0.297       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.935       |
|    learning_rate        | 0.00025     |
|    loss                 | 24.8        |
|    n_updates            | 8300        |
|    policy_gradient_loss | 0.0228      |
|    std                  | 0.984       |
|    value_loss           | 61.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 995         |
|    ep_rew_mean          | 686         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 840         |
|    time_elapsed         | 14563       |
|    total_timesteps      | 840000      |
| train/                  |             |
|    approx_kl            | 0.037557576 |
|    clip_fraction        | 0.238       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.896       |
|    learning_rate        | 0.00025     |
|    loss                 | 22.2        |
|    n_updates            | 8390        |
|    policy_gradient_loss | 0.0112      |
|    std                  | 0.982       |
|    value_loss           | 48.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 998        |
|    ep_rew_mean          | 680        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 849        |
|    time_elapsed         | 14711      |
|    total_timesteps      | 849000     |
| train/                  |            |
|    approx_kl            | 0.09628395 |
|    clip_fraction        | 0.426      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.21      |
|    explained_variance   | 0.933      |
|    learning_rate        | 0.00025    |
|    loss                 | 30         |
|    n_updates            | 8480       |
|    policy_gradient_loss | 0.0358     |
|    std                  | 0.985      |
|    value_loss           | 75.5       |
----------------------------------------
Eval num_timesteps=850000, episode_reward=725.22 +/- 92.32
Episode length: 1000.00 +/- 0.00
------

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 680        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 858        |
|    time_elapsed         | 14894      |
|    total_timesteps      | 858000     |
| train/                  |            |
|    approx_kl            | 0.14591074 |
|    clip_fraction        | 0.432      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.948      |
|    learning_rate        | 0.00025    |
|    loss                 | 20.8       |
|    n_updates            | 8570       |
|    policy_gradient_loss | 0.0719     |
|    std                  | 0.985      |
|    value_loss           | 47.1       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 675        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 868        |
|    time_elapsed         | 15059      |
|    total_timesteps      | 868000     |
| train/                  |            |
|    approx_kl            | 0.06211298 |
|    clip_fraction        | 0.369      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.00025    |
|    loss                 | 54         |
|    n_updates            | 8670       |
|    policy_gradient_loss | 0.0302     |
|    std                  | 0.984      |
|    value_loss           | 110        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 998         |
|    ep_rew_mean          | 680         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 877         |
|    time_elapsed         | 15207       |
|    total_timesteps      | 877000      |
| train/                  |             |
|    approx_kl            | 0.032301184 |
|    clip_fraction        | 0.329       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.00025     |
|    loss                 | 38.7        |
|    n_updates            | 8760        |
|    policy_gradient_loss | 0.0289      |
|    std                  | 0.984       |
|    value_loss           | 68.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 998        |
|    ep_rew_mean          | 701        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 887        |
|    time_elapsed         | 15371      |
|    total_timesteps      | 887000     |
| train/                  |            |
|    approx_kl            | 0.09827158 |
|    clip_fraction        | 0.487      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.956      |
|    learning_rate        | 0.00025    |
|    loss                 | 28.7       |
|    n_updates            | 8860       |
|    policy_gradient_loss | 0.0352     |
|    std                  | 0.985      |
|    value_loss           | 83.9       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 998         |
|    ep_rew_mean          | 714         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 897         |
|    time_elapsed         | 15536       |
|    total_timesteps      | 897000      |
| train/                  |             |
|    approx_kl            | 0.029986208 |
|    clip_fraction        | 0.302       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.951       |
|    learning_rate        | 0.00025     |
|    loss                 | 19.3        |
|    n_updates            | 8960        |
|    policy_gradient_loss | 0.00805     |
|    std                  | 0.984       |
|    value_loss           | 45.1        |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 998        |
|    ep_rew_mean          | 721        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 906        |
|    time_elapsed         | 15719      |
|    total_timesteps      | 906000     |
| train/                  |            |
|    approx_kl            | 0.03188168 |
|    clip_fraction        | 0.255      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.912      |
|    learning_rate        | 0.00025    |
|    loss                 | 16.6       |
|    n_updates            | 9050       |
|    policy_gradient_loss | 0.0158     |
|    std                  | 0.983      |
|    value_loss           | 40.7       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 998         |
|    ep_rew_mean          | 732         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 915         |
|    time_elapsed         | 15868       |
|    total_timesteps      | 915000      |
| train/                  |             |
|    approx_kl            | 0.041645564 |
|    clip_fraction        | 0.344       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.2        |
|    explained_variance   | 0.937       |
|    learning_rate        | 0.00025     |
|    loss                 | 19          |
|    n_updates            | 9140        |
|    policy_gradient_loss | 0.0221      |
|    std                  | 0.984       |
|    value_loss           | 48.6        |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 994        |
|    ep_rew_mean          | 739        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 925        |
|    time_elapsed         | 16034      |
|    total_timesteps      | 925000     |
| train/                  |            |
|    approx_kl            | 0.44571435 |
|    clip_fraction        | 0.675      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.2       |
|    explained_variance   | 0.787      |
|    learning_rate        | 0.00025    |
|    loss                 | 31.5       |
|    n_updates            | 9240       |
|    policy_gradient_loss | 0.141      |
|    std                  | 0.984      |
|    value_loss           | 81.5       |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 989       |
|    ep_rew_mean          | 672       |
| time/                   |           |
|    fps                  | 57        |
|    iterations           | 935       |
|    time_elapsed         | 16199     |
|    total_timesteps      | 935000    |
| train/                  |           |
|    approx_kl            | 0.3299012 |
|    clip_fraction        | 0.7       |
|    clip_range           | 0.2       |
|    entropy_loss         | -4.21     |
|    explained_variance   | 0.914     |
|    learning_rate        | 0.00025   |
|    loss                 | 14        |
|    n_updates            | 9340      |
|    policy_gradient_loss | 0.227     |
|    std                  | 0.986     |
|    value_loss           | 34.4      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 986     

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 984        |
|    ep_rew_mean          | 675        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 945        |
|    time_elapsed         | 16364      |
|    total_timesteps      | 945000     |
| train/                  |            |
|    approx_kl            | 0.04295652 |
|    clip_fraction        | 0.364      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.21      |
|    explained_variance   | 0.449      |
|    learning_rate        | 0.00025    |
|    loss                 | 31.6       |
|    n_updates            | 9440       |
|    policy_gradient_loss | 0.0193     |
|    std                  | 0.988      |
|    value_loss           | 65.7       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 982         |
|    ep_rew_mean          | 683         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 954         |
|    time_elapsed         | 16547       |
|    total_timesteps      | 954000      |
| train/                  |             |
|    approx_kl            | 0.049845118 |
|    clip_fraction        | 0.316       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.22       |
|    explained_variance   | 0.787       |
|    learning_rate        | 0.00025     |
|    loss                 | 39.2        |
|    n_updates            | 9530        |
|    policy_gradient_loss | 0.00998     |
|    std                  | 0.99        |
|    value_loss           | 90.2        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 982         |
|    ep_rew_mean          | 698         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 963         |
|    time_elapsed         | 16695       |
|    total_timesteps      | 963000      |
| train/                  |             |
|    approx_kl            | 0.014922332 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.22       |
|    explained_variance   | 0.773       |
|    learning_rate        | 0.00025     |
|    loss                 | 41.3        |
|    n_updates            | 9620        |
|    policy_gradient_loss | 0.0064      |
|    std                  | 0.99        |
|    value_loss           | 104         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 985        |
|    ep_rew_mean          | 692        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 973        |
|    time_elapsed         | 16860      |
|    total_timesteps      | 973000     |
| train/                  |            |
|    approx_kl            | 0.57599086 |
|    clip_fraction        | 0.451      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.22      |
|    explained_variance   | 0.913      |
|    learning_rate        | 0.00025    |
|    loss                 | 27.9       |
|    n_updates            | 9720       |
|    policy_gradient_loss | 0.0478     |
|    std                  | 0.99       |
|    value_loss           | 77.5       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 985         |
|    ep_rew_mean          | 691         |
| time/                   |             |
|    fps                  | 57          |
|    iterations           | 982         |
|    time_elapsed         | 17008       |
|    total_timesteps      | 982000      |
| train/                  |             |
|    approx_kl            | 0.069795355 |
|    clip_fraction        | 0.357       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.21       |
|    explained_variance   | 0.816       |
|    learning_rate        | 0.00025     |
|    loss                 | 34.8        |
|    n_updates            | 9810        |
|    policy_gradient_loss | 0.0287      |
|    std                  | 0.989       |
|    value_loss           | 86.6        |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 985        |
|    ep_rew_mean          | 691        |
| time/                   |            |
|    fps                  | 57         |
|    iterations           | 991        |
|    time_elapsed         | 17157      |
|    total_timesteps      | 991000     |
| train/                  |            |
|    approx_kl            | 0.12211408 |
|    clip_fraction        | 0.428      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.22      |
|    explained_variance   | 0.893      |
|    learning_rate        | 0.00025    |
|    loss                 | 23.7       |
|    n_updates            | 9900       |
|    policy_gradient_loss | 0.0355     |
|    std                  | 0.99       |
|    value_loss           | 62         |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 985      |
|    ep_rew_mean     | 688      |
| time/              |          |
|    fps             | 57       |
|    iterations      | 1000     |
|    time_elapsed    | 17342    |
|    total_timesteps | 1000000  |
---------------------------------


**4. Evaluating Models** - Best Model

In [4]:
# Path to the saved best-model checkpoint (best_model.zip) under
# Training/Saved_Models/PPO_car_best_Model_curriculum.
# The path is assembled from its components: os.path.join with a single,
# already-joined argument just returns that argument unchanged.
ppo_path = os.path.join('.', 'Training', 'Saved_Models', 'PPO_car_best_Model_curriculum', 'best_model.zip')

In [5]:
# Load the PPO model stored at ppo_path, passing eval_env as its environment.
best_model = PPO.load(ppo_path, env=eval_env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [6]:
# Score the loaded model over 500 episodes without rendering.
# evaluate_policy returns (mean_reward, std_reward) by default; the tuple is
# kept in `evalue` and displayed as the cell's output.
# The environment is closed in `finally` so that interrupting this long run
# (or any error raised during evaluation) does not leave it open.
try:
    evalue = evaluate_policy(best_model, eval_env, n_eval_episodes=500, render=False)
finally:
    eval_env.close()
evalue



KeyboardInterrupt: 

In [38]:
# Compute the average number of tiles covered and the average time taken per
# episode in the default environment, by rolling out best_model on eval_env.

episodes = 500
frames = []  # only filled if the render line inside the loop is re-enabled

tiles = 0
times = 0

try:
    for episode in range(1, episodes + 1):
        obs = eval_env.reset()
        done = False
        score = 0

        while not done:
            # frames.append(eval_env.render(mode='rgb_array'))
            # NOTE(review): predict() is left at its default deterministic=False,
            # whereas evaluate_policy defaults to deterministic=True - confirm
            # which behaviour is intended for these statistics.
            action, _ = best_model.predict(obs.copy())
            obs, reward, done, info = eval_env.step(action)
            score += reward

        # Per-episode counters read off the environment once the episode ends.
        tiles += eval_env.tile_visited_count
        times += eval_env.t

        print("Episode:{} Score:{}".format(episode, score))

    # Average over the configured episode count instead of a literal 500, so
    # changing `episodes` above cannot silently skew the reported means.
    print("Number of tiles:", tiles / episodes)
    print("Time taken:", times / episodes)
finally:
    # Release the environment even if the run is interrupted part-way.
    eval_env.close()

Track generation: 1128..1422 -> 294-tiles track
Total number of obstacles in the track: 9
Episode:1 Score:331.22866894198035
Track generation: 1087..1369 -> 282-tiles track
Total number of obstacles in the track: 8
Episode:2 Score:49.46619217082231
Track generation: 972..1215 -> 243-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1160..1454 -> 294-tiles track
Total number of obstacles in the track: 8
Episode:3 Score:654.436860068249
Track generation: 1262..1581 -> 319-tiles track
Total number of obstacles in the track: 9
Episode:4 Score:513.8364779874099
Track generation: 1199..1503 -> 304-tiles track
Total number of obstacles in the track: 7
Episode:5 Score:301.65016501649194
Track generation: 1091..1368 -> 277-tiles track
Total number of obstacles in the track: 8
Episode:6 Score:137.49420289854834
Track generation: 1182..1481 -> 299-tiles track
Total number of obstacles in the track: 8
Episode:7 Score:505.0335570469758
Tra

Episode:59 Score:618.6311787072092
Track generation: 969..1220 -> 251-tiles track
Total number of obstacles in the track: 6
Episode:60 Score:815.999999999986
Track generation: 1220..1529 -> 309-tiles track
Total number of obstacles in the track: 8
Episode:61 Score:205.19480519480746
Track generation: 1062..1332 -> 270-tiles track
Total number of obstacles in the track: 6
Episode:62 Score:121.37546468401919
Track generation: 1143..1433 -> 290-tiles track
Total number of obstacles in the track: 9
Episode:63 Score:638.9273356401314
Track generation: 1063..1333 -> 270-tiles track
Total number of obstacles in the track: 8
Episode:64 Score:764.4981412639281
Track generation: 1106..1387 -> 281-tiles track
Total number of obstacles in the track: 9
Episode:65 Score:657.1428571428429
Track generation: 1067..1338 -> 271-tiles track
Total number of obstacles in the track: 9
Episode:66 Score:668.5185185185062
Track generation: 1285..1611 -> 326-tiles track
Total number of obstacles in the track: 10

Episode:119 Score:570.8480565370911
Track generation: 1143..1433 -> 290-tiles track
Total number of obstacles in the track: 10
Episode:120 Score:480.44982698961263
Track generation: 1171..1468 -> 297-tiles track
Total number of obstacles in the track: 9
Episode:121 Score:472.9729729729625
Track generation: 1128..1414 -> 286-tiles track
Total number of obstacles in the track: 4
Episode:122 Score:702.6315789473532
Track generation: 1173..1470 -> 297-tiles track
Total number of obstacles in the track: 7
Episode:123 Score:377.1459459459356
Track generation: 1119..1403 -> 284-tiles track
Total number of obstacles in the track: 11
Episode:124 Score:567.3144876324985
Track generation: 1148..1439 -> 291-tiles track
Total number of obstacles in the track: 11
Episode:125 Score:544.827586206885
Track generation: 1159..1453 -> 294-tiles track
Total number of obstacles in the track: 8
Episode:126 Score:399.48805460749526
Track generation: 1207..1513 -> 306-tiles track
Total number of obstacles in t

Episode:179 Score:563.0662020905842
Track generation: 1247..1563 -> 316-tiles track
Total number of obstacles in the track: 6
Episode:180 Score:554.761904761895
Track generation: 1192..1494 -> 302-tiles track
Total number of obstacles in the track: 7
Episode:181 Score:624.0863787375292
Track generation: 1314..1647 -> 333-tiles track
Total number of obstacles in the track: 12
Episode:182 Score:283.7349397590298
Track generation: 1104..1384 -> 280-tiles track
Total number of obstacles in the track: 9
Episode:183 Score:781.720430107514
Track generation: 1269..1590 -> 321-tiles track
Total number of obstacles in the track: 9
Episode:184 Score:521.8749999999905
Track generation: 1092..1370 -> 278-tiles track
Total number of obstacles in the track: 11
Episode:185 Score:583.3935018050457
Track generation: 1167..1463 -> 296-tiles track
Total number of obstacles in the track: 7
Episode:186 Score:704.2372881355816
Track generation: 1191..1493 -> 302-tiles track
Total number of obstacles in the t

Episode:239 Score:588.7417218542955
Track generation: 1099..1378 -> 279-tiles track
Total number of obstacles in the track: 9
Episode:240 Score:688.1294964028616
Track generation: 1147..1438 -> 291-tiles track
Total number of obstacles in the track: 10
Episode:241 Score:444.82758620688196
Track generation: 1167..1466 -> 299-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1106..1389 -> 283-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1164..1459 -> 295-tiles track
Total number of obstacles in the track: 9
Episode:242 Score:707.142857142848
Track generation: 1107..1389 -> 282-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1178..1477 -> 299-tiles track
Total number of obstacles in the track: 9
Episode:243 Score:547.9865771811998
Track generation: 1112..1394 -> 282-tiles track
Total number of obstacles i

Episode:299 Score:581.4814814814681
Track generation: 1247..1563 -> 316-tiles track
Total number of obstacles in the track: 8
Episode:300 Score:353.9682539682434
Track generation: 1078..1351 -> 273-tiles track
Total number of obstacles in the track: 5
Episode:301 Score:774.9999999999844
Track generation: 914..1152 -> 238-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1155..1448 -> 293-tiles track
Total number of obstacles in the track: 9
Episode:302 Score:669.8630136986163
Track generation: 1262..1582 -> 320-tiles track
Total number of obstacles in the track: 10
Episode:303 Score:599.2163009404261
Track generation: 1111..1393 -> 282-tiles track
Total number of obstacles in the track: 9
Episode:304 Score:657.2953736654714
Track generation: 1065..1340 -> 275-tiles track
Total number of obstacles in the track: 6
Episode:305 Score:757.6642335766279
Track generation: 1136..1425 -> 289-tiles track
Total number of obstacles in the

Episode:360 Score:187.82237762236707
Track generation: 1139..1428 -> 289-tiles track
Total number of obstacles in the track: 10
Episode:361 Score:533.3333333333236
Track generation: 1165..1460 -> 295-tiles track
Total number of obstacles in the track: 8
Episode:362 Score:642.5170068027118
Track generation: 1152..1444 -> 292-tiles track
Total number of obstacles in the track: 10
Episode:363 Score:712.5429553264468
Track generation: 1166..1471 -> 305-tiles track
Total number of obstacles in the track: 7
Episode:364 Score:636.8421052631463
Track generation: 999..1253 -> 254-tiles track
Total number of obstacles in the track: 9
Episode:365 Score:744.6640316205434
Track generation: 1095..1373 -> 278-tiles track
Total number of obstacles in the track: 8
Episode:366 Score:780.8664259927707
Track generation: 1191..1493 -> 302-tiles track
Total number of obstacles in the track: 7
Episode:367 Score:500.99667774085424
Track generation: 1069..1340 -> 271-tiles track
Total number of obstacles in th

Episode:422 Score:687.931034482745
Track generation: 1132..1421 -> 289-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1100..1381 -> 281-tiles track
Total number of obstacles in the track: 6
Episode:423 Score:507.14285714284546
Track generation: 1075..1348 -> 273-tiles track
Total number of obstacles in the track: 7
Episode:424 Score:674.9999999999856
Track generation: 1147..1437 -> 290-tiles track
Total number of obstacles in the track: 8
Episode:425 Score:502.4221453287107
Track generation: 1027..1295 -> 268-tiles track
Total number of obstacles in the track: 6
Episode:426 Score:737.6404494381918
Track generation: 1086..1366 -> 280-tiles track
Total number of obstacles in the track: 8
Episode:427 Score:527.2401433691699
Track generation: 922..1162 -> 240-tiles track
Total number of obstacles in the track: 9
Episode:428 Score:720.5020920501943
Track generation: 1095..1373 -> 278-tiles track
Total number of obstacles in the 

Episode:483 Score:716.4259927797746
Track generation: 1027..1293 -> 266-tiles track
Total number of obstacles in the track: 5
Episode:484 Score:838.679245283009
Track generation: 1187..1488 -> 301-tiles track
Total number of obstacles in the track: 10
Episode:485 Score:556.6666666666518
Track generation: 1119..1403 -> 284-tiles track
Total number of obstacles in the track: 6
Episode:486 Score:704.5936395759591
Track generation: 1322..1657 -> 335-tiles track
Total number of obstacles in the track: 8
Episode:487 Score:490.71856287424146
Track generation: 1274..1597 -> 323-tiles track
Total number of obstacles in the track: 10
Episode:488 Score:514.5962732919191
Track generation: 1197..1500 -> 303-tiles track
Total number of obstacles in the track: 9
Episode:489 Score:498.34437086092134
Track generation: 1224..1534 -> 310-tiles track
Total number of obstacles in the track: 9
Episode:490 Score:481.06796116503796
Track generation: 1120..1404 -> 284-tiles track
Total number of obstacles in t

In [28]:
# Compute the average number of tiles covered, the average time taken and the
# average collision/obstacle ratio per episode in the obstacle variant
# environment, by rolling out best_model on eval_env.
episodes = 500
frames = []  # only filled if the render line inside the loop is re-enabled

ratios = 0
tiles = 0
times = 0

try:
    for episode in range(1, episodes + 1):
        obs = eval_env.reset()
        done = False
        score = 0

        while not done:
            # frames.append(eval_env.render(mode='rgb_array'))
            # NOTE(review): predict() is left at its default deterministic=False,
            # whereas evaluate_policy defaults to deterministic=True - confirm
            # which behaviour is intended for these statistics.
            action, _ = best_model.predict(obs.copy())
            obs, reward, done, info = eval_env.step(action)
            score += reward

        # Per-episode counters read off the environment once the episode ends.
        tiles += eval_env.tile_visited_count
        times += eval_env.t

        # Collisions per obstacle, taken from the info dict of the final step.
        # An episode reporting zero obstacles contributes 0 to the sum instead
        # of raising ZeroDivisionError and aborting the whole run.
        num_obstacles = info['num_obstacles']
        if num_obstacles:
            ratios += info['num_collisions'] / num_obstacles

        print("Episode:{} Score:{}".format(episode, score))

    # Average over the configured episode count instead of a literal 500, so
    # changing `episodes` above cannot silently skew the reported means.
    print("Number of tiles:", tiles / episodes)
    print("Time taken:", times / episodes)
    print("Ratio:", ratios / episodes)
finally:
    # Release the environment even if the run is interrupted part-way.
    eval_env.close()

Track generation: 1143..1442 -> 299-tiles track
Total number of obstacles in the track: 9
Episode:1 Score:437.5838926174462
Track generation: 1087..1369 -> 282-tiles track
Total number of obstacles in the track: 8
Episode:2 Score:537.0106761565746
Track generation: 964..1212 -> 248-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1176..1474 -> 298-tiles track
Total number of obstacles in the track: 5
Episode:3 Score:406.06060606060055
Track generation: 1283..1608 -> 325-tiles track
Total number of obstacles in the track: 9
Episode:4 Score:489.5061728395013
Track generation: 1217..1526 -> 309-tiles track
Total number of obstacles in the track: 9
Episode:5 Score:424.6753246753151
Track generation: 1096..1374 -> 278-tiles track
Total number of obstacles in the track: 8
Episode:6 Score:518.411552346558
Track generation: 1198..1501 -> 303-tiles track
Total number of obstacles in the track: 9
Episode:7 Score:458.9403973509901
Track

Episode:60 Score:261.19402985073623
Track generation: 981..1236 -> 255-tiles track
Total number of obstacles in the track: 6
Episode:61 Score:541.732283464555
Track generation: 1220..1529 -> 309-tiles track
Total number of obstacles in the track: 11
Episode:62 Score:300.00000000000176
Track generation: 1062..1332 -> 270-tiles track
Total number of obstacles in the track: 7
Episode:63 Score:669.5167286245228
Track generation: 1148..1439 -> 291-tiles track
Total number of obstacles in the track: 6
Episode:64 Score:446.5517241379186
Track generation: 983..1235 -> 252-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1106..1387 -> 281-tiles track
Total number of obstacles in the track: 10
Episode:65 Score:260.7142857142904
Track generation: 1068..1339 -> 271-tiles track
Total number of obstacles in the track: 9
Episode:66 Score:540.7407407407301
Track generation: 1285..1611 -> 326-tiles track
Total number of obstacles in the track

Episode:117 Score:556.7137809187185
Track generation: 1154..1447 -> 293-tiles track
Total number of obstacles in the track: 8
Episode:118 Score:442.4657534246514
Track generation: 1171..1468 -> 297-tiles track
Total number of obstacles in the track: 8
Episode:119 Score:562.1621621621529
Track generation: 1139..1428 -> 289-tiles track
Total number of obstacles in the track: 7
Episode:120 Score:544.4444444444345
Track generation: 1061..1339 -> 278-tiles track
Total number of obstacles in the track: 3
Episode:121 Score:668.953068592047
Track generation: 1006..1269 -> 263-tiles track
Total number of obstacles in the track: 6
Episode:122 Score:659.1603053435015
Track generation: 1161..1455 -> 294-tiles track
Total number of obstacles in the track: 5
Episode:123 Score:575.7679180887249
Track generation: 1156..1448 -> 292-tiles track
Total number of obstacles in the track: 9
Episode:124 Score:568.2130584192322
Track generation: 1207..1513 -> 306-tiles track
Total number of obstacles in the tr

Exception ignored in: <function Viewer.__del__ at 0x15fc738b0>
Traceback (most recent call last):
  File "/Users/PrishitaRay1/opt/anaconda3/lib/python3.9/site-packages/gym/envs/classic_control/rendering.py", line 185, in __del__
    self.close()
  File "/Users/PrishitaRay1/opt/anaconda3/lib/python3.9/site-packages/gym/envs/classic_control/rendering.py", line 99, in close
    if self.isopen and sys.meta_path:
AttributeError: 'Viewer' object has no attribute 'isopen'


Episode:133 Score:383.16831683167396
Track generation: 1059..1328 -> 269-tiles track
Total number of obstacles in the track: 6
Episode:134 Score:694.7761194029724
Track generation: 945..1191 -> 246-tiles track
Total number of obstacles in the track: 5
Episode:135 Score:687.755102040801
Track generation: 1224..1534 -> 310-tiles track
Total number of obstacles in the track: 8
Episode:136 Score:482.5242718446499
Track generation: 1128..1414 -> 286-tiles track
Total number of obstacles in the track: 8
Episode:137 Score:636.842105263145
Track generation: 1151..1443 -> 292-tiles track
Total number of obstacles in the track: 5
Episode:138 Score:638.8316151202632
Track generation: 1261..1581 -> 320-tiles track
Total number of obstacles in the track: 8
Episode:139 Score:492.4764890282034
Track generation: 1064..1333 -> 269-tiles track
Total number of obstacles in the track: 6
Episode:140 Score:663.4328358208817
Track generation: 1287..1613 -> 326-tiles track
Total number of obstacles in the tra

Episode:193 Score:433.06188925081244
Track generation: 1248..1564 -> 316-tiles track
Total number of obstacles in the track: 8
Episode:194 Score:465.8730158730101
Track generation: 1199..1503 -> 304-tiles track
Total number of obstacles in the track: 9
Episode:195 Score:533.663366336627
Track generation: 1170..1466 -> 296-tiles track
Total number of obstacles in the track: 9
Episode:196 Score:454.23728813558876
Track generation: 1145..1444 -> 299-tiles track
Total number of obstacles in the track: 9
Episode:197 Score:520.8053691275097
Track generation: 1247..1563 -> 316-tiles track
Total number of obstacles in the track: 10
Episode:198 Score:475.3968253968186
Track generation: 1209..1515 -> 306-tiles track
Total number of obstacles in the track: 9
Episode:199 Score:370.49180327868896
Track generation: 1256..1574 -> 318-tiles track
Total number of obstacles in the track: 10
Episode:200 Score:499.369085173491
Track generation: 1149..1440 -> 291-tiles track
Total number of obstacles in th

Episode:251 Score:394.40559440558917
Track generation: 1020..1279 -> 259-tiles track
Total number of obstacles in the track: 4
Episode:252 Score:671.3178294573562
Track generation: 1037..1305 -> 268-tiles track
Total number of obstacles in the track: 5
Episode:253 Score:657.6779026217123
Track generation: 1099..1378 -> 279-tiles track
Total number of obstacles in the track: 8
Episode:254 Score:612.230215827325
Track generation: 1158..1452 -> 294-tiles track
Total number of obstacles in the track: 8
Episode:255 Score:480.204778156984
Track generation: 1068..1344 -> 276-tiles track
Total number of obstacles in the track: 8
Episode:256 Score:551.8181818181681
Track generation: 1176..1483 -> 307-tiles track
Total number of obstacles in the track: 11
Episode:257 Score:514.3790849673109
Track generation: 1156..1449 -> 293-tiles track
Total number of obstacles in the track: 9
Episode:258 Score:341.0958904109491
Track generation: 1161..1456 -> 295-tiles track
Total number of obstacles in the t

Episode:312 Score:604.2087542087407
Track generation: 1171..1467 -> 296-tiles track
Total number of obstacles in the track: 5
Episode:313 Score:619.4915254237173
Track generation: 1147..1441 -> 294-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 1129..1416 -> 287-tiles track
Total number of obstacles in the track: 6
Episode:314 Score:445.45454545453714
Track generation: 1063..1333 -> 270-tiles track
Total number of obstacles in the track: 10
Episode:315 Score:419.14498141263607
Track generation: 1224..1534 -> 310-tiles track
Total number of obstacles in the track: 11
Episode:316 Score:359.87055016180284
Track generation: 1140..1429 -> 289-tiles track
Total number of obstacles in the track: 8
Episode:317 Score:604.8611111111012
Track generation: 1186..1487 -> 301-tiles track
Total number of obstacles in the track: 4
Episode:318 Score:333.333333333334
Track generation: 1128..1414 -> 286-tiles track
Total number of obstacles in

Episode:373 Score:362.49999999999346
Track generation: 1251..1568 -> 317-tiles track
Total number of obstacles in the track: 7
Episode:374 Score:536.7088607594835
Track generation: 1028..1289 -> 261-tiles track
Total number of obstacles in the track: 6
Episode:375 Score:688.4615384615261
Track generation: 1144..1434 -> 290-tiles track
Total number of obstacles in the track: 9
Episode:376 Score:573.1833910034518
Track generation: 1172..1472 -> 300-tiles track
retry to generate track (normal if there are not manyinstances of this message)
Track generation: 928..1164 -> 236-tiles track
Total number of obstacles in the track: 4
Episode:377 Score:721.2765957446701
Track generation: 1160..1454 -> 294-tiles track
Total number of obstacles in the track: 8
Episode:378 Score:515.5290102388958
Track generation: 1066..1343 -> 277-tiles track
Total number of obstacles in the track: 6
Episode:379 Score:517.3913043478202
Track generation: 1162..1464 -> 302-tiles track
Total number of obstacles in the

Episode:434 Score:283.80281690141175
Track generation: 1152..1444 -> 292-tiles track
Total number of obstacles in the track: 9
Episode:435 Score:482.30240549827437
Track generation: 1251..1568 -> 317-tiles track
Total number of obstacles in the track: 9
Episode:436 Score:334.81012658227934
Track generation: 1270..1592 -> 322-tiles track
Total number of obstacles in the track: 6
Episode:437 Score:420.2492211837896
Track generation: 1099..1384 -> 285-tiles track
Total number of obstacles in the track: 6
Episode:438 Score:625.3521126760446
Track generation: 1068..1339 -> 271-tiles track
Total number of obstacles in the track: 7
Episode:439 Score:609.2592592592484
Track generation: 1120..1405 -> 285-tiles track
Total number of obstacles in the track: 8
Episode:440 Score:546.4788732394309
Track generation: 1107..1388 -> 281-tiles track
Total number of obstacles in the track: 6
Episode:441 Score:607.1428571428446
Track generation: 1112..1400 -> 288-tiles track
Total number of obstacles in th

Episode:495 Score:401.72413793102646
Track generation: 1134..1431 -> 297-tiles track
Total number of obstacles in the track: 6
Episode:496 Score:502.7027027026928
Track generation: 1123..1408 -> 285-tiles track
Total number of obstacles in the track: 8
Episode:497 Score:523.2394366197091
Track generation: 1033..1296 -> 263-tiles track
Total number of obstacles in the track: 6
Episode:498 Score:555.7251908396894
Track generation: 1131..1418 -> 287-tiles track
Total number of obstacles in the track: 8
Episode:499 Score:546.8531468531366
Track generation: 1345..1685 -> 340-tiles track
Total number of obstacles in the track: 12
Episode:500 Score:292.6253687315628
Number of tiles: 188.102
Time taken: 20.019999999999726
Ratio: 0.09067024642024649
