In [1]:
import gym 
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete 
from gym.utils import seeding
import numpy as np
import random
import os
from stable_baselines3 import PPO, DQN, A2C, HER
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy

from typing import Final, Any
import math
from shapely.geometry.polygon import Polygon

import PIL.ImageDraw as ImageDraw
import PIL.Image as Image
import cv2

class ActivVisionEnv(Env):
    """
    ### Description
    This environment simulates a camera whose horizontal position and pan angle can be adjusted
    in order to keep as many moving objects as possible inside its field of view.
    It can be used to train a neural network on this task, which can later be deployed on the
    Robotino to test it in real life.

    ### Action Space
    The agent takes a single discrete action in `[0, 1, 2, 3, 4]`, which moves the robot or the camera.
    Driving forwards and backwards (along z) is not part of the action space; supporting it would
    require extending both the action space and the observation space.

    | Num | Action                 |
    |-----|------------------------|
    | 0   | accelerate right       |
    | 1   | accelerate left        |
    | 2   | turn cam right         |
    | 3   | turn cam left          |
    | 4   | do nothing             |

    ### Observation Space
    The observation is a `float32` `ndarray` with shape `(3 + num_objects * 4,)`. All values are
    normalized to `[0, 1]`; velocities and the camera angle are shifted so that 0.5 means zero.

    | Num  | Observation           | Min                  | Max                |
    |------|-----------------------|----------------------|--------------------|
    | 0    | Player Position X     | 0                    | 1                  |
    | 1    | Player Velocity X     | 0                    | 1                  |
    | 2    | Player Camera Angle   | 0                    | 1                  |
    |3+i*4 | Object Position X     | 0                    | 1                  |
    |4+i*4 | Object Position Y     | 0                    | 1                  |
    |5+i*4 | Object Velocity X     | 0                    | 1                  |
    |6+i*4 | Object Velocity Y     | 0                    | 1                  |

    ### Rewards
    The reward is the number of objects in view, multiplied by a penalty factor
    `1 - (|angle| / max_angle) / num_objects` for a camera that is not straight.

    ### Starting State
    Objects are placed in a diagonal row around the centre of the area with random velocities and
    sizes. The player starts at the horizontal centre with zero velocity and a straight camera.

    ### Episode Termination
    After a fixed number of time steps (1000).

    ### Arguments
    See `__init__`.
    """

    def __init__(self, num_objects: int = 3,
                 simulation_frequency: float = 5,
                 width: int = 3000, height: int = 1500, max_velocity_player: int = 200, view: int = 78,
                 img_format: float = (1920 / 1080), is_random_enabled: bool = False):
        """
        :param num_objects: number of objects to simulate
        :param simulation_frequency: control speed in Hz
        :param width: width of the 2D area in mm
        :param height: height of the 2D area in mm
        :param max_velocity_player: max velocity of the player
        :param view: horizontal camera view angle in degrees
        :param img_format: pixel_width/pixel_height if pixels are square. Otherwise width [mm] / height [mm]
        :param is_random_enabled: if True, a random force is applied to the objects every step;
            otherwise the object movement is deterministic after reset
        """

        # consts
        self._object_min_size: Final = 60  # in mm
        self._object_max_size: Final = 120  # in mm
        self._object_max_z_velocity: Final = 20  # in mm
        self._object_max_velocity: Final = 200  # in mm/s
        self._camera_height: Final = 390  # in mm
        self._camera_max_angle: Final = 20  # in degree
        self._velocity_player_per_step = 50  # max velocity change at each cycle
        self._camera_per_step = 1  # max camera angle change at each cycle
        # player is not allowed to leave
        self._player_max_z = width + 1
        self._player_min_z = 1
        self._player_pos_y = height - self._camera_height
        # one episode lasts this many time steps; `timer` counts down from it
        self._episode_steps: Final = 1000
        self.timer = self._episode_steps
        self._object_max_rnd_force_p_sec = 10  # in 1/percent from max

        # params
        self._num_objects = num_objects
        self._simulation_frequency = simulation_frequency
        self._width = width
        self._height = height
        self._max_velocity_player = max_velocity_player
        self._view = view
        self._img_format = img_format
        self._height_view = (1 / img_format) * view
        self._random_force_enabled = is_random_enabled

        # Five discrete actions: accelerate right/left, turn camera right/left, do nothing
        self.action_space = Discrete(5)
        # normalized positions, velocities and camera angle
        self.observation_space = Box(low=0.0, high=1.0, shape=(3 + num_objects * 4,), dtype=np.float32)
        # observation buffer, filled in place by _get_normalized_state
        self.state = [0] * (3 + 4 * num_objects)

        # viewer (kept for compatibility; rendering goes through OpenCV)
        self.viewer = None
        # True once render() has opened an OpenCV window, so close() knows to destroy it
        self._window_open = False

        # states for player and objects as dictionary
        self.player = {}
        self._objects = []

        # last reward, shown as text when rendering
        self._last_points = 0

        self.reset()

    @staticmethod
    def _object_corners(obj, rounded: bool = False):
        """
        Corner points of an object's square, in ring order (each corner adjacent to the next).

        :param obj: object dict with "pos_x", "pos_y" (centre) and "size" (edge length)
        :param rounded: round every coordinate to the nearest integer
        :return: list of four (x, y) tuples
        """
        half = obj["size"] / 2
        x, y = obj["pos_x"], obj["pos_y"]
        corners = [(x - half, y - half), (x + half, y - half),
                   (x + half, y + half), (x - half, y + half)]
        if rounded:
            corners = [(round(cx), round(cy)) for cx, cy in corners]
        return corners

    def _get_normalized_state(self):
        """
        Write the current player/object state into `self.state`, normalized to 0...1.
        Velocities and the camera angle are mapped from [-max, max] to [0, 1].

        :return: normalized observation state (the `self.state` list)
        """
        self.state[0] = self.player["pos_x"] / self._width
        self.state[1] = (self.player["vel_x"] / (2 * self._max_velocity_player)) + 0.5
        self.state[2] = (self.player["angle"] / (2 * self._camera_max_angle)) + 0.5

        for i, obj in enumerate(self._objects):
            base = 3 + 4 * i
            self.state[base] = obj["pos_x"] / self._width
            self.state[base + 1] = obj["pos_y"] / self._height
            self.state[base + 2] = (obj["vel_x"] / (2 * self._object_max_velocity)) + 0.5
            self.state[base + 3] = (obj["vel_y"] / (2 * self._object_max_velocity)) + 0.5

        return self.state

    def step(self, action):
        """
        Progress the simulation by one frame.

        :param action: action to take in this step
            (0 accelerate right, 1 accelerate left, 2 turn cam right, 3 turn cam left, 4 do nothing)
        :return: tuple (observation as float32 ndarray, reward, done, info)
        """
        player = self.player
        if action == 0:
            player["vel_x"] = min(player["vel_x"] + self._velocity_player_per_step,
                                  self._max_velocity_player)
        elif action == 1:
            player["vel_x"] = max(player["vel_x"] - self._velocity_player_per_step,
                                  -self._max_velocity_player)
        elif action == 2:
            player["angle"] = min(player["angle"] + self._camera_per_step, self._camera_max_angle)
        elif action == 3:
            player["angle"] = max(player["angle"] - self._camera_per_step, -self._camera_max_angle)

        # calc next frame
        self._progress_simulation()

        # Reduce timer by one step
        self.timer -= 1

        # Reward: objects in view, scaled down the further the camera is turned away from straight.
        reward = float(self._add_points())
        if self._num_objects:  # avoid dividing by zero for an environment without objects
            reward *= (1 - (abs(player["angle"] / self._camera_max_angle)) * (1 / self._num_objects))

        # The episode ends when the step budget is used up
        done = self.timer <= 0

        # Set placeholder for info
        info = {}

        self._last_points = reward
        # float32 matches both the declared observation space and what reset() returns
        observation = np.asarray(self._get_normalized_state(), dtype=np.float32)
        return observation, reward, done, info

    def _progress_simulation(self):
        """
        Advance player and objects by one time step: move, bounce off walls and size limits,
        swap velocities of colliding objects and optionally apply a random force.
        :return:
        """
        seconds_passed = 1 / self._simulation_frequency

        self._move_bodies(seconds_passed)
        self._bounce_off_limits()
        self._collide_objects()
        if self._random_force_enabled:
            self._apply_random_force()

    def _move_bodies(self, seconds_passed):
        """Integrate player and object positions (and object sizes) over one time step."""
        player = self.player
        player["pos_x"] = player["pos_x"] + seconds_passed * player["vel_x"]
        player["pos_z"] = player["pos_z"] + seconds_passed * player["vel_z"]

        # push player in constraints
        player["pos_x"] = min(max(player["pos_x"], 0), self._width)
        player["pos_z"] = min(max(player["pos_z"], self._player_min_z), self._player_max_z)

        for obj in self._objects:
            obj["pos_x"] = obj["pos_x"] + seconds_passed * obj["vel_x"]
            obj["pos_y"] = obj["pos_y"] + seconds_passed * obj["vel_y"]
            obj["size"] = obj["size"] + seconds_passed * obj["vel_z"]

    def _bounce_off_limits(self):
        """Reflect objects at the area borders and at the min/max object size."""
        for obj in self._objects:
            # A wall hit flips the velocity along that axis, puts the object just inside
            # the area and stops its size change.
            if obj["pos_x"] - obj["size"] / 2 <= 0:
                obj["vel_x"] = -obj["vel_x"]
                obj["pos_x"] = obj["size"] / 2 + 1
                obj["vel_z"] = 0

            if obj["pos_x"] + obj["size"] / 2 >= self._width:
                obj["vel_x"] = -obj["vel_x"]
                obj["pos_x"] = self._width - obj["size"] / 2 - 1
                obj["vel_z"] = 0

            if obj["pos_y"] - obj["size"] / 2 <= 0:
                obj["vel_y"] = -obj["vel_y"]
                obj["pos_y"] = obj["size"] / 2 + 1
                obj["vel_z"] = 0

            if obj["pos_y"] + obj["size"] / 2 >= self._height:
                obj["vel_y"] = -obj["vel_y"]
                obj["pos_y"] = self._height - obj["size"] / 2 - 1
                obj["vel_z"] = 0

            if obj["size"] <= self._object_min_size:
                obj["vel_z"] = -obj["vel_z"]
                obj["size"] = self._object_min_size + 1

            if obj["size"] >= self._object_max_size:
                obj["vel_z"] = -obj["vel_z"]
                obj["size"] = self._object_max_size - 1

    def _collide_objects(self):
        """Swap the velocities of every pair of objects whose squares touch or overlap."""
        # Positions do not change during collision handling, so build each square once.
        # Corners are in ring order: a (-,-),(+,-),(-,+),(+,+) order would describe a
        # self-intersecting "bowtie" that only covers half of the square.
        squares = [Polygon(self._object_corners(obj)) for obj in self._objects]

        for i in range(len(squares)):
            for j in range(i + 1, len(squares)):
                if squares[i].intersects(squares[j]):
                    first, second = self._objects[i], self._objects[j]
                    for key in ("vel_x", "vel_y", "vel_z"):
                        first[key], second[key] = second[key], first[key]

    def _apply_random_force(self):
        """Add a random velocity change to every object and clamp it to the velocity limits."""
        scale = self._simulation_frequency * self._object_max_rnd_force_p_sec
        limits = (("vel_x", self._object_max_velocity),
                  ("vel_y", self._object_max_velocity),
                  ("vel_z", self._object_max_z_velocity))

        for obj in self._objects:
            for key, limit in limits:
                delta = random.randint(-limit, limit) / scale
                # push it in the constraints
                obj[key] = max(-limit, min(limit, obj[key] + delta))

    def _add_points(self):
        """
        Reward calculation: count the objects that are inside the camera view.

        An object counts when the area of its intersection with the view polygon is larger
        than a quarter of its edge length.
        NOTE(review): this compares an area with a length; confirm the threshold is intended.

        :return: number of objects in view (0 if the view polygon has an area of at most 1)
        """
        corners_view = self._get_view()
        view_polygon = Polygon([corners_view["left_top"], corners_view["right_top"],
                                corners_view["right_bot"], corners_view["left_bot"]])

        if view_polygon.area <= 1:
            return 0

        objects_in_view = 0
        for obj in self._objects:
            object_polygon = Polygon(self._object_corners(obj, rounded=True))
            if view_polygon.intersection(object_polygon).area > (obj["size"] / 4):
                objects_in_view += 1

        return objects_in_view

    def _get_view(self) -> Any:
        """
        Calculates the four corner points of the camera view on the big canvas, in mm from top left.

        NOTE(review): the left edge height is computed with the angle term used for the right
        border (view/2 - angle) and vice versa; confirm this is intended.

        :return: dict with the points "left_top", "right_top", "left_bot", "right_bot"
        """
        pos_x = self.player["pos_x"]
        pos_y = self.player["pos_y"]
        pos_z = self.player["pos_z"]
        half_view = self._view / 2
        angle = self.player["angle"]

        angle_minus = math.radians(half_view - angle)
        angle_plus = math.radians(half_view + angle)

        # calc left, right border
        right_border = pos_x + pos_z * math.tan(angle_minus)
        left_border = pos_x - pos_z * math.tan(angle_plus)

        # calc top and bot borders, thinking of distortion
        vertical_factor = math.tan(math.radians(self._height_view / 2))
        left_height_diff = vertical_factor * (pos_z / math.sin(angle_minus))
        right_height_diff = vertical_factor * (pos_z / math.sin(angle_plus))

        left_border = round(left_border)
        right_border = round(right_border)

        return {
            "left_top": (left_border, round(pos_y - left_height_diff)),
            "right_top": (right_border, round(pos_y - right_height_diff)),
            "left_bot": (left_border, round(pos_y + left_height_diff)),
            "right_bot": (right_border, round(pos_y + right_height_diff))
        }

    def render(self, mode: str = "human"):
        """
        Renders the env into an OpenCV window named "simulation": the camera view outline in white,
        every object as a coloured dot with its square outline, and the last reward as text.

        :param mode: - not used - present because the method overrides the base class
        :return: - not used - present because the method overrides the base class
        """
        image = Image.new("RGB", (self._width, self._height))
        draw = ImageDraw.Draw(image)

        corners_view = self._get_view()
        points = (corners_view["left_top"], corners_view["right_top"], corners_view["right_bot"],
                  corners_view["left_bot"], corners_view["left_top"])

        draw.line(points, fill="white", width=20)
        colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (120, 120, 0), (0, 120, 120), (120, 0, 120)]
        for i, obj in enumerate(self._objects):
            color = colors[i % len(colors)]
            center_x = round(obj["pos_x"])
            center_y = round(obj["pos_y"])
            draw.ellipse([(center_x - 20, center_y - 20), (center_x + 20, center_y + 20)], fill=color)

            # closed outline: repeat the first corner at the end
            corners = self._object_corners(obj, rounded=True)
            draw.line(corners + [corners[0]], fill=color, width=20)

        open_cv_image = np.array(image)
        # PIL delivers RGB, OpenCV expects BGR
        open_cv_image = open_cv_image[:, :, ::-1].copy()
        open_cv_image = cv2.resize(open_cv_image, (900, round(640 / self._img_format)))
        font = cv2.FONT_HERSHEY_SIMPLEX
        text = str("%.2f" % round(self._last_points, 2))
        cv2.putText(open_cv_image, text, (10, 50), font, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("simulation", open_cv_image)
        cv2.waitKey(1)
        self._window_open = True

    def close(self):
        """
        Releases rendering resources: closes the viewer if one is set and destroys the OpenCV
        windows if render() opened one. Safe to call repeatedly and without prior rendering.
        """
        if self.viewer:
            self.viewer.close()
            self.viewer = None
        if self._window_open:
            cv2.destroyAllWindows()
            self._window_open = False

    def reset(self):
        """
        Resets player, objects and the step timer to the start state.
        Object velocities and sizes are drawn at random.

        :return: observation as float32 ndarray
        """
        self.points = 0.0
        self.timer = self._episode_steps

        self.player = {
            "pos_x": self._width / 2 * 1.0,
            "pos_y": self._height - self._camera_height * 1.0,
            "pos_z": 900.0,
            "vel_x": 0.0,
            "vel_z": 0.0,
            "angle": 0
        }

        center_index = math.floor(self._num_objects / 2)
        self._objects = []
        for i in range(self._num_objects):
            self._objects.append({
                "pos_x": (self._width / 2) + (i - center_index) * self._object_max_size,
                "pos_y": (self._height / 2) + i * self._object_max_size,
                "vel_x": random.randint(-self._object_max_velocity, self._object_max_velocity) * 0.5,
                "vel_y": random.randint(-self._object_max_velocity, self._object_max_velocity) * 0.3,
                "vel_z": random.randint(-self._object_max_z_velocity, self._object_max_z_velocity) * 0.3,
                "size": random.randint(self._object_min_size, self._object_max_size) * 1.0
            })

        return np.asarray(self._get_normalized_state(), dtype=np.float32)

In [2]:
# Create the environment with its default arguments.
env=ActivVisionEnv()

In [26]:
# Draw one random sample from the observation space to inspect its shape and dtype.
env.observation_space.sample()

array([0.46007735, 0.7807394 , 0.00603248, 0.9075361 , 0.07060986,
       0.2002937 , 0.18527961, 0.6766044 , 0.1867383 , 0.09771245,
       0.30663255, 0.8489442 , 0.8585215 , 0.09938314, 0.4837195 ],
      dtype=float32)

In [27]:
from stable_baselines3.common.env_checker import check_env

In [28]:
# Run the Stable-Baselines3 environment checker on the custom env, with warnings enabled.
check_env(env, warn=True)

In [29]:
# Smoke test: play a few episodes with randomly sampled actions and print each episode's total reward.
episodes = 5
for episode in range(1, episodes + 1):
    state = env.reset()
    score = 0
    done = False

    # Step until the environment reports the end of the episode.
    while not done:
        # env.render("mode")
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score = score + reward

    print('Episode:{} Score:{}'.format(episode, score))

env.close()

Episode:1 Score:619.6666666666672
Episode:2 Score:764.8833333333329
Episode:3 Score:853.1333333333314
Episode:4 Score:472.18333333333254
Episode:5 Score:778.6833333333324


In [30]:
# Release the environment's rendering resources.
env.close()

In [31]:
# Relative directory passed to the model below as its TensorBoard log location.
log_path = os.path.join('Training', 'Logs')

In [32]:
# PPO agent with the "MlpPolicy" policy on the custom env; verbose output, TensorBoard logs under log_path.
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [33]:
# Train the agent for 2,000,000 environment time steps.
model.learn(total_timesteps=2000000)

Logging to Training\Logs\PPO_20
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | 552      |
| time/              |          |
|    fps             | 961      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 582         |
| time/                   |             |
|    fps                  | 830         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008329704 |
|    clip_fraction        | 0.0574      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | -0.00173    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 682         |
| time/                   |             |
|    fps                  | 713         |
|    iterations           | 11          |
|    time_elapsed         | 31          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.012984355 |
|    clip_fraction        | 0.0217      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.57       |
|    explained_variance   | 0.193       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.9        |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00316    |
|    value_loss           | 138         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 944          |
| time/                   |              |
|    fps                  | 695          |
|    iterations           | 21           |
|    time_elapsed         | 61           |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0042734207 |
|    clip_fraction        | 0.00894      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.34        |
|    explained_variance   | 0.714        |
|    learning_rate        | 0.0003       |
|    loss                 | 46.4         |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.0015      |
|    value_loss           | 119          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.06e+03     |
| time/                   |              |
|    fps                  | 691          |
|    iterations           | 31           |
|    time_elapsed         | 91           |
|    total_timesteps      | 63488        |
| train/                  |              |
|    approx_kl            | 0.0040884777 |
|    clip_fraction        | 0.0344       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.366        |
|    learning_rate        | 0.0003       |
|    loss                 | 57.6         |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.00545     |
|    value_loss           | 189          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.14e+03    |
| time/                   |             |
|    fps                  | 687         |
|    iterations           | 41          |
|    time_elapsed         | 122         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.005264521 |
|    clip_fraction        | 0.0326      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.13       |
|    explained_variance   | 0.566       |
|    learning_rate        | 0.0003      |
|    loss                 | 32.5        |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.00411    |
|    value_loss           | 135         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.19e+03    |
| time/                   |             |
|    fps                  | 686         |
|    iterations           | 51          |
|    time_elapsed         | 152         |
|    total_timesteps      | 104448      |
| train/                  |             |
|    approx_kl            | 0.010593316 |
|    clip_fraction        | 0.0728      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.34       |
|    explained_variance   | 0.55        |
|    learning_rate        | 0.0003      |
|    loss                 | 14.2        |
|    n_updates            | 500         |
|    policy_gradient_loss | -0.00381    |
|    value_loss           | 119         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.28e+03     |
| time/                   |              |
|    fps                  | 687          |
|    iterations           | 61           |
|    time_elapsed         | 181          |
|    total_timesteps      | 124928       |
| train/                  |              |
|    approx_kl            | 0.0073806564 |
|    clip_fraction        | 0.0489       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.2         |
|    explained_variance   | 0.782        |
|    learning_rate        | 0.0003       |
|    loss                 | 29.4         |
|    n_updates            | 600          |
|    policy_gradient_loss | -0.0045      |
|    value_loss           | 85.8         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.29e+03    |
| time/                   |             |
|    fps                  | 687         |
|    iterations           | 71          |
|    time_elapsed         | 211         |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 0.006701176 |
|    clip_fraction        | 0.0507      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.24       |
|    explained_variance   | 0.731       |
|    learning_rate        | 0.0003      |
|    loss                 | 36.7        |
|    n_updates            | 700         |
|    policy_gradient_loss | -0.0048     |
|    value_loss           | 136         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.3e+03     |
| time/                   |             |
|    fps                  | 687         |
|    iterations           | 81          |
|    time_elapsed         | 241         |
|    total_timesteps      | 165888      |
| train/                  |             |
|    approx_kl            | 0.008295294 |
|    clip_fraction        | 0.0632      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.34       |
|    explained_variance   | 0.803       |
|    learning_rate        | 0.0003      |
|    loss                 | 31.3        |
|    n_updates            | 800         |
|    policy_gradient_loss | -0.0045     |
|    value_loss           | 87.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.3e+03     |
| time/                   |             |
|    fps                  | 688         |
|    iterations           | 91          |
|    time_elapsed         | 270         |
|    total_timesteps      | 186368      |
| train/                  |             |
|    approx_kl            | 0.006839982 |
|    clip_fraction        | 0.0565      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0.791       |
|    learning_rate        | 0.0003      |
|    loss                 | 61.3        |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.00559    |
|    value_loss           | 96.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.32e+03    |
| time/                   |             |
|    fps                  | 689         |
|    iterations           | 101         |
|    time_elapsed         | 300         |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.007492339 |
|    clip_fraction        | 0.102       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.47       |
|    explained_variance   | 0.833       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.3        |
|    n_updates            | 1000        |
|    policy_gradient_loss | -0.00938    |
|    value_loss           | 79.2        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.34e+03    |
| time/                   |             |
|    fps                  | 679         |
|    iterations           | 111         |
|    time_elapsed         | 334         |
|    total_timesteps      | 227328      |
| train/                  |             |
|    approx_kl            | 0.010713188 |
|    clip_fraction        | 0.0909      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.44       |
|    explained_variance   | 0.851       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.9         |
|    n_updates            | 1100        |
|    policy_gradient_loss | -0.00594    |
|    value_loss           | 65.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.35e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 121          |
|    time_elapsed         | 369          |
|    total_timesteps      | 247808       |
| train/                  |              |
|    approx_kl            | 0.0065864087 |
|    clip_fraction        | 0.0564       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.45        |
|    explained_variance   | 0.801        |
|    learning_rate        | 0.0003       |
|    loss                 | 47.1         |
|    n_updates            | 1200         |
|    policy_gradient_loss | -0.00428     |
|    value_loss           | 92.5         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.37e+03    |
| time/                   |             |
|    fps                  | 666         |
|    iterations           | 131         |
|    time_elapsed         | 402         |
|    total_timesteps      | 268288      |
| train/                  |             |
|    approx_kl            | 0.009734695 |
|    clip_fraction        | 0.0981      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.43       |
|    explained_variance   | 0.462       |
|    learning_rate        | 0.0003      |
|    loss                 | 18.5        |
|    n_updates            | 1300        |
|    policy_gradient_loss | -0.0052     |
|    value_loss           | 164         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.37e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 141         |
|    time_elapsed         | 432         |
|    total_timesteps      | 288768      |
| train/                  |             |
|    approx_kl            | 0.004542713 |
|    clip_fraction        | 0.0467      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.44       |
|    explained_variance   | 0.767       |
|    learning_rate        | 0.0003      |
|    loss                 | 175         |
|    n_updates            | 1400        |
|    policy_gradient_loss | -0.00486    |
|    value_loss           | 166         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 151         |
|    time_elapsed         | 463         |
|    total_timesteps      | 309248      |
| train/                  |             |
|    approx_kl            | 0.008548943 |
|    clip_fraction        | 0.0724      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.41       |
|    explained_variance   | 0.749       |
|    learning_rate        | 0.0003      |
|    loss                 | 22.7        |
|    n_updates            | 1500        |
|    policy_gradient_loss | -0.00554    |
|    value_loss           | 128         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 161         |
|    time_elapsed         | 493         |
|    total_timesteps      | 329728      |
| train/                  |             |
|    approx_kl            | 0.008004551 |
|    clip_fraction        | 0.0713      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.69        |
|    learning_rate        | 0.0003      |
|    loss                 | 33.5        |
|    n_updates            | 1600        |
|    policy_gradient_loss | -0.00742    |
|    value_loss           | 131         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.38e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 171          |
|    time_elapsed         | 524          |
|    total_timesteps      | 350208       |
| train/                  |              |
|    approx_kl            | 0.0058136955 |
|    clip_fraction        | 0.0437       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.38        |
|    explained_variance   | 0.5          |
|    learning_rate        | 0.0003       |
|    loss                 | 70.4         |
|    n_updates            | 1700         |
|    policy_gradient_loss | -0.00452     |
|    value_loss           | 149          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.37e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 181          |
|    time_elapsed         | 554          |
|    total_timesteps      | 370688       |
| train/                  |              |
|    approx_kl            | 0.0055445745 |
|    clip_fraction        | 0.0329       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.28        |
|    explained_variance   | 0.74         |
|    learning_rate        | 0.0003       |
|    loss                 | 39.6         |
|    n_updates            | 1800         |
|    policy_gradient_loss | -0.00249     |
|    value_loss           | 93.2         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 191         |
|    time_elapsed         | 585         |
|    total_timesteps      | 391168      |
| train/                  |             |
|    approx_kl            | 0.008399573 |
|    clip_fraction        | 0.0702      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | 0.783       |
|    learning_rate        | 0.0003      |
|    loss                 | 25.8        |
|    n_updates            | 1900        |
|    policy_gradient_loss | -0.00264    |
|    value_loss           | 108         |
-----------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1e+03    |
|

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 201         |
|    time_elapsed         | 616         |
|    total_timesteps      | 411648      |
| train/                  |             |
|    approx_kl            | 0.009728624 |
|    clip_fraction        | 0.0899      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.35       |
|    explained_variance   | 0.767       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.71        |
|    n_updates            | 2000        |
|    policy_gradient_loss | -0.00648    |
|    value_loss           | 93.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 211         |
|    time_elapsed         | 646         |
|    total_timesteps      | 432128      |
| train/                  |             |
|    approx_kl            | 0.008209621 |
|    clip_fraction        | 0.0651      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.32       |
|    explained_variance   | 0.709       |
|    learning_rate        | 0.0003      |
|    loss                 | 12          |
|    n_updates            | 2100        |
|    policy_gradient_loss | -0.00524    |
|    value_loss           | 72.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.38e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 221          |
|    time_elapsed         | 677          |
|    total_timesteps      | 452608       |
| train/                  |              |
|    approx_kl            | 0.0104299635 |
|    clip_fraction        | 0.0807       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.25        |
|    explained_variance   | 0.785        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.8         |
|    n_updates            | 2200         |
|    policy_gradient_loss | -0.00688     |
|    value_loss           | 103          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 231         |
|    time_elapsed         | 708         |
|    total_timesteps      | 473088      |
| train/                  |             |
|    approx_kl            | 0.006748859 |
|    clip_fraction        | 0.0737      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.27       |
|    explained_variance   | 0.765       |
|    learning_rate        | 0.0003      |
|    loss                 | 64.9        |
|    n_updates            | 2300        |
|    policy_gradient_loss | -0.00721    |
|    value_loss           | 144         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.38e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 241          |
|    time_elapsed         | 739          |
|    total_timesteps      | 493568       |
| train/                  |              |
|    approx_kl            | 0.0102045555 |
|    clip_fraction        | 0.0874       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.29        |
|    explained_variance   | 0.786        |
|    learning_rate        | 0.0003       |
|    loss                 | 17.4         |
|    n_updates            | 2400         |
|    policy_gradient_loss | -0.00712     |
|    value_loss           | 94.9         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 251          |
|    time_elapsed         | 769          |
|    total_timesteps      | 514048       |
| train/                  |              |
|    approx_kl            | 0.0061857793 |
|    clip_fraction        | 0.0328       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.37        |
|    explained_variance   | 0.738        |
|    learning_rate        | 0.0003       |
|    loss                 | 70.4         |
|    n_updates            | 2500         |
|    policy_gradient_loss | -0.00343     |
|    value_loss           | 143          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.4e+03      |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 261          |
|    time_elapsed         | 800          |
|    total_timesteps      | 534528       |
| train/                  |              |
|    approx_kl            | 0.0069580176 |
|    clip_fraction        | 0.0353       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.28        |
|    explained_variance   | 0.7          |
|    learning_rate        | 0.0003       |
|    loss                 | 8.53         |
|    n_updates            | 2600         |
|    policy_gradient_loss | -0.00273     |
|    value_loss           | 108          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.42e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 271          |
|    time_elapsed         | 830          |
|    total_timesteps      | 555008       |
| train/                  |              |
|    approx_kl            | 0.0074569425 |
|    clip_fraction        | 0.0823       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.24        |
|    explained_variance   | 0.779        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.9         |
|    n_updates            | 2700         |
|    policy_gradient_loss | -0.00535     |
|    value_loss           | 81.2         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 281          |
|    time_elapsed         | 861          |
|    total_timesteps      | 575488       |
| train/                  |              |
|    approx_kl            | 0.0067775175 |
|    clip_fraction        | 0.0815       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.18        |
|    explained_variance   | 0.813        |
|    learning_rate        | 0.0003       |
|    loss                 | 4.13         |
|    n_updates            | 2800         |
|    policy_gradient_loss | -0.00679     |
|    value_loss           | 64.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 291          |
|    time_elapsed         | 892          |
|    total_timesteps      | 595968       |
| train/                  |              |
|    approx_kl            | 0.0072744293 |
|    clip_fraction        | 0.0572       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.07        |
|    explained_variance   | 0.588        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.4         |
|    n_updates            | 2900         |
|    policy_gradient_loss | -0.00575     |
|    value_loss           | 109          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.37e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 301         |
|    time_elapsed         | 922         |
|    total_timesteps      | 616448      |
| train/                  |             |
|    approx_kl            | 0.007803803 |
|    clip_fraction        | 0.0717      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.17       |
|    explained_variance   | 0.749       |
|    learning_rate        | 0.0003      |
|    loss                 | 11          |
|    n_updates            | 3000        |
|    policy_gradient_loss | -0.00588    |
|    value_loss           | 98          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.37e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 311         |
|    time_elapsed         | 952         |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.010468569 |
|    clip_fraction        | 0.0885      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.948      |
|    explained_variance   | 0.67        |
|    learning_rate        | 0.0003      |
|    loss                 | 18          |
|    n_updates            | 3100        |
|    policy_gradient_loss | -0.00933    |
|    value_loss           | 163         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.34e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 321         |
|    time_elapsed         | 985         |
|    total_timesteps      | 657408      |
| train/                  |             |
|    approx_kl            | 0.008420067 |
|    clip_fraction        | 0.0563      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.18       |
|    explained_variance   | 0.683       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.7        |
|    n_updates            | 3200        |
|    policy_gradient_loss | -0.00359    |
|    value_loss           | 113         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.36e+03    |
| time/                   |             |
|    fps                  | 666         |
|    iterations           | 331         |
|    time_elapsed         | 1016        |
|    total_timesteps      | 677888      |
| train/                  |             |
|    approx_kl            | 0.007902788 |
|    clip_fraction        | 0.081       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.971      |
|    explained_variance   | 0.82        |
|    learning_rate        | 0.0003      |
|    loss                 | 10.6        |
|    n_updates            | 3300        |
|    policy_gradient_loss | -0.00663    |
|    value_loss           | 90.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1e+03      |
|    ep_rew_mean          | 1.39e+03   |
| time/                   |            |
|    fps                  | 667        |
|    iterations           | 341        |
|    time_elapsed         | 1046       |
|    total_timesteps      | 698368     |
| train/                  |            |
|    approx_kl            | 0.00510526 |
|    clip_fraction        | 0.0468     |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.18      |
|    explained_variance   | 0.776      |
|    learning_rate        | 0.0003     |
|    loss                 | 18.4       |
|    n_updates            | 3400       |
|    policy_gradient_loss | -0.00184   |
|    value_loss           | 80         |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.39e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 351         |
|    time_elapsed         | 1077        |
|    total_timesteps      | 718848      |
| train/                  |             |
|    approx_kl            | 0.005155436 |
|    clip_fraction        | 0.0808      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.961      |
|    explained_variance   | 0.784       |
|    learning_rate        | 0.0003      |
|    loss                 | 80.2        |
|    n_updates            | 3500        |
|    policy_gradient_loss | -0.00661    |
|    value_loss           | 139         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 361          |
|    time_elapsed         | 1107         |
|    total_timesteps      | 739328       |
| train/                  |              |
|    approx_kl            | 0.0085379835 |
|    clip_fraction        | 0.0931       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.811       |
|    explained_variance   | 0.771        |
|    learning_rate        | 0.0003       |
|    loss                 | 6.18         |
|    n_updates            | 3600         |
|    policy_gradient_loss | -0.00532     |
|    value_loss           | 59           |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 371         |
|    time_elapsed         | 1138        |
|    total_timesteps      | 759808      |
| train/                  |             |
|    approx_kl            | 0.006701013 |
|    clip_fraction        | 0.0426      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.921      |
|    explained_variance   | 0.785       |
|    learning_rate        | 0.0003      |
|    loss                 | 70          |
|    n_updates            | 3700        |
|    policy_gradient_loss | -0.00212    |
|    value_loss           | 103         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 381         |
|    time_elapsed         | 1168        |
|    total_timesteps      | 780288      |
| train/                  |             |
|    approx_kl            | 0.009020455 |
|    clip_fraction        | 0.0672      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.798      |
|    explained_variance   | 0.741       |
|    learning_rate        | 0.0003      |
|    loss                 | 12.4        |
|    n_updates            | 3800        |
|    policy_gradient_loss | -0.00723    |
|    value_loss           | 78.4        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.37e+03    |
| time/                   |             |
|    fps                  | 667         |
|    iterations           | 391         |
|    time_elapsed         | 1198        |
|    total_timesteps      | 800768      |
| train/                  |             |
|    approx_kl            | 0.008230592 |
|    clip_fraction        | 0.04        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.928      |
|    explained_variance   | 0.777       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.7        |
|    n_updates            | 3900        |
|    policy_gradient_loss | -0.00309    |
|    value_loss           | 104         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.36e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 401          |
|    time_elapsed         | 1229         |
|    total_timesteps      | 821248       |
| train/                  |              |
|    approx_kl            | 0.0051214024 |
|    clip_fraction        | 0.0513       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.898       |
|    explained_variance   | 0.825        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.72         |
|    n_updates            | 4000         |
|    policy_gradient_loss | -0.00214     |
|    value_loss           | 75.6         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 411          |
|    time_elapsed         | 1259         |
|    total_timesteps      | 841728       |
| train/                  |              |
|    approx_kl            | 0.0053321924 |
|    clip_fraction        | 0.0386       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1           |
|    explained_variance   | 0.744        |
|    learning_rate        | 0.0003       |
|    loss                 | 133          |
|    n_updates            | 4100         |
|    policy_gradient_loss | -0.00296     |
|    value_loss           | 137          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.39e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 421         |
|    time_elapsed         | 1290        |
|    total_timesteps      | 862208      |
| train/                  |             |
|    approx_kl            | 0.010357177 |
|    clip_fraction        | 0.079       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.05       |
|    explained_variance   | 0.762       |
|    learning_rate        | 0.0003      |
|    loss                 | 15          |
|    n_updates            | 4200        |
|    policy_gradient_loss | -0.00495    |
|    value_loss           | 75.9        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 431          |
|    time_elapsed         | 1321         |
|    total_timesteps      | 882688       |
| train/                  |              |
|    approx_kl            | 0.0059823645 |
|    clip_fraction        | 0.0461       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.989       |
|    explained_variance   | 0.584        |
|    learning_rate        | 0.0003       |
|    loss                 | 13.9         |
|    n_updates            | 4300         |
|    policy_gradient_loss | -0.00224     |
|    value_loss           | 139          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 441         |
|    time_elapsed         | 1351        |
|    total_timesteps      | 903168      |
| train/                  |             |
|    approx_kl            | 0.006765807 |
|    clip_fraction        | 0.0498      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.968      |
|    explained_variance   | 0.801       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.16        |
|    n_updates            | 4400        |
|    policy_gradient_loss | -0.00228    |
|    value_loss           | 71.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 451         |
|    time_elapsed         | 1381        |
|    total_timesteps      | 923648      |
| train/                  |             |
|    approx_kl            | 0.008771795 |
|    clip_fraction        | 0.0662      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.93       |
|    explained_variance   | 0.833       |
|    learning_rate        | 0.0003      |
|    loss                 | 24.7        |
|    n_updates            | 4500        |
|    policy_gradient_loss | -0.0058     |
|    value_loss           | 87.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.4e+03      |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 461          |
|    time_elapsed         | 1411         |
|    total_timesteps      | 944128       |
| train/                  |              |
|    approx_kl            | 0.0059659434 |
|    clip_fraction        | 0.0539       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.962       |
|    explained_variance   | 0.747        |
|    learning_rate        | 0.0003       |
|    loss                 | 155          |
|    n_updates            | 4600         |
|    policy_gradient_loss | -0.0019      |
|    value_loss           | 148          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.42e+03    |
| time/                   |             |
|    fps                  | 668         |
|    iterations           | 471         |
|    time_elapsed         | 1442        |
|    total_timesteps      | 964608      |
| train/                  |             |
|    approx_kl            | 0.008437295 |
|    clip_fraction        | 0.0748      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.831      |
|    explained_variance   | 0.768       |
|    learning_rate        | 0.0003      |
|    loss                 | 13          |
|    n_updates            | 4700        |
|    policy_gradient_loss | -0.00402    |
|    value_loss           | 132         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 481          |
|    time_elapsed         | 1472         |
|    total_timesteps      | 985088       |
| train/                  |              |
|    approx_kl            | 0.0076417793 |
|    clip_fraction        | 0.0638       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.839       |
|    explained_variance   | 0.812        |
|    learning_rate        | 0.0003       |
|    loss                 | 40.5         |
|    n_updates            | 4800         |
|    policy_gradient_loss | -0.00559     |
|    value_loss           | 115          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 669          |
|    iterations           | 491          |
|    time_elapsed         | 1503         |
|    total_timesteps      | 1005568      |
| train/                  |              |
|    approx_kl            | 0.0071428735 |
|    clip_fraction        | 0.0581       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.876       |
|    explained_variance   | 0.823        |
|    learning_rate        | 0.0003       |
|    loss                 | 30.8         |
|    n_updates            | 4900         |
|    policy_gradient_loss | -0.00365     |
|    value_loss           | 88.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 501         |
|    time_elapsed         | 1533        |
|    total_timesteps      | 1026048     |
| train/                  |             |
|    approx_kl            | 0.006276888 |
|    clip_fraction        | 0.0676      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.941      |
|    explained_variance   | 0.786       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.7        |
|    n_updates            | 5000        |
|    policy_gradient_loss | -0.00409    |
|    value_loss           | 154         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.4e+03      |
| time/                   |              |
|    fps                  | 669          |
|    iterations           | 511          |
|    time_elapsed         | 1563         |
|    total_timesteps      | 1046528      |
| train/                  |              |
|    approx_kl            | 0.0065122945 |
|    clip_fraction        | 0.0708       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.916       |
|    explained_variance   | 0.423        |
|    learning_rate        | 0.0003       |
|    loss                 | 31.4         |
|    n_updates            | 5100         |
|    policy_gradient_loss | -0.00317     |
|    value_loss           | 131          |
------------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 521         |
|    time_elapsed         | 1594        |
|    total_timesteps      | 1067008     |
| train/                  |             |
|    approx_kl            | 0.008610634 |
|    clip_fraction        | 0.0989      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.798      |
|    explained_variance   | 0.834       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.93        |
|    n_updates            | 5200        |
|    policy_gradient_loss | -0.00718    |
|    value_loss           | 66          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.37e+03     |
| time/                   |              |
|    fps                  | 669          |
|    iterations           | 531          |
|    time_elapsed         | 1624         |
|    total_timesteps      | 1087488      |
| train/                  |              |
|    approx_kl            | 0.0074684448 |
|    clip_fraction        | 0.0735       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.844       |
|    explained_variance   | 0.828        |
|    learning_rate        | 0.0003       |
|    loss                 | 9            |
|    n_updates            | 5300         |
|    policy_gradient_loss | -0.00568     |
|    value_loss           | 73           |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.36e+03    |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 541         |
|    time_elapsed         | 1654        |
|    total_timesteps      | 1107968     |
| train/                  |             |
|    approx_kl            | 0.006840384 |
|    clip_fraction        | 0.0477      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.761      |
|    explained_variance   | 0.762       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.3        |
|    n_updates            | 5400        |
|    policy_gradient_loss | -0.0037     |
|    value_loss           | 114         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.34e+03    |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 551         |
|    time_elapsed         | 1684        |
|    total_timesteps      | 1128448     |
| train/                  |             |
|    approx_kl            | 0.007516043 |
|    clip_fraction        | 0.0682      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.816      |
|    explained_variance   | 0.845       |
|    learning_rate        | 0.0003      |
|    loss                 | 20.2        |
|    n_updates            | 5500        |
|    policy_gradient_loss | -0.00175    |
|    value_loss           | 74.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.37e+03    |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 561         |
|    time_elapsed         | 1715        |
|    total_timesteps      | 1148928     |
| train/                  |             |
|    approx_kl            | 0.008512601 |
|    clip_fraction        | 0.0813      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.858      |
|    explained_variance   | 0.814       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.66        |
|    n_updates            | 5600        |
|    policy_gradient_loss | -0.00634    |
|    value_loss           | 137         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.39e+03    |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 571         |
|    time_elapsed         | 1745        |
|    total_timesteps      | 1169408     |
| train/                  |             |
|    approx_kl            | 0.002820779 |
|    clip_fraction        | 0.0277      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.939      |
|    explained_variance   | 0.688       |
|    learning_rate        | 0.0003      |
|    loss                 | 86.6        |
|    n_updates            | 5700        |
|    policy_gradient_loss | -0.00171    |
|    value_loss           | 244         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 581         |
|    time_elapsed         | 1776        |
|    total_timesteps      | 1189888     |
| train/                  |             |
|    approx_kl            | 0.008570536 |
|    clip_fraction        | 0.0796      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.898      |
|    explained_variance   | 0.775       |
|    learning_rate        | 0.0003      |
|    loss                 | 148         |
|    n_updates            | 5800        |
|    policy_gradient_loss | -0.00146    |
|    value_loss           | 124         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.42e+03     |
| time/                   |              |
|    fps                  | 670          |
|    iterations           | 591          |
|    time_elapsed         | 1806         |
|    total_timesteps      | 1210368      |
| train/                  |              |
|    approx_kl            | 0.0033434196 |
|    clip_fraction        | 0.0383       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.851       |
|    explained_variance   | 0.841        |
|    learning_rate        | 0.0003       |
|    loss                 | 13           |
|    n_updates            | 5900         |
|    policy_gradient_loss | -0.000673    |
|    value_loss           | 97.8         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 670          |
|    iterations           | 601          |
|    time_elapsed         | 1836         |
|    total_timesteps      | 1230848      |
| train/                  |              |
|    approx_kl            | 0.0072080586 |
|    clip_fraction        | 0.0768       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.805       |
|    explained_variance   | 0.736        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.3         |
|    n_updates            | 6000         |
|    policy_gradient_loss | -0.0059      |
|    value_loss           | 136          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 670          |
|    iterations           | 611          |
|    time_elapsed         | 1866         |
|    total_timesteps      | 1251328      |
| train/                  |              |
|    approx_kl            | 0.0059306915 |
|    clip_fraction        | 0.054        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.801       |
|    explained_variance   | 0.841        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.5         |
|    n_updates            | 6100         |
|    policy_gradient_loss | -0.00275     |
|    value_loss           | 139          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.4e+03      |
| time/                   |              |
|    fps                  | 670          |
|    iterations           | 621          |
|    time_elapsed         | 1896         |
|    total_timesteps      | 1271808      |
| train/                  |              |
|    approx_kl            | 0.0070600165 |
|    clip_fraction        | 0.0728       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.858       |
|    explained_variance   | 0.859        |
|    learning_rate        | 0.0003       |
|    loss                 | 23.3         |
|    n_updates            | 6200         |
|    policy_gradient_loss | -0.00463     |
|    value_loss           | 107          |
------------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mea

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.39e+03    |
| time/                   |             |
|    fps                  | 670         |
|    iterations           | 631         |
|    time_elapsed         | 1926        |
|    total_timesteps      | 1292288     |
| train/                  |             |
|    approx_kl            | 0.008070317 |
|    clip_fraction        | 0.0659      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.933      |
|    explained_variance   | 0.771       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.84        |
|    n_updates            | 6300        |
|    policy_gradient_loss | -0.00346    |
|    value_loss           | 71.6        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.36e+03     |
| time/                   |              |
|    fps                  | 670          |
|    iterations           | 641          |
|    time_elapsed         | 1956         |
|    total_timesteps      | 1312768      |
| train/                  |              |
|    approx_kl            | 0.0033175333 |
|    clip_fraction        | 0.0465       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.735       |
|    explained_variance   | 0.695        |
|    learning_rate        | 0.0003       |
|    loss                 | 43.3         |
|    n_updates            | 6400         |
|    policy_gradient_loss | -0.00261     |
|    value_loss           | 103          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.35e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 651          |
|    time_elapsed         | 1986         |
|    total_timesteps      | 1333248      |
| train/                  |              |
|    approx_kl            | 0.0046992796 |
|    clip_fraction        | 0.0486       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.814       |
|    explained_variance   | 0.822        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.58         |
|    n_updates            | 6500         |
|    policy_gradient_loss | -0.00306     |
|    value_loss           | 78.7         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.34e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 661          |
|    time_elapsed         | 2017         |
|    total_timesteps      | 1353728      |
| train/                  |              |
|    approx_kl            | 0.0067201303 |
|    clip_fraction        | 0.0711       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.95        |
|    explained_variance   | 0.792        |
|    learning_rate        | 0.0003       |
|    loss                 | 5.17         |
|    n_updates            | 6600         |
|    policy_gradient_loss | -0.00324     |
|    value_loss           | 54.2         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.33e+03    |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 671         |
|    time_elapsed         | 2047        |
|    total_timesteps      | 1374208     |
| train/                  |             |
|    approx_kl            | 0.006983569 |
|    clip_fraction        | 0.0505      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.91       |
|    explained_variance   | 0.702       |
|    learning_rate        | 0.0003      |
|    loss                 | 19.9        |
|    n_updates            | 6700        |
|    policy_gradient_loss | -0.0016     |
|    value_loss           | 100         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.36e+03    |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 681         |
|    time_elapsed         | 2078        |
|    total_timesteps      | 1394688     |
| train/                  |             |
|    approx_kl            | 0.007220693 |
|    clip_fraction        | 0.0536      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.794      |
|    explained_variance   | 0.791       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.4        |
|    n_updates            | 6800        |
|    policy_gradient_loss | -0.00749    |
|    value_loss           | 159         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.39e+03    |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 691         |
|    time_elapsed         | 2108        |
|    total_timesteps      | 1415168     |
| train/                  |             |
|    approx_kl            | 0.008057748 |
|    clip_fraction        | 0.0612      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.811      |
|    explained_variance   | 0.742       |
|    learning_rate        | 0.0003      |
|    loss                 | 18.2        |
|    n_updates            | 6900        |
|    policy_gradient_loss | -0.00374    |
|    value_loss           | 128         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 701          |
|    time_elapsed         | 2138         |
|    total_timesteps      | 1435648      |
| train/                  |              |
|    approx_kl            | 0.0068991175 |
|    clip_fraction        | 0.0509       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.831       |
|    explained_variance   | 0.837        |
|    learning_rate        | 0.0003       |
|    loss                 | 54.9         |
|    n_updates            | 7000         |
|    policy_gradient_loss | -0.00243     |
|    value_loss           | 112          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 711          |
|    time_elapsed         | 2169         |
|    total_timesteps      | 1456128      |
| train/                  |              |
|    approx_kl            | 0.0060918676 |
|    clip_fraction        | 0.0407       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.982       |
|    explained_variance   | 0.799        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.5         |
|    n_updates            | 7100         |
|    policy_gradient_loss | -0.00301     |
|    value_loss           | 141          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 721         |
|    time_elapsed         | 2199        |
|    total_timesteps      | 1476608     |
| train/                  |             |
|    approx_kl            | 0.011417211 |
|    clip_fraction        | 0.093       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.944      |
|    explained_variance   | 0.823       |
|    learning_rate        | 0.0003      |
|    loss                 | 8.25        |
|    n_updates            | 7200        |
|    policy_gradient_loss | -0.00327    |
|    value_loss           | 60.3        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.39e+03    |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 731         |
|    time_elapsed         | 2229        |
|    total_timesteps      | 1497088     |
| train/                  |             |
|    approx_kl            | 0.005988026 |
|    clip_fraction        | 0.0568      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.85       |
|    explained_variance   | 0.616       |
|    learning_rate        | 0.0003      |
|    loss                 | 28.6        |
|    n_updates            | 7300        |
|    policy_gradient_loss | -0.00245    |
|    value_loss           | 173         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.37e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 741          |
|    time_elapsed         | 2260         |
|    total_timesteps      | 1517568      |
| train/                  |              |
|    approx_kl            | 0.0076109874 |
|    clip_fraction        | 0.0823       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.97        |
|    explained_variance   | 0.717        |
|    learning_rate        | 0.0003       |
|    loss                 | 26.7         |
|    n_updates            | 7400         |
|    policy_gradient_loss | -0.00276     |
|    value_loss           | 64.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.37e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 751          |
|    time_elapsed         | 2290         |
|    total_timesteps      | 1538048      |
| train/                  |              |
|    approx_kl            | 0.0049724113 |
|    clip_fraction        | 0.0571       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.88        |
|    explained_variance   | 0.787        |
|    learning_rate        | 0.0003       |
|    loss                 | 45.9         |
|    n_updates            | 7500         |
|    policy_gradient_loss | -0.00315     |
|    value_loss           | 120          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 761          |
|    time_elapsed         | 2321         |
|    total_timesteps      | 1558528      |
| train/                  |              |
|    approx_kl            | 0.0052068476 |
|    clip_fraction        | 0.0705       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.836       |
|    explained_variance   | 0.866        |
|    learning_rate        | 0.0003       |
|    loss                 | 7.57         |
|    n_updates            | 7600         |
|    policy_gradient_loss | -0.00114     |
|    value_loss           | 59.2         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 671          |
|    iterations           | 771          |
|    time_elapsed         | 2351         |
|    total_timesteps      | 1579008      |
| train/                  |              |
|    approx_kl            | 0.0066239308 |
|    clip_fraction        | 0.0709       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.866       |
|    explained_variance   | 0.547        |
|    learning_rate        | 0.0003       |
|    loss                 | 10.6         |
|    n_updates            | 7700         |
|    policy_gradient_loss | -0.006       |
|    value_loss           | 104          |
------------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mea

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 670         |
|    iterations           | 781         |
|    time_elapsed         | 2386        |
|    total_timesteps      | 1599488     |
| train/                  |             |
|    approx_kl            | 0.014755025 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.923      |
|    explained_variance   | 0.821       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.38        |
|    n_updates            | 7800        |
|    policy_gradient_loss | -0.00262    |
|    value_loss           | 45.6        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 669         |
|    iterations           | 791         |
|    time_elapsed         | 2419        |
|    total_timesteps      | 1619968     |
| train/                  |             |
|    approx_kl            | 0.006816001 |
|    clip_fraction        | 0.0612      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.893      |
|    explained_variance   | 0.565       |
|    learning_rate        | 0.0003      |
|    loss                 | 41          |
|    n_updates            | 7900        |
|    policy_gradient_loss | -0.00333    |
|    value_loss           | 162         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.4e+03      |
| time/                   |              |
|    fps                  | 668          |
|    iterations           | 801          |
|    time_elapsed         | 2452         |
|    total_timesteps      | 1640448      |
| train/                  |              |
|    approx_kl            | 0.0043876846 |
|    clip_fraction        | 0.04         |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.841       |
|    explained_variance   | 0.742        |
|    learning_rate        | 0.0003       |
|    loss                 | 19.9         |
|    n_updates            | 8000         |
|    policy_gradient_loss | -0.00283     |
|    value_loss           | 105          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 811          |
|    time_elapsed         | 2486         |
|    total_timesteps      | 1660928      |
| train/                  |              |
|    approx_kl            | 0.0062734084 |
|    clip_fraction        | 0.0649       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.812       |
|    explained_variance   | 0.801        |
|    learning_rate        | 0.0003       |
|    loss                 | 12.8         |
|    n_updates            | 8100         |
|    policy_gradient_loss | -0.00273     |
|    value_loss           | 101          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.38e+03     |
| time/                   |              |
|    fps                  | 667          |
|    iterations           | 821          |
|    time_elapsed         | 2519         |
|    total_timesteps      | 1681408      |
| train/                  |              |
|    approx_kl            | 0.0065020183 |
|    clip_fraction        | 0.0711       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.777       |
|    explained_variance   | 0.79         |
|    learning_rate        | 0.0003       |
|    loss                 | 50.3         |
|    n_updates            | 8200         |
|    policy_gradient_loss | -0.00423     |
|    value_loss           | 108          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 666          |
|    iterations           | 831          |
|    time_elapsed         | 2553         |
|    total_timesteps      | 1701888      |
| train/                  |              |
|    approx_kl            | 0.0055442513 |
|    clip_fraction        | 0.0585       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.679       |
|    explained_variance   | 0.81         |
|    learning_rate        | 0.0003       |
|    loss                 | 8.38         |
|    n_updates            | 8300         |
|    policy_gradient_loss | -0.00575     |
|    value_loss           | 84.3         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 665         |
|    iterations           | 841         |
|    time_elapsed         | 2586        |
|    total_timesteps      | 1722368     |
| train/                  |             |
|    approx_kl            | 0.008793211 |
|    clip_fraction        | 0.0669      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.641      |
|    explained_variance   | 0.804       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.5        |
|    n_updates            | 8400        |
|    policy_gradient_loss | -0.00183    |
|    value_loss           | 91.1        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 665          |
|    iterations           | 851          |
|    time_elapsed         | 2620         |
|    total_timesteps      | 1742848      |
| train/                  |              |
|    approx_kl            | 0.0040829615 |
|    clip_fraction        | 0.0462       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.678       |
|    explained_variance   | 0.69         |
|    learning_rate        | 0.0003       |
|    loss                 | 47.4         |
|    n_updates            | 8500         |
|    policy_gradient_loss | -0.00118     |
|    value_loss           | 163          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.42e+03     |
| time/                   |              |
|    fps                  | 664          |
|    iterations           | 861          |
|    time_elapsed         | 2655         |
|    total_timesteps      | 1763328      |
| train/                  |              |
|    approx_kl            | 0.0038962532 |
|    clip_fraction        | 0.0563       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.563       |
|    explained_variance   | 0.791        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.9         |
|    n_updates            | 8600         |
|    policy_gradient_loss | -0.0033      |
|    value_loss           | 98.1         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.42e+03    |
| time/                   |             |
|    fps                  | 663         |
|    iterations           | 871         |
|    time_elapsed         | 2686        |
|    total_timesteps      | 1783808     |
| train/                  |             |
|    approx_kl            | 0.003986103 |
|    clip_fraction        | 0.0504      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.557      |
|    explained_variance   | 0.716       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.4        |
|    n_updates            | 8700        |
|    policy_gradient_loss | -0.00162    |
|    value_loss           | 124         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.41e+03     |
| time/                   |              |
|    fps                  | 662          |
|    iterations           | 881          |
|    time_elapsed         | 2721         |
|    total_timesteps      | 1804288      |
| train/                  |              |
|    approx_kl            | 0.0057662707 |
|    clip_fraction        | 0.066        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.695       |
|    explained_variance   | 0.853        |
|    learning_rate        | 0.0003       |
|    loss                 | 123          |
|    n_updates            | 8800         |
|    policy_gradient_loss | -0.00461     |
|    value_loss           | 92.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.4e+03      |
| time/                   |              |
|    fps                  | 661          |
|    iterations           | 891          |
|    time_elapsed         | 2757         |
|    total_timesteps      | 1824768      |
| train/                  |              |
|    approx_kl            | 0.0068907263 |
|    clip_fraction        | 0.0705       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.793       |
|    explained_variance   | 0.797        |
|    learning_rate        | 0.0003       |
|    loss                 | 9.48         |
|    n_updates            | 8900         |
|    policy_gradient_loss | -0.00138     |
|    value_loss           | 88.5         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 660         |
|    iterations           | 901         |
|    time_elapsed         | 2791        |
|    total_timesteps      | 1845248     |
| train/                  |             |
|    approx_kl            | 0.008600887 |
|    clip_fraction        | 0.0556      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.707      |
|    explained_variance   | 0.897       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.48        |
|    n_updates            | 9000        |
|    policy_gradient_loss | -0.00205    |
|    value_loss           | 50.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 659         |
|    iterations           | 911         |
|    time_elapsed         | 2828        |
|    total_timesteps      | 1865728     |
| train/                  |             |
|    approx_kl            | 0.009907683 |
|    clip_fraction        | 0.0691      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.819      |
|    explained_variance   | 0.795       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.2        |
|    n_updates            | 9100        |
|    policy_gradient_loss | -0.00333    |
|    value_loss           | 90          |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.41e+03    |
| time/                   |             |
|    fps                  | 658         |
|    iterations           | 921         |
|    time_elapsed         | 2864        |
|    total_timesteps      | 1886208     |
| train/                  |             |
|    approx_kl            | 0.005970427 |
|    clip_fraction        | 0.0588      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.746      |
|    explained_variance   | 0.823       |
|    learning_rate        | 0.0003      |
|    loss                 | 67.6        |
|    n_updates            | 9200        |
|    policy_gradient_loss | -0.00377    |
|    value_loss           | 96.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 657          |
|    iterations           | 931          |
|    time_elapsed         | 2899         |
|    total_timesteps      | 1906688      |
| train/                  |              |
|    approx_kl            | 0.0044612214 |
|    clip_fraction        | 0.0591       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.737       |
|    explained_variance   | 0.848        |
|    learning_rate        | 0.0003       |
|    loss                 | 21.8         |
|    n_updates            | 9300         |
|    policy_gradient_loss | -0.00238     |
|    value_loss           | 77.6         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.4e+03     |
| time/                   |             |
|    fps                  | 657         |
|    iterations           | 941         |
|    time_elapsed         | 2932        |
|    total_timesteps      | 1927168     |
| train/                  |             |
|    approx_kl            | 0.004482735 |
|    clip_fraction        | 0.0502      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.878      |
|    explained_variance   | 0.872       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.1        |
|    n_updates            | 9400        |
|    policy_gradient_loss | -0.00187    |
|    value_loss           | 123         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | 1.38e+03    |
| time/                   |             |
|    fps                  | 657         |
|    iterations           | 951         |
|    time_elapsed         | 2962        |
|    total_timesteps      | 1947648     |
| train/                  |             |
|    approx_kl            | 0.005605928 |
|    clip_fraction        | 0.0434      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.761      |
|    explained_variance   | 0.816       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.7        |
|    n_updates            | 9500        |
|    policy_gradient_loss | -0.00095    |
|    value_loss           | 89.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.37e+03     |
| time/                   |              |
|    fps                  | 657          |
|    iterations           | 961          |
|    time_elapsed         | 2993         |
|    total_timesteps      | 1968128      |
| train/                  |              |
|    approx_kl            | 0.0066091795 |
|    clip_fraction        | 0.0641       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.784       |
|    explained_variance   | 0.766        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.15         |
|    n_updates            | 9600         |
|    policy_gradient_loss | -0.00627     |
|    value_loss           | 94.1         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1e+03        |
|    ep_rew_mean          | 1.39e+03     |
| time/                   |              |
|    fps                  | 657          |
|    iterations           | 971          |
|    time_elapsed         | 3026         |
|    total_timesteps      | 1988608      |
| train/                  |              |
|    approx_kl            | 0.0040359646 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.624       |
|    explained_variance   | 0.813        |
|    learning_rate        | 0.0003       |
|    loss                 | 13.5         |
|    n_updates            | 9700         |
|    policy_gradient_loss | -0.00313     |
|    value_loss           | 65.9         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

<stable_baselines3.ppo.ppo.PPO at 0x22abef053a0>

In [4]:
# Location of the PPO checkpoint, expressed relative to the current working
# directory and assembled with os.path.join so the separator is OS-appropriate.
PPO_path = os.path.join(
    'Training',
    'Saved Models',
    'PPO_model',
)

In [35]:
# Persist the trained model to the checkpoint location held in PPO_path.
model.save(PPO_path)

In [12]:
# Remove the in-memory model binding; presumably done so that the following
# PPO.load cell is what provides `model` from here on — TODO confirm intent.
del model

In [5]:
# Restore the saved PPO agent from PPO_path and attach the already-defined
# environment `env` to it.
model = PPO.load(
    PPO_path,
    env=env,
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [6]:
# Evaluate the loaded policy on `env` for 10 episodes with rendering enabled.
# The call is the cell's last expression, so its return value is displayed.
evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    render=True,
)



KeyboardInterrupt: 

In [7]:
# Create a fresh environment instance, then reload the saved PPO agent bound
# to that new instance.
env = ActivVisionEnv()
model = PPO.load(
    PPO_path,
    env=env,
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
